1 | /* Loop Vectorization |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> and |
4 | Ira Rosen <irar@il.ibm.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #define INCLUDE_ALGORITHM |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "target.h" |
28 | #include "rtl.h" |
29 | #include "tree.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "tree-pass.h" |
33 | #include "ssa.h" |
34 | #include "optabs-tree.h" |
35 | #include "memmodel.h" |
36 | #include "optabs.h" |
37 | #include "diagnostic-core.h" |
38 | #include "fold-const.h" |
39 | #include "stor-layout.h" |
40 | #include "cfganal.h" |
41 | #include "gimplify.h" |
42 | #include "gimple-iterator.h" |
43 | #include "gimplify-me.h" |
44 | #include "tree-ssa-loop-ivopts.h" |
45 | #include "tree-ssa-loop-manip.h" |
46 | #include "tree-ssa-loop-niter.h" |
47 | #include "tree-ssa-loop.h" |
48 | #include "cfgloop.h" |
49 | #include "tree-scalar-evolution.h" |
50 | #include "tree-vectorizer.h" |
51 | #include "gimple-fold.h" |
52 | #include "cgraph.h" |
53 | #include "tree-cfg.h" |
54 | #include "tree-if-conv.h" |
55 | #include "internal-fn.h" |
56 | #include "tree-vector-builder.h" |
57 | #include "vec-perm-indices.h" |
58 | #include "tree-eh.h" |
59 | #include "case-cfn-macros.h" |
60 | #include "langhooks.h" |
61 | |
62 | /* Loop Vectorization Pass. |
63 | |
64 | This pass tries to vectorize loops. |
65 | |
66 | For example, the vectorizer transforms the following simple loop: |
67 | |
68 | short a[N]; short b[N]; short c[N]; int i; |
69 | |
70 | for (i=0; i<N; i++){ |
71 | a[i] = b[i] + c[i]; |
72 | } |
73 | |
74 | as if it had been manually vectorized by rewriting the source code into: |
75 | |
76 | typedef int __attribute__((mode(V8HI))) v8hi; |
77 | short a[N]; short b[N]; short c[N]; int i; |
78 | v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; |
79 | v8hi va, vb, vc; |
80 | |
81 | for (i=0; i<N/8; i++){ |
82 | vb = pb[i]; |
83 | vc = pc[i]; |
84 | va = vb + vc; |
85 | pa[i] = va; |
86 | } |
87 | |
88 | The main entry to this pass is vectorize_loops(), in which |
89 | the vectorizer applies a set of analyses on a given set of loops, |
90 | followed by the actual vectorization transformation for the loops that |
91 | had successfully passed the analysis phase. |
92 | Throughout this pass we make a distinction between two types of |
93 | data: scalars (which are represented by SSA_NAMES), and memory references |
94 | ("data-refs"). These two types of data require different handling both |
95 | during analysis and transformation. The types of data-refs that the |
96 | vectorizer currently supports are ARRAY_REFS whose base is an array DECL |
97 | (not a pointer), and INDIRECT_REFS through pointers; both array and pointer |
98 | accesses are required to have a simple (consecutive) access pattern. |
99 | |
100 | Analysis phase: |
101 | =============== |
102 | The driver for the analysis phase is vect_analyze_loop(). |
103 | It applies a set of analyses, some of which rely on the scalar evolution |
104 | analyzer (scev) developed by Sebastian Pop. |
105 | |
106 | During the analysis phase the vectorizer records some information |
107 | per stmt in a "stmt_vec_info" struct which is attached to each stmt in the |
108 | loop, as well as general information about the loop as a whole, which is |
109 | recorded in a "loop_vec_info" struct attached to each loop. |
110 | |
111 | Transformation phase: |
112 | ===================== |
113 | The loop transformation phase scans all the stmts in the loop, and |
114 | creates a vector stmt (or a sequence of stmts) for each scalar stmt S in |
115 | the loop that needs to be vectorized. It inserts the vector code sequence |
116 | just before the scalar stmt S, and records a pointer to the vector code |
117 | in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct |
118 | attached to S). This pointer will be used for the vectorization of following |
119 | stmts which use the def of stmt S. Stmt S is removed if it writes to memory; |
120 | otherwise, we rely on dead code elimination for removing it. |
121 | |
122 | For example, say stmt S1 was vectorized into stmt VS1: |
123 | |
124 | VS1: vb = px[i]; |
125 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
126 | S2: a = b; |
127 | |
128 | To vectorize stmt S2, the vectorizer first finds the stmt that defines |
129 | the operand 'b' (S1), and gets the relevant vector def 'vb' from the |
130 | vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The |
131 | resulting sequence would be: |
132 | |
133 | VS1: vb = px[i]; |
134 | S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 |
135 | VS2: va = vb; |
136 | S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 |
137 | |
138 | Operands that are not SSA_NAMEs are data-refs that appear in |
139 | load/store operations (like 'x[i]' in S1), and are handled differently. |
140 | |
141 | Target modeling: |
142 | ================= |
143 | Currently the only target specific information that is used is the |
144 | size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". |
145 | Targets that can support different sizes of vectors, for now will need |
146 | to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More |
147 | flexibility will be added in the future. |
148 | |
149 | Since we only vectorize operations whose vector form can be |
150 | expressed using existing tree codes, to verify that an operation is |
151 | supported, the vectorizer checks the relevant optab at the relevant |
152 | machine_mode (e.g., optab_handler (add_optab, V8HImode)). If |
153 | the value found is CODE_FOR_nothing, then there's no target support, and |
154 | we can't vectorize the stmt. |
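
     For example, the kind of query made for the "va = vb + vc" addition
     in the example above is roughly (an illustrative sketch only):

       if (optab_handler (add_optab, V8HImode) == CODE_FOR_nothing)
         ... no target support, so the stmt is not vectorized ...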
155 | |
156 | For additional information on this project see: |
157 | http://gcc.gnu.org/projects/tree-ssa/vectorization.html |
158 | */ |
159 | |
160 | static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *, |
161 | unsigned *); |
162 | static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, |
163 | bool *, bool *, bool); |
164 | |
165 | /* Subroutine of vect_determine_vf_for_stmt that handles only one |
166 | statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE |
167 | may already be set for general statements (not just data refs). */ |
168 | |
169 | static opt_result |
170 | vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, |
171 | bool vectype_maybe_set_p, |
172 | poly_uint64 *vf) |
173 | { |
174 | gimple *stmt = stmt_info->stmt; |
175 | |
176 | if ((!STMT_VINFO_RELEVANT_P (stmt_info) |
177 | && !STMT_VINFO_LIVE_P (stmt_info)) |
178 | || gimple_clobber_p (stmt)) |
179 | { |
180 | if (dump_enabled_p ()) |
181 | dump_printf_loc (MSG_NOTE, vect_location, "skip.\n" ); |
182 | return opt_result::success (); |
183 | } |
184 | |
185 | tree stmt_vectype, nunits_vectype; |
186 | opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, |
187 | &stmt_vectype, |
188 | &nunits_vectype); |
189 | if (!res) |
190 | return res; |
191 | |
192 | if (stmt_vectype) |
193 | { |
194 | if (STMT_VINFO_VECTYPE (stmt_info)) |
195 | /* The only case when a vectype had been already set is for stmts |
196 | that contain a data ref, or for "pattern-stmts" (stmts generated |
197 | by the vectorizer to represent/replace a certain idiom). */ |
198 | gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) |
199 | || vectype_maybe_set_p) |
200 | && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); |
201 | else |
202 | STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; |
203 | } |
204 | |
205 | if (nunits_vectype) |
206 | vect_update_max_nunits (vf, nunits_vectype); |
207 | |
208 | return opt_result::success (); |
209 | } |
210 | |
211 | /* Subroutine of vect_determine_vectorization_factor. Set the vector |
212 | types of STMT_INFO and all attached pattern statements and update |
213 | the vectorization factor VF accordingly. Return true on success |
214 | or false if something prevented vectorization. */ |
215 | |
216 | static opt_result |
217 | vect_determine_vf_for_stmt (vec_info *vinfo, |
218 | stmt_vec_info stmt_info, poly_uint64 *vf) |
219 | { |
220 | if (dump_enabled_p ()) |
221 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G" , |
222 | stmt_info->stmt); |
223 | opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); |
224 | if (!res) |
225 | return res; |
226 | |
227 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
228 | && STMT_VINFO_RELATED_STMT (stmt_info)) |
229 | { |
230 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
231 | stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
232 | |
233 | /* If a pattern statement has def stmts, analyze them too. */ |
234 | for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); |
235 | !gsi_end_p (si); gsi_next (&si)) |
236 | { |
237 | stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); |
238 | if (dump_enabled_p ()) |
239 | dump_printf_loc (MSG_NOTE, vect_location, |
240 | "==> examining pattern def stmt: %G" , |
241 | def_stmt_info->stmt); |
242 | res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); |
243 | if (!res) |
244 | return res; |
245 | } |
246 | |
247 | if (dump_enabled_p ()) |
248 | dump_printf_loc (MSG_NOTE, vect_location, |
249 | "==> examining pattern statement: %G" , |
250 | stmt_info->stmt); |
251 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); |
252 | if (!res) |
253 | return res; |
254 | } |
255 | |
256 | return opt_result::success (); |
257 | } |
258 | |
259 | /* Function vect_determine_vectorization_factor |
260 | |
261 | Determine the vectorization factor (VF). VF is the number of data elements |
262 | that are operated upon in parallel in a single iteration of the vectorized |
263 | loop. For example, when vectorizing a loop that operates on 4byte elements, |
264 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 |
265 | elements can fit in a single vector register. |
266 | |
267 | We currently support vectorization of loops in which all types operated upon |
268 | are of the same size. Therefore this function currently sets VF according to |
269 | the size of the types operated upon, and fails if there are multiple sizes |
270 | in the loop. |
271 | |
272 | VF is also the factor by which the loop iterations are strip-mined, e.g.: |
273 | original loop: |
274 | for (i=0; i<N; i++){ |
275 | a[i] = b[i] + c[i]; |
276 | } |
277 | |
278 | vectorized loop: |
279 | for (i=0; i<N; i+=VF){ |
280 | a[i:VF] = b[i:VF] + c[i:VF]; |
281 | } |
282 | */ |
283 | |
284 | static opt_result |
285 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) |
286 | { |
287 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
288 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
289 | unsigned nbbs = loop->num_nodes; |
290 | poly_uint64 vectorization_factor = 1; |
291 | tree scalar_type = NULL_TREE; |
292 | gphi *phi; |
293 | tree vectype; |
294 | stmt_vec_info stmt_info; |
295 | unsigned i; |
296 | |
297 | DUMP_VECT_SCOPE ("vect_determine_vectorization_factor" ); |
298 | |
299 | for (i = 0; i < nbbs; i++) |
300 | { |
301 | basic_block bb = bbs[i]; |
302 | |
303 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
304 | gsi_next (&si)) |
305 | { |
306 | phi = si.phi (); |
307 | stmt_info = loop_vinfo->lookup_stmt (phi); |
308 | if (dump_enabled_p ()) |
309 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G" , |
310 | (gimple *) phi); |
311 | |
312 | gcc_assert (stmt_info); |
313 | |
314 | if (STMT_VINFO_RELEVANT_P (stmt_info) |
315 | || STMT_VINFO_LIVE_P (stmt_info)) |
316 | { |
317 | gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); |
318 | scalar_type = TREE_TYPE (PHI_RESULT (phi)); |
319 | |
320 | if (dump_enabled_p ()) |
321 | dump_printf_loc (MSG_NOTE, vect_location, |
322 | "get vectype for scalar type: %T\n" , |
323 | scalar_type); |
324 | |
325 | vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
326 | if (!vectype) |
327 | return opt_result::failure_at (phi, |
328 | "not vectorized: unsupported " |
329 | "data-type %T\n" , |
330 | scalar_type); |
331 | STMT_VINFO_VECTYPE (stmt_info) = vectype; |
332 | |
333 | if (dump_enabled_p ()) |
334 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n" , |
335 | vectype); |
336 | |
337 | if (dump_enabled_p ()) |
338 | { |
339 | dump_printf_loc (MSG_NOTE, vect_location, "nunits = " ); |
340 | dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); |
341 | dump_printf (MSG_NOTE, "\n" ); |
342 | } |
343 | |
344 | vect_update_max_nunits (&vectorization_factor, vectype); |
345 | } |
346 | } |
347 | |
348 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
349 | gsi_next (&si)) |
350 | { |
351 | if (is_gimple_debug (gsi_stmt (si))) |
352 | continue; |
353 | stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
354 | opt_result res |
355 | = vect_determine_vf_for_stmt (loop_vinfo, |
356 | stmt_info, &vectorization_factor); |
357 | if (!res) |
358 | return res; |
359 | } |
360 | } |
361 | |
362 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ |
363 | if (dump_enabled_p ()) |
364 | { |
365 | dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = " ); |
366 | dump_dec (MSG_NOTE, vectorization_factor); |
367 | dump_printf (MSG_NOTE, "\n" ); |
368 | } |
369 | |
370 | if (known_le (vectorization_factor, 1U)) |
371 | return opt_result::failure_at (vect_location, |
372 | "not vectorized: unsupported data-type\n"); |
373 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
374 | return opt_result::success (); |
375 | } |
376 | |
377 | |
378 | /* Function vect_is_simple_iv_evolution. |
379 | |
380 | FORNOW: A simple evolution of an induction variable in the loop is |
381 | considered a polynomial evolution. */ |
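/* For example (illustrative), for a counter incremented by 1 in each
   iteration:

     i_1 = PHI <0 (preheader), i_2 (latch)>
     i_2 = i_1 + 1;

   the access function computed by scev is the chrec {0, +, 1}_loop, for
   which *INIT is set to 0 and *STEP to 1.  */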
382 | |
383 | static bool |
384 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, |
385 | tree * step) |
386 | { |
387 | tree init_expr; |
388 | tree step_expr; |
389 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); |
390 | basic_block bb; |
391 | |
392 | /* When there is no evolution in this loop, the evolution function |
393 | is not "simple". */ |
394 | if (evolution_part == NULL_TREE) |
395 | return false; |
396 | |
397 | /* When the evolution is a polynomial of degree >= 2 |
398 | the evolution function is not "simple". */ |
399 | if (tree_is_chrec (evolution_part)) |
400 | return false; |
401 | |
402 | step_expr = evolution_part; |
403 | init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); |
404 | |
405 | if (dump_enabled_p ()) |
406 | dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n" , |
407 | step_expr, init_expr); |
408 | |
409 | *init = init_expr; |
410 | *step = step_expr; |
411 | |
412 | if (TREE_CODE (step_expr) != INTEGER_CST |
413 | && (TREE_CODE (step_expr) != SSA_NAME |
414 | || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr))) |
415 | && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb)) |
416 | || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) |
417 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)) |
418 | || !flag_associative_math))) |
419 | && (TREE_CODE (step_expr) != REAL_CST |
420 | || !flag_associative_math)) |
421 | { |
422 | if (dump_enabled_p ()) |
423 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
424 | "step unknown.\n" ); |
425 | return false; |
426 | } |
427 | |
428 | return true; |
429 | } |
430 | |
431 | /* Function vect_is_nonlinear_iv_evolution |
432 | |
433 | Nonlinear induction is supported only for integer types, in the forms |
434 | 1. neg |
435 | 2. mul by constant |
436 | 3. lshift/rshift by constant. |
437 | |
438 | For neg induction, return a fake step as integer -1. */ |
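/* For example (illustrative scalar sources only), loops that update x as

     x = -x;          x = x * 3;          x = x >> 1;

   give rise to the vect_step_op_neg, vect_step_op_mul and vect_step_op_shr
   evolutions respectively; for the neg case the recorded step is the
   constant -1, as noted above.  */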
439 | static bool |
440 | vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, |
441 | gphi* loop_phi_node, tree *init, tree *step) |
442 | { |
443 | tree init_expr, ev_expr, result, op1, op2; |
444 | gimple* def; |
445 | |
446 | if (gimple_phi_num_args (loop_phi_node) != 2) |
447 | return false; |
448 | |
449 | init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop)); |
450 | ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop)); |
451 | |
452 | /* Support nonlinear induction only for integer type. */ |
453 | if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr))) |
454 | return false; |
455 | |
456 | *init = init_expr; |
457 | result = PHI_RESULT (loop_phi_node); |
458 | |
459 | if (TREE_CODE (ev_expr) != SSA_NAME |
460 | || ((def = SSA_NAME_DEF_STMT (ev_expr)), false) |
461 | || !is_gimple_assign (def)) |
462 | return false; |
463 | |
464 | enum tree_code t_code = gimple_assign_rhs_code (def); |
465 | switch (t_code) |
466 | { |
467 | case NEGATE_EXPR: |
468 | if (gimple_assign_rhs1 (def) != result) |
469 | return false; |
470 | *step = build_int_cst (TREE_TYPE (init_expr), -1); |
471 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg; |
472 | break; |
473 | |
474 | case RSHIFT_EXPR: |
475 | case LSHIFT_EXPR: |
476 | case MULT_EXPR: |
477 | op1 = gimple_assign_rhs1 (def); |
478 | op2 = gimple_assign_rhs2 (def); |
479 | if (TREE_CODE (op2) != INTEGER_CST |
480 | || op1 != result) |
481 | return false; |
482 | *step = op2; |
483 | if (t_code == LSHIFT_EXPR) |
484 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl; |
485 | else if (t_code == RSHIFT_EXPR) |
486 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shr; |
487 | /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul. */ |
488 | else |
489 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_mul; |
490 | break; |
491 | |
492 | default: |
493 | return false; |
494 | } |
495 | |
496 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init; |
497 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step; |
498 | |
499 | return true; |
500 | } |
501 | |
502 | /* Return true if PHI, described by STMT_INFO, is the inner PHI in |
503 | what we are assuming is a double reduction. For example, given |
504 | a structure like this: |
505 | |
506 | outer1: |
507 | x_1 = PHI <x_4(outer2), ...>; |
508 | ... |
509 | |
510 | inner: |
511 | x_2 = PHI <x_1(outer1), ...>; |
512 | ... |
513 | x_3 = ...; |
514 | ... |
515 | |
516 | outer2: |
517 | x_4 = PHI <x_3(inner)>; |
518 | ... |
519 | |
520 | outer loop analysis would treat x_1 as a double reduction phi and |
521 | this function would then return true for x_2. */ |
522 | |
523 | static bool |
524 | vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi) |
525 | { |
526 | use_operand_p use_p; |
527 | ssa_op_iter op_iter; |
528 | FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE) |
529 | if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p))) |
530 | if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def) |
531 | return true; |
532 | return false; |
533 | } |
534 | |
535 | /* Returns true if Phi is a first-order recurrence. A first-order |
536 | recurrence is a non-reduction recurrence relation in which the value of |
537 | the recurrence in the current loop iteration equals a value defined in |
538 | the previous iteration. */ |
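/* For example (an illustrative scalar loop, not taken from this file):

     t = 0;
     for (i = 0; i < n; i++)
       {
         b[i] = a[i] - t;
         t = a[i];
       }

   the loop-header PHI for t is a first-order recurrence: the value of t
   used in iteration i is the value that was stored in iteration i-1.  */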
539 | |
540 | static bool |
541 | vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop, |
542 | gphi *phi) |
543 | { |
544 | /* A nested cycle isn't vectorizable as first order recurrence. */ |
545 | if (LOOP_VINFO_LOOP (loop_vinfo) != loop) |
546 | return false; |
547 | |
548 | /* Ensure the loop latch definition is from within the loop. */ |
549 | edge latch = loop_latch_edge (loop); |
550 | tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch); |
551 | if (TREE_CODE (ldef) != SSA_NAME |
552 | || SSA_NAME_IS_DEFAULT_DEF (ldef) |
553 | || is_a <gphi *> (SSA_NAME_DEF_STMT (ldef)) |
554 | || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef)))) |
555 | return false; |
556 | |
557 | tree def = gimple_phi_result (phi); |
558 | |
559 | /* Ensure every use_stmt of the phi node is dominated by the latch |
560 | definition. */ |
561 | imm_use_iterator imm_iter; |
562 | use_operand_p use_p; |
563 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) |
564 | if (!is_gimple_debug (USE_STMT (use_p)) |
565 | && (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p) |
566 | || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef), |
567 | USE_STMT (use_p)))) |
568 | return false; |
569 | |
570 | /* First-order recurrence autovectorization needs shuffle vector. */ |
571 | tree scalar_type = TREE_TYPE (def); |
572 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
573 | if (!vectype) |
574 | return false; |
575 | |
576 | return true; |
577 | } |
578 | |
579 | /* Function vect_analyze_scalar_cycles_1. |
580 | |
581 | Examine the cross iteration def-use cycles of scalar variables |
582 | in LOOP. LOOP_VINFO represents the loop that is now being |
583 | considered for vectorization (can be LOOP, or an outer-loop |
584 | enclosing LOOP). SLP indicates whether there will be subsequent |
585 | SLP analyses. */ |
586 | |
587 | static void |
588 | vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop, |
589 | bool slp) |
590 | { |
591 | basic_block bb = loop->header; |
592 | tree init, step; |
593 | auto_vec<stmt_vec_info, 64> worklist; |
594 | gphi_iterator gsi; |
595 | bool double_reduc, reduc_chain; |
596 | |
597 | DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles" ); |
598 | |
599 | /* First - identify all inductions. Reduction detection assumes that all the |
600 | inductions have been identified, therefore, this order must not be |
601 | changed. */ |
602 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
603 | { |
604 | gphi *phi = gsi.phi (); |
605 | tree access_fn = NULL; |
606 | tree def = PHI_RESULT (phi); |
607 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); |
608 | |
609 | if (dump_enabled_p ()) |
610 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G" , |
611 | (gimple *) phi); |
612 | |
613 | /* Skip virtual phi's. The data dependences that are associated with |
614 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ |
615 | if (virtual_operand_p (def)) |
616 | continue; |
617 | |
618 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; |
619 | |
620 | /* Analyze the evolution function. */ |
621 | access_fn = analyze_scalar_evolution (loop, def); |
622 | if (access_fn) |
623 | { |
624 | STRIP_NOPS (access_fn); |
625 | if (dump_enabled_p ()) |
626 | dump_printf_loc (MSG_NOTE, vect_location, |
627 | "Access function of PHI: %T\n" , access_fn); |
628 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
629 | = initial_condition_in_loop_num (access_fn, loop->num); |
630 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) |
631 | = evolution_part_in_loop_num (access_fn, loop->num); |
632 | } |
633 | |
634 | if ((!access_fn |
635 | || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) |
636 | || !vect_is_simple_iv_evolution (loop->num, access_fn, |
637 | &init, &step) |
638 | || (LOOP_VINFO_LOOP (loop_vinfo) != loop |
639 | && TREE_CODE (step) != INTEGER_CST)) |
640 | /* Only handle nonlinear iv for same loop. */ |
641 | && (LOOP_VINFO_LOOP (loop_vinfo) != loop |
642 | || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo, |
643 | phi, &init, &step))) |
644 | { |
645 | worklist.safe_push (stmt_vinfo); |
646 | continue; |
647 | } |
648 | |
649 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
650 | != NULL_TREE); |
651 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE); |
652 | |
653 | if (dump_enabled_p ()) |
654 | dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n" ); |
655 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; |
656 | } |
657 | |
658 | |
659 | /* Second - identify all reductions and nested cycles. */ |
660 | while (worklist.length () > 0) |
661 | { |
662 | stmt_vec_info stmt_vinfo = worklist.pop (); |
663 | gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); |
664 | tree def = PHI_RESULT (phi); |
665 | |
666 | if (dump_enabled_p ()) |
667 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G" , |
668 | (gimple *) phi); |
669 | |
670 | gcc_assert (!virtual_operand_p (def) |
671 | && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); |
672 | |
673 | stmt_vec_info reduc_stmt_info |
674 | = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, |
675 | &reduc_chain, slp); |
676 | if (reduc_stmt_info) |
677 | { |
678 | STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; |
679 | STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo; |
680 | if (double_reduc) |
681 | { |
682 | if (dump_enabled_p ()) |
683 | dump_printf_loc (MSG_NOTE, vect_location, |
684 | "Detected double reduction.\n" ); |
685 | |
686 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; |
687 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def; |
688 | } |
689 | else |
690 | { |
691 | if (loop != LOOP_VINFO_LOOP (loop_vinfo)) |
692 | { |
693 | if (dump_enabled_p ()) |
694 | dump_printf_loc (MSG_NOTE, vect_location, |
695 | "Detected vectorizable nested cycle.\n" ); |
696 | |
697 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; |
698 | } |
699 | else |
700 | { |
701 | if (dump_enabled_p ()) |
702 | dump_printf_loc (MSG_NOTE, vect_location, |
703 | "Detected reduction.\n" ); |
704 | |
705 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; |
706 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def; |
707 | /* Store the reduction cycles for possible vectorization in |
708 | loop-aware SLP if it was not detected as reduction |
709 | chain. */ |
710 | if (! reduc_chain) |
711 | LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push |
712 | (reduc_stmt_info); |
713 | } |
714 | } |
715 | } |
716 | else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi)) |
717 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence; |
718 | else |
719 | if (dump_enabled_p ()) |
720 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
721 | "Unknown def-use cycle pattern.\n" ); |
722 | } |
723 | } |
724 | |
725 | |
726 | /* Function vect_analyze_scalar_cycles. |
727 | |
728 | Examine the cross iteration def-use cycles of scalar variables, by |
729 | analyzing the loop-header PHIs of scalar variables. Classify each |
730 | cycle as one of the following: invariant, induction, reduction, unknown. |
731 | We do that for the loop represented by LOOP_VINFO, and also for its |
732 | inner-loop, if it exists. |
733 | Examples for scalar cycles: |
734 | |
735 | Example1: reduction: |
736 | |
737 | loop1: |
738 | for (i=0; i<N; i++) |
739 | sum += a[i]; |
740 | |
741 | Example2: induction: |
742 | |
743 | loop2: |
744 | for (i=0; i<N; i++) |
745 | a[i] = i; */ |
746 | |
747 | static void |
748 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp) |
749 | { |
750 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
751 | |
752 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp); |
753 | |
754 | /* When vectorizing an outer-loop, the inner-loop is executed sequentially. |
755 | Reductions in such inner-loop therefore have different properties than |
756 | the reductions in the nest that gets vectorized: |
757 | 1. When vectorized, they are executed in the same order as in the original |
758 | scalar loop, so we can't change the order of computation when |
759 | vectorizing them. |
760 | 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the |
761 | current checks are too strict. */ |
762 | |
763 | if (loop->inner) |
764 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp); |
765 | } |
766 | |
767 | /* Transfer group and reduction information from STMT_INFO to its |
768 | pattern stmt. */ |
769 | |
770 | static void |
771 | vect_fixup_reduc_chain (stmt_vec_info stmt_info) |
772 | { |
773 | stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info); |
774 | stmt_vec_info stmtp; |
775 | gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp) |
776 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
777 | REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info); |
778 | do |
779 | { |
780 | stmtp = STMT_VINFO_RELATED_STMT (stmt_info); |
781 | gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp) |
782 | == STMT_VINFO_DEF_TYPE (stmt_info)); |
783 | REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp; |
784 | stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info); |
785 | if (stmt_info) |
786 | REDUC_GROUP_NEXT_ELEMENT (stmtp) |
787 | = STMT_VINFO_RELATED_STMT (stmt_info); |
788 | } |
789 | while (stmt_info); |
790 | } |
791 | |
792 | /* Fixup scalar cycles that now have their stmts detected as patterns. */ |
793 | |
794 | static void |
795 | vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) |
796 | { |
797 | stmt_vec_info first; |
798 | unsigned i; |
799 | |
800 | FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) |
801 | { |
802 | stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); |
803 | while (next) |
804 | { |
805 | if ((STMT_VINFO_IN_PATTERN_P (next) |
806 | != STMT_VINFO_IN_PATTERN_P (first)) |
807 | || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1) |
808 | break; |
809 | next = REDUC_GROUP_NEXT_ELEMENT (next); |
810 | } |
811 | /* If all reduction chain members are well-formed patterns adjust |
812 | the group to group the pattern stmts instead. */ |
813 | if (! next |
814 | && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1) |
815 | { |
816 | if (STMT_VINFO_IN_PATTERN_P (first)) |
817 | { |
818 | vect_fixup_reduc_chain (first); |
819 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] |
820 | = STMT_VINFO_RELATED_STMT (first); |
821 | } |
822 | } |
823 | /* If not all stmt in the chain are patterns or if we failed |
824 | to update STMT_VINFO_REDUC_IDX dissolve the chain and handle |
825 | it as regular reduction instead. */ |
826 | else |
827 | { |
828 | stmt_vec_info vinfo = first; |
829 | stmt_vec_info last = NULL; |
830 | while (vinfo) |
831 | { |
832 | next = REDUC_GROUP_NEXT_ELEMENT (vinfo); |
833 | REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; |
834 | REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
835 | last = vinfo; |
836 | vinfo = next; |
837 | } |
838 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first)) |
839 | = vect_internal_def; |
840 | loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last)); |
841 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i); |
842 | --i; |
843 | } |
844 | } |
845 | } |
846 | |
847 | /* Function vect_get_loop_niters. |
848 | |
849 | Determine how many iterations the loop executes and place it |
850 | in NUMBER_OF_ITERATIONS. Place the number of latch iterations |
851 | in NUMBER_OF_ITERATIONSM1. Place the condition under which the |
852 | niter information holds in ASSUMPTIONS. |
853 | |
854 | Return the loop exit conditions. */ |
855 | |
856 | |
857 | static vec<gcond *> |
858 | vect_get_loop_niters (class loop *loop, const_edge main_exit, tree *assumptions, |
859 | tree *number_of_iterations, tree *number_of_iterationsm1) |
860 | { |
861 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
862 | vec<gcond *> conds; |
863 | conds.create (exits.length ()); |
864 | class tree_niter_desc niter_desc; |
865 | tree niter_assumptions, niter, may_be_zero; |
866 | |
867 | *assumptions = boolean_true_node; |
868 | *number_of_iterationsm1 = chrec_dont_know; |
869 | *number_of_iterations = chrec_dont_know; |
870 | |
871 | DUMP_VECT_SCOPE ("get_loop_niters" ); |
872 | |
873 | if (exits.is_empty ()) |
874 | return conds; |
875 | |
876 | if (dump_enabled_p ()) |
877 | dump_printf_loc (MSG_NOTE, vect_location, "Loop has %d exits.\n" , |
878 | exits.length ()); |
879 | |
880 | edge exit; |
881 | unsigned int i; |
882 | FOR_EACH_VEC_ELT (exits, i, exit) |
883 | { |
884 | gcond *cond = get_loop_exit_condition (exit); |
885 | if (cond) |
886 | conds.safe_push (cond); |
887 | |
888 | if (dump_enabled_p ()) |
889 | dump_printf_loc (MSG_NOTE, vect_location, "Analyzing exit %d...\n" , i); |
890 | |
891 | if (exit != main_exit) |
892 | continue; |
893 | |
894 | may_be_zero = NULL_TREE; |
895 | if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) |
896 | || chrec_contains_undetermined (niter_desc.niter)) |
897 | continue; |
898 | |
899 | niter_assumptions = niter_desc.assumptions; |
900 | may_be_zero = niter_desc.may_be_zero; |
901 | niter = niter_desc.niter; |
902 | |
903 | if (may_be_zero && integer_zerop (may_be_zero)) |
904 | may_be_zero = NULL_TREE; |
905 | |
906 | if (may_be_zero) |
907 | { |
908 | if (COMPARISON_CLASS_P (may_be_zero)) |
909 | { |
910 | /* Try to combine may_be_zero with assumptions, this can simplify |
911 | computation of niter expression. */ |
912 | if (niter_assumptions && !integer_nonzerop (niter_assumptions)) |
913 | niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
914 | niter_assumptions, |
915 | fold_build1 (TRUTH_NOT_EXPR, |
916 | boolean_type_node, |
917 | may_be_zero)); |
918 | else |
919 | niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, |
920 | build_int_cst (TREE_TYPE (niter), 0), |
921 | rewrite_to_non_trapping_overflow (niter)); |
922 | |
923 | may_be_zero = NULL_TREE; |
924 | } |
925 | else if (integer_nonzerop (may_be_zero)) |
926 | { |
927 | *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0); |
928 | *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1); |
929 | continue; |
930 | } |
931 | else |
932 | continue; |
933 | } |
934 | |
935 | /* Loop assumptions are based off the normal exit. */ |
936 | *assumptions = niter_assumptions; |
937 | *number_of_iterationsm1 = niter; |
938 | |
939 | /* We want the number of loop header executions which is the number |
940 | of latch executions plus one. |
941 | ??? For UINT_MAX latch executions this number overflows to zero |
942 | for loops like do { n++; } while (n != 0); */ |
943 | if (niter && !chrec_contains_undetermined (niter)) |
944 | niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), |
945 | unshare_expr (niter), |
946 | build_int_cst (TREE_TYPE (niter), 1)); |
947 | *number_of_iterations = niter; |
948 | } |
949 | |
950 | if (dump_enabled_p ()) |
951 | dump_printf_loc (MSG_NOTE, vect_location, "All loop exits successfully analyzed.\n" ); |
952 | |
953 | return conds; |
954 | } |
955 | |
956 | /* Determine the main loop exit for the vectorizer. */ |
957 | |
958 | edge |
959 | vec_init_loop_exit_info (class loop *loop) |
960 | { |
961 | /* Before we begin we must first determine which exit is the main one and |
962 | which are auxiliary exits. */ |
963 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
964 | if (exits.length () == 1) |
965 | return exits[0]; |
966 | |
967 | /* If we have multiple exits we only support counting IVs at the moment. |
968 | Analyze all exits and return one. */ |
969 | class tree_niter_desc niter_desc; |
970 | edge candidate = NULL; |
971 | for (edge exit : exits) |
972 | { |
973 | if (!get_loop_exit_condition (exit)) |
974 | continue; |
975 | |
976 | if (number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) |
977 | && !chrec_contains_undetermined (niter_desc.niter)) |
978 | { |
979 | if (!niter_desc.may_be_zero || !candidate) |
980 | candidate = exit; |
981 | } |
982 | } |
983 | |
984 | return candidate; |
985 | } |
986 | |
987 | /* Function bb_in_loop_p |
988 | |
989 | Used as predicate for dfs order traversal of the loop bbs. */ |
990 | |
991 | static bool |
992 | bb_in_loop_p (const_basic_block bb, const void *data) |
993 | { |
994 | const class loop *const loop = (const class loop *)data; |
995 | if (flow_bb_inside_loop_p (loop, bb)) |
996 | return true; |
997 | return false; |
998 | } |
999 | |
1000 | |
1001 | /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as |
1002 | stmt_vec_info structs for all the stmts in LOOP_IN. */ |
1003 | |
1004 | _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) |
1005 | : vec_info (vec_info::loop, shared), |
1006 | loop (loop_in), |
1007 | bbs (XCNEWVEC (basic_block, loop->num_nodes)), |
1008 | num_itersm1 (NULL_TREE), |
1009 | num_iters (NULL_TREE), |
1010 | num_iters_unchanged (NULL_TREE), |
1011 | num_iters_assumptions (NULL_TREE), |
1012 | vector_costs (nullptr), |
1013 | scalar_costs (nullptr), |
1014 | th (0), |
1015 | versioning_threshold (0), |
1016 | vectorization_factor (0), |
1017 | main_loop_edge (nullptr), |
1018 | skip_main_loop_edge (nullptr), |
1019 | skip_this_loop_edge (nullptr), |
1020 | reusable_accumulators (), |
1021 | suggested_unroll_factor (1), |
1022 | max_vectorization_factor (0), |
1023 | mask_skip_niters (NULL_TREE), |
1024 | rgroup_compare_type (NULL_TREE), |
1025 | simd_if_cond (NULL_TREE), |
1026 | partial_vector_style (vect_partial_vectors_none), |
1027 | unaligned_dr (NULL), |
1028 | peeling_for_alignment (0), |
1029 | ptr_mask (0), |
1030 | ivexpr_map (NULL), |
1031 | scan_map (NULL), |
1032 | slp_unrolling_factor (1), |
1033 | inner_loop_cost_factor (param_vect_inner_loop_cost_factor), |
1034 | vectorizable (false), |
1035 | can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), |
1036 | using_partial_vectors_p (false), |
1037 | using_decrementing_iv_p (false), |
1038 | using_select_vl_p (false), |
1039 | epil_using_partial_vectors_p (false), |
1040 | partial_load_store_bias (0), |
1041 | peeling_for_gaps (false), |
1042 | peeling_for_niter (false), |
1043 | no_data_dependencies (false), |
1044 | has_mask_store (false), |
1045 | scalar_loop_scaling (profile_probability::uninitialized ()), |
1046 | scalar_loop (NULL), |
1047 | orig_loop_info (NULL), |
1048 | vec_loop_iv_exit (NULL), |
1049 | vec_epilogue_loop_iv_exit (NULL), |
1050 | scalar_loop_iv_exit (NULL) |
1051 | { |
1052 | /* CHECKME: We want to visit all BBs before their successors (except for |
1053 | latch blocks, for which this assertion wouldn't hold). In the simple |
1054 | case of the loop forms we allow, a dfs order of the BBs would be the same |
1055 | as reversed postorder traversal, so we are safe. */ |
1056 | |
1057 | unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, |
1058 | bbs, loop->num_nodes, loop); |
1059 | gcc_assert (nbbs == loop->num_nodes); |
1060 | |
1061 | for (unsigned int i = 0; i < nbbs; i++) |
1062 | { |
1063 | basic_block bb = bbs[i]; |
1064 | gimple_stmt_iterator si; |
1065 | |
1066 | for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) |
1067 | { |
1068 | gimple *phi = gsi_stmt (si); |
1069 | gimple_set_uid (phi, 0); |
1070 | add_stmt (phi); |
1071 | } |
1072 | |
1073 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
1074 | { |
1075 | gimple *stmt = gsi_stmt (si); |
1076 | gimple_set_uid (stmt, 0); |
1077 | if (is_gimple_debug (stmt)) |
1078 | continue; |
1079 | add_stmt (stmt); |
1080 | /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the |
1081 | third argument is the #pragma omp simd if (x) condition, when 0, |
1082 | loop shouldn't be vectorized, when non-zero constant, it should |
1083 | be vectorized normally, otherwise versioned with vectorized loop |
1084 | done if the condition is non-zero at runtime. */ |
1085 | if (loop_in->simduid |
1086 | && is_gimple_call (stmt) |
1087 | && gimple_call_internal_p (stmt) |
1088 | && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE |
1089 | && gimple_call_num_args (stmt) >= 3 |
1090 | && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME |
1091 | && (loop_in->simduid |
1092 | == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))) |
1093 | { |
1094 | tree arg = gimple_call_arg (stmt, 2); |
1095 | if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME) |
1096 | simd_if_cond = arg; |
1097 | else |
1098 | gcc_assert (integer_nonzerop (arg)); |
1099 | } |
1100 | } |
1101 | } |
1102 | |
1103 | epilogue_vinfos.create (6); |
1104 | } |
1105 | |
1106 | /* Free all levels of rgroup CONTROLS. */ |
1107 | |
1108 | void |
1109 | release_vec_loop_controls (vec<rgroup_controls> *controls) |
1110 | { |
1111 | rgroup_controls *rgc; |
1112 | unsigned int i; |
1113 | FOR_EACH_VEC_ELT (*controls, i, rgc) |
1114 | rgc->controls.release (); |
1115 | controls->release (); |
1116 | } |
1117 | |
1118 | /* Free all memory used by the _loop_vec_info, as well as all the |
1119 | stmt_vec_info structs of all the stmts in the loop. */ |
1120 | |
1121 | _loop_vec_info::~_loop_vec_info () |
1122 | { |
1123 | free (bbs); |
1124 | |
1125 | release_vec_loop_controls (&masks.rgc_vec); |
1126 | release_vec_loop_controls (&lens); |
1127 | delete ivexpr_map; |
1128 | delete scan_map; |
1129 | epilogue_vinfos.release (); |
1130 | delete scalar_costs; |
1131 | delete vector_costs; |
1132 | |
1133 | /* When we release an epilogue vinfo that we do not intend to use |
1134 | avoid clearing AUX of the main loop which should continue to |
1135 | point to the main loop vinfo since otherwise we'll leak that. */ |
1136 | if (loop->aux == this) |
1137 | loop->aux = NULL; |
1138 | } |
1139 | |
1140 | /* Return an invariant or register for EXPR and emit necessary |
1141 | computations in the LOOP_VINFO loop preheader. */ |
1142 | |
1143 | tree |
1144 | cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr) |
1145 | { |
1146 | if (is_gimple_reg (expr) |
1147 | || is_gimple_min_invariant (expr)) |
1148 | return expr; |
1149 | |
1150 | if (! loop_vinfo->ivexpr_map) |
1151 | loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>; |
1152 | tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr); |
1153 | if (! cached) |
1154 | { |
1155 | gimple_seq stmts = NULL; |
1156 | cached = force_gimple_operand (unshare_expr (expr), |
1157 | &stmts, true, NULL_TREE); |
1158 | if (stmts) |
1159 | { |
1160 | edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
1161 | gsi_insert_seq_on_edge_immediate (e, stmts); |
1162 | } |
1163 | } |
1164 | return cached; |
1165 | } |
1166 | |
1167 | /* Return true if we can use CMP_TYPE as the comparison type to produce |
1168 | all masks required to mask LOOP_VINFO. */ |
1169 | |
1170 | static bool |
1171 | can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) |
1172 | { |
1173 | rgroup_controls *rgm; |
1174 | unsigned int i; |
1175 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm) |
1176 | if (rgm->type != NULL_TREE |
1177 | && !direct_internal_fn_supported_p (IFN_WHILE_ULT, |
1178 | cmp_type, rgm->type, |
1179 | OPTIMIZE_FOR_SPEED)) |
1180 | return false; |
1181 | return true; |
1182 | } |
1183 | |
1184 | /* Calculate the maximum number of scalars per iteration for every |
1185 | rgroup in LOOP_VINFO. */ |
1186 | |
1187 | static unsigned int |
1188 | vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) |
1189 | { |
1190 | unsigned int res = 1; |
1191 | unsigned int i; |
1192 | rgroup_controls *rgm; |
1193 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm) |
1194 | res = MAX (res, rgm->max_nscalars_per_iter); |
1195 | return res; |
1196 | } |
1197 | |
1198 | /* Calculate the minimum precision necessary to represent: |
1199 | |
1200 | MAX_NITERS * FACTOR |
1201 | |
1202 | as an unsigned integer, where MAX_NITERS is the maximum number of |
1203 | loop header iterations for the original scalar form of LOOP_VINFO. */ |
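/* For example (illustrative numbers), if MAX_NITERS is known to be at
   most 1000 and FACTOR is 4, the product 4000 fits in 12 bits, so the
   function would return 12.  */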
1204 | |
1205 | static unsigned |
1206 | vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) |
1207 | { |
1208 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1209 | |
1210 | /* Get the maximum number of iterations that is representable |
1211 | in the counter type. */ |
1212 | tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
1213 | widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; |
1214 | |
1215 | /* Get a more refined estimate for the number of iterations. */ |
1216 | widest_int max_back_edges; |
1217 | if (max_loop_iterations (loop, &max_back_edges)) |
1218 | max_ni = wi::smin (max_ni, max_back_edges + 1); |
1219 | |
1220 | /* Work out how many bits we need to represent the limit. */ |
1221 | return wi::min_precision (max_ni * factor, UNSIGNED); |
1222 | } |
1223 | |
1224 | /* True if the loop needs peeling or partial vectors when vectorized. */ |
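/* For example (illustrative numbers), a loop with 100 scalar iterations
   vectorized with VF 16 covers 96 iterations in six full vector
   iterations and needs peeling or partial vectors for the remaining
   four.  */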
1225 | |
1226 | static bool |
1227 | vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo) |
1228 | { |
1229 | unsigned HOST_WIDE_INT const_vf; |
1230 | HOST_WIDE_INT max_niter |
1231 | = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); |
1232 | |
1233 | unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
1234 | if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) |
1235 | th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO |
1236 | (loop_vinfo)); |
1237 | |
1238 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
1239 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) |
1240 | { |
1241 | /* Work out the (constant) number of iterations that need to be |
1242 | peeled for reasons other than niters. */ |
1243 | unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
1244 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
1245 | peel_niter += 1; |
1246 | if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter, |
1247 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
1248 | return true; |
1249 | } |
1250 | else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) |
1251 | /* ??? When peeling for gaps but not alignment, we could |
1252 | try to check whether the (variable) niters is known to be |
1253 | VF * N + 1. That's something of a niche case though. */ |
1254 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
1255 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) |
1256 | || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) |
1257 | < (unsigned) exact_log2 (const_vf)) |
1258 | /* In case of versioning, check if the maximum number of |
1259 | iterations is greater than th. If they are identical, |
1260 | the epilogue is unnecessary. */ |
1261 | && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) |
1262 | || ((unsigned HOST_WIDE_INT) max_niter |
1263 | > (th / const_vf) * const_vf)))) |
1264 | return true; |
1265 | |
1266 | return false; |
1267 | } |
1268 | |
1269 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1270 | whether we can actually generate the masks required. Return true if so, |
1271 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ |
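/* For example (illustrative), with a single rgroup the mask used by a
   vector iteration starting at scalar iteration I is conceptually

     mask = .WHILE_ULT (I, NITERS)

   where lane J of the mask is active iff I + J < NITERS, so the final,
   partial iteration simply has fewer active lanes.  */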
1272 | |
1273 | static bool |
1274 | vect_verify_full_masking (loop_vec_info loop_vinfo) |
1275 | { |
1276 | unsigned int min_ni_width; |
1277 | |
1278 | /* Use a normal loop if there are no statements that need masking. |
1279 | This only happens in rare degenerate cases: it means that the loop |
1280 | has no loads, no stores, and no live-out values. */ |
1281 | if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) |
1282 | return false; |
1283 | |
1284 | /* Produce the rgroup controls. */ |
1285 | for (auto mask : LOOP_VINFO_MASKS (loop_vinfo).mask_set) |
1286 | { |
1287 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
1288 | tree vectype = mask.first; |
1289 | unsigned nvectors = mask.second; |
1290 | |
1291 | if (masks->rgc_vec.length () < nvectors) |
1292 | masks->rgc_vec.safe_grow_cleared (nvectors, true); |
1293 | rgroup_controls *rgm = &(*masks).rgc_vec[nvectors - 1]; |
1294 | /* The number of scalars per iteration and the number of vectors are |
1295 | both compile-time constants. */ |
1296 | unsigned int nscalars_per_iter |
1297 | = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), |
1298 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); |
1299 | |
1300 | if (rgm->max_nscalars_per_iter < nscalars_per_iter) |
1301 | { |
1302 | rgm->max_nscalars_per_iter = nscalars_per_iter; |
1303 | rgm->type = truth_type_for (vectype); |
1304 | rgm->factor = 1; |
1305 | } |
1306 | } |
1307 | |
1308 | unsigned int max_nscalars_per_iter |
1309 | = vect_get_max_nscalars_per_iter (loop_vinfo); |
1310 | |
1311 | /* Work out how many bits we need to represent the limit. */ |
1312 | min_ni_width |
1313 | = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); |
1314 | |
1315 | /* Find a scalar mode for which WHILE_ULT is supported. */ |
1316 | opt_scalar_int_mode cmp_mode_iter; |
1317 | tree cmp_type = NULL_TREE; |
1318 | tree iv_type = NULL_TREE; |
1319 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
1320 | unsigned int iv_precision = UINT_MAX; |
1321 | |
1322 | if (iv_limit != -1) |
1323 | iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, |
1324 | UNSIGNED); |
1325 | |
1326 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) |
1327 | { |
1328 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); |
1329 | if (cmp_bits >= min_ni_width |
1330 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) |
1331 | { |
1332 | tree this_type = build_nonstandard_integer_type (cmp_bits, true); |
1333 | if (this_type |
1334 | && can_produce_all_loop_masks_p (loop_vinfo, this_type)) |
1335 | { |
1336 | /* Although we could stop as soon as we find a valid mode, |
1337 | there are at least two reasons why that's not always the |
1338 | best choice: |
1339 | |
1340 | - An IV that's Pmode or wider is more likely to be reusable |
1341 | in address calculations than an IV that's narrower than |
1342 | Pmode. |
1343 | |
1344 | - Doing the comparison in IV_PRECISION or wider allows |
1345 | a natural 0-based IV, whereas using a narrower comparison |
1346 | type requires mitigations against wrap-around. |
1347 | |
1348 | Conversely, if the IV limit is variable, doing the comparison |
1349 | in a wider type than the original type can introduce |
1350 | unnecessary extensions, so picking the widest valid mode |
1351 | is not always a good choice either. |
1352 | |
1353 | Here we prefer the first IV type that's Pmode or wider, |
1354 | and the first comparison type that's IV_PRECISION or wider. |
1355 | (The comparison type must be no wider than the IV type, |
1356 | to avoid extensions in the vector loop.) |
1357 | |
1358 | ??? We might want to try continuing beyond Pmode for ILP32 |
1359 | targets if CMP_BITS < IV_PRECISION. */ |
1360 | iv_type = this_type; |
1361 | if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)) |
1362 | cmp_type = this_type; |
1363 | if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) |
1364 | break; |
1365 | } |
1366 | } |
1367 | } |
1368 | |
1369 | if (!cmp_type) |
1370 | { |
1371 | LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.release (); |
1372 | return false; |
1373 | } |
1374 | |
1375 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type; |
1376 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type; |
1377 | LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_while_ult; |
1378 | return true; |
1379 | } |
1380 | |
1381 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1382 | whether we can actually generate AVX512 style masks. Return true if so, |
1383 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_IV_TYPE. */ |
1384 | |
1385 | static bool |
1386 | vect_verify_full_masking_avx512 (loop_vec_info loop_vinfo) |
1387 | { |
1388 | /* Produce a differently organized rgc_vec and check in a different |
1389 | way whether we can produce the masks. |
1390 | |
1391 | /* Use a normal loop if there are no statements that need masking. |
1392 | This only happens in rare degenerate cases: it means that the loop |
1393 | has no loads, no stores, and no live-out values. */ |
1394 | if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) |
1395 | return false; |
1396 | |
1397 | /* For the decrementing IV we need to represent all values in |
1398 | [0, niter + niter_skip] where niter_skip is the elements we |
1399 | skip in the first iteration for prologue peeling. */ |
1400 | tree iv_type = NULL_TREE; |
1401 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
1402 | unsigned int iv_precision = UINT_MAX; |
1403 | if (iv_limit != -1) |
1404 | iv_precision = wi::min_precision (iv_limit, UNSIGNED); |
1405 | |
1406 | /* First compute the type for the IV we use to track the remaining |
1407 | scalar iterations. */ |
1408 | opt_scalar_int_mode cmp_mode_iter; |
1409 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) |
1410 | { |
1411 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); |
1412 | if (cmp_bits >= iv_precision |
1413 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) |
1414 | { |
1415 | iv_type = build_nonstandard_integer_type (cmp_bits, true); |
1416 | if (iv_type) |
1417 | break; |
1418 | } |
1419 | } |
1420 | if (!iv_type) |
1421 | return false; |
1422 | |
1423 | /* Produce the rgroup controls. */ |
1424 | for (auto const &mask : LOOP_VINFO_MASKS (loop_vinfo).mask_set) |
1425 | { |
1426 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
1427 | tree vectype = mask.first; |
1428 | unsigned nvectors = mask.second; |
1429 | |
1430 | /* The number of scalars per iteration and the number of vectors are |
1431 | both compile-time constants. */ |
1432 | unsigned int nscalars_per_iter |
1433 | = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), |
1434 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); |
1435 | |
1436 | /* We index the rgroup_controls vector with nscalars_per_iter |
1437 | which we keep constant and instead have a varying nvectors, |
1438 | remembering the vector mask with the fewest nV. */ |
1439 | if (masks->rgc_vec.length () < nscalars_per_iter) |
1440 | masks->rgc_vec.safe_grow_cleared (nscalars_per_iter, true); |
1441 | rgroup_controls *rgm = &(*masks).rgc_vec[nscalars_per_iter - 1]; |
1442 | |
1443 | if (!rgm->type || rgm->factor > nvectors) |
1444 | { |
1445 | rgm->type = truth_type_for (vectype); |
1446 | rgm->compare_type = NULL_TREE; |
1447 | rgm->max_nscalars_per_iter = nscalars_per_iter; |
1448 | rgm->factor = nvectors; |
1449 | rgm->bias_adjusted_ctrl = NULL_TREE; |
1450 | } |
1451 | } |
1452 | |
1453 | /* There is no fixed compare type we are going to use but we have to |
1454 | be able to get at one for each mask group. */ |
1455 | unsigned int min_ni_width |
1456 | = wi::min_precision (vect_max_vf (loop_vinfo), UNSIGNED); |
1457 | |
1458 | bool ok = true; |
1459 | for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec) |
1460 | { |
1461 | tree mask_type = rgc.type; |
1462 | if (!mask_type) |
1463 | continue; |
1464 | |
1465 | /* For now vect_get_loop_mask only supports integer mode masks |
1466 | when we need to split it. */ |
1467 | if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_INT |
1468 | || TYPE_PRECISION (TREE_TYPE (mask_type)) != 1) |
1469 | { |
1470 | ok = false; |
1471 | break; |
1472 | } |
1473 | |
1474 | /* If iv_type is usable as compare type use that - we can elide the |
1475 | saturation in that case. */ |
1476 | if (TYPE_PRECISION (iv_type) >= min_ni_width) |
1477 | { |
1478 | tree cmp_vectype |
1479 | = build_vector_type (iv_type, TYPE_VECTOR_SUBPARTS (mask_type)); |
1480 | if (expand_vec_cmp_expr_p (cmp_vectype, mask_type, LT_EXPR)) |
1481 | rgc.compare_type = cmp_vectype; |
1482 | } |
1483 | if (!rgc.compare_type) |
1484 | FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) |
1485 | { |
1486 | unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); |
1487 | if (cmp_bits >= min_ni_width |
1488 | && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) |
1489 | { |
1490 | tree cmp_type = build_nonstandard_integer_type (cmp_bits, true); |
1491 | if (!cmp_type) |
1492 | continue; |
1493 | |
1494 | /* Check whether we can produce the mask with cmp_type. */ |
1495 | tree cmp_vectype |
= build_vector_type (cmp_type, TYPE_VECTOR_SUBPARTS (mask_type));
1497 | if (expand_vec_cmp_expr_p (cmp_vectype, mask_type, LT_EXPR)) |
1498 | { |
1499 | rgc.compare_type = cmp_vectype; |
1500 | break; |
1501 | } |
1502 | } |
1503 | } |
1504 | if (!rgc.compare_type) |
1505 | { |
1506 | ok = false; |
1507 | break; |
1508 | } |
1509 | } |
1510 | if (!ok) |
1511 | { |
release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo).rgc_vec);
1513 | return false; |
1514 | } |
1515 | |
1516 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = error_mark_node; |
1517 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type; |
1518 | LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_avx512; |
1519 | return true; |
1520 | } |
1521 | |
/* Check whether we can use vector access with length based on precision
comparison.  So far, to keep it simple, we only allow the case that the
precision of the target-supported length is larger than the precision
required by the loop niters.  */
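/* As a rough illustration (not the actual IL we generate), length-based
partial vectors conceptually turn

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   with VF = 4 into

     for (i = 0; i < n; i += 4)
       {
         len = MIN (n - i, 4);
         va = .LEN_LOAD (&b[i], align, len, bias)
              + .LEN_LOAD (&c[i], align, len, bias);
         .LEN_STORE (&a[i], align, len, bias, va);
       }

   so the final iteration can process fewer than VF elements.  */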
1526 | |
1527 | static bool |
1528 | vect_verify_loop_lens (loop_vec_info loop_vinfo) |
1529 | { |
1530 | if (LOOP_VINFO_LENS (loop_vinfo).is_empty ()) |
1531 | return false; |
1532 | |
1533 | machine_mode len_load_mode, len_store_mode; |
1534 | if (!get_len_load_store_mode (loop_vinfo->vector_mode, true) |
.exists (&len_load_mode))
1536 | return false; |
1537 | if (!get_len_load_store_mode (loop_vinfo->vector_mode, false) |
.exists (&len_store_mode))
1539 | return false; |
1540 | |
1541 | signed char partial_load_bias = internal_len_load_store_bias |
(IFN_LEN_LOAD, len_load_mode);
1543 | |
1544 | signed char partial_store_bias = internal_len_load_store_bias |
(IFN_LEN_STORE, len_store_mode);
1546 | |
1547 | gcc_assert (partial_load_bias == partial_store_bias); |
1548 | |
1549 | if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED) |
1550 | return false; |
1551 | |
1552 | /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit |
1553 | len_loads with a length of zero. In order to avoid that we prohibit |
1554 | more than one loop length here. */ |
1555 | if (partial_load_bias == -1 |
1556 | && LOOP_VINFO_LENS (loop_vinfo).length () > 1) |
1557 | return false; |
1558 | |
1559 | LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias; |
1560 | |
1561 | unsigned int max_nitems_per_iter = 1; |
1562 | unsigned int i; |
1563 | rgroup_controls *rgl; |
1564 | /* Find the maximum number of items per iteration for every rgroup. */ |
1565 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl) |
1566 | { |
1567 | unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor; |
1568 | max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter); |
1569 | } |
1570 | |
1571 | /* Work out how many bits we need to represent the length limit. */ |
1572 | unsigned int min_ni_prec |
= vect_min_prec_for_max_niters (loop_vinfo, max_nitems_per_iter);
1574 | |
/* Now use the maximum of the precisions below to pick one suitable IV type:
- the IV's natural precision
- the precision needed to hold the maximum number of scalar
iterations multiplied by the scale factor (min_ni_prec above)
- the Pmode precision

If min_ni_prec is less than the precision of the current niters,
we prefer to still use the niters type.  Prefer to use Pmode and
a wider IV to avoid narrow conversions.  */
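/* Worked example (hypothetical 64-bit target): with max_nitems_per_iter = 2
and at most 1000 scalar iterations, min_ni_prec starts at 11 bits (2000
items); a 32-bit niters type raises it to 32 and a 64-bit Pmode raises it
to 64, so a 64-bit IV type is picked below.  */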
1584 | |
1585 | unsigned int ni_prec |
1586 | = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo))); |
1587 | min_ni_prec = MAX (min_ni_prec, ni_prec); |
1588 | min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode)); |
1589 | |
1590 | tree iv_type = NULL_TREE; |
1591 | opt_scalar_int_mode tmode_iter; |
1592 | FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT) |
1593 | { |
1594 | scalar_mode tmode = tmode_iter.require (); |
unsigned int tbits = GET_MODE_BITSIZE (tmode);
1596 | |
1597 | /* ??? Do we really want to construct one IV whose precision exceeds |
1598 | BITS_PER_WORD? */ |
1599 | if (tbits > BITS_PER_WORD) |
1600 | break; |
1601 | |
1602 | /* Find the first available standard integral type. */ |
1603 | if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode)) |
1604 | { |
1605 | iv_type = build_nonstandard_integer_type (tbits, true); |
1606 | break; |
1607 | } |
1608 | } |
1609 | |
1610 | if (!iv_type) |
1611 | { |
1612 | if (dump_enabled_p ()) |
1613 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1614 | "can't vectorize with length-based partial vectors" |
1615 | " because there is no suitable iv type.\n" ); |
1616 | return false; |
1617 | } |
1618 | |
1619 | LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type; |
1620 | LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type; |
1621 | LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_len; |
1622 | |
1623 | return true; |
1624 | } |
1625 | |
1626 | /* Calculate the cost of one scalar iteration of the loop. */ |
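/* For instance, a scalar body consisting of a load, an add and a store
contributes one scalar_load, one scalar_stmt and one scalar_store per
iteration; statements in an inner loop are additionally scaled by
LOOP_VINFO_INNER_LOOP_COST_FACTOR.  */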
1627 | static void |
1628 | vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) |
1629 | { |
1630 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1631 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
1632 | int nbbs = loop->num_nodes, factor; |
1633 | int innerloop_iters, i; |
1634 | |
1635 | DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost" ); |
1636 | |
1637 | /* Gather costs for statements in the scalar loop. */ |
1638 | |
1639 | /* FORNOW. */ |
1640 | innerloop_iters = 1; |
1641 | if (loop->inner) |
1642 | innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); |
1643 | |
1644 | for (i = 0; i < nbbs; i++) |
1645 | { |
1646 | gimple_stmt_iterator si; |
1647 | basic_block bb = bbs[i]; |
1648 | |
1649 | if (bb->loop_father == loop->inner) |
1650 | factor = innerloop_iters; |
1651 | else |
1652 | factor = 1; |
1653 | |
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple *stmt = gsi_stmt (si);
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1660 | continue; |
1661 | |
1662 | /* Skip stmts that are not vectorized inside the loop. */ |
1663 | stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info); |
1664 | if (!STMT_VINFO_RELEVANT_P (vstmt_info) |
1665 | && (!STMT_VINFO_LIVE_P (vstmt_info) |
1666 | || !VECTORIZABLE_CYCLE_DEF |
1667 | (STMT_VINFO_DEF_TYPE (vstmt_info)))) |
1668 | continue; |
1669 | |
1670 | vect_cost_for_stmt kind; |
1671 | if (STMT_VINFO_DATA_REF (stmt_info)) |
1672 | { |
1673 | if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
1674 | kind = scalar_load; |
1675 | else |
1676 | kind = scalar_store; |
1677 | } |
1678 | else if (vect_nop_conversion_p (stmt_info)) |
1679 | continue; |
1680 | else |
1681 | kind = scalar_stmt; |
1682 | |
1683 | /* We are using vect_prologue here to avoid scaling twice |
1684 | by the inner loop factor. */ |
record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
factor, kind, stmt_info, 0, vect_prologue);
1687 | } |
1688 | } |
1689 | |
1690 | /* Now accumulate cost. */ |
loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
add_stmt_costs (loop_vinfo->scalar_costs,
&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
loop_vinfo->scalar_costs->finish_cost (nullptr);
1695 | } |
1696 | |
1697 | |
1698 | /* Function vect_analyze_loop_form. |
1699 | |
1700 | Verify that certain CFG restrictions hold, including: |
1701 | - the loop has a pre-header |
1702 | - the loop has a single entry and exit |
1703 | - the loop exit condition is simple enough |
- the number of iterations can be analyzed, i.e., the loop is countable.
The niter may be analyzable only under some assumptions.  */
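/* For example, a loop whose latch contains statements, whose header has
extra incoming edges, or an inner-most loop with control flow in its
body (more than the two expected basic blocks) is rejected by the
checks below.  */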
1706 | |
1707 | opt_result |
1708 | vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info) |
1709 | { |
1710 | DUMP_VECT_SCOPE ("vect_analyze_loop_form" ); |
1711 | |
1712 | edge exit_e = vec_init_loop_exit_info (loop); |
1713 | if (!exit_e) |
1714 | return opt_result::failure_at (loc: vect_location, |
1715 | fmt: "not vectorized:" |
1716 | " could not determine main exit from" |
1717 | " loop with multiple exits.\n" ); |
1718 | info->loop_exit = exit_e; |
1719 | if (dump_enabled_p ()) |
1720 | dump_printf_loc (MSG_NOTE, vect_location, |
1721 | "using as main loop exit: %d -> %d [AUX: %p]\n" , |
1722 | exit_e->src->index, exit_e->dest->index, exit_e->aux); |
1723 | |
1724 | /* Different restrictions apply when we are considering an inner-most loop, |
1725 | vs. an outer (nested) loop. |
1726 | (FORNOW. May want to relax some of these restrictions in the future). */ |
1727 | |
1728 | info->inner_loop_cond = NULL; |
1729 | if (!loop->inner) |
1730 | { |
1731 | /* Inner-most loop. We currently require that the number of BBs is |
1732 | exactly 2 (the header and latch). Vectorizable inner-most loops |
1733 | look like this: |
1734 | |
1735 | (pre-header) |
1736 | | |
1737 | header <--------+ |
1738 | | | | |
1739 | | +--> latch --+ |
1740 | | |
1741 | (exit-bb) */ |
1742 | |
1743 | if (loop->num_nodes != 2) |
1744 | return opt_result::failure_at (loc: vect_location, |
1745 | fmt: "not vectorized:" |
1746 | " control flow in loop.\n" ); |
1747 | |
1748 | if (empty_block_p (loop->header)) |
1749 | return opt_result::failure_at (loc: vect_location, |
1750 | fmt: "not vectorized: empty loop.\n" ); |
1751 | } |
1752 | else |
1753 | { |
1754 | class loop *innerloop = loop->inner; |
1755 | edge entryedge; |
1756 | |
1757 | /* Nested loop. We currently require that the loop is doubly-nested, |
1758 | contains a single inner loop, and the number of BBs is exactly 5. |
1759 | Vectorizable outer-loops look like this: |
1760 | |
1761 | (pre-header) |
1762 | | |
1763 | header <---+ |
1764 | | | |
1765 | inner-loop | |
1766 | | | |
1767 | tail ------+ |
1768 | | |
1769 | (exit-bb) |
1770 | |
1771 | The inner-loop has the properties expected of inner-most loops |
1772 | as described above. */ |
1773 | |
1774 | if ((loop->inner)->inner || (loop->inner)->next) |
1775 | return opt_result::failure_at (loc: vect_location, |
1776 | fmt: "not vectorized:" |
1777 | " multiple nested loops.\n" ); |
1778 | |
1779 | if (loop->num_nodes != 5) |
1780 | return opt_result::failure_at (loc: vect_location, |
1781 | fmt: "not vectorized:" |
1782 | " control flow in loop.\n" ); |
1783 | |
1784 | entryedge = loop_preheader_edge (innerloop); |
1785 | if (entryedge->src != loop->header |
1786 | || !single_exit (innerloop) |
1787 | || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src) |
1788 | return opt_result::failure_at (loc: vect_location, |
1789 | fmt: "not vectorized:" |
1790 | " unsupported outerloop form.\n" ); |
1791 | |
1792 | /* Analyze the inner-loop. */ |
1793 | vect_loop_form_info inner; |
opt_result res = vect_analyze_loop_form (loop->inner, &inner);
1795 | if (!res) |
1796 | { |
1797 | if (dump_enabled_p ()) |
1798 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1799 | "not vectorized: Bad inner loop.\n" ); |
1800 | return res; |
1801 | } |
1802 | |
1803 | /* Don't support analyzing niter under assumptions for inner |
1804 | loop. */ |
1805 | if (!integer_onep (inner.assumptions)) |
1806 | return opt_result::failure_at (loc: vect_location, |
1807 | fmt: "not vectorized: Bad inner loop.\n" ); |
1808 | |
1809 | if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations)) |
1810 | return opt_result::failure_at (loc: vect_location, |
1811 | fmt: "not vectorized: inner-loop count not" |
1812 | " invariant.\n" ); |
1813 | |
1814 | if (dump_enabled_p ()) |
1815 | dump_printf_loc (MSG_NOTE, vect_location, |
1816 | "Considering outer-loop vectorization.\n" ); |
1817 | info->inner_loop_cond = inner.conds[0]; |
1818 | } |
1819 | |
1820 | if (!single_exit (loop)) |
1821 | return opt_result::failure_at (loc: vect_location, |
1822 | fmt: "not vectorized: multiple exits.\n" ); |
1823 | if (EDGE_COUNT (loop->header->preds) != 2) |
1824 | return opt_result::failure_at (loc: vect_location, |
1825 | fmt: "not vectorized:" |
1826 | " too many incoming edges.\n" ); |
1827 | |
/* We assume that the loop exit condition is at the end of the loop, i.e.,
that the loop is represented as a do-while (with a proper if-guard
before the loop if needed), where the loop header contains all the
executable statements, and the latch is empty.  */
if (!empty_block_p (loop->latch)
|| !gimple_seq_empty_p (phi_nodes (loop->latch)))
return opt_result::failure_at (vect_location,
"not vectorized: latch block not empty.\n");
1836 | |
1837 | /* Make sure the exit is not abnormal. */ |
1838 | if (exit_e->flags & EDGE_ABNORMAL) |
1839 | return opt_result::failure_at (loc: vect_location, |
1840 | fmt: "not vectorized:" |
1841 | " abnormal loop exit edge.\n" ); |
1842 | |
info->conds
= vect_get_loop_niters (loop, exit_e, &info->assumptions,
&info->number_of_iterations,
&info->number_of_iterationsm1);
1847 | |
1848 | if (info->conds.is_empty ()) |
1849 | return opt_result::failure_at |
1850 | (loc: vect_location, |
1851 | fmt: "not vectorized: complicated exit condition.\n" ); |
1852 | |
1853 | /* Determine what the primary and alternate exit conds are. */ |
1854 | for (unsigned i = 0; i < info->conds.length (); i++) |
1855 | { |
1856 | gcond *cond = info->conds[i]; |
if (exit_e->src == gimple_bb (cond))
std::swap (info->conds[0], info->conds[i]);
1859 | } |
1860 | |
1861 | if (integer_zerop (info->assumptions) |
1862 | || !info->number_of_iterations |
1863 | || chrec_contains_undetermined (info->number_of_iterations)) |
1864 | return opt_result::failure_at |
1865 | (loc: info->conds[0], |
1866 | fmt: "not vectorized: number of iterations cannot be computed.\n" ); |
1867 | |
1868 | if (integer_zerop (info->number_of_iterations)) |
1869 | return opt_result::failure_at |
1870 | (loc: info->conds[0], |
1871 | fmt: "not vectorized: number of iterations = 0.\n" ); |
1872 | |
1873 | if (!(tree_fits_shwi_p (info->number_of_iterations) |
1874 | && tree_to_shwi (info->number_of_iterations) > 0)) |
1875 | { |
1876 | if (dump_enabled_p ()) |
1877 | { |
1878 | dump_printf_loc (MSG_NOTE, vect_location, |
1879 | "Symbolic number of iterations is " ); |
1880 | dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations); |
1881 | dump_printf (MSG_NOTE, "\n" ); |
1882 | } |
1883 | } |
1884 | |
1885 | return opt_result::success (); |
1886 | } |
1887 | |
1888 | /* Create a loop_vec_info for LOOP with SHARED and the |
1889 | vect_analyze_loop_form result. */ |
1890 | |
1891 | loop_vec_info |
1892 | vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, |
1893 | const vect_loop_form_info *info, |
1894 | loop_vec_info main_loop_info) |
1895 | { |
1896 | loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); |
1897 | LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1; |
1898 | LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations; |
1899 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations; |
1900 | LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info; |
1901 | /* Also record the assumptions for versioning. */ |
1902 | if (!integer_onep (info->assumptions) && !main_loop_info) |
1903 | LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions; |
1904 | |
1905 | for (gcond *cond : info->conds) |
1906 | { |
1907 | stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond); |
1908 | STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; |
1909 | } |
1910 | |
1911 | for (unsigned i = 1; i < info->conds.length (); i ++) |
LOOP_VINFO_LOOP_CONDS (loop_vinfo).safe_push (info->conds[i]);
1913 | LOOP_VINFO_LOOP_IV_COND (loop_vinfo) = info->conds[0]; |
1914 | |
1915 | LOOP_VINFO_IV_EXIT (loop_vinfo) = info->loop_exit; |
1916 | |
1917 | if (info->inner_loop_cond) |
1918 | { |
1919 | stmt_vec_info inner_loop_cond_info |
1920 | = loop_vinfo->lookup_stmt (info->inner_loop_cond); |
1921 | STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type; |
1922 | /* If we have an estimate on the number of iterations of the inner |
1923 | loop use that to limit the scale for costing, otherwise use |
1924 | --param vect-inner-loop-cost-factor literally. */ |
1925 | widest_int nit; |
1926 | if (estimated_stmt_executions (loop->inner, &nit)) |
1927 | LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo) |
= wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
1929 | } |
1930 | |
1931 | return loop_vinfo; |
1932 | } |
1933 | |
1934 | |
1935 | |
/* Scan the loop stmts and, depending on whether there are any (non-)SLP
statements, update the vectorization factor.  */
1938 | |
1939 | static void |
1940 | vect_update_vf_for_slp (loop_vec_info loop_vinfo) |
1941 | { |
1942 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1943 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
1944 | int nbbs = loop->num_nodes; |
1945 | poly_uint64 vectorization_factor; |
1946 | int i; |
1947 | |
1948 | DUMP_VECT_SCOPE ("vect_update_vf_for_slp" ); |
1949 | |
1950 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1951 | gcc_assert (known_ne (vectorization_factor, 0U)); |
1952 | |
/* If all the stmts in the loop can be SLPed, we perform only SLP, and
the vectorization factor of the loop is the unrolling factor required by
the SLP instances.  If that unrolling factor is 1, we say that we
perform pure SLP on the loop - cross-iteration parallelism is not
exploited.  */
1958 | bool only_slp_in_loop = true; |
1959 | for (i = 0; i < nbbs; i++) |
1960 | { |
1961 | basic_block bb = bbs[i]; |
for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
gsi_next (&si))
1964 | { |
1965 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); |
1966 | if (!stmt_info) |
1967 | continue; |
1968 | if ((STMT_VINFO_RELEVANT_P (stmt_info) |
1969 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
1970 | && !PURE_SLP_STMT (stmt_info)) |
1971 | /* STMT needs both SLP and loop-based vectorization. */ |
1972 | only_slp_in_loop = false; |
1973 | } |
for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
gsi_next (&si))
{
if (is_gimple_debug (gsi_stmt (si)))
continue;
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
1980 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
1981 | if ((STMT_VINFO_RELEVANT_P (stmt_info) |
1982 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
1983 | && !PURE_SLP_STMT (stmt_info)) |
1984 | /* STMT needs both SLP and loop-based vectorization. */ |
1985 | only_slp_in_loop = false; |
1986 | } |
1987 | } |
1988 | |
1989 | if (only_slp_in_loop) |
1990 | { |
1991 | if (dump_enabled_p ()) |
1992 | dump_printf_loc (MSG_NOTE, vect_location, |
1993 | "Loop contains only SLP stmts\n" ); |
1994 | vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); |
1995 | } |
1996 | else |
1997 | { |
1998 | if (dump_enabled_p ()) |
1999 | dump_printf_loc (MSG_NOTE, vect_location, |
2000 | "Loop contains SLP and non-SLP stmts\n" ); |
2001 | /* Both the vectorization factor and unroll factor have the form |
2002 | GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, |
2003 | so they must have a common multiple. */ |
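/* For example (hypothetical factors), a loop-based VF of 4 combined with
an SLP unrolling factor of 2 keeps VF = 4, whereas an SLP unrolling
factor of 8 forces VF up to 8.  */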
2004 | vectorization_factor |
= force_common_multiple (vectorization_factor,
2006 | LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); |
2007 | } |
2008 | |
2009 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
2010 | if (dump_enabled_p ()) |
2011 | { |
2012 | dump_printf_loc (MSG_NOTE, vect_location, |
2013 | "Updating vectorization factor to " ); |
2014 | dump_dec (MSG_NOTE, vectorization_factor); |
2015 | dump_printf (MSG_NOTE, ".\n" ); |
2016 | } |
2017 | } |
2018 | |
2019 | /* Return true if STMT_INFO describes a double reduction phi and if |
2020 | the other phi in the reduction is also relevant for vectorization. |
2021 | This rejects cases such as: |
2022 | |
2023 | outer1: |
2024 | x_1 = PHI <x_3(outer2), ...>; |
2025 | ... |
2026 | |
2027 | inner: |
2028 | x_2 = ...; |
2029 | ... |
2030 | |
2031 | outer2: |
2032 | x_3 = PHI <x_2(inner)>; |
2033 | |
2034 | if nothing in x_2 or elsewhere makes x_1 relevant. */ |
2035 | |
2036 | static bool |
2037 | vect_active_double_reduction_p (stmt_vec_info stmt_info) |
2038 | { |
2039 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) |
2040 | return false; |
2041 | |
2042 | return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info)); |
2043 | } |
2044 | |
2045 | /* Function vect_analyze_loop_operations. |
2046 | |
2047 | Scan the loop stmts and make sure they are all vectorizable. */ |
2048 | |
2049 | static opt_result |
2050 | vect_analyze_loop_operations (loop_vec_info loop_vinfo) |
2051 | { |
2052 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2053 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
2054 | int nbbs = loop->num_nodes; |
2055 | int i; |
2056 | stmt_vec_info stmt_info; |
2057 | bool need_to_vectorize = false; |
2058 | bool ok; |
2059 | |
2060 | DUMP_VECT_SCOPE ("vect_analyze_loop_operations" ); |
2061 | |
2062 | auto_vec<stmt_info_for_cost> cost_vec; |
2063 | |
2064 | for (i = 0; i < nbbs; i++) |
2065 | { |
2066 | basic_block bb = bbs[i]; |
2067 | |
2068 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
2069 | gsi_next (i: &si)) |
2070 | { |
2071 | gphi *phi = si.phi (); |
2072 | ok = true; |
2073 | |
2074 | stmt_info = loop_vinfo->lookup_stmt (phi); |
2075 | if (dump_enabled_p ()) |
2076 | dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G" , |
2077 | (gimple *) phi); |
2078 | if (virtual_operand_p (op: gimple_phi_result (gs: phi))) |
2079 | continue; |
2080 | |
2081 | /* Inner-loop loop-closed exit phi in outer-loop vectorization |
2082 | (i.e., a phi in the tail of the outer-loop). */ |
2083 | if (! is_loop_header_bb_p (bb)) |
2084 | { |
/* FORNOW: we currently don't support the case where these phis
are not used in the outer loop (unless it is a double reduction,
i.e., this phi is vect_reduction_def), because that case
would require actually doing something here.  */
2089 | if (STMT_VINFO_LIVE_P (stmt_info) |
2090 | && !vect_active_double_reduction_p (stmt_info)) |
2091 | return opt_result::failure_at (loc: phi, |
2092 | fmt: "Unsupported loop-closed phi" |
2093 | " in outer-loop.\n" ); |
2094 | |
2095 | /* If PHI is used in the outer loop, we check that its operand |
2096 | is defined in the inner loop. */ |
2097 | if (STMT_VINFO_RELEVANT_P (stmt_info)) |
2098 | { |
2099 | tree phi_op; |
2100 | |
2101 | if (gimple_phi_num_args (gs: phi) != 1) |
2102 | return opt_result::failure_at (loc: phi, fmt: "unsupported phi" ); |
2103 | |
2104 | phi_op = PHI_ARG_DEF (phi, 0); |
2105 | stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); |
2106 | if (!op_def_info) |
2107 | return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" ); |
2108 | |
2109 | if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer |
2110 | && (STMT_VINFO_RELEVANT (op_def_info) |
2111 | != vect_used_in_outer_by_reduction)) |
2112 | return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" ); |
2113 | |
2114 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def |
2115 | || (STMT_VINFO_DEF_TYPE (stmt_info) |
2116 | == vect_double_reduction_def)) |
2117 | && !vectorizable_lc_phi (loop_vinfo, |
2118 | stmt_info, NULL, NULL)) |
2119 | return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" ); |
2120 | } |
2121 | |
2122 | continue; |
2123 | } |
2124 | |
2125 | gcc_assert (stmt_info); |
2126 | |
2127 | if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope |
2128 | || STMT_VINFO_LIVE_P (stmt_info)) |
2129 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def |
2130 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence) |
2131 | /* A scalar-dependence cycle that we don't support. */ |
2132 | return opt_result::failure_at (loc: phi, |
2133 | fmt: "not vectorized:" |
2134 | " scalar dependence cycle.\n" ); |
2135 | |
2136 | if (STMT_VINFO_RELEVANT_P (stmt_info)) |
2137 | { |
2138 | need_to_vectorize = true; |
2139 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
2140 | && ! PURE_SLP_STMT (stmt_info)) |
2141 | ok = vectorizable_induction (loop_vinfo, |
2142 | stmt_info, NULL, NULL, |
2143 | &cost_vec); |
2144 | else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
2145 | || (STMT_VINFO_DEF_TYPE (stmt_info) |
2146 | == vect_double_reduction_def) |
2147 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
2148 | && ! PURE_SLP_STMT (stmt_info)) |
2149 | ok = vectorizable_reduction (loop_vinfo, |
2150 | stmt_info, NULL, NULL, &cost_vec); |
2151 | else if ((STMT_VINFO_DEF_TYPE (stmt_info) |
2152 | == vect_first_order_recurrence) |
2153 | && ! PURE_SLP_STMT (stmt_info)) |
2154 | ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL, |
2155 | &cost_vec); |
2156 | } |
2157 | |
2158 | /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ |
2159 | if (ok |
2160 | && STMT_VINFO_LIVE_P (stmt_info) |
2161 | && !PURE_SLP_STMT (stmt_info)) |
2162 | ok = vectorizable_live_operation (loop_vinfo, stmt_info, NULL, NULL, |
2163 | -1, false, &cost_vec); |
2164 | |
2165 | if (!ok) |
2166 | return opt_result::failure_at (loc: phi, |
2167 | fmt: "not vectorized: relevant phi not " |
2168 | "supported: %G" , |
2169 | static_cast <gimple *> (phi)); |
2170 | } |
2171 | |
2172 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (i: si); |
2173 | gsi_next (i: &si)) |
2174 | { |
2175 | gimple *stmt = gsi_stmt (i: si); |
2176 | if (!gimple_clobber_p (s: stmt) |
2177 | && !is_gimple_debug (gs: stmt)) |
2178 | { |
2179 | opt_result res |
2180 | = vect_analyze_stmt (loop_vinfo, |
2181 | loop_vinfo->lookup_stmt (stmt), |
2182 | &need_to_vectorize, |
2183 | NULL, NULL, &cost_vec); |
2184 | if (!res) |
2185 | return res; |
2186 | } |
2187 | } |
2188 | } /* bbs */ |
2189 | |
2190 | add_stmt_costs (costs: loop_vinfo->vector_costs, cost_vec: &cost_vec); |
2191 | |
2192 | /* All operations in the loop are either irrelevant (deal with loop |
2193 | control, or dead), or only used outside the loop and can be moved |
2194 | out of the loop (e.g. invariants, inductions). The loop can be |
2195 | optimized away by scalar optimizations. We're better off not |
2196 | touching this loop. */ |
2197 | if (!need_to_vectorize) |
2198 | { |
2199 | if (dump_enabled_p ()) |
2200 | dump_printf_loc (MSG_NOTE, vect_location, |
2201 | "All the computation can be taken out of the loop.\n" ); |
2202 | return opt_result::failure_at |
2203 | (loc: vect_location, |
2204 | fmt: "not vectorized: redundant loop. no profit to vectorize.\n" ); |
2205 | } |
2206 | |
2207 | return opt_result::success (); |
2208 | } |
2209 | |
2210 | /* Return true if we know that the iteration count is smaller than the |
2211 | vectorization factor. Return false if it isn't, or if we can't be sure |
2212 | either way. */ |
2213 | |
2214 | static bool |
2215 | vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) |
2216 | { |
2217 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
2218 | |
2219 | HOST_WIDE_INT max_niter; |
2220 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
2221 | max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); |
2222 | else |
2223 | max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); |
2224 | |
2225 | if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) |
2226 | return true; |
2227 | |
2228 | return false; |
2229 | } |
2230 | |
2231 | /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it |
2232 | is worthwhile to vectorize. Return 1 if definitely yes, 0 if |
2233 | definitely no, or -1 if it's worth retrying. */ |
2234 | |
2235 | static int |
2236 | vect_analyze_loop_costing (loop_vec_info loop_vinfo, |
2237 | unsigned *suggested_unroll_factor) |
2238 | { |
2239 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2240 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
2241 | |
2242 | /* Only loops that can handle partially-populated vectors can have iteration |
2243 | counts less than the vectorization factor. */ |
2244 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
2245 | && vect_known_niters_smaller_than_vf (loop_vinfo)) |
2246 | { |
2247 | if (dump_enabled_p ()) |
2248 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2249 | "not vectorized: iteration count smaller than " |
2250 | "vectorization factor.\n" ); |
2251 | return 0; |
2252 | } |
2253 | |
/* If we know the number of iterations we can do better: for the
epilogue we can also decide whether the main loop leaves us
with enough iterations, preferring a smaller vector epilogue that
is then also possibly used for the case where we skip the vector
loop.  */
2258 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
2259 | { |
2260 | widest_int scalar_niters |
2261 | = wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1; |
2262 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2263 | { |
2264 | loop_vec_info orig_loop_vinfo |
2265 | = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
2266 | unsigned lowest_vf |
2267 | = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)); |
2268 | int prolog_peeling = 0; |
2269 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
2270 | prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo); |
2271 | if (prolog_peeling >= 0 |
2272 | && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), |
2273 | lowest_vf)) |
2274 | { |
2275 | unsigned gap |
2276 | = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0; |
2277 | scalar_niters = ((scalar_niters - gap - prolog_peeling) |
2278 | % lowest_vf + gap); |
2279 | } |
2280 | } |
2281 | /* Reject vectorizing for a single scalar iteration, even if |
2282 | we could in principle implement that using partial vectors. */ |
2283 | unsigned peeling_gap = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo); |
2284 | if (scalar_niters <= peeling_gap + 1) |
2285 | { |
2286 | if (dump_enabled_p ()) |
2287 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2288 | "not vectorized: loop only has a single " |
2289 | "scalar iteration.\n" ); |
2290 | return 0; |
2291 | } |
2292 | |
2293 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
2294 | { |
2295 | /* Check that the loop processes at least one full vector. */ |
2296 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2297 | if (known_lt (scalar_niters, vf)) |
2298 | { |
2299 | if (dump_enabled_p ()) |
2300 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2301 | "loop does not have enough iterations " |
2302 | "to support vectorization.\n" ); |
2303 | return 0; |
2304 | } |
2305 | |
2306 | /* If we need to peel an extra epilogue iteration to handle data |
2307 | accesses with gaps, check that there are enough scalar iterations |
2308 | available. |
2309 | |
2310 | The check above is redundant with this one when peeling for gaps, |
2311 | but the distinction is useful for diagnostics. */ |
2312 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2313 | && known_le (scalar_niters, vf)) |
2314 | { |
2315 | if (dump_enabled_p ()) |
2316 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2317 | "loop does not have enough iterations " |
2318 | "to support peeling for gaps.\n" ); |
2319 | return 0; |
2320 | } |
2321 | } |
2322 | } |
2323 | |
/* If using the "very cheap" model, reject cases in which we'd keep
a copy of the scalar code (even if we might be able to vectorize it).  */
2326 | if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP |
2327 | && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) |
2328 | || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2329 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) |
2330 | { |
2331 | if (dump_enabled_p ()) |
2332 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2333 | "some scalar iterations would need to be peeled\n" ); |
2334 | return 0; |
2335 | } |
2336 | |
2337 | int min_profitable_iters, min_profitable_estimate; |
2338 | vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, |
2339 | &min_profitable_estimate, |
2340 | suggested_unroll_factor); |
2341 | |
2342 | if (min_profitable_iters < 0) |
2343 | { |
2344 | if (dump_enabled_p ()) |
2345 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2346 | "not vectorized: vectorization not profitable.\n" ); |
2347 | if (dump_enabled_p ()) |
2348 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2349 | "not vectorized: vector version will never be " |
2350 | "profitable.\n" ); |
2351 | return -1; |
2352 | } |
2353 | |
2354 | int min_scalar_loop_bound = (param_min_vect_loop_bound |
2355 | * assumed_vf); |
2356 | |
/* Use the cost model only if it is more conservative than the
user-specified threshold.  */
2359 | unsigned int th = (unsigned) MAX (min_scalar_loop_bound, |
2360 | min_profitable_iters); |
2361 | |
2362 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th; |
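/* Hypothetical numbers: with assumed_vf = 4, --param min-vect-loop-bound=0
and min_profitable_iters = 7, the threshold is 7, so a known iteration
count of 5 makes the check below reject vectorization.  */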
2363 | |
2364 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
2365 | && LOOP_VINFO_INT_NITERS (loop_vinfo) < th) |
2366 | { |
2367 | if (dump_enabled_p ()) |
2368 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2369 | "not vectorized: vectorization not profitable.\n" ); |
2370 | if (dump_enabled_p ()) |
2371 | dump_printf_loc (MSG_NOTE, vect_location, |
2372 | "not vectorized: iteration count smaller than user " |
2373 | "specified loop bound parameter or minimum profitable " |
2374 | "iterations (whichever is more conservative).\n" ); |
2375 | return 0; |
2376 | } |
2377 | |
/* The static profitability threshold min_profitable_estimate includes
2379 | the cost of having to check at runtime whether the scalar loop |
2380 | should be used instead. If it turns out that we don't need or want |
2381 | such a check, the threshold we should use for the static estimate |
2382 | is simply the point at which the vector loop becomes more profitable |
2383 | than the scalar loop. */ |
2384 | if (min_profitable_estimate > min_profitable_iters |
2385 | && !LOOP_REQUIRES_VERSIONING (loop_vinfo) |
2386 | && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) |
2387 | && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) |
2388 | && !vect_apply_runtime_profitability_check_p (loop_vinfo)) |
2389 | { |
2390 | if (dump_enabled_p ()) |
2391 | dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime" |
2392 | " choice between the scalar and vector loops\n" ); |
2393 | min_profitable_estimate = min_profitable_iters; |
2394 | } |
2395 | |
2396 | /* If the vector loop needs multiple iterations to be beneficial then |
2397 | things are probably too close to call, and the conservative thing |
2398 | would be to stick with the scalar code. */ |
2399 | if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP |
2400 | && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) |
2401 | { |
2402 | if (dump_enabled_p ()) |
2403 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2404 | "one iteration of the vector loop would be" |
2405 | " more expensive than the equivalent number of" |
2406 | " iterations of the scalar loop\n" ); |
2407 | return 0; |
2408 | } |
2409 | |
2410 | HOST_WIDE_INT estimated_niter; |
2411 | |
2412 | /* If we are vectorizing an epilogue then we know the maximum number of |
2413 | scalar iterations it will cover is at least one lower than the |
2414 | vectorization factor of the main loop. */ |
2415 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2416 | estimated_niter |
2417 | = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1; |
2418 | else |
2419 | { |
2420 | estimated_niter = estimated_stmt_executions_int (loop); |
2421 | if (estimated_niter == -1) |
2422 | estimated_niter = likely_max_stmt_executions_int (loop); |
2423 | } |
2424 | if (estimated_niter != -1 |
2425 | && ((unsigned HOST_WIDE_INT) estimated_niter |
2426 | < MAX (th, (unsigned) min_profitable_estimate))) |
2427 | { |
2428 | if (dump_enabled_p ()) |
2429 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2430 | "not vectorized: estimated iteration count too " |
2431 | "small.\n" ); |
2432 | if (dump_enabled_p ()) |
2433 | dump_printf_loc (MSG_NOTE, vect_location, |
2434 | "not vectorized: estimated iteration count smaller " |
2435 | "than specified loop bound parameter or minimum " |
2436 | "profitable iterations (whichever is more " |
2437 | "conservative).\n" ); |
2438 | return -1; |
2439 | } |
2440 | |
2441 | return 1; |
2442 | } |
2443 | |
2444 | static opt_result |
2445 | vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, |
2446 | vec<data_reference_p> *datarefs, |
2447 | unsigned int *n_stmts) |
2448 | { |
2449 | *n_stmts = 0; |
2450 | for (unsigned i = 0; i < loop->num_nodes; i++) |
2451 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb: bbs[i]); |
2452 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
2453 | { |
2454 | gimple *stmt = gsi_stmt (i: gsi); |
2455 | if (is_gimple_debug (gs: stmt)) |
2456 | continue; |
2457 | ++(*n_stmts); |
2458 | opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, |
2459 | NULL, 0); |
2460 | if (!res) |
2461 | { |
2462 | if (is_gimple_call (gs: stmt) && loop->safelen) |
2463 | { |
2464 | tree fndecl = gimple_call_fndecl (gs: stmt), op; |
2465 | if (fndecl == NULL_TREE |
2466 | && gimple_call_internal_p (gs: stmt, fn: IFN_MASK_CALL)) |
2467 | { |
2468 | fndecl = gimple_call_arg (gs: stmt, index: 0); |
2469 | gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); |
2470 | fndecl = TREE_OPERAND (fndecl, 0); |
2471 | gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); |
2472 | } |
2473 | if (fndecl != NULL_TREE) |
2474 | { |
2475 | cgraph_node *node = cgraph_node::get (decl: fndecl); |
2476 | if (node != NULL && node->simd_clones != NULL) |
2477 | { |
2478 | unsigned int j, n = gimple_call_num_args (gs: stmt); |
2479 | for (j = 0; j < n; j++) |
2480 | { |
2481 | op = gimple_call_arg (gs: stmt, index: j); |
2482 | if (DECL_P (op) |
2483 | || (REFERENCE_CLASS_P (op) |
2484 | && get_base_address (t: op))) |
2485 | break; |
2486 | } |
2487 | op = gimple_call_lhs (gs: stmt); |
2488 | /* Ignore #pragma omp declare simd functions |
2489 | if they don't have data references in the |
2490 | call stmt itself. */ |
2491 | if (j == n |
2492 | && !(op |
2493 | && (DECL_P (op) |
2494 | || (REFERENCE_CLASS_P (op) |
2495 | && get_base_address (t: op))))) |
2496 | continue; |
2497 | } |
2498 | } |
2499 | } |
2500 | return res; |
2501 | } |
2502 | /* If dependence analysis will give up due to the limit on the |
2503 | number of datarefs stop here and fail fatally. */ |
2504 | if (datarefs->length () |
2505 | > (unsigned)param_loop_max_datarefs_for_datadeps) |
2506 | return opt_result::failure_at (loc: stmt, fmt: "exceeded param " |
2507 | "loop-max-datarefs-for-datadeps\n" ); |
2508 | } |
2509 | return opt_result::success (); |
2510 | } |
2511 | |
2512 | /* Look for SLP-only access groups and turn each individual access into its own |
2513 | group. */ |
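/* Illustrative example: a two-element load group { a[2*i], a[2*i+1] } that
was only usable by SLP ends up, when the statements are not SLPed after
all, split into two single-element groups below, each with
DR_GROUP_SIZE 1 and a gap of group_size - 1 (stores and strided leaders
are instead marked strided with a gap of 0).  */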
2514 | static void |
2515 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) |
2516 | { |
2517 | unsigned int i; |
2518 | struct data_reference *dr; |
2519 | |
2520 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups" ); |
2521 | |
2522 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
2523 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2524 | { |
2525 | gcc_assert (DR_REF (dr)); |
2526 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)); |
2527 | |
2528 | /* Check if the load is a part of an interleaving chain. */ |
2529 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
2530 | { |
2531 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info); |
2532 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element); |
2533 | unsigned int group_size = DR_GROUP_SIZE (first_element); |
2534 | |
2535 | /* Check if SLP-only groups. */ |
2536 | if (!STMT_SLP_TYPE (stmt_info) |
2537 | && STMT_VINFO_SLP_VECT_ONLY (first_element)) |
2538 | { |
2539 | /* Dissolve the group. */ |
2540 | STMT_VINFO_SLP_VECT_ONLY (first_element) = false; |
2541 | |
2542 | stmt_vec_info vinfo = first_element; |
2543 | while (vinfo) |
2544 | { |
2545 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo); |
2546 | DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; |
2547 | DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
2548 | DR_GROUP_SIZE (vinfo) = 1; |
2549 | if (STMT_VINFO_STRIDED_P (first_element) |
2550 | /* We cannot handle stores with gaps. */ |
2551 | || DR_IS_WRITE (dr_info->dr)) |
2552 | { |
2553 | STMT_VINFO_STRIDED_P (vinfo) = true; |
2554 | DR_GROUP_GAP (vinfo) = 0; |
2555 | } |
2556 | else |
2557 | DR_GROUP_GAP (vinfo) = group_size - 1; |
2558 | /* Duplicate and adjust alignment info, it needs to |
2559 | be present on each group leader, see dr_misalignment. */ |
2560 | if (vinfo != first_element) |
2561 | { |
2562 | dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo); |
2563 | dr_info2->target_alignment = dr_info->target_alignment; |
2564 | int misalignment = dr_info->misalignment; |
2565 | if (misalignment != DR_MISALIGNMENT_UNKNOWN) |
2566 | { |
2567 | HOST_WIDE_INT diff |
2568 | = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr)) |
2569 | - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))); |
2570 | unsigned HOST_WIDE_INT align_c |
2571 | = dr_info->target_alignment.to_constant (); |
2572 | misalignment = (misalignment + diff) % align_c; |
2573 | } |
2574 | dr_info2->misalignment = misalignment; |
2575 | } |
2576 | vinfo = next; |
2577 | } |
2578 | } |
2579 | } |
2580 | } |
2581 | } |
2582 | |
2583 | /* Determine if operating on full vectors for LOOP_VINFO might leave |
2584 | some scalar iterations still to do. If so, decide how we should |
2585 | handle those scalar iterations. The possibilities are: |
2586 | |
2587 | (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. |
2588 | In this case: |
2589 | |
2590 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == true |
2591 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2592 | LOOP_VINFO_PEELING_FOR_NITER == false |
2593 | |
2594 | (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop |
2595 | to handle the remaining scalar iterations. In this case: |
2596 | |
2597 | LOOP_VINFO_USING_PARTIAL_VECTORS_P == false |
2598 | LOOP_VINFO_PEELING_FOR_NITER == true |
2599 | |
2600 | There are two choices: |
2601 | |
2602 | (2a) Consider vectorizing the epilogue loop at the same VF as the |
2603 | main loop, but using partial vectors instead of full vectors. |
2604 | In this case: |
2605 | |
2606 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true |
2607 | |
2608 | (2b) Consider vectorizing the epilogue loop at lower VFs only. |
2609 | In this case: |
2610 | |
2611 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false |
2612 | */ |
2613 | |
2614 | opt_result |
2615 | vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo) |
2616 | { |
2617 | /* Determine whether there would be any scalar iterations left over. */ |
2618 | bool need_peeling_or_partial_vectors_p |
2619 | = vect_need_peeling_or_partial_vectors_p (loop_vinfo); |
2620 | |
2621 | /* Decide whether to vectorize the loop with partial vectors. */ |
2622 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; |
2623 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; |
2624 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
2625 | && need_peeling_or_partial_vectors_p) |
2626 | { |
2627 | /* For partial-vector-usage=1, try to push the handling of partial |
2628 | vectors to the epilogue, with the main loop continuing to operate |
2629 | on full vectors. |
2630 | |
If we are unrolling, we also do not want to use partial vectors.  This
is to avoid the overhead of generating multiple masks and also to
avoid having to execute entire iterations of FALSE masked instructions
when dealing with one or fewer full iterations.
2635 | |
2636 | ??? We could then end up failing to use partial vectors if we |
2637 | decide to peel iterations into a prologue, and if the main loop |
2638 | then ends up processing fewer than VF iterations. */ |
2639 | if ((param_vect_partial_vector_usage == 1 |
2640 | || loop_vinfo->suggested_unroll_factor > 1) |
2641 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
2642 | && !vect_known_niters_smaller_than_vf (loop_vinfo)) |
2643 | LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; |
2644 | else |
2645 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; |
2646 | } |
2647 | |
2648 | if (dump_enabled_p ()) |
2649 | dump_printf_loc (MSG_NOTE, vect_location, |
2650 | "operating on %s vectors%s.\n" , |
2651 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
2652 | ? "partial" : "full" , |
2653 | LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
2654 | ? " for epilogue loop" : "" ); |
2655 | |
2656 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) |
2657 | = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
2658 | && need_peeling_or_partial_vectors_p); |
2659 | |
2660 | return opt_result::success (); |
2661 | } |
2662 | |
/* Function vect_analyze_loop_2.

Apply a set of analyses on LOOP specified by LOOP_VINFO; the different
analyses will record information in some members of LOOP_VINFO.  FATAL
indicates whether some analysis hit a fatal error.  If the pointer
SUGGESTED_UNROLL_FACTOR is non-NULL, it is intended to be filled with
the suggested unroll factor worked out here, while a NULL pointer means
we are going to apply the suggested unroll factor.  SLP_DONE_FOR_SUGGESTED_UF
holds the SLP decision made when the suggested unroll factor was worked
out.  */
2673 | static opt_result |
2674 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, |
2675 | unsigned *suggested_unroll_factor, |
2676 | bool& slp_done_for_suggested_uf) |
2677 | { |
2678 | opt_result ok = opt_result::success (); |
2679 | int res; |
2680 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR; |
2681 | poly_uint64 min_vf = 2; |
2682 | loop_vec_info orig_loop_vinfo = NULL; |
2683 | |
2684 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the |
2685 | loop_vec_info of the first vectorized loop. */ |
2686 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2687 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
2688 | else |
2689 | orig_loop_vinfo = loop_vinfo; |
2690 | gcc_assert (orig_loop_vinfo); |
2691 | |
2692 | /* The first group of checks is independent of the vector size. */ |
2693 | fatal = true; |
2694 | |
2695 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo) |
2696 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo))) |
2697 | return opt_result::failure_at (loc: vect_location, |
2698 | fmt: "not vectorized: simd if(0)\n" ); |
2699 | |
2700 | /* Find all data references in the loop (which correspond to vdefs/vuses) |
2701 | and analyze their evolution in the loop. */ |
2702 | |
2703 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo); |
2704 | |
2705 | /* Gather the data references and count stmts in the loop. */ |
2706 | if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ()) |
2707 | { |
2708 | opt_result res |
2709 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo), |
&LOOP_VINFO_DATAREFS (loop_vinfo),
&LOOP_VINFO_N_STMTS (loop_vinfo));
2712 | if (!res) |
2713 | { |
2714 | if (dump_enabled_p ()) |
2715 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2716 | "not vectorized: loop contains function " |
2717 | "calls or data references that cannot " |
2718 | "be analyzed\n" ); |
2719 | return res; |
2720 | } |
2721 | loop_vinfo->shared->save_datarefs (); |
2722 | } |
2723 | else |
2724 | loop_vinfo->shared->check_datarefs (); |
2725 | |
2726 | /* Analyze the data references and also adjust the minimal |
2727 | vectorization factor according to the loads and stores. */ |
2728 | |
2729 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); |
2730 | if (!ok) |
2731 | { |
2732 | if (dump_enabled_p ()) |
2733 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2734 | "bad data references.\n" ); |
2735 | return ok; |
2736 | } |
2737 | |
2738 | /* Check if we are applying unroll factor now. */ |
2739 | bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1; |
2740 | gcc_assert (!applying_suggested_uf || !suggested_unroll_factor); |
2741 | |
/* If the SLP decision was false when the suggested unroll factor was
worked out, and we are applying the suggested unroll factor, we can
simply skip all SLP-related analyses this time.  */
2745 | bool slp = !applying_suggested_uf || slp_done_for_suggested_uf; |
2746 | |
2747 | /* Classify all cross-iteration scalar data-flow cycles. |
2748 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ |
2749 | vect_analyze_scalar_cycles (loop_vinfo, slp); |
2750 | |
2751 | vect_pattern_recog (loop_vinfo); |
2752 | |
2753 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); |
2754 | |
2755 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
2756 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ |
2757 | |
2758 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); |
2759 | if (!ok) |
2760 | { |
2761 | if (dump_enabled_p ()) |
2762 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2763 | "bad data access.\n" ); |
2764 | return ok; |
2765 | } |
2766 | |
2767 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
2768 | |
2769 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); |
2770 | if (!ok) |
2771 | { |
2772 | if (dump_enabled_p ()) |
2773 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2774 | "unexpected pattern.\n" ); |
2775 | return ok; |
2776 | } |
2777 | |
2778 | /* While the rest of the analysis below depends on it in some way. */ |
2779 | fatal = false; |
2780 | |
2781 | /* Analyze data dependences between the data-refs in the loop |
2782 | and adjust the maximum vectorization factor according to |
2783 | the dependences. |
2784 | FORNOW: fail at the first data dependence that we encounter. */ |
2785 | |
2786 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); |
2787 | if (!ok) |
2788 | { |
2789 | if (dump_enabled_p ()) |
2790 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2791 | "bad data dependence.\n" ); |
2792 | return ok; |
2793 | } |
2794 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2795 | && maybe_lt (a: max_vf, b: min_vf)) |
2796 | return opt_result::failure_at (loc: vect_location, fmt: "bad data dependence.\n" ); |
2797 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; |
2798 | |
2799 | ok = vect_determine_vectorization_factor (loop_vinfo); |
2800 | if (!ok) |
2801 | { |
2802 | if (dump_enabled_p ()) |
2803 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2804 | "can't determine vectorization factor.\n" ); |
2805 | return ok; |
2806 | } |
2807 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2808 | && maybe_lt (a: max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
2809 | return opt_result::failure_at (loc: vect_location, fmt: "bad data dependence.\n" ); |
2810 | |
2811 | /* Compute the scalar iteration cost. */ |
2812 | vect_compute_single_scalar_iteration_cost (loop_vinfo); |
2813 | |
2814 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2815 | |
2816 | if (slp) |
2817 | { |
2818 | /* Check the SLP opportunities in the loop, analyze and build |
2819 | SLP trees. */ |
2820 | ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)); |
2821 | if (!ok) |
2822 | return ok; |
2823 | |
2824 | /* If there are any SLP instances mark them as pure_slp. */ |
2825 | slp = vect_make_slp_decision (loop_vinfo); |
2826 | if (slp) |
2827 | { |
2828 | /* Find stmts that need to be both vectorized and SLPed. */ |
2829 | vect_detect_hybrid_slp (loop_vinfo); |
2830 | |
2831 | /* Update the vectorization factor based on the SLP decision. */ |
2832 | vect_update_vf_for_slp (loop_vinfo); |
2833 | |
2834 | /* Optimize the SLP graph with the vectorization factor fixed. */ |
2835 | vect_optimize_slp (loop_vinfo); |
2836 | |
2837 | /* Gather the loads reachable from the SLP graph entries. */ |
2838 | vect_gather_slp_loads (loop_vinfo); |
2839 | } |
2840 | } |
2841 | |
2842 | bool saved_can_use_partial_vectors_p |
2843 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); |
2844 | |
2845 | /* We don't expect to have to roll back to anything other than an empty |
2846 | set of rgroups. */ |
2847 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); |
2848 | |
2849 | /* This is the point where we can re-start analysis with SLP forced off. */ |
2850 | start_over: |
2851 | |
/* Apply the suggested unrolling factor; this was determined by the backend
during finish_cost the first time we ran the analysis for this
vector mode.  */
2855 | if (applying_suggested_uf) |
2856 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= loop_vinfo->suggested_unroll_factor; |
2857 | |
2858 | /* Now the vectorization factor is final. */ |
2859 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2860 | gcc_assert (known_ne (vectorization_factor, 0U)); |
2861 | |
2862 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) |
2863 | { |
2864 | dump_printf_loc (MSG_NOTE, vect_location, |
2865 | "vectorization_factor = " ); |
2866 | dump_dec (MSG_NOTE, vectorization_factor); |
2867 | dump_printf (MSG_NOTE, ", niters = %wd\n" , |
2868 | LOOP_VINFO_INT_NITERS (loop_vinfo)); |
2869 | } |
2870 | |
2871 | loop_vinfo->vector_costs = init_cost (vinfo: loop_vinfo, costing_for_scalar: false); |
2872 | |
2873 | /* Analyze the alignment of the data-refs in the loop. |
2874 | Fail if a data reference is found that cannot be vectorized. */ |
2875 | |
2876 | ok = vect_analyze_data_refs_alignment (loop_vinfo); |
2877 | if (!ok) |
2878 | { |
2879 | if (dump_enabled_p ()) |
2880 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2881 | "bad data alignment.\n" ); |
2882 | return ok; |
2883 | } |
2884 | |
2885 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. |
2886 | It is important to call pruning after vect_analyze_data_ref_accesses, |
2887 | since we use grouping information gathered by interleaving analysis. */ |
2888 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); |
2889 | if (!ok) |
2890 | return ok; |
2891 | |
2892 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue |
2893 | vectorization, since we do not want to add extra peeling or |
2894 | add versioning for alignment. */ |
2895 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2896 | /* This pass will decide on using loop versioning and/or loop peeling in |
2897 | order to enhance the alignment of data references in the loop. */ |
2898 | ok = vect_enhance_data_refs_alignment (loop_vinfo); |
2899 | if (!ok) |
2900 | return ok; |
2901 | |
2902 | if (slp) |
2903 | { |
2904 | /* Analyze operations in the SLP instances. Note this may |
2905 | remove unsupported SLP instances which makes the above |
2906 | SLP kind detection invalid. */ |
2907 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); |
2908 | vect_slp_analyze_operations (loop_vinfo); |
2909 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) |
2910 | { |
2911 | ok = opt_result::failure_at (loc: vect_location, |
2912 | fmt: "unsupported SLP instances\n" ); |
2913 | goto again; |
2914 | } |
2915 | |
2916 | /* Check whether any load in ALL SLP instances is possibly permuted. */ |
2917 | slp_tree load_node, slp_root; |
2918 | unsigned i, x; |
2919 | slp_instance instance; |
2920 | bool can_use_lanes = true; |
2921 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance) |
2922 | { |
2923 | slp_root = SLP_INSTANCE_TREE (instance); |
2924 | int group_size = SLP_TREE_LANES (slp_root); |
2925 | tree vectype = SLP_TREE_VECTYPE (slp_root); |
2926 | bool loads_permuted = false; |
2927 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2928 | { |
2929 | if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ()) |
2930 | continue; |
2931 | unsigned j; |
2932 | stmt_vec_info load_info; |
2933 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) |
2934 | if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j) |
2935 | { |
2936 | loads_permuted = true; |
2937 | break; |
2938 | } |
2939 | } |
2940 | |
2941 | /* If the loads and stores can be handled with load/store-lane |
2942 | instructions record it and move on to the next instance. */ |
2943 | if (loads_permuted |
2944 | && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store |
2945 | && vect_store_lanes_supported (vectype, group_size, false) |
2946 | != IFN_LAST) |
2947 | { |
2948 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2949 | if (STMT_VINFO_GROUPED_ACCESS |
2950 | (SLP_TREE_REPRESENTATIVE (load_node))) |
2951 | { |
2952 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT |
2953 | (SLP_TREE_REPRESENTATIVE (load_node)); |
2954 | /* Use SLP for strided accesses (or if we can't |
2955 | load-lanes). */ |
2956 | if (STMT_VINFO_STRIDED_P (stmt_vinfo) |
2957 | || vect_load_lanes_supported |
2958 | (STMT_VINFO_VECTYPE (stmt_vinfo), |
2959 | DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST) |
2960 | break; |
2961 | } |
2962 | |
2963 | can_use_lanes |
2964 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length (); |
2965 | |
2966 | if (can_use_lanes && dump_enabled_p ()) |
2967 | dump_printf_loc (MSG_NOTE, vect_location, |
2968 | "SLP instance %p can use load/store-lanes\n" , |
2969 | (void *) instance); |
2970 | } |
2971 | else |
2972 | { |
2973 | can_use_lanes = false; |
2974 | break; |
2975 | } |
2976 | } |
2977 | |
2978 | /* If all SLP instances can use load/store-lanes abort SLP and try again |
2979 | with SLP disabled. */ |
2980 | if (can_use_lanes) |
2981 | { |
2982 | ok = opt_result::failure_at (vect_location, |
2983 | "Built SLP cancelled: can use " |
2984 | "load/store-lanes\n"); |
2985 | if (dump_enabled_p ()) |
2986 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2987 | "Built SLP cancelled: all SLP instances support " |
2988 | "load/store-lanes\n" ); |
2989 | goto again; |
2990 | } |
2991 | } |
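/* Illustrative example (assumes a target with load/store-lane support,
   e.g. AArch64 ld2/st2; not part of the analysis itself): an interleaved
   access group such as

     for (i = 0; i < n; i++)
       {
         a[2*i]   = b[2*i]   + 1;
         a[2*i+1] = b[2*i+1] + 2;
       }

   needs load/store permutations when vectorized with SLP, whereas
   load/store-lane instructions handle the grouped accesses directly.
   When every SLP instance is in that situation the code above cancels
   SLP and restarts the analysis without it.  */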
2992 | |
2993 | /* Dissolve SLP-only groups. */ |
2994 | vect_dissolve_slp_only_groups (loop_vinfo); |
2995 | |
2996 | /* Scan all the remaining operations in the loop that are not subject |
2997 | to SLP and make sure they are vectorizable. */ |
2998 | ok = vect_analyze_loop_operations (loop_vinfo); |
2999 | if (!ok) |
3000 | { |
3001 | if (dump_enabled_p ()) |
3002 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3003 | "bad operation or unsupported loop bound.\n" ); |
3004 | return ok; |
3005 | } |
3006 | |
3007 | /* For now we don't expect to mix the masking and length approaches for one |
3008 | loop, so disable the use of partial vectors if both are recorded. */ |
3009 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3010 | && !LOOP_VINFO_MASKS (loop_vinfo).is_empty () |
3011 | && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()) |
3012 | { |
3013 | if (dump_enabled_p ()) |
3014 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3015 | "can't vectorize a loop with partial vectors" |
3016 | " because we don't expect to mix different" |
3017 | " approaches with partial vectors for the" |
3018 | " same loop.\n" ); |
3019 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3020 | } |
3021 | |
3022 | /* If we still have the option of using partial vectors, |
3023 | check whether we can generate the necessary loop controls. */ |
3024 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
3025 | { |
3026 | if (!LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) |
3027 | { |
3028 | if (!vect_verify_full_masking (loop_vinfo) |
3029 | && !vect_verify_full_masking_avx512 (loop_vinfo)) |
3030 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3031 | } |
3032 | else /* !LOOP_VINFO_LENS (loop_vinfo).is_empty () */ |
3033 | if (!vect_verify_loop_lens (loop_vinfo)) |
3034 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3035 | } |
3036 | |
3037 | /* If we're vectorizing a loop that uses length "controls" and |
3038 | can iterate more than once, we apply the decrementing IV approach |
3039 | to the loop control. */ |
3040 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3041 | && LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_len |
3042 | && LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) == 0 |
3043 | && !(LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
3044 | && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo), |
3045 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)))) |
3046 | LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true; |
3047 | |
3048 | /* If a loop uses length controls and has a decrementing loop control IV, |
3049 | we will normally pass that IV through a MIN_EXPR to calculate the |
3050 | basis for the length controls. E.g. in a loop that processes one |
3051 | element per scalar iteration, the number of elements would be |
3052 | MIN_EXPR <N, VF>, where N is the number of scalar iterations left. |
3053 | |
3054 | This MIN_EXPR approach allows us to use pointer IVs with an invariant |
3055 | step, since only the final iteration of the vector loop can have |
3056 | inactive lanes. |
3057 | |
3058 | However, some targets have a dedicated instruction for calculating the |
3059 | preferred length, given the total number of elements that still need to |
3060 | be processed. This is encapsulated in the SELECT_VL internal function. |
3061 | |
3062 | If the target supports SELECT_VL, we can use it instead of MIN_EXPR |
3063 | to determine the basis for the length controls. However, unlike the |
3064 | MIN_EXPR calculation, the SELECT_VL calculation can decide to make |
3065 | lanes inactive in any iteration of the vector loop, not just the last |
3066 | iteration. This SELECT_VL approach therefore requires us to use pointer |
3067 | IVs with variable steps. |
3068 | |
3069 | Once we've decided how many elements should be processed by one |
3070 | iteration of the vector loop, we need to populate the rgroup controls. |
3071 | If a loop has multiple rgroups, we need to make sure that those rgroups |
3072 | "line up" (that is, they must be consistent about which elements are |
3073 | active and which aren't). This is done by vect_adjust_loop_lens_control. |
3074 | |
3075 | In principle, it would be possible to use vect_adjust_loop_lens_control |
3076 | on either the result of a MIN_EXPR or the result of a SELECT_VL. |
3077 | However: |
3078 | |
3079 | (1) In practice, it only makes sense to use SELECT_VL when a vector |
3080 | operation will be controlled directly by the result. It is not |
3081 | worth using SELECT_VL if it would only be the input to other |
3082 | calculations. |
3083 | |
3084 | (2) If we use SELECT_VL for an rgroup that has N controls, each associated |
3085 | pointer IV will need N updates by a variable amount (N-1 updates |
3086 | within the iteration and 1 update to move to the next iteration). |
3087 | |
3088 | Because of this, we prefer to use the MIN_EXPR approach whenever there |
3089 | is more than one length control. |
3090 | |
3091 | In addition, SELECT_VL always operates to a granularity of 1 unit. |
3092 | If we wanted to use it to control an SLP operation on N consecutive |
3093 | elements, we would need to make the SELECT_VL inputs measure scalar |
3094 | iterations (rather than elements) and then multiply the SELECT_VL |
3095 | result by N. But using SELECT_VL this way is inefficient because |
3096 | of (1) above. |
3097 | |
3098 | Finally, we don't use SELECT_VL for a single rgroup when both of the |
3099 | following are satisfied: |
3100 | |
3101 | (1) LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) is true. |
3102 | (2) LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () is true. |
3103 | |
3104 | Since the variable step of SELECT_VL would make SCEV analysis fail, and |
3105 | we would then lose the benefit of subsequent unroll optimizations, we |
3106 | prefer the MIN_EXPR approach in this situation. */ |
3107 | if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) |
3108 | { |
3109 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
3110 | if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type, |
3111 | OPTIMIZE_FOR_SPEED) |
3112 | && LOOP_VINFO_LENS (loop_vinfo).length () == 1 |
3113 | && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp |
3114 | && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
3115 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())) |
3116 | LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true; |
3117 | } |
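/* A rough sketch of the two length calculations discussed above, in
   GIMPLE-like pseudo code (illustrative only; the actual sequences are
   emitted later, during transformation):

     MIN_EXPR style - invariant pointer step, only the final iteration
     may have inactive lanes:
       len = MIN_EXPR <remaining, VF>;
       ... len-controlled loads/stores ...
       ptr = ptr + VF * sizeof (elt);
       remaining = remaining - VF;

     SELECT_VL style - variable pointer step, any iteration may process
     fewer than VF elements:
       len = SELECT_VL (remaining, VF);
       ... len-controlled loads/stores ...
       ptr = ptr + len * sizeof (elt);
       remaining = remaining - len;  */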
3118 | |
3119 | /* Decide whether this loop_vinfo should use partial vectors or peeling, |
3120 | assuming that the loop will be used as a main loop. We will redo |
3121 | this analysis later if we instead decide to use the loop as an |
3122 | epilogue loop. */ |
3123 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo); |
3124 | if (!ok) |
3125 | return ok; |
3126 | |
3127 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs |
3128 | to be able to handle fewer than VF scalars, or needs to have a lower VF |
3129 | than the main loop. */ |
3130 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
3131 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
3132 | { |
3133 | poly_uint64 unscaled_vf |
3134 | = exact_div (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), |
3135 | orig_loop_vinfo->suggested_unroll_factor); |
3136 | if (maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), unscaled_vf)) |
3137 | return opt_result::failure_at (vect_location, |
3138 | "Vectorization factor too high for" |
3139 | " epilogue loop.\n"); |
3140 | } |
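/* Worked example (illustrative numbers): if the main loop was analyzed
   with VF = 16 using a suggested unroll factor of 2, then
   UNSCALED_VF = 16 / 2 = 8, and an epilogue candidate that cannot use
   partial vectors is only accepted here if its own VF is known to be
   smaller than 8.  */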
3141 | |
3142 | /* Check the costings of the loop make vectorizing worthwhile. */ |
3143 | res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor); |
3144 | if (res < 0) |
3145 | { |
3146 | ok = opt_result::failure_at (vect_location, |
3147 | "Loop costings may not be worthwhile.\n"); |
3148 | goto again; |
3149 | } |
3150 | if (!res) |
3151 | return opt_result::failure_at (vect_location, |
3152 | "Loop costings not worthwhile.\n"); |
3153 | |
3154 | /* If an epilogue loop is required make sure we can create one. */ |
3155 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
3156 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) |
3157 | { |
3158 | if (dump_enabled_p ()) |
3159 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n" ); |
3160 | if (!vect_can_advance_ivs_p (loop_vinfo) |
3161 | || !slpeel_can_duplicate_loop_p (loop, |
3162 | LOOP_VINFO_IV_EXIT (loop_vinfo), |
3163 | LOOP_VINFO_IV_EXIT (loop_vinfo))) |
3164 | { |
3165 | ok = opt_result::failure_at (vect_location, |
3166 | "not vectorized: can't create required " |
3167 | "epilog loop\n"); |
3168 | goto again; |
3169 | } |
3170 | } |
3171 | |
3172 | /* During peeling, we need to check if number of loop iterations is |
3173 | enough for both peeled prolog loop and vector loop. This check |
3174 | can be merged along with threshold check of loop versioning, so |
3175 | increase threshold for this case if necessary. |
3176 | |
3177 | If we are analyzing an epilogue we still want to check what its |
3178 | versioning threshold would be. If we decide to vectorize the epilogues we |
3179 | will want to use the lowest versioning threshold of all epilogues and main |
3180 | loop. This will enable us to enter a vectorized epilogue even when |
3181 | versioning the loop. We can't simply check whether the epilogue requires |
3182 | versioning though since we may have skipped some versioning checks when |
3183 | analyzing the epilogue. For instance, checks for alias versioning will be |
3184 | skipped when dealing with epilogues as we assume we already checked them |
3185 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ |
3186 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)) |
3187 | { |
3188 | poly_uint64 niters_th = 0; |
3189 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
3190 | |
3191 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
3192 | { |
3193 | /* Niters for peeled prolog loop. */ |
3194 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
3195 | { |
3196 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
3197 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
3198 | niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; |
3199 | } |
3200 | else |
3201 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
3202 | } |
3203 | |
3204 | /* Niters for at least one iteration of vectorized loop. */ |
3205 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
3206 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
3207 | /* One additional iteration because of peeling for gap. */ |
3208 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
3209 | niters_th += 1; |
3210 | |
3211 | /* Use the same condition as vect_transform_loop to decide when to use |
3212 | the cost to determine a versioning threshold. */ |
3213 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) |
3214 | && ordered_p (th, niters_th)) |
3215 | niters_th = ordered_max (poly_uint64 (th), niters_th); |
3216 | |
3217 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; |
3218 | } |
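/* Worked example (illustrative numbers): with a prologue peel of 3
   iterations for alignment, VF = 8 without partial vectors and peeling
   for gaps required, the code above computes NITERS_TH = 3 + 8 + 1 = 12.
   If the runtime profitability threshold TH is larger, say 15, the
   versioning threshold becomes MAX (15, 12) = 15.  */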
3219 | |
3220 | gcc_assert (known_eq (vectorization_factor, |
3221 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))); |
3222 | |
3223 | slp_done_for_suggested_uf = slp; |
3224 | |
3225 | /* Ok to vectorize! */ |
3226 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; |
3227 | return opt_result::success (); |
3228 | |
3229 | again: |
3230 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ |
3231 | gcc_assert (!ok); |
3232 | |
3233 | /* Try again with SLP forced off but if we didn't do any SLP there is |
3234 | no point in re-trying. */ |
3235 | if (!slp) |
3236 | return ok; |
3237 | |
3238 | /* If the slp decision is true when suggested unroll factor is worked |
3239 | out, and we are applying suggested unroll factor, we don't need to |
3240 | re-try any more. */ |
3241 | if (applying_suggested_uf && slp_done_for_suggested_uf) |
3242 | return ok; |
3243 | |
3244 | /* If there are reduction chains re-trying will fail anyway. */ |
3245 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) |
3246 | return ok; |
3247 | |
3248 | /* Likewise if the grouped loads or stores in the SLP cannot be handled |
3249 | via interleaving or lane instructions. */ |
3250 | slp_instance instance; |
3251 | slp_tree node; |
3252 | unsigned i, j; |
3253 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
3254 | { |
3255 | stmt_vec_info vinfo; |
3256 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; |
3257 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) |
3258 | continue; |
3259 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
3260 | unsigned int size = DR_GROUP_SIZE (vinfo); |
3261 | tree vectype = STMT_VINFO_VECTYPE (vinfo); |
3262 | if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST |
3263 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) |
3264 | && ! vect_grouped_store_supported (vectype, size)) |
3265 | return opt_result::failure_at (vinfo->stmt, |
3266 | "unsupported grouped store\n"); |
3267 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) |
3268 | { |
3269 | vinfo = SLP_TREE_REPRESENTATIVE (node); |
3270 | if (STMT_VINFO_GROUPED_ACCESS (vinfo)) |
3271 | { |
3272 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
3273 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); |
3274 | size = DR_GROUP_SIZE (vinfo); |
3275 | vectype = STMT_VINFO_VECTYPE (vinfo); |
3276 | if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST |
3277 | && ! vect_grouped_load_supported (vectype, single_element_p, |
3278 | size)) |
3279 | return opt_result::failure_at (vinfo->stmt, |
3280 | "unsupported grouped load\n"); |
3281 | } |
3282 | } |
3283 | } |
3284 | |
3285 | if (dump_enabled_p ()) |
3286 | dump_printf_loc (MSG_NOTE, vect_location, |
3287 | "re-trying with SLP disabled\n" ); |
3288 | |
3289 | /* Roll back state appropriately. No SLP this time. */ |
3290 | slp = false; |
3291 | /* Restore the vectorization factor as it was without SLP. */ |
3292 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; |
3293 | /* Free the SLP instances. */ |
3294 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) |
3295 | vect_free_slp_instance (instance); |
3296 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
3297 | /* Reset SLP type to loop_vect on all stmts. */ |
3298 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
3299 | { |
3300 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
3301 | for (gimple_stmt_iterator si = gsi_start_phis (bb); |
3302 | !gsi_end_p (si); gsi_next (&si)) |
3303 | { |
3304 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
3305 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
3306 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
3307 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
3308 | { |
3309 | /* vectorizable_reduction adjusts reduction stmt def-types, |
3310 | restore them to that of the PHI. */ |
3311 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) |
3312 | = STMT_VINFO_DEF_TYPE (stmt_info); |
3313 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize |
3314 | (STMT_VINFO_REDUC_DEF (stmt_info))) |
3315 | = STMT_VINFO_DEF_TYPE (stmt_info); |
3316 | } |
3317 | } |
3318 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
3319 | !gsi_end_p (si); gsi_next (&si)) |
3320 | { |
3321 | if (is_gimple_debug (gsi_stmt (si))) |
3322 | continue; |
3323 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
3324 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
3325 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
3326 | { |
3327 | stmt_vec_info pattern_stmt_info |
3328 | = STMT_VINFO_RELATED_STMT (stmt_info); |
3329 | if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info)) |
3330 | STMT_VINFO_IN_PATTERN_P (stmt_info) = false; |
3331 | |
3332 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
3333 | STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; |
3334 | for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); |
3335 | !gsi_end_p (pi); gsi_next (&pi)) |
3336 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) |
3337 | = loop_vect; |
3338 | } |
3339 | } |
3340 | } |
3341 | /* Free optimized alias test DDRS. */ |
3342 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); |
3343 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); |
3344 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); |
3345 | /* Reset target cost data. */ |
3346 | delete loop_vinfo->vector_costs; |
3347 | loop_vinfo->vector_costs = nullptr; |
3348 | /* Reset accumulated rgroup information. */ |
3349 | LOOP_VINFO_MASKS (loop_vinfo).mask_set.empty (); |
3350 | release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo).rgc_vec); |
3351 | release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo)); |
3352 | /* Reset assorted flags. */ |
3353 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; |
3354 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; |
3355 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; |
3356 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; |
3357 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3358 | = saved_can_use_partial_vectors_p; |
3359 | |
3360 | goto start_over; |
3361 | } |
3362 | |
3363 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears |
3364 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that |
3365 | OLD_LOOP_VINFO is better unless something specifically indicates |
3366 | otherwise. |
3367 | |
3368 | Note that this deliberately isn't a partial order. */ |
3369 | |
3370 | static bool |
3371 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, |
3372 | loop_vec_info old_loop_vinfo) |
3373 | { |
3374 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo); |
3375 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop); |
3376 | |
3377 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo); |
3378 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo); |
3379 | |
3380 | /* Always prefer a VF of loop->simdlen over any other VF. */ |
3381 | if (loop->simdlen) |
3382 | { |
3383 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen); |
3384 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen); |
3385 | if (new_simdlen_p != old_simdlen_p) |
3386 | return new_simdlen_p; |
3387 | } |
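/* For example (illustrative): for a loop marked with
   "#pragma omp simd simdlen(8)" a candidate whose VF is known to equal 8
   is preferred here over one whose VF is not, regardless of cost; only
   when both or neither candidate matches the requested simdlen do we
   fall through to the cost comparison below.  */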
3388 | |
3389 | const auto *old_costs = old_loop_vinfo->vector_costs; |
3390 | const auto *new_costs = new_loop_vinfo->vector_costs; |
3391 | if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo)) |
3392 | return new_costs->better_epilogue_loop_than_p (old_costs, main_loop); |
3393 | |
3394 | return new_costs->better_main_loop_than_p (old_costs); |
3395 | } |
3396 | |
3397 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return |
3398 | true if we should. */ |
3399 | |
3400 | static bool |
3401 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, |
3402 | loop_vec_info old_loop_vinfo) |
3403 | { |
3404 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) |
3405 | return false; |
3406 | |
3407 | if (dump_enabled_p ()) |
3408 | dump_printf_loc (MSG_NOTE, vect_location, |
3409 | "***** Preferring vector mode %s to vector mode %s\n" , |
3410 | GET_MODE_NAME (new_loop_vinfo->vector_mode), |
3411 | GET_MODE_NAME (old_loop_vinfo->vector_mode)); |
3412 | return true; |
3413 | } |
3414 | |
3415 | /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is |
3416 | not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance |
3417 | MODE_I to the next mode useful to analyze. |
3418 | Return the loop_vinfo on success and wrapped null on failure. */ |
3419 | |
3420 | static opt_loop_vec_info |
3421 | vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, |
3422 | const vect_loop_form_info *loop_form_info, |
3423 | loop_vec_info main_loop_vinfo, |
3424 | const vector_modes &vector_modes, unsigned &mode_i, |
3425 | machine_mode &autodetected_vector_mode, |
3426 | bool &fatal) |
3427 | { |
3428 | loop_vec_info loop_vinfo |
3429 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); |
3430 | |
3431 | machine_mode vector_mode = vector_modes[mode_i]; |
3432 | loop_vinfo->vector_mode = vector_mode; |
3433 | unsigned int suggested_unroll_factor = 1; |
3434 | bool slp_done_for_suggested_uf = false; |
3435 | |
3436 | /* Run the main analysis. */ |
3437 | opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, |
3438 | &suggested_unroll_factor, |
3439 | slp_done_for_suggested_uf); |
3440 | if (dump_enabled_p ()) |
3441 | dump_printf_loc (MSG_NOTE, vect_location, |
3442 | "***** Analysis %s with vector mode %s\n", |
3443 | res ? "succeeded" : "failed", |
3444 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
3445 | |
3446 | if (res && !main_loop_vinfo && suggested_unroll_factor > 1) |
3447 | { |
3448 | if (dump_enabled_p ()) |
3449 | dump_printf_loc (MSG_NOTE, vect_location, |
3450 | "***** Re-trying analysis for unrolling" |
3451 | " with unroll factor %d and slp %s.\n" , |
3452 | suggested_unroll_factor, |
3453 | slp_done_for_suggested_uf ? "on" : "off" ); |
3454 | loop_vec_info unroll_vinfo |
3455 | = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); |
3456 | unroll_vinfo->vector_mode = vector_mode; |
3457 | unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; |
3458 | opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, |
3459 | slp_done_for_suggested_uf); |
3460 | if (new_res) |
3461 | { |
3462 | delete loop_vinfo; |
3463 | loop_vinfo = unroll_vinfo; |
3464 | } |
3465 | else |
3466 | delete unroll_vinfo; |
3467 | } |
3468 | |
3469 | /* Remember the autodetected vector mode. */ |
3470 | if (vector_mode == VOIDmode) |
3471 | autodetected_vector_mode = loop_vinfo->vector_mode; |
3472 | |
3473 | /* Advance mode_i, first skipping modes that would result in the |
3474 | same analysis result. */ |
3475 | while (mode_i + 1 < vector_modes.length () |
3476 | && vect_chooses_same_modes_p (loop_vinfo, |
3477 | vector_modes[mode_i + 1])) |
3478 | { |
3479 | if (dump_enabled_p ()) |
3480 | dump_printf_loc (MSG_NOTE, vect_location, |
3481 | "***** The result for vector mode %s would" |
3482 | " be the same\n" , |
3483 | GET_MODE_NAME (vector_modes[mode_i + 1])); |
3484 | mode_i += 1; |
3485 | } |
3486 | if (mode_i + 1 < vector_modes.length () |
3487 | && VECTOR_MODE_P (autodetected_vector_mode) |
3488 | && (related_vector_mode (vector_modes[mode_i + 1], |
3489 | GET_MODE_INNER (autodetected_vector_mode)) |
3490 | == autodetected_vector_mode) |
3491 | && (related_vector_mode (autodetected_vector_mode, |
3492 | GET_MODE_INNER (vector_modes[mode_i + 1])) |
3493 | == vector_modes[mode_i + 1])) |
3494 | { |
3495 | if (dump_enabled_p ()) |
3496 | dump_printf_loc (MSG_NOTE, vect_location, |
3497 | "***** Skipping vector mode %s, which would" |
3498 | " repeat the analysis for %s\n" , |
3499 | GET_MODE_NAME (vector_modes[mode_i + 1]), |
3500 | GET_MODE_NAME (autodetected_vector_mode)); |
3501 | mode_i += 1; |
3502 | } |
3503 | mode_i++; |
3504 | |
3505 | if (!res) |
3506 | { |
3507 | delete loop_vinfo; |
3508 | if (fatal) |
3509 | gcc_checking_assert (main_loop_vinfo == NULL); |
3510 | return opt_loop_vec_info::propagate_failure (res); |
3511 | } |
3512 | |
3513 | return opt_loop_vec_info::success (loop_vinfo); |
3514 | } |
3515 | |
3516 | /* Function vect_analyze_loop. |
3517 | |
3518 | Apply a set of analyses on LOOP, and create a loop_vec_info struct |
3519 | for it. The different analyses will record information in the |
3520 | loop_vec_info struct. */ |
3521 | opt_loop_vec_info |
3522 | vect_analyze_loop (class loop *loop, vec_info_shared *shared) |
3523 | { |
3524 | DUMP_VECT_SCOPE ("analyze_loop_nest" ); |
3525 | |
3526 | if (loop_outer (loop) |
3527 | && loop_vec_info_for_loop (loop_outer (loop)) |
3528 | && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) |
3529 | return opt_loop_vec_info::failure_at (vect_location, |
3530 | "outer-loop already vectorized.\n"); |
3531 | |
3532 | if (!find_loop_nest (loop, &shared->loop_nest)) |
3533 | return opt_loop_vec_info::failure_at |
3534 | (vect_location, |
3535 | "not vectorized: loop nest containing two or more consecutive inner" |
3536 | " loops cannot be vectorized\n"); |
3537 | |
3538 | /* Analyze the loop form. */ |
3539 | vect_loop_form_info loop_form_info; |
3540 | opt_result res = vect_analyze_loop_form (loop, &loop_form_info); |
3541 | if (!res) |
3542 | { |
3543 | if (dump_enabled_p ()) |
3544 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3545 | "bad loop form.\n" ); |
3546 | return opt_loop_vec_info::propagate_failure (res); |
3547 | } |
3548 | if (!integer_onep (loop_form_info.assumptions)) |
3549 | { |
3550 | /* We consider to vectorize this loop by versioning it under |
3551 | some assumptions. In order to do this, we need to clear |
3552 | existing information computed by scev and niter analyzer. */ |
3553 | scev_reset_htab (); |
3554 | free_numbers_of_iterations_estimates (loop); |
3555 | /* Also set flag for this loop so that following scev and niter |
3556 | analysis are done under the assumptions. */ |
3557 | loop_constraint_set (loop, LOOP_C_FINITE); |
3558 | } |
3559 | |
3560 | auto_vector_modes vector_modes; |
3561 | /* Autodetect first vector size we try. */ |
3562 | vector_modes.safe_push (VOIDmode); |
3563 | unsigned int autovec_flags |
3564 | = targetm.vectorize.autovectorize_vector_modes (&vector_modes, |
3565 | loop->simdlen != 0); |
3566 | bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) |
3567 | && !unlimited_cost_model (loop)); |
3568 | machine_mode autodetected_vector_mode = VOIDmode; |
3569 | opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3570 | unsigned int mode_i = 0; |
3571 | unsigned HOST_WIDE_INT simdlen = loop->simdlen; |
3572 | |
3573 | /* Keep track of the VF for each mode. Initialize all to 0 which indicates |
3574 | a mode has not been analyzed. */ |
3575 | auto_vec<poly_uint64, 8> cached_vf_per_mode; |
3576 | for (unsigned i = 0; i < vector_modes.length (); ++i) |
3577 | cached_vf_per_mode.safe_push (0); |
3578 | |
3579 | /* First determine the main loop vectorization mode, either the first |
3580 | one that works, starting with auto-detecting the vector mode and then |
3581 | following the targets order of preference, or the one with the |
3582 | lowest cost if pick_lowest_cost_p. */ |
3583 | while (1) |
3584 | { |
3585 | bool fatal; |
3586 | unsigned int last_mode_i = mode_i; |
3587 | /* Set cached VF to -1 prior to analysis, which indicates a mode has |
3588 | failed. */ |
3589 | cached_vf_per_mode[last_mode_i] = -1; |
3590 | opt_loop_vec_info loop_vinfo |
3591 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, |
3592 | NULL, vector_modes, mode_i, |
3593 | autodetected_vector_mode, fatal); |
3594 | if (fatal) |
3595 | break; |
3596 | |
3597 | if (loop_vinfo) |
3598 | { |
3599 | /* Analysis has been successful, so update the VF value. The |
3600 | VF should always be a multiple of unroll_factor and we want to |
3601 | capture the original VF here. */ |
3602 | cached_vf_per_mode[last_mode_i] |
3603 | = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
3604 | loop_vinfo->suggested_unroll_factor); |
3605 | /* Once we hit the desired simdlen for the first time, |
3606 | discard any previous attempts. */ |
3607 | if (simdlen |
3608 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)) |
3609 | { |
3610 | delete first_loop_vinfo; |
3611 | first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3612 | simdlen = 0; |
3613 | } |
3614 | else if (pick_lowest_cost_p |
3615 | && first_loop_vinfo |
3616 | && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) |
3617 | { |
3618 | /* Pick loop_vinfo over first_loop_vinfo. */ |
3619 | delete first_loop_vinfo; |
3620 | first_loop_vinfo = opt_loop_vec_info::success (NULL); |
3621 | } |
3622 | if (first_loop_vinfo == NULL) |
3623 | first_loop_vinfo = loop_vinfo; |
3624 | else |
3625 | { |
3626 | delete loop_vinfo; |
3627 | loop_vinfo = opt_loop_vec_info::success (NULL); |
3628 | } |
3629 | |
3630 | /* Commit to first_loop_vinfo if we have no reason to try |
3631 | alternatives. */ |
3632 | if (!simdlen && !pick_lowest_cost_p) |
3633 | break; |
3634 | } |
3635 | if (mode_i == vector_modes.length () |
3636 | || autodetected_vector_mode == VOIDmode) |
3637 | break; |
3638 | |
3639 | /* Try the next biggest vector size. */ |
3640 | if (dump_enabled_p ()) |
3641 | dump_printf_loc (MSG_NOTE, vect_location, |
3642 | "***** Re-trying analysis with vector mode %s\n" , |
3643 | GET_MODE_NAME (vector_modes[mode_i])); |
3644 | } |
3645 | if (!first_loop_vinfo) |
3646 | return opt_loop_vec_info::propagate_failure (res); |
3647 | |
3648 | if (dump_enabled_p ()) |
3649 | dump_printf_loc (MSG_NOTE, vect_location, |
3650 | "***** Choosing vector mode %s\n" , |
3651 | GET_MODE_NAME (first_loop_vinfo->vector_mode)); |
3652 | |
3653 | /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is |
3654 | enabled, SIMDUID is not set, it is the innermost loop and we have |
3655 | either already found the loop's SIMDLEN or there was no SIMDLEN to |
3656 | begin with. |
3657 | TODO: Enable epilogue vectorization for loops with SIMDUID set. */ |
3658 | bool vect_epilogues = (!simdlen |
3659 | && loop->inner == NULL |
3660 | && param_vect_epilogues_nomask |
3661 | && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo) |
3662 | && !loop->simduid); |
3663 | if (!vect_epilogues) |
3664 | return first_loop_vinfo; |
3665 | |
3666 | /* Now analyze first_loop_vinfo for epilogue vectorization. */ |
3667 | poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); |
3668 | |
3669 | /* For epilogues start the analysis from the first mode. The motivation |
3670 | behind starting from the beginning comes from cases where the VECTOR_MODES |
3671 | array may contain length-agnostic and length-specific modes. Their |
3672 | ordering is not guaranteed, so we could end up picking a mode for the main |
3673 | loop that is after the epilogue's optimal mode. */ |
3674 | vector_modes[0] = autodetected_vector_mode; |
3675 | mode_i = 0; |
3676 | |
3677 | bool supports_partial_vectors = |
3678 | partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; |
3679 | poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); |
3680 | |
3681 | while (1) |
3682 | { |
3683 | /* If the target does not support partial vectors we can shorten the |
3684 | number of modes to analyze for the epilogue as we know we can't pick a |
3685 | mode that would lead to a VF at least as big as the |
3686 | FIRST_VINFO_VF. */ |
3687 | if (!supports_partial_vectors |
3688 | && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) |
3689 | { |
3690 | mode_i++; |
3691 | if (mode_i == vector_modes.length ()) |
3692 | break; |
3693 | continue; |
3694 | } |
3695 | |
3696 | if (dump_enabled_p ()) |
3697 | dump_printf_loc (MSG_NOTE, vect_location, |
3698 | "***** Re-trying epilogue analysis with vector " |
3699 | "mode %s\n" , GET_MODE_NAME (vector_modes[mode_i])); |
3700 | |
3701 | bool fatal; |
3702 | opt_loop_vec_info loop_vinfo |
3703 | = vect_analyze_loop_1 (loop, shared, &loop_form_info, |
3704 | first_loop_vinfo, |
3705 | vector_modes, mode_i, |
3706 | autodetected_vector_mode, fatal); |
3707 | if (fatal) |
3708 | break; |
3709 | |
3710 | if (loop_vinfo) |
3711 | { |
3712 | if (pick_lowest_cost_p) |
3713 | { |
3714 | /* Keep trying to roll back vectorization attempts while the |
3715 | loop_vec_infos they produced were worse than this one. */ |
3716 | vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos; |
3717 | while (!vinfos.is_empty () |
3718 | && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) |
3719 | { |
3720 | gcc_assert (vect_epilogues); |
3721 | delete vinfos.pop (); |
3722 | } |
3723 | } |
3724 | /* For now only allow one epilogue loop. */ |
3725 | if (first_loop_vinfo->epilogue_vinfos.is_empty ()) |
3726 | { |
3727 | first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); |
3728 | poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); |
3729 | gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) |
3730 | || maybe_ne (lowest_th, 0U)); |
3731 | /* Keep track of the known smallest versioning |
3732 | threshold. */ |
3733 | if (ordered_p (lowest_th, th)) |
3734 | lowest_th = ordered_min (lowest_th, th); |
3735 | } |
3736 | else |
3737 | { |
3738 | delete loop_vinfo; |
3739 | loop_vinfo = opt_loop_vec_info::success (NULL); |
3740 | } |
3741 | |
3742 | /* For now only allow one epilogue loop, but allow |
3743 | pick_lowest_cost_p to replace it, so commit to the |
3744 | first epilogue if we have no reason to try alternatives. */ |
3745 | if (!pick_lowest_cost_p) |
3746 | break; |
3747 | } |
3748 | |
3749 | if (mode_i == vector_modes.length ()) |
3750 | break; |
3751 | |
3752 | } |
3753 | |
3754 | if (!first_loop_vinfo->epilogue_vinfos.is_empty ()) |
3755 | { |
3756 | LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; |
3757 | if (dump_enabled_p ()) |
3758 | dump_printf_loc (MSG_NOTE, vect_location, |
3759 | "***** Choosing epilogue vector mode %s\n" , |
3760 | GET_MODE_NAME |
3761 | (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)); |
3762 | } |
3763 | |
3764 | return first_loop_vinfo; |
3765 | } |
3766 | |
3767 | /* Return true if there is an in-order reduction function for CODE, storing |
3768 | it in *REDUC_FN if so. */ |
3769 | |
3770 | static bool |
3771 | fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) |
3772 | { |
3773 | /* We support MINUS_EXPR by negating the operand. This also preserves an |
3774 | initial -0.0 since -0.0 - 0.0 (neutral op for MINUS_EXPR) == -0.0 + |
3775 | (-0.0) = -0.0. */ |
3776 | if (code == PLUS_EXPR || code == MINUS_EXPR) |
3777 | { |
3778 | *reduc_fn = IFN_FOLD_LEFT_PLUS; |
3779 | return true; |
3780 | } |
3781 | return false; |
3782 | } |
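/* For example (illustrative): an in-order reduction such as

     for (i = 0; i < n; i++)
       res -= a[i];

   is handled by rewriting each step as res = res + (-a[i]), so it maps
   onto IFN_FOLD_LEFT_PLUS with the loaded elements negated, as described
   in the comment above.  */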
3783 | |
3784 | /* Function reduction_fn_for_scalar_code |
3785 | |
3786 | Input: |
3787 | CODE - tree_code of a reduction operation. |
3788 | |
3789 | Output: |
3790 | REDUC_FN - the corresponding internal function to be used to reduce the |
3791 | vector of partial results into a single scalar result, or IFN_LAST |
3792 | if the operation is a supported reduction operation, but does not have |
3793 | such an internal function. |
3794 | |
3795 | Return FALSE if CODE currently cannot be vectorized as reduction. */ |
3796 | |
3797 | bool |
3798 | reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn) |
3799 | { |
3800 | if (code.is_tree_code ()) |
3801 | switch (tree_code (code)) |
3802 | { |
3803 | case MAX_EXPR: |
3804 | *reduc_fn = IFN_REDUC_MAX; |
3805 | return true; |
3806 | |
3807 | case MIN_EXPR: |
3808 | *reduc_fn = IFN_REDUC_MIN; |
3809 | return true; |
3810 | |
3811 | case PLUS_EXPR: |
3812 | *reduc_fn = IFN_REDUC_PLUS; |
3813 | return true; |
3814 | |
3815 | case BIT_AND_EXPR: |
3816 | *reduc_fn = IFN_REDUC_AND; |
3817 | return true; |
3818 | |
3819 | case BIT_IOR_EXPR: |
3820 | *reduc_fn = IFN_REDUC_IOR; |
3821 | return true; |
3822 | |
3823 | case BIT_XOR_EXPR: |
3824 | *reduc_fn = IFN_REDUC_XOR; |
3825 | return true; |
3826 | |
3827 | case MULT_EXPR: |
3828 | case MINUS_EXPR: |
3829 | *reduc_fn = IFN_LAST; |
3830 | return true; |
3831 | |
3832 | default: |
3833 | return false; |
3834 | } |
3835 | else |
3836 | switch (combined_fn (code)) |
3837 | { |
3838 | CASE_CFN_FMAX: |
3839 | *reduc_fn = IFN_REDUC_FMAX; |
3840 | return true; |
3841 | |
3842 | CASE_CFN_FMIN: |
3843 | *reduc_fn = IFN_REDUC_FMIN; |
3844 | return true; |
3845 | |
3846 | default: |
3847 | return false; |
3848 | } |
3849 | } |
3850 | |
3851 | /* If there is a neutral value X such that a reduction would not be affected |
3852 | by the introduction of additional X elements, return that X, otherwise |
3853 | return null. CODE is the code of the reduction and SCALAR_TYPE is type |
3854 | of the scalar elements. If the reduction has just a single initial value |
3855 | then INITIAL_VALUE is that value, otherwise it is null. |
3856 | If AS_INITIAL is TRUE the value is supposed to be used as initial value. |
3857 | In that case no signed zero is returned. */ |
3858 | |
3859 | tree |
3860 | neutral_op_for_reduction (tree scalar_type, code_helper code, |
3861 | tree initial_value, bool as_initial) |
3862 | { |
3863 | if (code.is_tree_code ()) |
3864 | switch (tree_code (code)) |
3865 | { |
3866 | case DOT_PROD_EXPR: |
3867 | case SAD_EXPR: |
3868 | case MINUS_EXPR: |
3869 | case BIT_IOR_EXPR: |
3870 | case BIT_XOR_EXPR: |
3871 | return build_zero_cst (scalar_type); |
3872 | case WIDEN_SUM_EXPR: |
3873 | case PLUS_EXPR: |
3874 | if (!as_initial && HONOR_SIGNED_ZEROS (scalar_type)) |
3875 | return build_real (scalar_type, dconstm0); |
3876 | else |
3877 | return build_zero_cst (scalar_type); |
3878 | |
3879 | case MULT_EXPR: |
3880 | return build_one_cst (scalar_type); |
3881 | |
3882 | case BIT_AND_EXPR: |
3883 | return build_all_ones_cst (scalar_type); |
3884 | |
3885 | case MAX_EXPR: |
3886 | case MIN_EXPR: |
3887 | return initial_value; |
3888 | |
3889 | default: |
3890 | return NULL_TREE; |
3891 | } |
3892 | else |
3893 | switch (combined_fn (code)) |
3894 | { |
3895 | CASE_CFN_FMIN: |
3896 | CASE_CFN_FMAX: |
3897 | return initial_value; |
3898 | |
3899 | default: |
3900 | return NULL_TREE; |
3901 | } |
3902 | } |
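/* For example (illustrative): a PLUS_EXPR reduction can normally be padded
   with extra 0.0 elements, but when signed zeros are honoured and the value
   is not used as the initial value we must pad with -0.0 instead, since
   x + (-0.0) == x for every x whereas -0.0 + (+0.0) == +0.0 would lose the
   sign of a -0.0 result.  Similarly MULT_EXPR is padded with 1 and
   BIT_AND_EXPR with an all-ones value.  */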
3903 | |
3904 | /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement |
3905 | STMT is printed with a message MSG. */ |
3906 | |
3907 | static void |
3908 | report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) |
3909 | { |
3910 | dump_printf_loc (msg_type, vect_location, "%s%G" , msg, stmt); |
3911 | } |
3912 | |
3913 | /* Return true if we need an in-order (fold-left) reduction for operation |
3914 | CODE on type TYPE, i.e. if the reduction must preserve the original |
3915 | scalar evaluation order. */ |
3916 | |
3917 | bool |
3918 | needs_fold_left_reduction_p (tree type, code_helper code) |
3919 | { |
3920 | /* CHECKME: check for !flag_finite_math_only too? */ |
3921 | if (SCALAR_FLOAT_TYPE_P (type)) |
3922 | { |
3923 | if (code.is_tree_code ()) |
3924 | switch (tree_code (code)) |
3925 | { |
3926 | case MIN_EXPR: |
3927 | case MAX_EXPR: |
3928 | return false; |
3929 | |
3930 | default: |
3931 | return !flag_associative_math; |
3932 | } |
3933 | else |
3934 | switch (combined_fn (code)) |
3935 | { |
3936 | CASE_CFN_FMIN: |
3937 | CASE_CFN_FMAX: |
3938 | return false; |
3939 | |
3940 | default: |
3941 | return !flag_associative_math; |
3942 | } |
3943 | } |
3944 | |
3945 | if (INTEGRAL_TYPE_P (type)) |
3946 | return (!code.is_tree_code () |
3947 | || !operation_no_trapping_overflow (type, tree_code (code))); |
3948 | |
3949 | if (SAT_FIXED_POINT_TYPE_P (type)) |
3950 | return true; |
3951 | |
3952 | return false; |
3953 | } |
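/* For example (illustrative): a single-precision sum like

     float s = 0.0f;
     for (i = 0; i < n; i++)
       s += a[i];

   must be computed in order unless -fassociative-math is in effect,
   because FP addition is not associative: (1e20f + -1e20f) + 1.0f
   evaluates to 1.0f while 1e20f + (-1e20f + 1.0f) evaluates to 0.0f.
   Integer reductions only need the fold-left path when the operation
   may trap on overflow.  */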
3954 | |
3955 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and |
3956 | has a handled computation expression. Store the main reduction |
3957 | operation in *CODE. */ |
3958 | |
3959 | static bool |
3960 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
3961 | tree loop_arg, code_helper *code, |
3962 | vec<std::pair<ssa_op_iter, use_operand_p> > &path) |
3963 | { |
3964 | auto_bitmap visited; |
3965 | tree lookfor = PHI_RESULT (phi); |
3966 | ssa_op_iter curri; |
3967 | use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); |
3968 | while (USE_FROM_PTR (curr) != loop_arg) |
3969 | curr = op_iter_next_use (&curri); |
3970 | curri.i = curri.numops; |
3971 | do |
3972 | { |
3973 | path.safe_push (std::make_pair (curri, curr)); |
3974 | tree use = USE_FROM_PTR (curr); |
3975 | if (use == lookfor) |
3976 | break; |
3977 | gimple *def = SSA_NAME_DEF_STMT (use); |
3978 | if (gimple_nop_p (def) |
3979 | || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) |
3980 | { |
3981 | pop: |
3982 | do |
3983 | { |
3984 | std::pair<ssa_op_iter, use_operand_p> x = path.pop (); |
3985 | curri = x.first; |
3986 | curr = x.second; |
3987 | do |
3988 | curr = op_iter_next_use (&curri); |
3989 | /* Skip already visited or non-SSA operands (from iterating |
3990 | over PHI args). */ |
3991 | while (curr != NULL_USE_OPERAND_P |
3992 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME |
3993 | || ! bitmap_set_bit (visited, |
3994 | SSA_NAME_VERSION |
3995 | (USE_FROM_PTR (curr))))); |
3996 | } |
3997 | while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); |
3998 | if (curr == NULL_USE_OPERAND_P) |
3999 | break; |
4000 | } |
4001 | else |
4002 | { |
4003 | if (gimple_code (g: def) == GIMPLE_PHI) |
4004 | curr = op_iter_init_phiuse (&curri, as_a <gphi *> (def), SSA_OP_USE); |
4005 | else |
4006 | curr = op_iter_init_use (&curri, def, SSA_OP_USE); |
4007 | while (curr != NULL_USE_OPERAND_P |
4008 | && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME |
4009 | || ! bitmap_set_bit (visited, |
4010 | SSA_NAME_VERSION |
4011 | (USE_FROM_PTR (curr))))) |
4012 | curr = op_iter_next_use (&curri); |
4013 | if (curr == NULL_USE_OPERAND_P) |
4014 | goto pop; |
4015 | } |
4016 | } |
4017 | while (1); |
4018 | if (dump_file && (dump_flags & TDF_DETAILS)) |
4019 | { |
4020 | dump_printf_loc (MSG_NOTE, loc, "reduction path: " ); |
4021 | unsigned i; |
4022 | std::pair<ssa_op_iter, use_operand_p> *x; |
4023 | FOR_EACH_VEC_ELT (path, i, x) |
4024 | dump_printf (MSG_NOTE, "%T " , USE_FROM_PTR (x->second)); |
4025 | dump_printf (MSG_NOTE, "\n" ); |
4026 | } |
4027 | |
4028 | /* Check whether the reduction path detected is valid. */ |
4029 | bool fail = path.length () == 0; |
4030 | bool neg = false; |
4031 | int sign = -1; |
4032 | *code = ERROR_MARK; |
4033 | for (unsigned i = 1; i < path.length (); ++i) |
4034 | { |
4035 | gimple *use_stmt = USE_STMT (path[i].second); |
4036 | gimple_match_op op; |
4037 | if (!gimple_extract_op (use_stmt, &op)) |
4038 | { |
4039 | fail = true; |
4040 | break; |
4041 | } |
4042 | unsigned int opi = op.num_ops; |
4043 | if (gassign *assign = dyn_cast<gassign *> (use_stmt)) |
4044 | { |
4045 | /* The following makes sure we can compute the operand index |
4046 | easily and it mostly disallows chaining via COND_EXPR condition |
4047 | operands. */ |
4048 | for (opi = 0; opi < op.num_ops; ++opi) |
4049 | if (gimple_assign_rhs1_ptr (assign) + opi == path[i].second->use) |
4050 | break; |
4051 | } |
4052 | else if (gcall *call = dyn_cast<gcall *> (use_stmt)) |
4053 | { |
4054 | for (opi = 0; opi < op.num_ops; ++opi) |
4055 | if (gimple_call_arg_ptr (call, opi) == path[i].second->use) |
4056 | break; |
4057 | } |
4058 | if (opi == op.num_ops) |
4059 | { |
4060 | fail = true; |
4061 | break; |
4062 | } |
4063 | op.code = canonicalize_code (op.code, op.type); |
4064 | if (op.code == MINUS_EXPR) |
4065 | { |
4066 | op.code = PLUS_EXPR; |
4067 | /* Track whether we negate the reduction value each iteration. */ |
4068 | if (op.ops[1] == op.ops[opi]) |
4069 | neg = ! neg; |
4070 | } |
4071 | if (CONVERT_EXPR_CODE_P (op.code) |
4072 | && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) |
4073 | ; |
4074 | else if (*code == ERROR_MARK) |
4075 | { |
4076 | *code = op.code; |
4077 | sign = TYPE_SIGN (op.type); |
4078 | } |
4079 | else if (op.code != *code) |
4080 | { |
4081 | fail = true; |
4082 | break; |
4083 | } |
4084 | else if ((op.code == MIN_EXPR |
4085 | || op.code == MAX_EXPR) |
4086 | && sign != TYPE_SIGN (op.type)) |
4087 | { |
4088 | fail = true; |
4089 | break; |
4090 | } |
4091 | /* Check there's only a single stmt the op is used on. For the |
4092 | not value-changing tail and the last stmt allow out-of-loop uses. |
4093 | ??? We could relax this and handle arbitrary live stmts by |
4094 | forcing a scalar epilogue for example. */ |
4095 | imm_use_iterator imm_iter; |
4096 | use_operand_p use_p; |
4097 | gimple *op_use_stmt; |
4098 | unsigned cnt = 0; |
4099 | bool cond_fn_p = op.code.is_internal_fn () |
4100 | && (conditional_internal_fn_code (internal_fn (op.code)) |
4101 | != ERROR_MARK); |
4102 | |
4103 | FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi]) |
4104 | { |
4105 | /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have |
4106 | op1 twice (once as definition, once as else) in the same operation. |
4107 | Allow this. */ |
4108 | if (cond_fn_p) |
4109 | { |
4110 | gcall *call = dyn_cast<gcall *> (use_stmt); |
4111 | unsigned else_pos |
4112 | = internal_fn_else_index (internal_fn (op.code)); |
4113 | |
4114 | for (unsigned int j = 0; j < gimple_call_num_args (call); ++j) |
4115 | { |
4116 | if (j == else_pos) |
4117 | continue; |
4118 | if (gimple_call_arg (call, j) == op.ops[opi]) |
4119 | cnt++; |
4120 | } |
4121 | } |
4122 | else if (!is_gimple_debug (op_use_stmt) |
4123 | && (*code != ERROR_MARK |
4124 | || flow_bb_inside_loop_p (loop, |
4125 | gimple_bb (op_use_stmt)))) |
4126 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
4127 | cnt++; |
4128 | } |
4129 | |
4130 | if (cnt != 1) |
4131 | { |
4132 | fail = true; |
4133 | break; |
4134 | } |
4135 | } |
4136 | return ! fail && ! neg && *code != ERROR_MARK; |
4137 | } |
4138 | |
4139 | bool |
4140 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
4141 | tree loop_arg, enum tree_code code) |
4142 | { |
4143 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
4144 | code_helper code_; |
4145 | return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) |
4146 | && code_ == code); |
4147 | } |
4148 | |
4149 | |
4150 | |
4151 | /* Function vect_is_simple_reduction |
4152 | |
4153 | (1) Detect a cross-iteration def-use cycle that represents a simple |
4154 | reduction computation. We look for the following pattern: |
4155 | |
4156 | loop_header: |
4157 | a1 = phi < a0, a2 > |
4158 | a3 = ... |
4159 | a2 = operation (a3, a1) |
4160 | |
4161 | or |
4162 | |
4163 | a3 = ... |
4164 | loop_header: |
4165 | a1 = phi < a0, a2 > |
4166 | a2 = operation (a3, a1) |
4167 | |
4168 | such that: |
4169 | 1. operation is commutative and associative and it is safe to |
4170 | change the order of the computation |
4171 | 2. no uses for a2 in the loop (a2 is used out of the loop) |
4172 | 3. no uses of a1 in the loop besides the reduction operation |
4173 | 4. no uses of a1 outside the loop. |
4174 | |
4175 | Conditions 1,4 are tested here. |
4176 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. |
4177 | |
4178 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., |
4179 | nested cycles. |
4180 | |
4181 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double |
4182 | reductions: |
4183 | |
4184 | a1 = phi < a0, a2 > |
4185 | inner loop (def of a3) |
4186 | a2 = phi < a3 > |
4187 | |
4188 | (4) Detect condition expressions, i.e.: |
4189 | for (int i = 0; i < N; i++) |
4190 | if (a[i] < val) |
4191 | ret_val = a[i]; |
4192 | |
4193 | */ |
4194 | |
4195 | static stmt_vec_info |
4196 | vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, |
4197 | bool *double_reduc, bool *reduc_chain_p, bool slp) |
4198 | { |
4199 | gphi *phi = as_a <gphi *> (phi_info->stmt); |
4200 | gimple *phi_use_stmt = NULL; |
4201 | imm_use_iterator imm_iter; |
4202 | use_operand_p use_p; |
4203 | |
4204 | *double_reduc = false; |
4205 | *reduc_chain_p = false; |
4206 | STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; |
4207 | |
4208 | tree phi_name = PHI_RESULT (phi); |
4209 | /* ??? If there are no uses of the PHI result the inner loop reduction |
4210 | won't be detected as possibly double-reduction by vectorizable_reduction |
4211 | because that tries to walk the PHI arg from the preheader edge which |
4212 | can be constant. See PR60382. */ |
4213 | if (has_zero_uses (phi_name)) |
4214 | return NULL; |
4215 | class loop *loop = (gimple_bb (phi))->loop_father; |
4216 | unsigned nphi_def_loop_uses = 0; |
4217 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name) |
4218 | { |
4219 | gimple *use_stmt = USE_STMT (use_p); |
4220 | if (is_gimple_debug (use_stmt)) |
4221 | continue; |
4222 | |
4223 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
4224 | { |
4225 | if (dump_enabled_p ()) |
4226 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4227 | "intermediate value used outside loop.\n" ); |
4228 | |
4229 | return NULL; |
4230 | } |
4231 | |
4232 | /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have |
4233 | op1 twice (once as definition, once as else) in the same operation. |
4234 | Only count it as one. */ |
4235 | if (use_stmt != phi_use_stmt) |
4236 | { |
4237 | nphi_def_loop_uses++; |
4238 | phi_use_stmt = use_stmt; |
4239 | } |
4240 | } |
4241 | |
4242 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); |
4243 | if (TREE_CODE (latch_def) != SSA_NAME) |
4244 | { |
4245 | if (dump_enabled_p ()) |
4246 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4247 | "reduction: not ssa_name: %T\n" , latch_def); |
4248 | return NULL; |
4249 | } |
4250 | |
4251 | stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); |
4252 | if (!def_stmt_info |
4253 | || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) |
4254 | return NULL; |
4255 | |
4256 | bool nested_in_vect_loop |
4257 | = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop); |
4258 | unsigned nlatch_def_loop_uses = 0; |
4259 | auto_vec<gphi *, 3> lcphis; |
4260 | bool inner_loop_of_double_reduc = false; |
4261 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def) |
4262 | { |
4263 | gimple *use_stmt = USE_STMT (use_p); |
4264 | if (is_gimple_debug (use_stmt)) |
4265 | continue; |
4266 | if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
4267 | nlatch_def_loop_uses++; |
4268 | else |
4269 | { |
4270 | /* We can have more than one loop-closed PHI. */ |
4271 | lcphis.safe_push (as_a <gphi *> (use_stmt)); |
4272 | if (nested_in_vect_loop |
4273 | && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt)) |
4274 | == vect_double_reduction_def)) |
4275 | inner_loop_of_double_reduc = true; |
4276 | } |
4277 | } |
4278 | |
4279 | /* If we are vectorizing an inner reduction we are executing that |
4280 | in the original order only in case we are not dealing with a |
4281 | double reduction. */ |
4282 | if (nested_in_vect_loop && !inner_loop_of_double_reduc) |
4283 | { |
4284 | if (dump_enabled_p ()) |
4285 | report_vect_op (MSG_NOTE, def_stmt_info->stmt, |
4286 | "detected nested cycle: "); |
4287 | return def_stmt_info; |
4288 | } |
4289 | |
4290 | /* When the inner loop of a double reduction ends up with more than |
4291 | one loop-closed PHI we have failed to classify alternate such |
4292 | PHIs as double reduction, leading to wrong code. See PR103237. */ |
4293 | if (inner_loop_of_double_reduc && lcphis.length () != 1) |
4294 | { |
4295 | if (dump_enabled_p ()) |
4296 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4297 | "unhandled double reduction\n"); |
4298 | return NULL; |
4299 | } |
4300 | |
4301 | /* If this isn't a nested cycle or if the nested cycle reduction value |
4302 | is used outside of the inner loop we cannot handle uses of the reduction |
4303 | value. */ |
4304 | if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) |
4305 | { |
4306 | if (dump_enabled_p ()) |
4307 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4308 | "reduction used in loop.\n" ); |
4309 | return NULL; |
4310 | } |
4311 | |
4312 | /* If DEF_STMT is a phi node itself, we expect it to have a single argument |
4313 | defined in the inner loop. */ |
4314 | if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) |
4315 | { |
4316 | tree op1 = PHI_ARG_DEF (def_stmt, 0); |
4317 | if (gimple_phi_num_args (def_stmt) != 1 |
4318 | || TREE_CODE (op1) != SSA_NAME) |
4319 | { |
4320 | if (dump_enabled_p ()) |
4321 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4322 | "unsupported phi node definition.\n" ); |
4323 | |
4324 | return NULL; |
4325 | } |
4326 | |
4327 | /* Verify there is an inner cycle composed of the PHI phi_use_stmt |
4328 | and the latch definition op1. */ |
4329 | gimple *def1 = SSA_NAME_DEF_STMT (op1); |
4330 | if (gimple_bb (def1) |
4331 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) |
4332 | && loop->inner |
4333 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) |
4334 | && (is_gimple_assign (def1) || is_gimple_call (def1)) |
4335 | && is_a <gphi *> (phi_use_stmt) |
4336 | && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)) |
4337 | && (op1 == PHI_ARG_DEF_FROM_EDGE (phi_use_stmt, |
4338 | loop_latch_edge (loop->inner)))) |
4339 | { |
4340 | if (dump_enabled_p ()) |
4341 | report_vect_op (MSG_NOTE, def_stmt, |
4342 | "detected double reduction: "); |
4343 | |
4344 | *double_reduc = true; |
4345 | return def_stmt_info; |
4346 | } |
4347 | |
4348 | return NULL; |
4349 | } |
4350 | |
4351 | /* Look for the expression computing latch_def from the loop PHI result. */ |
4352 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
4353 | code_helper code; |
4354 | if (check_reduction_path (vect_location, loop, phi, latch_def, &code, |
4355 | path)) |
4356 | { |
4357 | STMT_VINFO_REDUC_CODE (phi_info) = code; |
4358 | if (code == COND_EXPR && !nested_in_vect_loop) |
4359 | STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION; |
4360 | |
4361 | /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP |
4362 | reduction chain for which the additional restriction is that |
4363 | all operations in the chain are the same. */ |
4364 | auto_vec<stmt_vec_info, 8> reduc_chain; |
4365 | unsigned i; |
4366 | bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; |
4367 | for (i = path.length () - 1; i >= 1; --i) |
4368 | { |
4369 | gimple *stmt = USE_STMT (path[i].second); |
4370 | stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); |
4371 | gimple_match_op op; |
4372 | if (!gimple_extract_op (stmt, &op)) |
4373 | gcc_unreachable (); |
4374 | if (gassign *assign = dyn_cast<gassign *> (stmt)) |
4375 | STMT_VINFO_REDUC_IDX (stmt_info) |
4376 | = path[i].second->use - gimple_assign_rhs1_ptr (assign); |
4377 | else |
4378 | { |
4379 | gcall *call = as_a<gcall *> (stmt); |
4380 | STMT_VINFO_REDUC_IDX (stmt_info) |
4381 | = path[i].second->use - gimple_call_arg_ptr (call, 0); |
4382 | } |
4383 | bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code) |
4384 | && (i == 1 || i == path.length () - 1)); |
4385 | if ((op.code != code && !leading_conversion) |
4386 | /* We can only handle the final value in epilogue |
4387 | generation for reduction chains. */ |
4388 | || (i != 1 && !has_single_use (var: gimple_get_lhs (stmt)))) |
4389 | is_slp_reduc = false; |
	  /* For reduction chains we support trailing/leading
	     conversions.  We do not store those in the actual chain.  */
4392 | if (leading_conversion) |
4393 | continue; |
4394 | reduc_chain.safe_push (obj: stmt_info); |
4395 | } |
4396 | if (slp && is_slp_reduc && reduc_chain.length () > 1) |
4397 | { |
4398 | for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) |
4399 | { |
4400 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]; |
4401 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]; |
4402 | } |
4403 | REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]; |
4404 | REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL; |
4405 | |
4406 | /* Save the chain for further analysis in SLP detection. */ |
4407 | LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (obj: reduc_chain[0]); |
4408 | REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length (); |
4409 | |
4410 | *reduc_chain_p = true; |
4411 | if (dump_enabled_p ()) |
4412 | dump_printf_loc (MSG_NOTE, vect_location, |
4413 | "reduction: detected reduction chain\n" ); |
4414 | } |
4415 | else if (dump_enabled_p ()) |
4416 | dump_printf_loc (MSG_NOTE, vect_location, |
4417 | "reduction: detected reduction\n" ); |
4418 | |
4419 | return def_stmt_info; |
4420 | } |
4421 | |
4422 | if (dump_enabled_p ()) |
4423 | dump_printf_loc (MSG_NOTE, vect_location, |
4424 | "reduction: unknown pattern\n" ); |
4425 | |
4426 | return NULL; |
4427 | } |
4428 | |
4429 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. |
4430 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, |
4431 | or -1 if not known. */ |
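/* Illustrative example (the numbers are assumptions, not taken from any
   target): with an assumed VF of 8, PEEL_ITERS_PROLOGUE of 3 and a known
   niter count of 100, the epilogue peels (100 - 3) % 8 = 1 iteration;
   if either quantity is unknown, the fallback estimate below is
   VF / 2 = 4.  */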
4432 | |
4433 | static int |
4434 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) |
4435 | { |
4436 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4437 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1) |
4438 | { |
4439 | if (dump_enabled_p ()) |
4440 | dump_printf_loc (MSG_NOTE, vect_location, |
4441 | "cost model: epilogue peel iters set to vf/2 " |
4442 | "because loop iterations are unknown .\n" ); |
4443 | return assumed_vf / 2; |
4444 | } |
4445 | else |
4446 | { |
4447 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); |
4448 | peel_iters_prologue = MIN (niters, peel_iters_prologue); |
4449 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; |
      /* If we need to peel for gaps but no epilogue peeling would otherwise
	 be required, we have to peel VF iterations.  */
4452 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue) |
4453 | peel_iters_epilogue = assumed_vf; |
4454 | return peel_iters_epilogue; |
4455 | } |
4456 | } |
4457 | |
4458 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ |
4459 | int |
4460 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, |
4461 | int *peel_iters_epilogue, |
4462 | stmt_vector_for_cost *scalar_cost_vec, |
4463 | stmt_vector_for_cost *prologue_cost_vec, |
4464 | stmt_vector_for_cost *epilogue_cost_vec) |
4465 | { |
4466 | int retval = 0; |
4467 | |
4468 | *peel_iters_epilogue |
4469 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); |
4470 | |
4471 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
4472 | { |
      /* If peeled iterations are known but the number of scalar loop
	 iterations is unknown, count a taken branch per peeled loop.  */
4475 | if (peel_iters_prologue > 0) |
4476 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, |
4477 | vect_prologue); |
4478 | if (*peel_iters_epilogue > 0) |
4479 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, |
4480 | vect_epilogue); |
4481 | } |
4482 | |
4483 | stmt_info_for_cost *si; |
4484 | int j; |
4485 | if (peel_iters_prologue) |
4486 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
4487 | retval += record_stmt_cost (body_cost_vec: prologue_cost_vec, |
4488 | count: si->count * peel_iters_prologue, |
4489 | kind: si->kind, stmt_info: si->stmt_info, misalign: si->misalign, |
4490 | where: vect_prologue); |
4491 | if (*peel_iters_epilogue) |
4492 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
4493 | retval += record_stmt_cost (body_cost_vec: epilogue_cost_vec, |
4494 | count: si->count * *peel_iters_epilogue, |
4495 | kind: si->kind, stmt_info: si->stmt_info, misalign: si->misalign, |
4496 | where: vect_epilogue); |
4497 | |
4498 | return retval; |
4499 | } |
4500 | |
4501 | /* Function vect_estimate_min_profitable_iters |
4502 | |
4503 | Return the number of iterations required for the vector version of the |
4504 | loop to be profitable relative to the cost of the scalar version of the |
4505 | loop. |
4506 | |
4507 | *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold |
4508 | of iterations for vectorization. -1 value means loop vectorization |
4509 | is not profitable. This returned value may be used for dynamic |
4510 | profitability check. |
4511 | |
4512 | *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used |
4513 | for static check against estimated number of iterations. */ |
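/* A sketch of the intuition behind the two thresholds (not authoritative):
   the runtime threshold below folds the cost model check into the scalar
   path (the - SOC terms), since the check guards a dynamic choice between
   the two versions, whereas the static estimate charges it to the vector
   path (the + SOC terms), because a compile-time decision leaves the
   scalar version free of any guard.  */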
4514 | |
4515 | static void |
4516 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, |
4517 | int *ret_min_profitable_niters, |
4518 | int *ret_min_profitable_estimate, |
4519 | unsigned *suggested_unroll_factor) |
4520 | { |
4521 | int min_profitable_iters; |
4522 | int min_profitable_estimate; |
4523 | int peel_iters_prologue; |
4524 | int peel_iters_epilogue; |
4525 | unsigned vec_inside_cost = 0; |
4526 | int vec_outside_cost = 0; |
4527 | unsigned vec_prologue_cost = 0; |
4528 | unsigned vec_epilogue_cost = 0; |
4529 | int scalar_single_iter_cost = 0; |
4530 | int scalar_outside_cost = 0; |
4531 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4532 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
4533 | vector_costs *target_cost_data = loop_vinfo->vector_costs; |
4534 | |
4535 | /* Cost model disabled. */ |
4536 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
4537 | { |
4538 | if (dump_enabled_p ()) |
4539 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n" ); |
4540 | *ret_min_profitable_niters = 0; |
4541 | *ret_min_profitable_estimate = 0; |
4542 | return; |
4543 | } |
4544 | |
4545 | /* Requires loop versioning tests to handle misalignment. */ |
4546 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) |
4547 | { |
4548 | /* FIXME: Make cost depend on complexity of individual check. */ |
4549 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length (); |
4550 | (void) add_stmt_cost (costs: target_cost_data, count: len, kind: scalar_stmt, where: vect_prologue); |
4551 | if (dump_enabled_p ()) |
4552 | dump_printf (MSG_NOTE, |
4553 | "cost model: Adding cost of checks for loop " |
4554 | "versioning to treat misalignment.\n" ); |
4555 | } |
4556 | |
4557 | /* Requires loop versioning with alias checks. */ |
4558 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) |
4559 | { |
4560 | /* FIXME: Make cost depend on complexity of individual check. */ |
4561 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); |
4562 | (void) add_stmt_cost (costs: target_cost_data, count: len, kind: scalar_stmt, where: vect_prologue); |
4563 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); |
4564 | if (len) |
4565 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4566 | (void) add_stmt_cost (costs: target_cost_data, count: len * 2 - 1, |
4567 | kind: scalar_stmt, where: vect_prologue); |
4568 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length (); |
4569 | if (len) |
4570 | { |
4571 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4572 | unsigned int nstmts = len * 2 - 1; |
4573 | /* +1 for each bias that needs adding. */ |
4574 | for (unsigned int i = 0; i < len; ++i) |
4575 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p) |
4576 | nstmts += 1; |
4577 | (void) add_stmt_cost (costs: target_cost_data, count: nstmts, |
4578 | kind: scalar_stmt, where: vect_prologue); |
4579 | } |
4580 | if (dump_enabled_p ()) |
4581 | dump_printf (MSG_NOTE, |
4582 | "cost model: Adding cost of checks for loop " |
4583 | "versioning aliasing.\n" ); |
4584 | } |
4585 | |
4586 | /* Requires loop versioning with niter checks. */ |
4587 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)) |
4588 | { |
4589 | /* FIXME: Make cost depend on complexity of individual check. */ |
4590 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: vector_stmt, |
4591 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_prologue); |
4592 | if (dump_enabled_p ()) |
4593 | dump_printf (MSG_NOTE, |
4594 | "cost model: Adding cost of checks for loop " |
4595 | "versioning niters.\n" ); |
4596 | } |
4597 | |
4598 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4599 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4600 | where: vect_prologue); |
4601 | |
4602 | /* Count statements in scalar loop. Using this as scalar cost for a single |
4603 | iteration for now. |
4604 | |
4605 | TODO: Add outer loop support. |
4606 | |
4607 | TODO: Consider assigning different costs to different scalar |
4608 | statements. */ |
4609 | |
4610 | scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); |
4611 | |
4612 | /* Add additional cost for the peeled instructions in prologue and epilogue |
4613 | loop. (For fully-masked loops there will be no peeling.) |
4614 | |
4615 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue |
4616 | at compile-time - we assume it's vf/2 (the worst would be vf-1). |
4617 | |
4618 | TODO: Build an expression that represents peel_iters for prologue and |
4619 | epilogue to be used in a run-time test. */ |
4620 | |
4621 | bool prologue_need_br_taken_cost = false; |
4622 | bool prologue_need_br_not_taken_cost = false; |
4623 | |
4624 | /* Calculate peel_iters_prologue. */ |
4625 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
4626 | peel_iters_prologue = 0; |
4627 | else if (npeel < 0) |
4628 | { |
4629 | peel_iters_prologue = assumed_vf / 2; |
4630 | if (dump_enabled_p ()) |
4631 | dump_printf (MSG_NOTE, "cost model: " |
4632 | "prologue peel iters set to vf/2.\n" ); |
4633 | |
4634 | /* If peeled iterations are unknown, count a taken branch and a not taken |
4635 | branch per peeled loop. Even if scalar loop iterations are known, |
4636 | vector iterations are not known since peeled prologue iterations are |
4637 | not known. Hence guards remain the same. */ |
4638 | prologue_need_br_taken_cost = true; |
4639 | prologue_need_br_not_taken_cost = true; |
4640 | } |
4641 | else |
4642 | { |
4643 | peel_iters_prologue = npeel; |
4644 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0) |
	/* If peeled iterations are known but the number of scalar loop
	   iterations is unknown, count a taken branch per peeled loop.  */
4647 | prologue_need_br_taken_cost = true; |
4648 | } |
4649 | |
4650 | bool epilogue_need_br_taken_cost = false; |
4651 | bool epilogue_need_br_not_taken_cost = false; |
4652 | |
4653 | /* Calculate peel_iters_epilogue. */ |
4654 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
4655 | /* We need to peel exactly one iteration for gaps. */ |
4656 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; |
4657 | else if (npeel < 0) |
4658 | { |
      /* If peeling for alignment is unknown, the loop bound of the main
	 loop becomes unknown.  */
4661 | peel_iters_epilogue = assumed_vf / 2; |
4662 | if (dump_enabled_p ()) |
4663 | dump_printf (MSG_NOTE, "cost model: " |
4664 | "epilogue peel iters set to vf/2 because " |
4665 | "peeling for alignment is unknown.\n" ); |
4666 | |
4667 | /* See the same reason above in peel_iters_prologue calculation. */ |
4668 | epilogue_need_br_taken_cost = true; |
4669 | epilogue_need_br_not_taken_cost = true; |
4670 | } |
4671 | else |
4672 | { |
4673 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue: npeel); |
4674 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0) |
	/* If peeled iterations are known but the number of scalar loop
	   iterations is unknown, count a taken branch per peeled loop.  */
4677 | epilogue_need_br_taken_cost = true; |
4678 | } |
4679 | |
4680 | stmt_info_for_cost *si; |
4681 | int j; |
4682 | /* Add costs associated with peel_iters_prologue. */ |
4683 | if (peel_iters_prologue) |
4684 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) |
4685 | { |
4686 | (void) add_stmt_cost (costs: target_cost_data, |
4687 | count: si->count * peel_iters_prologue, kind: si->kind, |
4688 | stmt_info: si->stmt_info, node: si->node, vectype: si->vectype, |
4689 | misalign: si->misalign, where: vect_prologue); |
4690 | } |
4691 | |
4692 | /* Add costs associated with peel_iters_epilogue. */ |
4693 | if (peel_iters_epilogue) |
4694 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) |
4695 | { |
4696 | (void) add_stmt_cost (costs: target_cost_data, |
4697 | count: si->count * peel_iters_epilogue, kind: si->kind, |
4698 | stmt_info: si->stmt_info, node: si->node, vectype: si->vectype, |
4699 | misalign: si->misalign, where: vect_epilogue); |
4700 | } |
4701 | |
4702 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ |
4703 | |
4704 | if (prologue_need_br_taken_cost) |
4705 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4706 | where: vect_prologue); |
4707 | |
4708 | if (prologue_need_br_not_taken_cost) |
4709 | (void) add_stmt_cost (costs: target_cost_data, count: 1, |
4710 | kind: cond_branch_not_taken, where: vect_prologue); |
4711 | |
4712 | if (epilogue_need_br_taken_cost) |
4713 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4714 | where: vect_epilogue); |
4715 | |
4716 | if (epilogue_need_br_not_taken_cost) |
4717 | (void) add_stmt_cost (costs: target_cost_data, count: 1, |
4718 | kind: cond_branch_not_taken, where: vect_epilogue); |
4719 | |
4720 | /* Take care of special costs for rgroup controls of partial vectors. */ |
4721 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
4722 | && (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
4723 | == vect_partial_vectors_avx512)) |
4724 | { |
4725 | /* Calculate how many masks we need to generate. */ |
4726 | unsigned int num_masks = 0; |
4727 | bool need_saturation = false; |
4728 | for (auto rgm : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec) |
4729 | if (rgm.type) |
4730 | { |
4731 | unsigned nvectors = rgm.factor; |
4732 | num_masks += nvectors; |
4733 | if (TYPE_PRECISION (TREE_TYPE (rgm.compare_type)) |
4734 | < TYPE_PRECISION (LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo))) |
4735 | need_saturation = true; |
4736 | } |
4737 | |
      /* ??? The target isn't able to identify the costs below as
	 producing masks so it cannot penalize cases where we'd run
	 out of mask registers, for example.  */
4741 | |
4742 | /* ??? We are also failing to account for smaller vector masks |
4743 | we generate by splitting larger masks in vect_get_loop_mask. */ |
4744 | |
4745 | /* In the worst case, we need to generate each mask in the prologue |
4746 | and in the loop body. We need one splat per group and one |
4747 | compare per mask. |
4748 | |
4749 | Sometimes the prologue mask will fold to a constant, |
4750 | so the actual prologue cost might be smaller. However, it's |
4751 | simpler and safer to use the worst-case cost; if this ends up |
4752 | being the tie-breaker between vectorizing or not, then it's |
4753 | probably better not to vectorize. */ |
4754 | (void) add_stmt_cost (costs: target_cost_data, |
4755 | count: num_masks |
4756 | + LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.length (), |
4757 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4758 | where: vect_prologue); |
4759 | (void) add_stmt_cost (costs: target_cost_data, |
4760 | count: num_masks |
4761 | + LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.length (), |
4762 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, where: vect_body); |
4763 | |
4764 | /* When we need saturation we need it both in the prologue and |
4765 | the epilogue. */ |
4766 | if (need_saturation) |
4767 | { |
4768 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: scalar_stmt, |
4769 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_prologue); |
4770 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: scalar_stmt, |
4771 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_body); |
4772 | } |
4773 | } |
4774 | else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
4775 | && (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
4776 | == vect_partial_vectors_while_ult)) |
4777 | { |
4778 | /* Calculate how many masks we need to generate. */ |
4779 | unsigned int num_masks = 0; |
4780 | rgroup_controls *rgm; |
4781 | unsigned int num_vectors_m1; |
4782 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, |
4783 | num_vectors_m1, rgm) |
4784 | if (rgm->type) |
4785 | num_masks += num_vectors_m1 + 1; |
4786 | gcc_assert (num_masks > 0); |
4787 | |
4788 | /* In the worst case, we need to generate each mask in the prologue |
4789 | and in the loop body. One of the loop body mask instructions |
4790 | replaces the comparison in the scalar loop, and since we don't |
4791 | count the scalar comparison against the scalar body, we shouldn't |
4792 | count that vector instruction against the vector body either. |
4793 | |
4794 | Sometimes we can use unpacks instead of generating prologue |
4795 | masks and sometimes the prologue mask will fold to a constant, |
4796 | so the actual prologue cost might be smaller. However, it's |
4797 | simpler and safer to use the worst-case cost; if this ends up |
4798 | being the tie-breaker between vectorizing or not, then it's |
4799 | probably better not to vectorize. */ |
4800 | (void) add_stmt_cost (costs: target_cost_data, count: num_masks, |
4801 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4802 | where: vect_prologue); |
4803 | (void) add_stmt_cost (costs: target_cost_data, count: num_masks - 1, |
4804 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4805 | where: vect_body); |
4806 | } |
4807 | else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
4808 | { |
4809 | /* Referring to the functions vect_set_loop_condition_partial_vectors |
4810 | and vect_set_loop_controls_directly, we need to generate each |
4811 | length in the prologue and in the loop body if required. Although |
4812 | there are some possible optimizations, we consider the worst case |
4813 | here. */ |
4814 | |
4815 | bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo); |
4816 | signed char partial_load_store_bias |
4817 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
4818 | bool need_iterate_p |
4819 | = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
4820 | && !vect_known_niters_smaller_than_vf (loop_vinfo)); |
4821 | |
4822 | /* Calculate how many statements to be added. */ |
4823 | unsigned int prologue_stmts = 0; |
4824 | unsigned int body_stmts = 0; |
4825 | |
4826 | rgroup_controls *rgc; |
4827 | unsigned int num_vectors_m1; |
4828 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc) |
4829 | if (rgc->type) |
4830 | { |
4831 | /* May need one SHIFT for nitems_total computation. */ |
4832 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
4833 | if (nitems != 1 && !niters_known_p) |
4834 | prologue_stmts += 1; |
4835 | |
4836 | /* May need one MAX and one MINUS for wrap around. */ |
4837 | if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc)) |
4838 | prologue_stmts += 2; |
4839 | |
	  /* Need one MAX and one MINUS for each batch limit except for
	     the 1st one.  */
4842 | prologue_stmts += num_vectors_m1 * 2; |
4843 | |
4844 | unsigned int num_vectors = num_vectors_m1 + 1; |
4845 | |
4846 | /* Need to set up lengths in prologue, only one MIN required |
4847 | for each since start index is zero. */ |
4848 | prologue_stmts += num_vectors; |
4849 | |
4850 | /* If we have a non-zero partial load bias, we need one PLUS |
4851 | to adjust the load length. */ |
4852 | if (partial_load_store_bias != 0) |
4853 | body_stmts += 1; |
4854 | |
4855 | /* Each may need two MINs and one MINUS to update lengths in body |
4856 | for next iteration. */ |
4857 | if (need_iterate_p) |
4858 | body_stmts += 3 * num_vectors; |
4859 | } |
4860 | |
4861 | (void) add_stmt_cost (costs: target_cost_data, count: prologue_stmts, |
4862 | kind: scalar_stmt, where: vect_prologue); |
4863 | (void) add_stmt_cost (costs: target_cost_data, count: body_stmts, |
4864 | kind: scalar_stmt, where: vect_body); |
4865 | } |
4866 | |
4867 | /* FORNOW: The scalar outside cost is incremented in one of the |
4868 | following ways: |
4869 | |
4870 | 1. The vectorizer checks for alignment and aliasing and generates |
4871 | a condition that allows dynamic vectorization. A cost model |
     check is ANDed with the versioning condition.  Hence the scalar code
     path now has the added cost of the versioning check.
4874 | |
4875 | if (cost > th & versioning_check) |
4876 | jmp to vector code |
4877 | |
     Hence the run-time scalar cost is incremented by a not-taken branch cost.
4879 | |
4880 | 2. The vectorizer then checks if a prologue is required. If the |
4881 | cost model check was not done before during versioning, it has to |
4882 | be done before the prologue check. |
4883 | |
4884 | if (cost <= th) |
4885 | prologue = scalar_iters |
4886 | if (prologue == 0) |
4887 | jmp to vector code |
4888 | else |
4889 | execute prologue |
4890 | if (prologue == num_iters) |
4891 | go to exit |
4892 | |
4893 | Hence the run-time scalar cost is incremented by a taken branch, |
4894 | plus a not-taken branch, plus a taken branch cost. |
4895 | |
4896 | 3. The vectorizer then checks if an epilogue is required. If the |
4897 | cost model check was not done before during prologue check, it |
4898 | has to be done with the epilogue check. |
4899 | |
4900 | if (prologue == 0) |
4901 | jmp to vector code |
4902 | else |
4903 | execute prologue |
4904 | if (prologue == num_iters) |
4905 | go to exit |
4906 | vector code: |
4907 | if ((cost <= th) | (scalar_iters-prologue-epilogue == 0)) |
4908 | jmp to epilogue |
4909 | |
4910 | Hence the run-time scalar cost should be incremented by 2 taken |
4911 | branches. |
4912 | |
     TODO: The back end may reorder the BBs differently and reverse
4914 | conditions/branch directions. Change the estimates below to |
4915 | something more reasonable. */ |
4916 | |
  /* If the number of iterations is known and we do not do versioning, we can
     decide whether to vectorize at compile time.  Hence the scalar version
     does not carry cost model guard costs.  */
4920 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
4921 | || LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4922 | { |
4923 | /* Cost model check occurs at versioning. */ |
4924 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4925 | scalar_outside_cost += vect_get_stmt_cost (type_of_cost: cond_branch_not_taken); |
4926 | else |
4927 | { |
4928 | /* Cost model check occurs at prologue generation. */ |
4929 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
4930 | scalar_outside_cost += 2 * vect_get_stmt_cost (type_of_cost: cond_branch_taken) |
4931 | + vect_get_stmt_cost (type_of_cost: cond_branch_not_taken); |
4932 | /* Cost model check occurs at epilogue generation. */ |
4933 | else |
4934 | scalar_outside_cost += 2 * vect_get_stmt_cost (type_of_cost: cond_branch_taken); |
4935 | } |
4936 | } |
4937 | |
4938 | /* Complete the target-specific cost calculations. */ |
4939 | finish_cost (costs: loop_vinfo->vector_costs, scalar_costs: loop_vinfo->scalar_costs, |
4940 | prologue_cost: &vec_prologue_cost, body_cost: &vec_inside_cost, epilogue_cost: &vec_epilogue_cost, |
4941 | suggested_unroll_factor); |
4942 | |
4943 | if (suggested_unroll_factor && *suggested_unroll_factor > 1 |
4944 | && LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) != MAX_VECTORIZATION_FACTOR |
4945 | && !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) * |
4946 | *suggested_unroll_factor, |
4947 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo))) |
4948 | { |
4949 | if (dump_enabled_p ()) |
4950 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4951 | "can't unroll as unrolled vectorization factor larger" |
4952 | " than maximum vectorization factor: " |
4953 | HOST_WIDE_INT_PRINT_UNSIGNED "\n" , |
4954 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)); |
4955 | *suggested_unroll_factor = 1; |
4956 | } |
4957 | |
4958 | vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); |
4959 | |
4960 | if (dump_enabled_p ()) |
4961 | { |
4962 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n" ); |
4963 | dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n" , |
4964 | vec_inside_cost); |
4965 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n" , |
4966 | vec_prologue_cost); |
4967 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n" , |
4968 | vec_epilogue_cost); |
4969 | dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n" , |
4970 | scalar_single_iter_cost); |
4971 | dump_printf (MSG_NOTE, " Scalar outside cost: %d\n" , |
4972 | scalar_outside_cost); |
4973 | dump_printf (MSG_NOTE, " Vector outside cost: %d\n" , |
4974 | vec_outside_cost); |
4975 | dump_printf (MSG_NOTE, " prologue iterations: %d\n" , |
4976 | peel_iters_prologue); |
4977 | dump_printf (MSG_NOTE, " epilogue iterations: %d\n" , |
4978 | peel_iters_epilogue); |
4979 | } |
4980 | |
4981 | /* Calculate number of iterations required to make the vector version |
4982 | profitable, relative to the loop bodies only. The following condition |
4983 | must hold true: |
4984 | SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC |
4985 | where |
4986 | SIC = scalar iteration cost, VIC = vector iteration cost, |
4987 | VOC = vector outside cost, VF = vectorization factor, |
4988 | NPEEL = prologue iterations + epilogue iterations, |
4989 | SOC = scalar outside cost for run time cost model check. */ |
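  /* Purely illustrative numbers (assumptions, not from a target cost table):
     with SIC = 4, VIC = 10, VF = 4, NPEEL = 2, VOC = 20 and SOC = 6, each
     vector iteration saves SIC * VF - VIC = 6 units and the overhead
     VOC - SIC * NPEEL - SOC = 6 units is amortized after
     6 / 6 + 1 = 2 vector iterations, matching the computation below.  */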
4990 | |
4991 | int saving_per_viter = (scalar_single_iter_cost * assumed_vf |
4992 | - vec_inside_cost); |
4993 | if (saving_per_viter <= 0) |
4994 | { |
4995 | if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize) |
4996 | warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, |
4997 | "vectorization did not happen for a simd loop" ); |
4998 | |
4999 | if (dump_enabled_p ()) |
5000 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5001 | "cost model: the vector iteration cost = %d " |
5002 | "divided by the scalar iteration cost = %d " |
5003 | "is greater or equal to the vectorization factor = %d" |
5004 | ".\n" , |
5005 | vec_inside_cost, scalar_single_iter_cost, assumed_vf); |
5006 | *ret_min_profitable_niters = -1; |
5007 | *ret_min_profitable_estimate = -1; |
5008 | return; |
5009 | } |
5010 | |
5011 | /* ??? The "if" arm is written to handle all cases; see below for what |
5012 | we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
5013 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5014 | { |
5015 | /* Rewriting the condition above in terms of the number of |
5016 | vector iterations (vniters) rather than the number of |
5017 | scalar iterations (niters) gives: |
5018 | |
5019 | SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC |
5020 | |
5021 | <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC |
5022 | |
5023 | For integer N, X and Y when X > 0: |
5024 | |
5025 | N * X > Y <==> N >= (Y /[floor] X) + 1. */ |
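      /* E.g. with Y = 27 and X = 6: N * 6 > 27 exactly when
	 N >= 27 / 6 + 1 = 5 under integer (floor) division.  */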
5026 | int outside_overhead = (vec_outside_cost |
5027 | - scalar_single_iter_cost * peel_iters_prologue |
5028 | - scalar_single_iter_cost * peel_iters_epilogue |
5029 | - scalar_outside_cost); |
5030 | /* We're only interested in cases that require at least one |
5031 | vector iteration. */ |
5032 | int min_vec_niters = 1; |
5033 | if (outside_overhead > 0) |
5034 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
5035 | |
5036 | if (dump_enabled_p ()) |
5037 | dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n" , |
5038 | min_vec_niters); |
5039 | |
5040 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5041 | { |
5042 | /* Now that we know the minimum number of vector iterations, |
5043 | find the minimum niters for which the scalar cost is larger: |
5044 | |
5045 | SIC * niters > VIC * vniters + VOC - SOC |
5046 | |
5047 | We know that the minimum niters is no more than |
5048 | vniters * VF + NPEEL, but it might be (and often is) less |
5049 | than that if a partial vector iteration is cheaper than the |
5050 | equivalent scalar code. */ |
5051 | int threshold = (vec_inside_cost * min_vec_niters |
5052 | + vec_outside_cost |
5053 | - scalar_outside_cost); |
5054 | if (threshold <= 0) |
5055 | min_profitable_iters = 1; |
5056 | else |
5057 | min_profitable_iters = threshold / scalar_single_iter_cost + 1; |
5058 | } |
5059 | else |
5060 | /* Convert the number of vector iterations into a number of |
5061 | scalar iterations. */ |
5062 | min_profitable_iters = (min_vec_niters * assumed_vf |
5063 | + peel_iters_prologue |
5064 | + peel_iters_epilogue); |
5065 | } |
5066 | else |
5067 | { |
5068 | min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) |
5069 | * assumed_vf |
5070 | - vec_inside_cost * peel_iters_prologue |
5071 | - vec_inside_cost * peel_iters_epilogue); |
5072 | if (min_profitable_iters <= 0) |
5073 | min_profitable_iters = 0; |
5074 | else |
5075 | { |
5076 | min_profitable_iters /= saving_per_viter; |
5077 | |
5078 | if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) |
5079 | <= (((int) vec_inside_cost * min_profitable_iters) |
5080 | + (((int) vec_outside_cost - scalar_outside_cost) |
5081 | * assumed_vf))) |
5082 | min_profitable_iters++; |
5083 | } |
5084 | } |
5085 | |
5086 | if (dump_enabled_p ()) |
5087 | dump_printf (MSG_NOTE, |
5088 | " Calculated minimum iters for profitability: %d\n" , |
5089 | min_profitable_iters); |
5090 | |
5091 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
5092 | && min_profitable_iters < (assumed_vf + peel_iters_prologue)) |
5093 | /* We want the vectorized loop to execute at least once. */ |
5094 | min_profitable_iters = assumed_vf + peel_iters_prologue; |
5095 | else if (min_profitable_iters < peel_iters_prologue) |
5096 | /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the |
5097 | vectorized loop executes at least once. */ |
5098 | min_profitable_iters = peel_iters_prologue; |
5099 | |
5100 | if (dump_enabled_p ()) |
5101 | dump_printf_loc (MSG_NOTE, vect_location, |
5102 | " Runtime profitability threshold = %d\n" , |
5103 | min_profitable_iters); |
5104 | |
5105 | *ret_min_profitable_niters = min_profitable_iters; |
5106 | |
5107 | /* Calculate number of iterations required to make the vector version |
5108 | profitable, relative to the loop bodies only. |
5109 | |
     The non-vectorized variant costs SIC * niters and it must win over the
     vector variant on the expected loop trip count.  The following condition
     must hold true:
5112 | SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */ |
5113 | |
5114 | if (vec_outside_cost <= 0) |
5115 | min_profitable_estimate = 0; |
5116 | /* ??? This "else if" arm is written to handle all cases; see below for |
5117 | what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
5118 | else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5119 | { |
5120 | /* This is a repeat of the code above, but with + SOC rather |
5121 | than - SOC. */ |
5122 | int outside_overhead = (vec_outside_cost |
5123 | - scalar_single_iter_cost * peel_iters_prologue |
5124 | - scalar_single_iter_cost * peel_iters_epilogue |
5125 | + scalar_outside_cost); |
5126 | int min_vec_niters = 1; |
5127 | if (outside_overhead > 0) |
5128 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
5129 | |
5130 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5131 | { |
5132 | int threshold = (vec_inside_cost * min_vec_niters |
5133 | + vec_outside_cost |
5134 | + scalar_outside_cost); |
5135 | min_profitable_estimate = threshold / scalar_single_iter_cost + 1; |
5136 | } |
5137 | else |
5138 | min_profitable_estimate = (min_vec_niters * assumed_vf |
5139 | + peel_iters_prologue |
5140 | + peel_iters_epilogue); |
5141 | } |
5142 | else |
5143 | { |
5144 | min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) |
5145 | * assumed_vf |
5146 | - vec_inside_cost * peel_iters_prologue |
5147 | - vec_inside_cost * peel_iters_epilogue) |
5148 | / ((scalar_single_iter_cost * assumed_vf) |
5149 | - vec_inside_cost); |
5150 | } |
5151 | min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); |
5152 | if (dump_enabled_p ()) |
5153 | dump_printf_loc (MSG_NOTE, vect_location, |
5154 | " Static estimate profitability threshold = %d\n" , |
5155 | min_profitable_estimate); |
5156 | |
5157 | *ret_min_profitable_estimate = min_profitable_estimate; |
5158 | } |
5159 | |
5160 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET |
5161 | vector elements (not bits) for a vector with NELT elements. */ |
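/* Illustration (an added example, not from the original comment): OFFSET = 2
   and NELT = 8 give the stepped series 2, 3, 4, which vec_perm_indices
   extends to the selector {2, 3, 4, 5, 6, 7, 8, 9}; in a two-input
   permutation the indices 8 and 9 select from the second operand.  */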
5162 | static void |
5163 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, |
5164 | vec_perm_builder *sel) |
5165 | { |
5166 | /* The encoding is a single stepped pattern. Any wrap-around is handled |
5167 | by vec_perm_indices. */ |
5168 | sel->new_vector (full_nelts: nelt, npatterns: 1, nelts_per_pattern: 3); |
5169 | for (unsigned int i = 0; i < 3; i++) |
5170 | sel->quick_push (obj: i + offset); |
5171 | } |
5172 | |
5173 | /* Checks whether the target supports whole-vector shifts for vectors of mode |
5174 | MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ |
5175 | it supports vec_perm_const with masks for all necessary shift amounts. */ |
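/* For a fixed-length mode with eight elements, for example, the loop below
   checks permutations corresponding to shifts by 4, 2 and 1 elements
   (an illustrative case, not an exhaustive list).  */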
5176 | static bool |
5177 | have_whole_vector_shift (machine_mode mode) |
5178 | { |
5179 | if (optab_handler (op: vec_shr_optab, mode) != CODE_FOR_nothing) |
5180 | return true; |
5181 | |
5182 | /* Variable-length vectors should be handled via the optab. */ |
5183 | unsigned int nelt; |
5184 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
5185 | return false; |
5186 | |
5187 | vec_perm_builder sel; |
5188 | vec_perm_indices indices; |
5189 | for (unsigned int i = nelt / 2; i >= 1; i /= 2) |
5190 | { |
5191 | calc_vec_perm_mask_for_shift (offset: i, nelt, sel: &sel); |
5192 | indices.new_vector (sel, 2, nelt); |
5193 | if (!can_vec_perm_const_p (mode, mode, indices, false)) |
5194 | return false; |
5195 | } |
5196 | return true; |
5197 | } |
5198 | |
5199 | /* Return true if (a) STMT_INFO is a DOT_PROD_EXPR reduction whose |
5200 | multiplication operands have differing signs and (b) we intend |
5201 | to emulate the operation using a series of signed DOT_PROD_EXPRs. |
5202 | See vect_emulate_mixed_dot_prod for the actual sequence used. */ |
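/* A hedged illustration: a reduction such as
     sum += (int) schar_a[i] * (int) uchar_b[i]
   has mixed-sign multiplication operands; if the target lacks the mixed-sign
   DOT_PROD_EXPR variant for the input vector type, the operation is emulated
   and this predicate returns true.  */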
5203 | |
5204 | static bool |
5205 | vect_is_emulated_mixed_dot_prod (loop_vec_info loop_vinfo, |
5206 | stmt_vec_info stmt_info) |
5207 | { |
5208 | gassign *assign = dyn_cast<gassign *> (p: stmt_info->stmt); |
5209 | if (!assign || gimple_assign_rhs_code (gs: assign) != DOT_PROD_EXPR) |
5210 | return false; |
5211 | |
5212 | tree rhs1 = gimple_assign_rhs1 (gs: assign); |
5213 | tree rhs2 = gimple_assign_rhs2 (gs: assign); |
5214 | if (TYPE_SIGN (TREE_TYPE (rhs1)) == TYPE_SIGN (TREE_TYPE (rhs2))) |
5215 | return false; |
5216 | |
5217 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
5218 | gcc_assert (reduc_info->is_reduc_info); |
5219 | return !directly_supported_p (DOT_PROD_EXPR, |
5220 | STMT_VINFO_REDUC_VECTYPE_IN (reduc_info), |
5221 | optab_vector_mixed_sign); |
5222 | } |
5223 | |
5224 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_* |
5225 | functions. Design better to avoid maintenance issues. */ |
5226 | |
5227 | /* Function vect_model_reduction_cost. |
5228 | |
5229 | Models cost for a reduction operation, including the vector ops |
5230 | generated within the strip-mine loop in some cases, the initial |
5231 | definition before the loop, and the epilogue code that must be generated. */ |
5232 | |
5233 | static void |
5234 | vect_model_reduction_cost (loop_vec_info loop_vinfo, |
5235 | stmt_vec_info stmt_info, internal_fn reduc_fn, |
5236 | vect_reduction_type reduction_type, |
5237 | int ncopies, stmt_vector_for_cost *cost_vec) |
5238 | { |
5239 | int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0; |
5240 | tree vectype; |
5241 | machine_mode mode; |
5242 | class loop *loop = NULL; |
5243 | |
5244 | if (loop_vinfo) |
5245 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
5246 | |
5247 | /* Condition reductions generate two reductions in the loop. */ |
5248 | if (reduction_type == COND_REDUCTION) |
5249 | ncopies *= 2; |
5250 | |
5251 | vectype = STMT_VINFO_VECTYPE (stmt_info); |
5252 | mode = TYPE_MODE (vectype); |
5253 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
5254 | |
5255 | gimple_match_op op; |
5256 | if (!gimple_extract_op (orig_stmt_info->stmt, &op)) |
5257 | gcc_unreachable (); |
5258 | |
5259 | bool emulated_mixed_dot_prod |
5260 | = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info); |
5261 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
5262 | /* No extra instructions are needed in the prologue. The loop body |
5263 | operations are costed in vectorizable_condition. */ |
5264 | inside_cost = 0; |
5265 | else if (reduction_type == FOLD_LEFT_REDUCTION) |
5266 | { |
5267 | /* No extra instructions needed in the prologue. */ |
5268 | prologue_cost = 0; |
5269 | |
5270 | if (reduc_fn != IFN_LAST) |
5271 | /* Count one reduction-like operation per vector. */ |
5272 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vec_to_scalar, |
5273 | stmt_info, misalign: 0, where: vect_body); |
5274 | else |
5275 | { |
5276 | /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ |
5277 | unsigned int nelements = ncopies * vect_nunits_for_cost (vec_type: vectype); |
5278 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: nelements, |
5279 | kind: vec_to_scalar, stmt_info, misalign: 0, |
5280 | where: vect_body); |
5281 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nelements, |
5282 | kind: scalar_stmt, stmt_info, misalign: 0, |
5283 | where: vect_body); |
5284 | } |
5285 | } |
5286 | else |
5287 | { |
5288 | /* Add in the cost of the initial definitions. */ |
5289 | int prologue_stmts; |
5290 | if (reduction_type == COND_REDUCTION) |
5291 | /* For cond reductions we have four vectors: initial index, step, |
5292 | initial result of the data reduction, initial value of the index |
5293 | reduction. */ |
5294 | prologue_stmts = 4; |
5295 | else if (emulated_mixed_dot_prod) |
5296 | /* We need the initial reduction value and two invariants: |
5297 | one that contains the minimum signed value and one that |
5298 | contains half of its negative. */ |
5299 | prologue_stmts = 3; |
5300 | else |
5301 | prologue_stmts = 1; |
5302 | prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: prologue_stmts, |
5303 | kind: scalar_to_vec, stmt_info, misalign: 0, |
5304 | where: vect_prologue); |
5305 | } |
5306 | |
5307 | /* Determine cost of epilogue code. |
5308 | |
5309 | We have a reduction operator that will reduce the vector in one statement. |
5310 | Also requires scalar extract. */ |
5311 | |
5312 | if (!loop || !nested_in_vect_loop_p (loop, stmt_info: orig_stmt_info)) |
5313 | { |
5314 | if (reduc_fn != IFN_LAST) |
5315 | { |
5316 | if (reduction_type == COND_REDUCTION) |
5317 | { |
	      /* An EQ stmt and a COND_EXPR stmt.  */
5319 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 2, |
5320 | kind: vector_stmt, stmt_info, misalign: 0, |
5321 | where: vect_epilogue); |
5322 | /* Reduction of the max index and a reduction of the found |
5323 | values. */ |
5324 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 2, |
5325 | kind: vec_to_scalar, stmt_info, misalign: 0, |
5326 | where: vect_epilogue); |
5327 | /* A broadcast of the max value. */ |
5328 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, |
5329 | kind: scalar_to_vec, stmt_info, misalign: 0, |
5330 | where: vect_epilogue); |
5331 | } |
5332 | else |
5333 | { |
5334 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vector_stmt, |
5335 | stmt_info, misalign: 0, where: vect_epilogue); |
5336 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, |
5337 | kind: vec_to_scalar, stmt_info, misalign: 0, |
5338 | where: vect_epilogue); |
5339 | } |
5340 | } |
5341 | else if (reduction_type == COND_REDUCTION) |
5342 | { |
5343 | unsigned estimated_nunits = vect_nunits_for_cost (vec_type: vectype); |
5344 | /* Extraction of scalar elements. */ |
5345 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, |
5346 | count: 2 * estimated_nunits, |
5347 | kind: vec_to_scalar, stmt_info, misalign: 0, |
5348 | where: vect_epilogue); |
5349 | /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ |
5350 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, |
5351 | count: 2 * estimated_nunits - 3, |
5352 | kind: scalar_stmt, stmt_info, misalign: 0, |
5353 | where: vect_epilogue); |
5354 | } |
5355 | else if (reduction_type == EXTRACT_LAST_REDUCTION |
5356 | || reduction_type == FOLD_LEFT_REDUCTION) |
	/* No extra instructions are needed in the epilogue.  */
5358 | ; |
5359 | else |
5360 | { |
5361 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); |
5362 | tree bitsize = TYPE_SIZE (op.type); |
5363 | int element_bitsize = tree_to_uhwi (bitsize); |
5364 | int nelements = vec_size_in_bits / element_bitsize; |
5365 | |
5366 | if (op.code == COND_EXPR) |
5367 | op.code = MAX_EXPR; |
5368 | |
5369 | /* We have a whole vector shift available. */ |
5370 | if (VECTOR_MODE_P (mode) |
5371 | && directly_supported_p (op.code, vectype) |
5372 | && have_whole_vector_shift (mode)) |
5373 | { |
5374 | /* Final reduction via vector shifts and the reduction operator. |
5375 | Also requires scalar extract. */ |
5376 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, |
5377 | count: exact_log2 (x: nelements) * 2, |
5378 | kind: vector_stmt, stmt_info, misalign: 0, |
5379 | where: vect_epilogue); |
5380 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, |
5381 | kind: vec_to_scalar, stmt_info, misalign: 0, |
5382 | where: vect_epilogue); |
5383 | } |
5384 | else |
5385 | /* Use extracts and reduction op for final reduction. For N |
5386 | elements, we have N extracts and N-1 reduction ops. */ |
5387 | epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, |
5388 | count: nelements + nelements - 1, |
5389 | kind: vector_stmt, stmt_info, misalign: 0, |
5390 | where: vect_epilogue); |
5391 | } |
5392 | } |
5393 | |
5394 | if (dump_enabled_p ()) |
5395 | dump_printf (MSG_NOTE, |
5396 | "vect_model_reduction_cost: inside_cost = %d, " |
5397 | "prologue_cost = %d, epilogue_cost = %d .\n" , inside_cost, |
5398 | prologue_cost, epilogue_cost); |
5399 | } |
5400 | |
5401 | /* SEQ is a sequence of instructions that initialize the reduction |
5402 | described by REDUC_INFO. Emit them in the appropriate place. */ |
5403 | |
5404 | static void |
5405 | vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, |
5406 | stmt_vec_info reduc_info, gimple *seq) |
5407 | { |
5408 | if (reduc_info->reused_accumulator) |
5409 | { |
5410 | /* When reusing an accumulator from the main loop, we only need |
5411 | initialization instructions if the main loop can be skipped. |
5412 | In that case, emit the initialization instructions at the end |
5413 | of the guard block that does the skip. */ |
5414 | edge skip_edge = loop_vinfo->skip_main_loop_edge; |
5415 | gcc_assert (skip_edge); |
5416 | gimple_stmt_iterator gsi = gsi_last_bb (bb: skip_edge->src); |
5417 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
5418 | } |
5419 | else |
5420 | { |
5421 | /* The normal case: emit the initialization instructions on the |
5422 | preheader edge. */ |
5423 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
5424 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq); |
5425 | } |
5426 | } |
5427 | |
5428 | /* Function get_initial_def_for_reduction |
5429 | |
5430 | Input: |
5431 | REDUC_INFO - the info_for_reduction |
5432 | INIT_VAL - the initial value of the reduction variable |
5433 | NEUTRAL_OP - a value that has no effect on the reduction, as per |
5434 | neutral_op_for_reduction |
5435 | |
5436 | Output: |
   Return a vector variable, initialized according to the operation that
   REDUC_INFO describes.  This vector will be used as the initial value
5439 | of the vector of partial results. |
5440 | |
5441 | The value we need is a vector in which element 0 has value INIT_VAL |
5442 | and every other element has value NEUTRAL_OP. */ |
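/* For example (illustrative): a sum reduction of ints with INIT_VAL 10 and
   NEUTRAL_OP 0 on a four-element vector yields { 10, 0, 0, 0 }; for a MIN or
   MAX reduction the neutral value equals INIT_VAL, so the result degenerates
   to a splat of INIT_VAL.  */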
5443 | |
5444 | static tree |
5445 | get_initial_def_for_reduction (loop_vec_info loop_vinfo, |
5446 | stmt_vec_info reduc_info, |
5447 | tree init_val, tree neutral_op) |
5448 | { |
5449 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
5450 | tree scalar_type = TREE_TYPE (init_val); |
5451 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
5452 | tree init_def; |
5453 | gimple_seq stmts = NULL; |
5454 | |
5455 | gcc_assert (vectype); |
5456 | |
5457 | gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) |
5458 | || SCALAR_FLOAT_TYPE_P (scalar_type)); |
5459 | |
5460 | gcc_assert (nested_in_vect_loop_p (loop, reduc_info) |
5461 | || loop == (gimple_bb (reduc_info->stmt))->loop_father); |
5462 | |
5463 | if (operand_equal_p (init_val, neutral_op)) |
5464 | { |
5465 | /* If both elements are equal then the vector described above is |
5466 | just a splat. */ |
5467 | neutral_op = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: neutral_op); |
5468 | init_def = gimple_build_vector_from_val (seq: &stmts, type: vectype, op: neutral_op); |
5469 | } |
5470 | else |
5471 | { |
5472 | neutral_op = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: neutral_op); |
5473 | init_val = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: init_val); |
5474 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant ()) |
5475 | { |
5476 | /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into |
5477 | element 0. */ |
5478 | init_def = gimple_build_vector_from_val (seq: &stmts, type: vectype, |
5479 | op: neutral_op); |
5480 | init_def = gimple_build (seq: &stmts, fn: CFN_VEC_SHL_INSERT, |
5481 | type: vectype, args: init_def, args: init_val); |
5482 | } |
5483 | else |
5484 | { |
5485 | /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */ |
5486 | tree_vector_builder elts (vectype, 1, 2); |
5487 | elts.quick_push (obj: init_val); |
5488 | elts.quick_push (obj: neutral_op); |
5489 | init_def = gimple_build_vector (seq: &stmts, builder: &elts); |
5490 | } |
5491 | } |
5492 | |
5493 | if (stmts) |
5494 | vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, seq: stmts); |
5495 | return init_def; |
5496 | } |
5497 | |
5498 | /* Get at the initial defs for the reduction PHIs for REDUC_INFO, |
5499 | which performs a reduction involving GROUP_SIZE scalar statements. |
5500 | NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP |
5501 | is nonnull, introducing extra elements of that value will not change the |
5502 | result. */ |
5503 | |
5504 | static void |
5505 | get_initial_defs_for_reduction (loop_vec_info loop_vinfo, |
5506 | stmt_vec_info reduc_info, |
5507 | vec<tree> *vec_oprnds, |
5508 | unsigned int number_of_vectors, |
5509 | unsigned int group_size, tree neutral_op) |
5510 | { |
5511 | vec<tree> &initial_values = reduc_info->reduc_initial_values; |
5512 | unsigned HOST_WIDE_INT nunits; |
5513 | unsigned j, number_of_places_left_in_vector; |
5514 | tree vector_type = STMT_VINFO_VECTYPE (reduc_info); |
5515 | unsigned int i; |
5516 | |
5517 | gcc_assert (group_size == initial_values.length () || neutral_op); |
5518 | |
5519 | /* NUMBER_OF_COPIES is the number of times we need to use the same values in |
5520 | created vectors. It is greater than 1 if unrolling is performed. |
5521 | |
5522 | For example, we have two scalar operands, s1 and s2 (e.g., group of |
5523 | strided accesses of size two), while NUNITS is four (i.e., four scalars |
5524 | of this type can be packed in a vector). The output vector will contain |
5525 | two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES |
5526 | will be 2). |
5527 | |
5528 | If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several |
5529 | vectors containing the operands. |
5530 | |
5531 | For example, NUNITS is four as before, and the group size is 8 |
5532 | (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and |
5533 | {s5, s6, s7, s8}. */ |
5534 | |
5535 | if (!TYPE_VECTOR_SUBPARTS (node: vector_type).is_constant (const_value: &nunits)) |
5536 | nunits = group_size; |
5537 | |
5538 | number_of_places_left_in_vector = nunits; |
5539 | bool constant_p = true; |
5540 | tree_vector_builder elts (vector_type, nunits, 1); |
5541 | elts.quick_grow (len: nunits); |
5542 | gimple_seq ctor_seq = NULL; |
5543 | for (j = 0; j < nunits * number_of_vectors; ++j) |
5544 | { |
5545 | tree op; |
5546 | i = j % group_size; |
5547 | |
      /* Get the def before the loop.  In a reduction chain we have only
	 one initial value; otherwise we have as many initial values as
	 there are PHIs in the group.  */
5550 | if (i >= initial_values.length () || (j > i && neutral_op)) |
5551 | op = neutral_op; |
5552 | else |
5553 | op = initial_values[i]; |
5554 | |
5555 | /* Create 'vect_ = {op0,op1,...,opn}'. */ |
5556 | number_of_places_left_in_vector--; |
5557 | elts[nunits - number_of_places_left_in_vector - 1] = op; |
5558 | if (!CONSTANT_CLASS_P (op)) |
5559 | constant_p = false; |
5560 | |
5561 | if (number_of_places_left_in_vector == 0) |
5562 | { |
5563 | tree init; |
5564 | if (constant_p && !neutral_op |
5565 | ? multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vector_type), b: nunits) |
5566 | : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) |
5567 | /* Build the vector directly from ELTS. */ |
5568 | init = gimple_build_vector (seq: &ctor_seq, builder: &elts); |
5569 | else if (neutral_op) |
5570 | { |
5571 | /* Build a vector of the neutral value and shift the |
5572 | other elements into place. */ |
5573 | init = gimple_build_vector_from_val (seq: &ctor_seq, type: vector_type, |
5574 | op: neutral_op); |
5575 | int k = nunits; |
5576 | while (k > 0 && elts[k - 1] == neutral_op) |
5577 | k -= 1; |
5578 | while (k > 0) |
5579 | { |
5580 | k -= 1; |
5581 | init = gimple_build (seq: &ctor_seq, fn: CFN_VEC_SHL_INSERT, |
5582 | type: vector_type, args: init, args: elts[k]); |
5583 | } |
5584 | } |
5585 | else |
5586 | { |
5587 | /* First time round, duplicate ELTS to fill the |
5588 | required number of vectors. */ |
5589 | duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type, |
5590 | elts, number_of_vectors, *vec_oprnds); |
5591 | break; |
5592 | } |
5593 | vec_oprnds->quick_push (obj: init); |
5594 | |
5595 | number_of_places_left_in_vector = nunits; |
5596 | elts.new_vector (type: vector_type, npatterns: nunits, nelts_per_pattern: 1); |
5597 | elts.quick_grow (len: nunits); |
5598 | constant_p = true; |
5599 | } |
5600 | } |
5601 | if (ctor_seq != NULL) |
5602 | vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, seq: ctor_seq); |
5603 | } |
5604 | |
5605 | /* For a statement STMT_INFO taking part in a reduction operation return |
5606 | the stmt_vec_info the meta information is stored on. */ |
5607 | |
5608 | stmt_vec_info |
5609 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) |
5610 | { |
5611 | stmt_info = vect_orig_stmt (stmt_info); |
5612 | gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); |
5613 | if (!is_a <gphi *> (p: stmt_info->stmt) |
5614 | || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
5615 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
5616 | gphi *phi = as_a <gphi *> (p: stmt_info->stmt); |
5617 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
5618 | { |
5619 | if (gimple_phi_num_args (gs: phi) == 1) |
5620 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
5621 | } |
5622 | else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
5623 | { |
5624 | stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); |
5625 | if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) |
5626 | stmt_info = info; |
5627 | } |
5628 | return stmt_info; |
5629 | } |
5630 | |
5631 | /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that |
5632 | REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise |
5633 | return false. */ |
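/* A typical case (an assumption for illustration): the main loop accumulates
   into a V8SI vector while the epilogue uses V4SI; the checks below verify
   that the wider accumulator can be halved down to V4SI through supported
   vec_extract and reduction operations before the epilogue carries it on.  */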
5634 | |
5635 | static bool |
5636 | vect_find_reusable_accumulator (loop_vec_info loop_vinfo, |
5637 | stmt_vec_info reduc_info) |
5638 | { |
5639 | loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
5640 | if (!main_loop_vinfo) |
5641 | return false; |
5642 | |
5643 | if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION) |
5644 | return false; |
5645 | |
5646 | unsigned int num_phis = reduc_info->reduc_initial_values.length (); |
5647 | auto_vec<tree, 16> main_loop_results (num_phis); |
5648 | auto_vec<tree, 16> initial_values (num_phis); |
5649 | if (edge main_loop_edge = loop_vinfo->main_loop_edge) |
5650 | { |
5651 | /* The epilogue loop can be entered either from the main loop or |
5652 | from an earlier guard block. */ |
5653 | edge skip_edge = loop_vinfo->skip_main_loop_edge; |
5654 | for (tree incoming_value : reduc_info->reduc_initial_values) |
5655 | { |
5656 | /* Look for: |
5657 | |
5658 | INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop), |
5659 | INITIAL_VALUE(guard block)>. */ |
5660 | gcc_assert (TREE_CODE (incoming_value) == SSA_NAME); |
5661 | |
5662 | gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value)); |
5663 | gcc_assert (gimple_bb (phi) == main_loop_edge->dest); |
5664 | |
5665 | tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge); |
5666 | tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge); |
5667 | |
5668 | main_loop_results.quick_push (obj: from_main_loop); |
5669 | initial_values.quick_push (obj: from_skip); |
5670 | } |
5671 | } |
5672 | else |
5673 | /* The main loop dominates the epilogue loop. */ |
5674 | main_loop_results.splice (src: reduc_info->reduc_initial_values); |
5675 | |
5676 | /* See if the main loop has the kind of accumulator we need. */ |
5677 | vect_reusable_accumulator *accumulator |
5678 | = main_loop_vinfo->reusable_accumulators.get (k: main_loop_results[0]); |
5679 | if (!accumulator |
5680 | || num_phis != accumulator->reduc_info->reduc_scalar_results.length () |
5681 | || !std::equal (first1: main_loop_results.begin (), last1: main_loop_results.end (), |
5682 | first2: accumulator->reduc_info->reduc_scalar_results.begin ())) |
5683 | return false; |
5684 | |
5685 | /* Handle the case where we can reduce wider vectors to narrower ones. */ |
5686 | tree vectype = STMT_VINFO_VECTYPE (reduc_info); |
5687 | tree old_vectype = TREE_TYPE (accumulator->reduc_input); |
5688 | unsigned HOST_WIDE_INT m; |
5689 | if (!constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: old_vectype), |
5690 | b: TYPE_VECTOR_SUBPARTS (node: vectype), multiple: &m)) |
5691 | return false; |
5692 | /* Check the intermediate vector types and operations are available. */ |
5693 | tree prev_vectype = old_vectype; |
5694 | poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (node: old_vectype); |
5695 | while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype))) |
5696 | { |
5697 | intermediate_nunits = exact_div (a: intermediate_nunits, b: 2); |
5698 | tree intermediate_vectype = get_related_vectype_for_scalar_type |
5699 | (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits); |
5700 | if (!intermediate_vectype |
5701 | || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), |
5702 | intermediate_vectype) |
5703 | || !can_vec_extract (TYPE_MODE (prev_vectype), |
5704 | TYPE_MODE (intermediate_vectype))) |
5705 | return false; |
5706 | prev_vectype = intermediate_vectype; |
5707 | } |
5708 | |
5709 | /* Non-SLP reductions might apply an adjustment after the reduction |
5710 | operation, in order to simplify the initialization of the accumulator. |
5711 | If the epilogue loop carries on from where the main loop left off, |
5712 | it should apply the same adjustment to the final reduction result. |
5713 | |
5714 | If the epilogue loop can also be entered directly (rather than via |
5715 | the main loop), we need to be able to handle that case in the same way, |
5716 | with the same adjustment. (In principle we could add a PHI node |
5717 | to select the correct adjustment, but in practice that shouldn't be |
5718 | necessary.) */ |
5719 | tree main_adjustment |
5720 | = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); |
5721 | if (loop_vinfo->main_loop_edge && main_adjustment) |
5722 | { |
5723 | gcc_assert (num_phis == 1); |
5724 | tree initial_value = initial_values[0]; |
5725 | /* Check that we can use INITIAL_VALUE as the adjustment and |
5726 | initialize the accumulator with a neutral value instead. */ |
5727 | if (!operand_equal_p (initial_value, main_adjustment)) |
5728 | return false; |
5729 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); |
5730 | initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value), |
5731 | code, initial_value); |
5732 | } |
5733 | STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; |
5734 | reduc_info->reduc_initial_values.truncate (0); |
5735 | reduc_info->reduc_initial_values.splice (initial_values); |
5736 | reduc_info->reused_accumulator = accumulator; |
5737 | return true; |
5738 | } |
5739 | |
5740 | /* Reduce the vector VEC_DEF down to VECTYPE with reduction operation |
5741 | CODE, emitting any new stmts to SEQ. Returns a vector def of VECTYPE. */ |
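/* For instance, one halving step on a four-element vector {a, b, c, d}
   with a PLUS_EXPR CODE conceptually emits (sketch only, not the exact
   GIMPLE built below):

     lo = BIT_FIELD_REF <v, half_size, 0>;          // {a, b}
     hi = BIT_FIELD_REF <v, half_size, half_size>;  // {c, d}
     v' = lo + hi;                                  // {a+c, b+d}

   and the halving repeats until the requested VECTYPE is reached.  */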
5742 | |
5743 | static tree |
5744 | vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code, |
5745 | gimple_seq *seq) |
5746 | { |
5747 | unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)).to_constant (); |
5748 | unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); |
5749 | tree stype = TREE_TYPE (vectype); |
5750 | tree new_temp = vec_def; |
5751 | while (nunits > nunits1) |
5752 | { |
5753 | nunits /= 2; |
5754 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), |
5755 | stype, nunits); |
5756 | unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); |
5757 | |
5758 | /* The target has to make sure we support lowpart/highpart |
5759 | extraction, either via direct vector extract or through |
5760 | integer mode punning. */ |
5761 | tree dst1, dst2; |
5762 | gimple *epilog_stmt; |
5763 | if (convert_optab_handler (vec_extract_optab, |
5764 | TYPE_MODE (TREE_TYPE (new_temp)), |
5765 | TYPE_MODE (vectype1)) |
5766 | != CODE_FOR_nothing) |
5767 | { |
5768 | /* Extract sub-vectors directly once vec_extract becomes |
5769 | a conversion optab. */ |
5770 | dst1 = make_ssa_name (vectype1); |
5771 | epilog_stmt |
5772 | = gimple_build_assign (dst1, BIT_FIELD_REF, |
5773 | build3 (BIT_FIELD_REF, vectype1, |
5774 | new_temp, TYPE_SIZE (vectype1), |
5775 | bitsize_int (0))); |
5776 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5777 | dst2 = make_ssa_name (vectype1); |
5778 | epilog_stmt |
5779 | = gimple_build_assign (dst2, BIT_FIELD_REF, |
5780 | build3 (BIT_FIELD_REF, vectype1, |
5781 | new_temp, TYPE_SIZE (vectype1), |
5782 | bitsize_int (bitsize))); |
5783 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5784 | } |
5785 | else |
5786 | { |
5787 | /* Extract via punning to appropriately sized integer mode |
5788 | vector. */ |
5789 | tree eltype = build_nonstandard_integer_type (bitsize, 1); |
5790 | tree etype = build_vector_type (eltype, 2); |
5791 | gcc_assert (convert_optab_handler (vec_extract_optab, |
5792 | TYPE_MODE (etype), |
5793 | TYPE_MODE (eltype)) |
5794 | != CODE_FOR_nothing); |
5795 | tree tem = make_ssa_name (etype); |
5796 | epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, |
5797 | build1 (VIEW_CONVERT_EXPR, |
5798 | etype, new_temp)); |
5799 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5800 | new_temp = tem; |
5801 | tem = make_ssa_name (eltype); |
5802 | epilog_stmt |
5803 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5804 | build3 (BIT_FIELD_REF, eltype, |
5805 | new_temp, TYPE_SIZE (eltype), |
5806 | bitsize_int (0))); |
5807 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5808 | dst1 = make_ssa_name (vectype1); |
5809 | epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, |
5810 | build1 (VIEW_CONVERT_EXPR, |
5811 | vectype1, tem)); |
5812 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5813 | tem = make_ssa_name (eltype); |
5814 | epilog_stmt |
5815 | = gimple_build_assign (tem, BIT_FIELD_REF, |
5816 | build3 (BIT_FIELD_REF, eltype, |
5817 | new_temp, TYPE_SIZE (eltype), |
5818 | bitsize_int (bitsize))); |
5819 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5820 | dst2 = make_ssa_name (vectype1); |
5821 | epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, |
5822 | build1 (VIEW_CONVERT_EXPR, |
5823 | vectype1, tem)); |
5824 | gimple_seq_add_stmt_without_update (seq, epilog_stmt); |
5825 | } |
5826 | |
5827 | new_temp = gimple_build (seq, code, vectype1, dst1, dst2); |
5828 | } |
5829 | |
5830 | return new_temp; |
5831 | } |
5832 | |
5833 | /* Function vect_create_epilog_for_reduction |
5834 | |
5835 | Create code at the loop-epilog to finalize the result of a reduction |
5836 | computation. |
5837 | |
5838 | STMT_INFO is the scalar reduction stmt that is being vectorized. |
5839 | SLP_NODE is an SLP node containing a group of reduction statements. The |
5840 | first one in this group is STMT_INFO. |
5841 | SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE |
5842 | REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi |
5843 | (counting from 0) |
5844 | |
5845 | This function: |
5846 | 1. Completes the reduction def-use cycles. |
5847 | 2. "Reduces" each vector of partial results VECT_DEFS into a single result, |
5848 | by calling the function specified by REDUC_FN if available, or by |
5849 | other means (whole-vector shifts or a scalar loop). |
5850 | The function also creates a new phi node at the loop exit to preserve |
5851 | loop-closed form, as illustrated below. |
5852 | |
5853 | The flow at the entry to this function: |
5854 | |
5855 | loop: |
5856 | vec_def = phi <vec_init, null> # REDUCTION_PHI |
5857 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5858 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5859 | loop_exit: |
5860 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5861 | use <s_out0> |
5862 | use <s_out0> |
5863 | |
5864 | The above is transformed by this function into: |
5865 | |
5866 | loop: |
5867 | vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI |
5868 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5869 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5870 | loop_exit: |
5871 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5872 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
5873 | v_out2 = reduce <v_out1> |
5874 | s_out3 = extract_field <v_out2, 0> |
5875 | s_out4 = adjust_result <s_out3> |
5876 | use <s_out4> |
5877 | use <s_out4> |
5878 | */ |
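/* As a concrete (illustrative) example, for a 4-lane integer sum the
   epilogue conceptually computes:

     v_out1 = {s0, s1, s2, s3}       # per-lane partial sums
     s_out3 = REDUC_PLUS <v_out1>    # = s0 + s1 + s2 + s3
     s_out4 = s_out3 + adjustment    # e.g. the original initial value

   The exact statements depend on which scheme is chosen below.  */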
5879 | |
5880 | static void |
5881 | vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, |
5882 | stmt_vec_info stmt_info, |
5883 | slp_tree slp_node, |
5884 | slp_instance slp_node_instance) |
5885 | { |
5886 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
5887 | gcc_assert (reduc_info->is_reduc_info); |
5888 | /* For double reductions we need to get at the inner loop reduction |
5889 | stmt which has the meta info attached. Our stmt_info is that of the |
5890 | loop-closed PHI of the inner loop which we remember as |
5891 | def for the reduction PHI generation. */ |
5892 | bool double_reduc = false; |
5893 | stmt_vec_info rdef_info = stmt_info; |
5894 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
5895 | { |
5896 | gcc_assert (!slp_node); |
5897 | double_reduc = true; |
5898 | stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def |
5899 | (stmt_info->stmt, 0)); |
5900 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
5901 | } |
5902 | gphi *reduc_def_stmt |
5903 | = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); |
5904 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); |
5905 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); |
5906 | tree vectype; |
5907 | machine_mode mode; |
5908 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; |
5909 | basic_block exit_bb; |
5910 | tree scalar_dest; |
5911 | tree scalar_type; |
5912 | gimple *new_phi = NULL, *phi = NULL; |
5913 | gimple_stmt_iterator exit_gsi; |
5914 | tree new_temp = NULL_TREE, new_name, new_scalar_dest; |
5915 | gimple *epilog_stmt = NULL; |
5916 | gimple *exit_phi; |
5917 | tree bitsize; |
5918 | tree def; |
5919 | tree orig_name, scalar_result; |
5920 | imm_use_iterator imm_iter, phi_imm_iter; |
5921 | use_operand_p use_p, phi_use_p; |
5922 | gimple *use_stmt; |
5923 | auto_vec<tree> reduc_inputs; |
5924 | int j, i; |
5925 | vec<tree> &scalar_results = reduc_info->reduc_scalar_results; |
5926 | unsigned int group_size = 1, k; |
5927 | auto_vec<gimple *> phis; |
5928 | /* SLP reduction without reduction chain, e.g., |
5929 | # a1 = phi <a2, a0> |
5930 | # b1 = phi <b2, b0> |
5931 | a2 = operation (a1) |
5932 | b2 = operation (b1) */ |
5933 | bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
5934 | bool direct_slp_reduc; |
5935 | tree induction_index = NULL_TREE; |
5936 | |
5937 | if (slp_node) |
5938 | group_size = SLP_TREE_LANES (slp_node); |
5939 | |
5940 | if (nested_in_vect_loop_p (loop, stmt_info)) |
5941 | { |
5942 | outer_loop = loop; |
5943 | loop = loop->inner; |
5944 | gcc_assert (!slp_node && double_reduc); |
5945 | } |
5946 | |
5947 | vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); |
5948 | gcc_assert (vectype); |
5949 | mode = TYPE_MODE (vectype); |
5950 | |
5951 | tree induc_val = NULL_TREE; |
5952 | tree adjustment_def = NULL; |
5953 | if (slp_node) |
5954 | ; |
5955 | else |
5956 | { |
5957 | /* Optimize: for induction condition reduction, if we can't use zero |
5958 | for induc_val, use initial_def. */ |
5959 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
5960 | induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); |
5961 | else if (double_reduc) |
5962 | ; |
5963 | else |
5964 | adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); |
5965 | } |
5966 | |
5967 | stmt_vec_info single_live_out_stmt[] = { stmt_info }; |
5968 | array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; |
5969 | if (slp_reduc) |
5970 | /* All statements produce live-out values. */ |
5971 | live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
5972 | else if (slp_node) |
5973 | { |
5974 | /* The last statement in the reduction chain produces the live-out |
5975 | value. Note SLP optimization can shuffle scalar stmts to |
5976 | optimize permutations so we have to search for the last stmt. */ |
5977 | for (k = 0; k < group_size; ++k) |
5978 | if (!REDUC_GROUP_NEXT_ELEMENT (SLP_TREE_SCALAR_STMTS (slp_node)[k])) |
5979 | { |
5980 | single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[k]; |
5981 | break; |
5982 | } |
5983 | } |
5984 | |
5985 | unsigned vec_num; |
5986 | int ncopies; |
5987 | if (slp_node) |
5988 | { |
5989 | vec_num = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length (); |
5990 | ncopies = 1; |
5991 | } |
5992 | else |
5993 | { |
5994 | stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt); |
5995 | vec_num = 1; |
5996 | ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length (); |
5997 | } |
5998 | |
5999 | /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) |
6000 | which is updated with the current index of the loop for every match of |
6001 | the original loop's cond_expr (VEC_STMT). This results in a vector |
6002 | containing the last time the condition passed for that vector lane. |
6003 | The first match will be a 1 to allow 0 to be used for non-matching |
6004 | indexes. If there are no matches at all then the vector will be all |
6005 | zeroes. |
6006 | |
6007 | PR92772: This algorithm is broken for architectures that support |
6008 | masked vectors, but do not provide fold_extract_last. */ |
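  /* Illustrative example (made-up values): with four lanes, if the
     condition last matched at scalar iterations 2 and 7, which map to
     lanes 1 and 2, the index vector ends up as {0, 2, 7, 0}.  Its
     maximum, 7, identifies the lane whose data value is the final
     result.  */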
6009 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) |
6010 | { |
6011 | auto_vec<std::pair<tree, bool>, 2> ccompares; |
6012 | stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); |
6013 | cond_info = vect_stmt_to_vectorize (cond_info); |
6014 | while (cond_info != reduc_info) |
6015 | { |
6016 | if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) |
6017 | { |
6018 | gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0]; |
6019 | gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); |
6020 | ccompares.safe_push |
6021 | (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), |
6022 | STMT_VINFO_REDUC_IDX (cond_info) == 2)); |
6023 | } |
6024 | cond_info |
6025 | = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, |
6026 | 1 + STMT_VINFO_REDUC_IDX |
6027 | (cond_info))); |
6028 | cond_info = vect_stmt_to_vectorize (cond_info); |
6029 | } |
6030 | gcc_assert (ccompares.length () != 0); |
6031 | |
6032 | tree indx_before_incr, indx_after_incr; |
6033 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); |
6034 | int scalar_precision |
6035 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); |
6036 | tree cr_index_scalar_type = make_unsigned_type (scalar_precision); |
6037 | tree cr_index_vector_type = get_related_vectype_for_scalar_type |
6038 | (TYPE_MODE (vectype), cr_index_scalar_type, |
6039 | TYPE_VECTOR_SUBPARTS (vectype)); |
6040 | |
6041 | /* First we create a simple vector induction variable which starts |
6042 | with the values {1,2,3,...} (SERIES_VECT) and increments by the |
6043 | vector size (STEP). */ |
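      /* E.g. with four lanes (illustrative lane count) the induction
         variable takes the values {1,2,3,4}, {5,6,7,8}, ... on successive
         vector iterations.  */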
6044 | |
6045 | /* Create a {1,2,3,...} vector. */ |
6046 | tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); |
6047 | |
6048 | /* Create a vector of the step value. */ |
6049 | tree step = build_int_cst (cr_index_scalar_type, nunits_out); |
6050 | tree vec_step = build_vector_from_val (cr_index_vector_type, step); |
6051 | |
6052 | /* Create an induction variable. */ |
6053 | gimple_stmt_iterator incr_gsi; |
6054 | bool insert_after; |
6055 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
6056 | create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi, |
6057 | insert_after, &indx_before_incr, &indx_after_incr); |
6058 | |
6059 | /* Next create a new phi node vector (NEW_PHI_TREE) which starts |
6060 | filled with zeros (VEC_ZERO). */ |
6061 | |
6062 | /* Create a vector of 0s. */ |
6063 | tree zero = build_zero_cst (cr_index_scalar_type); |
6064 | tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); |
6065 | |
6066 | /* Create a vector phi node. */ |
6067 | tree new_phi_tree = make_ssa_name (cr_index_vector_type); |
6068 | new_phi = create_phi_node (new_phi_tree, loop->header); |
6069 | add_phi_arg (as_a <gphi *> (new_phi), vec_zero, |
6070 | loop_preheader_edge (loop), UNKNOWN_LOCATION); |
6071 | |
6072 | /* Now take the condition from the loop's original cond_exprs |
6073 | and produce a new cond_expr (INDEX_COND_EXPR) which for |
6074 | every match uses values from the induction variable |
6075 | (INDEX_BEFORE_INCR) otherwise uses values from the phi node |
6076 | (NEW_PHI_TREE). |
6077 | Finally, we update the phi (NEW_PHI_TREE) to take the value of |
6078 | the new cond_expr (INDEX_COND_EXPR). */ |
6079 | gimple_seq stmts = NULL; |
6080 | for (int i = ccompares.length () - 1; i != -1; --i) |
6081 | { |
6082 | tree ccompare = ccompares[i].first; |
6083 | if (ccompares[i].second) |
6084 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
6085 | cr_index_vector_type, |
6086 | ccompare, |
6087 | indx_before_incr, new_phi_tree); |
6088 | else |
6089 | new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, |
6090 | cr_index_vector_type, |
6091 | ccompare, |
6092 | new_phi_tree, indx_before_incr); |
6093 | } |
6094 | gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); |
6095 | |
6096 | /* Update the phi with the vec cond. */ |
6097 | induction_index = new_phi_tree; |
6098 | add_phi_arg (as_a <gphi *> (new_phi), induction_index, |
6099 | loop_latch_edge (loop), UNKNOWN_LOCATION); |
6100 | } |
6101 | |
6102 | /* 2. Create epilog code. |
6103 | The reduction epilog code operates across the elements of the vector |
6104 | of partial results computed by the vectorized loop. |
6105 | The reduction epilog code consists of: |
6106 | |
6107 | step 1: compute the scalar result in a vector (v_out2) |
6108 | step 2: extract the scalar result (s_out3) from the vector (v_out2) |
6109 | step 3: adjust the scalar result (s_out3) if needed. |
6110 | |
6111 | Step 1 can be accomplished using one of the following three schemes: |
6112 | (scheme 1) using reduc_fn, if available. |
6113 | (scheme 2) using whole-vector shifts, if available. |
6114 | (scheme 3) using a scalar loop. In this case steps 1+2 above are |
6115 | combined. |
6116 | |
6117 | The overall epilog code looks like this: |
6118 | |
6119 | s_out0 = phi <s_loop> # original EXIT_PHI |
6120 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6121 | v_out2 = reduce <v_out1> # step 1 |
6122 | s_out3 = extract_field <v_out2, 0> # step 2 |
6123 | s_out4 = adjust_result <s_out3> # step 3 |
6124 | |
6125 | (step 3 is optional, and steps 1 and 2 may be combined). |
6126 | Lastly, the uses of s_out0 are replaced by s_out4. */ |
6127 | |
6128 | |
6129 | /* 2.1 Create new loop-exit-phis to preserve loop-closed form: |
6130 | v_out1 = phi <VECT_DEF> |
6131 | Store them in NEW_PHIS. */ |
6132 | if (double_reduc) |
6133 | loop = outer_loop; |
6134 | exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; |
6135 | exit_gsi = gsi_after_labels (exit_bb); |
6136 | reduc_inputs.create (slp_node ? vec_num : ncopies); |
6137 | for (unsigned i = 0; i < vec_num; i++) |
6138 | { |
6139 | gimple_seq stmts = NULL; |
6140 | if (slp_node) |
6141 | def = vect_get_slp_vect_def (slp_node, i); |
6142 | else |
6143 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); |
6144 | for (j = 0; j < ncopies; j++) |
6145 | { |
6146 | tree new_def = copy_ssa_name (def); |
6147 | phi = create_phi_node (new_def, exit_bb); |
6148 | if (j) |
6149 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); |
6150 | SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)->dest_idx, def); |
6151 | new_def = gimple_convert (&stmts, vectype, new_def); |
6152 | reduc_inputs.quick_push (new_def); |
6153 | } |
6154 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6155 | } |
6156 | |
6157 | /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 |
6158 | (i.e. when reduc_fn is not available) and in the final adjustment |
6159 | code (if needed). Also get the original scalar reduction variable as |
6160 | defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it |
6161 | represents a reduction pattern), the tree-code and scalar-def are |
6162 | taken from the original stmt that the pattern-stmt (STMT) replaces. |
6163 | Otherwise (it is a regular reduction) - the tree-code and scalar-def |
6164 | are taken from STMT. */ |
6165 | |
6166 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
6167 | if (orig_stmt_info != stmt_info) |
6168 | { |
6169 | /* Reduction pattern */ |
6170 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
6171 | gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); |
6172 | } |
6173 | |
6174 | scalar_dest = gimple_get_lhs (orig_stmt_info->stmt); |
6175 | scalar_type = TREE_TYPE (scalar_dest); |
6176 | scalar_results.truncate (0); |
6177 | scalar_results.reserve_exact (group_size); |
6178 | new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); |
6179 | bitsize = TYPE_SIZE (scalar_type); |
6180 | |
6181 | /* True if we should implement SLP_REDUC using native reduction operations |
6182 | instead of scalar operations. */ |
6183 | direct_slp_reduc = (reduc_fn != IFN_LAST |
6184 | && slp_reduc |
6185 | && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); |
6186 | |
6187 | /* In case of reduction chain, e.g., |
6188 | # a1 = phi <a3, a0> |
6189 | a2 = operation (a1) |
6190 | a3 = operation (a2), |
6191 | |
6192 | we may end up with more than one vector result. Here we reduce them |
6193 | to one vector. |
6194 | |
6195 | The same is true for a SLP reduction, e.g., |
6196 | # a1 = phi <a2, a0> |
6197 | # b1 = phi <b2, b0> |
6198 | a2 = operation (a1) |
6199 | b2 = operation (b1), |
6200 | |
6201 | where we can end up with more than one vector as well. We can |
6202 | easily accumulate vectors when the number of vector elements is |
6203 | a multiple of the SLP group size. |
6204 | |
6205 | The same is true if we couldn't use a single def-use cycle. */ |
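  /* As a sketch for a PLUS reduction, two partial result vectors
     {a0,a1,a2,a3} and {b0,b1,b2,b3} are first combined element-wise into
     {a0+b0, a1+b1, a2+b2, a3+b3}, and only that single vector is handed
     to the final reduction code below (illustrative values).  */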
6206 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
6207 | || direct_slp_reduc |
6208 | || (slp_reduc |
6209 | && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)) |
6210 | || ncopies > 1) |
6211 | { |
6212 | gimple_seq stmts = NULL; |
6213 | tree single_input = reduc_inputs[0]; |
6214 | for (k = 1; k < reduc_inputs.length (); k++) |
6215 | single_input = gimple_build (&stmts, code, vectype, |
6216 | single_input, reduc_inputs[k]); |
6217 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6218 | |
6219 | reduc_inputs.truncate (0); |
6220 | reduc_inputs.safe_push (single_input); |
6221 | } |
6222 | |
6223 | tree orig_reduc_input = reduc_inputs[0]; |
6224 | |
6225 | /* If this loop is an epilogue loop that can be skipped after the |
6226 | main loop, we can only share a reduction operation between the |
6227 | main loop and the epilogue if we put it at the target of the |
6228 | skip edge. |
6229 | |
6230 | We can still reuse accumulators if this check fails. Doing so has |
6231 | the minor(?) benefit of making the epilogue loop's scalar result |
6232 | independent of the main loop's scalar result. */ |
6233 | bool unify_with_main_loop_p = false; |
6234 | if (reduc_info->reused_accumulator |
6235 | && loop_vinfo->skip_this_loop_edge |
6236 | && single_succ_p (exit_bb) |
6237 | && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest) |
6238 | { |
6239 | unify_with_main_loop_p = true; |
6240 | |
6241 | basic_block reduc_block = loop_vinfo->skip_this_loop_edge->dest; |
6242 | reduc_inputs[0] = make_ssa_name (vectype); |
6243 | gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block); |
6244 | add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb), |
6245 | UNKNOWN_LOCATION); |
6246 | add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input, |
6247 | loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION); |
6248 | exit_gsi = gsi_after_labels (reduc_block); |
6249 | } |
6250 | |
6251 | /* Shouldn't be used beyond this point. */ |
6252 | exit_bb = nullptr; |
6253 | |
6254 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
6255 | && reduc_fn != IFN_LAST) |
6256 | { |
6257 | /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing |
6258 | various data values where the condition matched and another vector |
6259 | (INDUCTION_INDEX) containing all the indexes of those matches. We |
6260 | need to extract the last matching index (which will be the index with |
6261 | highest value) and use this to index into the data vector. |
6262 | For the case where there were no matches, the data vector will contain |
6263 | all default values and the index vector will be all zeros. */ |
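      /* Worked example (illustrative values): with REDUC_INPUTS 0 equal
         to {d0, d1, d2, d3} and INDUCTION_INDEX equal to {0, 2, 7, 0},
         the maximum index 7 selects lane 2, so the final scalar result
         is d2.  */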
6264 | |
6265 | /* Get various versions of the type of the vector of indexes. */ |
6266 | tree index_vec_type = TREE_TYPE (induction_index); |
6267 | gcc_checking_assert (TYPE_UNSIGNED (index_vec_type)); |
6268 | tree index_scalar_type = TREE_TYPE (index_vec_type); |
6269 | tree index_vec_cmp_type = truth_type_for (index_vec_type); |
6270 | |
6271 | /* Get an unsigned integer version of the type of the data vector. */ |
6272 | int scalar_precision |
6273 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); |
6274 | tree scalar_type_unsigned = make_unsigned_type (scalar_precision); |
6275 | tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned, |
6276 | vectype); |
6277 | |
6278 | /* First we need to create a vector (ZERO_VEC) of zeros and another |
6279 | vector (MAX_INDEX_VEC) filled with the last matching index, which we |
6280 | can create using a MAX reduction and then expanding. |
6281 | In the case where the loop never made any matches, the max index will |
6282 | be zero. */ |
6283 | |
6284 | /* Vector of {0, 0, 0,...}. */ |
6285 | tree zero_vec = build_zero_cst (vectype); |
6286 | |
6287 | /* Find maximum value from the vector of found indexes. */ |
6288 | tree max_index = make_ssa_name (index_scalar_type); |
6289 | gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
6290 | 1, induction_index); |
6291 | gimple_call_set_lhs (max_index_stmt, max_index); |
6292 | gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); |
6293 | |
6294 | /* Vector of {max_index, max_index, max_index,...}. */ |
6295 | tree max_index_vec = make_ssa_name (index_vec_type); |
6296 | tree max_index_vec_rhs = build_vector_from_val (index_vec_type, |
6297 | max_index); |
6298 | gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec, |
6299 | max_index_vec_rhs); |
6300 | gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT); |
6301 | |
6302 | /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes |
6303 | with the vector (INDUCTION_INDEX) of found indexes, choosing values |
6304 | from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC) |
6305 | otherwise. Only one value should match, resulting in a vector |
6306 | (VEC_COND) with one data value and the rest zeros. |
6307 | In the case where the loop never made any matches, every index will |
6308 | match, resulting in a vector with all data values (which will all be |
6309 | the default value). */ |
6310 | |
6311 | /* Compare the max index vector to the vector of found indexes to find |
6312 | the position of the max value. */ |
6313 | tree vec_compare = make_ssa_name (index_vec_cmp_type); |
6314 | gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR, |
6315 | induction_index, |
6316 | max_index_vec); |
6317 | gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT); |
6318 | |
6319 | /* Use the compare to choose either values from the data vector or |
6320 | zero. */ |
6321 | tree vec_cond = make_ssa_name (vectype); |
6322 | gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR, |
6323 | vec_compare, |
6324 | reduc_inputs[0], |
6325 | zero_vec); |
6326 | gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); |
6327 | |
6328 | /* Finally we need to extract the data value from the vector (VEC_COND) |
6329 | into a scalar (MATCHED_DATA_REDUC). Logically we want to do an OR |
6330 | reduction, but because this doesn't exist, we can use a MAX reduction |
6331 | instead. The data value might be signed or a float so we need to cast |
6332 | it first. |
6333 | In the case where the loop never made any matches, the data values are |
6334 | all identical, and so will reduce down correctly. */ |
6335 | |
6336 | /* Make the matched data values unsigned. */ |
6337 | tree vec_cond_cast = make_ssa_name (vectype_unsigned); |
6338 | tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned, |
6339 | vec_cond); |
6340 | gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast, |
6341 | VIEW_CONVERT_EXPR, |
6342 | vec_cond_cast_rhs); |
6343 | gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); |
6344 | |
6345 | /* Reduce down to a scalar value. */ |
6346 | tree data_reduc = make_ssa_name (scalar_type_unsigned); |
6347 | gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
6348 | 1, vec_cond_cast); |
6349 | gimple_call_set_lhs (data_reduc_stmt, data_reduc); |
6350 | gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); |
6351 | |
6352 | /* Convert the reduced value back to the result type and set as the |
6353 | result. */ |
6354 | gimple_seq stmts = NULL; |
6355 | new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type, |
6356 | data_reduc); |
6357 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6358 | scalar_results.safe_push (new_temp); |
6359 | } |
6360 | else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
6361 | && reduc_fn == IFN_LAST) |
6362 | { |
6363 | /* Condition reduction without supported IFN_REDUC_MAX. Generate |
6364 | idx = 0; |
6365 | idx_val = induction_index[0]; |
6366 | val = data_reduc[0]; |
6367 | for (idx = 0, val = init, i = 0; i < nelts; ++i) |
6368 | if (induction_index[i] > idx_val) |
6369 | val = data_reduc[i], idx_val = induction_index[i]; |
6370 | return val; */ |
6371 | |
6372 | tree data_eltype = TREE_TYPE (vectype); |
6373 | tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); |
6374 | unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); |
6375 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); |
6376 | /* Enforced by vectorizable_reduction, which ensures we have target |
6377 | support before allowing a conditional reduction on variable-length |
6378 | vectors. */ |
6379 | unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant (); |
6380 | tree idx_val = NULL_TREE, val = NULL_TREE; |
6381 | for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) |
6382 | { |
6383 | tree old_idx_val = idx_val; |
6384 | tree old_val = val; |
6385 | idx_val = make_ssa_name (idx_eltype); |
6386 | epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF, |
6387 | build3 (BIT_FIELD_REF, idx_eltype, |
6388 | induction_index, |
6389 | bitsize_int (el_size), |
6390 | bitsize_int (off))); |
6391 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6392 | val = make_ssa_name (data_eltype); |
6393 | epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, |
6394 | build3 (BIT_FIELD_REF, |
6395 | data_eltype, |
6396 | reduc_inputs[0], |
6397 | bitsize_int (el_size), |
6398 | bitsize_int (off))); |
6399 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6400 | if (off != 0) |
6401 | { |
6402 | tree new_idx_val = idx_val; |
6403 | if (off != v_size - el_size) |
6404 | { |
6405 | new_idx_val = make_ssa_name (idx_eltype); |
6406 | epilog_stmt = gimple_build_assign (new_idx_val, |
6407 | MAX_EXPR, idx_val, |
6408 | old_idx_val); |
6409 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6410 | } |
6411 | tree cond = make_ssa_name (boolean_type_node); |
6412 | epilog_stmt = gimple_build_assign (cond, GT_EXPR, |
6413 | idx_val, old_idx_val); |
6414 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6415 | tree new_val = make_ssa_name (data_eltype); |
6416 | epilog_stmt = gimple_build_assign (new_val, COND_EXPR, |
6417 | cond, val, old_val); |
6418 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6419 | idx_val = new_idx_val; |
6420 | val = new_val; |
6421 | } |
6422 | } |
6423 | /* Convert the reduced value back to the result type and set as the |
6424 | result. */ |
6425 | gimple_seq stmts = NULL; |
6426 | val = gimple_convert (&stmts, scalar_type, val); |
6427 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6428 | scalar_results.safe_push (val); |
6429 | } |
6430 | |
6431 | /* 2.3 Create the reduction code, using one of the three schemes described |
6432 | above. In SLP we simply need to extract all the elements from the |
6433 | vector (without reducing them), so we use scalar shifts. */ |
6434 | else if (reduc_fn != IFN_LAST && !slp_reduc) |
6435 | { |
6436 | tree tmp; |
6437 | tree vec_elem_type; |
6438 | |
6439 | /* Case 1: Create: |
6440 | v_out2 = reduc_expr <v_out1> */ |
6441 | |
6442 | if (dump_enabled_p ()) |
6443 | dump_printf_loc (MSG_NOTE, vect_location, |
6444 | "Reduce using direct vector reduction.\n"); |
6445 | |
6446 | gimple_seq stmts = NULL; |
6447 | vec_elem_type = TREE_TYPE (vectype); |
6448 | new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn), |
6449 | vec_elem_type, reduc_inputs[0]); |
6450 | new_temp = gimple_convert (&stmts, scalar_type, new_temp); |
6451 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6452 | |
6453 | if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
6454 | && induc_val) |
6455 | { |
6456 | /* Earlier we set the initial value to be a vector of induc_val |
6457 | values. Check the result and if it is induc_val then replace |
6458 | with the original initial value, unless induc_val is |
6459 | the same as initial_def already. */ |
6460 | tree zcompare = make_ssa_name (boolean_type_node); |
6461 | epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, |
6462 | new_temp, induc_val); |
6463 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6464 | tree initial_def = reduc_info->reduc_initial_values[0]; |
6465 | tmp = make_ssa_name (new_scalar_dest); |
6466 | epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
6467 | initial_def, new_temp); |
6468 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6469 | new_temp = tmp; |
6470 | } |
6471 | |
6472 | scalar_results.safe_push (new_temp); |
6473 | } |
6474 | else if (direct_slp_reduc) |
6475 | { |
6476 | /* Here we create one vector for each of the REDUC_GROUP_SIZE results, |
6477 | with the elements for other SLP statements replaced with the |
6478 | neutral value. We can then do a normal reduction on each vector. */ |
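      /* E.g. for a group of two reductions a and b with input lanes
         {a0, b0, a1, b1} (sketch only):
           the result for a reduces {a0, neutral, a1, neutral}
           the result for b reduces {neutral, b0, neutral, b1}  */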
6479 | |
6480 | /* Enforced by vectorizable_reduction. */ |
6481 | gcc_assert (reduc_inputs.length () == 1); |
6482 | gcc_assert (pow2p_hwi (group_size)); |
6483 | |
6484 | gimple_seq seq = NULL; |
6485 | |
6486 | /* Build a vector {0, 1, 2, ...}, with the same number of elements |
6487 | and the same element size as VECTYPE. */ |
6488 | tree index = build_index_vector (vectype, 0, 1); |
6489 | tree index_type = TREE_TYPE (index); |
6490 | tree index_elt_type = TREE_TYPE (index_type); |
6491 | tree mask_type = truth_type_for (index_type); |
6492 | |
6493 | /* Create a vector that, for each element, identifies which of |
6494 | the REDUC_GROUP_SIZE results should use it. */ |
6495 | tree index_mask = build_int_cst (index_elt_type, group_size - 1); |
6496 | index = gimple_build (&seq, BIT_AND_EXPR, index_type, index, |
6497 | build_vector_from_val (index_type, index_mask)); |
6498 | |
6499 | /* Get a neutral vector value. This is simply a splat of the neutral |
6500 | scalar value if we have one, otherwise the initial scalar value |
6501 | is itself a neutral value. */ |
6502 | tree vector_identity = NULL_TREE; |
6503 | tree neutral_op = NULL_TREE; |
6504 | if (slp_node) |
6505 | { |
6506 | tree initial_value = NULL_TREE; |
6507 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
6508 | initial_value = reduc_info->reduc_initial_values[0]; |
6509 | neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, |
6510 | initial_value, false); |
6511 | } |
6512 | if (neutral_op) |
6513 | vector_identity = gimple_build_vector_from_val (&seq, vectype, |
6514 | neutral_op); |
6515 | for (unsigned int i = 0; i < group_size; ++i) |
6516 | { |
6517 | /* If there's no universal neutral value, we can use the |
6518 | initial scalar value from the original PHI. This is used |
6519 | for MIN and MAX reduction, for example. */ |
6520 | if (!neutral_op) |
6521 | { |
6522 | tree scalar_value = reduc_info->reduc_initial_values[i]; |
6523 | scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), |
6524 | scalar_value); |
6525 | vector_identity = gimple_build_vector_from_val (&seq, vectype, |
6526 | scalar_value); |
6527 | } |
6528 | |
6529 | /* Calculate the equivalent of: |
6530 | |
6531 | sel[j] = (index[j] == i); |
6532 | |
6533 | which selects the elements of REDUC_INPUTS[0] that should |
6534 | be included in the result. */ |
6535 | tree compare_val = build_int_cst (index_elt_type, i); |
6536 | compare_val = build_vector_from_val (index_type, compare_val); |
6537 | tree sel = gimple_build (&seq, EQ_EXPR, mask_type, |
6538 | index, compare_val); |
6539 | |
6540 | /* Calculate the equivalent of: |
6541 | |
6542 | vec = sel ? reduc_inputs[0] : vector_identity; |
6543 | |
6544 | VEC is now suitable for a full vector reduction. */ |
6545 | tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype, |
6546 | sel, reduc_inputs[0], vector_identity); |
6547 | |
6548 | /* Do the reduction and convert it to the appropriate type. */ |
6549 | tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn), |
6550 | TREE_TYPE (vectype), vec); |
6551 | scalar = gimple_convert (&seq, scalar_type, scalar); |
6552 | scalar_results.safe_push (scalar); |
6553 | } |
6554 | gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT); |
6555 | } |
6556 | else |
6557 | { |
6558 | bool reduce_with_shift; |
6559 | tree vec_temp; |
6560 | |
6561 | gcc_assert (slp_reduc || reduc_inputs.length () == 1); |
6562 | |
6563 | /* See if the target wants to do the final (shift) reduction |
6564 | in a vector mode of smaller size and first reduce upper/lower |
6565 | halves against each other. */ |
6566 | enum machine_mode mode1 = mode; |
6567 | tree stype = TREE_TYPE (vectype); |
6568 | unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); |
6569 | unsigned nunits1 = nunits; |
6570 | if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode |
6571 | && reduc_inputs.length () == 1) |
6572 | { |
6573 | nunits1 = GET_MODE_NUNITS (mode1).to_constant (); |
6574 | /* For SLP reductions we have to make sure lanes match up, but |
6575 | since we're doing individual element final reduction reducing |
6576 | vector width here is even more important. |
6577 | ??? We can also separate lanes with permutes, for the common |
6578 | case of power-of-two group-size odd/even extracts would work. */ |
6579 | if (slp_reduc && nunits != nunits1) |
6580 | { |
6581 | nunits1 = least_common_multiple (nunits1, group_size); |
6582 | gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits); |
6583 | } |
6584 | } |
6585 | if (!slp_reduc |
6586 | && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) |
6587 | nunits1 = GET_MODE_NUNITS (mode1).to_constant (); |
6588 | |
6589 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), |
6590 | stype, nunits1); |
6591 | reduce_with_shift = have_whole_vector_shift (mode1); |
6592 | if (!VECTOR_MODE_P (mode1) |
6593 | || !directly_supported_p (code, vectype1)) |
6594 | reduce_with_shift = false; |
6595 | |
6596 | /* First reduce the vector to the desired vector size we should |
6597 | do shift reduction on by combining upper and lower halves. */ |
6598 | gimple_seq stmts = NULL; |
6599 | new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1, |
6600 | code, &stmts); |
6601 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6602 | reduc_inputs[0] = new_temp; |
6603 | |
6604 | if (reduce_with_shift && !slp_reduc) |
6605 | { |
6606 | int element_bitsize = tree_to_uhwi (bitsize); |
6607 | /* Enforced by vectorizable_reduction, which disallows SLP reductions |
6608 | for variable-length vectors and also requires direct target support |
6609 | for loop reductions. */ |
6610 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
6611 | int nelements = vec_size_in_bits / element_bitsize; |
6612 | vec_perm_builder sel; |
6613 | vec_perm_indices indices; |
6614 | |
6615 | int elt_offset; |
6616 | |
6617 | tree zero_vec = build_zero_cst (vectype1); |
6618 | /* Case 2: Create: |
6619 | for (offset = nelements/2; offset >= 1; offset/=2) |
6620 | { |
6621 | Create: va' = vec_shift <va, offset> |
6622 | Create: va = vop <va, va'> |
6623 | } */ |
6624 | |
6625 | tree rhs; |
6626 | |
6627 | if (dump_enabled_p ()) |
6628 | dump_printf_loc (MSG_NOTE, vect_location, |
6629 | "Reduce using vector shifts\n"); |
6630 | |
6631 | gimple_seq stmts = NULL; |
6632 | new_temp = gimple_convert (&stmts, vectype1, new_temp); |
6633 | for (elt_offset = nelements / 2; |
6634 | elt_offset >= 1; |
6635 | elt_offset /= 2) |
6636 | { |
6637 | calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); |
6638 | indices.new_vector (sel, 2, nelements); |
6639 | tree mask = vect_gen_perm_mask_any (vectype1, indices); |
6640 | new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, |
6641 | new_temp, zero_vec, mask); |
6642 | new_temp = gimple_build (&stmts, code, |
6643 | vectype1, new_name, new_temp); |
6644 | } |
6645 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6646 | |
6647 | /* 2.4 Extract the final scalar result. Create: |
6648 | s_out3 = extract_field <v_out2, bitpos> */ |
6649 | |
6650 | if (dump_enabled_p ()) |
6651 | dump_printf_loc (MSG_NOTE, vect_location, |
6652 | "extract scalar result\n"); |
6653 | |
6654 | rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, |
6655 | bitsize, bitsize_zero_node); |
6656 | epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); |
6657 | new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); |
6658 | gimple_assign_set_lhs (epilog_stmt, new_temp); |
6659 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6660 | scalar_results.safe_push (new_temp); |
6661 | } |
6662 | else |
6663 | { |
6664 | /* Case 3: Create: |
6665 | s = extract_field <v_out2, 0> |
6666 | for (offset = element_size; |
6667 | offset < vector_size; |
6668 | offset += element_size;) |
6669 | { |
6670 | Create: s' = extract_field <v_out2, offset> |
6671 | Create: s = op <s, s'> // For non SLP cases |
6672 | } */ |
6673 | |
6674 | if (dump_enabled_p ()) |
6675 | dump_printf_loc (MSG_NOTE, vect_location, |
6676 | "Reduce using scalar code.\n"); |
6677 | |
6678 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
6679 | int element_bitsize = tree_to_uhwi (bitsize); |
6680 | tree compute_type = TREE_TYPE (vectype); |
6681 | gimple_seq stmts = NULL; |
6682 | FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp) |
6683 | { |
6684 | int bit_offset; |
6685 | new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type, |
6686 | vec_temp, bitsize, bitsize_zero_node); |
6687 | |
6688 | /* In SLP we don't need to apply reduction operation, so we just |
6689 | collect s' values in SCALAR_RESULTS. */ |
6690 | if (slp_reduc) |
6691 | scalar_results.safe_push (new_temp); |
6692 | |
6693 | for (bit_offset = element_bitsize; |
6694 | bit_offset < vec_size_in_bits; |
6695 | bit_offset += element_bitsize) |
6696 | { |
6697 | tree bitpos = bitsize_int (bit_offset); |
6698 | new_name = gimple_build (&stmts, BIT_FIELD_REF, |
6699 | compute_type, vec_temp, |
6700 | bitsize, bitpos); |
6701 | if (slp_reduc) |
6702 | { |
6703 | /* In SLP we don't need to apply reduction operation, so |
6704 | we just collect s' values in SCALAR_RESULTS. */ |
6705 | new_temp = new_name; |
6706 | scalar_results.safe_push (new_name); |
6707 | } |
6708 | else |
6709 | new_temp = gimple_build (&stmts, code, compute_type, |
6710 | new_name, new_temp); |
6711 | } |
6712 | } |
6713 | |
6714 | /* The only case where we need to reduce scalar results in SLP is |
6715 | unrolling. If the size of SCALAR_RESULTS is greater than |
6716 | REDUC_GROUP_SIZE, we reduce them combining elements modulo |
6717 | REDUC_GROUP_SIZE. */ |
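          /* E.g. with REDUC_GROUP_SIZE 2 and extracted scalars
             {a0, b0, a1, b1}, the surviving results are a0 op a1 and
             b0 op b1 (sketch only).  */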
6718 | if (slp_reduc) |
6719 | { |
6720 | tree res, first_res, new_res; |
6721 | |
6722 | /* Reduce multiple scalar results in case of SLP unrolling. */ |
6723 | for (j = group_size; scalar_results.iterate (j, &res); |
6724 | j++) |
6725 | { |
6726 | first_res = scalar_results[j % group_size]; |
6727 | new_res = gimple_build (&stmts, code, compute_type, |
6728 | first_res, res); |
6729 | scalar_results[j % group_size] = new_res; |
6730 | } |
6731 | scalar_results.truncate (group_size); |
6732 | for (k = 0; k < group_size; k++) |
6733 | scalar_results[k] = gimple_convert (&stmts, scalar_type, |
6734 | scalar_results[k]); |
6735 | } |
6736 | else |
6737 | { |
6738 | /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ |
6739 | new_temp = gimple_convert (&stmts, scalar_type, new_temp); |
6740 | scalar_results.safe_push (new_temp); |
6741 | } |
6742 | |
6743 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6744 | } |
6745 | |
6746 | if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
6747 | && induc_val) |
6748 | { |
6749 | /* Earlier we set the initial value to be a vector of induc_val |
6750 | values. Check the result and if it is induc_val then replace |
6751 | with the original initial value, unless induc_val is |
6752 | the same as initial_def already. */ |
6753 | tree zcompare = make_ssa_name (boolean_type_node); |
6754 | epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, new_temp, |
6755 | induc_val); |
6756 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6757 | tree initial_def = reduc_info->reduc_initial_values[0]; |
6758 | tree tmp = make_ssa_name (new_scalar_dest); |
6759 | epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
6760 | initial_def, new_temp); |
6761 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6762 | scalar_results[0] = tmp; |
6763 | } |
6764 | } |
6765 | |
6766 | /* 2.5 Adjust the final result by the initial value of the reduction |
6767 | variable. (When such adjustment is not needed, then |
6768 | 'adjustment_def' is zero). For example, if code is PLUS we create: |
6769 | new_temp = loop_exit_def + adjustment_def */ |
6770 | |
6771 | if (adjustment_def) |
6772 | { |
6773 | gcc_assert (!slp_reduc); |
6774 | gimple_seq stmts = NULL; |
6775 | if (double_reduc) |
6776 | { |
6777 | gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); |
6778 | adjustment_def = gimple_convert (&stmts, vectype, adjustment_def); |
6779 | new_temp = gimple_build (&stmts, code, vectype, |
6780 | reduc_inputs[0], adjustment_def); |
6781 | } |
6782 | else |
6783 | { |
6784 | new_temp = scalar_results[0]; |
6785 | gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE); |
6786 | adjustment_def = gimple_convert (&stmts, TREE_TYPE (vectype), |
6787 | adjustment_def); |
6788 | new_temp = gimple_convert (&stmts, TREE_TYPE (vectype), new_temp); |
6789 | new_temp = gimple_build (&stmts, code, TREE_TYPE (vectype), |
6790 | new_temp, adjustment_def); |
6791 | new_temp = gimple_convert (&stmts, scalar_type, new_temp); |
6792 | } |
6793 | |
6794 | epilog_stmt = gimple_seq_last_stmt (stmts); |
6795 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6796 | scalar_results[0] = new_temp; |
6797 | } |
6798 | |
6799 | /* Record this operation if it could be reused by the epilogue loop. */ |
6800 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION |
6801 | && reduc_inputs.length () == 1) |
6802 | loop_vinfo->reusable_accumulators.put (scalar_results[0], |
6803 | { orig_reduc_input, reduc_info }); |
6804 | |
6805 | if (double_reduc) |
6806 | loop = outer_loop; |
6807 | |
6808 | /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit |
6809 | phis with new adjusted scalar results, i.e., replace use <s_out0> |
6810 | with use <s_out4>. |
6811 | |
6812 | Transform: |
6813 | loop_exit: |
6814 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
6815 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6816 | v_out2 = reduce <v_out1> |
6817 | s_out3 = extract_field <v_out2, 0> |
6818 | s_out4 = adjust_result <s_out3> |
6819 | use <s_out0> |
6820 | use <s_out0> |
6821 | |
6822 | into: |
6823 | |
6824 | loop_exit: |
6825 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
6826 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6827 | v_out2 = reduce <v_out1> |
6828 | s_out3 = extract_field <v_out2, 0> |
6829 | s_out4 = adjust_result <s_out3> |
6830 | use <s_out4> |
6831 | use <s_out4> */ |
6832 | |
6833 | gcc_assert (live_out_stmts.size () == scalar_results.length ()); |
6834 | for (k = 0; k < live_out_stmts.size (); k++) |
6835 | { |
6836 | stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]); |
6837 | scalar_dest = gimple_get_lhs (scalar_stmt_info->stmt); |
6838 | |
6839 | phis.create (3); |
6840 | /* Find the loop-closed-use at the loop exit of the original scalar |
6841 | result. (The reduction result is expected to have two immediate uses, |
6842 | one at the latch block, and one at the loop exit). For double |
6843 | reductions we are looking for exit phis of the outer loop. */ |
6844 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) |
6845 | { |
6846 | if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))) |
6847 | { |
6848 | if (!is_gimple_debug (USE_STMT (use_p))) |
6849 | phis.safe_push (USE_STMT (use_p)); |
6850 | } |
6851 | else |
6852 | { |
6853 | if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI) |
6854 | { |
6855 | tree phi_res = PHI_RESULT (USE_STMT (use_p)); |
6856 | |
6857 | FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res) |
6858 | { |
6859 | if (!flow_bb_inside_loop_p (loop, |
6860 | gimple_bb (USE_STMT (phi_use_p))) |
6861 | && !is_gimple_debug (USE_STMT (phi_use_p))) |
6862 | phis.safe_push (USE_STMT (phi_use_p)); |
6863 | } |
6864 | } |
6865 | } |
6866 | } |
6867 | |
6868 | FOR_EACH_VEC_ELT (phis, i, exit_phi) |
6869 | { |
6870 | /* Replace the uses: */ |
6871 | orig_name = PHI_RESULT (exit_phi); |
6872 | |
6873 | /* Look for a single use at the target of the skip edge. */ |
6874 | if (unify_with_main_loop_p) |
6875 | { |
6876 | use_operand_p use_p; |
6877 | gimple *user; |
6878 | if (!single_imm_use (orig_name, &use_p, &user)) |
6879 | gcc_unreachable (); |
6880 | orig_name = gimple_get_lhs (user); |
6881 | } |
6882 | |
6883 | scalar_result = scalar_results[k]; |
6884 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) |
6885 | { |
6886 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
6887 | SET_USE (use_p, scalar_result); |
6888 | update_stmt (use_stmt); |
6889 | } |
6890 | } |
6891 | |
6892 | phis.release (); |
6893 | } |
6894 | } |
6895 | |
6896 | /* Return a vector of type VECTYPE that is equal to the vector select |
6897 | operation "MASK ? VEC : IDENTITY". Insert the select statements |
6898 | before GSI. */ |
6899 | |
6900 | static tree |
6901 | merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype, |
6902 | tree vec, tree identity) |
6903 | { |
6904 | tree cond = make_temp_ssa_name (vectype, NULL, "cond"); |
6905 | gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR, |
6906 | mask, vec, identity); |
6907 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
6908 | return cond; |
6909 | } |
6910 | |
6911 | /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right |
6912 | order, starting with LHS. Insert the extraction statements before GSI and |
6913 | associate the new scalar SSA names with variable SCALAR_DEST. |
6914 | If MASK is nonzero, mask the input and then operate on it unconditionally. |
6915 | Return the SSA name for the result. */ |
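/* E.g. for a four-element VECTOR_RHS {v0, v1, v2, v3} this conceptually
   emits

     lhs = ((((lhs CODE v0) CODE v1) CODE v2) CODE v3)

   preserving the strict left-to-right evaluation order (sketch only).  */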
6916 | |
6917 | static tree |
6918 | vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, |
6919 | tree_code code, tree lhs, tree vector_rhs, |
6920 | tree mask) |
6921 | { |
6922 | tree vectype = TREE_TYPE (vector_rhs); |
6923 | tree scalar_type = TREE_TYPE (vectype); |
6924 | tree bitsize = TYPE_SIZE (scalar_type); |
6925 | unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); |
6926 | unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize); |
6927 | |
6928 | /* Re-create a VEC_COND_EXPR to mask the input here in order to be able |
6929 | to perform an unconditional element-wise reduction of it. */ |
6930 | if (mask) |
6931 | { |
6932 | tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL, |
6933 | "masked_vector_rhs"); |
6934 | tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE, |
6935 | false); |
6936 | tree vector_identity = build_vector_from_val (vectype, neutral_op); |
6937 | gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR, |
6938 | mask, vector_rhs, vector_identity); |
6939 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
6940 | vector_rhs = masked_vector_rhs; |
6941 | } |
6942 | |
6943 | for (unsigned HOST_WIDE_INT bit_offset = 0; |
6944 | bit_offset < vec_size_in_bits; |
6945 | bit_offset += element_bitsize) |
6946 | { |
6947 | tree bitpos = bitsize_int (bit_offset); |
6948 | tree rhs = build3 (BIT_FIELD_REF, scalar_type, vector_rhs, |
6949 | bitsize, bitpos); |
6950 | |
6951 | gassign *stmt = gimple_build_assign (scalar_dest, rhs); |
6952 | rhs = make_ssa_name (scalar_dest, stmt); |
6953 | gimple_assign_set_lhs (stmt, rhs); |
6954 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
6955 | |
6956 | stmt = gimple_build_assign (scalar_dest, code, lhs, rhs); |
6957 | tree new_name = make_ssa_name (scalar_dest, stmt); |
6958 | gimple_assign_set_lhs (stmt, new_name); |
6959 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
6960 | lhs = new_name; |
6961 | } |
6962 | return lhs; |
6963 | } |
6964 | |
6965 | /* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the |
6966 | type of the vector input. */ |
6967 | |
6968 | static internal_fn |
6969 | get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in) |
6970 | { |
6971 | internal_fn mask_reduc_fn; |
6972 | internal_fn mask_len_reduc_fn; |
6973 | |
6974 | switch (reduc_fn) |
6975 | { |
6976 | case IFN_FOLD_LEFT_PLUS: |
6977 | mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS; |
6978 | mask_len_reduc_fn = IFN_MASK_LEN_FOLD_LEFT_PLUS; |
6979 | break; |
6980 | |
6981 | default: |
6982 | return IFN_LAST; |
6983 | } |
6984 | |
6985 | if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in, |
6986 | OPTIMIZE_FOR_SPEED)) |
6987 | return mask_reduc_fn; |
6988 | if (direct_internal_fn_supported_p (mask_len_reduc_fn, vectype_in, |
6989 | OPTIMIZE_FOR_SPEED)) |
6990 | return mask_len_reduc_fn; |
6991 | return IFN_LAST; |
6992 | } |
6993 | |
6994 | /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the |
6995 | statement that sets the live-out value. REDUC_DEF_STMT is the phi |
6996 | statement. CODE is the operation performed by STMT_INFO and OPS are |
6997 | its scalar operands. REDUC_INDEX is the index of the operand in |
6998 | OPS that is set by REDUC_DEF_STMT. REDUC_FN is the function that |
6999 | implements in-order reduction, or IFN_LAST if we should open-code it. |
7000 | VECTYPE_IN is the type of the vector input. MASKS specifies the masks |
7001 | that should be used to control the operation in a fully-masked loop. */ |
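/* As a rough sketch of the fully-masked case, each vector iteration
   conceptually performs

     res = MASK_FOLD_LEFT_PLUS <res, vec, loop_mask>

   for a PLUS reduction, i.e. inactive lanes are skipped while active
   lanes are accumulated strictly in order.  */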
7002 | |
7003 | static bool |
7004 | vectorize_fold_left_reduction (loop_vec_info loop_vinfo, |
7005 | stmt_vec_info stmt_info, |
7006 | gimple_stmt_iterator *gsi, |
7007 | gimple **vec_stmt, slp_tree slp_node, |
7008 | gimple *reduc_def_stmt, |
7009 | code_helper code, internal_fn reduc_fn, |
7010 | tree *ops, int num_ops, tree vectype_in, |
7011 | int reduc_index, vec_loop_masks *masks, |
7012 | vec_loop_lens *lens) |
7013 | { |
7014 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7015 | tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
7016 | internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); |
7017 | |
7018 | int ncopies; |
7019 | if (slp_node) |
7020 | ncopies = 1; |
7021 | else |
7022 | ncopies = vect_get_num_copies (loop_vinfo, vectype_in); |
7023 | |
7024 | gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); |
7025 | gcc_assert (ncopies == 1); |
7026 | |
7027 | bool is_cond_op = false; |
7028 | if (!code.is_tree_code ()) |
7029 | { |
7030 | code = conditional_internal_fn_code (internal_fn (code)); |
7031 | gcc_assert (code != ERROR_MARK); |
7032 | is_cond_op = true; |
7033 | } |
7034 | |
7035 | gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op); |
7036 | |
7037 | if (slp_node) |
7038 | { |
7039 | if (is_cond_op) |
7040 | { |
7041 | if (dump_enabled_p ()) |
7042 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7043 | "fold-left reduction on SLP not supported.\n" ); |
7044 | return false; |
7045 | } |
7046 | |
7047 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), |
7048 | TYPE_VECTOR_SUBPARTS (vectype_in))); |
7049 | } |
7050 | |
7051 | /* The operands either come from a binary operation or an IFN_COND operation. |
7052 | The former is a gimple assign with binary rhs and the latter is a |
7053 | gimple call with four arguments. */ |
7054 | gcc_assert (num_ops == 2 || num_ops == 4); |
7055 | tree op0, opmask; |
7056 | if (!is_cond_op) |
7057 | op0 = ops[1 - reduc_index]; |
7058 | else |
7059 | { |
7060 | op0 = ops[2]; |
7061 | opmask = ops[0]; |
7062 | gcc_assert (!slp_node); |
7063 | } |
7064 | |
7065 | int group_size = 1; |
7066 | stmt_vec_info scalar_dest_def_info; |
7067 | auto_vec<tree> vec_oprnds0, vec_opmask; |
7068 | if (slp_node) |
7069 | { |
7070 | auto_vec<vec<tree> > vec_defs (2); |
7071 | vect_get_slp_defs (loop_vinfo, slp_node, &vec_defs); |
vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
7073 | vec_defs[0].release (); |
7074 | vec_defs[1].release (); |
7075 | group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); |
7076 | scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; |
7077 | } |
7078 | else |
7079 | { |
vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
op0, &vec_oprnds0);
7082 | scalar_dest_def_info = stmt_info; |
7083 | |
7084 | /* For an IFN_COND_OP we also need the vector mask operand. */ |
7085 | if (is_cond_op) |
vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
opmask, &vec_opmask);
7088 | } |
7089 | |
7090 | gimple *sdef = scalar_dest_def_info->stmt; |
7091 | tree scalar_dest = gimple_get_lhs (sdef); |
7092 | tree scalar_type = TREE_TYPE (scalar_dest); |
tree reduc_var = gimple_phi_result (reduc_def_stmt);
7094 | |
7095 | int vec_num = vec_oprnds0.length (); |
7096 | gcc_assert (vec_num == 1 || slp_node); |
7097 | tree vec_elem_type = TREE_TYPE (vectype_out); |
7098 | gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type)); |
7099 | |
7100 | tree vector_identity = NULL_TREE; |
7101 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
7102 | { |
7103 | vector_identity = build_zero_cst (vectype_out); |
7104 | if (!HONOR_SIGNED_ZEROS (vectype_out)) |
7105 | ; |
7106 | else |
7107 | { |
7108 | gcc_assert (!HONOR_SIGN_DEPENDENT_ROUNDING (vectype_out)); |
7109 | vector_identity = const_unop (NEGATE_EXPR, vectype_out, |
7110 | vector_identity); |
7111 | } |
7112 | } |
7113 | |
7114 | tree scalar_dest_var = vect_create_destination_var (scalar_dest, NULL); |
7115 | int i; |
7116 | tree def0; |
7117 | FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) |
7118 | { |
7119 | gimple *new_stmt; |
7120 | tree mask = NULL_TREE; |
7121 | tree len = NULL_TREE; |
7122 | tree bias = NULL_TREE; |
7123 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
7124 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i); |
7125 | else if (is_cond_op) |
7126 | mask = vec_opmask[0]; |
7127 | if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
7128 | { |
7129 | len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in, |
7130 | i, 1); |
7131 | signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
7132 | bias = build_int_cst (intQI_type_node, biasval); |
7133 | if (!is_cond_op) |
7134 | mask = build_minus_one_cst (truth_type_for (vectype_in)); |
7135 | } |
7136 | |
7137 | /* Handle MINUS by adding the negative. */ |
7138 | if (reduc_fn != IFN_LAST && code == MINUS_EXPR) |
7139 | { |
tree negated = make_ssa_name (vectype_out);
7141 | new_stmt = gimple_build_assign (negated, NEGATE_EXPR, def0); |
7142 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
7143 | def0 = negated; |
7144 | } |
7145 | |
7146 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
7147 | && mask && mask_reduc_fn == IFN_LAST) |
def0 = merge_with_identity (gsi, mask, vectype_out, def0,
vector_identity);
7150 | |
7151 | /* On the first iteration the input is simply the scalar phi |
7152 | result, and for subsequent iterations it is the output of |
7153 | the preceding operation. */ |
7154 | if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST)) |
7155 | { |
7156 | if (mask && len && mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS) |
7157 | new_stmt = gimple_build_call_internal (mask_reduc_fn, 5, reduc_var, |
7158 | def0, mask, len, bias); |
7159 | else if (mask && mask_reduc_fn == IFN_MASK_FOLD_LEFT_PLUS) |
7160 | new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var, |
7161 | def0, mask); |
7162 | else |
7163 | new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, |
7164 | def0); |
7165 | /* For chained SLP reductions the output of the previous reduction |
7166 | operation serves as the input of the next. For the final statement |
7167 | the output cannot be a temporary - we reuse the original |
7168 | scalar destination of the last statement. */ |
7169 | if (i != vec_num - 1) |
7170 | { |
7171 | gimple_set_lhs (new_stmt, scalar_dest_var); |
reduc_var = make_ssa_name (scalar_dest_var, new_stmt);
7173 | gimple_set_lhs (new_stmt, reduc_var); |
7174 | } |
7175 | } |
7176 | else |
7177 | { |
reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
tree_code (code), reduc_var, def0,
7180 | mask); |
7181 | new_stmt = SSA_NAME_DEF_STMT (reduc_var); |
7182 | /* Remove the statement, so that we can use the same code paths |
7183 | as for statements that we've just created. */ |
7184 | gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt); |
7185 | gsi_remove (&tmp_gsi, true); |
7186 | } |
7187 | |
7188 | if (i == vec_num - 1) |
7189 | { |
7190 | gimple_set_lhs (new_stmt, scalar_dest); |
7191 | vect_finish_replace_stmt (loop_vinfo, |
7192 | scalar_dest_def_info, |
7193 | new_stmt); |
7194 | } |
7195 | else |
7196 | vect_finish_stmt_generation (loop_vinfo, |
7197 | scalar_dest_def_info, |
7198 | new_stmt, gsi); |
7199 | |
7200 | if (slp_node) |
slp_node->push_vec_def (new_stmt);
7202 | else |
7203 | { |
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7205 | *vec_stmt = new_stmt; |
7206 | } |
7207 | } |
7208 | |
7209 | return true; |
7210 | } |
7211 | |
7212 | /* Function is_nonwrapping_integer_induction. |
7213 | |
Check whether STMT_VINFO (which is part of loop LOOP) is an integer
induction that increments and does not overflow.  */
7216 | |
7217 | static bool |
7218 | is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, class loop *loop) |
7219 | { |
gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
7221 | tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo); |
7222 | tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo); |
7223 | tree lhs_type = TREE_TYPE (gimple_phi_result (phi)); |
7224 | widest_int ni, max_loop_value, lhs_max; |
7225 | wi::overflow_type overflow = wi::OVF_NONE; |
7226 | |
7227 | /* Make sure the loop is integer based. */ |
7228 | if (TREE_CODE (base) != INTEGER_CST |
7229 | || TREE_CODE (step) != INTEGER_CST) |
7230 | return false; |
7231 | |
7232 | /* Check that the max size of the loop will not wrap. */ |
7233 | |
7234 | if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) |
7235 | return true; |
7236 | |
7237 | if (! max_stmt_executions (loop, &ni)) |
7238 | return false; |
7239 | |
max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
&overflow);
7242 | if (overflow) |
7243 | return false; |
7244 | |
max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
TYPE_SIGN (lhs_type), &overflow);
7247 | if (overflow) |
7248 | return false; |
7249 | |
return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
7251 | <= TYPE_PRECISION (lhs_type)); |
7252 | } |
7253 | |
7254 | /* Check if masking can be supported by inserting a conditional expression. |
7255 | CODE is the code for the operation. COND_FN is the conditional internal |
7256 | function, if it exists. VECTYPE_IN is the type of the vector input. */ |
7257 | static bool |
7258 | use_mask_by_cond_expr_p (code_helper code, internal_fn cond_fn, |
7259 | tree vectype_in) |
7260 | { |
7261 | if (cond_fn != IFN_LAST |
7262 | && direct_internal_fn_supported_p (cond_fn, vectype_in, |
7263 | OPTIMIZE_FOR_SPEED)) |
7264 | return false; |
7265 | |
7266 | if (code.is_tree_code ()) |
7267 | switch (tree_code (code)) |
7268 | { |
7269 | case DOT_PROD_EXPR: |
7270 | case SAD_EXPR: |
7271 | return true; |
7272 | |
7273 | default: |
7274 | break; |
7275 | } |
7276 | return false; |
7277 | } |
7278 | |
7279 | /* Insert a conditional expression to enable masked vectorization. CODE is the |
7280 | code for the operation. VOP is the array of operands. MASK is the loop |
7281 | mask. GSI is a statement iterator used to place the new conditional |
7282 | expression. */ |
7283 | static void |
7284 | build_vect_cond_expr (code_helper code, tree vop[3], tree mask, |
7285 | gimple_stmt_iterator *gsi) |
7286 | { |
7287 | switch (tree_code (code)) |
7288 | { |
7289 | case DOT_PROD_EXPR: |
7290 | { |
7291 | tree vectype = TREE_TYPE (vop[1]); |
7292 | tree zero = build_zero_cst (vectype); |
tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
7294 | gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, |
7295 | mask, vop[1], zero); |
7296 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
7297 | vop[1] = masked_op1; |
7298 | break; |
7299 | } |
7300 | |
7301 | case SAD_EXPR: |
7302 | { |
7303 | tree vectype = TREE_TYPE (vop[1]); |
tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
7305 | gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, |
7306 | mask, vop[1], vop[0]); |
7307 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
7308 | vop[1] = masked_op1; |
7309 | break; |
7310 | } |
7311 | |
7312 | default: |
7313 | gcc_unreachable (); |
7314 | } |
7315 | } |
7316 | |
7317 | /* Function vectorizable_reduction. |
7318 | |
7319 | Check if STMT_INFO performs a reduction operation that can be vectorized. |
7320 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
7321 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
7322 | Return true if STMT_INFO is vectorizable in this way. |
7323 | |
7324 | This function also handles reduction idioms (patterns) that have been |
7325 | recognized in advance during vect_pattern_recog. In this case, STMT_INFO |
7326 | may be of this form: |
7327 | X = pattern_expr (arg0, arg1, ..., X) |
7328 | and its STMT_VINFO_RELATED_STMT points to the last stmt in the original |
7329 | sequence that had been detected and replaced by the pattern-stmt |
7330 | (STMT_INFO). |
7331 | |
7332 | This function also handles reduction of condition expressions, for example: |
7333 | for (int i = 0; i < N; i++) |
7334 | if (a[i] < value) |
7335 | last = a[i]; |
7336 | This is handled by vectorising the loop and creating an additional vector |
7337 | containing the loop indexes for which "a[i] < value" was true. In the |
7338 | function epilogue this is reduced to a single max value and then used to |
7339 | index into the vector of results. |
7340 | |
7341 | In some cases of reduction patterns, the type of the reduction variable X is |
7342 | different than the type of the other arguments of STMT_INFO. |
7343 | In such cases, the vectype that is used when transforming STMT_INFO into |
7344 | a vector stmt is different than the vectype that is used to determine the |
7345 | vectorization factor, because it consists of a different number of elements |
7346 | than the actual number of elements that are being operated upon in parallel. |
7347 | |
7348 | For example, consider an accumulation of shorts into an int accumulator. |
7349 | On some targets it's possible to vectorize this pattern operating on 8 |
7350 | shorts at a time (hence, the vectype for purposes of determining the |
7351 | vectorization factor should be V8HI); on the other hand, the vectype that |
7352 | is used to create the vector form is actually V4SI (the type of the result). |
7353 | |
7354 | Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that |
7355 | indicates what is the actual level of parallelism (V8HI in the example), so |
7356 | that the right vectorization factor would be derived. This vectype |
7357 | corresponds to the type of arguments to the reduction stmt, and should *NOT* |
7358 | be used to create the vectorized stmt. The right vectype for the vectorized |
7359 | stmt is obtained from the type of the result X: |
7360 | get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) |
7361 | |
7362 | This means that, contrary to "regular" reductions (or "regular" stmts in |
7363 | general), the following equation: |
7364 | STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) |
7365 | does *NOT* necessarily hold for reduction patterns. */ |
7366 | |
7367 | bool |
7368 | vectorizable_reduction (loop_vec_info loop_vinfo, |
7369 | stmt_vec_info stmt_info, slp_tree slp_node, |
7370 | slp_instance slp_node_instance, |
7371 | stmt_vector_for_cost *cost_vec) |
7372 | { |
7373 | tree vectype_in = NULL_TREE; |
7374 | tree vectype_op[3] = { NULL_TREE, NULL_TREE, NULL_TREE }; |
7375 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7376 | enum vect_def_type cond_reduc_dt = vect_unknown_def_type; |
7377 | stmt_vec_info cond_stmt_vinfo = NULL; |
7378 | int i; |
7379 | int ncopies; |
7380 | bool single_defuse_cycle = false; |
7381 | bool nested_cycle = false; |
7382 | bool double_reduc = false; |
7383 | int vec_num; |
7384 | tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; |
7385 | tree cond_reduc_val = NULL_TREE; |
7386 | |
7387 | /* Make sure it was already recognized as a reduction computation. */ |
7388 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def |
7389 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def |
7390 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) |
7391 | return false; |
7392 | |
7393 | /* The stmt we store reduction analysis meta on. */ |
stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
7395 | reduc_info->is_reduc_info = true; |
7396 | |
7397 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
7398 | { |
if (is_a <gphi *> (stmt_info->stmt))
7400 | { |
7401 | if (slp_node) |
7402 | { |
7403 | /* We eventually need to set a vector type on invariant |
7404 | arguments. */ |
7405 | unsigned j; |
7406 | slp_tree child; |
7407 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
7408 | if (!vect_maybe_update_slp_op_vectype |
7409 | (child, SLP_TREE_VECTYPE (slp_node))) |
7410 | { |
7411 | if (dump_enabled_p ()) |
7412 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7413 | "incompatible vector types for " |
7414 | "invariants\n" ); |
7415 | return false; |
7416 | } |
7417 | } |
7418 | /* Analysis for double-reduction is done on the outer |
7419 | loop PHI, nested cycles have no further restrictions. */ |
7420 | STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; |
7421 | } |
7422 | else |
7423 | STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
7424 | return true; |
7425 | } |
7426 | |
7427 | stmt_vec_info orig_stmt_of_analysis = stmt_info; |
7428 | stmt_vec_info phi_info = stmt_info; |
if (!is_a <gphi *> (stmt_info->stmt))
7430 | { |
7431 | STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
7432 | return true; |
7433 | } |
7434 | if (slp_node) |
7435 | { |
7436 | slp_node_instance->reduc_phis = slp_node; |
7437 | /* ??? We're leaving slp_node to point to the PHIs, we only |
7438 | need it to get at the number of vector stmts which wasn't |
7439 | yet initialized for the instance root. */ |
7440 | } |
7441 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
7442 | { |
7443 | use_operand_p use_p; |
7444 | gimple *use_stmt; |
bool res = single_imm_use (gimple_phi_result (stmt_info->stmt),
&use_p, &use_stmt);
7447 | gcc_assert (res); |
7448 | phi_info = loop_vinfo->lookup_stmt (use_stmt); |
7449 | } |
7450 | |
7451 | /* PHIs should not participate in patterns. */ |
7452 | gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); |
gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
7454 | |
7455 | /* Verify following REDUC_IDX from the latch def leads us back to the PHI |
7456 | and compute the reduction chain length. Discover the real |
7457 | reduction operation stmt on the way (stmt_info and slp_for_stmt_info). */ |
7458 | tree reduc_def |
7459 | = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, |
7460 | loop_latch_edge |
7461 | (gimple_bb (reduc_def_phi)->loop_father)); |
7462 | unsigned reduc_chain_length = 0; |
7463 | bool only_slp_reduc_chain = true; |
7464 | stmt_info = NULL; |
7465 | slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL; |
7466 | while (reduc_def != PHI_RESULT (reduc_def_phi)) |
7467 | { |
7468 | stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); |
stmt_vec_info vdef = vect_stmt_to_vectorize (def);
7470 | if (STMT_VINFO_REDUC_IDX (vdef) == -1) |
7471 | { |
7472 | if (dump_enabled_p ()) |
7473 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7474 | "reduction chain broken by patterns.\n" ); |
7475 | return false; |
7476 | } |
7477 | if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) |
7478 | only_slp_reduc_chain = false; |
7479 | /* For epilogue generation live members of the chain need |
7480 | to point back to the PHI via their original stmt for |
7481 | info_for_reduction to work. For SLP we need to look at |
7482 | all lanes here - even though we only will vectorize from |
7483 | the SLP node with live lane zero the other live lanes also |
7484 | need to be identified as part of a reduction to be able |
7485 | to skip code generation for them. */ |
7486 | if (slp_for_stmt_info) |
7487 | { |
7488 | for (auto s : SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)) |
7489 | if (STMT_VINFO_LIVE_P (s)) |
7490 | STMT_VINFO_REDUC_DEF (vect_orig_stmt (s)) = phi_info; |
7491 | } |
7492 | else if (STMT_VINFO_LIVE_P (vdef)) |
7493 | STMT_VINFO_REDUC_DEF (def) = phi_info; |
7494 | gimple_match_op op; |
7495 | if (!gimple_extract_op (vdef->stmt, &op)) |
7496 | { |
7497 | if (dump_enabled_p ()) |
7498 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7499 | "reduction chain includes unsupported" |
7500 | " statement type.\n" ); |
7501 | return false; |
7502 | } |
7503 | if (CONVERT_EXPR_CODE_P (op.code)) |
7504 | { |
7505 | if (!tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) |
7506 | { |
7507 | if (dump_enabled_p ()) |
7508 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7509 | "conversion in the reduction chain.\n" ); |
7510 | return false; |
7511 | } |
7512 | } |
7513 | else if (!stmt_info) |
7514 | /* First non-conversion stmt. */ |
7515 | stmt_info = vdef; |
7516 | reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)]; |
7517 | reduc_chain_length++; |
7518 | if (!stmt_info && slp_node) |
7519 | slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; |
7520 | } |
7521 | /* PHIs should not participate in patterns. */ |
7522 | gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); |
7523 | |
7524 | if (nested_in_vect_loop_p (loop, stmt_info)) |
7525 | { |
7526 | loop = loop->inner; |
7527 | nested_cycle = true; |
7528 | } |
7529 | |
7530 | /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last |
7531 | element. */ |
7532 | if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7533 | { |
7534 | gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); |
7535 | stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); |
7536 | } |
7537 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7538 | gcc_assert (slp_node |
7539 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); |
7540 | |
7541 | /* 1. Is vectorizable reduction? */ |
7542 | /* Not supportable if the reduction variable is used in the loop, unless |
7543 | it's a reduction chain. */ |
7544 | if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer |
7545 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7546 | return false; |
7547 | |
/* Reductions that are not used even in an enclosing outer-loop
7549 | are expected to be "live" (used out of the loop). */ |
7550 | if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope |
7551 | && !STMT_VINFO_LIVE_P (stmt_info)) |
7552 | return false; |
7553 | |
7554 | /* 2. Has this been recognized as a reduction pattern? |
7555 | |
7556 | Check if STMT represents a pattern that has been recognized |
7557 | in earlier analysis stages. For stmts that represent a pattern, |
7558 | the STMT_VINFO_RELATED_STMT field records the last stmt in |
7559 | the original sequence that constitutes the pattern. */ |
7560 | |
7561 | stmt_vec_info orig_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
7562 | if (orig_stmt_info) |
7563 | { |
7564 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
7565 | gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); |
7566 | } |
7567 | |
7568 | /* 3. Check the operands of the operation. The first operands are defined |
7569 | inside the loop body. The last operand is the reduction variable, |
7570 | which is defined by the loop-header-phi. */ |
7571 | |
7572 | tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
7573 | STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; |
7574 | gimple_match_op op; |
7575 | if (!gimple_extract_op (stmt_info->stmt, &op)) |
7576 | gcc_unreachable (); |
7577 | bool lane_reduc_code_p = (op.code == DOT_PROD_EXPR |
7578 | || op.code == WIDEN_SUM_EXPR |
7579 | || op.code == SAD_EXPR); |
7580 | |
7581 | if (!POINTER_TYPE_P (op.type) && !INTEGRAL_TYPE_P (op.type) |
7582 | && !SCALAR_FLOAT_TYPE_P (op.type)) |
7583 | return false; |
7584 | |
7585 | /* Do not try to vectorize bit-precision reductions. */ |
if (!type_has_mode_precision_p (op.type))
7587 | return false; |
7588 | |
7589 | /* For lane-reducing ops we're reducing the number of reduction PHIs |
7590 | which means the only use of that may be in the lane-reducing operation. */ |
7591 | if (lane_reduc_code_p |
7592 | && reduc_chain_length != 1 |
7593 | && !only_slp_reduc_chain) |
7594 | { |
7595 | if (dump_enabled_p ()) |
7596 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7597 | "lane-reducing reduction with extra stmts.\n" ); |
7598 | return false; |
7599 | } |
7600 | |
7601 | /* All uses but the last are expected to be defined in the loop. |
7602 | The last use is the reduction variable. In case of nested cycle this |
7603 | assumption is not true: we use reduc_index to record the index of the |
7604 | reduction variable. */ |
7605 | slp_tree *slp_op = XALLOCAVEC (slp_tree, op.num_ops); |
7606 | /* We need to skip an extra operand for COND_EXPRs with embedded |
7607 | comparison. */ |
7608 | unsigned opno_adjust = 0; |
7609 | if (op.code == COND_EXPR && COMPARISON_CLASS_P (op.ops[0])) |
7610 | opno_adjust = 1; |
7611 | for (i = 0; i < (int) op.num_ops; i++) |
7612 | { |
7613 | /* The condition of COND_EXPR is checked in vectorizable_condition(). */ |
7614 | if (i == 0 && op.code == COND_EXPR) |
7615 | continue; |
7616 | |
7617 | stmt_vec_info def_stmt_info; |
7618 | enum vect_def_type dt; |
7619 | if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, |
7620 | i + opno_adjust, &op.ops[i], &slp_op[i], &dt, |
7621 | &vectype_op[i], &def_stmt_info)) |
7622 | { |
7623 | if (dump_enabled_p ()) |
7624 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7625 | "use not simple.\n" ); |
7626 | return false; |
7627 | } |
7628 | if (i == STMT_VINFO_REDUC_IDX (stmt_info)) |
7629 | continue; |
7630 | |
7631 | /* For an IFN_COND_OP we might hit the reduction definition operand |
7632 | twice (once as definition, once as else). */ |
7633 | if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)]) |
7634 | continue; |
7635 | |
7636 | /* There should be only one cycle def in the stmt, the one |
7637 | leading to reduc_def. */ |
7638 | if (VECTORIZABLE_CYCLE_DEF (dt)) |
7639 | return false; |
7640 | |
7641 | if (!vectype_op[i]) |
7642 | vectype_op[i] |
7643 | = get_vectype_for_scalar_type (loop_vinfo, |
7644 | TREE_TYPE (op.ops[i]), slp_op[i]); |
7645 | |
7646 | /* To properly compute ncopies we are interested in the widest |
7647 | non-reduction input type in case we're looking at a widening |
7648 | accumulation that we later handle in vect_transform_reduction. */ |
7649 | if (lane_reduc_code_p |
7650 | && vectype_op[i] |
7651 | && (!vectype_in |
7652 | || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) |
7653 | < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i])))))) |
7654 | vectype_in = vectype_op[i]; |
7655 | |
7656 | if (op.code == COND_EXPR) |
7657 | { |
7658 | /* Record how the non-reduction-def value of COND_EXPR is defined. */ |
7659 | if (dt == vect_constant_def) |
7660 | { |
7661 | cond_reduc_dt = dt; |
7662 | cond_reduc_val = op.ops[i]; |
7663 | } |
7664 | if (dt == vect_induction_def |
7665 | && def_stmt_info |
&& is_nonwrapping_integer_induction (def_stmt_info, loop))
7667 | { |
7668 | cond_reduc_dt = dt; |
7669 | cond_stmt_vinfo = def_stmt_info; |
7670 | } |
7671 | } |
7672 | } |
7673 | if (!vectype_in) |
7674 | vectype_in = STMT_VINFO_VECTYPE (phi_info); |
7675 | STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; |
7676 | |
7677 | enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); |
7678 | STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type; |
7679 | /* If we have a condition reduction, see if we can simplify it further. */ |
7680 | if (v_reduc_type == COND_REDUCTION) |
7681 | { |
7682 | if (slp_node) |
7683 | return false; |
7684 | |
7685 | /* When the condition uses the reduction value in the condition, fail. */ |
7686 | if (STMT_VINFO_REDUC_IDX (stmt_info) == 0) |
7687 | { |
7688 | if (dump_enabled_p ()) |
7689 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7690 | "condition depends on previous iteration\n" ); |
7691 | return false; |
7692 | } |
7693 | |
7694 | if (reduc_chain_length == 1 |
7695 | && (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, vectype_in, |
7696 | OPTIMIZE_FOR_SPEED) |
7697 | || direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
7698 | vectype_in, |
7699 | OPTIMIZE_FOR_SPEED))) |
7700 | { |
7701 | if (dump_enabled_p ()) |
7702 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7703 | "optimizing condition reduction with" |
7704 | " FOLD_EXTRACT_LAST.\n" ); |
7705 | STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; |
7706 | } |
7707 | else if (cond_reduc_dt == vect_induction_def) |
7708 | { |
7709 | tree base |
7710 | = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo); |
7711 | tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo); |
7712 | |
7713 | gcc_assert (TREE_CODE (base) == INTEGER_CST |
7714 | && TREE_CODE (step) == INTEGER_CST); |
7715 | cond_reduc_val = NULL_TREE; |
7716 | enum tree_code cond_reduc_op_code = ERROR_MARK; |
7717 | tree res = PHI_RESULT (STMT_VINFO_STMT (cond_stmt_vinfo)); |
7718 | if (!types_compatible_p (TREE_TYPE (res), TREE_TYPE (base))) |
7719 | ; |
7720 | /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR |
7721 | above base; punt if base is the minimum value of the type for |
7722 | MAX_EXPR or maximum value of the type for MIN_EXPR for now. */ |
7723 | else if (tree_int_cst_sgn (step) == -1) |
7724 | { |
7725 | cond_reduc_op_code = MIN_EXPR; |
7726 | if (tree_int_cst_sgn (base) == -1) |
7727 | cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); |
else if (tree_int_cst_lt (base,
TYPE_MAX_VALUE (TREE_TYPE (base))))
7730 | cond_reduc_val |
7731 | = int_const_binop (PLUS_EXPR, base, integer_one_node); |
7732 | } |
7733 | else |
7734 | { |
7735 | cond_reduc_op_code = MAX_EXPR; |
7736 | if (tree_int_cst_sgn (base) == 1) |
7737 | cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); |
7738 | else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)), |
base))
7740 | cond_reduc_val |
7741 | = int_const_binop (MINUS_EXPR, base, integer_one_node); |
7742 | } |
7743 | if (cond_reduc_val) |
7744 | { |
7745 | if (dump_enabled_p ()) |
7746 | dump_printf_loc (MSG_NOTE, vect_location, |
7747 | "condition expression based on " |
7748 | "integer induction.\n" ); |
7749 | STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code; |
7750 | STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) |
7751 | = cond_reduc_val; |
7752 | STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; |
7753 | } |
7754 | } |
7755 | else if (cond_reduc_dt == vect_constant_def) |
7756 | { |
7757 | enum vect_def_type cond_initial_dt; |
tree cond_initial_val = vect_phi_initial_value (reduc_def_phi);
7759 | vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt); |
7760 | if (cond_initial_dt == vect_constant_def |
7761 | && types_compatible_p (TREE_TYPE (cond_initial_val), |
7762 | TREE_TYPE (cond_reduc_val))) |
7763 | { |
7764 | tree e = fold_binary (LE_EXPR, boolean_type_node, |
7765 | cond_initial_val, cond_reduc_val); |
7766 | if (e && (integer_onep (e) || integer_zerop (e))) |
7767 | { |
7768 | if (dump_enabled_p ()) |
7769 | dump_printf_loc (MSG_NOTE, vect_location, |
7770 | "condition expression based on " |
7771 | "compile time constant.\n" ); |
7772 | /* Record reduction code at analysis stage. */ |
7773 | STMT_VINFO_REDUC_CODE (reduc_info) |
7774 | = integer_onep (e) ? MAX_EXPR : MIN_EXPR; |
7775 | STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION; |
7776 | } |
7777 | } |
7778 | } |
7779 | } |
7780 | |
7781 | if (STMT_VINFO_LIVE_P (phi_info)) |
7782 | return false; |
7783 | |
7784 | if (slp_node) |
7785 | ncopies = 1; |
7786 | else |
ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
7788 | |
7789 | gcc_assert (ncopies >= 1); |
7790 | |
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
7792 | |
7793 | if (nested_cycle) |
7794 | { |
7795 | gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) |
7796 | == vect_double_reduction_def); |
7797 | double_reduc = true; |
7798 | } |
7799 | |
7800 | /* 4.2. Check support for the epilog operation. |
7801 | |
7802 | If STMT represents a reduction pattern, then the type of the |
7803 | reduction variable may be different than the type of the rest |
7804 | of the arguments. For example, consider the case of accumulation |
of shorts into an int accumulator.  The original code:
7806 | S1: int_a = (int) short_a; |
7807 | orig_stmt-> S2: int_acc = plus <int_a ,int_acc>; |
7808 | |
7809 | was replaced with: |
7810 | STMT: int_acc = widen_sum <short_a, int_acc> |
7811 | |
7812 | This means that: |
7813 | 1. The tree-code that is used to create the vector operation in the |
7814 | epilog code (that reduces the partial results) is not the |
7815 | tree-code of STMT, but is rather the tree-code of the original |
7816 | stmt from the pattern that STMT is replacing. I.e, in the example |
7817 | above we want to use 'widen_sum' in the loop, but 'plus' in the |
7818 | epilog. |
7819 | 2. The type (mode) we use to check available target support |
7820 | for the vector operation to be created in the *epilog*, is |
7821 | determined by the type of the reduction variable (in the example |
7822 | above we'd check this: optab_handler (plus_optab, vect_int_mode])). |
7823 | However the type (mode) we use to check available target support |
7824 | for the vector operation to be created *inside the loop*, is |
7825 | determined by the type of the other arguments to STMT (in the |
7826 | example we'd check this: optab_handler (widen_sum_optab, |
7827 | vect_short_mode)). |
7828 | |
7829 | This is contrary to "regular" reductions, in which the types of all |
7830 | the arguments are the same as the type of the reduction variable. |
7831 | For "regular" reductions we can therefore use the same vector type |
7832 | (and also the same tree-code) when generating the epilog code and |
7833 | when generating the code inside the loop. */ |
7834 | |
7835 | code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info); |
7836 | |
/* If-conversion may already have created a conditional operation like
IFN_COND_ADD.  Use the internal code for the following checks.  */
7839 | if (orig_code.is_internal_fn ()) |
7840 | { |
7841 | tree_code new_code = conditional_internal_fn_code (internal_fn (orig_code)); |
7842 | orig_code = new_code != ERROR_MARK ? new_code : orig_code; |
7843 | } |
7844 | |
7845 | STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; |
7846 | |
7847 | vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); |
7848 | if (reduction_type == TREE_CODE_REDUCTION) |
7849 | { |
7850 | /* Check whether it's ok to change the order of the computation. |
7851 | Generally, when vectorizing a reduction we change the order of the |
7852 | computation. This may change the behavior of the program in some |
7853 | cases, so we need to check that this is ok. One exception is when |
7854 | vectorizing an outer-loop: the inner-loop is executed sequentially, |
7855 | and therefore vectorizing reductions in the inner-loop during |
7856 | outer-loop vectorization is safe. Likewise when we are vectorizing |
7857 | a series of reductions using SLP and the VF is one the reductions |
7858 | are performed in scalar order. */ |
7859 | if (slp_node |
7860 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
7861 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u)) |
7862 | ; |
else if (needs_fold_left_reduction_p (op.type, orig_code))
7864 | { |
7865 | /* When vectorizing a reduction chain w/o SLP the reduction PHI |
is not directly used in stmt.  */
7867 | if (!only_slp_reduc_chain |
7868 | && reduc_chain_length != 1) |
7869 | { |
7870 | if (dump_enabled_p ()) |
7871 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7872 | "in-order reduction chain without SLP.\n" ); |
7873 | return false; |
7874 | } |
7875 | STMT_VINFO_REDUC_TYPE (reduc_info) |
7876 | = reduction_type = FOLD_LEFT_REDUCTION; |
7877 | } |
7878 | else if (!commutative_binary_op_p (orig_code, op.type) |
7879 | || !associative_binary_op_p (orig_code, op.type)) |
7880 | { |
7881 | if (dump_enabled_p ()) |
7882 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7883 | "reduction: not commutative/associative\n" ); |
7884 | return false; |
7885 | } |
7886 | } |
7887 | |
7888 | if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) |
7889 | && ncopies > 1) |
7890 | { |
7891 | if (dump_enabled_p ()) |
7892 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7893 | "multiple types in double reduction or condition " |
7894 | "reduction or fold-left reduction.\n" ); |
7895 | return false; |
7896 | } |
7897 | |
7898 | internal_fn reduc_fn = IFN_LAST; |
7899 | if (reduction_type == TREE_CODE_REDUCTION |
7900 | || reduction_type == FOLD_LEFT_REDUCTION |
7901 | || reduction_type == INTEGER_INDUC_COND_REDUCTION |
7902 | || reduction_type == CONST_COND_REDUCTION) |
7903 | { |
7904 | if (reduction_type == FOLD_LEFT_REDUCTION |
? fold_left_reduction_fn (orig_code, &reduc_fn)
: reduction_fn_for_scalar_code (orig_code, &reduc_fn))
7907 | { |
7908 | if (reduc_fn != IFN_LAST |
7909 | && !direct_internal_fn_supported_p (reduc_fn, vectype_out, |
7910 | OPTIMIZE_FOR_SPEED)) |
7911 | { |
7912 | if (dump_enabled_p ()) |
7913 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7914 | "reduc op not supported by target.\n" ); |
7915 | |
7916 | reduc_fn = IFN_LAST; |
7917 | } |
7918 | } |
7919 | else |
7920 | { |
7921 | if (!nested_cycle || double_reduc) |
7922 | { |
7923 | if (dump_enabled_p ()) |
7924 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7925 | "no reduc code for scalar code.\n" ); |
7926 | |
7927 | return false; |
7928 | } |
7929 | } |
7930 | } |
7931 | else if (reduction_type == COND_REDUCTION) |
7932 | { |
7933 | int scalar_precision |
7934 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (op.type)); |
7935 | cr_index_scalar_type = make_unsigned_type (scalar_precision); |
7936 | cr_index_vector_type = get_same_sized_vectype (cr_index_scalar_type, |
7937 | vectype_out); |
7938 | |
7939 | if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type, |
7940 | OPTIMIZE_FOR_SPEED)) |
7941 | reduc_fn = IFN_REDUC_MAX; |
7942 | } |
7943 | STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn; |
7944 | |
7945 | if (reduction_type != EXTRACT_LAST_REDUCTION |
7946 | && (!nested_cycle || double_reduc) |
7947 | && reduc_fn == IFN_LAST |
7948 | && !nunits_out.is_constant ()) |
7949 | { |
7950 | if (dump_enabled_p ()) |
7951 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7952 | "missing target support for reduction on" |
7953 | " variable-length vectors.\n" ); |
7954 | return false; |
7955 | } |
7956 | |
7957 | /* For SLP reductions, see if there is a neutral value we can use. */ |
7958 | tree neutral_op = NULL_TREE; |
7959 | if (slp_node) |
7960 | { |
7961 | tree initial_value = NULL_TREE; |
7962 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) |
initial_value = vect_phi_initial_value (reduc_def_phi);
7964 | neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), |
orig_code, initial_value);
7966 | } |
7967 | |
7968 | if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) |
7969 | { |
7970 | /* We can't support in-order reductions of code such as this: |
7971 | |
7972 | for (int i = 0; i < n1; ++i) |
7973 | for (int j = 0; j < n2; ++j) |
7974 | l += a[j]; |
7975 | |
7976 | since GCC effectively transforms the loop when vectorizing: |
7977 | |
7978 | for (int i = 0; i < n1 / VF; ++i) |
7979 | for (int j = 0; j < n2; ++j) |
7980 | for (int k = 0; k < VF; ++k) |
7981 | l += a[j]; |
7982 | |
7983 | which is a reassociation of the original operation. */ |
7984 | if (dump_enabled_p ()) |
7985 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7986 | "in-order double reduction not supported.\n" ); |
7987 | |
7988 | return false; |
7989 | } |
7990 | |
7991 | if (reduction_type == FOLD_LEFT_REDUCTION |
7992 | && slp_node |
7993 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7994 | { |
7995 | /* We cannot use in-order reductions in this case because there is |
7996 | an implicit reassociation of the operations involved. */ |
7997 | if (dump_enabled_p ()) |
7998 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7999 | "in-order unchained SLP reductions not supported.\n" ); |
8000 | return false; |
8001 | } |
8002 | |
8003 | /* For double reductions, and for SLP reductions with a neutral value, |
8004 | we construct a variable-length initial vector by loading a vector |
8005 | full of the neutral value and then shift-and-inserting the start |
8006 | values into the low-numbered elements. */ |
8007 | if ((double_reduc || neutral_op) |
8008 | && !nunits_out.is_constant () |
8009 | && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT, |
8010 | vectype_out, OPTIMIZE_FOR_SPEED)) |
8011 | { |
8012 | if (dump_enabled_p ()) |
8013 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8014 | "reduction on variable-length vectors requires" |
8015 | " target support for a vector-shift-and-insert" |
8016 | " operation.\n" ); |
8017 | return false; |
8018 | } |
8019 | |
8020 | /* Check extra constraints for variable-length unchained SLP reductions. */ |
8021 | if (slp_node |
8022 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
8023 | && !nunits_out.is_constant ()) |
8024 | { |
8025 | /* We checked above that we could build the initial vector when |
8026 | there's a neutral element value. Check here for the case in |
8027 | which each SLP statement has its own initial value and in which |
8028 | that value needs to be repeated for every instance of the |
8029 | statement within the initial vector. */ |
8030 | unsigned int group_size = SLP_TREE_LANES (slp_node); |
8031 | if (!neutral_op |
8032 | && !can_duplicate_and_interleave_p (loop_vinfo, group_size, |
8033 | TREE_TYPE (vectype_out))) |
8034 | { |
8035 | if (dump_enabled_p ()) |
8036 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8037 | "unsupported form of SLP reduction for" |
8038 | " variable-length vectors: cannot build" |
8039 | " initial vector.\n" ); |
8040 | return false; |
8041 | } |
8042 | /* The epilogue code relies on the number of elements being a multiple |
8043 | of the group size. The duplicate-and-interleave approach to setting |
8044 | up the initial vector does too. */ |
if (!multiple_p (nunits_out, group_size))
8046 | { |
8047 | if (dump_enabled_p ()) |
8048 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8049 | "unsupported form of SLP reduction for" |
8050 | " variable-length vectors: the vector size" |
8051 | " is not a multiple of the number of results.\n" ); |
8052 | return false; |
8053 | } |
8054 | } |
8055 | |
8056 | if (reduction_type == COND_REDUCTION) |
8057 | { |
8058 | widest_int ni; |
8059 | |
8060 | if (! max_loop_iterations (loop, &ni)) |
8061 | { |
8062 | if (dump_enabled_p ()) |
8063 | dump_printf_loc (MSG_NOTE, vect_location, |
8064 | "loop count not known, cannot create cond " |
8065 | "reduction.\n" ); |
8066 | return false; |
8067 | } |
8068 | /* Convert backedges to iterations. */ |
8069 | ni += 1; |
8070 | |
8071 | /* The additional index will be the same type as the condition. Check |
8072 | that the loop can fit into this less one (because we'll use up the |
8073 | zero slot for when there are no matches). */ |
8074 | tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type); |
if (wi::geu_p (ni, wi::to_widest (max_index)))
8076 | { |
8077 | if (dump_enabled_p ()) |
8078 | dump_printf_loc (MSG_NOTE, vect_location, |
8079 | "loop size is greater than data size.\n" ); |
8080 | return false; |
8081 | } |
8082 | } |
8083 | |
8084 | /* In case the vectorization factor (VF) is bigger than the number |
8085 | of elements that we can fit in a vectype (nunits), we have to generate |
more than one vector stmt - i.e. - we need to "unroll" the
8087 | vector stmt by a factor VF/nunits. For more details see documentation |
8088 | in vectorizable_operation. */ |
8089 | |
8090 | /* If the reduction is used in an outer loop we need to generate |
8091 | VF intermediate results, like so (e.g. for ncopies=2): |
8092 | r0 = phi (init, r0) |
8093 | r1 = phi (init, r1) |
8094 | r0 = x0 + r0; |
8095 | r1 = x1 + r1; |
8096 | (i.e. we generate VF results in 2 registers). |
8097 | In this case we have a separate def-use cycle for each copy, and therefore |
8098 | for each copy we get the vector def for the reduction variable from the |
8099 | respective phi node created for this copy. |
8100 | |
8101 | Otherwise (the reduction is unused in the loop nest), we can combine |
8102 | together intermediate results, like so (e.g. for ncopies=2): |
8103 | r = phi (init, r) |
8104 | r = x0 + r; |
8105 | r = x1 + r; |
8106 | (i.e. we generate VF/2 results in a single register). |
8107 | In this case for each copy we get the vector def for the reduction variable |
8108 | from the vectorized reduction operation generated in the previous iteration. |
8109 | |
8110 | This only works when we see both the reduction PHI and its only consumer |
8111 | in vectorizable_reduction and there are no intermediate stmts |
8112 | participating. When unrolling we want each unrolled iteration to have its |
8113 | own reduction accumulator since one of the main goals of unrolling a |
8114 | reduction is to reduce the aggregate loop-carried latency. */ |
8115 | if (ncopies > 1 |
8116 | && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) |
8117 | && reduc_chain_length == 1 |
8118 | && loop_vinfo->suggested_unroll_factor == 1) |
8119 | single_defuse_cycle = true; |
8120 | |
8121 | if (single_defuse_cycle || lane_reduc_code_p) |
8122 | { |
8123 | gcc_assert (op.code != COND_EXPR); |
8124 | |
8125 | /* 4. Supportable by target? */ |
8126 | bool ok = true; |
8127 | |
8128 | /* 4.1. check support for the operation in the loop |
8129 | |
8130 | This isn't necessary for the lane reduction codes, since they |
8131 | can only be produced by pattern matching, and it's up to the |
8132 | pattern matcher to test for support. The main reason for |
8133 | specifically skipping this step is to avoid rechecking whether |
8134 | mixed-sign dot-products can be implemented using signed |
8135 | dot-products. */ |
8136 | machine_mode vec_mode = TYPE_MODE (vectype_in); |
8137 | if (!lane_reduc_code_p |
8138 | && !directly_supported_p (op.code, vectype_in, optab_vector)) |
8139 | { |
8140 | if (dump_enabled_p ()) |
8141 | dump_printf (MSG_NOTE, "op not supported by target.\n" ); |
if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
8143 | || !vect_can_vectorize_without_simd_p (op.code)) |
8144 | ok = false; |
8145 | else |
8146 | if (dump_enabled_p ()) |
8147 | dump_printf (MSG_NOTE, "proceeding using word mode.\n" ); |
8148 | } |
8149 | |
8150 | if (vect_emulated_vector_p (vectype_in) |
8151 | && !vect_can_vectorize_without_simd_p (op.code)) |
8152 | { |
8153 | if (dump_enabled_p ()) |
8154 | dump_printf (MSG_NOTE, "using word mode not possible.\n" ); |
8155 | return false; |
8156 | } |
8157 | |
/* Lane-reducing operations have to go through vect_transform_reduction.
8159 | For the other cases try without the single cycle optimization. */ |
8160 | if (!ok) |
8161 | { |
8162 | if (lane_reduc_code_p) |
8163 | return false; |
8164 | else |
8165 | single_defuse_cycle = false; |
8166 | } |
8167 | } |
8168 | STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; |
8169 | |
8170 | /* If the reduction stmt is one of the patterns that have lane |
8171 | reduction embedded we cannot handle the case of ! single_defuse_cycle. */ |
8172 | if ((ncopies > 1 && ! single_defuse_cycle) |
8173 | && lane_reduc_code_p) |
8174 | { |
8175 | if (dump_enabled_p ()) |
8176 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8177 | "multi def-use cycle not possible for lane-reducing " |
8178 | "reduction operation\n" ); |
8179 | return false; |
8180 | } |
8181 | |
8182 | if (slp_node |
8183 | && !(!single_defuse_cycle |
8184 | && !lane_reduc_code_p |
8185 | && reduction_type != FOLD_LEFT_REDUCTION)) |
8186 | for (i = 0; i < (int) op.num_ops; i++) |
8187 | if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i])) |
8188 | { |
8189 | if (dump_enabled_p ()) |
8190 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8191 | "incompatible vector types for invariants\n" ); |
8192 | return false; |
8193 | } |
8194 | |
8195 | if (slp_node) |
8196 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8197 | else |
8198 | vec_num = 1; |
8199 | |
8200 | vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn, |
8201 | reduction_type, ncopies, cost_vec); |
8202 | /* Cost the reduction op inside the loop if transformed via |
8203 | vect_transform_reduction. Otherwise this is costed by the |
8204 | separate vectorizable_* routines. */ |
8205 | if (single_defuse_cycle || lane_reduc_code_p) |
8206 | { |
8207 | int factor = 1; |
8208 | if (vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info)) |
8209 | /* Three dot-products and a subtraction. */ |
8210 | factor = 4; |
record_stmt_cost (cost_vec, ncopies * factor, vector_stmt,
stmt_info, 0, vect_body);
8213 | } |
8214 | |
8215 | if (dump_enabled_p () |
8216 | && reduction_type == FOLD_LEFT_REDUCTION) |
8217 | dump_printf_loc (MSG_NOTE, vect_location, |
8218 | "using an in-order (fold-left) reduction.\n" ); |
8219 | STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type; |
8220 | /* All but single defuse-cycle optimized, lane-reducing and fold-left |
8221 | reductions go through their own vectorizable_* routines. */ |
8222 | if (!single_defuse_cycle |
8223 | && !lane_reduc_code_p |
8224 | && reduction_type != FOLD_LEFT_REDUCTION) |
8225 | { |
8226 | stmt_vec_info tem |
8227 | = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); |
8228 | if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) |
8229 | { |
8230 | gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); |
8231 | tem = REDUC_GROUP_FIRST_ELEMENT (tem); |
8232 | } |
8233 | STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; |
8234 | STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; |
8235 | } |
8236 | else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
8237 | { |
8238 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
8239 | vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); |
8240 | internal_fn cond_fn = get_conditional_internal_fn (op.code, op.type); |
8241 | |
8242 | if (reduction_type != FOLD_LEFT_REDUCTION |
&& !use_mask_by_cond_expr_p (op.code, cond_fn, vectype_in)
8244 | && (cond_fn == IFN_LAST |
8245 | || !direct_internal_fn_supported_p (cond_fn, vectype_in, |
8246 | OPTIMIZE_FOR_SPEED))) |
8247 | { |
8248 | if (dump_enabled_p ()) |
8249 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8250 | "can't operate on partial vectors because" |
8251 | " no conditional operation is available.\n" ); |
8252 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8253 | } |
8254 | else if (reduction_type == FOLD_LEFT_REDUCTION |
8255 | && reduc_fn == IFN_LAST |
8256 | && !expand_vec_cond_expr_p (vectype_in, |
8257 | truth_type_for (vectype_in), |
8258 | SSA_NAME)) |
8259 | { |
8260 | if (dump_enabled_p ()) |
8261 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8262 | "can't operate on partial vectors because" |
8263 | " no conditional operation is available.\n" ); |
8264 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8265 | } |
8266 | else if (reduction_type == FOLD_LEFT_REDUCTION |
8267 | && internal_fn_mask_index (reduc_fn) == -1 |
8268 | && FLOAT_TYPE_P (vectype_in) |
8269 | && HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in)) |
8270 | { |
8271 | if (dump_enabled_p ()) |
8272 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8273 | "can't operate on partial vectors because" |
8274 | " signed zeros cannot be preserved.\n" ); |
8275 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8276 | } |
8277 | else |
8278 | { |
8279 | internal_fn mask_reduc_fn |
8280 | = get_masked_reduction_fn (reduc_fn, vectype_in); |
8281 | |
8282 | if (mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS) |
8283 | vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, |
8284 | vectype_in, 1); |
8285 | else |
8286 | vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, |
8287 | vectype_in, NULL); |
8288 | } |
8289 | } |
8290 | return true; |
8291 | } |
8292 | |
8293 | /* STMT_INFO is a dot-product reduction whose multiplication operands |
8294 | have different signs. Emit a sequence to emulate the operation |
8295 | using a series of signed DOT_PROD_EXPRs and return the last |
8296 | statement generated. VEC_DEST is the result of the vector operation |
8297 | and VOP lists its inputs. */ |
8298 | |
8299 | static gassign * |
8300 | vect_emulate_mixed_dot_prod (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, |
8301 | gimple_stmt_iterator *gsi, tree vec_dest, |
8302 | tree vop[3]) |
8303 | { |
8304 | tree wide_vectype = signed_type_for (TREE_TYPE (vec_dest)); |
8305 | tree narrow_vectype = signed_type_for (TREE_TYPE (vop[0])); |
8306 | tree narrow_elttype = TREE_TYPE (narrow_vectype); |
8307 | gimple *new_stmt; |
8308 | |
/* Make VOP[0] the unsigned operand and VOP[1] the signed operand.  */
8310 | if (!TYPE_UNSIGNED (TREE_TYPE (vop[0]))) |
std::swap (vop[0], vop[1]);
8312 | |
8313 | /* Convert all inputs to signed types. */ |
8314 | for (int i = 0; i < 3; ++i) |
8315 | if (TYPE_UNSIGNED (TREE_TYPE (vop[i]))) |
8316 | { |
tree tmp = make_ssa_name (signed_type_for (TREE_TYPE (vop[i])));
8318 | new_stmt = gimple_build_assign (tmp, NOP_EXPR, vop[i]); |
8319 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8320 | vop[i] = tmp; |
8321 | } |
8322 | |
8323 | /* In the comments below we assume 8-bit inputs for simplicity, |
8324 | but the approach works for any full integer type. */ |
8325 | |
8326 | /* Create a vector of -128. */ |
8327 | tree min_narrow_elttype = TYPE_MIN_VALUE (narrow_elttype); |
8328 | tree min_narrow = build_vector_from_val (narrow_vectype, |
8329 | min_narrow_elttype); |
8330 | |
8331 | /* Create a vector of 64. */ |
auto half_wi = wi::lrshift (wi::to_wide (min_narrow_elttype), 1);
tree half_narrow = wide_int_to_tree (narrow_elttype, half_wi);
8334 | half_narrow = build_vector_from_val (narrow_vectype, half_narrow); |
8335 | |
8336 | /* Emit: SUB_RES = VOP[0] - 128. */ |
tree sub_res = make_ssa_name (narrow_vectype);
8338 | new_stmt = gimple_build_assign (sub_res, PLUS_EXPR, vop[0], min_narrow); |
8339 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8340 | |
8341 | /* Emit: |
8342 | |
8343 | STAGE1 = DOT_PROD_EXPR <VOP[1], 64, VOP[2]>; |
8344 | STAGE2 = DOT_PROD_EXPR <VOP[1], 64, STAGE1>; |
STAGE3 = DOT_PROD_EXPR <SUB_RES, VOP[1], STAGE2>;
8346 | |
8347 | on the basis that x * y == (x - 128) * y + 64 * y + 64 * y |
8348 | Doing the two 64 * y steps first allows more time to compute x. */ |
tree stage1 = make_ssa_name (wide_vectype);
8350 | new_stmt = gimple_build_assign (stage1, DOT_PROD_EXPR, |
8351 | vop[1], half_narrow, vop[2]); |
8352 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8353 | |
tree stage2 = make_ssa_name (wide_vectype);
8355 | new_stmt = gimple_build_assign (stage2, DOT_PROD_EXPR, |
8356 | vop[1], half_narrow, stage1); |
8357 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8358 | |
tree stage3 = make_ssa_name (wide_vectype);
8360 | new_stmt = gimple_build_assign (stage3, DOT_PROD_EXPR, |
8361 | sub_res, vop[1], stage2); |
8362 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8363 | |
8364 | /* Convert STAGE3 to the reduction type. */ |
8365 | return gimple_build_assign (vec_dest, CONVERT_EXPR, stage3); |
8366 | } |
8367 | |
8368 | /* Transform the definition stmt STMT_INFO of a reduction PHI backedge |
8369 | value. */ |
8370 | |
8371 | bool |
8372 | vect_transform_reduction (loop_vec_info loop_vinfo, |
8373 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
8374 | gimple **vec_stmt, slp_tree slp_node) |
8375 | { |
8376 | tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
8377 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
8378 | int i; |
8379 | int ncopies; |
8380 | int vec_num; |
8381 | |
stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
8383 | gcc_assert (reduc_info->is_reduc_info); |
8384 | |
8385 | if (nested_in_vect_loop_p (loop, stmt_info)) |
8386 | { |
8387 | loop = loop->inner; |
8388 | gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); |
8389 | } |
8390 | |
8391 | gimple_match_op op; |
8392 | if (!gimple_extract_op (stmt_info->stmt, &op)) |
8393 | gcc_unreachable (); |
8394 | |
8395 | /* All uses but the last are expected to be defined in the loop. |
8396 | The last use is the reduction variable. In case of nested cycle this |
8397 | assumption is not true: we use reduc_index to record the index of the |
8398 | reduction variable. */ |
8399 | stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); |
gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
8401 | int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); |
8402 | tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info); |
8403 | |
8404 | if (slp_node) |
8405 | { |
8406 | ncopies = 1; |
8407 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8408 | } |
8409 | else |
8410 | { |
ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
8412 | vec_num = 1; |
8413 | } |
8414 | |
8415 | code_helper code = canonicalize_code (op.code, op.type); |
8416 | internal_fn cond_fn = get_conditional_internal_fn (code, op.type); |
8417 | |
8418 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
8419 | vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); |
8420 | bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in); |
8421 | |
8422 | /* Transform. */ |
8423 | tree new_temp = NULL_TREE; |
8424 | auto_vec<tree> vec_oprnds0; |
8425 | auto_vec<tree> vec_oprnds1; |
8426 | auto_vec<tree> vec_oprnds2; |
8427 | tree def0; |
8428 | |
8429 | if (dump_enabled_p ()) |
8430 | dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n" ); |
8431 | |
8432 | /* FORNOW: Multiple types are not supported for condition. */ |
8433 | if (code == COND_EXPR) |
8434 | gcc_assert (ncopies == 1); |
8435 | |
8436 | /* A binary COND_OP reduction must have the same definition and else |
8437 | value. */ |
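/* I.e. the reduction statement has a form like
res = .COND_ADD (C, res, val, res)
so lanes where C is false simply pass the accumulator through. */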
8438 | bool cond_fn_p = code.is_internal_fn () |
8439 | && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK; |
8440 | if (cond_fn_p) |
8441 | { |
8442 | gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB |
8443 | || code == IFN_COND_MUL || code == IFN_COND_AND |
8444 | || code == IFN_COND_IOR || code == IFN_COND_XOR); |
8445 | gcc_assert (op.num_ops == 4 && (op.ops[1] == op.ops[3])); |
8446 | } |
8447 | |
8448 | bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
8449 | |
8450 | vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); |
8451 | if (reduction_type == FOLD_LEFT_REDUCTION) |
8452 | { |
8453 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); |
8454 | gcc_assert (code.is_tree_code () || cond_fn_p); |
8455 | return vectorize_fold_left_reduction |
8456 | (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_stmt: reduc_def_phi, |
8457 | code, reduc_fn, ops: op.ops, num_ops: op.num_ops, vectype_in, |
8458 | reduc_index, masks, lens); |
8459 | } |
8460 | |
8461 | bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); |
8462 | gcc_assert (single_defuse_cycle |
8463 | || code == DOT_PROD_EXPR |
8464 | || code == WIDEN_SUM_EXPR |
8465 | || code == SAD_EXPR); |
8466 | |
8467 | /* Create the destination vector */ |
8468 | tree scalar_dest = gimple_get_lhs (stmt_info->stmt); |
8469 | tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
8470 | |
8471 | /* Get NCOPIES vector definitions for all operands except the reduction |
8472 | definition. */ |
8473 | vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, |
8474 | single_defuse_cycle && reduc_index == 0 |
8475 | ? NULL_TREE : op.ops[0], &vec_oprnds0, |
8476 | single_defuse_cycle && reduc_index == 1 |
8477 | ? NULL_TREE : op.ops[1], &vec_oprnds1, |
8478 | op.num_ops == 4 |
8479 | || (op.num_ops == 3 |
8480 | && !(single_defuse_cycle && reduc_index == 2)) |
8481 | ? op.ops[2] : NULL_TREE, &vec_oprnds2); |
8482 | |
8483 | /* For single def-use cycles get one copy of the vectorized reduction |
8484 | definition. */ |
8485 | if (single_defuse_cycle) |
8486 | { |
8487 | gcc_assert (!slp_node); |
8488 | vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_info, 1, |
8489 | op: op.ops[reduc_index], |
8490 | reduc_index == 0 ? &vec_oprnds0 |
8491 | : (reduc_index == 1 ? &vec_oprnds1 |
8492 | : &vec_oprnds2)); |
8493 | } |
8494 | |
8495 | bool emulated_mixed_dot_prod |
8496 | = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info); |
8497 | FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) |
8498 | { |
8499 | gimple *new_stmt; |
8500 | tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE }; |
8501 | if (masked_loop_p && !mask_by_cond_expr) |
8502 | { |
8503 | /* No conditional ifns have been defined for dot-product yet. */ |
8504 | gcc_assert (code != DOT_PROD_EXPR); |
8505 | |
8506 | /* Make sure that the reduction accumulator is vop[0]. */ |
8507 | if (reduc_index == 1) |
8508 | { |
8509 | gcc_assert (commutative_binary_op_p (code, op.type)); |
8510 | std::swap (a&: vop[0], b&: vop[1]); |
8511 | } |
8512 | tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
8513 | vec_num * ncopies, vectype_in, i); |
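/* The call below computes MASK ? VOP[0] OP VOP[1] : VOP[0], so inactive
lanes keep the previous value of the accumulator. */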
8514 | gcall *call = gimple_build_call_internal (cond_fn, 4, mask, |
8515 | vop[0], vop[1], vop[0]); |
8516 | new_temp = make_ssa_name (var: vec_dest, stmt: call); |
8517 | gimple_call_set_lhs (gs: call, lhs: new_temp); |
8518 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
8519 | vect_finish_stmt_generation (loop_vinfo, stmt_info, call, gsi); |
8520 | new_stmt = call; |
8521 | } |
8522 | else |
8523 | { |
8524 | if (op.num_ops >= 3) |
8525 | vop[2] = vec_oprnds2[i]; |
8526 | |
8527 | if (masked_loop_p && mask_by_cond_expr) |
8528 | { |
8529 | tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
8530 | vec_num * ncopies, vectype_in, i); |
8531 | build_vect_cond_expr (code, vop, mask, gsi); |
8532 | } |
8533 | |
8534 | if (emulated_mixed_dot_prod) |
8535 | new_stmt = vect_emulate_mixed_dot_prod (loop_vinfo, stmt_info, gsi, |
8536 | vec_dest, vop); |
8537 | |
8538 | else if (code.is_internal_fn () && !cond_fn_p) |
8539 | new_stmt = gimple_build_call_internal (internal_fn (code), |
8540 | op.num_ops, |
8541 | vop[0], vop[1], vop[2]); |
8542 | else if (code.is_internal_fn () && cond_fn_p) |
8543 | new_stmt = gimple_build_call_internal (internal_fn (code), |
8544 | op.num_ops, |
8545 | vop[0], vop[1], vop[2], |
8546 | vop[1]); |
8547 | else |
8548 | new_stmt = gimple_build_assign (vec_dest, tree_code (op.code), |
8549 | vop[0], vop[1], vop[2]); |
8550 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
8551 | gimple_set_lhs (new_stmt, new_temp); |
8552 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8553 | } |
8554 | |
8555 | if (slp_node) |
8556 | slp_node->push_vec_def (def: new_stmt); |
8557 | else if (single_defuse_cycle |
8558 | && i < ncopies - 1) |
8559 | { |
8560 | if (reduc_index == 0) |
8561 | vec_oprnds0.safe_push (obj: gimple_get_lhs (new_stmt)); |
8562 | else if (reduc_index == 1) |
8563 | vec_oprnds1.safe_push (obj: gimple_get_lhs (new_stmt)); |
8564 | else if (reduc_index == 2) |
8565 | vec_oprnds2.safe_push (obj: gimple_get_lhs (new_stmt)); |
8566 | } |
8567 | else |
8568 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
8569 | } |
8570 | |
8571 | if (!slp_node) |
8572 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
8573 | |
8574 | return true; |
8575 | } |
8576 | |
8577 | /* Transform phase of a cycle PHI. */ |
8578 | |
8579 | bool |
8580 | vect_transform_cycle_phi (loop_vec_info loop_vinfo, |
8581 | stmt_vec_info stmt_info, gimple **vec_stmt, |
8582 | slp_tree slp_node, slp_instance slp_node_instance) |
8583 | { |
8584 | tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
8585 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
8586 | int i; |
8587 | int ncopies; |
8588 | int j; |
8589 | bool nested_cycle = false; |
8590 | int vec_num; |
8591 | |
8592 | if (nested_in_vect_loop_p (loop, stmt_info)) |
8593 | { |
8594 | loop = loop->inner; |
8595 | nested_cycle = true; |
8596 | } |
8597 | |
8598 | stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
8599 | reduc_stmt_info = vect_stmt_to_vectorize (stmt_info: reduc_stmt_info); |
8600 | stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info); |
8601 | gcc_assert (reduc_info->is_reduc_info); |
8602 | |
8603 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION |
8604 | || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION) |
8605 | /* Leave the scalar phi in place. */ |
8606 | return true; |
8607 | |
8608 | tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info); |
8609 | /* For a nested cycle we do not fill the above. */ |
8610 | if (!vectype_in) |
8611 | vectype_in = STMT_VINFO_VECTYPE (stmt_info); |
8612 | gcc_assert (vectype_in); |
8613 | |
8614 | if (slp_node) |
8615 | { |
8616 | /* The size vect_schedule_slp_instance computes is off for us. */ |
8617 | vec_num = vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
8618 | * SLP_TREE_LANES (slp_node), vectype: vectype_in); |
8619 | ncopies = 1; |
8620 | } |
8621 | else |
8622 | { |
8623 | vec_num = 1; |
8624 | ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in); |
8625 | } |
8626 | |
8627 | /* Check whether we should use a single PHI node and accumulate |
8628 | vectors to one before the backedge. */ |
8629 | if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info)) |
8630 | ncopies = 1; |
8631 | |
8632 | /* Create the destination vector */ |
8633 | gphi *phi = as_a <gphi *> (p: stmt_info->stmt); |
8634 | tree vec_dest = vect_create_destination_var (gimple_phi_result (gs: phi), |
8635 | vectype_out); |
8636 | |
8637 | /* Get the loop-entry arguments. */ |
8638 | tree vec_initial_def = NULL_TREE; |
8639 | auto_vec<tree> vec_initial_defs; |
8640 | if (slp_node) |
8641 | { |
8642 | vec_initial_defs.reserve (nelems: vec_num); |
8643 | if (nested_cycle) |
8644 | { |
8645 | unsigned phi_idx = loop_preheader_edge (loop)->dest_idx; |
8646 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx], |
8647 | &vec_initial_defs); |
8648 | } |
8649 | else |
8650 | { |
8651 | gcc_assert (slp_node == slp_node_instance->reduc_phis); |
8652 | vec<tree> &initial_values = reduc_info->reduc_initial_values; |
8653 | vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
8654 | |
8655 | unsigned int num_phis = stmts.length (); |
8656 | if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) |
8657 | num_phis = 1; |
8658 | initial_values.reserve (nelems: num_phis); |
8659 | for (unsigned int i = 0; i < num_phis; ++i) |
8660 | { |
8661 | gphi *this_phi = as_a<gphi *> (p: stmts[i]->stmt); |
8662 | initial_values.quick_push (obj: vect_phi_initial_value (phi: this_phi)); |
8663 | } |
8664 | if (vec_num == 1) |
8665 | vect_find_reusable_accumulator (loop_vinfo, reduc_info); |
8666 | if (!initial_values.is_empty ()) |
8667 | { |
8668 | tree initial_value |
8669 | = (num_phis == 1 ? initial_values[0] : NULL_TREE); |
8670 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); |
8671 | tree neutral_op |
8672 | = neutral_op_for_reduction (TREE_TYPE (vectype_out), |
8673 | code, initial_value); |
8674 | get_initial_defs_for_reduction (loop_vinfo, reduc_info, |
8675 | vec_oprnds: &vec_initial_defs, number_of_vectors: vec_num, |
8676 | group_size: stmts.length (), neutral_op); |
8677 | } |
8678 | } |
8679 | } |
8680 | else |
8681 | { |
/* Get the scalar def before the loop that defines the initial
value of the reduction variable. */
8684 | tree initial_def = vect_phi_initial_value (phi); |
8685 | reduc_info->reduc_initial_values.safe_push (obj: initial_def); |
8686 | /* Optimize: if initial_def is for REDUC_MAX smaller than the base |
8687 | and we can't use zero for induc_val, use initial_def. Similarly |
8688 | for REDUC_MIN and initial_def larger than the base. */ |
8689 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
8690 | { |
8691 | tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); |
8692 | if (TREE_CODE (initial_def) == INTEGER_CST |
8693 | && !integer_zerop (induc_val) |
8694 | && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR |
8695 | && tree_int_cst_lt (t1: initial_def, t2: induc_val)) |
8696 | || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR |
8697 | && tree_int_cst_lt (t1: induc_val, t2: initial_def)))) |
8698 | { |
8699 | induc_val = initial_def; |
/* Communicate to epilogue generation that we used the
initial_def. */
8702 | STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; |
8703 | } |
8704 | vec_initial_def = build_vector_from_val (vectype_out, induc_val); |
8705 | } |
8706 | else if (nested_cycle) |
8707 | { |
8708 | /* Do not use an adjustment def as that case is not supported |
8709 | correctly if ncopies is not one. */ |
8710 | vect_get_vec_defs_for_operand (vinfo: loop_vinfo, reduc_stmt_info, |
8711 | ncopies, op: initial_def, |
8712 | &vec_initial_defs); |
8713 | } |
8714 | else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION |
8715 | || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) |
8716 | /* Fill the initial vector with the initial scalar value. */ |
8717 | vec_initial_def |
8718 | = get_initial_def_for_reduction (loop_vinfo, reduc_info: reduc_stmt_info, |
8719 | init_val: initial_def, neutral_op: initial_def); |
8720 | else |
8721 | { |
8722 | if (ncopies == 1) |
8723 | vect_find_reusable_accumulator (loop_vinfo, reduc_info); |
8724 | if (!reduc_info->reduc_initial_values.is_empty ()) |
8725 | { |
8726 | initial_def = reduc_info->reduc_initial_values[0]; |
8727 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); |
8728 | tree neutral_op |
8729 | = neutral_op_for_reduction (TREE_TYPE (initial_def), |
8730 | code, initial_value: initial_def); |
8731 | gcc_assert (neutral_op); |
8732 | /* Try to simplify the vector initialization by applying an |
8733 | adjustment after the reduction has been performed. */ |
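/* For example (illustration only): for a sum reduction with initial
value 10 the vector accumulator can start at the neutral value
{0, 0, ...} and the 10 is added back after the final reduction in
the epilogue. */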
8734 | if (!reduc_info->reused_accumulator |
8735 | && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
8736 | && !operand_equal_p (neutral_op, initial_def)) |
8737 | { |
8738 | STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) |
8739 | = initial_def; |
8740 | initial_def = neutral_op; |
8741 | } |
8742 | vec_initial_def |
8743 | = get_initial_def_for_reduction (loop_vinfo, reduc_info, |
8744 | init_val: initial_def, neutral_op); |
8745 | } |
8746 | } |
8747 | } |
8748 | |
8749 | if (vec_initial_def) |
8750 | { |
8751 | vec_initial_defs.create (nelems: ncopies); |
8752 | for (i = 0; i < ncopies; ++i) |
8753 | vec_initial_defs.quick_push (obj: vec_initial_def); |
8754 | } |
8755 | |
8756 | if (auto *accumulator = reduc_info->reused_accumulator) |
8757 | { |
8758 | tree def = accumulator->reduc_input; |
8759 | if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) |
8760 | { |
8761 | unsigned int nreduc; |
8762 | bool res = constant_multiple_p (a: TYPE_VECTOR_SUBPARTS |
8763 | (TREE_TYPE (def)), |
8764 | b: TYPE_VECTOR_SUBPARTS (node: vectype_out), |
8765 | multiple: &nreduc); |
8766 | gcc_assert (res); |
8767 | gimple_seq stmts = NULL; |
8768 | /* Reduce the single vector to a smaller one. */ |
8769 | if (nreduc != 1) |
8770 | { |
8771 | /* Perform the reduction in the appropriate type. */ |
8772 | tree rvectype = vectype_out; |
8773 | if (!useless_type_conversion_p (TREE_TYPE (vectype_out), |
8774 | TREE_TYPE (TREE_TYPE (def)))) |
8775 | rvectype = build_vector_type (TREE_TYPE (TREE_TYPE (def)), |
8776 | TYPE_VECTOR_SUBPARTS |
8777 | (node: vectype_out)); |
8778 | def = vect_create_partial_epilog (vec_def: def, vectype: rvectype, |
8779 | STMT_VINFO_REDUC_CODE |
8780 | (reduc_info), |
8781 | seq: &stmts); |
8782 | } |
8783 | /* The epilogue loop might use a different vector mode, like |
8784 | VNx2DI vs. V2DI. */ |
8785 | if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def))) |
8786 | { |
8787 | tree reduc_type = build_vector_type_for_mode |
8788 | (TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out)); |
8789 | def = gimple_convert (seq: &stmts, type: reduc_type, op: def); |
8790 | } |
8791 | /* Adjust the input so we pick up the partially reduced value |
8792 | for the skip edge in vect_create_epilog_for_reduction. */ |
8793 | accumulator->reduc_input = def; |
8794 | /* And the reduction could be carried out using a different sign. */ |
8795 | if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) |
8796 | def = gimple_convert (seq: &stmts, type: vectype_out, op: def); |
8797 | if (loop_vinfo->main_loop_edge) |
8798 | { |
/* While we'd like to insert on the edge, this would split
blocks and disturb bookkeeping; we will also eventually
need this on the skip edge. Rely on sinking to fix up
optimal placement and insert in the pred. */
8803 | gimple_stmt_iterator gsi |
8804 | = gsi_last_bb (bb: loop_vinfo->main_loop_edge->src); |
8805 | /* Insert before a cond that eventually skips the |
8806 | epilogue. */ |
8807 | if (!gsi_end_p (i: gsi) && stmt_ends_bb_p (gsi_stmt (i: gsi))) |
8808 | gsi_prev (i: &gsi); |
8809 | gsi_insert_seq_after (&gsi, stmts, GSI_CONTINUE_LINKING); |
8810 | } |
8811 | else |
8812 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), |
8813 | stmts); |
8814 | } |
8815 | if (loop_vinfo->main_loop_edge) |
8816 | vec_initial_defs[0] |
8817 | = vect_get_main_loop_result (loop_vinfo, def, |
8818 | vec_initial_defs[0]); |
8819 | else |
8820 | vec_initial_defs.safe_push (obj: def); |
8821 | } |
8822 | |
8823 | /* Generate the reduction PHIs upfront. */ |
8824 | for (i = 0; i < vec_num; i++) |
8825 | { |
8826 | tree vec_init_def = vec_initial_defs[i]; |
8827 | for (j = 0; j < ncopies; j++) |
8828 | { |
8829 | /* Create the reduction-phi that defines the reduction |
8830 | operand. */ |
8831 | gphi *new_phi = create_phi_node (vec_dest, loop->header); |
8832 | |
8833 | /* Set the loop-entry arg of the reduction-phi. */ |
8834 | if (j != 0 && nested_cycle) |
8835 | vec_init_def = vec_initial_defs[j]; |
8836 | add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop), |
8837 | UNKNOWN_LOCATION); |
8838 | |
8839 | /* The loop-latch arg is set in epilogue processing. */ |
8840 | |
8841 | if (slp_node) |
8842 | slp_node->push_vec_def (def: new_phi); |
8843 | else |
8844 | { |
8845 | if (j == 0) |
8846 | *vec_stmt = new_phi; |
8847 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_phi); |
8848 | } |
8849 | } |
8850 | } |
8851 | |
8852 | return true; |
8853 | } |
8854 | |
8855 | /* Vectorizes LC PHIs. */ |
8856 | |
8857 | bool |
8858 | vectorizable_lc_phi (loop_vec_info loop_vinfo, |
8859 | stmt_vec_info stmt_info, gimple **vec_stmt, |
8860 | slp_tree slp_node) |
8861 | { |
8862 | if (!loop_vinfo |
8863 | || !is_a <gphi *> (p: stmt_info->stmt) |
8864 | || gimple_phi_num_args (gs: stmt_info->stmt) != 1) |
8865 | return false; |
8866 | |
8867 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
8868 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) |
8869 | return false; |
8870 | |
8871 | if (!vec_stmt) /* transformation not required. */ |
8872 | { |
/* Deal with copies from externs or constants that are disguised as
loop-closed PHI nodes (PR97886). */
8875 | if (slp_node |
8876 | && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], |
8877 | SLP_TREE_VECTYPE (slp_node))) |
8878 | { |
8879 | if (dump_enabled_p ()) |
8880 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8881 | "incompatible vector types for invariants\n" ); |
8882 | return false; |
8883 | } |
8884 | STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; |
8885 | return true; |
8886 | } |
8887 | |
8888 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
8889 | tree scalar_dest = gimple_phi_result (gs: stmt_info->stmt); |
8890 | basic_block bb = gimple_bb (g: stmt_info->stmt); |
8891 | edge e = single_pred_edge (bb); |
8892 | tree vec_dest = vect_create_destination_var (scalar_dest, vectype); |
8893 | auto_vec<tree> vec_oprnds; |
8894 | vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, |
8895 | !slp_node ? vect_get_num_copies (loop_vinfo, vectype) : 1, |
8896 | gimple_phi_arg_def (gs: stmt_info->stmt, index: 0), &vec_oprnds); |
8897 | for (unsigned i = 0; i < vec_oprnds.length (); i++) |
8898 | { |
8899 | /* Create the vectorized LC PHI node. */ |
8900 | gphi *new_phi = create_phi_node (vec_dest, bb); |
8901 | add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION); |
8902 | if (slp_node) |
8903 | slp_node->push_vec_def (def: new_phi); |
8904 | else |
8905 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_phi); |
8906 | } |
8907 | if (!slp_node) |
8908 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
8909 | |
8910 | return true; |
8911 | } |
8912 | |
8913 | /* Vectorizes PHIs. */ |
8914 | |
8915 | bool |
8916 | vectorizable_phi (vec_info *, |
8917 | stmt_vec_info stmt_info, gimple **vec_stmt, |
8918 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
8919 | { |
8920 | if (!is_a <gphi *> (p: stmt_info->stmt) || !slp_node) |
8921 | return false; |
8922 | |
8923 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) |
8924 | return false; |
8925 | |
8926 | tree vectype = SLP_TREE_VECTYPE (slp_node); |
8927 | |
8928 | if (!vec_stmt) /* transformation not required. */ |
8929 | { |
8930 | slp_tree child; |
8931 | unsigned i; |
8932 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child) |
8933 | if (!child) |
8934 | { |
8935 | if (dump_enabled_p ()) |
8936 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8937 | "PHI node with unvectorized backedge def\n" ); |
8938 | return false; |
8939 | } |
8940 | else if (!vect_maybe_update_slp_op_vectype (child, vectype)) |
8941 | { |
8942 | if (dump_enabled_p ()) |
8943 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8944 | "incompatible vector types for invariants\n" ); |
8945 | return false; |
8946 | } |
8947 | else if (SLP_TREE_DEF_TYPE (child) == vect_internal_def |
8948 | && !useless_type_conversion_p (vectype, |
8949 | SLP_TREE_VECTYPE (child))) |
8950 | { |
/* With bools we can have mask and non-mask precision vectors
or different non-mask precisions. While pattern recognition is
supposed to guarantee consistency here, bugs in it can cause
mismatches (PR103489 and PR103800 for example).
Deal with them here instead of ICEing later. */
8956 | if (dump_enabled_p ()) |
8957 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8958 | "incompatible vector type setup from " |
8959 | "bool pattern detection\n" ); |
8960 | return false; |
8961 | } |
8962 | |
8963 | /* For single-argument PHIs assume coalescing which means zero cost |
8964 | for the scalar and the vector PHIs. This avoids artificially |
8965 | favoring the vector path (but may pessimize it in some cases). */ |
8966 | if (gimple_phi_num_args (gs: as_a <gphi *> (p: stmt_info->stmt)) > 1) |
8967 | record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), |
8968 | vector_stmt, stmt_info, vectype, 0, vect_body); |
8969 | STMT_VINFO_TYPE (stmt_info) = phi_info_type; |
8970 | return true; |
8971 | } |
8972 | |
8973 | tree scalar_dest = gimple_phi_result (gs: stmt_info->stmt); |
8974 | basic_block bb = gimple_bb (g: stmt_info->stmt); |
8975 | tree vec_dest = vect_create_destination_var (scalar_dest, vectype); |
8976 | auto_vec<gphi *> new_phis; |
8977 | for (unsigned i = 0; i < gimple_phi_num_args (gs: stmt_info->stmt); ++i) |
8978 | { |
8979 | slp_tree child = SLP_TREE_CHILDREN (slp_node)[i]; |
8980 | |
8981 | /* Skip not yet vectorized defs. */ |
8982 | if (SLP_TREE_DEF_TYPE (child) == vect_internal_def |
8983 | && SLP_TREE_VEC_DEFS (child).is_empty ()) |
8984 | continue; |
8985 | |
8986 | auto_vec<tree> vec_oprnds; |
8987 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds); |
8988 | if (!new_phis.exists ()) |
8989 | { |
8990 | new_phis.create (nelems: vec_oprnds.length ()); |
8991 | for (unsigned j = 0; j < vec_oprnds.length (); j++) |
8992 | { |
8993 | /* Create the vectorized LC PHI node. */ |
8994 | new_phis.quick_push (obj: create_phi_node (vec_dest, bb)); |
8995 | slp_node->push_vec_def (def: new_phis[j]); |
8996 | } |
8997 | } |
8998 | edge e = gimple_phi_arg_edge (phi: as_a <gphi *> (p: stmt_info->stmt), i); |
8999 | for (unsigned j = 0; j < vec_oprnds.length (); j++) |
9000 | add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION); |
9001 | } |
9002 | /* We should have at least one already vectorized child. */ |
9003 | gcc_assert (new_phis.exists ()); |
9004 | |
9005 | return true; |
9006 | } |
9007 | |
9008 | /* Vectorizes first order recurrences. An overview of the transformation |
9009 | is described below. Suppose we have the following loop. |
9010 | |
9011 | int t = 0; |
9012 | for (int i = 0; i < n; ++i) |
9013 | { |
9014 | b[i] = a[i] - t; |
9015 | t = a[i]; |
9016 | } |
9017 | |
9018 | There is a first-order recurrence on 'a'. For this loop, the scalar IR |
9019 | looks (simplified) like: |
9020 | |
9021 | scalar.preheader: |
9022 | init = 0; |
9023 | |
9024 | scalar.body: |
9025 | i = PHI <0(scalar.preheader), i+1(scalar.body)> |
_2 = PHI <init(scalar.preheader), _1(scalar.body)>
9027 | _1 = a[i] |
9028 | b[i] = _1 - _2 |
9029 | if (i < n) goto scalar.body |
9030 | |
In this example, _2 is a recurrence because its value depends on the
9032 | previous iteration. We vectorize this as (VF = 4) |
9033 | |
9034 | vector.preheader: |
9035 | vect_init = vect_cst(..., ..., ..., 0) |
9036 | |
9037 | vector.body |
9038 | i = PHI <0(vector.preheader), i+4(vector.body)> |
9039 | vect_1 = PHI <vect_init(vector.preheader), v2(vector.body)> |
9040 | vect_2 = a[i, i+1, i+2, i+3]; |
9041 | vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 }) |
9042 | b[i, i+1, i+2, i+3] = vect_2 - vect_3 |
9043 | if (..) goto vector.body |
9044 | |
9045 | In this function, vectorizable_recurr, we code generate both the |
9046 | vector PHI node and the permute since those together compute the |
9047 | vectorized value of the scalar PHI. We do not yet have the |
9048 | backedge value to fill in there nor into the vec_perm. Those |
9049 | are filled in maybe_set_vectorized_backedge_value and |
9050 | vect_schedule_scc. |
9051 | |
9052 | TODO: Since the scalar loop does not have a use of the recurrence |
9053 | outside of the loop the natural way to implement peeling via |
9054 | vectorizing the live value doesn't work. For now peeling of loops |
9055 | with a recurrence is not implemented. For SLP the supported cases |
9056 | are restricted to those requiring a single vector recurrence PHI. */ |
9057 | |
9058 | bool |
9059 | vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, |
9060 | gimple **vec_stmt, slp_tree slp_node, |
9061 | stmt_vector_for_cost *cost_vec) |
9062 | { |
9063 | if (!loop_vinfo || !is_a<gphi *> (p: stmt_info->stmt)) |
9064 | return false; |
9065 | |
9066 | gphi *phi = as_a<gphi *> (p: stmt_info->stmt); |
9067 | |
9068 | /* So far we only support first-order recurrence auto-vectorization. */ |
9069 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence) |
9070 | return false; |
9071 | |
9072 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9073 | unsigned ncopies; |
9074 | if (slp_node) |
9075 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
9076 | else |
9077 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9078 | poly_int64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9079 | unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1; |
9080 | /* We need to be able to make progress with a single vector. */ |
9081 | if (maybe_gt (dist * 2, nunits)) |
9082 | { |
9083 | if (dump_enabled_p ()) |
9084 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9085 | "first order recurrence exceeds half of " |
9086 | "a vector\n" ); |
9087 | return false; |
9088 | } |
9089 | |
/* First-order recurrence autovectorization needs to handle a permutation
with indices = [nunits-dist, nunits-dist+1, ...] (dist is 1 when not
doing SLP). */
9092 | vec_perm_builder sel (nunits, 1, 3); |
9093 | for (int i = 0; i < 3; ++i) |
9094 | sel.quick_push (obj: nunits - dist + i); |
9095 | vec_perm_indices indices (sel, 2, nunits); |
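/* E.g. for nunits = 4 and a single lane this expands to the indices
{ 3, 4, 5, 6 }: the last element of the previous vector followed by
the first three elements of the current one, as in the example above. */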
9096 | |
9097 | if (!vec_stmt) /* transformation not required. */ |
9098 | { |
9099 | if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), |
9100 | indices)) |
9101 | return false; |
9102 | |
9103 | if (slp_node) |
9104 | { |
9105 | /* We eventually need to set a vector type on invariant |
9106 | arguments. */ |
9107 | unsigned j; |
9108 | slp_tree child; |
9109 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
9110 | if (!vect_maybe_update_slp_op_vectype |
9111 | (child, SLP_TREE_VECTYPE (slp_node))) |
9112 | { |
9113 | if (dump_enabled_p ()) |
9114 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9115 | "incompatible vector types for " |
9116 | "invariants\n" ); |
9117 | return false; |
9118 | } |
9119 | } |
9120 | /* The recurrence costs the initialization vector and one permute |
9121 | for each copy. */ |
9122 | unsigned prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, |
9123 | stmt_info, misalign: 0, where: vect_prologue); |
9124 | unsigned inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_stmt, |
9125 | stmt_info, misalign: 0, where: vect_body); |
9126 | if (dump_enabled_p ()) |
9127 | dump_printf_loc (MSG_NOTE, vect_location, |
9128 | "vectorizable_recurr: inside_cost = %d, " |
9129 | "prologue_cost = %d .\n" , inside_cost, |
9130 | prologue_cost); |
9131 | |
9132 | STMT_VINFO_TYPE (stmt_info) = recurr_info_type; |
9133 | return true; |
9134 | } |
9135 | |
9136 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
9137 | basic_block bb = gimple_bb (g: phi); |
tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
9139 | if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) |
9140 | { |
9141 | gimple_seq stmts = NULL; |
9142 | preheader = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: preheader); |
9143 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
9144 | } |
9145 | tree vec_init = build_vector_from_val (vectype, preheader); |
9146 | vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); |
9147 | |
9148 | /* Create the vectorized first-order PHI node. */ |
9149 | tree vec_dest = vect_get_new_vect_var (vectype, |
9150 | vect_simple_var, "vec_recur_" ); |
9151 | gphi *new_phi = create_phi_node (vec_dest, bb); |
9152 | add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); |
9153 | |
/* Insert the shuffles needed for first-order recurrence autovectorization:
result = VEC_PERM <vec_recur, vect_1, index[nunits-1, nunits, ...]>. */
9156 | tree perm = vect_gen_perm_mask_checked (vectype, indices); |
9157 | |
9158 | /* Insert the required permute after the latch definition. The |
9159 | second and later operands are tentative and will be updated when we have |
9160 | vectorized the latch definition. */ |
9161 | edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
9162 | gimple *latch_def = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, le)); |
9163 | gimple_stmt_iterator gsi2 = gsi_for_stmt (latch_def); |
9164 | gsi_next (i: &gsi2); |
9165 | |
9166 | for (unsigned i = 0; i < ncopies; ++i) |
9167 | { |
9168 | vec_dest = make_ssa_name (var: vectype); |
9169 | gassign *vperm |
9170 | = gimple_build_assign (vec_dest, VEC_PERM_EXPR, |
9171 | i == 0 ? gimple_phi_result (gs: new_phi) : NULL, |
9172 | NULL, perm); |
9173 | vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2); |
9174 | |
9175 | if (slp_node) |
9176 | slp_node->push_vec_def (def: vperm); |
9177 | else |
9178 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: vperm); |
9179 | } |
9180 | |
9181 | if (!slp_node) |
9182 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
9183 | return true; |
9184 | } |
9185 | |
9186 | /* Return true if VECTYPE represents a vector that requires lowering |
9187 | by the vector lowering pass. */ |
9188 | |
9189 | bool |
9190 | vect_emulated_vector_p (tree vectype) |
9191 | { |
9192 | return (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
9193 | && (!VECTOR_BOOLEAN_TYPE_P (vectype) |
9194 | || TYPE_PRECISION (TREE_TYPE (vectype)) != 1)); |
9195 | } |
9196 | |
9197 | /* Return true if we can emulate CODE on an integer mode representation |
9198 | of a vector. */ |
9199 | |
9200 | bool |
9201 | vect_can_vectorize_without_simd_p (tree_code code) |
9202 | { |
9203 | switch (code) |
9204 | { |
9205 | case PLUS_EXPR: |
9206 | case MINUS_EXPR: |
9207 | case NEGATE_EXPR: |
9208 | case BIT_AND_EXPR: |
9209 | case BIT_IOR_EXPR: |
9210 | case BIT_XOR_EXPR: |
9211 | case BIT_NOT_EXPR: |
9212 | return true; |
9213 | |
9214 | default: |
9215 | return false; |
9216 | } |
9217 | } |
9218 | |
9219 | /* Likewise, but taking a code_helper. */ |
9220 | |
9221 | bool |
9222 | vect_can_vectorize_without_simd_p (code_helper code) |
9223 | { |
9224 | return (code.is_tree_code () |
9225 | && vect_can_vectorize_without_simd_p (code: tree_code (code))); |
9226 | } |
9227 | |
9228 | /* Create vector init for vectorized iv. */ |
9229 | static tree |
9230 | vect_create_nonlinear_iv_init (gimple_seq* stmts, tree init_expr, |
9231 | tree step_expr, poly_uint64 nunits, |
9232 | tree vectype, |
9233 | enum vect_induction_op_type induction_type) |
9234 | { |
9235 | unsigned HOST_WIDE_INT const_nunits; |
9236 | tree vec_shift, vec_init, new_name; |
9237 | unsigned i; |
9238 | tree itype = TREE_TYPE (vectype); |
9239 | |
/* iv_loop is the loop to be vectorized.  Create the vector of the first
VF values of the induction; for four units this is
mul:      vec_init = [X, X*S, X*S^2, X*S^3]
shl/shr:  vec_init = [X, X << S, X << 2*S, X << 3*S] (resp. >>)
neg:      vec_init = [X, -X, X, -X]
(S = step_expr, X = init_expr). */
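/* For example (illustration only): a mul induction with X = 3, S = 2 and
four units yields vec_init = { 3, 6, 12, 24 }. */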
9242 | new_name = gimple_convert (seq: stmts, type: itype, op: init_expr); |
9243 | switch (induction_type) |
9244 | { |
9245 | case vect_step_op_shr: |
9246 | case vect_step_op_shl: |
9247 | /* Build the Initial value from shift_expr. */ |
9248 | vec_init = gimple_build_vector_from_val (seq: stmts, |
9249 | type: vectype, |
9250 | op: new_name); |
9251 | vec_shift = gimple_build (seq: stmts, code: VEC_SERIES_EXPR, type: vectype, |
9252 | ops: build_zero_cst (itype), ops: step_expr); |
9253 | vec_init = gimple_build (seq: stmts, |
9254 | code: (induction_type == vect_step_op_shr |
9255 | ? RSHIFT_EXPR : LSHIFT_EXPR), |
9256 | type: vectype, ops: vec_init, ops: vec_shift); |
9257 | break; |
9258 | |
9259 | case vect_step_op_neg: |
9260 | { |
9261 | vec_init = gimple_build_vector_from_val (seq: stmts, |
9262 | type: vectype, |
9263 | op: new_name); |
9264 | tree vec_neg = gimple_build (seq: stmts, code: NEGATE_EXPR, |
9265 | type: vectype, ops: vec_init); |
9266 | /* The encoding has 2 interleaved stepped patterns. */ |
9267 | vec_perm_builder sel (nunits, 2, 3); |
9268 | sel.quick_grow (len: 6); |
9269 | for (i = 0; i < 3; i++) |
9270 | { |
9271 | sel[2 * i] = i; |
9272 | sel[2 * i + 1] = i + nunits; |
9273 | } |
9274 | vec_perm_indices indices (sel, 2, nunits); |
9275 | /* Don't use vect_gen_perm_mask_checked since can_vec_perm_const_p may |
9276 | fail when vec_init is const vector. In that situation vec_perm is not |
9277 | really needed. */ |
9278 | tree perm_mask_even |
9279 | = vect_gen_perm_mask_any (vectype, indices); |
9280 | vec_init = gimple_build (seq: stmts, code: VEC_PERM_EXPR, |
9281 | type: vectype, |
9282 | ops: vec_init, ops: vec_neg, |
9283 | ops: perm_mask_even); |
9284 | } |
9285 | break; |
9286 | |
9287 | case vect_step_op_mul: |
9288 | { |
9289 | /* Use unsigned mult to avoid UD integer overflow. */ |
9290 | gcc_assert (nunits.is_constant (&const_nunits)); |
9291 | tree utype = unsigned_type_for (itype); |
9292 | tree uvectype = build_vector_type (utype, |
9293 | TYPE_VECTOR_SUBPARTS (node: vectype)); |
9294 | new_name = gimple_convert (seq: stmts, type: utype, op: new_name); |
9295 | vec_init = gimple_build_vector_from_val (seq: stmts, |
9296 | type: uvectype, |
9297 | op: new_name); |
9298 | tree_vector_builder elts (uvectype, const_nunits, 1); |
9299 | tree elt_step = build_one_cst (utype); |
9300 | |
9301 | elts.quick_push (obj: elt_step); |
9302 | for (i = 1; i < const_nunits; i++) |
9303 | { |
/* Create: elt_step_i = elt_step_{i-1} * step_expr, i.e. pow (step_expr, i). */
9305 | elt_step = gimple_build (seq: stmts, code: MULT_EXPR, |
9306 | type: utype, ops: elt_step, ops: step_expr); |
9307 | elts.quick_push (obj: elt_step); |
9308 | } |
9309 | /* Create a vector from [new_name_0, new_name_1, ..., |
9310 | new_name_nunits-1]. */ |
9311 | tree vec_mul = gimple_build_vector (seq: stmts, builder: &elts); |
9312 | vec_init = gimple_build (seq: stmts, code: MULT_EXPR, type: uvectype, |
9313 | ops: vec_init, ops: vec_mul); |
9314 | vec_init = gimple_convert (seq: stmts, type: vectype, op: vec_init); |
9315 | } |
9316 | break; |
9317 | |
9318 | default: |
9319 | gcc_unreachable (); |
9320 | } |
9321 | |
9322 | return vec_init; |
9323 | } |
9324 | |
/* Peel INIT_EXPR by SKIP_NITERS iterations for INDUCTION_TYPE. */
9326 | tree |
9327 | vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr, |
9328 | tree skip_niters, tree step_expr, |
9329 | enum vect_induction_op_type induction_type) |
9330 | { |
9331 | gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST); |
9332 | tree type = TREE_TYPE (init_expr); |
9333 | unsigned prec = TYPE_PRECISION (type); |
9334 | switch (induction_type) |
9335 | { |
9336 | case vect_step_op_neg: |
9337 | if (TREE_INT_CST_LOW (skip_niters) % 2) |
9338 | init_expr = gimple_build (seq: stmts, code: NEGATE_EXPR, type, ops: init_expr); |
9339 | /* else no change. */ |
9340 | break; |
9341 | |
9342 | case vect_step_op_shr: |
9343 | case vect_step_op_shl: |
9344 | skip_niters = gimple_convert (seq: stmts, type, op: skip_niters); |
9345 | step_expr = gimple_build (seq: stmts, code: MULT_EXPR, type, ops: step_expr, ops: skip_niters); |
/* When the shift amount >= precision, we need to avoid UD.
In the original loop there is no UD, and according to the semantics
init_expr should be 0 for lshr and ashl, and >>= (prec - 1) for ashr. */
9349 | if (!tree_fits_uhwi_p (step_expr) |
9350 | || tree_to_uhwi (step_expr) >= prec) |
9351 | { |
9352 | if (induction_type == vect_step_op_shl |
9353 | || TYPE_UNSIGNED (type)) |
9354 | init_expr = build_zero_cst (type); |
9355 | else |
9356 | init_expr = gimple_build (seq: stmts, code: RSHIFT_EXPR, type, |
9357 | ops: init_expr, |
9358 | ops: wide_int_to_tree (type, cst: prec - 1)); |
9359 | } |
9360 | else |
9361 | init_expr = gimple_build (seq: stmts, code: (induction_type == vect_step_op_shr |
9362 | ? RSHIFT_EXPR : LSHIFT_EXPR), |
9363 | type, ops: init_expr, ops: step_expr); |
9364 | break; |
9365 | |
9366 | case vect_step_op_mul: |
9367 | { |
9368 | tree utype = unsigned_type_for (type); |
9369 | init_expr = gimple_convert (seq: stmts, type: utype, op: init_expr); |
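/* The peeled start value is init_expr * pow (step_expr, skip_niters)
computed modulo 2^prec; the exponentiation is done below with mpz and
the final multiplication in the unsigned type so overflow wraps. */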
9370 | wide_int skipn = wi::to_wide (t: skip_niters); |
9371 | wide_int begin = wi::to_wide (t: step_expr); |
9372 | auto_mpz base, exp, mod, res; |
9373 | wi::to_mpz (begin, base, TYPE_SIGN (type)); |
9374 | wi::to_mpz (skipn, exp, UNSIGNED); |
9375 | mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type)); |
9376 | mpz_powm (res, base, exp, mod); |
9377 | begin = wi::from_mpz (type, res, TYPE_SIGN (type)); |
9378 | tree mult_expr = wide_int_to_tree (type: utype, cst: begin); |
9379 | init_expr = gimple_build (seq: stmts, code: MULT_EXPR, type: utype, |
9380 | ops: init_expr, ops: mult_expr); |
9381 | init_expr = gimple_convert (seq: stmts, type, op: init_expr); |
9382 | } |
9383 | break; |
9384 | |
9385 | default: |
9386 | gcc_unreachable (); |
9387 | } |
9388 | |
9389 | return init_expr; |
9390 | } |
9391 | |
9392 | /* Create vector step for vectorized iv. */ |
9393 | static tree |
9394 | vect_create_nonlinear_iv_step (gimple_seq* stmts, tree step_expr, |
9395 | poly_uint64 vf, |
9396 | enum vect_induction_op_type induction_type) |
9397 | { |
9398 | tree expr = build_int_cst (TREE_TYPE (step_expr), vf); |
9399 | tree new_name = NULL; |
9400 | /* Step should be pow (step, vf) for mult induction. */ |
9401 | if (induction_type == vect_step_op_mul) |
9402 | { |
9403 | gcc_assert (vf.is_constant ()); |
9404 | wide_int begin = wi::to_wide (t: step_expr); |
9405 | |
9406 | for (unsigned i = 0; i != vf.to_constant () - 1; i++) |
9407 | begin = wi::mul (x: begin, y: wi::to_wide (t: step_expr)); |
9408 | |
9409 | new_name = wide_int_to_tree (TREE_TYPE (step_expr), cst: begin); |
9410 | } |
9411 | else if (induction_type == vect_step_op_neg) |
9412 | /* Do nothing. */ |
9413 | ; |
9414 | else |
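/* For the shift inductions the per-vector-iteration step is the total
shift amount over VF scalar iterations, i.e. VF * STEP. */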
9415 | new_name = gimple_build (seq: stmts, code: MULT_EXPR, TREE_TYPE (step_expr), |
9416 | ops: expr, ops: step_expr); |
9417 | return new_name; |
9418 | } |
9419 | |
9420 | static tree |
9421 | vect_create_nonlinear_iv_vec_step (loop_vec_info loop_vinfo, |
9422 | stmt_vec_info stmt_info, |
9423 | tree new_name, tree vectype, |
9424 | enum vect_induction_op_type induction_type) |
9425 | { |
9426 | /* No step is needed for neg induction. */ |
9427 | if (induction_type == vect_step_op_neg) |
9428 | return NULL; |
9429 | |
9430 | tree t = unshare_expr (new_name); |
9431 | gcc_assert (CONSTANT_CLASS_P (new_name) |
9432 | || TREE_CODE (new_name) == SSA_NAME); |
9433 | tree new_vec = build_vector_from_val (vectype, t); |
9434 | tree vec_step = vect_init_vector (loop_vinfo, stmt_info, |
9435 | new_vec, vectype, NULL); |
9436 | return vec_step; |
9437 | } |
9438 | |
9439 | /* Update vectorized iv with vect_step, induc_def is init. */ |
9440 | static tree |
9441 | vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype, |
9442 | tree induc_def, tree vec_step, |
9443 | enum vect_induction_op_type induction_type) |
9444 | { |
9445 | tree vec_def = induc_def; |
9446 | switch (induction_type) |
9447 | { |
9448 | case vect_step_op_mul: |
9449 | { |
9450 | /* Use unsigned mult to avoid UD integer overflow. */ |
9451 | tree uvectype |
9452 | = build_vector_type (unsigned_type_for (TREE_TYPE (vectype)), |
9453 | TYPE_VECTOR_SUBPARTS (node: vectype)); |
9454 | vec_def = gimple_convert (seq: stmts, type: uvectype, op: vec_def); |
9455 | vec_step = gimple_convert (seq: stmts, type: uvectype, op: vec_step); |
9456 | vec_def = gimple_build (seq: stmts, code: MULT_EXPR, type: uvectype, |
9457 | ops: vec_def, ops: vec_step); |
9458 | vec_def = gimple_convert (seq: stmts, type: vectype, op: vec_def); |
9459 | } |
9460 | break; |
9461 | |
9462 | case vect_step_op_shr: |
9463 | vec_def = gimple_build (seq: stmts, code: RSHIFT_EXPR, type: vectype, |
9464 | ops: vec_def, ops: vec_step); |
9465 | break; |
9466 | |
9467 | case vect_step_op_shl: |
9468 | vec_def = gimple_build (seq: stmts, code: LSHIFT_EXPR, type: vectype, |
9469 | ops: vec_def, ops: vec_step); |
9470 | break; |
9471 | case vect_step_op_neg: |
9472 | vec_def = induc_def; |
9473 | /* Do nothing. */ |
9474 | break; |
9475 | default: |
9476 | gcc_unreachable (); |
9477 | } |
9478 | |
9479 | return vec_def; |
9480 | |
9481 | } |
9482 | |
9483 | /* Function vectorizable_induction |
9484 | |
Check if STMT_INFO performs a nonlinear induction computation that can be
9486 | vectorized. If VEC_STMT is also passed, vectorize the induction PHI: create |
9487 | a vectorized phi to replace it, put it in VEC_STMT, and add it to the same |
9488 | basic block. |
9489 | Return true if STMT_INFO is vectorizable in this way. */ |
9490 | |
9491 | static bool |
9492 | vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, |
9493 | stmt_vec_info stmt_info, |
9494 | gimple **vec_stmt, slp_tree slp_node, |
9495 | stmt_vector_for_cost *cost_vec) |
9496 | { |
9497 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
9498 | unsigned ncopies; |
9499 | bool nested_in_vect_loop = false; |
9500 | class loop *iv_loop; |
9501 | tree vec_def; |
9502 | edge pe = loop_preheader_edge (loop); |
9503 | basic_block new_bb; |
9504 | tree vec_init, vec_step; |
9505 | tree new_name; |
9506 | gimple *new_stmt; |
9507 | gphi *induction_phi; |
9508 | tree induc_def, vec_dest; |
9509 | tree init_expr, step_expr; |
9510 | tree niters_skip; |
9511 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
9512 | unsigned i; |
9513 | gimple_stmt_iterator si; |
9514 | |
9515 | gphi *phi = dyn_cast <gphi *> (p: stmt_info->stmt); |
9516 | |
9517 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9518 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9519 | enum vect_induction_op_type induction_type |
9520 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); |
9521 | |
9522 | gcc_assert (induction_type > vect_step_op_add); |
9523 | |
9524 | if (slp_node) |
9525 | ncopies = 1; |
9526 | else |
9527 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9528 | gcc_assert (ncopies >= 1); |
9529 | |
9530 | /* FORNOW. Only handle nonlinear induction in the same loop. */ |
9531 | if (nested_in_vect_loop_p (loop, stmt_info)) |
9532 | { |
9533 | if (dump_enabled_p ()) |
9534 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9535 | "nonlinear induction in nested loop.\n" ); |
9536 | return false; |
9537 | } |
9538 | |
9539 | iv_loop = loop; |
9540 | gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); |
9541 | |
/* TODO: Support SLP for nonlinear ivs. There should be a separate vector iv
update for each iv and a permutation to generate the wanted vector iv. */
9544 | if (slp_node) |
9545 | { |
9546 | if (dump_enabled_p ()) |
9547 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9548 | "SLP induction not supported for nonlinear" |
9549 | " induction.\n" ); |
9550 | return false; |
9551 | } |
9552 | |
9553 | if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))) |
9554 | { |
9555 | if (dump_enabled_p ()) |
9556 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9557 | "floating point nonlinear induction vectorization" |
9558 | " not supported.\n" ); |
9559 | return false; |
9560 | } |
9561 | |
9562 | step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); |
9563 | init_expr = vect_phi_initial_value (phi); |
9564 | gcc_assert (step_expr != NULL_TREE && init_expr != NULL |
9565 | && TREE_CODE (step_expr) == INTEGER_CST); |
/* step_expr should be aligned with init_expr,
i.e. for uint64 a >> 1 the step is int but a vector<uint64> shift is used. */
9568 | step_expr = fold_convert (TREE_TYPE (vectype), step_expr); |
9569 | |
9570 | if (TREE_CODE (init_expr) == INTEGER_CST) |
9571 | init_expr = fold_convert (TREE_TYPE (vectype), init_expr); |
9572 | else |
9573 | gcc_assert (tree_nop_conversion_p (TREE_TYPE (vectype), |
9574 | TREE_TYPE (init_expr))); |
9575 | |
9576 | switch (induction_type) |
9577 | { |
9578 | case vect_step_op_neg: |
9579 | if (TREE_CODE (init_expr) != INTEGER_CST |
9580 | && TREE_CODE (init_expr) != REAL_CST) |
9581 | { |
9582 | /* Check for backend support of NEGATE_EXPR and vec_perm. */ |
9583 | if (!directly_supported_p (NEGATE_EXPR, vectype)) |
9584 | return false; |
9585 | |
9586 | /* The encoding has 2 interleaved stepped patterns. */ |
9587 | vec_perm_builder sel (nunits, 2, 3); |
9588 | machine_mode mode = TYPE_MODE (vectype); |
9589 | sel.quick_grow (len: 6); |
9590 | for (i = 0; i < 3; i++) |
9591 | { |
9592 | sel[i * 2] = i; |
9593 | sel[i * 2 + 1] = i + nunits; |
9594 | } |
9595 | vec_perm_indices indices (sel, 2, nunits); |
9596 | if (!can_vec_perm_const_p (mode, mode, indices)) |
9597 | return false; |
9598 | } |
9599 | break; |
9600 | |
9601 | case vect_step_op_mul: |
9602 | { |
9603 | /* Check for backend support of MULT_EXPR. */ |
9604 | if (!directly_supported_p (MULT_EXPR, vectype)) |
9605 | return false; |
9606 | |
/* ??? How to construct the step vector for a variable-length vector:
[ 1, step, pow (step, 2), pow (step, 3), ... ]. */
9609 | if (!vf.is_constant ()) |
9610 | return false; |
9611 | } |
9612 | break; |
9613 | |
9614 | case vect_step_op_shr: |
9615 | /* Check for backend support of RSHIFT_EXPR. */ |
9616 | if (!directly_supported_p (RSHIFT_EXPR, vectype, optab_vector)) |
9617 | return false; |
9618 | |
9619 | /* Don't shift more than type precision to avoid UD. */ |
9620 | if (!tree_fits_uhwi_p (step_expr) |
9621 | || maybe_ge (nunits * tree_to_uhwi (step_expr), |
9622 | TYPE_PRECISION (TREE_TYPE (init_expr)))) |
9623 | return false; |
9624 | break; |
9625 | |
9626 | case vect_step_op_shl: |
/* Check for backend support of LSHIFT_EXPR. */
9628 | if (!directly_supported_p (LSHIFT_EXPR, vectype, optab_vector)) |
9629 | return false; |
9630 | |
9631 | /* Don't shift more than type precision to avoid UD. */ |
9632 | if (!tree_fits_uhwi_p (step_expr) |
9633 | || maybe_ge (nunits * tree_to_uhwi (step_expr), |
9634 | TYPE_PRECISION (TREE_TYPE (init_expr)))) |
9635 | return false; |
9636 | |
9637 | break; |
9638 | |
9639 | default: |
9640 | gcc_unreachable (); |
9641 | } |
9642 | |
9643 | if (!vec_stmt) /* transformation not required. */ |
9644 | { |
9645 | unsigned inside_cost = 0, prologue_cost = 0; |
/* loop cost for vec_loop. */
9648 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_stmt, |
9649 | stmt_info, misalign: 0, where: vect_body); |
9650 | |
/* Neg induction doesn't have any inside_cost. */
9653 | if (induction_type == vect_step_op_neg) |
9654 | inside_cost = 0; |
9655 | |
9656 | /* prologue cost for vec_init and vec_step. */ |
9657 | prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_to_vec, |
9658 | stmt_info, misalign: 0, where: vect_prologue); |
9659 | |
9660 | if (dump_enabled_p ()) |
9661 | dump_printf_loc (MSG_NOTE, vect_location, |
9662 | "vect_model_induction_cost: inside_cost = %d, " |
9663 | "prologue_cost = %d. \n" , inside_cost, |
9664 | prologue_cost); |
9665 | |
9666 | STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; |
9667 | DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction" ); |
9668 | return true; |
9669 | } |
9670 | |
9671 | /* Transform. */ |
9672 | |
/* Compute a vector variable, initialized with the first VF values of
the nonlinear induction variable; see vect_create_nonlinear_iv_init
for the per-induction-type form (e.g. [X, X*S, X*S^2, X*S^3] for a
mul induction with IV_PHI='X' and step S and a vector of 4 units). */
9677 | |
9678 | if (dump_enabled_p ()) |
9679 | dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n" ); |
9680 | |
9681 | pe = loop_preheader_edge (iv_loop); |
9682 | /* Find the first insertion point in the BB. */ |
9683 | basic_block bb = gimple_bb (g: phi); |
9684 | si = gsi_after_labels (bb); |
9685 | |
9686 | gimple_seq stmts = NULL; |
9687 | |
9688 | niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
9689 | /* If we are using the loop mask to "peel" for alignment then we need |
9690 | to adjust the start value here. */ |
9691 | if (niters_skip != NULL_TREE) |
9692 | init_expr = vect_peel_nonlinear_iv_init (stmts: &stmts, init_expr, skip_niters: niters_skip, |
9693 | step_expr, induction_type); |
9694 | |
9695 | vec_init = vect_create_nonlinear_iv_init (stmts: &stmts, init_expr, |
9696 | step_expr, nunits, vectype, |
9697 | induction_type); |
9698 | if (stmts) |
9699 | { |
9700 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
9701 | gcc_assert (!new_bb); |
9702 | } |
9703 | |
9704 | stmts = NULL; |
9705 | new_name = vect_create_nonlinear_iv_step (stmts: &stmts, step_expr, |
9706 | vf, induction_type); |
9707 | if (stmts) |
9708 | { |
9709 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
9710 | gcc_assert (!new_bb); |
9711 | } |
9712 | |
9713 | vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info, |
9714 | new_name, vectype, |
9715 | induction_type); |
9716 | /* Create the following def-use cycle: |
9717 | loop prolog: |
9718 | vec_init = ... |
9719 | vec_step = ... |
9720 | loop: |
9721 | vec_iv = PHI <vec_init, vec_loop> |
9722 | ... |
9723 | STMT |
9724 | ... |
9725 | vec_loop = vec_iv + vec_step; */ |
9726 | |
9727 | /* Create the induction-phi that defines the induction-operand. */ |
9728 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_" ); |
9729 | induction_phi = create_phi_node (vec_dest, iv_loop->header); |
9730 | induc_def = PHI_RESULT (induction_phi); |
9731 | |
9732 | /* Create the iv update inside the loop. */ |
9733 | stmts = NULL; |
9734 | vec_def = vect_update_nonlinear_iv (stmts: &stmts, vectype, |
9735 | induc_def, vec_step, |
9736 | induction_type); |
9737 | |
9738 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
9739 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
9740 | |
9741 | /* Set the arguments of the phi node: */ |
9742 | add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
9743 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
9744 | UNKNOWN_LOCATION); |
9745 | |
9746 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: induction_phi); |
9747 | *vec_stmt = induction_phi; |
9748 | |
9749 | /* In case that vectorization factor (VF) is bigger than the number |
9750 | of elements that we can fit in a vectype (nunits), we have to generate |
9751 | more than one vector stmt - i.e - we need to "unroll" the |
9752 | vector stmt by a factor VF/nunits. For more details see documentation |
9753 | in vectorizable_operation. */ |
9754 | |
9755 | if (ncopies > 1) |
9756 | { |
9757 | stmts = NULL; |
9758 | /* FORNOW. This restriction should be relaxed. */ |
9759 | gcc_assert (!nested_in_vect_loop); |
9760 | |
9761 | new_name = vect_create_nonlinear_iv_step (stmts: &stmts, step_expr, |
9762 | vf: nunits, induction_type); |
9763 | |
9764 | vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info, |
9765 | new_name, vectype, |
9766 | induction_type); |
9767 | vec_def = induc_def; |
9768 | for (i = 1; i < ncopies; i++) |
9769 | { |
9770 | /* vec_i = vec_prev + vec_step. */ |
9771 | stmts = NULL; |
9772 | vec_def = vect_update_nonlinear_iv (stmts: &stmts, vectype, |
9773 | induc_def: vec_def, vec_step, |
9774 | induction_type); |
9775 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
9776 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
9777 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
9778 | } |
9779 | } |
9780 | |
9781 | if (dump_enabled_p ()) |
9782 | dump_printf_loc (MSG_NOTE, vect_location, |
9783 | "transform induction: created def-use cycle: %G%G" , |
9784 | (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def)); |
9785 | |
9786 | return true; |
9787 | } |
9788 | |
9789 | /* Function vectorizable_induction |
9790 | |
9791 | Check if STMT_INFO performs an induction computation that can be vectorized. |
9792 | If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized |
9793 | phi to replace it, put it in VEC_STMT, and add it to the same basic block. |
9794 | Return true if STMT_INFO is vectorizable in this way. */ |
9795 | |
9796 | bool |
9797 | vectorizable_induction (loop_vec_info loop_vinfo, |
9798 | stmt_vec_info stmt_info, |
9799 | gimple **vec_stmt, slp_tree slp_node, |
9800 | stmt_vector_for_cost *cost_vec) |
9801 | { |
9802 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
9803 | unsigned ncopies; |
9804 | bool nested_in_vect_loop = false; |
9805 | class loop *iv_loop; |
9806 | tree vec_def; |
9807 | edge pe = loop_preheader_edge (loop); |
9808 | basic_block new_bb; |
9809 | tree new_vec, vec_init, vec_step, t; |
9810 | tree new_name; |
9811 | gimple *new_stmt; |
9812 | gphi *induction_phi; |
9813 | tree induc_def, vec_dest; |
9814 | tree init_expr, step_expr; |
9815 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
9816 | unsigned i; |
9817 | tree expr; |
9818 | gimple_stmt_iterator si; |
9819 | enum vect_induction_op_type induction_type |
9820 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); |
9821 | |
9822 | gphi *phi = dyn_cast <gphi *> (p: stmt_info->stmt); |
9823 | if (!phi) |
9824 | return false; |
9825 | |
9826 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
9827 | return false; |
9828 | |
9829 | /* Make sure it was recognized as induction computation. */ |
9830 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) |
9831 | return false; |
9832 | |
9833 | /* Handle nonlinear induction in a separate place. */ |
9834 | if (induction_type != vect_step_op_add) |
9835 | return vectorizable_nonlinear_induction (loop_vinfo, stmt_info, |
9836 | vec_stmt, slp_node, cost_vec); |
9837 | |
9838 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9839 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9840 | |
9841 | if (slp_node) |
9842 | ncopies = 1; |
9843 | else |
9844 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9845 | gcc_assert (ncopies >= 1); |
9846 | |
9847 | /* FORNOW. These restrictions should be relaxed. */ |
9848 | if (nested_in_vect_loop_p (loop, stmt_info)) |
9849 | { |
9850 | imm_use_iterator imm_iter; |
9851 | use_operand_p use_p; |
9852 | gimple *exit_phi; |
9853 | edge latch_e; |
9854 | tree loop_arg; |
9855 | |
9856 | if (ncopies > 1) |
9857 | { |
9858 | if (dump_enabled_p ()) |
9859 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9860 | "multiple types in nested loop.\n" ); |
9861 | return false; |
9862 | } |
9863 | |
9864 | exit_phi = NULL; |
9865 | latch_e = loop_latch_edge (loop->inner); |
9866 | loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); |
9867 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg) |
9868 | { |
9869 | gimple *use_stmt = USE_STMT (use_p); |
9870 | if (is_gimple_debug (gs: use_stmt)) |
9871 | continue; |
9872 | |
9873 | if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (g: use_stmt))) |
9874 | { |
9875 | exit_phi = use_stmt; |
9876 | break; |
9877 | } |
9878 | } |
9879 | if (exit_phi) |
9880 | { |
9881 | stmt_vec_info exit_phi_vinfo = loop_vinfo->lookup_stmt (exit_phi); |
9882 | if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo) |
9883 | && !STMT_VINFO_LIVE_P (exit_phi_vinfo))) |
9884 | { |
9885 | if (dump_enabled_p ()) |
9886 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9887 | "inner-loop induction only used outside " |
9888 | "of the outer vectorized loop.\n" ); |
9889 | return false; |
9890 | } |
9891 | } |
9892 | |
9893 | nested_in_vect_loop = true; |
9894 | iv_loop = loop->inner; |
9895 | } |
9896 | else |
9897 | iv_loop = loop; |
9898 | gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); |
9899 | |
9900 | if (slp_node && !nunits.is_constant ()) |
9901 | { |
9902 | /* The current SLP code creates the step value element-by-element. */ |
9903 | if (dump_enabled_p ()) |
9904 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9905 | "SLP induction not supported for variable-length" |
9906 | " vectors.\n" ); |
9907 | return false; |
9908 | } |
9909 | |
9910 | if (FLOAT_TYPE_P (vectype) && !param_vect_induction_float) |
9911 | { |
9912 | if (dump_enabled_p ()) |
9913 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9914 | "floating point induction vectorization disabled\n" ); |
9915 | return false; |
9916 | } |
9917 | |
9918 | step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); |
9919 | gcc_assert (step_expr != NULL_TREE); |
9920 | tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype); |
9921 | |
9922 | /* Check for backend support of PLUS/MINUS_EXPR. */ |
9923 | if (!directly_supported_p (PLUS_EXPR, step_vectype) |
9924 | || !directly_supported_p (MINUS_EXPR, step_vectype)) |
9925 | return false; |
9926 | |
9927 | if (!vec_stmt) /* transformation not required. */ |
9928 | { |
9929 | unsigned inside_cost = 0, prologue_cost = 0; |
9930 | if (slp_node) |
9931 | { |
9932 | /* We eventually need to set a vector type on invariant |
9933 | arguments. */ |
9934 | unsigned j; |
9935 | slp_tree child; |
9936 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
9937 | if (!vect_maybe_update_slp_op_vectype |
9938 | (child, SLP_TREE_VECTYPE (slp_node))) |
9939 | { |
9940 | if (dump_enabled_p ()) |
9941 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9942 | "incompatible vector types for " |
9943 | "invariants\n" ); |
9944 | return false; |
9945 | } |
9946 | /* loop cost for vec_loop. */ |
9947 | inside_cost |
9948 | = record_stmt_cost (cost_vec, |
9949 | SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), |
9950 | vector_stmt, stmt_info, 0, vect_body); |
9951 | /* prologue cost for vec_init (if not nested) and step. */ |
9952 | prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, |
9953 | scalar_to_vec, |
9954 | stmt_info, 0, vect_prologue); |
9955 | } |
9956 | else /* if (!slp_node) */ |
9957 | { |
9958 | /* loop cost for vec_loop. */ |
9959 | inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, |
9960 | stmt_info, 0, vect_body); |
9961 | /* prologue cost for vec_init and vec_step. */ |
9962 | prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, |
9963 | stmt_info, 0, vect_prologue); |
9964 | } |
9965 | if (dump_enabled_p ()) |
9966 | dump_printf_loc (MSG_NOTE, vect_location, |
9967 | "vect_model_induction_cost: inside_cost = %d, " |
9968 | "prologue_cost = %d .\n" , inside_cost, |
9969 | prologue_cost); |
9970 | |
9971 | STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; |
9972 | DUMP_VECT_SCOPE ("vectorizable_induction" ); |
9973 | return true; |
9974 | } |
9975 | |
9976 | /* Transform. */ |
9977 | |
9978 | /* Compute a vector variable, initialized with the first VF values of |
9979 | the induction variable. E.g., for an iv with IV_PHI='X' and |
9980 | evolution S, for a vector of 4 units, we want to compute: |
9981 | [X, X + S, X + 2*S, X + 3*S]. */ |
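| /* Concretely (illustrative values only): for X == 5, S == 3 and a |
| four-element vector the initial vector is [5, 8, 11, 14], and each |
| vector iteration then adds VF * S == 12 to every lane.  */ |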
9982 | |
9983 | if (dump_enabled_p ()) |
9984 | dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n" ); |
9985 | |
9986 | pe = loop_preheader_edge (iv_loop); |
9987 | /* Find the first insertion point in the BB. */ |
9988 | basic_block bb = gimple_bb (phi); |
9989 | si = gsi_after_labels (bb); |
9990 | |
9991 | /* For SLP induction we have to generate several IVs as for example |
9992 | with group size 3 we need |
9993 | [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] |
9994 | [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ |
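| /* As a concrete illustration (hypothetical values): with initial values |
| i0 = 0, i1 = 10, i2 = 20 and steps S0 = S1 = S2 = 1, element k of the |
| concatenated IV vectors is inits[k % 3] + (k / 3) * steps[k % 3], i.e. |
| [0, 10, 20, 1] [11, 21, 2, 12] [22, 3, 13, 23].  */ |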
9995 | if (slp_node) |
9996 | { |
9997 | /* Enforced above. */ |
9998 | unsigned int const_nunits = nunits.to_constant (); |
9999 | |
10000 | /* The initial values are vectorized, but any lanes > group_size |
10001 | need adjustment. */ |
10002 | slp_tree init_node |
10003 | = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; |
10004 | |
10005 | /* Gather steps. Since we do not vectorize inductions as |
10006 | cycles we have to reconstruct the step from SCEV data. */ |
10007 | unsigned group_size = SLP_TREE_LANES (slp_node); |
10008 | tree *steps = XALLOCAVEC (tree, group_size); |
10009 | tree *inits = XALLOCAVEC (tree, group_size); |
10010 | stmt_vec_info phi_info; |
10011 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) |
10012 | { |
10013 | steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); |
10014 | if (!init_node) |
10015 | inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), |
10016 | pe->dest_idx); |
10017 | } |
10018 | |
10019 | /* Now generate the IVs. */ |
10020 | unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10021 | gcc_assert ((const_nunits * nvects) % group_size == 0); |
10022 | unsigned nivs; |
10023 | if (nested_in_vect_loop) |
10024 | nivs = nvects; |
10025 | else |
10026 | { |
10027 | /* Compute the number of distinct IVs we need. First reduce |
10028 | group_size if it is a multiple of const_nunits so we get |
10029 | one IV for a group_size of 4 but const_nunits 2. */ |
10030 | unsigned group_sizep = group_size; |
10031 | if (group_sizep % const_nunits == 0) |
10032 | group_sizep = group_sizep / const_nunits; |
10033 | nivs = least_common_multiple (group_sizep, |
10034 | const_nunits) / const_nunits; |
10035 | } |
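| /* Worked example (values chosen for illustration): for group_size == 4 |
| and const_nunits == 2, group_sizep becomes 2 and nivs == lcm (2, 2) / 2 |
| == 1, so one IV serves the whole group.  For group_size == 3 and |
| const_nunits == 4 nothing divides, so nivs == lcm (3, 4) / 4 == 3, |
| matching the three vectors shown in the example above.  */ |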
10036 | tree stept = TREE_TYPE (step_vectype); |
10037 | tree lupdate_mul = NULL_TREE; |
10038 | if (!nested_in_vect_loop) |
10039 | { |
10040 | /* The number of iterations covered in one vector iteration. */ |
10041 | unsigned lup_mul = (nvects * const_nunits) / group_size; |
10042 | lupdate_mul |
10043 | = build_vector_from_val (step_vectype, |
10044 | SCALAR_FLOAT_TYPE_P (stept) |
10045 | ? build_real_from_wide (stept, lup_mul, |
10046 | UNSIGNED) |
10047 | : build_int_cstu (stept, lup_mul)); |
10048 | } |
10049 | tree peel_mul = NULL_TREE; |
10050 | gimple_seq init_stmts = NULL; |
10051 | if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) |
10052 | { |
10053 | if (SCALAR_FLOAT_TYPE_P (stept)) |
10054 | peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, |
10055 | LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); |
10056 | else |
10057 | peel_mul = gimple_convert (&init_stmts, stept, |
10058 | LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); |
10059 | peel_mul = gimple_build_vector_from_val (&init_stmts, |
10060 | step_vectype, peel_mul); |
10061 | } |
10062 | unsigned ivn; |
10063 | auto_vec<tree> vec_steps; |
10064 | for (ivn = 0; ivn < nivs; ++ivn) |
10065 | { |
10066 | tree_vector_builder step_elts (step_vectype, const_nunits, 1); |
10067 | tree_vector_builder init_elts (vectype, const_nunits, 1); |
10068 | tree_vector_builder mul_elts (step_vectype, const_nunits, 1); |
10069 | for (unsigned eltn = 0; eltn < const_nunits; ++eltn) |
10070 | { |
10071 | /* The scalar steps of the IVs. */ |
10072 | tree elt = steps[(ivn*const_nunits + eltn) % group_size]; |
10073 | elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt); |
10074 | step_elts.quick_push (elt); |
10075 | if (!init_node) |
10076 | { |
10077 | /* The scalar inits of the IVs if not vectorized. */ |
10078 | elt = inits[(ivn*const_nunits + eltn) % group_size]; |
10079 | if (!useless_type_conversion_p (TREE_TYPE (vectype), |
10080 | TREE_TYPE (elt))) |
10081 | elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, |
10082 | TREE_TYPE (vectype), elt); |
10083 | init_elts.quick_push (elt); |
10084 | } |
10085 | /* The number of steps to add to the initial values. */ |
10086 | unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; |
10087 | mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) |
10088 | ? build_real_from_wide (stept, |
10089 | mul_elt, UNSIGNED) |
10090 | : build_int_cstu (stept, mul_elt)); |
10091 | } |
10092 | vec_step = gimple_build_vector (&init_stmts, &step_elts); |
10093 | vec_steps.safe_push (vec_step); |
10094 | tree step_mul = gimple_build_vector (&init_stmts, &mul_elts); |
10095 | if (peel_mul) |
10096 | step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, |
10097 | step_mul, peel_mul); |
10098 | if (!init_node) |
10099 | vec_init = gimple_build_vector (&init_stmts, &init_elts); |
10100 | |
10101 | /* Create the induction-phi that defines the induction-operand. */ |
10102 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, |
10103 | "vec_iv_" ); |
10104 | induction_phi = create_phi_node (vec_dest, iv_loop->header); |
10105 | induc_def = PHI_RESULT (induction_phi); |
10106 | |
10107 | /* Create the iv update inside the loop */ |
10108 | tree up = vec_step; |
10109 | if (lupdate_mul) |
10110 | up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, |
10111 | vec_step, lupdate_mul); |
10112 | gimple_seq stmts = NULL; |
10113 | vec_def = gimple_convert (&stmts, step_vectype, induc_def); |
10114 | vec_def = gimple_build (&stmts, |
10115 | PLUS_EXPR, step_vectype, vec_def, up); |
10116 | vec_def = gimple_convert (&stmts, vectype, vec_def); |
10117 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10118 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10119 | UNKNOWN_LOCATION); |
10120 | |
10121 | if (init_node) |
10122 | vec_init = vect_get_slp_vect_def (init_node, ivn); |
10123 | if (!nested_in_vect_loop |
10124 | && !integer_zerop (step_mul)) |
10125 | { |
10126 | vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); |
10127 | up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, |
10128 | vec_step, step_mul); |
10129 | vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, |
10130 | vec_def, up); |
10131 | vec_init = gimple_convert (&init_stmts, vectype, vec_def); |
10132 | } |
10133 | |
10134 | /* Set the arguments of the phi node: */ |
10135 | add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
10136 | |
10137 | slp_node->push_vec_def (induction_phi); |
10138 | } |
10139 | if (!nested_in_vect_loop) |
10140 | { |
10141 | /* Fill up to the number of vectors we need for the whole group. */ |
10142 | nivs = least_common_multiple (group_size, |
10143 | const_nunits) / const_nunits; |
10144 | vec_steps.reserve (nivs-ivn); |
10145 | for (; ivn < nivs; ++ivn) |
10146 | { |
10147 | slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); |
10148 | vec_steps.quick_push (vec_steps[0]); |
10149 | } |
10150 | } |
10151 | |
10152 | /* Re-use IVs when we can. We are generating further vector |
10153 | stmts by adding VF' * stride to the IVs generated above. */ |
10154 | if (ivn < nvects) |
10155 | { |
10156 | unsigned vfp |
10157 | = least_common_multiple (group_size, const_nunits) / group_size; |
10158 | tree lupdate_mul |
10159 | = build_vector_from_val (step_vectype, |
10160 | SCALAR_FLOAT_TYPE_P (stept) |
10161 | ? build_real_from_wide (stept, |
10162 | vfp, UNSIGNED) |
10163 | : build_int_cstu (stept, vfp)); |
10164 | for (; ivn < nvects; ++ivn) |
10165 | { |
10166 | gimple *iv |
10167 | = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); |
10168 | tree def = gimple_get_lhs (iv); |
10169 | if (ivn < 2*nivs) |
10170 | vec_steps[ivn - nivs] |
10171 | = gimple_build (&init_stmts, MULT_EXPR, step_vectype, |
10172 | vec_steps[ivn - nivs], lupdate_mul); |
10173 | gimple_seq stmts = NULL; |
10174 | def = gimple_convert (&stmts, step_vectype, def); |
10175 | def = gimple_build (&stmts, PLUS_EXPR, step_vectype, |
10176 | def, vec_steps[ivn % nivs]); |
10177 | def = gimple_convert (&stmts, vectype, def); |
10178 | if (gimple_code (iv) == GIMPLE_PHI) |
10179 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10180 | else |
10181 | { |
10182 | gimple_stmt_iterator tgsi = gsi_for_stmt (iv); |
10183 | gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); |
10184 | } |
10185 | slp_node->push_vec_def (def); |
10186 | } |
10187 | } |
10188 | |
10189 | new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); |
10190 | gcc_assert (!new_bb); |
10191 | |
10192 | return true; |
10193 | } |
10194 | |
10195 | init_expr = vect_phi_initial_value (phi); |
10196 | |
10197 | gimple_seq stmts = NULL; |
10198 | if (!nested_in_vect_loop) |
10199 | { |
10200 | /* Convert the initial value to the IV update type. */ |
10201 | tree new_type = TREE_TYPE (step_expr); |
10202 | init_expr = gimple_convert (&stmts, new_type, init_expr); |
10203 | |
10204 | /* If we are using the loop mask to "peel" for alignment then we need |
10205 | to adjust the start value here. */ |
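| /* For instance (illustrative numbers only): with skip_niters == 3 and a |
| step of 4 the start value becomes X - 12, so lane 3 of the initial |
| vector [X-12, X-8, X-4, X, ...] carries the original initial value for |
| the first iteration the mask actually enables.  */ |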
10206 | tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
10207 | if (skip_niters != NULL_TREE) |
10208 | { |
10209 | if (FLOAT_TYPE_P (vectype)) |
10210 | skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, |
10211 | skip_niters); |
10212 | else |
10213 | skip_niters = gimple_convert (&stmts, new_type, skip_niters); |
10214 | tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, |
10215 | skip_niters, step_expr); |
10216 | init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, |
10217 | init_expr, skip_step); |
10218 | } |
10219 | } |
10220 | |
10221 | if (stmts) |
10222 | { |
10223 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
10224 | gcc_assert (!new_bb); |
10225 | } |
10226 | |
10227 | /* Create the vector that holds the initial_value of the induction. */ |
10228 | if (nested_in_vect_loop) |
10229 | { |
10230 | /* iv_loop is nested in the loop to be vectorized. init_expr had already |
10231 | been created during vectorization of previous stmts. We obtain it |
10232 | from the STMT_VINFO_VEC_STMT of the defining stmt. */ |
10233 | auto_vec<tree> vec_inits; |
10234 | vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, |
10235 | init_expr, &vec_inits); |
10236 | vec_init = vec_inits[0]; |
10237 | /* If the initial value is not of proper type, convert it. */ |
10238 | if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) |
10239 | { |
10240 | new_stmt |
10241 | = gimple_build_assign (vect_get_new_ssa_name (vectype, |
10242 | vect_simple_var, |
10243 | "vec_iv_" ), |
10244 | VIEW_CONVERT_EXPR, |
10245 | build1 (VIEW_CONVERT_EXPR, vectype, |
10246 | vec_init)); |
10247 | vec_init = gimple_assign_lhs (new_stmt); |
10248 | new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), |
10249 | new_stmt); |
10250 | gcc_assert (!new_bb); |
10251 | } |
10252 | } |
10253 | else |
10254 | { |
10255 | /* iv_loop is the loop to be vectorized. Create: |
10256 | vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ |
10257 | stmts = NULL; |
10258 | new_name = gimple_convert (&stmts, TREE_TYPE (step_expr), init_expr); |
10259 | |
10260 | unsigned HOST_WIDE_INT const_nunits; |
10261 | if (nunits.is_constant (&const_nunits)) |
10262 | { |
10263 | tree_vector_builder elts (step_vectype, const_nunits, 1); |
10264 | elts.quick_push (new_name); |
10265 | for (i = 1; i < const_nunits; i++) |
10266 | { |
10267 | /* Create: new_name_i = new_name + step_expr */ |
10268 | new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name), |
10269 | new_name, step_expr); |
10270 | elts.quick_push (new_name); |
10271 | } |
10272 | /* Create a vector from [new_name_0, new_name_1, ..., |
10273 | new_name_nunits-1] */ |
10274 | vec_init = gimple_build_vector (&stmts, &elts); |
10275 | } |
10276 | else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))) |
10277 | /* Build the initial value directly from a VEC_SERIES_EXPR. */ |
10278 | vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, step_vectype, |
10279 | new_name, step_expr); |
10280 | else |
10281 | { |
10282 | /* Build: |
10283 | [base, base, base, ...] |
10284 | + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */ |
10285 | gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))); |
10286 | gcc_assert (flag_associative_math); |
10287 | tree index = build_index_vector (step_vectype, 0, 1); |
10288 | tree base_vec = gimple_build_vector_from_val (&stmts, step_vectype, |
10289 | new_name); |
10290 | tree step_vec = gimple_build_vector_from_val (&stmts, step_vectype, |
10291 | step_expr); |
10292 | vec_init = gimple_build (&stmts, FLOAT_EXPR, step_vectype, index); |
10293 | vec_init = gimple_build (&stmts, MULT_EXPR, step_vectype, |
10294 | vec_init, step_vec); |
10295 | vec_init = gimple_build (&stmts, PLUS_EXPR, step_vectype, |
10296 | vec_init, base_vec); |
10297 | } |
10298 | vec_init = gimple_convert (&stmts, vectype, vec_init); |
10299 | |
10300 | if (stmts) |
10301 | { |
10302 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
10303 | gcc_assert (!new_bb); |
10304 | } |
10305 | } |
10306 | |
10307 | |
10308 | /* Create the vector that holds the step of the induction. */ |
10309 | if (nested_in_vect_loop) |
10310 | /* iv_loop is nested in the loop to be vectorized. Generate: |
10311 | vec_step = [S, S, S, S] */ |
10312 | new_name = step_expr; |
10313 | else |
10314 | { |
10315 | /* iv_loop is the loop to be vectorized. Generate: |
10316 | vec_step = [VF*S, VF*S, VF*S, VF*S] */ |
10317 | gimple_seq seq = NULL; |
10318 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
10319 | { |
10320 | expr = build_int_cst (integer_type_node, vf); |
10321 | expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); |
10322 | } |
10323 | else |
10324 | expr = build_int_cst (TREE_TYPE (step_expr), vf); |
10325 | new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), |
10326 | expr, step_expr); |
10327 | if (seq) |
10328 | { |
10329 | new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
10330 | gcc_assert (!new_bb); |
10331 | } |
10332 | } |
10333 | |
10334 | t = unshare_expr (new_name); |
10335 | gcc_assert (CONSTANT_CLASS_P (new_name) |
10336 | || TREE_CODE (new_name) == SSA_NAME); |
10337 | new_vec = build_vector_from_val (step_vectype, t); |
10338 | vec_step = vect_init_vector (loop_vinfo, stmt_info, |
10339 | new_vec, step_vectype, NULL); |
10340 | |
10341 | |
10342 | /* Create the following def-use cycle: |
10343 | loop prolog: |
10344 | vec_init = ... |
10345 | vec_step = ... |
10346 | loop: |
10347 | vec_iv = PHI <vec_init, vec_loop> |
10348 | ... |
10349 | STMT |
10350 | ... |
10351 | vec_loop = vec_iv + vec_step; */ |
10352 | |
10353 | /* Create the induction-phi that defines the induction-operand. */ |
10354 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_" ); |
10355 | induction_phi = create_phi_node (vec_dest, iv_loop->header); |
10356 | induc_def = PHI_RESULT (induction_phi); |
10357 | |
10358 | /* Create the iv update inside the loop */ |
10359 | stmts = NULL; |
10360 | vec_def = gimple_convert (&stmts, step_vectype, induc_def); |
10361 | vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, vec_step); |
10362 | vec_def = gimple_convert (&stmts, vectype, vec_def); |
10363 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10364 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
10365 | |
10366 | /* Set the arguments of the phi node: */ |
10367 | add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
10368 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10369 | UNKNOWN_LOCATION); |
10370 | |
10371 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); |
10372 | *vec_stmt = induction_phi; |
10373 | |
10374 | /* If the vectorization factor (VF) is bigger than the number |
10375 | of elements that we can fit in a vectype (nunits), we have to generate |
10376 | more than one vector stmt, i.e. we need to "unroll" the |
10377 | vector stmt by a factor of VF/nunits. For more details see the documentation |
10378 | in vectorizable_operation. */ |
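| /* For instance (illustrative): with four-element vectors and VF == 8 we |
| get ncopies == 2; the second copy is the PHI result plus 4 * S in every |
| lane, and one further addition (PHI result plus 8 * S) becomes the |
| value fed back on the loop latch below.  */ |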
10379 | |
10380 | if (ncopies > 1) |
10381 | { |
10382 | gimple_seq seq = NULL; |
10383 | /* FORNOW. This restriction should be relaxed. */ |
10384 | gcc_assert (!nested_in_vect_loop); |
10385 | |
10386 | /* Create the vector that holds the step of the induction. */ |
10387 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
10388 | { |
10389 | expr = build_int_cst (integer_type_node, nunits); |
10390 | expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); |
10391 | } |
10392 | else |
10393 | expr = build_int_cst (TREE_TYPE (step_expr), nunits); |
10394 | new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), |
10395 | expr, step_expr); |
10396 | if (seq) |
10397 | { |
10398 | new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
10399 | gcc_assert (!new_bb); |
10400 | } |
10401 | |
10402 | t = unshare_expr (new_name); |
10403 | gcc_assert (CONSTANT_CLASS_P (new_name) |
10404 | || TREE_CODE (new_name) == SSA_NAME); |
10405 | new_vec = build_vector_from_val (step_vectype, t); |
10406 | vec_step = vect_init_vector (loop_vinfo, stmt_info, |
10407 | new_vec, step_vectype, NULL); |
10408 | |
10409 | vec_def = induc_def; |
10410 | for (i = 1; i < ncopies + 1; i++) |
10411 | { |
10412 | /* vec_i = vec_prev + vec_step */ |
10413 | gimple_seq stmts = NULL; |
10414 | vec_def = gimple_convert (&stmts, step_vectype, vec_def); |
10415 | vec_def = gimple_build (&stmts, |
10416 | PLUS_EXPR, step_vectype, vec_def, vec_step); |
10417 | vec_def = gimple_convert (&stmts, vectype, vec_def); |
10418 | |
10419 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10420 | if (i < ncopies) |
10421 | { |
10422 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
10423 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); |
10424 | } |
10425 | else |
10426 | { |
10427 | /* vec_1 = vec_iv + (VF/n * S) |
10428 | vec_2 = vec_1 + (VF/n * S) |
10429 | ... |
10430 | vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop |
10431 | |
10432 | vec_n is used as vec_loop to save the large step register and |
10433 | related operations. */ |
10434 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10435 | UNKNOWN_LOCATION); |
10436 | } |
10437 | } |
10438 | } |
10439 | |
10440 | if (dump_enabled_p ()) |
10441 | dump_printf_loc (MSG_NOTE, vect_location, |
10442 | "transform induction: created def-use cycle: %G%G" , |
10443 | (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def)); |
10444 | |
10445 | return true; |
10446 | } |
10447 | |
10448 | /* Function vectorizable_live_operation. |
10449 | |
10450 | STMT_INFO computes a value that is used outside the loop. Check if |
10451 | it can be supported. */ |
10452 | |
10453 | bool |
10454 | vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, |
10455 | slp_tree slp_node, slp_instance slp_node_instance, |
10456 | int slp_index, bool vec_stmt_p, |
10457 | stmt_vector_for_cost *cost_vec) |
10458 | { |
10459 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); |
10460 | imm_use_iterator imm_iter; |
10461 | tree lhs, lhs_type, bitsize; |
10462 | tree vectype = (slp_node |
10463 | ? SLP_TREE_VECTYPE (slp_node) |
10464 | : STMT_VINFO_VECTYPE (stmt_info)); |
10465 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
10466 | int ncopies; |
10467 | gimple *use_stmt; |
10468 | auto_vec<tree> vec_oprnds; |
10469 | int vec_entry = 0; |
10470 | poly_uint64 vec_index = 0; |
10471 | |
10472 | gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); |
10473 | |
10474 | /* If a stmt of a reduction is live, vectorize it via |
10475 | vect_create_epilog_for_reduction. vectorizable_reduction assessed |
10476 | validity so just trigger the transform here. */ |
10477 | if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) |
10478 | { |
10479 | if (!vec_stmt_p) |
10480 | return true; |
10481 | if (slp_node) |
10482 | { |
10483 | /* For reduction chains the meta-info is attached to |
10484 | the group leader. */ |
10485 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
10486 | stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); |
10487 | /* For SLP reductions we vectorize the epilogue for |
10488 | all involved stmts together. */ |
10489 | else if (slp_index != 0) |
10490 | return true; |
10491 | } |
10492 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
10493 | gcc_assert (reduc_info->is_reduc_info); |
10494 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION |
10495 | || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) |
10496 | return true; |
10497 | vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node, |
10498 | slp_node_instance); |
10499 | return true; |
10500 | } |
10501 | |
10502 | /* If STMT is not relevant and it is a simple assignment and its inputs are |
10503 | invariant then it can remain in place, unvectorized. The original last |
10504 | scalar value that it computes will be used. */ |
10505 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
10506 | { |
10507 | gcc_assert (is_simple_and_all_uses_invariant (stmt_info, loop_vinfo)); |
10508 | if (dump_enabled_p ()) |
10509 | dump_printf_loc (MSG_NOTE, vect_location, |
10510 | "statement is simple and uses invariant. Leaving in " |
10511 | "place.\n" ); |
10512 | return true; |
10513 | } |
10514 | |
10515 | if (slp_node) |
10516 | ncopies = 1; |
10517 | else |
10518 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
10519 | |
10520 | if (slp_node) |
10521 | { |
10522 | gcc_assert (slp_index >= 0); |
10523 | |
10524 | /* Get the last occurrence of the scalar index from the concatenation of |
10525 | all the slp vectors. Calculate which slp vector it is and the index |
10526 | within. */ |
10527 | int num_scalar = SLP_TREE_LANES (slp_node); |
10528 | int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10529 | poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index; |
10530 | |
10531 | /* Calculate which vector contains the result, and which lane of |
10532 | that vector we need. */ |
10533 | if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index)) |
10534 | { |
10535 | if (dump_enabled_p ()) |
10536 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10537 | "Cannot determine which vector holds the" |
10538 | " final result.\n" ); |
10539 | return false; |
10540 | } |
10541 | } |
10542 | |
10543 | if (!vec_stmt_p) |
10544 | { |
10545 | /* No transformation required. */ |
10546 | if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
10547 | { |
10548 | if (slp_node) |
10549 | { |
10550 | if (dump_enabled_p ()) |
10551 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10552 | "can't operate on partial vectors " |
10553 | "because an SLP statement is live after " |
10554 | "the loop.\n" ); |
10555 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
10556 | } |
10557 | else if (ncopies > 1) |
10558 | { |
10559 | if (dump_enabled_p ()) |
10560 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10561 | "can't operate on partial vectors " |
10562 | "because ncopies is greater than 1.\n" ); |
10563 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
10564 | } |
10565 | else |
10566 | { |
10567 | gcc_assert (ncopies == 1 && !slp_node); |
10568 | if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, |
10569 | OPTIMIZE_FOR_SPEED)) |
10570 | vect_record_loop_mask (loop_vinfo, |
10571 | &LOOP_VINFO_MASKS (loop_vinfo), |
10572 | 1, vectype, NULL); |
10573 | else if (can_vec_extract_var_idx_p ( |
10574 | TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype)))) |
10575 | vect_record_loop_len (loop_vinfo, |
10576 | &LOOP_VINFO_LENS (loop_vinfo), |
10577 | 1, vectype, 1); |
10578 | else |
10579 | { |
10580 | if (dump_enabled_p ()) |
10581 | dump_printf_loc ( |
10582 | MSG_MISSED_OPTIMIZATION, vect_location, |
10583 | "can't operate on partial vectors " |
10584 | "because the target doesn't support extract " |
10585 | "last reduction.\n" ); |
10586 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
10587 | } |
10588 | } |
10589 | } |
10590 | /* ??? Enable for loop costing as well. */ |
10591 | if (!loop_vinfo) |
10592 | record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE, |
10593 | 0, vect_epilogue); |
10594 | return true; |
10595 | } |
10596 | |
10597 | /* Use the lhs of the original scalar statement. */ |
10598 | gimple *stmt = vect_orig_stmt (stmt_info)->stmt; |
10599 | if (dump_enabled_p ()) |
10600 | dump_printf_loc (MSG_NOTE, vect_location, "extracting lane for live " |
10601 | "stmt %G" , stmt); |
10602 | |
10603 | lhs = gimple_get_lhs (stmt); |
10604 | lhs_type = TREE_TYPE (lhs); |
10605 | |
10606 | bitsize = vector_element_bits_tree (vectype); |
10607 | |
10608 | /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ |
10609 | tree vec_lhs, bitstart; |
10610 | gimple *vec_stmt; |
10611 | if (slp_node) |
10612 | { |
10613 | gcc_assert (!loop_vinfo |
10614 | || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
10615 | && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))); |
10616 | |
10617 | /* Get the correct slp vectorized stmt. */ |
10618 | vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry]; |
10619 | vec_stmt = SSA_NAME_DEF_STMT (vec_lhs); |
10620 | |
10621 | /* Get entry to use. */ |
10622 | bitstart = bitsize_int (vec_index); |
10623 | bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); |
10624 | } |
10625 | else |
10626 | { |
10627 | /* For multiple copies, get the last copy. */ |
10628 | vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last (); |
10629 | vec_lhs = gimple_get_lhs (vec_stmt); |
10630 | |
10631 | /* Get the last lane in the vector. */ |
10632 | bitstart = int_const_binop (MULT_EXPR, bitsize, bitsize_int (nunits - 1)); |
10633 | } |
10634 | |
10635 | if (loop_vinfo) |
10636 | { |
10637 | /* To ensure the VEC_LHS used by the lane-extraction stmts satisfies the |
10638 | loop-closed PHI requirement, insert one phi node for it. It looks like: |
10639 | loop; |
10640 | BB: |
10641 | # lhs' = PHI <lhs> |
10642 | ==> |
10643 | loop; |
10644 | BB: |
10645 | # vec_lhs' = PHI <vec_lhs> |
10646 | new_tree = lane_extract <vec_lhs', ...>; |
10647 | lhs' = new_tree; */ |
10648 | |
10649 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
10650 | basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; |
10651 | gcc_assert (single_pred_p (exit_bb)); |
10652 | |
10653 | tree vec_lhs_phi = copy_ssa_name (vec_lhs); |
10654 | gimple *phi = create_phi_node (vec_lhs_phi, exit_bb); |
10655 | SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)->dest_idx, vec_lhs); |
10656 | |
10657 | gimple_seq stmts = NULL; |
10658 | tree new_tree; |
10659 | if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
10660 | { |
10661 | /* Emit: |
10662 | |
10663 | SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1> |
10664 | |
10665 | where VEC_LHS is the vectorized live-out result and LEN is the |
10666 | length of the final (possibly partial) iteration. */ |
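| /* For example (illustrative): with LEN == 5 and a zero bias the |
| extracted lane is 5 + 0 - 1 == 4, i.e. the last element that the |
| partial vector actually processed.  */ |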
10667 | gcc_assert (ncopies == 1 && !slp_node); |
10668 | gimple_seq tem = NULL; |
10669 | gimple_stmt_iterator gsi = gsi_last (tem); |
10670 | tree len |
10671 | = vect_get_loop_len (loop_vinfo, &gsi, |
10672 | &LOOP_VINFO_LENS (loop_vinfo), |
10673 | 1, vectype, 0, 0); |
10674 | |
10675 | /* BIAS - 1. */ |
10676 | signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
10677 | tree bias_minus_one |
10678 | = int_const_binop (MINUS_EXPR, |
10679 | build_int_cst (TREE_TYPE (len), biasval), |
10680 | build_one_cst (TREE_TYPE (len))); |
10681 | |
10682 | /* LAST_INDEX = LEN + (BIAS - 1). */ |
10683 | tree last_index = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (len), |
10684 | len, bias_minus_one); |
10685 | |
10686 | /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>. */ |
10687 | tree scalar_res |
10688 | = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), |
10689 | vec_lhs_phi, last_index); |
10690 | |
10691 | /* Convert the extracted vector element to the scalar type. */ |
10692 | new_tree = gimple_convert (&stmts, lhs_type, scalar_res); |
10693 | } |
10694 | else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
10695 | { |
10696 | /* Emit: |
10697 | |
10698 | SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> |
10699 | |
10700 | where VEC_LHS is the vectorized live-out result and MASK is |
10701 | the loop mask for the final iteration. */ |
10702 | gcc_assert (ncopies == 1 && !slp_node); |
10703 | tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); |
10704 | gimple_seq tem = NULL; |
10705 | gimple_stmt_iterator gsi = gsi_last (tem); |
10706 | tree mask = vect_get_loop_mask (loop_vinfo, &gsi, |
10707 | &LOOP_VINFO_MASKS (loop_vinfo), |
10708 | 1, vectype, 0); |
10709 | gimple_seq_add_seq (&stmts, tem); |
10710 | tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type, |
10711 | mask, vec_lhs_phi); |
10712 | |
10713 | /* Convert the extracted vector element to the scalar type. */ |
10714 | new_tree = gimple_convert (&stmts, lhs_type, scalar_res); |
10715 | } |
10716 | else |
10717 | { |
10718 | tree bftype = TREE_TYPE (vectype); |
10719 | if (VECTOR_BOOLEAN_TYPE_P (vectype)) |
10720 | bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); |
10721 | new_tree = build3 (BIT_FIELD_REF, bftype, |
10722 | vec_lhs_phi, bitsize, bitstart); |
10723 | new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), |
10724 | &stmts, true, NULL_TREE); |
10725 | } |
10726 | |
10727 | gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb); |
10728 | if (stmts) |
10729 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
10730 | |
10731 | /* Remove existing phis that copy from lhs and create copies |
10732 | from new_tree. */ |
10733 | gimple_stmt_iterator gsi; |
10734 | for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi);) |
10735 | { |
10736 | gimple *phi = gsi_stmt (gsi); |
10737 | if ((gimple_phi_arg_def (phi, 0) == lhs)) |
10738 | { |
10739 | remove_phi_node (&gsi, false); |
10740 | tree lhs_phi = gimple_phi_result (phi); |
10741 | gimple *copy = gimple_build_assign (lhs_phi, new_tree); |
10742 | gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT); |
10743 | } |
10744 | else |
10745 | gsi_next (&gsi); |
10746 | } |
10747 | |
10748 | /* There are no further out-of-loop uses of lhs by LC-SSA construction. */ |
10749 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) |
10750 | gcc_assert (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))); |
10751 | } |
10752 | else |
10753 | { |
10754 | /* For basic-block vectorization simply insert the lane-extraction. */ |
10755 | tree bftype = TREE_TYPE (vectype); |
10756 | if (VECTOR_BOOLEAN_TYPE_P (vectype)) |
10757 | bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); |
10758 | tree new_tree = build3 (BIT_FIELD_REF, bftype, |
10759 | vec_lhs, bitsize, bitstart); |
10760 | gimple_seq stmts = NULL; |
10761 | new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), |
10762 | &stmts, true, NULL_TREE); |
10763 | if (TREE_CODE (new_tree) == SSA_NAME |
10764 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) |
10765 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) = 1; |
10766 | if (is_a <gphi *> (vec_stmt)) |
10767 | { |
10768 | gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt)); |
10769 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10770 | } |
10771 | else |
10772 | { |
10773 | gimple_stmt_iterator si = gsi_for_stmt (vec_stmt); |
10774 | gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT); |
10775 | } |
10776 | |
10777 | /* Replace use of lhs with newly computed result. If the use stmt is a |
10778 | single arg PHI, just replace all uses of the PHI result. This is necessary |
10779 | because the LC SSA PHI defining lhs may precede the newly inserted stmt. */ |
10780 | use_operand_p use_p; |
10781 | stmt_vec_info use_stmt_info; |
10782 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) |
10783 | if (!is_gimple_debug (use_stmt) |
10784 | && (!(use_stmt_info = vinfo->lookup_stmt (use_stmt)) |
10785 | || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))) |
10786 | { |
10787 | /* ??? This can happen when the live lane ends up being |
10788 | rooted in a vector construction code-generated by an |
10789 | external SLP node (and code-generation for that already |
10790 | happened). See gcc.dg/vect/bb-slp-47.c. |
10791 | Doing this is what would happen if that vector CTOR |
10792 | were not code-generated yet so it is not too bad. |
10793 | ??? In fact we'd likely want to avoid this situation |
10794 | in the first place. */ |
10795 | if (TREE_CODE (new_tree) == SSA_NAME |
10796 | && !SSA_NAME_IS_DEFAULT_DEF (new_tree) |
10797 | && gimple_code (use_stmt) != GIMPLE_PHI |
10798 | && !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (new_tree), |
10799 | use_stmt)) |
10800 | { |
10801 | if (dump_enabled_p ()) |
10802 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10803 | "Using original scalar computation for " |
10804 | "live lane because use preceeds vector " |
10805 | "def\n" ); |
10806 | continue; |
10807 | } |
10808 | /* ??? It can also happen that we end up pulling a def into |
10809 | a loop where replacing out-of-loop uses would require |
10810 | a new LC SSA PHI node. Retain the original scalar in |
10811 | those cases as well. PR98064. */ |
10812 | if (TREE_CODE (new_tree) == SSA_NAME |
10813 | && !SSA_NAME_IS_DEFAULT_DEF (new_tree) |
10814 | && (gimple_bb (use_stmt)->loop_father |
10815 | != gimple_bb (vec_stmt)->loop_father) |
10816 | && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, |
10817 | gimple_bb (use_stmt)->loop_father)) |
10818 | { |
10819 | if (dump_enabled_p ()) |
10820 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10821 | "Using original scalar computation for " |
10822 | "live lane because there is an out-of-loop " |
10823 | "definition for it\n" ); |
10824 | continue; |
10825 | } |
10826 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
10827 | SET_USE (use_p, new_tree); |
10828 | update_stmt (use_stmt); |
10829 | } |
10830 | } |
10831 | |
10832 | return true; |
10833 | } |
10834 | |
10835 | /* Kill any debug uses outside LOOP of SSA names defined in STMT_INFO. */ |
10836 | |
10837 | static void |
10838 | vect_loop_kill_debug_uses (class loop *loop, stmt_vec_info stmt_info) |
10839 | { |
10840 | ssa_op_iter op_iter; |
10841 | imm_use_iterator imm_iter; |
10842 | def_operand_p def_p; |
10843 | gimple *ustmt; |
10844 | |
10845 | FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF) |
10846 | { |
10847 | FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p)) |
10848 | { |
10849 | basic_block bb; |
10850 | |
10851 | if (!is_gimple_debug (ustmt)) |
10852 | continue; |
10853 | |
10854 | bb = gimple_bb (ustmt); |
10855 | |
10856 | if (!flow_bb_inside_loop_p (loop, bb)) |
10857 | { |
10858 | if (gimple_debug_bind_p (ustmt)) |
10859 | { |
10860 | if (dump_enabled_p ()) |
10861 | dump_printf_loc (MSG_NOTE, vect_location, |
10862 | "killing debug use\n" ); |
10863 | |
10864 | gimple_debug_bind_reset_value (ustmt); |
10865 | update_stmt (ustmt); |
10866 | } |
10867 | else |
10868 | gcc_unreachable (); |
10869 | } |
10870 | } |
10871 | } |
10872 | } |
10873 | |
10874 | /* Given loop represented by LOOP_VINFO, return true if computation of |
10875 | LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false |
10876 | otherwise. */ |
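| /* For example (assuming a 32-bit unsigned NITERS type): if the latch is |
| known to execute at most 0xfffffffe times, then NITERSM1 + 1 cannot |
| wrap and the function returns true.  */ |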
10877 | |
10878 | static bool |
10879 | loop_niters_no_overflow (loop_vec_info loop_vinfo) |
10880 | { |
10881 | /* Constant case. */ |
10882 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
10883 | { |
10884 | tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo); |
10885 | tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo); |
10886 | |
10887 | gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST); |
10888 | gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST); |
10889 | if (wi::to_widest (cst_nitersm1) < wi::to_widest (cst_niters)) |
10890 | return true; |
10891 | } |
10892 | |
10893 | widest_int max; |
10894 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
10895 | /* Check the upper bound of loop niters. */ |
10896 | if (get_max_loop_iterations (loop, &max)) |
10897 | { |
10898 | tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); |
10899 | signop sgn = TYPE_SIGN (type); |
10900 | widest_int type_max = widest_int::from (wi::max_value (type), sgn); |
10901 | if (max < type_max) |
10902 | return true; |
10903 | } |
10904 | return false; |
10905 | } |
10906 | |
10907 | /* Return a mask type with half the number of elements as OLD_TYPE, |
10908 | given that it should have mode NEW_MODE. */ |
10909 | |
10910 | tree |
10911 | vect_halve_mask_nunits (tree old_type, machine_mode new_mode) |
10912 | { |
10913 | poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (old_type), 2); |
10914 | return build_truth_vector_type_for_mode (nunits, new_mode); |
10915 | } |
10916 | |
10917 | /* Return a mask type with twice as many elements as OLD_TYPE, |
10918 | given that it should have mode NEW_MODE. */ |
10919 | |
10920 | tree |
10921 | vect_double_mask_nunits (tree old_type, machine_mode new_mode) |
10922 | { |
10923 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (old_type) * 2; |
10924 | return build_truth_vector_type_for_mode (nunits, new_mode); |
10925 | } |
10926 | |
10927 | /* Record that a fully-masked version of LOOP_VINFO would need MASKS to |
10928 | contain a sequence of NVECTORS masks that each control a vector of type |
10929 | VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND |
10930 | these vector masks with the vector version of SCALAR_MASK. */ |
10931 | |
10932 | void |
10933 | vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks, |
10934 | unsigned int nvectors, tree vectype, tree scalar_mask) |
10935 | { |
10936 | gcc_assert (nvectors != 0); |
10937 | |
10938 | if (scalar_mask) |
10939 | { |
10940 | scalar_cond_masked_key cond (scalar_mask, nvectors); |
10941 | loop_vinfo->scalar_cond_masked_set.add (cond); |
10942 | } |
10943 | |
10944 | masks->mask_set.add (std::make_pair (vectype, nvectors)); |
10945 | } |
10946 | |
10947 | /* Given a complete set of masks MASKS, extract mask number INDEX |
10948 | for an rgroup that operates on NVECTORS vectors of type VECTYPE, |
10949 | where 0 <= INDEX < NVECTORS. Insert any set-up statements before GSI. |
10950 | |
10951 | See the comment above vec_loop_masks for more details about the mask |
10952 | arrangement. */ |
10953 | |
10954 | tree |
10955 | vect_get_loop_mask (loop_vec_info loop_vinfo, |
10956 | gimple_stmt_iterator *gsi, vec_loop_masks *masks, |
10957 | unsigned int nvectors, tree vectype, unsigned int index) |
10958 | { |
10959 | if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
10960 | == vect_partial_vectors_while_ult) |
10961 | { |
10962 | rgroup_controls *rgm = &(masks->rgc_vec)[nvectors - 1]; |
10963 | tree mask_type = rgm->type; |
10964 | |
10965 | /* Populate the rgroup's mask array, if this is the first time we've |
10966 | used it. */ |
10967 | if (rgm->controls.is_empty ()) |
10968 | { |
10969 | rgm->controls.safe_grow_cleared (nvectors, true); |
10970 | for (unsigned int i = 0; i < nvectors; ++i) |
10971 | { |
10972 | tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask"); |
10973 | /* Provide a dummy definition until the real one is available. */ |
10974 | SSA_NAME_DEF_STMT (mask) = gimple_build_nop (); |
10975 | rgm->controls[i] = mask; |
10976 | } |
10977 | } |
10978 | |
10979 | tree mask = rgm->controls[index]; |
10980 | if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), |
10981 | TYPE_VECTOR_SUBPARTS (vectype))) |
10982 | { |
10983 | /* A loop mask for data type X can be reused for data type Y |
10984 | if X has N times more elements than Y and if Y's elements |
10985 | are N times bigger than X's. In this case each sequence |
10986 | of N elements in the loop mask will be all-zero or all-one. |
10987 | We can then view-convert the mask so that each sequence of |
10988 | N elements is replaced by a single element. */ |
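| /* For example (assuming the target provides such modes): a mask |
| computed for sixteen QImode lanes can control eight HImode lanes, |
| because each adjacent pair of mask elements is all-zero or all-one |
| and view-converts to a single wider mask element.  */ |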
10989 | gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type), |
10990 | TYPE_VECTOR_SUBPARTS (vectype))); |
10991 | gimple_seq seq = NULL; |
10992 | mask_type = truth_type_for (vectype); |
10993 | mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask); |
10994 | if (seq) |
10995 | gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); |
10996 | } |
10997 | return mask; |
10998 | } |
10999 | else if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
11000 | == vect_partial_vectors_avx512) |
11001 | { |
11002 | /* The number of scalars per iteration and the number of vectors are |
11003 | both compile-time constants. */ |
11004 | unsigned int nscalars_per_iter |
11005 | = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), |
11006 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); |
11007 | |
11008 | rgroup_controls *rgm = &masks->rgc_vec[nscalars_per_iter - 1]; |
11009 | |
11010 | /* The stored nV is dependent on the mask type produced. */ |
11011 | gcc_assert (exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), |
11012 | TYPE_VECTOR_SUBPARTS (rgm->type)).to_constant () |
11013 | == rgm->factor); |
11014 | nvectors = rgm->factor; |
11015 | |
11016 | /* Populate the rgroup's mask array, if this is the first time we've |
11017 | used it. */ |
11018 | if (rgm->controls.is_empty ()) |
11019 | { |
11020 | rgm->controls.safe_grow_cleared (nvectors, true); |
11021 | for (unsigned int i = 0; i < nvectors; ++i) |
11022 | { |
11023 | tree mask = make_temp_ssa_name (rgm->type, NULL, "loop_mask"); |
11024 | /* Provide a dummy definition until the real one is available. */ |
11025 | SSA_NAME_DEF_STMT (mask) = gimple_build_nop (); |
11026 | rgm->controls[i] = mask; |
11027 | } |
11028 | } |
11029 | if (known_eq (TYPE_VECTOR_SUBPARTS (rgm->type), |
11030 | TYPE_VECTOR_SUBPARTS (vectype))) |
11031 | return rgm->controls[index]; |
11032 | |
11033 | /* Split the vector if needed. Since we are dealing with integer mode |
11034 | masks with AVX512 we can operate on the integer representation, |
11035 | shifting the whole mask value at once. */ |
11036 | unsigned HOST_WIDE_INT factor; |
11037 | bool ok = constant_multiple_p (TYPE_VECTOR_SUBPARTS (rgm->type), |
11038 | TYPE_VECTOR_SUBPARTS (vectype), &factor); |
11039 | gcc_assert (ok); |
11040 | gcc_assert (GET_MODE_CLASS (TYPE_MODE (rgm->type)) == MODE_INT); |
11041 | tree mask_type = truth_type_for (vectype); |
11042 | gcc_assert (GET_MODE_CLASS (TYPE_MODE (mask_type)) == MODE_INT); |
11043 | unsigned vi = index / factor; |
11044 | unsigned vpart = index % factor; |
11045 | tree vec = rgm->controls[vi]; |
11046 | gimple_seq seq = NULL; |
11047 | vec = gimple_build (&seq, VIEW_CONVERT_EXPR, |
11048 | lang_hooks.types.type_for_mode |
11049 | (TYPE_MODE (rgm->type), 1), vec); |
11050 | /* For integer mode masks simply shift the right bits into position. */ |
11051 | if (vpart != 0) |
11052 | vec = gimple_build (&seq, RSHIFT_EXPR, TREE_TYPE (vec), vec, |
11053 | build_int_cst (integer_type_node, |
11054 | (TYPE_VECTOR_SUBPARTS (vectype) |
11055 | * vpart))); |
11056 | vec = gimple_convert (&seq, lang_hooks.types.type_for_mode |
11057 | (TYPE_MODE (mask_type), 1), vec); |
11058 | vec = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, vec); |
11059 | if (seq) |
11060 | gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); |
11061 | return vec; |
11062 | } |
11063 | else |
11064 | gcc_unreachable (); |
11065 | } |
11066 | |
11067 | /* Record that LOOP_VINFO would need LENS to contain a sequence of NVECTORS |
11068 | lengths for controlling an operation on VECTYPE. The operation splits |
11069 | each element of VECTYPE into FACTOR separate subelements, measuring the |
11070 | length as a number of these subelements. */ |
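| /* For instance (an assumed example, not tied to a particular target): |
| a V4SI access that has to be carried out as a byte-length operation |
| would be recorded with FACTOR == 4, and the resulting length then |
| counts QImode subelements rather than SImode elements.  */ |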
11071 | |
11072 | void |
11073 | vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens, |
11074 | unsigned int nvectors, tree vectype, unsigned int factor) |
11075 | { |
11076 | gcc_assert (nvectors != 0); |
11077 | if (lens->length () < nvectors) |
11078 | lens->safe_grow_cleared (nvectors, true); |
11079 | rgroup_controls *rgl = &(*lens)[nvectors - 1]; |
11080 | |
11081 | /* The number of scalars per iteration, the bytes each scalar occupies and |
11082 | the number of vectors are all compile-time constants. */ |
11083 | unsigned int nscalars_per_iter |
11084 | = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), |
11085 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); |
11086 | |
11087 | if (rgl->max_nscalars_per_iter < nscalars_per_iter) |
11088 | { |
11089 | /* For now, we only support cases in which all loads and stores fall back |
11090 | to VnQI or none do. */ |
11091 | gcc_assert (!rgl->max_nscalars_per_iter |
11092 | || (rgl->factor == 1 && factor == 1) |
11093 | || (rgl->max_nscalars_per_iter * rgl->factor |
11094 | == nscalars_per_iter * factor)); |
11095 | rgl->max_nscalars_per_iter = nscalars_per_iter; |
11096 | rgl->type = vectype; |
11097 | rgl->factor = factor; |
11098 | } |
11099 | } |
11100 | |
11101 | /* Given a complete set of lengths LENS, extract length number INDEX |
11102 | for an rgroup that operates on NVECTORS vectors of type VECTYPE, |
11103 | where 0 <= INDEX < NVECTORS. Return a value that contains FACTOR |
11104 | multiplied by the number of elements that should be processed. |
11105 | Insert any set-up statements before GSI. */ |
11106 | |
11107 | tree |
11108 | vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, |
11109 | vec_loop_lens *lens, unsigned int nvectors, tree vectype, |
11110 | unsigned int index, unsigned int factor) |
11111 | { |
11112 | rgroup_controls *rgl = &(*lens)[nvectors - 1]; |
11113 | bool use_bias_adjusted_len = |
11114 | LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) != 0; |
11115 | |
11116 | /* Populate the rgroup's len array, if this is the first time we've |
11117 | used it. */ |
11118 | if (rgl->controls.is_empty ()) |
11119 | { |
11120 | rgl->controls.safe_grow_cleared (nvectors, true); |
11121 | for (unsigned int i = 0; i < nvectors; ++i) |
11122 | { |
11123 | tree len_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); |
11124 | gcc_assert (len_type != NULL_TREE); |
11125 | |
11126 | tree len = make_temp_ssa_name (len_type, NULL, "loop_len"); |
11127 | |
11128 | /* Provide a dummy definition until the real one is available. */ |
11129 | SSA_NAME_DEF_STMT (len) = gimple_build_nop (); |
11130 | rgl->controls[i] = len; |
11131 | |
11132 | if (use_bias_adjusted_len) |
11133 | { |
11134 | gcc_assert (i == 0); |
11135 | tree adjusted_len = |
11136 | make_temp_ssa_name (len_type, NULL, "adjusted_loop_len"); |
11137 | SSA_NAME_DEF_STMT (adjusted_len) = gimple_build_nop (); |
11138 | rgl->bias_adjusted_ctrl = adjusted_len; |
11139 | } |
11140 | } |
11141 | } |
11142 | |
11143 | if (use_bias_adjusted_len) |
11144 | return rgl->bias_adjusted_ctrl; |
11145 | |
11146 | tree loop_len = rgl->controls[index]; |
11147 | if (rgl->factor == 1 && factor == 1) |
11148 | { |
11149 | poly_int64 nunits1 = TYPE_VECTOR_SUBPARTS (rgl->type); |
11150 | poly_int64 nunits2 = TYPE_VECTOR_SUBPARTS (vectype); |
11151 | if (maybe_ne (nunits1, nunits2)) |
11152 | { |
11153 | /* A loop len for data type X can be reused for data type Y |
11154 | if X has N times more elements than Y and if Y's elements |
11155 | are N times bigger than X's. */ |
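| /* E.g. (illustrative only) a length computed for a sixteen-element |
| control type serves a four-element VECTYPE after dividing it by |
| factor == 4 below.  */ |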
11156 | gcc_assert (multiple_p (nunits1, nunits2)); |
11157 | factor = exact_div (nunits1, nunits2).to_constant (); |
11158 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
11159 | gimple_seq seq = NULL; |
11160 | loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len, |
11161 | build_int_cst (iv_type, factor)); |
11162 | if (seq) |
11163 | gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); |
11164 | } |
11165 | } |
11166 | return loop_len; |
11167 | } |
11168 | |
11169 | /* Scale profiling counters by estimation for LOOP which is vectorized |
11170 | by factor VF. |
11171 | If FLAT is true, the loop we started with had an unrealistically flat |
11172 | profile. */ |
11173 | |
11174 | static void |
11175 | scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool flat) |
11176 | { |
11177 | /* For flat profiles do not scale down proportionally by VF and only |
11178 | cap by known iteration count bounds. */ |
11179 | if (flat) |
11180 | { |
11181 | if (dump_file && (dump_flags & TDF_DETAILS)) |
11182 | fprintf (dump_file, |
11183 | "Vectorized loop profile seems flat; not scaling iteration " |
11184 | "count down by the vectorization factor %i\n", vf); |
11185 | scale_loop_profile (loop, profile_probability::always (), |
11186 | get_likely_max_loop_iterations_int (loop)); |
11187 | return; |
11188 | } |
11189 | /* Loop body executes VF fewer times and exit increases VF times. */ |
11190 | profile_count entry_count = loop_preheader_edge (loop)->count (); |
11191 | |
11192 | /* If we have an unreliable loop profile, avoid dropping the entry |
11193 | count below the header count. This can happen since the loop |
11194 | has an unrealistically low trip count. */ |
11195 | while (vf > 1 |
11196 | && loop->header->count > entry_count |
11197 | && loop->header->count < entry_count * vf) |
11198 | { |
11199 | if (dump_file && (dump_flags & TDF_DETAILS)) |
11200 | fprintf (dump_file, |
11201 | "Vectorization factor %i seems too large for profile " |
11202 | "previously believed to be consistent; reducing.\n", vf); |
11203 | vf /= 2; |
11204 | } |
11205 | |
11206 | if (entry_count.nonzero_p ()) |
11207 | set_edge_probability_and_rescale_others |
11208 | (exit_e, |
11209 | entry_count.probability_in (loop->header->count / vf)); |
11210 | /* Avoid producing very large exit probability when we do not have |
11211 | sensible profile. */ |
11212 | else if (exit_e->probability < profile_probability::always () / (vf * 2)) |
11213 | set_edge_probability_and_rescale_others (exit_e, exit_e->probability * vf); |
11214 | loop->latch->count = single_pred_edge (loop->latch)->count (); |
11215 | |
11216 | scale_loop_profile (loop, profile_probability::always () / vf, |
11217 | get_likely_max_loop_iterations_int (loop)); |
11218 | } |
11219 | |
11220 | /* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI |
11221 | latch edge values originally defined by it. */ |
11222 | |
11223 | static void |
11224 | maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo, |
11225 | stmt_vec_info def_stmt_info) |
11226 | { |
11227 | tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt); |
11228 | if (!def || TREE_CODE (def) != SSA_NAME) |
11229 | return; |
11230 | stmt_vec_info phi_info; |
11231 | imm_use_iterator iter; |
11232 | use_operand_p use_p; |
11233 | FOR_EACH_IMM_USE_FAST (use_p, iter, def) |
11234 | { |
11235 | gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)); |
11236 | if (!phi) |
11237 | continue; |
11238 | if (!(gimple_bb (phi)->loop_father->header == gimple_bb (phi) |
11239 | && (phi_info = loop_vinfo->lookup_stmt (phi)) |
11240 | && STMT_VINFO_RELEVANT_P (phi_info))) |
11241 | continue; |
11242 | loop_p loop = gimple_bb (phi)->loop_father; |
11243 | edge e = loop_latch_edge (loop); |
11244 | if (PHI_ARG_DEF_FROM_EDGE (phi, e) != def) |
11245 | continue; |
11246 | |
11247 | if (VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info)) |
11248 | && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION |
11249 | && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION) |
11250 | { |
11251 | vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info); |
11252 | vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info); |
11253 | gcc_assert (phi_defs.length () == latch_defs.length ()); |
11254 | for (unsigned i = 0; i < phi_defs.length (); ++i) |
11255 | add_phi_arg (as_a <gphi *> (p: phi_defs[i]), |
11256 | gimple_get_lhs (latch_defs[i]), e, |
11257 | gimple_phi_arg_location (phi, i: e->dest_idx)); |
11258 | } |
11259 | else if (STMT_VINFO_DEF_TYPE (phi_info) == vect_first_order_recurrence) |
11260 | { |
11261 | /* For first order recurrences we have to update both uses of |
11262 | the latch definition, the one in the PHI node and the one |
11263 | in the generated VEC_PERM_EXPR. */ |
11264 | vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info); |
11265 | vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info); |
11266 | gcc_assert (phi_defs.length () == latch_defs.length ()); |
11267 | tree phidef = gimple_assign_rhs1 (gs: phi_defs[0]); |
11268 | gphi *vphi = as_a <gphi *> (SSA_NAME_DEF_STMT (phidef)); |
11269 | for (unsigned i = 0; i < phi_defs.length (); ++i) |
11270 | { |
11271 | gassign *perm = as_a <gassign *> (p: phi_defs[i]); |
11272 | if (i > 0) |
11273 | gimple_assign_set_rhs1 (gs: perm, rhs: gimple_get_lhs (latch_defs[i-1])); |
11274 | gimple_assign_set_rhs2 (gs: perm, rhs: gimple_get_lhs (latch_defs[i])); |
11275 | update_stmt (s: perm); |
11276 | } |
11277 | add_phi_arg (vphi, gimple_get_lhs (latch_defs.last ()), e, |
11278 | gimple_phi_arg_location (phi, i: e->dest_idx)); |
11279 | } |
11280 | } |
11281 | } |
11282 | |
11283 | /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI. |
11284 | When vectorizing STMT_INFO as a store, set *SEEN_STORE to its |
11285 | stmt_vec_info. */ |
11286 | |
11287 | static bool |
11288 | vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, |
11289 | gimple_stmt_iterator *gsi, stmt_vec_info *seen_store) |
11290 | { |
11291 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
11292 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
11293 | |
11294 | if (dump_enabled_p ()) |
11295 | dump_printf_loc (MSG_NOTE, vect_location, |
11296 | "------>vectorizing statement: %G" , stmt_info->stmt); |
11297 | |
11298 | if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) |
11299 | vect_loop_kill_debug_uses (loop, stmt_info); |
11300 | |
11301 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
11302 | && !STMT_VINFO_LIVE_P (stmt_info)) |
11303 | return false; |
11304 | |
11305 | if (STMT_VINFO_VECTYPE (stmt_info)) |
11306 | { |
11307 | poly_uint64 nunits |
11308 | = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); |
11309 | if (!STMT_SLP_TYPE (stmt_info) |
11310 | && maybe_ne (a: nunits, b: vf) |
11311 | && dump_enabled_p ()) |
11312 | /* For SLP VF is set according to unrolling factor, and not |
11313 | to vector size, hence for SLP this print is not valid. */ |
11314 | dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n" ); |
11315 | } |
11316 | |
11317 | /* Pure SLP statements have already been vectorized. We still need |
11318 | to apply loop vectorization to hybrid SLP statements. */ |
11319 | if (PURE_SLP_STMT (stmt_info)) |
11320 | return false; |
11321 | |
11322 | if (dump_enabled_p ()) |
11323 | dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n" ); |
11324 | |
11325 | if (vect_transform_stmt (loop_vinfo, stmt_info, gsi, NULL, NULL)) |
11326 | *seen_store = stmt_info; |
11327 | |
11328 | return true; |
11329 | } |
11330 | |
11331 | /* Helper function to pass to simplify_replace_tree to enable replacing tree's |
11332 | in the hash_map with its corresponding values. */ |
11333 | |
11334 | static tree |
11335 | find_in_mapping (tree t, void *context) |
11336 | { |
11337 | hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context; |
11338 | |
11339 | tree *value = mapping->get (k: t); |
11340 | return value ? *value : t; |
11341 | } |
11342 | |
11343 | /* Update EPILOGUE's loop_vec_info. EPILOGUE was constructed as a copy of the |
11344 | original loop that has now been vectorized. |
11345 | |
11346 | The inits of the data_references need to be advanced with the number of |
11347 | iterations of the main loop. This has been computed in vect_do_peeling and |
11348 | is stored in parameter ADVANCE. We first restore the data_references |
11349 | initial offset with the values recored in ORIG_DRS_INIT. |
11350 | |
11351 | Since the loop_vec_info of this EPILOGUE was constructed for the original |
11352 | loop, its stmt_vec_infos all point to the original statements. These need |
11353 | to be updated to point to their corresponding copies as well as the SSA_NAMES |
11354 | in their PATTERN_DEF_SEQs and RELATED_STMTs. |
11355 | |
11356 | The data_reference's connections also need to be updated. Their |
11357 | corresponding dr_vec_info need to be reconnected to the EPILOGUE's |
11358 | stmt_vec_infos, their statements need to point to their corresponding copy, |
11359 | if they are gather loads or scatter stores then their reference needs to be |
11360 | updated to point to its corresponding copy and finally we set |
11361 | 'base_misaligned' to false as we have already peeled for alignment in the |
11362 | prologue of the main loop. */ |
11363 | |
11364 | static void |
11365 | update_epilogue_loop_vinfo (class loop *epilogue, tree advance) |
11366 | { |
11367 | loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (loop: epilogue); |
11368 | auto_vec<gimple *> stmt_worklist; |
11369 | hash_map<tree,tree> mapping; |
11370 | gimple *orig_stmt, *new_stmt; |
11371 | gimple_stmt_iterator epilogue_gsi; |
11372 | gphi_iterator epilogue_phi_gsi; |
11373 | stmt_vec_info stmt_vinfo = NULL, related_vinfo; |
11374 | basic_block *epilogue_bbs = get_loop_body (epilogue); |
11375 | unsigned i; |
11376 | |
11377 | free (LOOP_VINFO_BBS (epilogue_vinfo)); |
11378 | LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs; |
11379 | |
11380 | /* Advance data_reference's with the number of iterations of the previous |
11381 | loop and its prologue. */ |
11382 | vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR); |
11383 | |
11384 | |
11385 | /* The EPILOGUE loop is a copy of the original loop so they share the same |
11386 | gimple UIDs. In this loop we update the loop_vec_info of the EPILOGUE to |
11387 | point to the copied statements. We also create a mapping of all LHS' in |
11388 | the original loop and all the LHS' in the EPILOGUE and create worklists to |
11389 | update teh STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs. */ |
11390 | for (unsigned i = 0; i < epilogue->num_nodes; ++i) |
11391 | { |
11392 | for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]); |
11393 | !gsi_end_p (i: epilogue_phi_gsi); gsi_next (i: &epilogue_phi_gsi)) |
11394 | { |
11395 | new_stmt = epilogue_phi_gsi.phi (); |
11396 | |
11397 | gcc_assert (gimple_uid (new_stmt) > 0); |
11398 | stmt_vinfo |
11399 | = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: new_stmt) - 1]; |
11400 | |
11401 | orig_stmt = STMT_VINFO_STMT (stmt_vinfo); |
11402 | STMT_VINFO_STMT (stmt_vinfo) = new_stmt; |
11403 | |
11404 | mapping.put (k: gimple_phi_result (gs: orig_stmt), |
11405 | v: gimple_phi_result (gs: new_stmt)); |
11406 | /* PHI nodes can not have patterns or related statements. */ |
11407 | gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL |
11408 | && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL); |
11409 | } |
11410 | |
11411 | for (epilogue_gsi = gsi_start_bb (bb: epilogue_bbs[i]); |
11412 | !gsi_end_p (i: epilogue_gsi); gsi_next (i: &epilogue_gsi)) |
11413 | { |
11414 | new_stmt = gsi_stmt (i: epilogue_gsi); |
11415 | if (is_gimple_debug (gs: new_stmt)) |
11416 | continue; |
11417 | |
11418 | gcc_assert (gimple_uid (new_stmt) > 0); |
11419 | stmt_vinfo |
11420 | = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: new_stmt) - 1]; |
11421 | |
11422 | orig_stmt = STMT_VINFO_STMT (stmt_vinfo); |
11423 | STMT_VINFO_STMT (stmt_vinfo) = new_stmt; |
11424 | |
11425 | if (tree old_lhs = gimple_get_lhs (orig_stmt)) |
11426 | mapping.put (k: old_lhs, v: gimple_get_lhs (new_stmt)); |
11427 | |
11428 | if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)) |
11429 | { |
11430 | gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); |
11431 | for (gimple_stmt_iterator gsi = gsi_start (seq); |
11432 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
11433 | stmt_worklist.safe_push (obj: gsi_stmt (i: gsi)); |
11434 | } |
11435 | |
11436 | related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo); |
11437 | if (related_vinfo != NULL && related_vinfo != stmt_vinfo) |
11438 | { |
11439 | gimple *stmt = STMT_VINFO_STMT (related_vinfo); |
11440 | stmt_worklist.safe_push (obj: stmt); |
11441 | /* Set BB such that the assert in |
11442 | 'get_initial_def_for_reduction' is able to determine that |
11443 | the BB of the related stmt is inside this loop. */ |
11444 | gimple_set_bb (stmt, |
11445 | gimple_bb (g: new_stmt)); |
11446 | related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo); |
11447 | gcc_assert (related_vinfo == NULL |
11448 | || related_vinfo == stmt_vinfo); |
11449 | } |
11450 | } |
11451 | } |
11452 | |
11453 | /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed |
11454 | using the original main loop and thus need to be updated to refer to the |
11455 | cloned variables used in the epilogue. */ |
11456 | for (unsigned i = 0; i < stmt_worklist.length (); ++i) |
11457 | { |
11458 | gimple *stmt = stmt_worklist[i]; |
11459 | tree *new_op; |
11460 | |
11461 | for (unsigned j = 1; j < gimple_num_ops (gs: stmt); ++j) |
11462 | { |
11463 | tree op = gimple_op (gs: stmt, i: j); |
11464 | if ((new_op = mapping.get(k: op))) |
11465 | gimple_set_op (gs: stmt, i: j, op: *new_op); |
11466 | else |
11467 | { |
11468 | /* PR92429: The last argument of simplify_replace_tree disables |
11469 | folding when replacing arguments. This is required as |
11470 | otherwise you might end up with different statements than the |
11471 | ones analyzed in vect_loop_analyze, leading to different |
11472 | vectorization. */ |
11473 | op = simplify_replace_tree (op, NULL_TREE, NULL_TREE, |
11474 | &find_in_mapping, &mapping, do_fold: false); |
11475 | gimple_set_op (gs: stmt, i: j, op); |
11476 | } |
11477 | } |
11478 | } |
11479 | |
11480 | struct data_reference *dr; |
11481 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo); |
11482 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
11483 | { |
11484 | orig_stmt = DR_STMT (dr); |
11485 | gcc_assert (gimple_uid (orig_stmt) > 0); |
11486 | stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: orig_stmt) - 1]; |
11487 | /* Data references for gather loads and scatter stores do not use the |
11488 | updated offset we set using ADVANCE. Instead we have to make sure the |
11489 | reference in the data references point to the corresponding copy of |
11490 | the original in the epilogue. Make sure to update both |
11491 | gather/scatters recognized by dataref analysis and also other |
11492 | refs that get_load_store_type classified as VMAT_GATHER_SCATTER. */ |
11493 | auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_info: stmt_vinfo); |
11494 | if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER |
11495 | || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo)) |
11496 | { |
11497 | DR_REF (dr) |
11498 | = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE, |
11499 | &find_in_mapping, &mapping); |
11500 | DR_BASE_ADDRESS (dr) |
11501 | = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE, |
11502 | &find_in_mapping, &mapping); |
11503 | } |
11504 | DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo); |
11505 | stmt_vinfo->dr_aux.stmt = stmt_vinfo; |
11506 | /* The vector size of the epilogue is smaller than that of the main loop |
11507 | so the alignment is either the same or lower. This means the dr will |
11508 | thus by definition be aligned. */ |
11509 | STMT_VINFO_DR_INFO (stmt_vinfo)->base_misaligned = false; |
11510 | } |
11511 | |
11512 | epilogue_vinfo->shared->datarefs_copy.release (); |
11513 | epilogue_vinfo->shared->save_datarefs (); |
11514 | } |
11515 | |
11516 | /* Function vect_transform_loop. |
11517 | |
11518 | The analysis phase has determined that the loop is vectorizable. |
11519 | Vectorize the loop - created vectorized stmts to replace the scalar |
11520 | stmts in the loop, and update the loop exit condition. |
11521 | Returns scalar epilogue loop if any. */ |
11522 | |
11523 | class loop * |
11524 | vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) |
11525 | { |
11526 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
11527 | class loop *epilogue = NULL; |
11528 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
11529 | int nbbs = loop->num_nodes; |
11530 | int i; |
11531 | tree niters_vector = NULL_TREE; |
11532 | tree step_vector = NULL_TREE; |
11533 | tree niters_vector_mult_vf = NULL_TREE; |
11534 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
11535 | unsigned int lowest_vf = constant_lower_bound (a: vf); |
11536 | gimple *stmt; |
11537 | bool check_profitability = false; |
11538 | unsigned int th; |
11539 | bool flat = maybe_flat_loop_profile (loop); |
11540 | |
11541 | DUMP_VECT_SCOPE ("vec_transform_loop" ); |
11542 | |
11543 | loop_vinfo->shared->check_datarefs (); |
11544 | |
11545 | /* Use the more conservative vectorization threshold. If the number |
11546 | of iterations is constant assume the cost check has been performed |
11547 | by our caller. If the threshold makes all loops profitable that |
11548 | run at least the (estimated) vectorization factor number of times |
11549 | checking is pointless, too. */ |
11550 | th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
11551 | if (vect_apply_runtime_profitability_check_p (loop_vinfo)) |
11552 | { |
11553 | if (dump_enabled_p ()) |
11554 | dump_printf_loc (MSG_NOTE, vect_location, |
11555 | "Profitability threshold is %d loop iterations.\n" , |
11556 | th); |
11557 | check_profitability = true; |
11558 | } |
11559 | |
11560 | /* Make sure there exists a single-predecessor exit bb. Do this before |
11561 | versioning. */ |
11562 | edge e = LOOP_VINFO_IV_EXIT (loop_vinfo); |
11563 | if (! single_pred_p (bb: e->dest)) |
11564 | { |
11565 | split_loop_exit_edge (e, true); |
11566 | if (dump_enabled_p ()) |
11567 | dump_printf (MSG_NOTE, "split exit edge\n" ); |
11568 | } |
11569 | |
11570 | /* Version the loop first, if required, so the profitability check |
11571 | comes first. */ |
11572 | |
11573 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
11574 | { |
11575 | class loop *sloop |
11576 | = vect_loop_versioning (loop_vinfo, loop_vectorized_call); |
11577 | sloop->force_vectorize = false; |
11578 | check_profitability = false; |
11579 | } |
11580 | |
11581 | /* Make sure there exists a single-predecessor exit bb also on the |
11582 | scalar loop copy. Do this after versioning but before peeling |
11583 | so CFG structure is fine for both scalar and if-converted loop |
11584 | to make slpeel_duplicate_current_defs_from_edges face matched |
11585 | loop closed PHI nodes on the exit. */ |
11586 | if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)) |
11587 | { |
11588 | e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); |
11589 | if (! single_pred_p (bb: e->dest)) |
11590 | { |
11591 | split_loop_exit_edge (e, true); |
11592 | if (dump_enabled_p ()) |
11593 | dump_printf (MSG_NOTE, "split exit edge of scalar loop\n" ); |
11594 | } |
11595 | } |
11596 | |
11597 | tree niters = vect_build_loop_niters (loop_vinfo); |
11598 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; |
11599 | tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
11600 | bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); |
11601 | tree advance; |
11602 | drs_init_vec orig_drs_init; |
11603 | |
11604 | epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, |
11605 | &step_vector, &niters_vector_mult_vf, th, |
11606 | check_profitability, niters_no_overflow, |
11607 | &advance); |
11608 | if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo) |
11609 | && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ()) |
11610 | { |
11611 | /* Ifcvt duplicates loop preheader, loop body and produces an basic |
11612 | block after loop exit. We need to scale all that. */ |
11613 | basic_block |
11614 | = loop_preheader_edge (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))->src; |
11615 | preheader->count |
11616 | = preheader->count.apply_probability |
11617 | (LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo)); |
11618 | scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo), |
11619 | LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo)); |
11620 | single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))->dest->count |
11621 | = preheader->count; |
11622 | } |
11623 | |
11624 | if (niters_vector == NULL_TREE) |
11625 | { |
11626 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
11627 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
11628 | && known_eq (lowest_vf, vf)) |
11629 | { |
11630 | niters_vector |
11631 | = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), |
11632 | LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf); |
11633 | step_vector = build_one_cst (TREE_TYPE (niters)); |
11634 | } |
11635 | else if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
11636 | vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, |
11637 | &step_vector, niters_no_overflow); |
11638 | else |
11639 | /* vect_do_peeling subtracted the number of peeled prologue |
11640 | iterations from LOOP_VINFO_NITERS. */ |
11641 | vect_gen_vector_loop_niters (loop_vinfo, LOOP_VINFO_NITERS (loop_vinfo), |
11642 | &niters_vector, &step_vector, |
11643 | niters_no_overflow); |
11644 | } |
11645 | |
11646 | /* 1) Make sure the loop header has exactly two entries |
11647 | 2) Make sure we have a preheader basic block. */ |
11648 | |
11649 | gcc_assert (EDGE_COUNT (loop->header->preds) == 2); |
11650 | |
11651 | split_edge (loop_preheader_edge (loop)); |
11652 | |
11653 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
11654 | /* This will deal with any possible peeling. */ |
11655 | vect_prepare_for_masked_peels (loop_vinfo); |
11656 | |
11657 | /* Schedule the SLP instances first, then handle loop vectorization |
11658 | below. */ |
11659 | if (!loop_vinfo->slp_instances.is_empty ()) |
11660 | { |
11661 | DUMP_VECT_SCOPE ("scheduling SLP instances" ); |
11662 | vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); |
11663 | } |
11664 | |
11665 | /* FORNOW: the vectorizer supports only loops which body consist |
11666 | of one basic block (header + empty latch). When the vectorizer will |
11667 | support more involved loop forms, the order by which the BBs are |
11668 | traversed need to be reconsidered. */ |
11669 | |
11670 | for (i = 0; i < nbbs; i++) |
11671 | { |
11672 | basic_block bb = bbs[i]; |
11673 | stmt_vec_info stmt_info; |
11674 | |
11675 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
11676 | gsi_next (i: &si)) |
11677 | { |
11678 | gphi *phi = si.phi (); |
11679 | if (dump_enabled_p ()) |
11680 | dump_printf_loc (MSG_NOTE, vect_location, |
11681 | "------>vectorizing phi: %G" , (gimple *) phi); |
11682 | stmt_info = loop_vinfo->lookup_stmt (phi); |
11683 | if (!stmt_info) |
11684 | continue; |
11685 | |
11686 | if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) |
11687 | vect_loop_kill_debug_uses (loop, stmt_info); |
11688 | |
11689 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
11690 | && !STMT_VINFO_LIVE_P (stmt_info)) |
11691 | continue; |
11692 | |
11693 | if (STMT_VINFO_VECTYPE (stmt_info) |
11694 | && (maybe_ne |
11695 | (a: TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), b: vf)) |
11696 | && dump_enabled_p ()) |
11697 | dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n" ); |
11698 | |
11699 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
11700 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
11701 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def |
11702 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle |
11703 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence |
11704 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) |
11705 | && ! PURE_SLP_STMT (stmt_info)) |
11706 | { |
11707 | if (dump_enabled_p ()) |
11708 | dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n" ); |
11709 | vect_transform_stmt (loop_vinfo, stmt_info, NULL, NULL, NULL); |
11710 | } |
11711 | } |
11712 | |
11713 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
11714 | gsi_next (i: &si)) |
11715 | { |
11716 | gphi *phi = si.phi (); |
11717 | stmt_info = loop_vinfo->lookup_stmt (phi); |
11718 | if (!stmt_info) |
11719 | continue; |
11720 | |
11721 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
11722 | && !STMT_VINFO_LIVE_P (stmt_info)) |
11723 | continue; |
11724 | |
11725 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
11726 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
11727 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def |
11728 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle |
11729 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def |
11730 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence) |
11731 | && ! PURE_SLP_STMT (stmt_info)) |
11732 | maybe_set_vectorized_backedge_value (loop_vinfo, def_stmt_info: stmt_info); |
11733 | } |
11734 | |
11735 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
11736 | !gsi_end_p (i: si);) |
11737 | { |
11738 | stmt = gsi_stmt (i: si); |
11739 | /* During vectorization remove existing clobber stmts. */ |
11740 | if (gimple_clobber_p (s: stmt)) |
11741 | { |
11742 | unlink_stmt_vdef (stmt); |
11743 | gsi_remove (&si, true); |
11744 | release_defs (stmt); |
11745 | } |
11746 | else |
11747 | { |
11748 | /* Ignore vector stmts created in the outer loop. */ |
11749 | stmt_info = loop_vinfo->lookup_stmt (stmt); |
11750 | |
11751 | /* vector stmts created in the outer-loop during vectorization of |
11752 | stmts in an inner-loop may not have a stmt_info, and do not |
11753 | need to be vectorized. */ |
11754 | stmt_vec_info seen_store = NULL; |
11755 | if (stmt_info) |
11756 | { |
11757 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
11758 | { |
11759 | gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
11760 | for (gimple_stmt_iterator subsi = gsi_start (seq&: def_seq); |
11761 | !gsi_end_p (i: subsi); gsi_next (i: &subsi)) |
11762 | { |
11763 | stmt_vec_info pat_stmt_info |
11764 | = loop_vinfo->lookup_stmt (gsi_stmt (i: subsi)); |
11765 | vect_transform_loop_stmt (loop_vinfo, stmt_info: pat_stmt_info, |
11766 | gsi: &si, seen_store: &seen_store); |
11767 | } |
11768 | stmt_vec_info pat_stmt_info |
11769 | = STMT_VINFO_RELATED_STMT (stmt_info); |
11770 | if (vect_transform_loop_stmt (loop_vinfo, stmt_info: pat_stmt_info, |
11771 | gsi: &si, seen_store: &seen_store)) |
11772 | maybe_set_vectorized_backedge_value (loop_vinfo, |
11773 | def_stmt_info: pat_stmt_info); |
11774 | } |
11775 | else |
11776 | { |
11777 | if (vect_transform_loop_stmt (loop_vinfo, stmt_info, gsi: &si, |
11778 | seen_store: &seen_store)) |
11779 | maybe_set_vectorized_backedge_value (loop_vinfo, |
11780 | def_stmt_info: stmt_info); |
11781 | } |
11782 | } |
11783 | gsi_next (i: &si); |
11784 | if (seen_store) |
11785 | { |
11786 | if (STMT_VINFO_GROUPED_ACCESS (seen_store)) |
11787 | /* Interleaving. If IS_STORE is TRUE, the |
11788 | vectorization of the interleaving chain was |
11789 | completed - free all the stores in the chain. */ |
11790 | vect_remove_stores (loop_vinfo, |
11791 | DR_GROUP_FIRST_ELEMENT (seen_store)); |
11792 | else |
11793 | /* Free the attached stmt_vec_info and remove the stmt. */ |
11794 | loop_vinfo->remove_stmt (stmt_info); |
11795 | } |
11796 | } |
11797 | } |
11798 | |
11799 | /* Stub out scalar statements that must not survive vectorization. |
11800 | Doing this here helps with grouped statements, or statements that |
11801 | are involved in patterns. */ |
11802 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); |
11803 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
11804 | { |
11805 | gcall *call = dyn_cast <gcall *> (p: gsi_stmt (i: gsi)); |
11806 | if (!call || !gimple_call_internal_p (gs: call)) |
11807 | continue; |
11808 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
11809 | if (ifn == IFN_MASK_LOAD) |
11810 | { |
11811 | tree lhs = gimple_get_lhs (call); |
11812 | if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
11813 | { |
11814 | tree zero = build_zero_cst (TREE_TYPE (lhs)); |
11815 | gimple *new_stmt = gimple_build_assign (lhs, zero); |
11816 | gsi_replace (&gsi, new_stmt, true); |
11817 | } |
11818 | } |
11819 | else if (conditional_internal_fn_code (ifn) != ERROR_MARK) |
11820 | { |
11821 | tree lhs = gimple_get_lhs (call); |
11822 | if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
11823 | { |
11824 | tree else_arg |
11825 | = gimple_call_arg (gs: call, index: gimple_call_num_args (gs: call) - 1); |
11826 | gimple *new_stmt = gimple_build_assign (lhs, else_arg); |
11827 | gsi_replace (&gsi, new_stmt, true); |
11828 | } |
11829 | } |
11830 | } |
11831 | } /* BBs in loop */ |
11832 | |
11833 | /* The vectorization factor is always > 1, so if we use an IV increment of 1. |
11834 | a zero NITERS becomes a nonzero NITERS_VECTOR. */ |
11835 | if (integer_onep (step_vector)) |
11836 | niters_no_overflow = true; |
11837 | vect_set_loop_condition (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), loop_vinfo, |
11838 | niters_vector, step_vector, niters_vector_mult_vf, |
11839 | !niters_no_overflow); |
11840 | |
11841 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
11842 | |
11843 | /* True if the final iteration might not handle a full vector's |
11844 | worth of scalar iterations. */ |
11845 | bool final_iter_may_be_partial |
11846 | = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo); |
11847 | /* The minimum number of iterations performed by the epilogue. This |
11848 | is 1 when peeling for gaps because we always need a final scalar |
11849 | iteration. */ |
11850 | int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; |
11851 | /* +1 to convert latch counts to loop iteration counts, |
11852 | -min_epilogue_iters to remove iterations that cannot be performed |
11853 | by the vector code. */ |
11854 | int bias_for_lowest = 1 - min_epilogue_iters; |
11855 | int bias_for_assumed = bias_for_lowest; |
11856 | int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
11857 | if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
11858 | { |
11859 | /* When the amount of peeling is known at compile time, the first |
11860 | iteration will have exactly alignment_npeels active elements. |
11861 | In the worst case it will have at least one. */ |
11862 | int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1); |
11863 | bias_for_lowest += lowest_vf - min_first_active; |
11864 | bias_for_assumed += assumed_vf - min_first_active; |
11865 | } |
11866 | /* In these calculations the "- 1" converts loop iteration counts |
11867 | back to latch counts. */ |
11868 | if (loop->any_upper_bound) |
11869 | { |
11870 | loop_vec_info main_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
11871 | loop->nb_iterations_upper_bound |
11872 | = (final_iter_may_be_partial |
11873 | ? wi::udiv_ceil (x: loop->nb_iterations_upper_bound + bias_for_lowest, |
11874 | y: lowest_vf) - 1 |
11875 | : wi::udiv_floor (x: loop->nb_iterations_upper_bound + bias_for_lowest, |
11876 | y: lowest_vf) - 1); |
11877 | if (main_vinfo |
11878 | /* Both peeling for alignment and peeling for gaps can end up |
11879 | with the scalar epilogue running for more than VF-1 iterations. */ |
11880 | && !main_vinfo->peeling_for_alignment |
11881 | && !main_vinfo->peeling_for_gaps) |
11882 | { |
11883 | unsigned int bound; |
11884 | poly_uint64 main_iters |
11885 | = upper_bound (LOOP_VINFO_VECT_FACTOR (main_vinfo), |
11886 | LOOP_VINFO_COST_MODEL_THRESHOLD (main_vinfo)); |
11887 | main_iters |
11888 | = upper_bound (a: main_iters, |
11889 | LOOP_VINFO_VERSIONING_THRESHOLD (main_vinfo)); |
11890 | if (can_div_away_from_zero_p (a: main_iters, |
11891 | LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
11892 | quotient: &bound)) |
11893 | loop->nb_iterations_upper_bound |
11894 | = wi::umin (x: (bound_wide_int) (bound - 1), |
11895 | y: loop->nb_iterations_upper_bound); |
11896 | } |
11897 | } |
11898 | if (loop->any_likely_upper_bound) |
11899 | loop->nb_iterations_likely_upper_bound |
11900 | = (final_iter_may_be_partial |
11901 | ? wi::udiv_ceil (x: loop->nb_iterations_likely_upper_bound |
11902 | + bias_for_lowest, y: lowest_vf) - 1 |
11903 | : wi::udiv_floor (x: loop->nb_iterations_likely_upper_bound |
11904 | + bias_for_lowest, y: lowest_vf) - 1); |
11905 | if (loop->any_estimate) |
11906 | loop->nb_iterations_estimate |
11907 | = (final_iter_may_be_partial |
11908 | ? wi::udiv_ceil (x: loop->nb_iterations_estimate + bias_for_assumed, |
11909 | y: assumed_vf) - 1 |
11910 | : wi::udiv_floor (x: loop->nb_iterations_estimate + bias_for_assumed, |
11911 | y: assumed_vf) - 1); |
11912 | scale_profile_for_vect_loop (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), |
11913 | vf: assumed_vf, flat); |
11914 | |
11915 | if (dump_enabled_p ()) |
11916 | { |
11917 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
11918 | { |
11919 | dump_printf_loc (MSG_NOTE, vect_location, |
11920 | "LOOP VECTORIZED\n" ); |
11921 | if (loop->inner) |
11922 | dump_printf_loc (MSG_NOTE, vect_location, |
11923 | "OUTER LOOP VECTORIZED\n" ); |
11924 | dump_printf (MSG_NOTE, "\n" ); |
11925 | } |
11926 | else |
11927 | dump_printf_loc (MSG_NOTE, vect_location, |
11928 | "LOOP EPILOGUE VECTORIZED (MODE=%s)\n" , |
11929 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
11930 | } |
11931 | |
11932 | /* Loops vectorized with a variable factor won't benefit from |
11933 | unrolling/peeling. */ |
11934 | if (!vf.is_constant ()) |
11935 | { |
11936 | loop->unroll = 1; |
11937 | if (dump_enabled_p ()) |
11938 | dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" |
11939 | " variable-length vectorization factor\n" ); |
11940 | } |
11941 | /* Free SLP instances here because otherwise stmt reference counting |
11942 | won't work. */ |
11943 | slp_instance instance; |
11944 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
11945 | vect_free_slp_instance (instance); |
11946 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
11947 | /* Clear-up safelen field since its value is invalid after vectorization |
11948 | since vectorized loop can have loop-carried dependencies. */ |
11949 | loop->safelen = 0; |
11950 | |
11951 | if (epilogue) |
11952 | { |
11953 | update_epilogue_loop_vinfo (epilogue, advance); |
11954 | |
11955 | epilogue->simduid = loop->simduid; |
11956 | epilogue->force_vectorize = loop->force_vectorize; |
11957 | epilogue->dont_vectorize = false; |
11958 | } |
11959 | |
11960 | return epilogue; |
11961 | } |
11962 | |
11963 | /* The code below is trying to perform simple optimization - revert |
11964 | if-conversion for masked stores, i.e. if the mask of a store is zero |
11965 | do not perform it and all stored value producers also if possible. |
11966 | For example, |
11967 | for (i=0; i<n; i++) |
11968 | if (c[i]) |
11969 | { |
11970 | p1[i] += 1; |
11971 | p2[i] = p3[i] +2; |
11972 | } |
11973 | this transformation will produce the following semi-hammock: |
11974 | |
11975 | if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 }) |
11976 | { |
11977 | vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165); |
11978 | vect__12.22_172 = vect__11.19_170 + vect_cst__171; |
11979 | MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172); |
11980 | vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165); |
11981 | vect__19.28_184 = vect__18.25_182 + vect_cst__183; |
11982 | MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184); |
11983 | } |
11984 | */ |
11985 | |
11986 | void |
11987 | optimize_mask_stores (class loop *loop) |
11988 | { |
11989 | basic_block *bbs = get_loop_body (loop); |
11990 | unsigned nbbs = loop->num_nodes; |
11991 | unsigned i; |
11992 | basic_block bb; |
11993 | class loop *bb_loop; |
11994 | gimple_stmt_iterator gsi; |
11995 | gimple *stmt; |
11996 | auto_vec<gimple *> worklist; |
11997 | auto_purge_vect_location sentinel; |
11998 | |
11999 | vect_location = find_loop_location (loop); |
12000 | /* Pick up all masked stores in loop if any. */ |
12001 | for (i = 0; i < nbbs; i++) |
12002 | { |
12003 | bb = bbs[i]; |
12004 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
12005 | gsi_next (i: &gsi)) |
12006 | { |
12007 | stmt = gsi_stmt (i: gsi); |
12008 | if (gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE)) |
12009 | worklist.safe_push (obj: stmt); |
12010 | } |
12011 | } |
12012 | |
12013 | free (ptr: bbs); |
12014 | if (worklist.is_empty ()) |
12015 | return; |
12016 | |
12017 | /* Loop has masked stores. */ |
12018 | while (!worklist.is_empty ()) |
12019 | { |
12020 | gimple *last, *last_store; |
12021 | edge e, efalse; |
12022 | tree mask; |
12023 | basic_block store_bb, join_bb; |
12024 | gimple_stmt_iterator gsi_to; |
12025 | tree vdef, new_vdef; |
12026 | gphi *phi; |
12027 | tree vectype; |
12028 | tree zero; |
12029 | |
12030 | last = worklist.pop (); |
12031 | mask = gimple_call_arg (gs: last, index: 2); |
12032 | bb = gimple_bb (g: last); |
12033 | /* Create then_bb and if-then structure in CFG, then_bb belongs to |
12034 | the same loop as if_bb. It could be different to LOOP when two |
12035 | level loop-nest is vectorized and mask_store belongs to the inner |
12036 | one. */ |
12037 | e = split_block (bb, last); |
12038 | bb_loop = bb->loop_father; |
12039 | gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop)); |
12040 | join_bb = e->dest; |
12041 | store_bb = create_empty_bb (bb); |
12042 | add_bb_to_loop (store_bb, bb_loop); |
12043 | e->flags = EDGE_TRUE_VALUE; |
12044 | efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); |
12045 | /* Put STORE_BB to likely part. */ |
12046 | efalse->probability = profile_probability::likely (); |
12047 | e->probability = efalse->probability.invert (); |
12048 | store_bb->count = efalse->count (); |
12049 | make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); |
12050 | if (dom_info_available_p (CDI_DOMINATORS)) |
12051 | set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); |
12052 | if (dump_enabled_p ()) |
12053 | dump_printf_loc (MSG_NOTE, vect_location, |
12054 | "Create new block %d to sink mask stores." , |
12055 | store_bb->index); |
12056 | /* Create vector comparison with boolean result. */ |
12057 | vectype = TREE_TYPE (mask); |
12058 | zero = build_zero_cst (vectype); |
12059 | stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE); |
12060 | gsi = gsi_last_bb (bb); |
12061 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
12062 | /* Create new PHI node for vdef of the last masked store: |
12063 | .MEM_2 = VDEF <.MEM_1> |
12064 | will be converted to |
12065 | .MEM.3 = VDEF <.MEM_1> |
12066 | and new PHI node will be created in join bb |
12067 | .MEM_2 = PHI <.MEM_1, .MEM_3> |
12068 | */ |
12069 | vdef = gimple_vdef (g: last); |
12070 | new_vdef = make_ssa_name (var: gimple_vop (cfun), stmt: last); |
12071 | gimple_set_vdef (g: last, vdef: new_vdef); |
12072 | phi = create_phi_node (vdef, join_bb); |
12073 | add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION); |
12074 | |
12075 | /* Put all masked stores with the same mask to STORE_BB if possible. */ |
12076 | while (true) |
12077 | { |
12078 | gimple_stmt_iterator gsi_from; |
12079 | gimple *stmt1 = NULL; |
12080 | |
12081 | /* Move masked store to STORE_BB. */ |
12082 | last_store = last; |
12083 | gsi = gsi_for_stmt (last); |
12084 | gsi_from = gsi; |
12085 | /* Shift GSI to the previous stmt for further traversal. */ |
12086 | gsi_prev (i: &gsi); |
12087 | gsi_to = gsi_start_bb (bb: store_bb); |
12088 | gsi_move_before (&gsi_from, &gsi_to); |
12089 | /* Setup GSI_TO to the non-empty block start. */ |
12090 | gsi_to = gsi_start_bb (bb: store_bb); |
12091 | if (dump_enabled_p ()) |
12092 | dump_printf_loc (MSG_NOTE, vect_location, |
12093 | "Move stmt to created bb\n%G" , last); |
12094 | /* Move all stored value producers if possible. */ |
12095 | while (!gsi_end_p (i: gsi)) |
12096 | { |
12097 | tree lhs; |
12098 | imm_use_iterator imm_iter; |
12099 | use_operand_p use_p; |
12100 | bool res; |
12101 | |
12102 | /* Skip debug statements. */ |
12103 | if (is_gimple_debug (gs: gsi_stmt (i: gsi))) |
12104 | { |
12105 | gsi_prev (i: &gsi); |
12106 | continue; |
12107 | } |
12108 | stmt1 = gsi_stmt (i: gsi); |
12109 | /* Do not consider statements writing to memory or having |
12110 | volatile operand. */ |
12111 | if (gimple_vdef (g: stmt1) |
12112 | || gimple_has_volatile_ops (stmt: stmt1)) |
12113 | break; |
12114 | gsi_from = gsi; |
12115 | gsi_prev (i: &gsi); |
12116 | lhs = gimple_get_lhs (stmt1); |
12117 | if (!lhs) |
12118 | break; |
12119 | |
12120 | /* LHS of vectorized stmt must be SSA_NAME. */ |
12121 | if (TREE_CODE (lhs) != SSA_NAME) |
12122 | break; |
12123 | |
12124 | if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
12125 | { |
12126 | /* Remove dead scalar statement. */ |
12127 | if (has_zero_uses (var: lhs)) |
12128 | { |
12129 | gsi_remove (&gsi_from, true); |
12130 | continue; |
12131 | } |
12132 | } |
12133 | |
12134 | /* Check that LHS does not have uses outside of STORE_BB. */ |
12135 | res = true; |
12136 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) |
12137 | { |
12138 | gimple *use_stmt; |
12139 | use_stmt = USE_STMT (use_p); |
12140 | if (is_gimple_debug (gs: use_stmt)) |
12141 | continue; |
12142 | if (gimple_bb (g: use_stmt) != store_bb) |
12143 | { |
12144 | res = false; |
12145 | break; |
12146 | } |
12147 | } |
12148 | if (!res) |
12149 | break; |
12150 | |
12151 | if (gimple_vuse (g: stmt1) |
12152 | && gimple_vuse (g: stmt1) != gimple_vuse (g: last_store)) |
12153 | break; |
12154 | |
12155 | /* Can move STMT1 to STORE_BB. */ |
12156 | if (dump_enabled_p ()) |
12157 | dump_printf_loc (MSG_NOTE, vect_location, |
12158 | "Move stmt to created bb\n%G" , stmt1); |
12159 | gsi_move_before (&gsi_from, &gsi_to); |
12160 | /* Shift GSI_TO for further insertion. */ |
12161 | gsi_prev (i: &gsi_to); |
12162 | } |
12163 | /* Put other masked stores with the same mask to STORE_BB. */ |
12164 | if (worklist.is_empty () |
12165 | || gimple_call_arg (gs: worklist.last (), index: 2) != mask |
12166 | || worklist.last () != stmt1) |
12167 | break; |
12168 | last = worklist.pop (); |
12169 | } |
12170 | add_phi_arg (phi, gimple_vuse (g: last_store), e, UNKNOWN_LOCATION); |
12171 | } |
12172 | } |
12173 | |
12174 | /* Decide whether it is possible to use a zero-based induction variable |
12175 | when vectorizing LOOP_VINFO with partial vectors. If it is, return |
12176 | the value that the induction variable must be able to hold in order |
12177 | to ensure that the rgroups eventually have no active vector elements. |
12178 | Return -1 otherwise. */ |
12179 | |
12180 | widest_int |
12181 | vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo) |
12182 | { |
12183 | tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
12184 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
12185 | unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); |
12186 | |
12187 | /* Calculate the value that the induction variable must be able |
12188 | to hit in order to ensure that we end the loop with an all-false mask. |
12189 | This involves adding the maximum number of inactive trailing scalar |
12190 | iterations. */ |
12191 | widest_int iv_limit = -1; |
12192 | if (max_loop_iterations (loop, &iv_limit)) |
12193 | { |
12194 | if (niters_skip) |
12195 | { |
12196 | /* Add the maximum number of skipped iterations to the |
12197 | maximum iteration count. */ |
12198 | if (TREE_CODE (niters_skip) == INTEGER_CST) |
12199 | iv_limit += wi::to_widest (t: niters_skip); |
12200 | else |
12201 | iv_limit += max_vf - 1; |
12202 | } |
12203 | else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) |
12204 | /* Make a conservatively-correct assumption. */ |
12205 | iv_limit += max_vf - 1; |
12206 | |
12207 | /* IV_LIMIT is the maximum number of latch iterations, which is also |
12208 | the maximum in-range IV value. Round this value down to the previous |
12209 | vector alignment boundary and then add an extra full iteration. */ |
12210 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
12211 | iv_limit = (iv_limit & -(int) known_alignment (a: vf)) + max_vf; |
12212 | } |
12213 | return iv_limit; |
12214 | } |
12215 | |
12216 | /* For the given rgroup_controls RGC, check whether an induction variable |
12217 | would ever hit a value that produces a set of all-false masks or zero |
12218 | lengths before wrapping around. Return true if it's possible to wrap |
12219 | around before hitting the desirable value, otherwise return false. */ |
12220 | |
12221 | bool |
12222 | vect_rgroup_iv_might_wrap_p (loop_vec_info loop_vinfo, rgroup_controls *rgc) |
12223 | { |
12224 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
12225 | |
12226 | if (iv_limit == -1) |
12227 | return true; |
12228 | |
12229 | tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); |
12230 | unsigned int compare_precision = TYPE_PRECISION (compare_type); |
12231 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
12232 | |
12233 | if (wi::min_precision (x: iv_limit * nitems, sgn: UNSIGNED) > compare_precision) |
12234 | return true; |
12235 | |
12236 | return false; |
12237 | } |
12238 | |