/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
#include "tree-ssa-loop-niter.h"
#include "internal-fn.h"
#include "stor-layout.h"
#include "optabs-query.h"
#include "vec-perm-indices.h"
#include "insn-config.h"
#include "rtl.h"
#include "recog.h"
#include "langhooks.h"
#include "tree-vector-builder.h"
#include "optabs-tree.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}


/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  class loop *loop = bb->loop_father;
  class loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}


struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
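/* For example (a sketch): suppose X -> Y1 at an outer block BB1 was
   pushed before X -> Y2 at BB2, where BB1 dominates BB2.  Processing
   FIFO would first rewrite all debug uses of X to Y1, including those
   dominated by BB2; by the time the BB2 entry was processed there
   would be no uses of X left for Y2 to replace.  Popping LIFO applies
   the closer BB2 substitution to its region first.  */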
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  gcc_assert (TREE_CODE (orig_def) != SSA_NAME
              || orig_def != new_def);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}

/* Define one loop rgroup control CTRL from loop LOOP.  INIT_CTRL is the value
   that the control should have during the first iteration and NEXT_CTRL is the
   value that it should have on subsequent iterations.  */
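/* A sketch of the result: a PHI in the loop header of the form

     CTRL = PHI <INIT_CTRL (preheader), NEXT_CTRL (latch)>  */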

static void
vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
                       tree next_ctrl)
{
  gphi *phi = create_phi_node (ctrl, loop->header);
  add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
  add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
}

/* Add SEQ to the end of LOOP's preheader block.  */

static void
add_preheader_seq (class loop *loop, gimple_seq seq)
{
  if (seq)
    {
      edge pe = loop_preheader_edge (loop);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
}

/* Add SEQ to the beginning of LOOP's header block.  */

static void
add_header_seq (class loop *loop, gimple_seq seq)
{
  if (seq)
    {
      gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
    }
}

/* Return true if the target can interleave elements of two vectors.
   OFFSET is 0 if the first half of the vectors should be interleaved
   or 1 if the second half should.  When returning true, store the
   associated permutation in INDICES.  */
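/* For example (a sketch): with 8-element vectors and OFFSET 0, the
   selector built below encodes { 0, 8, 1, 9, 2, 10, ... }, i.e. the
   even interleave of the two inputs, while OFFSET 1 starts the two
   series at element NELTS / 2.  */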

static bool
interleave_supported_p (vec_perm_indices *indices, tree vectype,
                        unsigned int offset)
{
  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 base = exact_div (nelts, 2) * offset;
  vec_perm_builder sel (nelts, 2, 3);
  for (unsigned int i = 0; i < 3; ++i)
    {
      sel.quick_push (base + i);
      sel.quick_push (base + i + nelts);
    }
  indices->new_vector (sel, 2, nelts);
  return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
                               *indices);
}

/* Try to use permutes to define the masks in DEST_RGM using the masks
   in SRC_RGM, given that the former has twice as many masks as the
   latter.  Return true on success, adding any new statements to SEQ.  */
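/* For example (a sketch): if DEST_RGM and SRC_RGM share the mask type
   and SRC_RGM provides mask M, the interleave path below emits

     dest[0] = VEC_PERM_EXPR <M, M, { 0, N, 1, N+1, ... }>;
     dest[1] = VEC_PERM_EXPR <M, M, { N/2, 3N/2, N/2+1, ... }>;

   so each source mask bit is duplicated, letting one source mask
   control two destination masks.  */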

static bool
vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
                               rgroup_controls *src_rgm)
{
  tree src_masktype = src_rgm->type;
  tree dest_masktype = dest_rgm->type;
  machine_mode src_mode = TYPE_MODE (src_masktype);
  insn_code icode1, icode2;
  if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
      && (icode1 = optab_handler (vec_unpacku_hi_optab,
                                  src_mode)) != CODE_FOR_nothing
      && (icode2 = optab_handler (vec_unpacku_lo_optab,
                                  src_mode)) != CODE_FOR_nothing)
    {
      /* Unpacking the source masks gives at least as many mask bits as
         we need.  We can then VIEW_CONVERT any excess bits away.  */
      machine_mode dest_mode = insn_data[icode1].operand[0].mode;
      gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
      tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
      for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
        {
          tree src = src_rgm->controls[i / 2];
          tree dest = dest_rgm->controls[i];
          tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
                            ? VEC_UNPACK_HI_EXPR
                            : VEC_UNPACK_LO_EXPR);
          gassign *stmt;
          if (dest_masktype == unpack_masktype)
            stmt = gimple_build_assign (dest, code, src);
          else
            {
              tree temp = make_ssa_name (unpack_masktype);
              stmt = gimple_build_assign (temp, code, src);
              gimple_seq_add_stmt (seq, stmt);
              stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
                                          build1 (VIEW_CONVERT_EXPR,
                                                  dest_masktype, temp));
            }
          gimple_seq_add_stmt (seq, stmt);
        }
      return true;
    }
  vec_perm_indices indices[2];
  if (dest_masktype == src_masktype
      && interleave_supported_p (&indices[0], src_masktype, 0)
      && interleave_supported_p (&indices[1], src_masktype, 1))
    {
      /* The destination requires twice as many mask bits as the source, so
         we can use interleaving permutes to double up the number of bits.  */
      tree masks[2];
      for (unsigned int i = 0; i < 2; ++i)
        masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
      for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
        {
          tree src = src_rgm->controls[i / 2];
          tree dest = dest_rgm->controls[i];
          gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
                                              src, src, masks[i & 1]);
          gimple_seq_add_stmt (seq, stmt);
        }
      return true;
    }
  return false;
}

/* Populate DEST_RGM->controls, given that they should add up to STEP.

     STEP = MIN_EXPR <ivtmp_34, VF>;

     First length (MIN (X, VF/N)):
       loop_len_15 = MIN_EXPR <STEP, VF/N>;

     Second length:
       tmp = STEP - loop_len_15;
       loop_len_16 = MIN (tmp, VF/N);

     Third length:
       tmp2 = tmp - loop_len_16;
       loop_len_17 = MIN (tmp2, VF/N);

     Last length:
       loop_len_18 = tmp2 - loop_len_17;
 */
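/* A concrete sketch: with VF/N == 4, STEP == 10 and four controls,
   the scheme above yields 4, 4, 2 and 0, which sum to STEP.  */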

static void
vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
                               rgroup_controls *dest_rgm, tree step)
{
  tree ctrl_type = dest_rgm->type;
  poly_uint64 nitems_per_ctrl
    = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
  tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);

  for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
    {
      tree ctrl = dest_rgm->controls[i];
      if (i == 0)
        {
          /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
          gassign *assign
            = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
          gimple_seq_add_stmt (seq, assign);
        }
      else if (i == dest_rgm->controls.length () - 1)
        {
          /* Last iteration: Remain capped to the range [0, VF/N].  */
          gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
                                                 dest_rgm->controls[i - 1]);
          gimple_seq_add_stmt (seq, assign);
        }
      else
        {
          /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N].  */
          step = gimple_build (seq, MINUS_EXPR, iv_type, step,
                               dest_rgm->controls[i - 1]);
          gassign *assign
            = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
          gimple_seq_add_stmt (seq, assign);
        }
    }
}

/* Helper for vect_set_loop_condition_partial_vectors.  Generate definitions
   for all the rgroup controls in RGC and return a control that is nonzero
   when the loop needs to iterate.  Add any new preheader statements to
   PREHEADER_SEQ.  Use LOOP_COND_GSI to insert code before the exit gcond.

   RGC belongs to loop LOOP.  The loop originally iterated NITERS
   times and has been vectorized according to LOOP_VINFO.

   If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
   starts with NITERS_SKIP dummy iterations of the scalar loop before
   the real work starts.  The mask elements for these dummy iterations
   must be 0, to ensure that the extra iterations do not have an effect.

   It is known that:

     NITERS * RGC->max_nscalars_per_iter * RGC->factor

   does not overflow.  However, MIGHT_WRAP_P says whether an induction
   variable that starts at 0 and has step:

     VF * RGC->max_nscalars_per_iter * RGC->factor

   might overflow before hitting a value above:

     (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor

   This means that we cannot guarantee that such an induction variable
   would ever hit a value that produces a set of all-false masks or zero
   lengths for RGC.

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
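/* A numeric sketch of MIGHT_WRAP_P: with an 8-bit comparison type, a
   step of 16 and a limit of 250, the IV takes the values 0, 16, ...,
   240 and then wraps back to 0, so it never reaches a value at or
   above the limit and the controls would never become all-false.  */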

static tree
vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
                                 gimple_seq *preheader_seq,
                                 gimple_seq *header_seq,
                                 gimple_stmt_iterator loop_cond_gsi,
                                 rgroup_controls *rgc, tree niters,
                                 tree niters_skip, bool might_wrap_p,
                                 tree *iv_step, tree *compare_step)
{
  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
  bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  tree ctrl_type = rgc->type;
  unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
  poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree length_limit = NULL_TREE;
  /* For length, we need length_limit to ensure length in range.  */
  if (!use_masks_p)
    length_limit = build_int_cst (compare_type, nitems_per_ctrl);

  /* Calculate the maximum number of item values that the rgroup
     handles in total, the number that it handles for each iteration
     of the vector loop, and the number that it should skip during the
     first iteration of the vector loop.  */
  tree nitems_total = niters;
  tree nitems_step = build_int_cst (iv_type, vf);
  tree nitems_skip = niters_skip;
  if (nitems_per_iter != 1)
    {
      /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
         these multiplications don't overflow.  */
      tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
      tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
      nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
                                   nitems_total, compare_factor);
      nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
                                  nitems_step, iv_factor);
      if (nitems_skip)
        nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
                                    nitems_skip, compare_factor);
    }

  /* Create an induction variable that counts the number of items
     processed.  */
  tree index_before_incr, index_after_incr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
    {
      /* Create an IV that counts down from niters_total and whose step
         is the (variable) amount processed in the current iteration:
           ...
           _10 = (unsigned long) count_12(D);
           ...
           # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
           _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
           ...
           vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
           ...
           ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
           ...
           if (ivtmp_9 > POLY_INT_CST [4, 4])
             goto <bb 4>; [83.33%]
           else
             goto <bb 5>; [16.67%]
      */
      nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
      tree step = rgc->controls.length () == 1 ? rgc->controls[0]
                                               : make_ssa_name (iv_type);
      /* Create decrement IV.  */
      if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
        {
          create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
                     insert_after, &index_before_incr, &index_after_incr);
          tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
                                   index_before_incr, nitems_step);
          gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
        }
      else
        {
          create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
                     &incr_gsi, insert_after, &index_before_incr,
                     &index_after_incr);
          gimple_seq_add_stmt (header_seq,
                               gimple_build_assign (step, MIN_EXPR,
                                                    index_before_incr,
                                                    nitems_step));
        }
      *iv_step = step;
      *compare_step = nitems_step;
      return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
                                                       : index_before_incr;
    }

  /* Create increment IV.  */
  create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
             loop, &incr_gsi, insert_after, &index_before_incr,
             &index_after_incr);

  tree zero_index = build_int_cst (compare_type, 0);
  tree test_index, test_limit, first_limit;
  gimple_stmt_iterator *test_gsi;
  if (might_wrap_p)
    {
      /* In principle the loop should stop iterating once the incremented
         IV reaches a value greater than or equal to:

           NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP

         However, there's no guarantee that this addition doesn't overflow
         the comparison type, or that the IV hits a value above it before
         wrapping around.  We therefore adjust the limit down by one
         IV step:

           (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
           -[infinite-prec] NITEMS_STEP

         and compare the IV against this limit _before_ incrementing it.
         Since the comparison type is unsigned, we actually want the
         subtraction to saturate at zero:

           (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
           -[sat] NITEMS_STEP

         And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:

           NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)

         where the rightmost subtraction can be done directly in
         COMPARE_TYPE.  */
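      /* The MAX_EXPR/MINUS_EXPR pair below implements that saturating
         subtraction: MAX (TOTAL, ADJUST) - ADJUST is TOTAL - ADJUST
         when TOTAL >= ADJUST and 0 otherwise.  */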
      test_index = index_before_incr;
      tree adjust = gimple_convert (preheader_seq, compare_type,
                                    nitems_step);
      if (nitems_skip)
        adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
                               adjust, nitems_skip);
      test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
                                 nitems_total, adjust);
      test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
                                 test_limit, adjust);
      test_gsi = &incr_gsi;

      /* Get a safe limit for the first iteration.  */
      if (nitems_skip)
        {
          /* The first vector iteration can handle at most NITEMS_STEP
             items.  NITEMS_STEP <= CONST_LIMIT, and adding
             NITEMS_SKIP to that cannot overflow.  */
          tree const_limit = build_int_cst (compare_type,
                                            LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                                            * nitems_per_iter);
          first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
                                      nitems_total, const_limit);
          first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
                                      first_limit, nitems_skip);
        }
      else
        /* For the first iteration it doesn't matter whether the IV hits
           a value above NITEMS_TOTAL.  That only matters for the latch
           condition.  */
        first_limit = nitems_total;
    }
  else
    {
      /* Test the incremented IV, which will always hit a value above
         the bound before wrapping.  */
      test_index = index_after_incr;
      test_limit = nitems_total;
      if (nitems_skip)
        test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
                                   test_limit, nitems_skip);
      test_gsi = &loop_cond_gsi;

      first_limit = test_limit;
    }

  /* Convert the IV value to the comparison type (either a no-op or
     a demotion).  */
  gimple_seq test_seq = NULL;
  test_index = gimple_convert (&test_seq, compare_type, test_index);
  gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);

  /* Provide a definition of each control in the group.  */
  tree next_ctrl = NULL_TREE;
  tree ctrl;
  unsigned int i;
  FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
    {
      /* Previous controls will cover BIAS items.  This control covers the
         next batch.  */
      poly_uint64 bias = nitems_per_ctrl * i;
      tree bias_tree = build_int_cst (compare_type, bias);

      /* See whether the first iteration of the vector loop is known
         to have a full control.  */
      poly_uint64 const_limit;
      bool first_iteration_full
        = (poly_int_tree_p (first_limit, &const_limit)
           && known_ge (const_limit, (i + 1) * nitems_per_ctrl));

      /* Rather than have a new IV that starts at BIAS and goes up to
         TEST_LIMIT, prefer to use the same 0-based IV for each control
         and adjust the bound down by BIAS.  */
      tree this_test_limit = test_limit;
      if (i != 0)
        {
          this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
                                          compare_type, this_test_limit,
                                          bias_tree);
          this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
                                          compare_type, this_test_limit,
                                          bias_tree);
        }

      /* Create the initial control.  First include all items that
         are within the loop limit.  */
      tree init_ctrl = NULL_TREE;
      if (!first_iteration_full)
        {
          tree start, end;
          if (first_limit == test_limit)
            {
              /* Use a natural test between zero (the initial IV value)
                 and the loop limit.  The "else" block would be valid too,
                 but this choice can avoid the need to load BIAS_TREE into
                 a register.  */
              start = zero_index;
              end = this_test_limit;
            }
          else
            {
              /* FIRST_LIMIT is the maximum number of items handled by the
                 first iteration of the vector loop.  Test the portion
                 associated with this control.  */
              start = bias_tree;
              end = first_limit;
            }

          if (use_masks_p)
            init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
                                        start, end, "max_mask");
          else
            {
              init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
              gimple_seq seq = vect_gen_len (init_ctrl, start,
                                             end, length_limit);
              gimple_seq_add_seq (preheader_seq, seq);
            }
        }

      /* Now AND out the bits that are within the number of skipped
         items.  */
      poly_uint64 const_skip;
      if (nitems_skip
          && !(poly_int_tree_p (nitems_skip, &const_skip)
               && known_le (const_skip, bias)))
        {
          gcc_assert (use_masks_p);
          tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
                                                    bias_tree, nitems_skip);
          if (init_ctrl)
            init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
                                      init_ctrl, unskipped_mask);
          else
            init_ctrl = unskipped_mask;
        }

      if (!init_ctrl)
        {
          /* First iteration is full.  */
          if (use_masks_p)
            init_ctrl = build_minus_one_cst (ctrl_type);
          else
            init_ctrl = length_limit;
        }

      /* Get the control value for the next iteration of the loop.  */
      if (use_masks_p)
        {
          gimple_seq stmts = NULL;
          next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
                                      this_test_limit, "next_mask");
          gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
        }
      else
        {
          next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
          gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
                                         length_limit);
          gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
        }

      vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
    }

  int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
  if (partial_load_bias != 0)
    {
      tree adjusted_len = rgc->bias_adjusted_ctrl;
      gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
                                            rgc->controls[0],
                                            build_int_cst
                                              (TREE_TYPE (rgc->controls[0]),
                                               partial_load_bias));
      gimple_seq_add_stmt (header_seq, minus);
    }

  return next_ctrl;
}

/* Set up the iteration condition and rgroup controls for LOOP, given
   that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
   loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
   the number of iterations of the original scalar loop that should be
   handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
   for vect_set_loop_condition.

   Insert the branch-back condition before LOOP_COND_GSI and return the
   final gcond.  */

static gcond *
vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
                                         loop_vec_info loop_vinfo, tree niters,
                                         tree final_iv, bool niters_maybe_zero,
                                         gimple_stmt_iterator loop_cond_gsi)
{
  gimple_seq preheader_seq = NULL;
  gimple_seq header_seq = NULL;

  bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
  unsigned int compare_precision = TYPE_PRECISION (compare_type);
  tree orig_niters = niters;

  /* Type of the initial value of NITERS.  */
  tree ni_actual_type = TREE_TYPE (niters);
  unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
  if (niters_skip)
    niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);

  /* Convert NITERS to the same size as the compare.  */
  if (compare_precision > ni_actual_precision
      && niters_maybe_zero)
    {
      /* We know that there is always at least one iteration, so if the
         count is zero then it must have wrapped.  Cope with this by
         subtracting 1 before the conversion and adding 1 to the result.  */
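      /* For example (a sketch): an 8-bit count that wrapped to 0 after
         256 iterations becomes 255 after the subtraction, is still 255
         after widening, and yields the intended 256 after adding 1.  */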
      gcc_assert (TYPE_UNSIGNED (ni_actual_type));
      niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
                             niters, build_minus_one_cst (ni_actual_type));
      niters = gimple_convert (&preheader_seq, compare_type, niters);
      niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
                             niters, build_one_cst (compare_type));
    }
  else
    niters = gimple_convert (&preheader_seq, compare_type, niters);

  /* Iterate over all the rgroups and fill in their controls.  We could use
     the first control from any rgroup for the loop condition; here we
     arbitrarily pick the last.  */
  tree test_ctrl = NULL_TREE;
  tree iv_step = NULL_TREE;
  tree compare_step = NULL_TREE;
  rgroup_controls *rgc;
  rgroup_controls *iv_rgc = nullptr;
  unsigned int i;
  auto_vec<rgroup_controls> *controls
    = use_masks_p ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
                  : &LOOP_VINFO_LENS (loop_vinfo);
  FOR_EACH_VEC_ELT (*controls, i, rgc)
    if (!rgc->controls.is_empty ())
      {
        /* First try using permutes.  This adds a single vector
           instruction to the loop for each mask, but needs no extra
           loop invariants or IVs.  */
        unsigned int nmasks = i + 1;
        if (use_masks_p && (nmasks & 1) == 0)
          {
            rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
            if (!half_rgc->controls.is_empty ()
                && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
              continue;
          }

        if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
            || !iv_rgc
            || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
                != rgc->max_nscalars_per_iter * rgc->factor))
          {
            /* See whether zero-based IV would ever generate all-false masks
               or zero length before wrapping around.  */
            bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);

            /* Set up all controls for this group.  */
            test_ctrl
              = vect_set_loop_controls_directly (loop, loop_vinfo,
                                                 &preheader_seq, &header_seq,
                                                 loop_cond_gsi, rgc, niters,
                                                 niters_skip, might_wrap_p,
                                                 &iv_step, &compare_step);

            iv_rgc = rgc;
          }

        if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
            && rgc->controls.length () > 1)
          {
            /* vect_set_loop_controls_directly creates an IV whose step
               is equal to the expected sum of RGC->controls.  Use that
               information to populate RGC->controls.  */
            tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
            gcc_assert (iv_step);
            vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
          }
      }

  /* Emit all accumulated statements.  */
  add_preheader_seq (loop, preheader_seq);
  add_header_seq (loop, header_seq);

  /* Get a boolean result that tells us whether to iterate.  */
  gcond *cond_stmt;
  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
      && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    {
      gcc_assert (compare_step);
      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
      cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
                                     NULL_TREE);
    }
  else
    {
      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
      tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
      cond_stmt
        = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
    }
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* The loop iterates (NITERS - 1) / VF + 1 times.
     Subtract one from this to get the latch count.  */
  tree step = build_int_cst (compare_type,
                             LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
                                       build_minus_one_cst (compare_type));
  loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
                                     niters_minus_one, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, orig_niters);
      gsi_insert_on_edge_immediate (exit_edge, assign);
    }

  return cond_stmt;
}

/* Set up the iteration condition and rgroup controls for LOOP in AVX512
   style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
   vectorized loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
   the number of iterations of the original scalar loop that should be
   handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
   for vect_set_loop_condition.

   Insert the branch-back condition before LOOP_COND_GSI and return the
   final gcond.  */

static gcond *
vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
                                                edge exit_edge,
                                                loop_vec_info loop_vinfo,
                                                tree niters,
                                                tree final_iv,
                                                bool niters_maybe_zero,
                                                gimple_stmt_iterator loop_cond_gsi)
{
  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree orig_niters = niters;
  gimple_seq preheader_seq = NULL;

  /* Create an IV that counts down from niters and whose step
     is the number of iterations processed in the current iteration.
     Produce the controls with compares like the following.

       # iv_2 = PHI <niters, iv_3>
       rem_4 = MIN <iv_2, VF>;
       remv_6 = { rem_4, rem_4, rem_4, ... }
       mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv_6;
       iv_3 = iv_2 - VF;
       if (iv_2 > VF)
         continue;

     Where the constant is built with elements at most VF - 1 and
     repetitions according to max_nscalars_per_iter which is guaranteed
     to be the same within a group.  */

  /* Convert NITERS to the determined IV type.  */
  if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
      && niters_maybe_zero)
    {
      /* We know that there is always at least one iteration, so if the
         count is zero then it must have wrapped.  Cope with this by
         subtracting 1 before the conversion and adding 1 to the result.  */
      gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
      niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
                             niters, build_minus_one_cst (TREE_TYPE (niters)));
      niters = gimple_convert (&preheader_seq, iv_type, niters);
      niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
                             niters, build_one_cst (iv_type));
    }
  else
    niters = gimple_convert (&preheader_seq, iv_type, niters);

  /* Bias the initial value of the IV in case we need to skip iterations
     at the beginning.  */
  tree niters_adj = niters;
  if (niters_skip)
    {
      tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
      niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
                                 iv_type, niters, skip);
    }

  /* The iteration step is the vectorization factor.  */
  tree iv_step = build_int_cst (iv_type, vf);

  /* Create the decrement IV.  */
  tree index_before_incr, index_after_incr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
             &incr_gsi, insert_after, &index_before_incr,
             &index_after_incr);

  /* Iterate over all the rgroups and fill in their controls.  */
  for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
    {
      if (rgc.controls.is_empty ())
        continue;

      tree ctrl_type = rgc.type;
      poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);

      tree vectype = rgc.compare_type;

      /* index_after_incr is the IV specifying the remaining iterations in
         the next iteration.  */
      tree rem = index_after_incr;
      /* When the data type for the compare to produce the mask is
         smaller than the IV type we need to saturate.  Saturate to
         the smallest possible value (IV_TYPE) so we only have to
         saturate once (CSE will catch redundant ones we add).  */
      if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
        rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
                            UNKNOWN_LOCATION,
                            MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
      rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
                            UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);

      /* Build a data vector composed of the remaining iterations.  */
      rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
                                          UNKNOWN_LOCATION, vectype, rem);

      /* Provide a definition of each vector in the control group.  */
      tree next_ctrl = NULL_TREE;
      tree first_rem = NULL_TREE;
      tree ctrl;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
        {
          /* Previous controls will cover BIAS items.  This control covers the
             next batch.  */
          poly_uint64 bias = nitems_per_ctrl * i;

          /* Build the constant to compare the remaining iters against,
             this is sth like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
             split into pieces.  */
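          /* For example (a sketch): with max_nscalars_per_iter == 2 and
             an 8-element control, BIAS == 0 gives
             { 0, 0, 1, 1, 2, 2, 3, 3 } and the next piece (BIAS == 8)
             continues { 4, 4, 5, 5, ... }.  */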
          unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
          tree_vector_builder builder (vectype, n, 1);
          for (unsigned i = 0; i < n; ++i)
            {
              unsigned HOST_WIDE_INT val
                = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
              gcc_assert (val < vf.to_constant ());
              builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
            }
          tree cmp_series = builder.build ();

          /* Create the initial control.  First include all items that
             are within the loop limit.  */
          tree init_ctrl = NULL_TREE;
          poly_uint64 const_limit;
          /* See whether the first iteration of the vector loop is known
             to have a full control.  */
          if (poly_int_tree_p (niters, &const_limit)
              && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
            init_ctrl = build_minus_one_cst (ctrl_type);
          else
            {
              /* The remaining work items initially are niters.  Saturate,
                 splat and compare.  */
              if (!first_rem)
                {
                  first_rem = niters;
                  if (TYPE_PRECISION (TREE_TYPE (vectype))
                      < TYPE_PRECISION (iv_type))
                    first_rem = gimple_build (&preheader_seq,
                                              MIN_EXPR, TREE_TYPE (first_rem),
                                              first_rem, iv_step);
                  first_rem = gimple_convert (&preheader_seq,
                                              TREE_TYPE (vectype), first_rem);
                  first_rem = gimple_build_vector_from_val (&preheader_seq,
                                                            vectype, first_rem);
                }
              init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
                                        cmp_series, first_rem);
            }

          /* Now AND out the bits that are within the number of skipped
             items.  */
          poly_uint64 const_skip;
          if (niters_skip
              && !(poly_int_tree_p (niters_skip, &const_skip)
                   && known_le (const_skip, bias)))
            {
              /* For integer mode masks it's cheaper to shift out the bits
                 since that avoids loading a constant.  */
              gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
              init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
                                        lang_hooks.types.type_for_mode
                                          (TYPE_MODE (ctrl_type), 1),
                                        init_ctrl);
              /* ???  But when the shift amount isn't constant this requires
                 a round-trip to GPRs.  We could apply the bias to either
                 side of the compare instead.  */
              tree shift = gimple_build (&preheader_seq, MULT_EXPR,
                                         TREE_TYPE (niters_skip), niters_skip,
                                         build_int_cst (TREE_TYPE (niters_skip),
                                                        rgc.max_nscalars_per_iter));
              init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
                                        TREE_TYPE (init_ctrl),
                                        init_ctrl, shift);
              init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
                                        ctrl_type, init_ctrl);
            }

          /* Get the control value for the next iteration of the loop.  */
          next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
                                    UNKNOWN_LOCATION,
                                    LT_EXPR, ctrl_type, cmp_series, rem);

          vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
        }
    }

  /* Emit all accumulated statements.  */
  add_preheader_seq (loop, preheader_seq);

  /* Adjust the exit test using the decrementing IV.  */
  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
  /* When we peel for alignment with niter_skip != 0 this can
     cause niter + niter_skip to wrap and since we are comparing the
     value before the decrement here we get a false early exit.
     We can't compare the value after decrement either because that
     decrement could wrap as well as we're not doing a saturating
     decrement.  To avoid this situation we force a larger
     iv_type.  */
  gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
                                        NULL_TREE, NULL_TREE);
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
     Subtract one from this to get the latch count.  */
  tree niters_minus_one
    = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
                   build_minus_one_cst (TREE_TYPE (orig_niters)));
  tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
  if (niters_skip)
    niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
                               fold_convert (iv_type, niters_skip));
  loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
                                     niters_adj2, iv_step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, orig_niters);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }

  return cond_stmt;
}


/* Like vect_set_loop_condition, but handle the case in which the vector
   loop handles exactly VF scalars per iteration.  */

static gcond *
vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
                                class loop *loop, tree niters, tree step,
                                tree final_iv, bool niters_maybe_zero,
                                gimple_stmt_iterator loop_cond_gsi)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (exit_edge);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  tree init, limit;
  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

         A:
           x = 0;
           do
             {
               ...
               x += 1;
             }
           while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

         B:
           x = 0;
           do
             {
               ...
               x += STEP;
             }
           while (x <= NITERS - STEP);

         so that the loop continues to iterate if x + STEP - 1 < NITERS
         but stops if x + STEP - 1 >= NITERS.

         However, if NITERS is zero, x never hits a value above NITERS - STEP
         before wrapping around.  There are two obvious ways of dealing with
         this:

         - start at STEP - 1 and compare x before incrementing it
         - start at -1 and compare x after incrementing it

         The latter is simpler and is what we use.  The loop in this case
         looks like:

         C:
           x = -1;
           do
             {
               ...
               x += STEP;
             }
           while (x < NITERS - STEP);

         In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
      if (niters_maybe_zero)
        {
          /* Case C.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
          init = build_all_ones_cst (niters_type);
        }
      else
        {
          /* Case B.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
          init = build_zero_cst (niters_type);
        }
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
                                       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
                                       limit, step);

  if (final_iv)
    {
      gassign *assign;
      gcc_assert (single_pred_p (exit_edge->dest));
      tree phi_dest
        = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
      /* Make sure to maintain LC SSA form here and elide the subtraction
         if the value is zero.  */
      gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
      add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
      if (!integer_zerop (init))
        {
          assign = gimple_build_assign (final_iv, MINUS_EXPR,
                                        phi_dest, init);
          gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
          gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
        }
    }

  return cond_stmt;
}

/* If we're using fully-masked loops, make LOOP iterate:

      N == (NITERS - 1) / STEP + 1

   times.  When NITERS is zero, this is equivalent to making the loop
   execute (1 << M) / STEP times, where M is the precision of NITERS.
   NITERS_MAYBE_ZERO is true if this last case might occur.

   If we're not using fully-masked loops, make LOOP iterate:

      N == (NITERS - STEP) / STEP + 1

   times, where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP times
   when NITERS is nonzero and (1 << M) / STEP times otherwise.
   NITERS_MAYBE_ZERO again indicates whether this last case might occur.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
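/* For example: NITERS == 10 and STEP == 4 give N == (10 - 1) / 4 + 1 == 3
   in the fully-masked case, while NITERS == 12 and STEP == 4 give
   N == (12 - 4) / 4 + 1 == 3 == 12 / 4 in the unmasked case.  */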

void
vect_set_loop_condition (class loop *loop, edge loop_e,
                         loop_vec_info loop_vinfo,
                         tree niters, tree step, tree final_iv,
                         bool niters_maybe_zero)
{
  gcond *cond_stmt;
  gcond *orig_cond = get_loop_exit_condition (loop_e);
  gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);

  if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    {
      if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo)
          == vect_partial_vectors_avx512)
        cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
                                                                    loop_vinfo,
                                                                    niters,
                                                                    final_iv,
                                                                    niters_maybe_zero,
                                                                    loop_cond_gsi);
      else
        cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
                                                             loop_vinfo,
                                                             niters, final_iv,
                                                             niters_maybe_zero,
                                                             loop_cond_gsi);
    }
  else
    cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
                                                niters,
                                                step, final_iv,
                                                niters_maybe_zero,
                                                loop_cond_gsi);

  /* Remove old loop exit test.  */
  stmt_vec_info orig_cond_info;
  if (loop_vinfo
      && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
    loop_vinfo->remove_stmt (orig_cond_info);
  else
    gsi_remove (&loop_cond_gsi, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
                     (gimple *) cond_stmt);
}

/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
   continuation.  This is correct for cases where one loop continues from the
   other like in the vectorizer, but not true for uses in e.g. loop distribution
   where the contents of the loop body are split but the iteration space of both
   copies remains the same.

   If UPDATED_DOMS is not NULL it is updated with the list of basic blocks
   whose dominators were updated during the peeling.  */
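/* For example: peeling a prologue passes E == loop_preheader_edge (LOOP)
   so that the copy runs before LOOP, while peeling an epilogue passes
   E == LOOP_EXIT so that the copy runs after it.  */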
1407 | |
1408 | class loop * |
1409 | slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, |
1410 | class loop *scalar_loop, |
1411 | edge scalar_exit, edge e, edge *new_e, |
1412 | bool flow_loops) |
1413 | { |
1414 | class loop *new_loop; |
1415 | basic_block *new_bbs, *bbs, *pbbs; |
1416 | bool at_exit; |
1417 | bool was_imm_dom; |
1418 | basic_block exit_dest; |
1419 | edge exit, new_exit; |
1420 | bool duplicate_outer_loop = false; |
1421 | |
1422 | exit = loop_exit; |
1423 | at_exit = (e == exit); |
1424 | if (!at_exit && e != loop_preheader_edge (loop)) |
1425 | return NULL; |
1426 | |
1427 | if (scalar_loop == NULL) |
1428 | { |
1429 | scalar_loop = loop; |
1430 | scalar_exit = loop_exit; |
1431 | } |
1432 | else if (scalar_loop == loop) |
1433 | scalar_exit = loop_exit; |
1434 | else |
1435 | { |
1436 | /* Loop has been version, match exits up using the aux index. */ |
1437 | for (edge exit : get_loop_exit_edges (scalar_loop)) |
1438 | if (exit->aux == loop_exit->aux) |
1439 | { |
1440 | scalar_exit = exit; |
1441 | break; |
1442 | } |
1443 | |
1444 | gcc_assert (scalar_exit); |
1445 | } |
1446 | |
1447 | bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); |
1448 | pbbs = bbs + 1; |
1449 | get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes); |
1450 | /* Allow duplication of outer loops. */ |
1451 | if (scalar_loop->inner) |
1452 | duplicate_outer_loop = true; |
1453 | |
1454 | /* Generate new loop structure. */ |
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1456 | duplicate_subloops (scalar_loop, new_loop); |
1457 | |
1458 | exit_dest = exit->dest; |
1459 | was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, |
1460 | exit_dest) == loop->header ? |
1461 | true : false); |
1462 | |
1463 | /* Also copy the pre-header, this avoids jumping through hoops to |
1464 | duplicate the loop entry PHI arguments. Create an empty |
1465 | pre-header unconditionally for this. */ |
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
1469 | new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); |
1470 | |
1471 | copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs, |
1472 | &scalar_exit, 1, &new_exit, NULL, |
1473 | at_exit ? loop->latch : e->src, true); |
1474 | exit = loop_exit; |
  basic_block new_preheader = new_bbs[0];
1476 | |
1477 | gcc_assert (new_exit); |
1478 | |
1479 | /* Record the new loop exit information. new_loop doesn't have SCEV data and |
1480 | so we must initialize the exit information. */ |
1481 | if (new_e) |
1482 | *new_e = new_exit; |
1483 | |
1484 | /* Before installing PHI arguments make sure that the edges |
1485 | into them match that of the scalar loop we analyzed. This |
1486 | makes sure the SLP tree matches up between the main vectorized |
1487 | loop and the epilogue vectorized copies. */ |
  if (single_succ_edge (preheader)->dest_idx
      != single_succ_edge (new_bbs[0])->dest_idx)
1490 | { |
1491 | basic_block swap_bb = new_bbs[1]; |
1492 | gcc_assert (EDGE_COUNT (swap_bb->preds) == 2); |
1493 | std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1)); |
1494 | EDGE_PRED (swap_bb, 0)->dest_idx = 0; |
1495 | EDGE_PRED (swap_bb, 1)->dest_idx = 1; |
1496 | } |
1497 | if (duplicate_outer_loop) |
1498 | { |
1499 | class loop *new_inner_loop = get_loop_copy (scalar_loop->inner); |
1500 | if (loop_preheader_edge (scalar_loop)->dest_idx |
1501 | != loop_preheader_edge (new_inner_loop)->dest_idx) |
1502 | { |
1503 | basic_block swap_bb = new_inner_loop->header; |
1504 | gcc_assert (EDGE_COUNT (swap_bb->preds) == 2); |
1505 | std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1)); |
1506 | EDGE_PRED (swap_bb, 0)->dest_idx = 0; |
1507 | EDGE_PRED (swap_bb, 1)->dest_idx = 1; |
1508 | } |
1509 | } |
1510 | |
1511 | add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL); |
1512 | |
  /* Skip the new preheader since it's deleted if the loop copy is added
     at the entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1516 | |
1517 | /* Rename the exit uses. */ |
1518 | for (edge exit : get_loop_exit_edges (new_loop)) |
    for (auto gsi = gsi_start_phis (exit->dest);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
	if (MAY_HAVE_DEBUG_BIND_STMTS)
	  adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
1526 | } |
1527 | |
1528 | auto loop_exits = get_loop_exit_edges (loop); |
1529 | auto_vec<basic_block> doms; |
1530 | |
1531 | if (at_exit) /* Add the loop copy at exit. */ |
1532 | { |
1533 | if (scalar_loop != loop && new_exit->dest != exit_dest) |
1534 | { |
1535 | new_exit = redirect_edge_and_branch (new_exit, exit_dest); |
1536 | flush_pending_stmts (new_exit); |
1537 | } |
1538 | |
1539 | auto_vec <gimple *> new_phis; |
1540 | hash_map <tree, tree> new_phi_args; |
1541 | /* First create the empty phi nodes so that when we flush the |
	 statements they can be filled in.  However, because there is no
	 order between the PHI nodes in the exits and the loop headers, we
	 need to order them based on the order of the two headers.  First
	 record the new phi nodes.  */
      for (auto gsi_from = gsi_start_phis (scalar_exit->dest);
	   !gsi_end_p (gsi_from); gsi_next (&gsi_from))
	{
	  gimple *from_phi = gsi_stmt (gsi_from);
	  tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
	  gphi *res = create_phi_node (new_res, new_preheader);
	  new_phis.safe_push (res);
1553 | } |
1554 | |
1555 | /* Then redirect the edges and flush the changes. This writes out the new |
1556 | SSA names. */ |
1557 | for (edge exit : loop_exits) |
1558 | { |
1559 | edge temp_e = redirect_edge_and_branch (exit, new_preheader); |
1560 | flush_pending_stmts (temp_e); |
1561 | } |
1562 | /* Record the new SSA names in the cache so that we can skip materializing |
1563 | them again when we fill in the rest of the LCSSA variables. */ |
1564 | for (auto phi : new_phis) |
1565 | { |
	  tree new_arg = gimple_phi_arg (phi, 0)->def;
1567 | |
1568 | if (!SSA_VAR_P (new_arg)) |
1569 | continue; |
	  /* If the PHI MEM node dominates the loop then we shouldn't create
	     a new LC-SSA PHI for it in the intermediate block.  */
	  /* A MEM phi that constitutes a new DEF for the vUSE chain can
	     either be a .VDEF or a PHI that operates on MEM.  And said
	     definition must not be inside the main loop, or it must be a
	     default definition (e.g. a parameter).  In the last two cases we
	     may remove a non-MEM PHI node, but since they dominate both
	     loops the removal is unlikely to cause trouble as the exits must
	     already be using them.  */
	  if (virtual_operand_p (new_arg)
1579 | && (SSA_NAME_IS_DEFAULT_DEF (new_arg) |
1580 | || !flow_bb_inside_loop_p (loop, |
1581 | gimple_bb (SSA_NAME_DEF_STMT (new_arg))))) |
1582 | { |
1583 | auto gsi = gsi_for_stmt (phi); |
1584 | remove_phi_node (&gsi, true); |
1585 | continue; |
1586 | } |
	  new_phi_args.put (new_arg, gimple_phi_result (phi));
1588 | |
1589 | if (TREE_CODE (new_arg) != SSA_NAME) |
1590 | continue; |
1591 | } |
1592 | |
1593 | /* Copy the current loop LC PHI nodes between the original loop exit |
1594 | block and the new loop header. This allows us to later split the |
1595 | preheader block and still find the right LC nodes. */ |
      edge loop_entry = single_succ_edge (new_preheader);
1597 | if (flow_loops) |
	for (auto gsi_from = gsi_start_phis (loop->header),
	     gsi_to = gsi_start_phis (new_loop->header);
	     !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
	     gsi_next (&gsi_from), gsi_next (&gsi_to))
	  {
	    gimple *from_phi = gsi_stmt (gsi_from);
	    gimple *to_phi = gsi_stmt (gsi_to);
1605 | tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, |
1606 | loop_latch_edge (loop)); |
1607 | |
1608 | /* Check if we've already created a new phi node during edge |
1609 | redirection. If we have, only propagate the value downwards. */ |
	    if (tree *res = new_phi_args.get (new_arg))
	      {
		adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
		continue;
1614 | } |
1615 | |
	    tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1617 | gphi *lcssa_phi = create_phi_node (new_res, new_preheader); |
1618 | |
1619 | /* Main loop exit should use the final iter value. */ |
1620 | add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION); |
1621 | |
	    adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1623 | } |
1624 | |
1625 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src); |
1626 | |
1627 | if (was_imm_dom || duplicate_outer_loop) |
1628 | set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src); |
1629 | |
      /* And remove the unnecessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
1634 | delete_basic_block (preheader); |
1635 | set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, |
1636 | loop_preheader_edge (scalar_loop)->src); |
1637 | } |
1638 | else /* Add the copy at entry. */ |
1639 | { |
1640 | /* Copy the current loop LC PHI nodes between the original loop exit |
1641 | block and the new loop header. This allows us to later split the |
1642 | preheader block and still find the right LC nodes. */ |
1643 | if (flow_loops) |
	for (auto gsi_from = gsi_start_phis (new_loop->header),
	     gsi_to = gsi_start_phis (loop->header);
	     !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
	     gsi_next (&gsi_from), gsi_next (&gsi_to))
	  {
	    gimple *from_phi = gsi_stmt (gsi_from);
	    gimple *to_phi = gsi_stmt (gsi_to);
	    tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
						  loop_latch_edge (new_loop));
	    adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
					new_arg);
1655 | } |
1656 | |
1657 | if (scalar_loop != loop) |
1658 | { |
	  /* Remove the unnecessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
1662 | delete_basic_block (preheader); |
1663 | set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, |
1664 | loop_preheader_edge (scalar_loop)->src); |
1665 | preheader = split_edge (loop_preheader_edge (loop)); |
	  entry_e = single_pred_edge (preheader);
1667 | } |
1668 | |
1669 | redirect_edge_and_branch_force (entry_e, new_preheader); |
1670 | flush_pending_stmts (entry_e); |
1671 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src); |
1672 | |
1673 | redirect_edge_and_branch_force (new_exit, preheader); |
1674 | flush_pending_stmts (new_exit); |
1675 | set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src); |
1676 | |
      /* And remove the unnecessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
1681 | delete_basic_block (new_preheader); |
1682 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, |
1683 | loop_preheader_edge (new_loop)->src); |
1684 | } |
1685 | |
  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);
1690 | |
1691 | return new_loop; |
1692 | } |
1693 | |
1694 | |
1695 | /* Given the condition expression COND, put it as the last statement of |
1696 | GUARD_BB; set both edges' probability; set dominator of GUARD_TO to |
1697 | DOM_BB; return the skip edge. GUARD_TO is the target basic block to |
1698 | skip the loop. PROBABILITY is the skip edge's probability. Mark the |
1699 | new edge as irreducible if IRREDUCIBLE_P is true. */ |
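/* For instance (schematic, with a hypothetical condition "x < 0"):

     guard_bb:
       ...
       if (x < 0) goto guard_to;   <- new skip edge with PROBABILITY
       else fall through on enter_e <- existing edge, probability inverted

   with GUARD_TO's immediate dominator set to DOM_BB.  */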
1700 | |
1701 | static edge |
1702 | slpeel_add_loop_guard (basic_block guard_bb, tree cond, |
1703 | basic_block guard_to, basic_block dom_bb, |
1704 | profile_probability probability, bool irreducible_p) |
1705 | { |
1706 | gimple_stmt_iterator gsi; |
1707 | edge new_e, enter_e; |
1708 | gcond *cond_stmt; |
1709 | gimple_seq gimplify_stmt_list = NULL; |
1710 | |
1711 | enter_e = EDGE_SUCC (guard_bb, 0); |
1712 | enter_e->flags &= ~EDGE_FALLTHRU; |
1713 | enter_e->flags |= EDGE_FALSE_VALUE; |
  gsi = gsi_last_bb (guard_bb);
1715 | |
1716 | cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, |
1717 | is_gimple_condexpr_for_cond, NULL_TREE); |
1718 | if (gimplify_stmt_list) |
1719 | gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT); |
1720 | |
1721 | cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE); |
  gsi = gsi_last_bb (guard_bb);
1723 | gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); |
1724 | |
1725 | /* Add new edge to connect guard block to the merge/loop-exit block. */ |
1726 | new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE); |
1727 | |
1728 | new_e->probability = probability; |
1729 | if (irreducible_p) |
1730 | new_e->flags |= EDGE_IRREDUCIBLE_LOOP; |
1731 | |
1732 | enter_e->probability = probability.invert (); |
1733 | set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb); |
1734 | |
1735 | /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */ |
1736 | if (enter_e->dest->loop_father->header == enter_e->dest) |
1737 | split_edge (enter_e); |
1738 | |
1739 | return new_e; |
1740 | } |
1741 | |
1742 | |
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - a header and an empty latch -
       for an innermost loop, and of 5 basic blocks for an outer loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */
1750 | |
1751 | bool |
1752 | slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e, |
1753 | const_edge e) |
1754 | { |
1755 | edge entry_e = loop_preheader_edge (loop); |
1756 | gcond *orig_cond = get_loop_exit_condition (exit_e); |
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;
1759 | |
1760 | /* All loops have an outer scope; the only case loop->outer is NULL is for |
1761 | the function itself. */ |
1762 | if (!loop_outer (loop) |
1763 | || loop->num_nodes != num_bb |
1764 | || !empty_block_p (loop->latch) |
1765 | || !exit_e |
1766 | /* Verify that new loop exit condition can be trivially modified. */ |
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1768 | || (e != exit_e && e != entry_e)) |
1769 | return false; |
1770 | |
1771 | basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes); |
1772 | get_loop_body_with_size (loop, bbs, loop->num_nodes); |
1773 | bool ret = can_copy_bbs_p (bbs, loop->num_nodes); |
  free (bbs);
1775 | return ret; |
1776 | } |
1777 | |
1778 | /* Function find_loop_location. |
1779 | |
1780 | Extract the location of the loop in the source code. |
1781 | If the loop is not well formed for vectorization, an estimated |
1782 | location is calculated. |
   Return the loop location on success and an unknown location otherwise.  */
1784 | |
1785 | dump_user_location_t |
1786 | find_loop_location (class loop *loop) |
1787 | { |
1788 | gimple *stmt = NULL; |
1789 | basic_block bb; |
1790 | gimple_stmt_iterator si; |
1791 | |
1792 | if (!loop) |
1793 | return dump_user_location_t (); |
1794 | |
  if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
1796 | { |
1797 | /* We only care about the loop location, so use any exit with location |
1798 | information. */ |
1799 | for (edge e : get_loop_exit_edges (loop)) |
1800 | { |
1801 | stmt = get_loop_exit_condition (e); |
1802 | |
1803 | if (stmt |
1804 | && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) |
1805 | return stmt; |
1806 | } |
1807 | } |
1808 | |
  /* If we got here the loop is probably not "well formed";
     try to estimate the loop location.  */
1811 | |
1812 | if (!loop->header) |
1813 | return dump_user_location_t (); |
1814 | |
1815 | bb = loop->header; |
1816 | |
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
1820 | if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) |
1821 | return stmt; |
1822 | } |
1823 | |
1824 | return dump_user_location_t (); |
1825 | } |
1826 | |
1827 | /* Return true if the phi described by STMT_INFO defines an IV of the |
1828 | loop to be vectorized. */ |
1829 | |
1830 | static bool |
1831 | iv_phi_p (stmt_vec_info stmt_info) |
1832 | { |
  gphi *phi = as_a <gphi *> (stmt_info->stmt);
1834 | if (virtual_operand_p (PHI_RESULT (phi))) |
1835 | return false; |
1836 | |
1837 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
1838 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
1839 | return false; |
1840 | |
1841 | return true; |
1842 | } |
1843 | |
/* Return true if the vectorizer can peel for a nonlinear IV.  */
1845 | static bool |
1846 | vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo, |
1847 | stmt_vec_info stmt_info) |
1848 | { |
1849 | enum vect_induction_op_type induction_type |
1850 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); |
1851 | tree niters_skip; |
  /* Init_expr will be updated by vect_update_ivs_after_vectorizer,
     if niters or vf is unknown:
     For shift, when the shift count >= precision, the behavior is undefined.
     For mult, we don't know how to generate
     init_expr * pow (step, niters) for variable niters.
     For neg, it should be ok, since niters of the vectorized main loop
     will always be a multiple of 2.  */
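  /* For instance, a "neg" induction x_{i+1} = -x_i just alternates between
     init_expr and -init_expr; because the vectorized main loop executes an
     even number of scalar iterations, the value entering the epilogue is
     init_expr again.  */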
1859 | if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
1860 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()) |
1861 | && induction_type != vect_step_op_neg) |
1862 | { |
1863 | if (dump_enabled_p ()) |
1864 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1865 | "Peeling for epilogue is not supported" |
1866 | " for nonlinear induction except neg" |
			 " when iteration count is unknown.\n");
1868 | return false; |
1869 | } |
1870 | |
1871 | /* Avoid compile time hog on vect_peel_nonlinear_iv_init. */ |
1872 | if (induction_type == vect_step_op_mul) |
1873 | { |
1874 | tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); |
1875 | tree type = TREE_TYPE (step_expr); |
1876 | |
      if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
	  && LOOP_VINFO_INT_NITERS (loop_vinfo) >= TYPE_PRECISION (type))
1879 | { |
1880 | if (dump_enabled_p ()) |
1881 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1882 | "Avoid compile time hog on" |
1883 | " vect_peel_nonlinear_iv_init" |
1884 | " for nonlinear induction vec_step_op_mul" |
			     " when iteration count is too big.\n");
1886 | return false; |
1887 | } |
1888 | } |
1889 | |
  /* Peeling for neg is also not supported when niters is variable.
     ??? generate something like niter_expr & 1 ? init_expr : -init_expr?  */
1892 | niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
1893 | if ((niters_skip != NULL_TREE |
1894 | && (TREE_CODE (niters_skip) != INTEGER_CST |
1895 | || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0)) |
1896 | || (!vect_use_loop_mask_for_alignment_p (loop_vinfo) |
1897 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)) |
1898 | { |
1899 | if (dump_enabled_p ()) |
1900 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1901 | "Peeling for alignement is not supported" |
1902 | " for nonlinear induction when niters_skip" |
1903 | " is not constant.\n" ); |
1904 | return false; |
1905 | } |
1906 | |
1907 | return true; |
1908 | } |
1909 | |
1910 | /* Function vect_can_advance_ivs_p |
1911 | |
1912 | In case the number of iterations that LOOP iterates is unknown at compile |
1913 | time, an epilog loop will be generated, and the loop induction variables |
1914 | (IVs) will be "advanced" to the value they are supposed to take just before |
1915 | the epilog loop. Here we check that the access function of the loop IVs |
1916 | and the expression that represents the loop bound are simple enough. |
1917 | These restrictions will be relaxed in the future. */ |
1918 | |
1919 | bool |
1920 | vect_can_advance_ivs_p (loop_vec_info loop_vinfo) |
1921 | { |
1922 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1923 | basic_block bb = loop->header; |
1924 | gphi_iterator gsi; |
1925 | |
1926 | /* Analyze phi functions of the loop header. */ |
1927 | |
1928 | if (dump_enabled_p ()) |
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1931 | { |
1932 | tree evolution_part; |
1933 | enum vect_induction_op_type induction_type; |
1934 | |
1935 | gphi *phi = gsi.phi (); |
1936 | stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi); |
1937 | if (dump_enabled_p ()) |
	dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1939 | phi_info->stmt); |
1940 | |
1941 | /* Skip virtual phi's. The data dependences that are associated with |
1942 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. |
1943 | |
1944 | Skip reduction phis. */ |
      if (!iv_phi_p (phi_info))
1946 | { |
1947 | if (dump_enabled_p ()) |
1948 | dump_printf_loc (MSG_NOTE, vect_location, |
1949 | "reduc or virtual phi. skip.\n" ); |
1950 | continue; |
1951 | } |
1952 | |
1953 | induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info); |
1954 | if (induction_type != vect_step_op_add) |
1955 | { |
	  if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
1957 | return false; |
1958 | |
1959 | continue; |
1960 | } |
1961 | |
1962 | /* Analyze the evolution function. */ |
1963 | |
1964 | evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); |
1965 | if (evolution_part == NULL_TREE) |
1966 | { |
1967 | if (dump_enabled_p ()) |
1968 | dump_printf (MSG_MISSED_OPTIMIZATION, |
1969 | "No access function or evolution.\n" ); |
1970 | return false; |
1971 | } |
1972 | |
1973 | /* FORNOW: We do not transform initial conditions of IVs |
1974 | which evolution functions are not invariants in the loop. */ |
1975 | |
1976 | if (!expr_invariant_in_loop_p (loop, evolution_part)) |
1977 | { |
1978 | if (dump_enabled_p ()) |
1979 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1980 | "evolution not invariant in loop.\n" ); |
1981 | return false; |
1982 | } |
1983 | |
1984 | /* FORNOW: We do not transform initial conditions of IVs |
1985 | which evolution functions are a polynomial of degree >= 2. */ |
1986 | |
      if (tree_is_chrec (evolution_part))
1988 | { |
1989 | if (dump_enabled_p ()) |
1990 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1991 | "evolution is chrec.\n" ); |
1992 | return false; |
1993 | } |
1994 | } |
1995 | |
1996 | return true; |
1997 | } |
1998 | |
1999 | |
2000 | /* Function vect_update_ivs_after_vectorizer. |
2001 | |
2002 | "Advance" the induction variables of LOOP to the value they should take |
2003 | after the execution of LOOP. This is currently necessary because the |
2004 | vectorizer does not handle induction variables that are used after the |
2005 | loop. Such a situation occurs when the last iterations of LOOP are |
2006 | peeled, because: |
2007 | 1. We introduced new uses after LOOP for IVs that were not originally used |
2008 | after LOOP: the IVs of LOOP are now used by an epilog loop. |
2009 | 2. LOOP is going to be vectorized; this means that it will iterate N/VF |
2010 | times, whereas the loop IVs should be bumped N times. |
2011 | |
2012 | Input: |
2013 | - LOOP - a loop that is going to be vectorized. The last few iterations |
2014 | of LOOP were peeled. |
2015 | - NITERS - the number of iterations that LOOP executes (before it is |
2016 | vectorized). i.e, the number of times the ivs should be bumped. |
2017 | - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path |
2018 | coming out from LOOP on which there are uses of the LOOP ivs |
2019 | (this is the path from LOOP->exit to epilog_loop->preheader). |
2020 | |
2021 | The new definitions of the ivs are placed in LOOP->exit. |
2022 | The phi args associated with the edge UPDATE_E in the bb |
2023 | UPDATE_E->dest are updated accordingly. |
2024 | |
2025 | Assumption 1: Like the rest of the vectorizer, this function assumes |
2026 | a single loop exit that has a single predecessor. |
2027 | |
2028 | Assumption 2: The phi nodes in the LOOP header and in update_bb are |
2029 | organized in the same order. |
2030 | |
2031 | Assumption 3: The access function of the ivs is simple enough (see |
2032 | vect_can_advance_ivs_p). This assumption will be relaxed in the future. |
2033 | |
2034 | Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path |
2035 | coming out of LOOP on which the ivs of LOOP are used (this is the path |
2036 | that leads to the epilog loop; other paths skip the epilog loop). This |
2037 | path starts with the edge UPDATE_E, and its destination (denoted update_bb) |
2038 | needs to have its phis updated. |
2039 | */ |
2040 | |
2041 | static void |
2042 | vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, |
2043 | tree niters, edge update_e) |
2044 | { |
2045 | gphi_iterator gsi, gsi1; |
2046 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2047 | basic_block update_bb = update_e->dest; |
2048 | |
2049 | basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; |
2050 | |
2051 | /* Make sure there exists a single-predecessor exit bb: */ |
2052 | gcc_assert (single_pred_p (exit_bb)); |
2053 | gcc_assert (single_succ_edge (exit_bb) == update_e); |
2054 | |
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
2058 | { |
2059 | tree init_expr; |
2060 | tree step_expr, off; |
2061 | tree type; |
2062 | tree var, ni, ni_name; |
2063 | gimple_stmt_iterator last_gsi; |
2064 | |
2065 | gphi *phi = gsi.phi (); |
2066 | gphi *phi1 = gsi1.phi (); |
2067 | stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi); |
2068 | if (dump_enabled_p ()) |
2069 | dump_printf_loc (MSG_NOTE, vect_location, |
2070 | "vect_update_ivs_after_vectorizer: phi: %G" , |
2071 | (gimple *) phi); |
2072 | |
2073 | /* Skip reduction and virtual phis. */ |
      if (!iv_phi_p (phi_info))
2075 | { |
2076 | if (dump_enabled_p ()) |
2077 | dump_printf_loc (MSG_NOTE, vect_location, |
2078 | "reduc or virtual phi. skip.\n" ); |
2079 | continue; |
2080 | } |
2081 | |
2082 | type = TREE_TYPE (gimple_phi_result (phi)); |
2083 | step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); |
2084 | step_expr = unshare_expr (step_expr); |
2085 | |
2086 | /* FORNOW: We do not support IVs whose evolution function is a polynomial |
2087 | of degree >= 2 or exponential. */ |
2088 | gcc_assert (!tree_is_chrec (step_expr)); |
2089 | |
2090 | init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); |
2091 | gimple_seq stmts = NULL; |
2092 | enum vect_induction_op_type induction_type |
2093 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info); |
2094 | |
2095 | if (induction_type == vect_step_op_add) |
2096 | { |
2097 | tree stype = TREE_TYPE (step_expr); |
2098 | off = fold_build2 (MULT_EXPR, stype, |
2099 | fold_convert (stype, niters), step_expr); |
2100 | if (POINTER_TYPE_P (type)) |
2101 | ni = fold_build_pointer_plus (init_expr, off); |
2102 | else |
2103 | ni = fold_convert (type, |
2104 | fold_build2 (PLUS_EXPR, stype, |
2105 | fold_convert (stype, init_expr), |
2106 | off)); |
2107 | } |
      /* Don't bother calling vect_peel_nonlinear_iv_init.  */
2109 | else if (induction_type == vect_step_op_neg) |
2110 | ni = init_expr; |
2111 | else |
2112 | ni = vect_peel_nonlinear_iv_init (&stmts, init_expr, |
2113 | niters, step_expr, |
2114 | induction_type); |
2115 | |
2116 | var = create_tmp_var (type, "tmp" ); |
2117 | |
      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
2123 | { |
2124 | gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT); |
2125 | gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT); |
2126 | } |
2127 | else |
2128 | { |
2129 | gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT); |
2130 | gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT); |
2131 | } |
2132 | |
2133 | /* Fix phi expressions in the successor bb. */ |
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
2135 | } |
2136 | } |
2137 | |
2138 | /* Return a gimple value containing the misalignment (measured in vector |
2139 | elements) for the loop described by LOOP_VINFO, i.e. how many elements |
2140 | it is away from a perfectly aligned address. Add any new statements |
2141 | to SEQ. */ |
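/* Illustrative example: for a target alignment of 16 bytes and 4-byte
   elements, a start address whose low bits are 0x8 gives
   misalign_in_bytes = addr & 15 == 8 and
   misalign_in_elems = 8 >> log2 (4) == 2.  */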
2142 | |
2143 | static tree |
2144 | get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo) |
2145 | { |
2146 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
2147 | stmt_vec_info stmt_info = dr_info->stmt; |
2148 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2149 | |
2150 | poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info); |
2151 | unsigned HOST_WIDE_INT target_align_c; |
2152 | tree target_align_minus_1; |
2153 | |
2154 | bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr), |
2155 | size_zero_node) < 0; |
2156 | tree offset = (negative |
2157 | ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1) |
2158 | * TREE_INT_CST_LOW |
2159 | (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))) |
2160 | : size_zero_node); |
2161 | tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo, |
2162 | stmt_info, seq, |
2163 | offset); |
2164 | tree type = unsigned_type_for (TREE_TYPE (start_addr)); |
  if (target_align.is_constant (&target_align_c))
2166 | target_align_minus_1 = build_int_cst (type, target_align_c - 1); |
2167 | else |
2168 | { |
2169 | tree vla = build_int_cst (type, target_align); |
2170 | tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla, |
2171 | fold_build2 (MINUS_EXPR, type, |
2172 | build_int_cst (type, 0), vla)); |
2173 | target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align, |
2174 | build_int_cst (type, 1)); |
2175 | } |
2176 | |
2177 | HOST_WIDE_INT elem_size |
2178 | = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
  tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2180 | |
2181 | /* Create: misalign_in_bytes = addr & (target_align - 1). */ |
2182 | tree int_start_addr = fold_convert (type, start_addr); |
2183 | tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr, |
2184 | target_align_minus_1); |
2185 | |
2186 | /* Create: misalign_in_elems = misalign_in_bytes / element_size. */ |
2187 | tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, |
2188 | elem_size_log); |
2189 | |
2190 | return misalign_in_elems; |
2191 | } |
2192 | |
2193 | /* Function vect_gen_prolog_loop_niters |
2194 | |
2195 | Generate the number of iterations which should be peeled as prolog for the |
2196 | loop represented by LOOP_VINFO. It is calculated as the misalignment of |
2197 | DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). |
2198 | As a result, after the execution of this loop, the data reference DR will |
2199 | refer to an aligned location. The following computation is generated: |
2200 | |
2201 | If the misalignment of DR is known at compile time: |
2202 | addr_mis = int mis = DR_MISALIGNMENT (dr); |
2203 | Else, compute address misalignment in bytes: |
2204 | addr_mis = addr & (target_align - 1) |
2205 | |
2206 | prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step |
2207 | |
2208 | (elem_size = element type size; an element is the scalar element whose type |
2209 | is the inner type of the vectype) |
2210 | |
2211 | The computations will be emitted at the end of BB. We also compute and |
2212 | store upper bound (included) of the result in BOUND. |
2213 | |
2214 | When the step of the data-ref in the loop is not 1 (as in interleaved data |
2215 | and SLP), the number of iterations of the prolog must be divided by the step |
2216 | (which is equal to the size of interleaved group). |
2217 | |
2218 | The above formulas assume that VF == number of elements in the vector. This |
2219 | may not hold when there are multiple-types in the loop. |
2220 | In this case, for some data-references in the loop the VF does not represent |
2221 | the number of elements that fit in the vector. Therefore, instead of VF we |
2222 | use TYPE_VECTOR_SUBPARTS. */ |
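/* Worked example (illustrative): for VF == 4, 4-byte elements, a known
   misalignment of 8 bytes (so addr_mis / elem_size == 2) and step 1:

     prolog_niters = ((4 - 2) & (4 - 1)) / 1 == 2

   i.e. two scalar iterations are peeled so that DR becomes aligned.  */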
2223 | |
2224 | static tree |
2225 | vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo, |
2226 | basic_block bb, int *bound) |
2227 | { |
2228 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
2229 | tree var; |
2230 | tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); |
2231 | gimple_seq stmts = NULL, new_stmts = NULL; |
2232 | tree iters, iters_name; |
2233 | stmt_vec_info stmt_info = dr_info->stmt; |
2234 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2235 | poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info); |
2236 | |
2237 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) |
2238 | { |
2239 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
2240 | |
2241 | if (dump_enabled_p ()) |
2242 | dump_printf_loc (MSG_NOTE, vect_location, |
2243 | "known peeling = %d.\n" , npeel); |
2244 | |
2245 | iters = build_int_cst (niters_type, npeel); |
2246 | *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
2247 | } |
2248 | else |
2249 | { |
      tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
2251 | tree type = TREE_TYPE (misalign_in_elems); |
2252 | HOST_WIDE_INT elem_size |
2253 | = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
2254 | /* We only do prolog peeling if the target alignment is known at compile |
2255 | time. */ |
      poly_uint64 align_in_elems = exact_div (target_align, elem_size);
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
2260 | tree align_in_elems_tree = build_int_cst (type, align_in_elems); |
2261 | |
2262 | /* Create: (niters_type) ((align_in_elems - misalign_in_elems) |
2263 | & (align_in_elems - 1)). */ |
2264 | bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr), |
2265 | size_zero_node) < 0; |
2266 | if (negative) |
2267 | iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems, |
2268 | align_in_elems_tree); |
2269 | else |
2270 | iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree, |
2271 | misalign_in_elems); |
2272 | iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1); |
2273 | iters = fold_convert (niters_type, iters); |
2274 | unsigned HOST_WIDE_INT align_in_elems_c; |
      if (align_in_elems.is_constant (&align_in_elems_c))
2276 | *bound = align_in_elems_c - 1; |
2277 | else |
2278 | *bound = -1; |
2279 | } |
2280 | |
2281 | if (dump_enabled_p ()) |
2282 | dump_printf_loc (MSG_NOTE, vect_location, |
2283 | "niters for prolog loop: %T\n" , iters); |
2284 | |
  var = create_tmp_var (niters_type, "prolog_loop_niters");
2286 | iters_name = force_gimple_operand (iters, &new_stmts, false, var); |
2287 | |
2288 | if (new_stmts) |
2289 | gimple_seq_add_seq (&stmts, new_stmts); |
2290 | if (stmts) |
2291 | { |
2292 | gcc_assert (single_succ_p (bb)); |
2293 | gimple_stmt_iterator gsi = gsi_last_bb (bb); |
      if (gsi_end_p (gsi))
2295 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
2296 | else |
2297 | gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); |
2298 | } |
2299 | return iters_name; |
2300 | } |
2301 | |
2302 | |
2303 | /* Function vect_update_init_of_dr |
2304 | |
2305 | If CODE is PLUS, the vector loop starts NITERS iterations after the |
2306 | scalar one, otherwise CODE is MINUS and the vector loop starts NITERS |
2307 | iterations before the scalar one (using masking to skip inactive |
2308 | elements). This function updates the information recorded in DR to |
2309 | account for the difference. Specifically, it updates the OFFSET |
2310 | field of DR_INFO. */ |
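/* For example, if the vector loop starts NITERS iterations after the
   scalar one (CODE == PLUS_EXPR) and DR has a step of 4 bytes, the
   recorded offset is increased by NITERS * 4.  */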
2311 | |
2312 | static void |
2313 | vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code) |
2314 | { |
2315 | struct data_reference *dr = dr_info->dr; |
2316 | tree offset = dr_info->offset; |
2317 | if (!offset) |
2318 | offset = build_zero_cst (sizetype); |
2319 | |
2320 | niters = fold_build2 (MULT_EXPR, sizetype, |
2321 | fold_convert (sizetype, niters), |
2322 | fold_convert (sizetype, DR_STEP (dr))); |
2323 | offset = fold_build2 (code, sizetype, |
2324 | fold_convert (sizetype, offset), niters); |
2325 | dr_info->offset = offset; |
2326 | } |
2327 | |
2328 | |
2329 | /* Function vect_update_inits_of_drs |
2330 | |
   Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
   CODE and NITERS are as for vect_update_init_of_dr.  */
2333 | |
2334 | void |
2335 | vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, |
2336 | tree_code code) |
2337 | { |
2338 | unsigned int i; |
2339 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
2340 | struct data_reference *dr; |
2341 | |
2342 | DUMP_VECT_SCOPE ("vect_update_inits_of_dr" ); |
2343 | |
2344 | /* Adjust niters to sizetype. We used to insert the stmts on loop preheader |
2345 | here, but since we might use these niters to update the epilogues niters |
2346 | and data references we can't insert them here as this definition might not |
2347 | always dominate its uses. */ |
2348 | if (!types_compatible_p (sizetype, TREE_TYPE (niters))) |
2349 | niters = fold_convert (sizetype, niters); |
2350 | |
2351 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2352 | { |
2353 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2354 | if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt) |
2355 | && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt)) |
2356 | vect_update_init_of_dr (dr_info, niters, code); |
2357 | } |
2358 | } |
2359 | |
2360 | /* For the information recorded in LOOP_VINFO prepare the loop for peeling |
2361 | by masking. This involves calculating the number of iterations to |
2362 | be peeled and then aligning all memory references appropriately. */ |
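/* For instance, with VF == 8 and a known peeling amount of 3 the number
   of initial inactive elements to skip via masking is 8 - 3 == 5,
   matching the constant case below.  */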
2363 | |
2364 | void |
2365 | vect_prepare_for_masked_peels (loop_vec_info loop_vinfo) |
2366 | { |
2367 | tree misalign_in_elems; |
2368 | tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); |
2369 | |
2370 | gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo)); |
2371 | |
2372 | /* From the information recorded in LOOP_VINFO get the number of iterations |
2373 | that need to be skipped via masking. */ |
2374 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) |
2375 | { |
2376 | poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
2377 | - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); |
2378 | misalign_in_elems = build_int_cst (type, misalign); |
2379 | } |
2380 | else |
2381 | { |
2382 | gimple_seq seq1 = NULL, seq2 = NULL; |
      misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2384 | misalign_in_elems = fold_convert (type, misalign_in_elems); |
2385 | misalign_in_elems = force_gimple_operand (misalign_in_elems, |
2386 | &seq2, true, NULL_TREE); |
2387 | gimple_seq_add_seq (&seq1, seq2); |
2388 | if (seq1) |
2389 | { |
2390 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
2391 | basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1); |
2392 | gcc_assert (!new_bb); |
2393 | } |
2394 | } |
2395 | |
2396 | if (dump_enabled_p ()) |
2397 | dump_printf_loc (MSG_NOTE, vect_location, |
2398 | "misalignment for fully-masked loop: %T\n" , |
2399 | misalign_in_elems); |
2400 | |
2401 | LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems; |
2402 | |
  vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2404 | } |
2405 | |
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   *NEW_VAR_P to true if a new ssa_var is generated.  */
2409 | |
2410 | tree |
2411 | vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p) |
2412 | { |
2413 | tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); |
2414 | if (TREE_CODE (ni) == INTEGER_CST) |
2415 | return ni; |
2416 | else |
2417 | { |
2418 | tree ni_name, var; |
2419 | gimple_seq stmts = NULL; |
2420 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
2421 | |
      var = create_tmp_var (TREE_TYPE (ni), "niters");
2423 | ni_name = force_gimple_operand (ni, &stmts, false, var); |
2424 | if (stmts) |
2425 | { |
2426 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
2427 | if (new_var_p != NULL) |
2428 | *new_var_p = true; |
2429 | } |
2430 | |
2431 | return ni_name; |
2432 | } |
2433 | } |
2434 | |
/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of iterations
2437 | of prolog loop. If it's integer const, the integer number is also passed |
2438 | in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the |
2439 | number of iterations of the prolog loop. BOUND_EPILOG is the corresponding |
2440 | value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the |
2441 | threshold below which the scalar (rather than vectorized) loop will be |
2442 | executed. This function stores the upper bound (inclusive) of the result |
2443 | in BOUND_SCALAR. */ |
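/* For example, with a constant prolog of 2 iterations, BOUND_EPILOG == 3
   and threshold TH == 10, the scalar loop executes at most
   max (2 + 3, 10 - 1) == 9 iterations: either just the peeled iterations,
   or a full scalar run below the profitability threshold.  */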
2444 | |
2445 | static tree |
2446 | vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog, |
2447 | int bound_prolog, poly_int64 bound_epilog, int th, |
2448 | poly_uint64 *bound_scalar, |
2449 | bool check_profitability) |
2450 | { |
2451 | tree type = TREE_TYPE (niters_prolog); |
2452 | tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog, |
2453 | build_int_cst (type, bound_epilog)); |
2454 | |
2455 | *bound_scalar = bound_prolog + bound_epilog; |
2456 | if (check_profitability) |
2457 | { |
2458 | /* TH indicates the minimum niters of vectorized loop, while we |
2459 | compute the maximum niters of scalar loop. */ |
2460 | th--; |
2461 | /* Peeling for constant times. */ |
2462 | if (int_niters_prolog >= 0) |
2463 | { |
	  *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2465 | return build_int_cst (type, *bound_scalar); |
2466 | } |
2467 | /* Peeling an unknown number of times. Note that both BOUND_PROLOG |
2468 | and BOUND_EPILOG are inclusive upper bounds. */ |
2469 | if (known_ge (th, bound_prolog + bound_epilog)) |
2470 | { |
2471 | *bound_scalar = th; |
2472 | return build_int_cst (type, th); |
2473 | } |
2474 | /* Need to do runtime comparison. */ |
2475 | else if (maybe_gt (th, bound_epilog)) |
2476 | { |
	  *bound_scalar = upper_bound (*bound_scalar, th);
2478 | return fold_build2 (MAX_EXPR, type, |
2479 | build_int_cst (type, th), niters); |
2480 | } |
2481 | } |
2482 | return niters; |
2483 | } |
2484 | |
2485 | /* NITERS is the number of times that the original scalar loop executes |
2486 | after peeling. Work out the maximum number of iterations N that can |
2487 | be handled by the vectorized form of the loop and then either: |
2488 | |
2489 | a) set *STEP_VECTOR_PTR to the vectorization factor and generate: |
2490 | |
2491 | niters_vector = N |
2492 | |
2493 | b) set *STEP_VECTOR_PTR to one and generate: |
2494 | |
2495 | niters_vector = N / vf |
2496 | |
2497 | In both cases, store niters_vector in *NITERS_VECTOR_PTR and add |
2498 | any new statements on the loop preheader edge. NITERS_NO_OVERFLOW |
2499 | is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */ |
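/* Worked example (illustrative): for NITERS == 17 and a constant VF == 4
   without partial vectors, case b) computes

     niters_vector = 17 >> log2 (4) == 4

   i.e. four vector iterations covering 16 scalar iterations, with one
   left for the epilog.  */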
2500 | |
2501 | void |
2502 | vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, |
2503 | tree *niters_vector_ptr, tree *step_vector_ptr, |
2504 | bool niters_no_overflow) |
2505 | { |
2506 | tree ni_minus_gap, var; |
2507 | tree niters_vector, step_vector, type = TREE_TYPE (niters); |
2508 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2509 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
2510 | tree log_vf = NULL_TREE; |
2511 | |
2512 | /* If epilogue loop is required because of data accesses with gaps, we |
2513 | subtract one iteration from the total number of iterations here for |
2514 | correct calculation of RATIO. */ |
2515 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
2516 | { |
2517 | ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters, |
2518 | build_one_cst (type)); |
2519 | if (!is_gimple_val (ni_minus_gap)) |
2520 | { |
2521 | var = create_tmp_var (type, "ni_gap" ); |
2522 | gimple *stmts = NULL; |
2523 | ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts, |
2524 | true, var); |
2525 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
2526 | } |
2527 | } |
2528 | else |
2529 | ni_minus_gap = niters; |
2530 | |
2531 | /* To silence some unexpected warnings, simply initialize to 0. */ |
2532 | unsigned HOST_WIDE_INT const_vf = 0; |
  if (vf.is_constant (&const_vf)
2534 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
2535 | { |
2536 | /* Create: niters >> log2(vf) */ |
2537 | /* If it's known that niters == number of latch executions + 1 doesn't |
2538 | overflow, we can generate niters >> log2(vf); otherwise we generate |
2539 | (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio |
2540 | will be at least one. */ |
      log_vf = build_int_cst (type, exact_log2 (const_vf));
2542 | if (niters_no_overflow) |
2543 | niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf); |
2544 | else |
2545 | niters_vector |
2546 | = fold_build2 (PLUS_EXPR, type, |
2547 | fold_build2 (RSHIFT_EXPR, type, |
2548 | fold_build2 (MINUS_EXPR, type, |
2549 | ni_minus_gap, |
2550 | build_int_cst (type, vf)), |
2551 | log_vf), |
2552 | build_int_cst (type, 1)); |
2553 | step_vector = build_one_cst (type); |
2554 | } |
2555 | else |
2556 | { |
2557 | niters_vector = ni_minus_gap; |
2558 | step_vector = build_int_cst (type, vf); |
2559 | } |
2560 | |
2561 | if (!is_gimple_val (niters_vector)) |
2562 | { |
2563 | var = create_tmp_var (type, "bnd" ); |
2564 | gimple_seq stmts = NULL; |
2565 | niters_vector = force_gimple_operand (niters_vector, &stmts, true, var); |
2566 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
      /* The peeling algorithm guarantees that the vector loop bound is at
	 least ONE; we set range information to make the niters analyzer's
	 life easier.  Note the number of latch iterations can be
	 TYPE_MAX_VALUE so we have to represent the vector niter as
	 (TYPE_MAX_VALUE + 1) >> log_vf.  */
2571 | if (stmts != NULL && log_vf) |
2572 | { |
2573 | if (niters_no_overflow) |
2574 | { |
	      value_range vr (type,
			      wi::one (TYPE_PRECISION (type)),
			      wi::rshift (wi::max_value (TYPE_PRECISION (type),
							 TYPE_SIGN (type)),
					  exact_log2 (const_vf),
					  TYPE_SIGN (type)));
	      set_range_info (niters_vector, vr);
2582 | } |
2583 | /* For VF == 1 the vector IV might also overflow so we cannot |
2584 | assert a minimum value of 1. */ |
2585 | else if (const_vf > 1) |
2586 | { |
	      value_range vr (type,
			      wi::one (TYPE_PRECISION (type)),
			      wi::rshift (wi::max_value (TYPE_PRECISION (type),
							 TYPE_SIGN (type))
					  - (const_vf - 1),
					  exact_log2 (const_vf),
					  TYPE_SIGN (type))
			      + 1);
	      set_range_info (niters_vector, vr);
2595 | } |
2596 | } |
2597 | } |
2598 | *niters_vector_ptr = niters_vector; |
2599 | *step_vector_ptr = step_vector; |
2600 | |
2601 | return; |
2602 | } |
2603 | |
2604 | /* Given NITERS_VECTOR which is the number of iterations for vectorized |
2605 | loop specified by LOOP_VINFO after vectorization, compute the number |
2606 | of iterations before vectorization (niters_vector * vf) and store it |
2607 | to NITERS_VECTOR_MULT_VF_PTR. */ |
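/* E.g. for NITERS_VECTOR == 4 and VF == 4 this emits
   niters_vector_mult_vf = 4 << log2 (4) == 16.  */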
2608 | |
2609 | static void |
2610 | vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, |
2611 | tree niters_vector, |
2612 | tree *niters_vector_mult_vf_ptr) |
2613 | { |
2614 | /* We should be using a step_vector of VF if VF is variable. */ |
2615 | int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant (); |
2616 | tree type = TREE_TYPE (niters_vector); |
  tree log_vf = build_int_cst (type, exact_log2 (vf));
2618 | basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; |
2619 | |
2620 | gcc_assert (niters_vector_mult_vf_ptr != NULL); |
2621 | tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type, |
2622 | niters_vector, log_vf); |
2623 | if (!is_gimple_val (niters_vector_mult_vf)) |
2624 | { |
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
2626 | gimple_seq stmts = NULL; |
2627 | niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf, |
2628 | &stmts, true, var); |
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2630 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
2631 | } |
2632 | *niters_vector_mult_vf_ptr = niters_vector_mult_vf; |
2633 | } |
2634 | |
2635 | /* Function slpeel_add_loop_guard adds guard skipping from the beginning |
2636 | of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE |
2637 | are two pred edges of the merge point before UPDATE_LOOP. The two loops |
2638 | appear like below: |
2639 | |
2640 | guard_bb: |
2641 | if (cond) |
2642 | goto merge_bb; |
2643 | else |
2644 | goto skip_loop; |
2645 | |
2646 | skip_loop: |
2647 | header_a: |
2648 | i_1 = PHI<i_0, i_2>; |
2649 | ... |
2650 | i_2 = i_1 + 1; |
2651 | if (cond_a) |
2652 | goto latch_a; |
2653 | else |
2654 | goto exit_a; |
2655 | latch_a: |
2656 | goto header_a; |
2657 | |
2658 | exit_a: |
2659 | i_5 = PHI<i_2>; |
2660 | |
2661 | merge_bb: |
2662 | ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point. |
2663 | |
2664 | update_loop: |
2665 | header_b: |
2666 | i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x. |
2667 | ... |
2668 | i_4 = i_3 + 1; |
2669 | if (cond_b) |
2670 | goto latch_b; |
2671 | else |
2672 | goto exit_bb; |
2673 | latch_b: |
2674 | goto header_b; |
2675 | |
2676 | exit_bb: |
2677 | |
   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the new PHI result.  */
2680 | |
2681 | static void |
2682 | slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop, |
2683 | class loop *update_loop, |
2684 | edge guard_edge, edge merge_edge) |
2685 | { |
2686 | location_t merge_loc, guard_loc; |
2687 | edge orig_e = loop_preheader_edge (skip_loop); |
2688 | edge update_e = loop_preheader_edge (update_loop); |
2689 | gphi_iterator gsi_orig, gsi_update; |
2690 | |
2691 | for ((gsi_orig = gsi_start_phis (skip_loop->header), |
2692 | gsi_update = gsi_start_phis (update_loop->header)); |
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
2695 | { |
2696 | gphi *orig_phi = gsi_orig.phi (); |
2697 | gphi *update_phi = gsi_update.phi (); |
2698 | |
2699 | /* Generate new phi node at merge bb of the guard. */ |
2700 | tree new_res = copy_ssa_name (PHI_RESULT (orig_phi)); |
2701 | gphi *new_phi = create_phi_node (new_res, guard_edge->dest); |
2702 | |
2703 | /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the |
2704 | args in NEW_PHI for these edges. */ |
2705 | tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e); |
2706 | tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e); |
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2709 | add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc); |
2710 | add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc); |
2711 | |
2712 | /* Update phi in UPDATE_PHI. */ |
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2714 | } |
2715 | } |
2716 | |
2717 | /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped. |
2718 | Return a value that equals: |
2719 | |
2720 | - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and |
2721 | - SKIP_VALUE when the main loop is skipped. */ |
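/* Conceptually this materializes

     phi_result = PHI <MAIN_LOOP_VALUE (main_loop_edge),
		       SKIP_VALUE (skip_main_loop_edge)>

   at the destination of the main loop edge.  */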
2722 | |
2723 | tree |
2724 | vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value, |
2725 | tree skip_value) |
2726 | { |
2727 | gcc_assert (loop_vinfo->main_loop_edge); |
2728 | |
2729 | tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value)); |
2730 | basic_block bb = loop_vinfo->main_loop_edge->dest; |
2731 | gphi *new_phi = create_phi_node (phi_result, bb); |
2732 | add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge, |
2733 | UNKNOWN_LOCATION); |
2734 | add_phi_arg (new_phi, skip_value, |
2735 | loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION); |
2736 | return phi_result; |
2737 | } |
2738 | |
2739 | /* Function vect_do_peeling. |
2740 | |
2741 | Input: |
2742 | - LOOP_VINFO: Represent a loop to be vectorized, which looks like: |
2743 | |
2744 | preheader: |
2745 | LOOP: |
2746 | header_bb: |
2747 | loop_body |
2748 | if (exit_loop_cond) goto exit_bb |
2749 | else goto header_bb |
2750 | exit_bb: |
2751 | |
2752 | - NITERS: The number of iterations of the loop. |
2753 | - NITERSM1: The number of iterations of the loop's latch. |
2754 | - NITERS_NO_OVERFLOW: No overflow in computing NITERS. |
2755 | - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if |
2756 | CHECK_PROFITABILITY is true. |
2757 | Output: |
2758 | - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should |
2759 | iterate after vectorization; see vect_set_loop_condition for details. |
2760 | - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that |
2761 | should be set to the number of scalar iterations handled by the |
2762 | vector loop. The SSA name is only used on exit from the loop. |
2763 | |
2764 | This function peels prolog and epilog from the loop, adds guards skipping |
2765 | PROLOG and EPILOG for various conditions. As a result, the changed CFG |
2766 | would look like: |
2767 | |
2768 | guard_bb_1: |
2769 | if (prefer_scalar_loop) goto merge_bb_1 |
2770 | else goto guard_bb_2 |
2771 | |
2772 | guard_bb_2: |
2773 | if (skip_prolog) goto merge_bb_2 |
2774 | else goto prolog_preheader |
2775 | |
2776 | prolog_preheader: |
2777 | PROLOG: |
2778 | prolog_header_bb: |
2779 | prolog_body |
2780 | if (exit_prolog_cond) goto prolog_exit_bb |
2781 | else goto prolog_header_bb |
2782 | prolog_exit_bb: |
2783 | |
2784 | merge_bb_2: |
2785 | |
2786 | vector_preheader: |
2787 | VECTOR LOOP: |
2788 | vector_header_bb: |
2789 | vector_body |
2790 | if (exit_vector_cond) goto vector_exit_bb |
2791 | else goto vector_header_bb |
2792 | vector_exit_bb: |
2793 | |
2794 | guard_bb_3: |
2795 | if (skip_epilog) goto merge_bb_3 |
2796 | else goto epilog_preheader |
2797 | |
2798 | merge_bb_1: |
2799 | |
2800 | epilog_preheader: |
2801 | EPILOG: |
2802 | epilog_header_bb: |
2803 | epilog_body |
2804 | if (exit_epilog_cond) goto merge_bb_3 |
2805 | else goto epilog_header_bb |
2806 | |
2807 | merge_bb_3: |
2808 | |
2809 | Note this function peels prolog and epilog only if it's necessary, |
2810 | as well as guards. |
2811 | This function returns the epilogue loop if a decision was made to vectorize |
2812 | it, otherwise NULL. |
2813 | |
2814 | The analysis resulting in this epilogue loop's loop_vec_info was performed |
2815 | in the same vect_analyze_loop call as the main loop's. At that time |
2816 | vect_analyze_loop constructs a list of accepted loop_vec_info's for lower |
2817 | vectorization factors than the main loop. This list is stored in the main |
   loop's loop_vec_info in the 'epilogue_vinfos' member.  Every time we decide to
2819 | vectorize the epilogue loop for a lower vectorization factor, the |
2820 | loop_vec_info sitting at the top of the epilogue_vinfos list is removed, |
2821 | updated and linked to the epilogue loop. This is later used to vectorize |
2822 | the epilogue. The reason the loop_vec_info needs updating is that it was |
2823 | constructed based on the original main loop, and the epilogue loop is a |
2824 | copy of this loop, so all links pointing to statements in the original loop |
2825 | need updating. Furthermore, these loop_vec_infos share the |
2826 | data_reference's records, which will also need to be updated. |
2827 | |
2828 | TODO: Guard for prefer_scalar_loop should be emitted along with |
2829 | versioning conditions if loop versioning is needed. */ |
2830 | |
2831 | |
2832 | class loop * |
2833 | vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, |
2834 | tree *niters_vector, tree *step_vector, |
2835 | tree *niters_vector_mult_vf_var, int th, |
2836 | bool check_profitability, bool niters_no_overflow, |
2837 | tree *advance) |
2838 | { |
2839 | edge e, guard_e; |
2840 | tree type = TREE_TYPE (niters), guard_cond; |
2841 | basic_block guard_bb, guard_to; |
2842 | profile_probability prob_prolog, prob_vector, prob_epilog; |
2843 | int estimated_vf; |
2844 | int prolog_peeling = 0; |
2845 | bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; |
2846 | /* We currently do not support prolog peeling if the target alignment is not |
2847 | known at compile time. 'vect_gen_prolog_loop_niters' depends on the |
2848 | target alignment being constant. */ |
2849 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
2850 | if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ()) |
2851 | return NULL; |
2852 | |
2853 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
2854 | prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
2855 | |
2856 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2857 | poly_uint64 bound_epilog = 0; |
2858 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
2859 | && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) |
2860 | bound_epilog += vf - 1; |
2861 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
2862 | bound_epilog += 1; |
  bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2864 | poly_uint64 bound_scalar = bound_epilog; |
2865 | |
2866 | if (!prolog_peeling && !epilog_peeling) |
2867 | return NULL; |
2868 | |
  /* Before doing any peeling make sure to reset debug binds outside of
     the loop referring to defs not in LC SSA.  */
2871 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2872 | for (unsigned i = 0; i < loop->num_nodes; ++i) |
2873 | { |
2874 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
2875 | imm_use_iterator ui; |
2876 | gimple *use_stmt; |
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
2879 | { |
2880 | FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ())) |
	    if (gimple_debug_bind_p (use_stmt)
		&& loop != gimple_bb (use_stmt)->loop_father
		&& !flow_loop_nested_p (loop,
					gimple_bb (use_stmt)->loop_father))
2885 | { |
		gimple_debug_bind_reset_value (use_stmt);
		update_stmt (use_stmt);
2888 | } |
2889 | } |
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
2892 | { |
2893 | ssa_op_iter op_iter; |
2894 | def_operand_p def_p; |
2895 | FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF) |
2896 | FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p)) |
	      if (gimple_debug_bind_p (use_stmt)
		  && loop != gimple_bb (use_stmt)->loop_father
		  && !flow_loop_nested_p (loop,
					  gimple_bb (use_stmt)->loop_father))
		{
		  gimple_debug_bind_reset_value (use_stmt);
		  update_stmt (use_stmt);
2904 | } |
2905 | } |
2906 | } |
2907 | |
  prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
2909 | estimated_vf = vect_vf_for_cost (loop_vinfo); |
2910 | if (estimated_vf == 2) |
2911 | estimated_vf = 3; |
  prob_prolog = prob_epilog = profile_probability::guessed_always ()
				.apply_scale (estimated_vf - 1, estimated_vf);
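  /* E.g. with ESTIMATED_VF == 4 the prolog and epilog are each assumed
     taken with probability 3/4 and the vector path with probability 9/10;
     these are guessed values (note the use of guessed_always above), not
     measured profile counts.  */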
2914 | |
2915 | class loop *prolog, *epilog = NULL; |
2916 | class loop *first_loop = loop; |
2917 | bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; |
2918 | |
  /* SSA form needs to be up-to-date since we are going to manually
     update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete
     all update-SSA state after that, so we have to make sure not to lose
     any pending update needs.  */
2923 | gcc_assert (!need_ssa_update_p (cfun)); |
2924 | |
2925 | /* If we're vectorizing an epilogue loop, we have ensured that the |
2926 | virtual operand is in SSA form throughout the vectorized main loop. |
2927 | Normally it is possible to trace the updated |
2928 | vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses |
2929 | back to scalar-stmt vuses, meaning that the effect of the SSA update |
2930 | remains local to the main loop. However, there are rare cases in |
2931 | which the vectorized loop should have vdefs even when the original scalar |
2932 | loop didn't. For example, vectorizing a load with IFN_LOAD_LANES |
2933 | introduces clobbers of the temporary vector array, which in turn |
2934 | needs new vdefs. If the scalar loop doesn't write to memory, these |
2935 | new vdefs will be the only ones in the vector loop. |
     We currently defer updating virtual SSA form and the creation of
     a virtual PHI for this case, so we do not have to make sure the
     newly introduced virtual def is in LCSSA form.  */
2939 | |
2940 | if (MAY_HAVE_DEBUG_BIND_STMTS) |
2941 | { |
2942 | gcc_assert (!adjust_vec.exists ()); |
      adjust_vec.create (32);
2944 | } |
2945 | initialize_original_copy_tables (); |
2946 | |
2947 | /* Record the anchor bb at which the guard should be placed if the scalar |
2948 | loop might be preferred. */ |
2949 | basic_block anchor = loop_preheader_edge (loop)->src; |
2950 | |
2951 | /* Generate the number of iterations for the prolog loop. We do this here |
2952 | so that we can also get the upper bound on the number of iterations. */ |
2953 | tree niters_prolog; |
2954 | int bound_prolog = 0; |
2955 | if (prolog_peeling) |
2956 | { |
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
						   &bound_prolog);
      /* If the alignment peeling amount is known, we will always execute
	 the prolog.  */
2960 | if (TREE_CODE (niters_prolog) == INTEGER_CST) |
2961 | prob_prolog = profile_probability::always (); |
2962 | } |
2963 | else |
2964 | niters_prolog = build_int_cst (type, 0); |
2965 | |
2966 | loop_vec_info epilogue_vinfo = NULL; |
2967 | if (vect_epilogues) |
2968 | { |
2969 | epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; |
      loop_vinfo->epilogue_vinfos.ordered_remove (0);
2971 | } |
2972 | |
2973 | tree niters_vector_mult_vf = NULL_TREE; |
  /* Save NITERS before the loop, as it may be changed by the prologue.  */
2975 | tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); |
2976 | edge update_e = NULL, skip_e = NULL; |
  unsigned int lowest_vf = constant_lower_bound (vf);
2978 | /* Prolog loop may be skipped. */ |
2979 | bool skip_prolog = (prolog_peeling != 0); |
  /* Skip the vectorized loop at runtime when there are not enough scalar
     iterations to enter it.  If the number of scalar iterations is known
     at compile time, the skip is needed when that number may be smaller
     than the sum of the upper bounds on the prolog and epilog peeling.
     If the number of iterations is unknown, the loop-versioning threshold
     check already asserts that there are enough scalar iterations to enter
     the main loop, so the skip is needed only when we do not version, or
     when we version but have chosen to vectorize the epilogue.  */
  bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
		      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
				  bound_prolog + bound_epilog)
		      : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
			 || vect_epilogues));
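  /* Illustrative numbers: with VF == 4, bound_prolog == 3 and
     bound_epilog == 3, a loop known to run 5 scalar iterations may leave
     no full vector iteration after peeling (5 < 3 + 3), so the runtime
     guard around the vector loop is required.  */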
2997 | /* Epilog loop must be executed if the number of iterations for epilog |
2998 | loop is known at compile time, otherwise we need to add a check at |
2999 | the end of vector loop and skip to the end of epilog loop. */ |
3000 | bool skip_epilog = (prolog_peeling < 0 |
3001 | || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
3002 | || !vf.is_constant ()); |
3003 | /* PEELING_FOR_GAPS is special because epilog loop must be executed. */ |
3004 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
3005 | skip_epilog = false; |
3006 | |
3007 | class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
3008 | auto_vec<profile_count> original_counts; |
3009 | basic_block *original_bbs = NULL; |
3010 | |
3011 | if (skip_vector) |
3012 | { |
3013 | split_edge (loop_preheader_edge (loop)); |
3014 | |
3015 | if (epilog_peeling && (vect_epilogues || scalar_loop == NULL)) |
3016 | { |
3017 | original_bbs = get_loop_body (loop); |
3018 | for (unsigned int i = 0; i < loop->num_nodes; i++) |
	    original_counts.safe_push (original_bbs[i]->count);
3020 | } |
3021 | |
3022 | /* Due to the order in which we peel prolog and epilog, we first |
3023 | propagate probability to the whole loop. The purpose is to |
3024 | avoid adjusting probabilities of both prolog and vector loops |
3025 | separately. Note in this case, the probability of epilog loop |
3026 | needs to be scaled back later. */ |
3027 | basic_block bb_before_loop = loop_preheader_edge (loop)->src; |
3028 | if (prob_vector.initialized_p ()) |
3029 | { |
3030 | scale_bbs_frequencies (&bb_before_loop, 1, prob_vector); |
3031 | scale_loop_profile (loop, prob_vector, -1); |
3032 | } |
3033 | } |
3034 | |
3035 | if (vect_epilogues) |
3036 | { |
      /* Make sure to set the epilogue's scalar loop, such that we can
	 use the original scalar loop as the remaining epilogue if
	 necessary.  */
3039 | LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) |
3040 | = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
3041 | LOOP_VINFO_SCALAR_IV_EXIT (epilogue_vinfo) |
3042 | = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); |
3043 | } |
3044 | |
3045 | if (prolog_peeling) |
3046 | { |
3047 | e = loop_preheader_edge (loop); |
3048 | edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo); |
3049 | gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)); |
3050 | |
3051 | /* Peel prolog and put it on preheader edge of loop. */ |
3052 | edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); |
3053 | edge prolog_e = NULL; |
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
						       scalar_loop, scalar_e,
						       e, &prolog_e);
3057 | gcc_assert (prolog); |
3058 | prolog->force_vectorize = false; |
3059 | |
3060 | first_loop = prolog; |
3061 | reset_original_copy_tables (); |
3062 | |
3063 | /* Update the number of iterations for prolog loop. */ |
3064 | tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog)); |
      vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
			       step_prolog, NULL_TREE, false);
3067 | |
3068 | /* Skip the prolog loop. */ |
3069 | if (skip_prolog) |
3070 | { |
3071 | guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, |
3072 | niters_prolog, build_int_cst (type, 0)); |
3073 | guard_bb = loop_preheader_edge (prolog)->src; |
3074 | basic_block bb_after_prolog = loop_preheader_edge (loop)->src; |
3075 | guard_to = split_edge (loop_preheader_edge (loop)); |
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_prolog.invert (),
					   irred_flag);
3080 | e = EDGE_PRED (guard_to, 0); |
3081 | e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); |
	  slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3083 | |
3084 | scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); |
3085 | scale_loop_profile (prolog, prob_prolog, bound_prolog - 1); |
3086 | } |
3087 | |
3088 | /* Update init address of DRs. */ |
      vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3090 | /* Update niters for vector loop. */ |
3091 | LOOP_VINFO_NITERS (loop_vinfo) |
3092 | = fold_build2 (MINUS_EXPR, type, niters, niters_prolog); |
3093 | LOOP_VINFO_NITERSM1 (loop_vinfo) |
3094 | = fold_build2 (MINUS_EXPR, type, |
3095 | LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog); |
3096 | bool new_var_p = false; |
      niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
      /* The vector loop's iteration count before vectorization is
	 guaranteed to be at least VF, so set range information for the
	 newly generated variable.  */
3100 | if (new_var_p) |
3101 | { |
3102 | value_range vr (type, |
			  wi::to_wide (build_int_cst (type, lowest_vf)),
3104 | wi::to_wide (TYPE_MAX_VALUE (type))); |
3105 | set_range_info (niters, vr); |
3106 | } |
3107 | |
3108 | /* Prolog iterates at most bound_prolog times, latch iterates at |
3109 | most bound_prolog - 1 times. */ |
3110 | record_niter_bound (prolog, bound_prolog - 1, false, true); |
3111 | delete_update_ssa (); |
3112 | adjust_vec_debug_stmts (); |
3113 | scev_reset (); |
3114 | } |
3115 | basic_block bb_before_epilog = NULL; |
3116 | |
3117 | if (epilog_peeling) |
3118 | { |
3119 | e = LOOP_VINFO_IV_EXIT (loop_vinfo); |
3120 | gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e)); |
3121 | |
3122 | /* Peel epilog and put it on exit edge of loop. If we are vectorizing |
3123 | said epilog then we should use a copy of the main loop as a starting |
3124 | point. This loop may have already had some preliminary transformations |
3125 | to allow for more optimal vectorization, for example if-conversion. |
	 If we are not vectorizing the epilog then we should use the scalar
	 loop, as the transformations mentioned above make little or no
	 sense when not vectorizing.  */
3129 | edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); |
3130 | epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; |
3131 | edge epilog_e = vect_epilogues ? e : scalar_e; |
3132 | edge new_epilog_e = NULL; |
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog,
						       epilog_e, e,
						       &new_epilog_e);
3136 | LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e; |
3137 | gcc_assert (epilog); |
3138 | epilog->force_vectorize = false; |
3139 | bb_before_epilog = loop_preheader_edge (epilog)->src; |
3140 | |
      /* The scalar version of the loop may be preferred.  In this case,
	 add a guard and skip to the epilog.  Note this only happens when
	 the number of iterations of the loop is unknown at compile time;
	 otherwise this won't be vectorized.  */
3145 | if (skip_vector) |
3146 | { |
	  /* An additional epilogue iteration is peeled if a gap exists.  */
	  tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
						bound_prolog, bound_epilog,
						th, &bound_scalar,
						check_profitability);
3152 | /* Build guard against NITERSM1 since NITERS may overflow. */ |
3153 | guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t); |
3154 | guard_bb = anchor; |
3155 | guard_to = split_edge (loop_preheader_edge (epilog)); |
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_vector.invert (),
					   irred_flag);
3160 | skip_e = guard_e; |
3161 | e = EDGE_PRED (guard_to, 0); |
3162 | e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); |
	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3164 | |
3165 | /* Simply propagate profile info from guard_bb to guard_to which is |
3166 | a merge point of control flow. */ |
3167 | profile_count old_count = guard_to->count; |
3168 | guard_to->count = guard_bb->count; |
3169 | |
3170 | /* Restore the counts of the epilog loop if we didn't use the scalar loop. */ |
3171 | if (vect_epilogues || scalar_loop == NULL) |
3172 | { |
	      gcc_assert (epilog->num_nodes == loop->num_nodes);
	      basic_block *bbs = get_loop_body (epilog);
	      for (unsigned int i = 0; i < epilog->num_nodes; i++)
		{
		  gcc_assert (get_bb_original (bbs[i]) == original_bbs[i]);
		  bbs[i]->count = original_counts[i];
		}
	      free (bbs);
	      free (original_bbs);
	    }
	  else if (old_count.nonzero_p ())
	    scale_loop_profile (epilog,
				guard_to->count.probability_in (old_count),
				-1);
3185 | |
3186 | /* Only need to handle basic block before epilog loop if it's not |
3187 | the guard_bb, which is the case when skip_vector is true. */ |
3188 | if (guard_bb != bb_before_epilog) |
	    bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
3190 | bb_before_epilog = loop_preheader_edge (epilog)->src; |
3191 | } |
      /* If the loop is peeled a non-zero constant number of times, NITERS
	 now refers to orig_niters - prolog_peeling and will not overflow
	 even if orig_niters does.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
				   niters_vector, step_vector,
				   niters_no_overflow);
3199 | if (!integer_onep (*step_vector)) |
3200 | { |
	  /* On exit from the loop we will have an easy way of calculating
	     NITERS_VECTOR / STEP * STEP.  Install a dummy definition
	     until then.  */
3204 | niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector)); |
3205 | SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop (); |
3206 | *niters_vector_mult_vf_var = niters_vector_mult_vf; |
3207 | } |
3208 | else |
	vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
					     &niters_vector_mult_vf);
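      /* As an illustration (hypothetical numbers): with VF == 4 and
	 NITERS == 10, the vector loop runs NITERS_VECTOR == 2 iterations,
	 NITERS_VECTOR_MULT_VF is 8, and the epilog handles the remaining
	 2 scalar iterations.  */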
3211 | /* Update IVs of original loop as if they were advanced by |
3212 | niters_vector_mult_vf steps. */ |
3213 | gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); |
3214 | update_e = skip_vector ? e : loop_preheader_edge (epilog); |
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
					update_e);
3217 | |
3218 | if (skip_epilog) |
3219 | { |
3220 | guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, |
3221 | niters, niters_vector_mult_vf); |
3222 | guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; |
3223 | edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); |
3224 | guard_to = epilog_e->dest; |
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
					   skip_vector ? anchor : guard_bb,
					   prob_epilog.invert (),
					   irred_flag);
3229 | if (vect_epilogues) |
3230 | epilogue_vinfo->skip_this_loop_edge = guard_e; |
3231 | edge main_iv = LOOP_VINFO_IV_EXIT (loop_vinfo); |
3232 | gphi_iterator gsi2 = gsi_start_phis (main_iv->dest); |
3233 | for (gphi_iterator gsi = gsi_start_phis (guard_to); |
	       !gsi_end_p (gsi); gsi_next (&gsi))
3235 | { |
	      /* We expect all of the PHIs we have on epilog_e to also be
		 on the main loop exit.  But sometimes a stray virtual
		 definition can appear at epilog_e which we can then take
		 to be the same on all exits; we removed the LC SSA PHI on
		 the main exit beforehand, so we do not need to create a
		 loop PHI for it.  */
	      if (virtual_operand_p (gimple_phi_result (*gsi))
		  && (gsi_end_p (gsi2)
		      || !virtual_operand_p (gimple_phi_result (*gsi2))))
		add_phi_arg (*gsi,
			     gimple_phi_arg_def_from_edge (*gsi, epilog_e),
			     guard_e, UNKNOWN_LOCATION);
	      else
		{
		  add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
			       UNKNOWN_LOCATION);
		  gsi_next (&gsi2);
3253 | } |
3254 | } |
3255 | |
3256 | /* Only need to handle basic block before epilog loop if it's not |
3257 | the guard_bb, which is the case when skip_vector is true. */ |
3258 | if (guard_bb != bb_before_epilog) |
3259 | { |
3260 | prob_epilog = prob_vector * prob_epilog + prob_vector.invert (); |
3261 | |
3262 | scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); |
3263 | } |
3264 | scale_loop_profile (epilog, prob_epilog, -1); |
3265 | } |
3266 | |
3267 | unsigned HOST_WIDE_INT bound; |
      if (bound_scalar.is_constant (&bound))
3269 | { |
3270 | gcc_assert (bound != 0); |
3271 | /* -1 to convert loop iterations to latch iterations. */ |
3272 | record_niter_bound (epilog, bound - 1, false, true); |
3273 | scale_loop_profile (epilog, profile_probability::always (), |
3274 | bound - 1); |
3275 | } |
3276 | |
3277 | delete_update_ssa (); |
3278 | adjust_vec_debug_stmts (); |
3279 | scev_reset (); |
3280 | } |
3281 | |
3282 | if (vect_epilogues) |
3283 | { |
3284 | epilog->aux = epilogue_vinfo; |
3285 | LOOP_VINFO_LOOP (epilogue_vinfo) = epilog; |
3286 | LOOP_VINFO_IV_EXIT (epilogue_vinfo) |
3287 | = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); |
3288 | |
      loop_constraint_clear (epilog, LOOP_C_INFINITE);
3290 | |
      /* We now must calculate the number of iterations (NITERS) performed
	 by the previous loop and the number (EPILOGUE_NITERS) to be
	 performed by the epilogue.  */
3293 | tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf), |
3294 | niters_prolog, niters_vector_mult_vf); |
3295 | |
      /* If SKIP_VECTOR is set we may have skipped the previous loop, so
	 insert a PHI node to determine whether we come from the previous
	 vectorized loop via the update_e edge or from the skip_vector
	 basic block via the skip_e edge.  */
3300 | if (skip_vector) |
3301 | { |
3302 | gcc_assert (update_e != NULL && skip_e != NULL); |
3303 | gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)), |
3304 | update_e->dest); |
3305 | tree new_ssa = make_ssa_name (TREE_TYPE (niters)); |
3306 | gimple *stmt = gimple_build_assign (new_ssa, niters); |
3307 | gimple_stmt_iterator gsi; |
3308 | if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME |
3309 | && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL) |
3310 | { |
3311 | gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf)); |
3312 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
3313 | } |
3314 | else |
3315 | { |
	      gsi = gsi_last_bb (update_e->src);
3317 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); |
3318 | } |
3319 | |
3320 | niters = new_ssa; |
3321 | add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION); |
3322 | add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e, |
3323 | UNKNOWN_LOCATION); |
3324 | niters = PHI_RESULT (new_phi); |
3325 | epilogue_vinfo->main_loop_edge = update_e; |
3326 | epilogue_vinfo->skip_main_loop_edge = skip_e; |
3327 | } |
3328 | |
3329 | /* Set ADVANCE to the number of iterations performed by the previous |
3330 | loop and its prologue. */ |
3331 | *advance = niters; |
3332 | |
3333 | /* Subtract the number of iterations performed by the vectorized loop |
3334 | from the number of total iterations. */ |
3335 | tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), |
3336 | before_loop_niters, |
3337 | niters); |
3338 | |
3339 | LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; |
3340 | LOOP_VINFO_NITERSM1 (epilogue_vinfo) |
3341 | = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), |
3342 | epilogue_niters, |
3343 | build_one_cst (TREE_TYPE (epilogue_niters))); |
3344 | |
3345 | /* Decide what to do if the number of epilogue iterations is not |
3346 | a multiple of the epilogue loop's vectorization factor. |
3347 | We should have rejected the loop during the analysis phase |
3348 | if this fails. */ |
3349 | bool res = vect_determine_partial_vectors_and_peeling (epilogue_vinfo); |
3350 | gcc_assert (res); |
3351 | } |
3352 | |
3353 | adjust_vec.release (); |
3354 | free_original_copy_tables (); |
3355 | |
3356 | return vect_epilogues ? epilog : NULL; |
3357 | } |
3358 | |
/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   the loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
		to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */
3378 | |
3379 | static void |
3380 | vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr) |
3381 | { |
3382 | tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo); |
3383 | |
3384 | if (*cond_expr) |
3385 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
3386 | *cond_expr, part_cond_expr); |
3387 | else |
3388 | *cond_expr = part_cond_expr; |
3389 | } |
3390 | |
3391 | /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR |
3392 | and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ |
3393 | |
3394 | static void |
3395 | chain_cond_expr (tree *cond_expr, tree part_cond_expr) |
3396 | { |
3397 | if (*cond_expr) |
3398 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
3399 | *cond_expr, part_cond_expr); |
3400 | else |
3401 | *cond_expr = part_cond_expr; |
3402 | } |
3403 | |
3404 | /* Function vect_create_cond_for_align_checks. |
3405 | |
   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment
   must be checked at runtime.
3409 | |
3410 | Input: |
3411 | COND_EXPR - input conditional expression. New conditions will be chained |
3412 | with logical AND operation. |
3413 | LOOP_VINFO - two fields of the loop information are used. |
3414 | LOOP_VINFO_PTR_MASK is the mask used to check the alignment. |
3415 | LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked. |
3416 | |
3417 | Output: |
3418 | COND_EXPR_STMT_LIST - statements needed to construct the conditional |
3419 | expression. |
3420 | The returned value is the conditional expression to be used in the if |
3421 | statement that controls which version of the loop gets executed at runtime. |
3422 | |
3423 | The algorithm makes two assumptions: |
3424 | 1) The number of bytes "n" in a vector is a power of 2. |
     2) An address "a" is aligned if a%n is zero, and this test can be
	done as a&(n-1) == 0.  For example, for 16-byte vectors the test
	is a&0xf == 0.  */
3428 | |
3429 | static void |
3430 | vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, |
3431 | tree *cond_expr, |
3432 | gimple_seq *cond_expr_stmt_list) |
3433 | { |
3434 | const vec<stmt_vec_info> &may_misalign_stmts |
3435 | = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); |
3436 | stmt_vec_info stmt_info; |
3437 | int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); |
3438 | tree mask_cst; |
3439 | unsigned int i; |
3440 | tree int_ptrsize_type; |
3441 | char tmp_name[20]; |
3442 | tree or_tmp_name = NULL_TREE; |
3443 | tree and_tmp_name; |
3444 | gimple *and_stmt; |
3445 | tree ptrsize_zero; |
3446 | tree part_cond_expr; |
3447 | |
3448 | /* Check that mask is one less than a power of 2, i.e., mask is |
3449 | all zeros followed by all ones. */ |
3450 | gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0)); |
3451 | |
3452 | int_ptrsize_type = signed_type_for (ptr_type_node); |
3453 | |
  /* Create expression ((dr_1 | ... | dr_n) & mask), where dr_i is the
     address of the first vector of the i'th data reference.  */
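  /* For instance (illustrative), with two data references and 16-byte
     vectors the generated test is
	 (((int) addr_1 | (int) addr_2) & 0xf) == 0,
     which holds only if both addresses are 16-byte aligned.  */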
3456 | |
3457 | FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info) |
3458 | { |
3459 | gimple_seq new_stmt_list = NULL; |
3460 | tree addr_base; |
3461 | tree addr_tmp_name; |
3462 | tree new_or_tmp_name; |
3463 | gimple *addr_stmt, *or_stmt; |
3464 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
3465 | bool negative = tree_int_cst_compare |
3466 | (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0; |
3467 | tree offset = negative |
3468 | ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1) |
3469 | * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))) |
3470 | : size_zero_node; |
3471 | |
3472 | /* create: addr_tmp = (int)(address_of_first_vector) */ |
3473 | addr_base = |
3474 | vect_create_addr_base_for_vector_ref (loop_vinfo, |
3475 | stmt_info, &new_stmt_list, |
3476 | offset); |
3477 | if (new_stmt_list != NULL) |
3478 | gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list); |
3479 | |
      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3482 | addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base); |
3483 | gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt); |
3484 | |
      /* The addresses are ORed together.  */
3486 | |
3487 | if (or_tmp_name != NULL_TREE) |
3488 | { |
3489 | /* create: or_tmp = or_tmp | addr_tmp */ |
	  sprintf (tmp_name, "orptrs%d", i);
	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
						tmp_name);
3492 | or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR, |
3493 | or_tmp_name, addr_tmp_name); |
3494 | gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt); |
3495 | or_tmp_name = new_or_tmp_name; |
3496 | } |
3497 | else |
3498 | or_tmp_name = addr_tmp_name; |
3499 | |
3500 | } /* end for i */ |
3501 | |
3502 | mask_cst = build_int_cst (int_ptrsize_type, mask); |
3503 | |
3504 | /* create: and_tmp = or_tmp & mask */ |
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3506 | |
3507 | and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR, |
3508 | or_tmp_name, mask_cst); |
3509 | gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt); |
3510 | |
  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
3513 | ptrsize_zero = build_int_cst (int_ptrsize_type, 0); |
3514 | part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, |
3515 | and_tmp_name, ptrsize_zero); |
3516 | chain_cond_expr (cond_expr, part_cond_expr); |
3517 | } |
3518 | |
3519 | /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>, |
3520 | create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). |
3521 | Set *COND_EXPR to a tree that is true when both the original *COND_EXPR |
3522 | and this new condition are true. Treat a null *COND_EXPR as "true". */ |
3523 | |
3524 | static void |
3525 | vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) |
3526 | { |
3527 | const vec<vec_object_pair> &pairs |
3528 | = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); |
3529 | unsigned int i; |
3530 | vec_object_pair *pair; |
3531 | FOR_EACH_VEC_ELT (pairs, i, pair) |
3532 | { |
3533 | tree addr1 = build_fold_addr_expr (pair->first); |
3534 | tree addr2 = build_fold_addr_expr (pair->second); |
3535 | tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, |
3536 | addr1, addr2); |
3537 | chain_cond_expr (cond_expr, part_cond_expr); |
3538 | } |
3539 | } |
3540 | |
3541 | /* Create an expression that is true when all lower-bound conditions for |
3542 | the vectorized loop are met. Chain this condition with *COND_EXPR. */ |
3543 | |
3544 | static void |
3545 | vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr) |
3546 | { |
3547 | const vec<vec_lower_bound> &lower_bounds |
3548 | = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); |
3549 | for (unsigned int i = 0; i < lower_bounds.length (); ++i) |
3550 | { |
3551 | tree expr = lower_bounds[i].expr; |
3552 | tree type = unsigned_type_for (TREE_TYPE (expr)); |
3553 | expr = fold_convert (type, expr); |
3554 | poly_uint64 bound = lower_bounds[i].min_value; |
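      /* For a signed EXPR the adjustment below in effect tests
	 |EXPR| >= BOUND with a single unsigned comparison: after adding
	 BOUND - 1, exactly the values in (-BOUND, BOUND) fall below the
	 adjusted bound 2 * BOUND - 1.  */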
3555 | if (!lower_bounds[i].unsigned_p) |
3556 | { |
3557 | expr = fold_build2 (PLUS_EXPR, type, expr, |
3558 | build_int_cstu (type, bound - 1)); |
3559 | bound += bound - 1; |
3560 | } |
3561 | tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr, |
3562 | build_int_cstu (type, bound)); |
3563 | chain_cond_expr (cond_expr, part_cond_expr); |
3564 | } |
3565 | } |
3566 | |
3567 | /* Function vect_create_cond_for_alias_checks. |
3568 | |
3569 | Create a conditional expression that represents the run-time checks for |
3570 | overlapping of address ranges represented by a list of data references |
3571 | relations passed as input. |
3572 | |
3573 | Input: |
3574 | COND_EXPR - input conditional expression. New conditions will be chained |
3575 | with logical AND operation. If it is NULL, then the function |
3576 | is used to return the number of alias checks. |
3577 | LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs |
3578 | to be checked. |
3579 | |
3580 | Output: |
3581 | COND_EXPR - conditional expression. |
3582 | |
3583 | The returned COND_EXPR is the conditional expression to be used in the if |
3584 | statement that controls which version of the loop gets executed at runtime. |
3585 | */ |
3586 | |
3587 | void |
3588 | vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) |
3589 | { |
3590 | const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs = |
3591 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); |
3592 | |
3593 | if (comp_alias_ddrs.is_empty ()) |
3594 | return; |
3595 | |
3596 | create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo), |
3597 | &comp_alias_ddrs, cond_expr); |
3598 | if (dump_enabled_p ()) |
3599 | dump_printf_loc (MSG_NOTE, vect_location, |
3600 | "created %u versioning for alias checks.\n" , |
3601 | comp_alias_ddrs.length ()); |
3602 | } |
3603 | |
3604 | |
3605 | /* Function vect_loop_versioning. |
3606 | |
   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests pass.
3616 | |
3617 | The test generated to check which version of loop is executed |
3618 | is modified to also check for profitability as indicated by the |
3619 | cost model threshold TH. |
3620 | |
3621 | The versioning precondition(s) are placed in *COND_EXPR and |
3622 | *COND_EXPR_STMT_LIST. */ |
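/* A sketch of the resulting control flow, in the notation of the peeling
   diagram earlier in this file:

     versioning_check_bb:
       if (versioning_cond) goto loop_preheader
       else goto nloop_preheader

     loop_preheader:
       LOOP (the version that will be vectorized)

     nloop_preheader:
       NLOOP (the scalar copy)  */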
3623 | |
3624 | class loop * |
3625 | vect_loop_versioning (loop_vec_info loop_vinfo, |
3626 | gimple *loop_vectorized_call) |
3627 | { |
3628 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; |
3629 | class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
3630 | basic_block condition_bb; |
3631 | gphi_iterator gsi; |
3632 | gimple_stmt_iterator cond_exp_gsi; |
3633 | basic_block merge_bb; |
3634 | basic_block new_exit_bb; |
3635 | edge new_exit_e, e; |
3636 | gphi *orig_phi, *new_phi; |
3637 | tree cond_expr = NULL_TREE; |
3638 | gimple_seq cond_expr_stmt_list = NULL; |
3639 | tree arg; |
3640 | profile_probability prob = profile_probability::likely (); |
3641 | gimple_seq gimplify_stmt_list = NULL; |
3642 | tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); |
3643 | bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); |
3644 | bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); |
3645 | bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); |
3646 | poly_uint64 versioning_threshold |
3647 | = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); |
3648 | tree version_simd_if_cond |
3649 | = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); |
3650 | unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
3651 | |
3652 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) |
3653 | && !ordered_p (a: th, b: versioning_threshold)) |
3654 | cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, |
3655 | build_int_cst (TREE_TYPE (scalar_loop_iters), |
3656 | th - 1)); |
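  /* Note SCALAR_LOOP_ITERS is NITERSM1, i.e. NITERS - 1, so requiring at
     least TH (resp. VERSIONING_THRESHOLD) iterations is phrased as a
     comparison against TH - 1 (resp. VERSIONING_THRESHOLD - 1).  */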
  if (maybe_ne (versioning_threshold, 0U))
3658 | { |
3659 | tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, |
3660 | build_int_cst (TREE_TYPE (scalar_loop_iters), |
3661 | versioning_threshold - 1)); |
3662 | if (cond_expr) |
3663 | cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node, |
3664 | expr, cond_expr); |
3665 | else |
3666 | cond_expr = expr; |
3667 | } |
3668 | |
3669 | tree cost_name = NULL_TREE; |
3670 | profile_probability prob2 = profile_probability::always (); |
3671 | if (cond_expr |
3672 | && EXPR_P (cond_expr) |
3673 | && (version_niter |
3674 | || version_align |
3675 | || version_alias |
3676 | || version_simd_if_cond)) |
3677 | { |
3678 | cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
3679 | &cond_expr_stmt_list, |
3680 | is_gimple_val, NULL_TREE); |
      /* Split PROB into two so that the overall probability of passing
	 both the cost-model and versioning checks is the original PROB.  */
3683 | prob2 = prob = prob.sqrt (); |
3684 | } |
3685 | |
3686 | if (version_niter) |
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3688 | |
3689 | if (cond_expr) |
3690 | { |
3691 | gimple_seq tem = NULL; |
3692 | cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
3693 | &tem, is_gimple_condexpr_for_cond, |
3694 | NULL_TREE); |
3695 | gimple_seq_add_seq (&cond_expr_stmt_list, tem); |
3696 | } |
3697 | |
3698 | if (version_align) |
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);
3701 | |
3702 | if (version_alias) |
3703 | { |
      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
      vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3707 | } |
3708 | |
3709 | if (version_simd_if_cond) |
3710 | { |
3711 | gcc_assert (dom_info_available_p (CDI_DOMINATORS)); |
3712 | if (flag_checking) |
3713 | if (basic_block bb |
3714 | = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond))) |
3715 | gcc_assert (bb != loop->header |
3716 | && dominated_by_p (CDI_DOMINATORS, loop->header, bb) |
3717 | && (scalar_loop == NULL |
3718 | || (bb != scalar_loop->header |
3719 | && dominated_by_p (CDI_DOMINATORS, |
3720 | scalar_loop->header, bb)))); |
3721 | tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond)); |
3722 | tree c = fold_build2 (NE_EXPR, boolean_type_node, |
3723 | version_simd_if_cond, zero); |
3724 | if (cond_expr) |
3725 | cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
3726 | c, cond_expr); |
3727 | else |
3728 | cond_expr = c; |
3729 | if (dump_enabled_p ()) |
3730 | dump_printf_loc (MSG_NOTE, vect_location, |
3731 | "created versioning for simd if condition check.\n" ); |
3732 | } |
3733 | |
3734 | cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
3735 | &gimplify_stmt_list, |
3736 | is_gimple_condexpr_for_cond, NULL_TREE); |
3737 | gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); |
3738 | |
  /* Compute the outermost loop in which cond_expr and cond_expr_stmt_list
     are invariant.  */
3741 | class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr); |
  for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      update_stmt (stmt);
3747 | ssa_op_iter iter; |
3748 | use_operand_p use_p; |
3749 | basic_block def_bb; |
3750 | FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) |
3751 | if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p)))) |
3752 | && flow_bb_inside_loop_p (outermost, def_bb)) |
3753 | outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1); |
3754 | } |
3755 | |
3756 | /* Search for the outermost loop we can version. Avoid versioning of |
3757 | non-perfect nests but allow if-conversion versioned loops inside. */ |
3758 | class loop *loop_to_version = loop; |
3759 | if (flow_loop_nested_p (outermost, loop)) |
3760 | { |
3761 | if (dump_enabled_p ()) |
3762 | dump_printf_loc (MSG_NOTE, vect_location, |
3763 | "trying to apply versioning to outer loop %d\n" , |
3764 | outermost->num); |
3765 | if (outermost->num == 0) |
3766 | outermost = superloop_at_depth (loop, 1); |
3767 | /* And avoid applying versioning on non-perfect nests. */ |
3768 | while (loop_to_version != outermost |
3769 | && (e = single_exit (loop_outer (loop: loop_to_version))) |
3770 | && !(e->flags & EDGE_COMPLEX) |
3771 | && (!loop_outer (loop: loop_to_version)->inner->next |
3772 | || vect_loop_vectorized_call (loop_to_version)) |
3773 | && (!loop_outer (loop: loop_to_version)->inner->next |
3774 | || !loop_outer (loop: loop_to_version)->inner->next->next)) |
3775 | loop_to_version = loop_outer (loop: loop_to_version); |
3776 | } |
3777 | |
  /* Apply versioning.  If there is already a scalar version created by
     if-conversion, re-use that.  Note we cannot re-use the copy of
     an if-converted outer-loop when vectorizing the inner loop only.  */
3781 | gcond *cond; |
3782 | if ((!loop_to_version->inner || loop == loop_to_version) |
3783 | && loop_vectorized_call) |
3784 | { |
3785 | gcc_assert (scalar_loop); |
      condition_bb = gimple_bb (loop_vectorized_call);
      cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
      gimple_cond_set_condition_from_tree (cond, cond_expr);
      update_stmt (cond);
3790 | |
3791 | if (cond_expr_stmt_list) |
3792 | { |
3793 | cond_exp_gsi = gsi_for_stmt (loop_vectorized_call); |
3794 | gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, |
3795 | GSI_SAME_STMT); |
3796 | } |
3797 | |
      /* If-conversion uses profile_probability::always () for both paths;
	 reset the paths' probabilities appropriately.  */
3800 | edge te, fe; |
3801 | extract_true_false_edges_from_block (condition_bb, &te, &fe); |
3802 | te->probability = prob; |
3803 | fe->probability = prob.invert (); |
      /* We can scale loop counts immediately but have to postpone
	 scaling the scalar loop because we re-use it during peeling.

	 If-conversion duplicates the loop preheader and loop body and
	 produces a basic block after the loop exit.  We need to scale
	 all of that.  */
      basic_block preheader = loop_preheader_edge (loop_to_version)->src;
      preheader->count = preheader->count.apply_probability (prob * prob2);
      scale_loop_frequencies (loop_to_version, prob * prob2);
      single_exit (loop_to_version)->dest->count = preheader->count;
3813 | LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert (); |
3814 | |
3815 | nloop = scalar_loop; |
3816 | if (dump_enabled_p ()) |
3817 | dump_printf_loc (MSG_NOTE, vect_location, |
3818 | "reusing %sloop version created by if conversion\n" , |
3819 | loop_to_version != loop ? "outer " : "" ); |
3820 | } |
3821 | else |
3822 | { |
3823 | if (loop_to_version != loop |
3824 | && dump_enabled_p ()) |
3825 | dump_printf_loc (MSG_NOTE, vect_location, |
3826 | "applying loop versioning to outer loop %d\n" , |
3827 | loop_to_version->num); |
3828 | |
3829 | unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx; |
3830 | |
3831 | initialize_original_copy_tables (); |
3832 | nloop = loop_version (loop_to_version, cond_expr, &condition_bb, |
3833 | prob * prob2, (prob * prob2).invert (), |
3834 | prob * prob2, (prob * prob2).invert (), |
3835 | true); |
      /* We will later insert a second conditional, so the overall outcome
	 of both is prob * prob2.  */
3838 | edge true_e, false_e; |
3839 | extract_true_false_edges_from_block (condition_bb, &true_e, &false_e); |
3840 | true_e->probability = prob; |
3841 | false_e->probability = prob.invert (); |
3842 | gcc_assert (nloop); |
3843 | nloop = get_loop_copy (loop); |
3844 | |
      /* For cycle vectorization with SLP we rely on the PHI arguments
	 appearing in the same order as the SLP node operands, which for the
	 loop PHI nodes means the preheader edge dest index needs to remain
	 the same for the analyzed loop, which also becomes the vectorized
	 one.  Make it so, in case the state after versioning differs, by
	 redirecting the first edge into the header to the same destination,
	 which moves it last.  */
3852 | if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx) |
3853 | { |
3854 | edge e = EDGE_PRED (loop->header, 0); |
3855 | ssa_redirect_edge (e, e->dest); |
3856 | flush_pending_stmts (e); |
3857 | } |
3858 | gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx); |
3859 | |
      /* Kill off the IFN_LOOP_VECTORIZED calls in the copy; nobody will
	 reap those otherwise, and they also refer to the original
	 loops.  */
3863 | class loop *l = loop; |
3864 | while (gimple *call = vect_loop_vectorized_call (l)) |
3865 | { |
3866 | call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call))); |
3867 | fold_loop_internal_call (call, boolean_false_node); |
	  l = loop_outer (l);
3869 | } |
3870 | free_original_copy_tables (); |
3871 | |
3872 | if (cond_expr_stmt_list) |
3873 | { |
	  cond_exp_gsi = gsi_last_bb (condition_bb);
3875 | gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, |
3876 | GSI_SAME_STMT); |
3877 | } |
3878 | |
3879 | /* Loop versioning violates an assumption we try to maintain during |
3880 | vectorization - that the loop exit block has a single predecessor. |
3881 | After versioning, the exit block of both loop versions is the same |
3882 | basic block (i.e. it has two predecessors). Just in order to simplify |
3883 | following transformations in the vectorizer, we fix this situation |
3884 | here by adding a new (empty) block on the exit-edge of the loop, |
3885 | with the proper loop-exit phis to maintain loop-closed-form. |
	 If loop versioning wasn't done from loop, but from scalar_loop
	 instead, merge_bb will already have just a single predecessor.  */
3888 | |
3889 | merge_bb = single_exit (loop_to_version)->dest; |
3890 | if (EDGE_COUNT (merge_bb->preds) >= 2) |
3891 | { |
3892 | gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2); |
3893 | new_exit_bb = split_edge (single_exit (loop_to_version)); |
3894 | new_exit_e = single_exit (loop_to_version); |
3895 | e = EDGE_SUCC (new_exit_bb, 0); |
3896 | |
	  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
	       gsi_next (&gsi))
3899 | { |
3900 | tree new_res; |
3901 | orig_phi = gsi.phi (); |
3902 | new_res = copy_ssa_name (PHI_RESULT (orig_phi)); |
3903 | new_phi = create_phi_node (new_res, new_exit_bb); |
3904 | arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); |
3905 | add_phi_arg (new_phi, arg, new_exit_e, |
			   gimple_phi_arg_location_from_edge (orig_phi, e));
	      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3908 | } |
3909 | } |
3910 | |
3911 | update_ssa (TODO_update_ssa_no_phi); |
3912 | } |
3913 | |
3914 | /* Split the cost model check off to a separate BB. Costing assumes |
3915 | this is the only thing we perform when we enter the scalar loop |
3916 | from a failed cost decision. */ |
3917 | if (cost_name && TREE_CODE (cost_name) == SSA_NAME) |
3918 | { |
3919 | gimple *def = SSA_NAME_DEF_STMT (cost_name); |
3920 | gcc_assert (gimple_bb (def) == condition_bb); |
3921 | /* All uses of the cost check are 'true' after the check we |
3922 | are going to insert. */ |
3923 | replace_uses_by (cost_name, boolean_true_node); |
3924 | /* And we're going to build the new single use of it. */ |
3925 | gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node, |
3926 | NULL_TREE, NULL_TREE); |
      edge e = split_block (gimple_bb (def), def);
3928 | gimple_stmt_iterator gsi = gsi_for_stmt (def); |
3929 | gsi_insert_after (&gsi, cond, GSI_NEW_STMT); |
3930 | edge true_e, false_e; |
3931 | extract_true_false_edges_from_block (e->dest, &true_e, &false_e); |
3932 | e->flags &= ~EDGE_FALLTHRU; |
3933 | e->flags |= EDGE_TRUE_VALUE; |
3934 | edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE); |
3935 | e->probability = prob2; |
3936 | e2->probability = prob2.invert (); |
3937 | e->dest->count = e->count (); |
3938 | set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src); |
3939 | auto_vec<basic_block, 3> adj; |
3940 | for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest); |
3941 | son; |
3942 | son = next_dom_son (CDI_DOMINATORS, son)) |
3943 | if (EDGE_COUNT (son->preds) > 1) |
	    adj.safe_push (son);
3945 | for (auto son : adj) |
3946 | set_immediate_dominator (CDI_DOMINATORS, son, e->src); |
3949 | } |
3950 | |
3951 | if (version_niter) |
3952 | { |
3953 | /* The versioned loop could be infinite, we need to clear existing |
3954 | niter information which is copied from the original loop. */ |
3955 | gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE)); |
3956 | vect_free_loop_info_assumptions (nloop); |
3957 | } |
3958 | |
3959 | if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION |
3960 | && dump_enabled_p ()) |
3961 | { |
3962 | if (version_alias) |
3963 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING, |
3964 | vect_location, |
3965 | "loop versioned for vectorization because of " |
3966 | "possible aliasing\n" ); |
3967 | if (version_align) |
3968 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING, |
3969 | vect_location, |
3970 | "loop versioned for vectorization to enhance " |
3971 | "alignment\n" ); |
3972 | |
3973 | } |
3974 | |
3975 | return nloop; |
3976 | } |
3977 | |