1 | /* Data References Analysis and Manipulation Utilities for Vectorization. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | and Ira Rosen <irar@il.ibm.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "gimple.h" |
30 | #include "predict.h" |
31 | #include "memmodel.h" |
32 | #include "tm_p.h" |
33 | #include "ssa.h" |
34 | #include "optabs-tree.h" |
35 | #include "cgraph.h" |
36 | #include "dumpfile.h" |
37 | #include "alias.h" |
38 | #include "fold-const.h" |
39 | #include "stor-layout.h" |
40 | #include "tree-eh.h" |
41 | #include "gimplify.h" |
42 | #include "gimple-iterator.h" |
43 | #include "gimplify-me.h" |
44 | #include "tree-ssa-loop-ivopts.h" |
45 | #include "tree-ssa-loop-manip.h" |
46 | #include "tree-ssa-loop.h" |
47 | #include "cfgloop.h" |
48 | #include "tree-scalar-evolution.h" |
49 | #include "tree-vectorizer.h" |
50 | #include "expr.h" |
51 | #include "builtins.h" |
52 | #include "tree-cfg.h" |
53 | #include "tree-hash-traits.h" |
54 | #include "vec-perm-indices.h" |
55 | #include "internal-fn.h" |
56 | #include "gimple-fold.h" |
57 | |
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB and is used
   only for dump messages.

   Emits a NOTE dump on success and a MISSED_OPTIMIZATION dump on
   failure when dumping is enabled.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
			      tree vectype, unsigned HOST_WIDE_INT count)
{
  machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  /* Ask the target for its preferred mode holding COUNT vectors of MODE;
     if it has none, fall back to an integer mode of the combined size.  */
  if (!targetm.array_mode (mode, count).exists (mode: &array_mode))
    {
      poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      /* Only cap the fallback integer mode at MAX_FIXED_MODE_SIZE when the
	 target does not explicitly claim support for this array shape.  */
      limit_p = !targetm.array_mode_supported_p (mode, count);
      if (!int_mode_for_size (size: bits, limit: limit_p).exists (mode: &array_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no array mode for %s[%wu]\n",
			     GET_MODE_NAME (mode), count);
	  return false;
	}
    }

  /* The optab must provide a conversion between the array mode and the
     vector mode for the lanes operation to be usable.  */
  if (convert_optab_handler (op: optab, to_mode: array_mode, from_mode: mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot use %s<%s><%s>\n", name,
			 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
		     GET_MODE_NAME (mode));

  return true;
}
99 | |
100 | /* Helper function to identify a simd clone call. If this is a call to a |
101 | function with simd clones then return the corresponding cgraph_node, |
102 | otherwise return NULL. */ |
103 | |
104 | static cgraph_node* |
105 | simd_clone_call_p (gimple *stmt) |
106 | { |
107 | gcall *call = dyn_cast <gcall *> (p: stmt); |
108 | if (!call) |
109 | return NULL; |
110 | |
111 | tree fndecl = NULL_TREE; |
112 | if (gimple_call_internal_p (gs: call, fn: IFN_MASK_CALL)) |
113 | fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0); |
114 | else |
115 | fndecl = gimple_call_fndecl (gs: stmt); |
116 | |
117 | if (fndecl == NULL_TREE) |
118 | return NULL; |
119 | |
120 | cgraph_node *node = cgraph_node::get (decl: fndecl); |
121 | if (node && node->simd_clones != NULL) |
122 | return node; |
123 | |
124 | return NULL; |
125 | } |
126 | |
127 | |
128 | |
/* Return the smallest scalar part of STMT_INFO.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   SCALAR_TYPE is the type of the result (lhs); the function returns it
   unchanged unless a strictly smaller rhs scalar type is found.  */

tree
vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
{
  HOST_WIDE_INT lhs, rhs;

  /* During the analysis phase, this function is called on arbitrary
     statements that might not have scalar results.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
    return scalar_type;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
  if (assign)
    {
      scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
      /* For widening/conversion operations the rhs type may be smaller
	 than the lhs type; prefer the smaller one.  */
      if (gimple_assign_cast_p (s: assign)
	  || gimple_assign_rhs_code (gs: assign) == DOT_PROD_EXPR
	  || gimple_assign_rhs_code (gs: assign) == WIDEN_SUM_EXPR
	  || gimple_assign_rhs_code (gs: assign) == WIDEN_MULT_EXPR
	  || gimple_assign_rhs_code (gs: assign) == WIDEN_LSHIFT_EXPR
	  || gimple_assign_rhs_code (gs: assign) == FLOAT_EXPR)
	{
	  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));

	  rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	  if (rhs < lhs)
	    scalar_type = rhs_type;
	}
    }
  else if (cgraph_node *node = simd_clone_call_p (stmt: stmt_info->stmt))
    {
      /* For a call with simd clones, scan all vector arguments and keep
	 the smallest element type seen so far.  */
      auto clone = node->simd_clones->simdclone;
      for (unsigned int i = 0; i < clone->nargs; ++i)
	{
	  if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
	    {
	      tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
	      if (rhs < lhs)
		{
		  scalar_type = arg_scalar_type;
		  lhs = rhs;
		}
	    }
	}
    }
  else if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt))
    {
      unsigned int i = 0;
      if (gimple_call_internal_p (gs: call))
	{
	  internal_fn ifn = gimple_call_internal_fn (gs: call);
	  if (internal_load_fn_p (ifn))
	    /* For loads the LHS type does the trick.  */
	    i = ~0U;
	  else if (internal_store_fn_p (ifn))
	    {
	      /* For stores use the type of the stored value.  */
	      i = internal_fn_stored_value_index (ifn);
	      scalar_type = TREE_TYPE (gimple_call_arg (call, i));
	      i = ~0U;
	    }
	  else if (internal_fn_mask_index (ifn) == 0)
	    /* Skip the mask argument; start with the first data arg.  */
	    i = 1;
	}
      if (i < gimple_call_num_args (gs: call))
	{
	  tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
	  if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
	    {
	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
	      if (rhs < lhs)
		scalar_type = rhs_type;
	    }
	}
    }

  return scalar_type;
}
226 | |
227 | |
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static opt_result
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* The user can disable alias versioning entirely via --param.  */
  if ((unsigned) param_vect_max_version_for_alias_checks == 0)
    return opt_result::failure_at (loc: vect_location,
				   fmt: "will not create alias checks, as"
				   " --param vect-max-version-for-alias-checks"
				   " == 0\n");

  /* Check that a runtime alias test is actually possible for this DDR
     (e.g. both refs analyzable, versioning profitable/allowed).  */
  opt_result res
    = runtime_alias_check_p (ddr, loop,
			     optimize_loop_nest_for_speed_p (loop));
  if (!res)
    return res;

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (obj: ddr);
  return opt_result::success ();
}
252 | |
253 | /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */ |
254 | |
255 | static void |
256 | vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value) |
257 | { |
258 | const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo); |
259 | for (unsigned int i = 0; i < checks.length(); ++i) |
260 | if (checks[i] == value) |
261 | return; |
262 | |
263 | if (dump_enabled_p ()) |
264 | dump_printf_loc (MSG_NOTE, vect_location, |
265 | "need run-time check that %T is nonzero\n" , |
266 | value); |
267 | LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (obj: value); |
268 | } |
269 | |
/* Return true if we know that the order of vectorized DR_INFO_A and
   vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
   DR_INFO_B.  At least one of the accesses is a write.  */

static bool
vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
{
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;

  /* Single statements are always kept in their original order.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
      && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
    return true;

  /* STMT_A and STMT_B belong to overlapping groups.  All loads are
     emitted at the position of the first scalar load.
     Stores in a group are emitted at the position of the last scalar store.
     Compute that position and check whether the resulting order matches
     the current one.  */
  /* IL_A: effective insertion point for A's group — the last store or
     the first load of the group.  */
  stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
  if (il_a)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  il_a = get_later_stmt (stmt1_info: il_a, stmt2_info: s);
      else /* DR_IS_READ */
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  if (get_later_stmt (stmt1_info: il_a, stmt2_info: s) == il_a)
	    il_a = s;
    }
  else
    il_a = stmtinfo_a;
  /* Likewise IL_B for B's group.  */
  stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (il_b)
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  il_b = get_later_stmt (stmt1_info: il_b, stmt2_info: s);
      else /* DR_IS_READ */
	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
	     s = DR_GROUP_NEXT_ELEMENT (s))
	  if (get_later_stmt (stmt1_info: il_b, stmt2_info: s) == il_b)
	    il_b = s;
    }
  else
    il_b = stmtinfo_b;
  /* Order is preserved iff the vectorized insertion points compare the
     same way as the original scalar statements.  */
  bool a_after_b = (get_later_stmt (stmt1_info: stmtinfo_a, stmt2_info: stmtinfo_b) == stmtinfo_a);
  return (get_later_stmt (stmt1_info: il_a, stmt2_info: il_b) == il_a) == a_after_b;
}
323 | |
/* A subroutine of vect_analyze_data_ref_dependence.  Handle
   DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
   distances.  These distances are conservatively correct but they don't
   reflect a guaranteed dependence.

   Return true if this function does all the work necessary to avoid
   an alias or false if the caller should use the dependence distances
   to limit the vectorization factor in the usual way.  LOOP_DEPTH is
   the depth of the loop described by LOOP_VINFO and the other arguments
   are as for vect_analyze_data_ref_dependence.  */

static bool
vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
				       loop_vec_info loop_vinfo,
				       int loop_depth, unsigned int *max_vf)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
    {
      int dist = dist_v[loop_depth];
      /* Distance 0 and reversed positive distances need no handling here.  */
      if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
	{
	  /* If the user asserted safelen >= DIST consecutive iterations
	     can be executed concurrently, assume independence.

	     ??? An alternative would be to add the alias check even
	     in this case, and vectorize the fallback loop with the
	     maximum VF set to safelen.  However, if the user has
	     explicitly given a length, it's less likely that that
	     would be a win.  */
	  if (loop->safelen >= 2 && abs_hwi (x: dist) <= loop->safelen)
	    {
	      if ((unsigned int) loop->safelen < *max_vf)
		*max_vf = loop->safelen;
	      LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	      continue;
	    }

	  /* For dependence distances of 2 or more, we have the option
	     of limiting VF or checking for an alias at runtime.
	     Prefer to check at runtime if we can, to avoid limiting
	     the VF unnecessarily when the bases are in fact independent.

	     Note that the alias checks will be removed if the VF ends up
	     being small enough.  */
	  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
	  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
	  return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
		  && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
		  && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
	}
    }
  return true;
}
378 | |
379 | |
/* Function vect_analyze_data_ref_dependence.

   Analyze the dependence between memory references DRA and DRB described
   by DDR.  Return opt_result::failure if there (might) exist a dependence
   that prevents vectorization; return opt_result::success otherwise,
   possibly after queueing a run-time alias check.  Adjust *MAX_VF
   according to the data dependence.  */

static opt_result
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
				  loop_vec_info loop_vinfo,
				  unsigned int *max_vf)
{
  unsigned int i;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* If user asserted safelen consecutive iterations can be
     executed concurrently, assume independence.  */
  auto apply_safelen = [&]()
    {
      if (loop->safelen >= 2)
	{
	  if ((unsigned int) loop->safelen < *max_vf)
	    *max_vf = loop->safelen;
	  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
	  return true;
	}
      return false;
    };

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return opt_result::success ();

  /* Self-dependences and read-read pairs are harmless.  */
  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return opt_result::success ();

  /* We do not have to consider dependences between accesses that belong
     to the same group, unless the stride could be smaller than the
     group size.  */
  if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
      && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
	  == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
      && !STMT_VINFO_STRIDED_P (stmtinfo_a))
    return opt_result::success ();

  /* Even if we have an anti-dependence then, as the vectorized loop covers at
     least two scalar iterations, there is always also a true dependence.
     As the vectorizer does not re-order loads and stores we can ignore
     the anti-dependence if TBAA can disambiguate both DRs similar to the
     case with known negative distance anti-dependences (positive
     distance anti-dependences would violate TBAA constraints).  */
  if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
       || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
      && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
				 get_alias_set (DR_REF (drb))))
    return opt_result::success ();

  /* Gather/scatter accesses cannot be covered by runtime alias checks;
     only safelen can save us here.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    {
      if (apply_safelen ())
	return opt_result::success ();

      return opt_result::failure_at
	(loc: stmtinfo_a->stmt,
	 fmt: "possible alias involving gather/scatter between %T and %T\n",
	 DR_REF (dra), DR_REF (drb));
    }

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "can't determine dependence between %T and %T\n",
			 DR_REF (dra), DR_REF (drb));

      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (apply_safelen ())
	return opt_result::success ();

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
			 "versioning for alias required: "
			 "bad dist vector for %T and %T\n",
			 DR_REF (dra), DR_REF (drb));
      /* Add to list of ddrs that need to be tested at run-time.  */
      return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (var: loop->num, DDR_LOOP_NEST (ddr));

  /* Try the cheaper "possibly independent" analysis first.  */
  if (DDR_COULD_BE_INDEPENDENT_P (ddr)
      && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
						loop_depth, max_vf))
    return opt_result::success ();

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "dependence distance = %d.\n", dist);

      if (dist == 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance == 0 between %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* When we perform grouped accesses and perform implicit CSE
	     by detecting equal accesses and doing disambiguation with
	     runtime alias tests like for
		.. = a[i];
		.. = a[i+1];
		a[i] = ..;
		a[i+1] = ..;
		*p = ..;
		.. = a[i];
		.. = a[i+1];
	     where we will end up loading { a[i], a[i+1] } once, make
	     sure that inserting group loads before the first load and
	     stores after the last store will do the right thing.
	     Similar for groups like
		a[i] = ...;
		... = a[i];
		a[i+1] = ...;
	     where loads from the group interleave with the store.  */
	  if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
	    return opt_result::failure_at (loc: stmtinfo_a->stmt,
					   fmt: "READ_WRITE dependence"
					   " in interleaving.\n");

	  /* A zero-step access aliasing itself every iteration needs a
	     runtime nonzero-step check unless safelen covers it.  */
	  if (loop->safelen < 2)
	    {
	      tree indicator = dr_zero_step_indicator (dra);
	      if (!indicator || integer_zerop (indicator))
		return opt_result::failure_at (loc: stmtinfo_a->stmt,
					       fmt: "access also has a zero step\n");
	      else if (TREE_CODE (indicator) != INTEGER_CST)
		vect_check_nonzero_value (loop_vinfo, value: indicator);
	    }
	  continue;
	}

      if (dist > 0 && DDR_REVERSED_P (ddr))
	{
	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
	     reversed (to make distance vector positive), and the actual
	     distance is negative.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance negative.\n");
	  /* When doing outer loop vectorization, we need to check if there is
	     a backward dependence at the inner loop level if the dependence
	     at the outer loop is reversed.  See PR81740.  */
	  if (nested_in_vect_loop_p (loop, stmt_info: stmtinfo_a)
	      || nested_in_vect_loop_p (loop, stmt_info: stmtinfo_b))
	    {
	      unsigned inner_depth = index_in_loop_nest (var: loop->inner->num,
							 DDR_LOOP_NEST (ddr));
	      if (dist_v[inner_depth] < 0)
		return opt_result::failure_at (loc: stmtinfo_a->stmt,
					       fmt: "not vectorized, dependence "
					       "between data-refs %T and %T\n",
					       DR_REF (dra), DR_REF (drb));
	    }
	  /* Record a negative dependence distance to later limit the
	     amount of stmt copying / unrolling we can perform.
	     Only need to handle read-after-write dependence.  */
	  if (DR_IS_READ (drb)
	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
	  continue;
	}

      unsigned int abs_dist = abs (x: dist);
      if (abs_dist >= 2 && abs_dist < *max_vf)
	{
	  /* The dependence distance requires reduction of the maximal
	     vectorization factor.  */
	  *max_vf = abs_dist;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "adjusting maximal vectorization factor to %i\n",
			     *max_vf);
	}

      if (abs_dist >= *max_vf)
	{
	  /* Dependence distance does not create dependence, as far as
	     vectorization is concerned, in this case.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "dependence distance >= VF.\n");
	  continue;
	}

      return opt_result::failure_at (loc: stmtinfo_a->stmt,
				     fmt: "not vectorized, possible dependence "
				     "between data-refs %T and %T\n",
				     DR_REF (dra), DR_REF (drb));
    }

  return opt_result::success ();
}
615 | |
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

opt_result
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
				   unsigned int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");

  /* Lazily compute all dependence relations on first use.  */
  if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
    {
      LOOP_VINFO_DDRS (loop_vinfo)
	.create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
		 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
      /* We do not need read-read dependences.  */
      bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
					  &LOOP_VINFO_DDRS (loop_vinfo),
					  LOOP_VINFO_LOOP_NEST (loop_vinfo),
					  false);
      gcc_assert (res);
    }

  LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;

  /* For epilogues we either have no aliases or alias versioning
     was applied to original loop.  Therefore we may just get max_vf
     using VF of original loop.  */
  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
  else
    /* Analyze each relation; bail out on the first hard dependence.  */
    FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
      {
	opt_result res
	  = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
	if (!res)
	  return res;
      }

  return opt_result::success ();
}
662 | |
663 | |
/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB for VINFO.  Return FALSE when the two
   references are known independent (or harmless, e.g. read-read or part
   of the same interleaving chain).  */

static bool
vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
				      struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
      && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
	  == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't determine dependence between %T and %T\n",
			 DR_REF (dra), DR_REF (drb));
    }
  else if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "determined dependence between %T and %T\n",
		     DR_REF (dra), DR_REF (drb));

  /* Either an unknown or a known (non-trivial) dependence — in both
     cases we must assume it prohibits the transform.  */
  return true;
}
716 | |
717 | |
/* Analyze dependences involved in the transform of a store SLP NODE.
   Return true if all scalar stores of the node can be sunk to the
   position of the last store in the group.  */

static bool
vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
{
  /* This walks over all stmts involved in the SLP store done
     in NODE verifying we can sink them up to the last stmt in the
     group.  */
  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
  gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));

  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    {
      stmt_vec_info access_info
	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
      if (access_info == last_access_info)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
      ao_ref ref;
      bool ref_initialized_p = false;
      /* Walk forward from this store to the sink position, checking every
	 intervening stmt that touches memory.  */
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
	   gsi_stmt (i: gsi) != last_access_info->stmt; gsi_next (i: &gsi))
	{
	  gimple *stmt = gsi_stmt (i: gsi);
	  if (! gimple_vuse (g: stmt))
	    continue;

	  /* If we couldn't record a (single) data reference for this
	     stmt we have to resort to the alias oracle.  */
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
	  if (!dr_b)
	    {
	      /* We are moving a store - this means
		 we cannot use TBAA for disambiguation.  */
	      if (!ref_initialized_p)
		ao_ref_init (&ref, DR_REF (dr_a));
	      if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
		  || ref_maybe_used_by_stmt_p (stmt, &ref, false))
		return false;
	      continue;
	    }

	  gcc_assert (!gimple_visited_p (stmt));

	  /* Full dependence test between the moved store and the
	     intervening reference.  */
	  ddr_p ddr = initialize_data_dependence_relation (dr_a,
							   dr_b, vNULL);
	  bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
	  free_dependence_relation (ddr);
	  if (dependent)
	    return false;
	}
    }
  return true;
}
773 | |
/* Analyze dependences involved in the transform of a load SLP NODE.  STORES
   contain the vector of scalar stores of this instance if we are
   disambiguating the loads.  LAST_STORE_INFO is the position those stores
   will have been sunk to.  Return true if all loads of the node can be
   hoisted to the position of the first load in the group.  */

static bool
vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
				   vec<stmt_vec_info> stores,
				   stmt_vec_info last_store_info)
{
  /* This walks over all stmts involved in the SLP load done
     in NODE verifying we can hoist them up to the first stmt in the
     group.  */
  stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
  gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));

  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    {
      stmt_vec_info access_info
	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
      if (access_info == first_access_info)
	continue;
      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
      ao_ref ref;
      bool ref_initialized_p = false;
      hash_set<stmt_vec_info> grp_visited;
      /* Walk backward from this load to the hoist position, checking every
	 intervening stmt that writes memory.  */
      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
	   gsi_stmt (i: gsi) != first_access_info->stmt; gsi_prev (i: &gsi))
	{
	  gimple *stmt = gsi_stmt (i: gsi);
	  if (! gimple_vdef (g: stmt))
	    continue;

	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);

	  /* If we run into a store of this same instance (we've just
	     marked those) then delay dependence checking until we run
	     into the last store because this is where it will have
	     been sunk to (and we verified that we can do that already).  */
	  if (gimple_visited_p (stmt))
	    {
	      if (stmt_info != last_store_info)
		continue;

	      for (stmt_vec_info &store_info : stores)
		{
		  data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
		  ddr_p ddr = initialize_data_dependence_relation
				(dr_a, store_dr, vNULL);
		  bool dependent
		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    return false;
		}
	      continue;
	    }

	  /* Check whether hoisting the load of DR_A past the store in
	     STMT_INFO is safe; returns true if there is no dependence.  */
	  auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
	    {
	      /* We are hoisting a load - this means we can use TBAA for
		 disambiguation.  */
	      if (!ref_initialized_p)
		ao_ref_init (&ref, DR_REF (dr_a));
	      if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
		{
		  /* If we couldn't record a (single) data reference for this
		     stmt we have to give up now.  */
		  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
		  if (!dr_b)
		    return false;
		  ddr_p ddr = initialize_data_dependence_relation (dr_a,
								   dr_b, vNULL);
		  bool dependent
		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
		  free_dependence_relation (ddr);
		  if (dependent)
		    return false;
		}
	      /* No dependence.  */
	      return true;
	    };
	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	    {
	      /* When we run into a store group we have to honor
		 that earlier stores might be moved here.  We don't
		 know exactly which and where to since we lack a
		 back-mapping from DR to SLP node, so assume all
		 earlier stores are sunk here.  It's enough to
		 consider the last stmt of a group for this.
		 ??? Both this and the fact that we disregard that
		 the conflicting instance might be removed later
		 is overly conservative.  */
	      if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
		for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
		     store_info != NULL;
		     store_info = DR_GROUP_NEXT_ELEMENT (store_info))
		  if ((store_info == stmt_info
		       || get_later_stmt (stmt1_info: store_info, stmt2_info: stmt_info) == stmt_info)
		      && !check_hoist (store_info))
		    return false;
	    }
	  else
	    {
	      if (!check_hoist (stmt_info))
		return false;
	    }
	}
    }
  return true;
}
884 | |
885 | |
/* Function vect_slp_analyze_instance_dependence.

   Analyze the data dependences of SLP instance INSTANCE: verify that the
   stores of the instance can be sunk to the vectorized statement insert
   location and that its loads do not conflict with stores that intervene
   before that location.  Return true if the instance remains vectorizable.  */
891 | |
bool
vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence" );

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = NULL;
  if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    store = SLP_INSTANCE_TREE (instance);

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  stmt_vec_info last_store_info = NULL;
  if (store)
    {
      if (! vect_slp_analyze_store_dependences (vinfo, node: store))
	return false;

      /* Mark stores in this instance and remember the last one.
	 The visited flag lets the load dependence analysis below
	 recognize stores belonging to this very instance and treat
	 them specially.  */
      last_store_info = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, visited_p: true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    if (! vect_slp_analyze_load_dependences (vinfo, node: load,
					     stores: store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store_info))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, visited_p: false);

  return res;
}
936 | |
937 | /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET |
938 | applied. */ |
939 | |
int
dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
{
  HOST_WIDE_INT diff = 0;
  /* Alignment is only analyzed for the first element of a DR group,
     use that but adjust misalignment by the offset of the access.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    {
      dr_vec_info *first_dr
	= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
      /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
	 INTEGER_CSTs and the first element in the group has the lowest
	 address.  */
      diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
	      - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
      gcc_assert (diff >= 0);
      dr_info = first_dr;
    }

  /* The result is either a byte misalignment relative to the target
     alignment or the special DR_MISALIGNMENT_UNKNOWN value.  */
  int misalign = dr_info->misalignment;
  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
  if (misalign == DR_MISALIGNMENT_UNKNOWN)
    return misalign;

  /* If the access is only aligned for a vector type with smaller alignment
     requirement the access has unknown misalignment.  */
  if (maybe_lt (a: dr_info->target_alignment * BITS_PER_UNIT,
		b: targetm.vectorize.preferred_vector_alignment (vectype)))
    return DR_MISALIGNMENT_UNKNOWN;

  /* Apply the offset from the DR group start and the externally supplied
     offset which can for example result from a negative stride access.  */
  poly_int64 misalignment = misalign + diff + offset;

  /* vect_compute_data_ref_alignment will have ensured that target_alignment
     is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN.  */
  unsigned HOST_WIDE_INT target_alignment_c
    = dr_info->target_alignment.to_constant ();
  if (!known_misalignment (value: misalignment, align: target_alignment_c, misalign: &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
982 | |
983 | /* Record the base alignment guarantee given by DRB, which occurs |
984 | in STMT_INFO. */ |
985 | |
static void
vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
			    innermost_loop_behavior *drb)
{
  bool existed;
  /* The pool is keyed on the base address; keep whichever entry gives
     the strongest base alignment guarantee for that address.  */
  std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    = vinfo->base_alignments.get_or_insert (k: drb->base_address, existed: &existed);
  if (!existed || entry.second->base_alignment < drb->base_alignment)
    {
      entry = std::make_pair (x&: stmt_info, y&: drb);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "recording new base alignment for %T\n"
			 "  alignment:    %d\n"
			 "  misalignment: %d\n"
			 "  based on:     %G" ,
			 drb->base_address,
			 drb->base_alignment,
			 drb->base_misalignment,
			 stmt_info->stmt);
    }
}
1008 | |
1009 | /* If the region we're going to vectorize is reached, all unconditional |
1010 | data references occur at least once. We can therefore pool the base |
1011 | alignment guarantees from each unconditional reference. Do this by |
1012 | going through all the data references in VINFO and checking whether |
1013 | the containing statement makes the reference unconditionally. If so, |
1014 | record the alignment of the base address in VINFO so that it can be |
1015 | used for all other references with the same base. */ |
1016 | |
void
vect_record_base_alignments (vec_info *vinfo)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  for (data_reference *dr : vinfo->shared->datarefs)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      /* Only unconditional, vectorizable, non-gather/scatter references
	 provide a guarantee that holds whenever the region executes.  */
      if (!DR_IS_CONDITIONAL_IN_STMT (dr)
	  && STMT_VINFO_VECTORIZABLE (stmt_info)
	  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	{
	  vect_record_base_alignment (vinfo, stmt_info, drb: &DR_INNERMOST (dr));

	  /* If DR is nested in the loop that is being vectorized, we can also
	     record the alignment of the base wrt the outer loop.  */
	  if (loop && nested_in_vect_loop_p (loop, stmt_info))
	    vect_record_base_alignment
	      (vinfo, stmt_info, drb: &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
	}
    }
}
1040 | |
1041 | /* Function vect_compute_data_ref_alignment |
1042 | |
1043 | Compute the misalignment of the data reference DR_INFO when vectorizing |
1044 | with VECTYPE. |
1045 | |
1046 | Output: |
1047 | 1. initialized misalignment info for DR_INFO |
1048 | |
1049 | FOR NOW: No analysis is actually performed. Misalignment is calculated |
1050 | only for trivial cases. TODO. */ |
1051 | |
static void
vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
				 tree vectype)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  vec_base_alignments *base_alignments = &vinfo->base_alignments;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  class loop *loop = NULL;
  tree ref = DR_REF (dr_info->dr);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_compute_data_ref_alignment:\n" );

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);

  /* Gather/scatter accesses address individual scalars, so vector-level
     alignment analysis does not apply to them.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return;

  innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
  bool step_preserves_misalignment_p;

  /* The target alignment we aim for, in bytes.  */
  poly_uint64 vector_alignment
    = exact_div (a: targetm.vectorize.preferred_vector_alignment (vectype),
		 BITS_PER_UNIT);
  SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);

  /* If the main loop has peeled for alignment we have no way of knowing
     whether the data accesses in the epilogues are aligned.  We can't at
     compile time answer the question whether we have entered the main loop or
     not.  Fixes PR 92351.  */
  if (loop_vinfo)
    {
      loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
      if (orig_loop_vinfo
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
	return;
    }

  unsigned HOST_WIDE_INT vect_align_c;
  if (!vector_alignment.is_constant (const_value: &vect_align_c))
    return;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt_info))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n" );
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n" );
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, b: vect_align_c);

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n" );
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  For loop vectorization
     the pooled guarantee always applies; for BB vectorization only trust
     it when the recording stmt dominates this one (same-BB ordering is
     decided via the dr_aux.group numbers).  */
  std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
    = base_alignments->get (k: drb->base_address);
  if (entry
      && base_alignment < (*entry).second->base_alignment
      && (loop_vinfo
	  || (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: stmt_info->stmt),
			      gimple_bb (g: entry->first->stmt))
	      && (gimple_bb (g: stmt_info->stmt) != gimple_bb (g: entry->first->stmt)
		  || (entry->first->dr_aux.group <= dr_info->group)))))
    {
      base_alignment = entry->second->base_alignment;
      base_misalignment = entry->second->base_misalignment;
    }

  if (drb->offset_alignment < vect_align_c
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown alignment for access: %T\n" , ref);
      return;
    }

  if (base_alignment < vect_align_c)
    {
      unsigned int max_alignment;
      tree base = get_base_for_alignment (drb->base_address, &max_alignment);
      if (max_alignment < vect_align_c
	  || !vect_can_force_dr_alignment_p (base,
					     vect_align_c * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "can't force alignment of ref: %T\n" , ref);
	  return;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "force alignment of %T\n" , ref);

      dr_info->base_decl = base;
      dr_info->base_misaligned = true;
      base_misalignment = 0;
    }
  /* Combine the base misalignment with the constant start offset of the
     access to get the misalignment of the first accessed element.  */
  poly_int64 misalignment
    = base_misalignment + wi::to_poly_offset (t: drb->init).force_shwi ();

  unsigned int const_misalignment;
  if (!known_misalignment (value: misalignment, align: vect_align_c, misalign: &const_misalignment))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Non-constant misalignment for access: %T\n" , ref);
      return;
    }

  SET_DR_MISALIGNMENT (dr_info, const_misalignment);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "misalign = %d bytes of ref %T\n" ,
		     const_misalignment, ref);

  return;
}
1221 | |
1222 | /* Return whether DR_INFO, which is related to DR_PEEL_INFO in |
1223 | that it only differs in DR_INIT, is aligned if DR_PEEL_INFO |
1224 | is made aligned via peeling. */ |
1225 | |
1226 | static bool |
1227 | vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info, |
1228 | dr_vec_info *dr_peel_info) |
1229 | { |
1230 | if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info), |
1231 | DR_TARGET_ALIGNMENT (dr_info))) |
1232 | { |
1233 | poly_offset_int diff |
1234 | = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr)) |
1235 | - wi::to_poly_offset (DR_INIT (dr_info->dr))); |
1236 | if (known_eq (diff, 0) |
1237 | || multiple_p (a: diff, DR_TARGET_ALIGNMENT (dr_info))) |
1238 | return true; |
1239 | } |
1240 | return false; |
1241 | } |
1242 | |
1243 | /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made |
1244 | aligned via peeling. */ |
1245 | |
1246 | static bool |
1247 | vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info, |
1248 | dr_vec_info *dr_peel_info) |
1249 | { |
1250 | if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr), |
1251 | DR_BASE_ADDRESS (dr_peel_info->dr), flags: 0) |
1252 | || !operand_equal_p (DR_OFFSET (dr_info->dr), |
1253 | DR_OFFSET (dr_peel_info->dr), flags: 0) |
1254 | || !operand_equal_p (DR_STEP (dr_info->dr), |
1255 | DR_STEP (dr_peel_info->dr), flags: 0)) |
1256 | return false; |
1257 | |
1258 | return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info); |
1259 | } |
1260 | |
1261 | /* Compute the value for dr_info->misalign so that the access appears |
1262 | aligned. This is used by peeling to compensate for dr_misalignment |
1263 | applying the offset for negative step. */ |
1264 | |
int
vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
{
  /* With a non-negative step no offset is applied by dr_misalignment,
     so "aligned" simply means misalignment zero.  */
  if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
    return 0;

  /* With a negative step dr_misalignment is queried with an offset of
     -(nunits - 1) * element_size; compute the matching positive byte
     amount so that offset plus this misalignment cancels out modulo
     the target alignment.  */
  tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
  poly_int64 misalignment
    = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1)
       * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  /* The target alignment has to be constant to reduce the poly value
     to a plain int.  */
  unsigned HOST_WIDE_INT target_alignment_c;
  int misalign;
  if (!dr_info->target_alignment.is_constant (const_value: &target_alignment_c)
      || !known_misalignment (value: misalignment, align: target_alignment_c, misalign: &misalign))
    return DR_MISALIGNMENT_UNKNOWN;
  return misalign;
}
1283 | |
1284 | /* Function vect_update_misalignment_for_peel. |
1285 | Sets DR_INFO's misalignment |
1286 | - to 0 if it has the same alignment as DR_PEEL_INFO, |
   - to the misalignment computed using NPEEL if DR_INFO's misalignment is known,
1288 | - to -1 (unknown) otherwise. |
1289 | |
1290 | DR_INFO - the data reference whose misalignment is to be adjusted. |
1291 | DR_PEEL_INFO - the data reference whose misalignment is being made |
1292 | zero in the vector loop by the peel. |
1293 | NPEEL - the number of iterations in the peel loop if the misalignment |
1294 | of DR_PEEL_INFO is known at compile time. */ |
1295 | |
static void
vect_update_misalignment_for_peel (dr_vec_info *dr_info,
				   dr_vec_info *dr_peel_info, int npeel)
{
  /* If dr_info is aligned if dr_peel_info is, then mark it so.  */
  if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    {
      SET_DR_MISALIGNMENT (dr_info,
			   vect_dr_misalign_for_aligned_access (dr_peel_info));
      return;
    }

  /* With both misalignments known and a constant target alignment we can
     update DR_INFO's misalignment by the NPEEL scalar iterations that
     are peeled off.  */
  unsigned HOST_WIDE_INT alignment;
  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment)
      && known_alignment_for_access_p (dr_info,
				       STMT_VINFO_VECTYPE (dr_info->stmt))
      && known_alignment_for_access_p (dr_info: dr_peel_info,
				       STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    {
      int misal = dr_info->misalignment;
      /* Each peeled iteration advances the access by DR_STEP bytes;
	 reduce modulo the (power-of-two) target alignment.  */
      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
      misal &= alignment - 1;
      set_dr_misalignment (dr_info, val: misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
		     "to unknown (-1).\n" );
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
}
1327 | |
1328 | /* Return true if alignment is relevant for DR_INFO. */ |
1329 | |
1330 | static bool |
1331 | vect_relevant_for_alignment_p (dr_vec_info *dr_info) |
1332 | { |
1333 | stmt_vec_info stmt_info = dr_info->stmt; |
1334 | |
1335 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
1336 | return false; |
1337 | |
1338 | /* For interleaving, only the alignment of the first access matters. */ |
1339 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
1340 | && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info) |
1341 | return false; |
1342 | |
1343 | /* Scatter-gather and invariant accesses continue to address individual |
1344 | scalars, so vector-level alignment is irrelevant. */ |
1345 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) |
1346 | || integer_zerop (DR_STEP (dr_info->dr))) |
1347 | return false; |
1348 | |
1349 | /* Strided accesses perform only component accesses, alignment is |
1350 | irrelevant for them. */ |
1351 | if (STMT_VINFO_STRIDED_P (stmt_info) |
1352 | && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1353 | return false; |
1354 | |
1355 | return true; |
1356 | } |
1357 | |
/* Given a memory reference EXP, return whether its alignment is less
   than its size.  */
1360 | |
1361 | static bool |
1362 | not_size_aligned (tree exp) |
1363 | { |
1364 | if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp)))) |
1365 | return true; |
1366 | |
1367 | return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp))) |
1368 | > get_object_alignment (exp)); |
1369 | } |
1370 | |
1371 | /* Function vector_alignment_reachable_p |
1372 | |
1373 | Return true if vector alignment for DR_INFO is reachable by peeling |
1374 | a few loop iterations. Return false otherwise. */ |
1375 | |
static bool
vector_alignment_reachable_p (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
	 the prolog loop ({VF - misalignment}), is a multiple of the
	 number of the interleaved accesses.  */
      int elem_size, mis_in_elements;

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr_info, vectype))
	return false;

      poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (node: vectype);
      poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
      elem_size = vector_element_size (vector_size, nelements);
      mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;

      /* The prolog would run nelements - mis_in_elements scalar
	 iterations; peeling can't help unless that is a whole number
	 of interleave groups.  */
      if (!multiple_p (a: nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
	return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr_info, vectype)
      && !aligned_access_p (dr_info, vectype))
    {
      HOST_WIDE_INT elmsize =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "data size = %wd. misalignment = %d.\n" , elmsize,
			   dr_misalignment (dr_info, vectype));
	}
      /* Peeling advances the access by whole elements, so it can only
	 zero a misalignment that is a multiple of the element size.  */
      if (dr_misalignment (dr_info, vectype) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "data size does not divide the misalignment.\n" );
	  return false;
	}
    }

  if (!known_alignment_for_access_p (dr_info, vectype))
    {
      /* Unknown misalignment: let the target decide, passing along
	 whether the reference looks packed (alignment < size).  */
      tree type = TREE_TYPE (DR_REF (dr_info->dr));
      bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown misalignment, %snaturally aligned\n" ,
			 is_packed ? "not " : "" );
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}
1437 | |
1438 | |
1439 | /* Calculate the cost of the memory access represented by DR_INFO. */ |
1440 | |
static void
vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
			   dr_alignment_support alignment_support_scheme,
			   int misalignment,
			   unsigned int *inside_cost,
			   unsigned int *outside_cost,
			   stmt_vector_for_cost *body_cost_vec,
			   stmt_vector_for_cost *prologue_cost_vec)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  int ncopies;

  /* A pure SLP statement is costed once; otherwise cost one copy per
     vector iteration as determined by the vectorization factor.  */
  if (PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));

  /* Dispatch to the load or store cost routine; both accumulate into
     *INSIDE_COST / *OUTSIDE_COST and the provided cost vectors.  */
  if (DR_IS_READ (dr_info->dr))
    vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			misalignment, true, inside_cost,
			outside_cost, prologue_cost_vec, body_cost_vec, false);
  else
    vect_get_store_cost (vinfo,stmt_info, ncopies, alignment_support_scheme,
			 misalignment, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_data_access_cost: inside_cost = %d, "
		     "outside_cost = %d.\n" , *inside_cost, *outside_cost);
}
1472 | |
1473 | |
typedef struct _vect_peel_info
{
  /* The data reference that this peeling would make aligned.  */
  dr_vec_info *dr_info;
  /* Number of scalar iterations to peel; also the hash-table key.  */
  int npeel;
  /* How often this peel amount was recorded; biased by VECT_MAX_COST
     for DRs unsupportable when misaligned under the unlimited cost
     model (see vect_peeling_hash_insert).  */
  unsigned int count;
} *vect_peel_info;
1480 | |
typedef struct _vect_peel_extended_info
{
  /* The vec_info the traversal operates on.  */
  vec_info *vinfo;
  /* The currently best peeling candidate.  */
  struct _vect_peel_info peel_info;
  /* Costs associated with that candidate.  */
  unsigned int inside_cost;
  unsigned int outside_cost;
} *vect_peel_extended_info;
1488 | |
1489 | |
1490 | /* Peeling hashtable helpers. */ |
1491 | |
struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  /* Hash an entry; entries are keyed on their NPEEL value.  */
  static inline hashval_t hash (const _vect_peel_info *);
  /* Compare two entries for equal NPEEL.  */
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};
1497 | |
1498 | inline hashval_t |
1499 | peel_info_hasher::hash (const _vect_peel_info *peel_info) |
1500 | { |
1501 | return (hashval_t) peel_info->npeel; |
1502 | } |
1503 | |
1504 | inline bool |
1505 | peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b) |
1506 | { |
1507 | return (a->npeel == b->npeel); |
1508 | } |
1509 | |
1510 | |
1511 | /* Insert DR_INFO into peeling hash table with NPEEL as key. */ |
1512 | |
static void
vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
			  loop_vec_info loop_vinfo, dr_vec_info *dr_info,
			  int npeel, bool supportable_if_not_aligned)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;

  /* Look up an existing entry for this peel amount; if found just bump
     its popularity count, otherwise create a fresh entry recording
     DR_INFO as the reference this peeling would align.  */
  elem.npeel = npeel;
  slot = peeling_htab->find (value: &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr_info = dr_info;
      slot->count = 1;
      new_slot = peeling_htab->find_slot (value: slot, insert: INSERT);
      *new_slot = slot;
    }

  /* If this DR is not supported with unknown misalignment then bias
     this slot when the cost model is disabled.  */
  if (!supportable_if_not_aligned
      && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    slot->count += VECT_MAX_COST;
}
1541 | |
1542 | |
1543 | /* Traverse peeling hash table to find peeling option that aligns maximum |
1544 | number of data accesses. */ |
1545 | |
1546 | int |
1547 | vect_peeling_hash_get_most_frequent (_vect_peel_info **slot, |
1548 | _vect_peel_extended_info *max) |
1549 | { |
1550 | vect_peel_info elem = *slot; |
1551 | |
1552 | if (elem->count > max->peel_info.count |
1553 | || (elem->count == max->peel_info.count |
1554 | && max->peel_info.npeel > elem->npeel)) |
1555 | { |
1556 | max->peel_info.npeel = elem->npeel; |
1557 | max->peel_info.count = elem->count; |
1558 | max->peel_info.dr_info = elem->dr_info; |
1559 | } |
1560 | |
1561 | return 1; |
1562 | } |
1563 | |
1564 | /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking |
1565 | data access costs for all data refs. If UNKNOWN_MISALIGNMENT is true, |
1566 | npeel is computed at runtime but DR0_INFO's misalignment will be zero |
1567 | after peeling. */ |
1568 | |
static void
vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
				dr_vec_info *dr0_info,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				stmt_vector_for_cost *prologue_cost_vec,
				unsigned int npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  bool dr0_alignment_known_p
    = (dr0_info
       && known_alignment_for_access_p (dr_info: dr0_info,
					STMT_VINFO_VECTYPE (dr0_info->stmt)));

  for (data_reference *dr : datarefs)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      dr_alignment_support alignment_support_scheme;
      int misalignment;
      unsigned HOST_WIDE_INT alignment;

      /* For a negative step query dr_misalignment with the offset of
	 the last vector element, -(nunits - 1) * element_size.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					    size_zero_node) < 0;
      poly_int64 off = 0;
      if (negative)
	off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1)
	       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

      /* Determine the misalignment after peeling NPEEL iterations:
	 unchanged when not peeling, zero for DR0 and anything mutually
	 aligned with it, otherwise either updated by NPEEL steps or
	 unknown when the needed alignment data is missing.  */
      if (npeel == 0)
	misalignment = dr_misalignment (dr_info, vectype, offset: off);
      else if (dr_info == dr0_info
	       || vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info: dr0_info))
	misalignment = 0;
      else if (!dr0_alignment_known_p
	       || !known_alignment_for_access_p (dr_info, vectype)
	       || !DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  misalignment = dr_misalignment (dr_info, vectype, offset: off);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      alignment_support_scheme
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);

      vect_get_data_access_cost (vinfo: loop_vinfo, dr_info,
				 alignment_support_scheme, misalignment,
				 inside_cost, outside_cost,
				 body_cost_vec, prologue_cost_vec);
    }
}
1628 | |
1629 | /* Traverse peeling hash table and calculate cost for each peeling option. |
1630 | Find the one with the lowest cost. */ |
1631 | |
int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
				   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int dummy;
  unsigned int inside_cost = 0, outside_cost = 0;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: min->vinfo);
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
		       epilogue_cost_vec;

  prologue_cost_vec.create (nelems: 2);
  body_cost_vec.create (nelems: 2);
  epilogue_cost_vec.create (nelems: 2);

  /* Sum the data access costs over all relevant DRs assuming this
     entry's peel amount.  */
  vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info: elem->dr_info, inside_cost: &inside_cost,
				  outside_cost: &outside_cost, body_cost_vec: &body_cost_vec,
				  prologue_cost_vec: &prologue_cost_vec, npeel: elem->npeel);

  body_cost_vec.release ();

  outside_cost += vect_get_known_peeling_cost
    (loop_vinfo, elem->npeel, &dummy,
     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
     &prologue_cost_vec, &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  /* Record this candidate if it beats the current minimum on inside
     cost, breaking ties on outside cost.  */
  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
	  && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr_info = elem->dr_info;
      min->peel_info.npeel = elem->npeel;
      min->peel_info.count = elem->count;
    }

  /* Non-zero keeps the hash-table traversal going.  */
  return 1;
}
1678 | |
1679 | |
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct _vect_peel_extended_info
vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
				       loop_vec_info loop_vinfo)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr_info = NULL;
  res.vinfo = loop_vinfo;

  if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    {
      /* Cost model enabled: seed the running minimum with INT_MAX so
	 the first visited option always wins, then pick the cheapest.  */
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_lowest_cost> (argument: &res);
    }
  else
    {
      /* Unlimited cost model: pick the option that aligns the most
	 accesses; the costs are irrelevant and reported as zero.  */
      res.peel_info.count = 0;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_most_frequent> (argument: &res);
      res.inside_cost = 0;
      res.outside_cost = 0;
    }

  return res;
}
1711 | |
/* Return true if the new peeling NPEEL is supported, i.e. if after peeling
   NPEEL scalar iterations to align DR0_INFO every other relevant data
   reference in LOOP_VINFO still has a supportable (possibly unknown)
   alignment.  */

static bool
vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
			  unsigned npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;

  /* If DR0's own misalignment is not known at compile time, peeling
     makes the other DRs' misalignments unknown as well.  */
  bool dr0_alignment_known_p
    = known_alignment_for_access_p (dr_info: dr0_info,
				    STMT_VINFO_VECTYPE (dr0_info->stmt));

  /* Ensure that all data refs can be vectorized after the peel.  */
  for (data_reference *dr : datarefs)
    {
      if (dr == dr0_info->dr)
	continue;

      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      /* Skip DRs that need no alignment handling or that become aligned
	 as a side effect of peeling for DR0.  */
      if (!vect_relevant_for_alignment_p (dr_info)
	  || vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info: dr0_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      int misalignment;
      unsigned HOST_WIDE_INT alignment;
      if (!dr0_alignment_known_p
	  || !known_alignment_for_access_p (dr_info, vectype)
	  || !DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  /* Advance the known misalignment by NPEEL scalar steps,
	     modulo the (constant) target alignment.  */
	  misalignment = dr_misalignment (dr_info, vectype);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      /* One DR with unsupportable post-peel alignment vetoes the peel.  */
      supportable_dr_alignment
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);
      if (supportable_dr_alignment == dr_unaligned_unsupported)
	return false;
    }

  return true;
}
1758 | |
1759 | /* Compare two data-references DRA and DRB to group them into chunks |
1760 | with related alignment. */ |
1761 | |
1762 | static int |
1763 | dr_align_group_sort_cmp (const void *dra_, const void *drb_) |
1764 | { |
1765 | data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); |
1766 | data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); |
1767 | int cmp; |
1768 | |
1769 | /* Stabilize sort. */ |
1770 | if (dra == drb) |
1771 | return 0; |
1772 | |
1773 | /* Ordering of DRs according to base. */ |
1774 | cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
1775 | DR_BASE_ADDRESS (drb)); |
1776 | if (cmp != 0) |
1777 | return cmp; |
1778 | |
1779 | /* And according to DR_OFFSET. */ |
1780 | cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); |
1781 | if (cmp != 0) |
1782 | return cmp; |
1783 | |
1784 | /* And after step. */ |
1785 | cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); |
1786 | if (cmp != 0) |
1787 | return cmp; |
1788 | |
1789 | /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */ |
1790 | cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)); |
1791 | if (cmp == 0) |
1792 | return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1; |
1793 | return cmp; |
1794 | } |
1795 | |
1796 | /* Function vect_enhance_data_refs_alignment |
1797 | |
1798 | This pass will use loop versioning and loop peeling in order to enhance |
1799 | the alignment of data references in the loop. |
1800 | |
1801 | FOR NOW: we assume that whatever versioning/peeling takes place, only the |
1802 | original loop is to be vectorized. Any other loops that are created by |
1803 | the transformations performed in this pass - are not supposed to be |
1804 | vectorized. This restriction will be relaxed. |
1805 | |
1806 | This pass will require a cost model to guide it whether to apply peeling |
1807 | or versioning or a combination of the two. For example, the scheme that |
1808 | intel uses when given a loop with several memory accesses, is as follows: |
1809 | choose one memory access ('p') which alignment you want to force by doing |
1810 | peeling. Then, either (1) generate a loop in which 'p' is aligned and all |
1811 | other accesses are not necessarily aligned, or (2) use loop versioning to |
1812 | generate one loop in which all accesses are aligned, and another loop in |
1813 | which only 'p' is necessarily aligned. |
1814 | |
1815 | ("Automatic Intra-Register Vectorization for the Intel Architecture", |
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1817 | Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) |
1818 | |
1819 | Devising a cost model is the most critical aspect of this work. It will |
1820 | guide us on which access to peel for, whether to use loop versioning, how |
1821 | many versions to create, etc. The cost model will probably consist of |
1822 | generic considerations as well as target specific considerations (on |
1823 | powerpc for example, misaligned stores are more painful than misaligned |
1824 | loads). |
1825 | |
1826 | Here are the general steps involved in alignment enhancements: |
1827 | |
1828 | -- original loop, before alignment analysis: |
1829 | for (i=0; i<N; i++){ |
1830 | x = q[i]; # DR_MISALIGNMENT(q) = unknown |
1831 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
1832 | } |
1833 | |
1834 | -- After vect_compute_data_refs_alignment: |
1835 | for (i=0; i<N; i++){ |
1836 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
1837 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
1838 | } |
1839 | |
1840 | -- Possibility 1: we do loop versioning: |
1841 | if (p is aligned) { |
1842 | for (i=0; i<N; i++){ # loop 1A |
1843 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
1844 | p[i] = y; # DR_MISALIGNMENT(p) = 0 |
1845 | } |
1846 | } |
1847 | else { |
1848 | for (i=0; i<N; i++){ # loop 1B |
1849 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
1850 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned |
1851 | } |
1852 | } |
1853 | |
1854 | -- Possibility 2: we do loop peeling: |
1855 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). |
1856 | x = q[i]; |
1857 | p[i] = y; |
1858 | } |
1859 | for (i = 3; i < N; i++){ # loop 2A |
1860 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
1861 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
1862 | } |
1863 | |
1864 | -- Possibility 3: combination of loop peeling and versioning: |
1865 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). |
1866 | x = q[i]; |
1867 | p[i] = y; |
1868 | } |
1869 | if (p is aligned) { |
1870 | for (i = 3; i<N; i++){ # loop 3A |
1871 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
1872 | p[i] = y; # DR_MISALIGNMENT(p) = 0 |
1873 | } |
1874 | } |
1875 | else { |
1876 | for (i = 3; i<N; i++){ # loop 3B |
1877 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
1878 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned |
1879 | } |
1880 | } |
1881 | |
1882 | These loops are later passed to loop_transform to be vectorized. The |
1883 | vectorizer will use the alignment information to guide the transformation |
1884 | (whether to generate regular loads/stores, or with special handling for |
1885 | misalignment). */ |
1886 | |
1887 | opt_result |
1888 | vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) |
1889 | { |
1890 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1891 | dr_vec_info *first_store = NULL; |
1892 | dr_vec_info *dr0_info = NULL; |
1893 | struct data_reference *dr; |
1894 | unsigned int i; |
1895 | bool do_peeling = false; |
1896 | bool do_versioning = false; |
1897 | unsigned int npeel = 0; |
1898 | bool one_misalignment_known = false; |
1899 | bool one_misalignment_unknown = false; |
1900 | bool one_dr_unsupportable = false; |
1901 | dr_vec_info *unsupportable_dr_info = NULL; |
1902 | unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0; |
1903 | hash_table<peel_info_hasher> peeling_htab (1); |
1904 | |
1905 | DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment" ); |
1906 | |
1907 | /* Reset data so we can safely be called multiple times. */ |
1908 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (size: 0); |
1909 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0; |
1910 | |
1911 | if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ()) |
1912 | return opt_result::success (); |
1913 | |
1914 | /* Sort the vector of datarefs so DRs that have the same or dependent |
1915 | alignment are next to each other. */ |
1916 | auto_vec<data_reference_p> datarefs |
1917 | = LOOP_VINFO_DATAREFS (loop_vinfo).copy (); |
1918 | datarefs.qsort (dr_align_group_sort_cmp); |
1919 | |
1920 | /* Compute the number of DRs that become aligned when we peel |
1921 | a dataref so it becomes aligned. */ |
1922 | auto_vec<unsigned> n_same_align_refs (datarefs.length ()); |
1923 | n_same_align_refs.quick_grow_cleared (len: datarefs.length ()); |
1924 | unsigned i0; |
1925 | for (i0 = 0; i0 < datarefs.length (); ++i0) |
1926 | if (DR_BASE_ADDRESS (datarefs[i0])) |
1927 | break; |
1928 | for (i = i0 + 1; i <= datarefs.length (); ++i) |
1929 | { |
1930 | if (i == datarefs.length () |
1931 | || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]), |
1932 | DR_BASE_ADDRESS (datarefs[i]), flags: 0) |
1933 | || !operand_equal_p (DR_OFFSET (datarefs[i0]), |
1934 | DR_OFFSET (datarefs[i]), flags: 0) |
1935 | || !operand_equal_p (DR_STEP (datarefs[i0]), |
1936 | DR_STEP (datarefs[i]), flags: 0)) |
1937 | { |
1938 | /* The subgroup [i0, i-1] now only differs in DR_INIT and |
1939 | possibly DR_TARGET_ALIGNMENT. Still the whole subgroup |
1940 | will get known misalignment if we align one of the refs |
1941 | with the largest DR_TARGET_ALIGNMENT. */ |
1942 | for (unsigned j = i0; j < i; ++j) |
1943 | { |
1944 | dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]); |
1945 | for (unsigned k = i0; k < i; ++k) |
1946 | { |
1947 | if (k == j) |
1948 | continue; |
1949 | dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]); |
1950 | if (vect_dr_aligned_if_related_peeled_dr_is (dr_info: dr_infok, |
1951 | dr_peel_info: dr_infoj)) |
1952 | n_same_align_refs[j]++; |
1953 | } |
1954 | } |
1955 | i0 = i; |
1956 | } |
1957 | } |
1958 | |
1959 | /* While cost model enhancements are expected in the future, the high level |
1960 | view of the code at this time is as follows: |
1961 | |
1962 | A) If there is a misaligned access then see if peeling to align |
1963 | this access can make all data references satisfy |
1964 | vect_supportable_dr_alignment. If so, update data structures |
1965 | as needed and return true. |
1966 | |
1967 | B) If peeling wasn't possible and there is a data reference with an |
1968 | unknown misalignment that does not satisfy vect_supportable_dr_alignment |
1969 | then see if loop versioning checks can be used to make all data |
1970 | references satisfy vect_supportable_dr_alignment. If so, update |
1971 | data structures as needed and return true. |
1972 | |
1973 | C) If neither peeling nor versioning were successful then return false if |
1974 | any data reference does not satisfy vect_supportable_dr_alignment. |
1975 | |
1976 | D) Return true (all data references satisfy vect_supportable_dr_alignment). |
1977 | |
1978 | Note, Possibility 3 above (which is peeling and versioning together) is not |
1979 | being done at this time. */ |
1980 | |
1981 | /* (1) Peeling to force alignment. */ |
1982 | |
1983 | /* (1.1) Decide whether to perform peeling, and how many iterations to peel: |
1984 | Considerations: |
1985 | + How many accesses will become aligned due to the peeling |
1986 | - How many accesses will become unaligned due to the peeling, |
1987 | and the cost of misaligned accesses. |
1988 | - The cost of peeling (the extra runtime checks, the increase |
1989 | in code size). */ |
1990 | |
1991 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
1992 | { |
1993 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
1994 | if (!vect_relevant_for_alignment_p (dr_info)) |
1995 | continue; |
1996 | |
1997 | stmt_vec_info stmt_info = dr_info->stmt; |
1998 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
1999 | do_peeling = vector_alignment_reachable_p (dr_info); |
2000 | if (do_peeling) |
2001 | { |
2002 | if (known_alignment_for_access_p (dr_info, vectype)) |
2003 | { |
2004 | unsigned int npeel_tmp = 0; |
2005 | bool negative = tree_int_cst_compare (DR_STEP (dr), |
2006 | size_zero_node) < 0; |
2007 | |
2008 | /* If known_alignment_for_access_p then we have set |
2009 | DR_MISALIGNMENT which is only done if we know it at compiler |
2010 | time, so it is safe to assume target alignment is constant. |
2011 | */ |
2012 | unsigned int target_align = |
2013 | DR_TARGET_ALIGNMENT (dr_info).to_constant (); |
2014 | unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info); |
2015 | poly_int64 off = 0; |
2016 | if (negative) |
2017 | off = (TYPE_VECTOR_SUBPARTS (node: vectype) - 1) * -dr_size; |
2018 | unsigned int mis = dr_misalignment (dr_info, vectype, offset: off); |
2019 | mis = negative ? mis : -mis; |
2020 | if (mis != 0) |
2021 | npeel_tmp = (mis & (target_align - 1)) / dr_size; |
2022 | |
2023 | /* For multiple types, it is possible that the bigger type access |
2024 | will have more than one peeling option. E.g., a loop with two |
2025 | types: one of size (vector size / 4), and the other one of |
2026 | size (vector size / 8). Vectorization factor will 8. If both |
2027 | accesses are misaligned by 3, the first one needs one scalar |
2028 | iteration to be aligned, and the second one needs 5. But the |
2029 | first one will be aligned also by peeling 5 scalar |
2030 | iterations, and in that case both accesses will be aligned. |
2031 | Hence, except for the immediate peeling amount, we also want |
2032 | to try to add full vector size, while we don't exceed |
2033 | vectorization factor. |
2034 | We do this automatically for cost model, since we calculate |
2035 | cost for every peeling option. */ |
2036 | poly_uint64 nscalars = npeel_tmp; |
2037 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
2038 | { |
2039 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2040 | nscalars = (STMT_SLP_TYPE (stmt_info) |
2041 | ? vf * DR_GROUP_SIZE (stmt_info) : vf); |
2042 | } |
2043 | |
2044 | /* Save info about DR in the hash table. Also include peeling |
2045 | amounts according to the explanation above. Indicate |
2046 | the alignment status when the ref is not aligned. |
2047 | ??? Rather than using unknown alignment here we should |
2048 | prune all entries from the peeling hashtable which cause |
2049 | DRs to be not supported. */ |
2050 | bool supportable_if_not_aligned |
2051 | = vect_supportable_dr_alignment |
2052 | (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN); |
2053 | while (known_le (npeel_tmp, nscalars)) |
2054 | { |
2055 | vect_peeling_hash_insert (peeling_htab: &peeling_htab, loop_vinfo, |
2056 | dr_info, npeel: npeel_tmp, |
2057 | supportable_if_not_aligned); |
2058 | npeel_tmp += MAX (1, target_align / dr_size); |
2059 | } |
2060 | |
2061 | one_misalignment_known = true; |
2062 | } |
2063 | else |
2064 | { |
2065 | /* If we don't know any misalignment values, we prefer |
2066 | peeling for data-ref that has the maximum number of data-refs |
2067 | with the same alignment, unless the target prefers to align |
2068 | stores over load. */ |
2069 | unsigned same_align_drs = n_same_align_refs[i]; |
2070 | if (!dr0_info |
2071 | || dr0_same_align_drs < same_align_drs) |
2072 | { |
2073 | dr0_same_align_drs = same_align_drs; |
2074 | dr0_info = dr_info; |
2075 | } |
2076 | /* For data-refs with the same number of related |
2077 | accesses prefer the one where the misalign |
2078 | computation will be invariant in the outermost loop. */ |
2079 | else if (dr0_same_align_drs == same_align_drs) |
2080 | { |
2081 | class loop *ivloop0, *ivloop; |
2082 | ivloop0 = outermost_invariant_loop_for_expr |
2083 | (loop, DR_BASE_ADDRESS (dr0_info->dr)); |
2084 | ivloop = outermost_invariant_loop_for_expr |
2085 | (loop, DR_BASE_ADDRESS (dr)); |
2086 | if ((ivloop && !ivloop0) |
2087 | || (ivloop && ivloop0 |
2088 | && flow_loop_nested_p (ivloop, ivloop0))) |
2089 | dr0_info = dr_info; |
2090 | } |
2091 | |
2092 | one_misalignment_unknown = true; |
2093 | |
2094 | /* Check for data refs with unsupportable alignment that |
2095 | can be peeled. */ |
2096 | enum dr_alignment_support supportable_dr_alignment |
2097 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
2098 | DR_MISALIGNMENT_UNKNOWN); |
2099 | if (supportable_dr_alignment == dr_unaligned_unsupported) |
2100 | { |
2101 | one_dr_unsupportable = true; |
2102 | unsupportable_dr_info = dr_info; |
2103 | } |
2104 | |
2105 | if (!first_store && DR_IS_WRITE (dr)) |
2106 | { |
2107 | first_store = dr_info; |
2108 | first_store_same_align_drs = same_align_drs; |
2109 | } |
2110 | } |
2111 | } |
2112 | else |
2113 | { |
2114 | if (!aligned_access_p (dr_info, vectype)) |
2115 | { |
2116 | if (dump_enabled_p ()) |
2117 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2118 | "vector alignment may not be reachable\n" ); |
2119 | break; |
2120 | } |
2121 | } |
2122 | } |
2123 | |
2124 | /* Check if we can possibly peel the loop. */ |
2125 | if (!vect_can_advance_ivs_p (loop_vinfo) |
2126 | || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), |
2127 | LOOP_VINFO_IV_EXIT (loop_vinfo)) |
2128 | || loop->inner) |
2129 | do_peeling = false; |
2130 | |
2131 | struct _vect_peel_extended_info peel_for_known_alignment; |
2132 | struct _vect_peel_extended_info peel_for_unknown_alignment; |
2133 | struct _vect_peel_extended_info best_peel; |
2134 | |
2135 | peel_for_unknown_alignment.inside_cost = INT_MAX; |
2136 | peel_for_unknown_alignment.outside_cost = INT_MAX; |
2137 | peel_for_unknown_alignment.peel_info.count = 0; |
2138 | |
2139 | if (do_peeling |
2140 | && one_misalignment_unknown) |
2141 | { |
2142 | /* Check if the target requires to prefer stores over loads, i.e., if |
2143 | misaligned stores are more expensive than misaligned loads (taking |
2144 | drs with same alignment into account). */ |
2145 | unsigned int load_inside_cost = 0; |
2146 | unsigned int load_outside_cost = 0; |
2147 | unsigned int store_inside_cost = 0; |
2148 | unsigned int store_outside_cost = 0; |
2149 | unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2; |
2150 | |
2151 | stmt_vector_for_cost dummy; |
2152 | dummy.create (nelems: 2); |
2153 | vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info, |
2154 | inside_cost: &load_inside_cost, |
2155 | outside_cost: &load_outside_cost, |
2156 | body_cost_vec: &dummy, prologue_cost_vec: &dummy, npeel: estimated_npeels); |
2157 | dummy.release (); |
2158 | |
2159 | if (first_store) |
2160 | { |
2161 | dummy.create (nelems: 2); |
2162 | vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info: first_store, |
2163 | inside_cost: &store_inside_cost, |
2164 | outside_cost: &store_outside_cost, |
2165 | body_cost_vec: &dummy, prologue_cost_vec: &dummy, |
2166 | npeel: estimated_npeels); |
2167 | dummy.release (); |
2168 | } |
2169 | else |
2170 | { |
2171 | store_inside_cost = INT_MAX; |
2172 | store_outside_cost = INT_MAX; |
2173 | } |
2174 | |
2175 | if (load_inside_cost > store_inside_cost |
2176 | || (load_inside_cost == store_inside_cost |
2177 | && load_outside_cost > store_outside_cost)) |
2178 | { |
2179 | dr0_info = first_store; |
2180 | dr0_same_align_drs = first_store_same_align_drs; |
2181 | peel_for_unknown_alignment.inside_cost = store_inside_cost; |
2182 | peel_for_unknown_alignment.outside_cost = store_outside_cost; |
2183 | } |
2184 | else |
2185 | { |
2186 | peel_for_unknown_alignment.inside_cost = load_inside_cost; |
2187 | peel_for_unknown_alignment.outside_cost = load_outside_cost; |
2188 | } |
2189 | |
2190 | stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec; |
2191 | prologue_cost_vec.create (nelems: 2); |
2192 | epilogue_cost_vec.create (nelems: 2); |
2193 | |
2194 | int dummy2; |
2195 | peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost |
2196 | (loop_vinfo, estimated_npeels, &dummy2, |
2197 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
2198 | &prologue_cost_vec, &epilogue_cost_vec); |
2199 | |
2200 | prologue_cost_vec.release (); |
2201 | epilogue_cost_vec.release (); |
2202 | |
2203 | peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1; |
2204 | } |
2205 | |
2206 | peel_for_unknown_alignment.peel_info.npeel = 0; |
2207 | peel_for_unknown_alignment.peel_info.dr_info = dr0_info; |
2208 | |
2209 | best_peel = peel_for_unknown_alignment; |
2210 | |
2211 | peel_for_known_alignment.inside_cost = INT_MAX; |
2212 | peel_for_known_alignment.outside_cost = INT_MAX; |
2213 | peel_for_known_alignment.peel_info.count = 0; |
2214 | peel_for_known_alignment.peel_info.dr_info = NULL; |
2215 | |
2216 | if (do_peeling && one_misalignment_known) |
2217 | { |
2218 | /* Peeling is possible, but there is no data access that is not supported |
2219 | unless aligned. So we try to choose the best possible peeling from |
2220 | the hash table. */ |
2221 | peel_for_known_alignment = vect_peeling_hash_choose_best_peeling |
2222 | (peeling_htab: &peeling_htab, loop_vinfo); |
2223 | } |
2224 | |
2225 | /* Compare costs of peeling for known and unknown alignment. */ |
2226 | if (peel_for_known_alignment.peel_info.dr_info != NULL |
2227 | && peel_for_unknown_alignment.inside_cost |
2228 | >= peel_for_known_alignment.inside_cost) |
2229 | { |
2230 | best_peel = peel_for_known_alignment; |
2231 | |
2232 | /* If the best peeling for known alignment has NPEEL == 0, perform no |
2233 | peeling at all except if there is an unsupportable dr that we can |
2234 | align. */ |
2235 | if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable) |
2236 | do_peeling = false; |
2237 | } |
2238 | |
2239 | /* If there is an unsupportable data ref, prefer this over all choices so far |
2240 | since we'd have to discard a chosen peeling except when it accidentally |
2241 | aligned the unsupportable data ref. */ |
2242 | if (one_dr_unsupportable) |
2243 | dr0_info = unsupportable_dr_info; |
2244 | else if (do_peeling) |
2245 | { |
2246 | /* Calculate the penalty for no peeling, i.e. leaving everything as-is. |
2247 | TODO: Use nopeel_outside_cost or get rid of it? */ |
2248 | unsigned nopeel_inside_cost = 0; |
2249 | unsigned nopeel_outside_cost = 0; |
2250 | |
2251 | stmt_vector_for_cost dummy; |
2252 | dummy.create (nelems: 2); |
2253 | vect_get_peeling_costs_all_drs (loop_vinfo, NULL, inside_cost: &nopeel_inside_cost, |
2254 | outside_cost: &nopeel_outside_cost, body_cost_vec: &dummy, prologue_cost_vec: &dummy, npeel: 0); |
2255 | dummy.release (); |
2256 | |
2257 | /* Add epilogue costs. As we do not peel for alignment here, no prologue |
2258 | costs will be recorded. */ |
2259 | stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec; |
2260 | prologue_cost_vec.create (nelems: 2); |
2261 | epilogue_cost_vec.create (nelems: 2); |
2262 | |
2263 | int dummy2; |
2264 | nopeel_outside_cost += vect_get_known_peeling_cost |
2265 | (loop_vinfo, 0, &dummy2, |
2266 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
2267 | &prologue_cost_vec, &epilogue_cost_vec); |
2268 | |
2269 | prologue_cost_vec.release (); |
2270 | epilogue_cost_vec.release (); |
2271 | |
2272 | npeel = best_peel.peel_info.npeel; |
2273 | dr0_info = best_peel.peel_info.dr_info; |
2274 | |
2275 | /* If no peeling is not more expensive than the best peeling we |
2276 | have so far, don't perform any peeling. */ |
2277 | if (nopeel_inside_cost <= best_peel.inside_cost) |
2278 | do_peeling = false; |
2279 | } |
2280 | |
2281 | if (do_peeling) |
2282 | { |
2283 | stmt_vec_info stmt_info = dr0_info->stmt; |
2284 | if (known_alignment_for_access_p (dr_info: dr0_info, |
2285 | STMT_VINFO_VECTYPE (stmt_info))) |
2286 | { |
2287 | bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr), |
2288 | size_zero_node) < 0; |
2289 | if (!npeel) |
2290 | { |
2291 | /* Since it's known at compile time, compute the number of |
2292 | iterations in the peeled loop (the peeling factor) for use in |
2293 | updating DR_MISALIGNMENT values. The peeling factor is the |
2294 | vectorization factor minus the misalignment as an element |
2295 | count. */ |
2296 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2297 | poly_int64 off = 0; |
2298 | if (negative) |
2299 | off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
2300 | * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
2301 | unsigned int mis |
2302 | = dr_misalignment (dr_info: dr0_info, vectype, offset: off); |
2303 | mis = negative ? mis : -mis; |
2304 | /* If known_alignment_for_access_p then we have set |
2305 | DR_MISALIGNMENT which is only done if we know it at compiler |
2306 | time, so it is safe to assume target alignment is constant. |
2307 | */ |
2308 | unsigned int target_align = |
2309 | DR_TARGET_ALIGNMENT (dr0_info).to_constant (); |
2310 | npeel = ((mis & (target_align - 1)) |
2311 | / vect_get_scalar_dr_size (dr_info: dr0_info)); |
2312 | } |
2313 | |
2314 | /* For interleaved data access every iteration accesses all the |
2315 | members of the group, therefore we divide the number of iterations |
2316 | by the group size. */ |
2317 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
2318 | npeel /= DR_GROUP_SIZE (stmt_info); |
2319 | |
2320 | if (dump_enabled_p ()) |
2321 | dump_printf_loc (MSG_NOTE, vect_location, |
2322 | "Try peeling by %d\n" , npeel); |
2323 | } |
2324 | |
2325 | /* Ensure that all datarefs can be vectorized after the peel. */ |
2326 | if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel)) |
2327 | do_peeling = false; |
2328 | |
2329 | /* Check if all datarefs are supportable and log. */ |
2330 | if (do_peeling |
2331 | && npeel == 0 |
2332 | && known_alignment_for_access_p (dr_info: dr0_info, |
2333 | STMT_VINFO_VECTYPE (stmt_info))) |
2334 | return opt_result::success (); |
2335 | |
2336 | /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */ |
2337 | if (do_peeling) |
2338 | { |
2339 | unsigned max_allowed_peel |
2340 | = param_vect_max_peeling_for_alignment; |
2341 | if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP) |
2342 | max_allowed_peel = 0; |
2343 | if (max_allowed_peel != (unsigned)-1) |
2344 | { |
2345 | unsigned max_peel = npeel; |
2346 | if (max_peel == 0) |
2347 | { |
2348 | poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info); |
2349 | unsigned HOST_WIDE_INT target_align_c; |
2350 | if (target_align.is_constant (const_value: &target_align_c)) |
2351 | max_peel = |
2352 | target_align_c / vect_get_scalar_dr_size (dr_info: dr0_info) - 1; |
2353 | else |
2354 | { |
2355 | do_peeling = false; |
2356 | if (dump_enabled_p ()) |
2357 | dump_printf_loc (MSG_NOTE, vect_location, |
2358 | "Disable peeling, max peels set and vector" |
2359 | " alignment unknown\n" ); |
2360 | } |
2361 | } |
2362 | if (max_peel > max_allowed_peel) |
2363 | { |
2364 | do_peeling = false; |
2365 | if (dump_enabled_p ()) |
2366 | dump_printf_loc (MSG_NOTE, vect_location, |
2367 | "Disable peeling, max peels reached: %d\n" , max_peel); |
2368 | } |
2369 | } |
2370 | } |
2371 | |
2372 | /* Cost model #2 - if peeling may result in a remaining loop not |
2373 | iterating enough to be vectorized then do not peel. Since this |
2374 | is a cost heuristic rather than a correctness decision, use the |
2375 | most likely runtime value for variable vectorization factors. */ |
2376 | if (do_peeling |
2377 | && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
2378 | { |
2379 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
2380 | unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel; |
2381 | if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo) |
2382 | < assumed_vf + max_peel) |
2383 | do_peeling = false; |
2384 | } |
2385 | |
2386 | if (do_peeling) |
2387 | { |
2388 | /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. |
2389 | If the misalignment of DR_i is identical to that of dr0 then set |
2390 | DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and |
2391 | dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) |
2392 | by the peeling factor times the element size of DR_i (MOD the |
2393 | vectorization factor times the size). Otherwise, the |
2394 | misalignment of DR_i must be set to unknown. */ |
2395 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2396 | if (dr != dr0_info->dr) |
2397 | { |
2398 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2399 | if (!vect_relevant_for_alignment_p (dr_info)) |
2400 | continue; |
2401 | |
2402 | vect_update_misalignment_for_peel (dr_info, dr_peel_info: dr0_info, npeel); |
2403 | } |
2404 | |
2405 | LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info; |
2406 | if (npeel) |
2407 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; |
2408 | else |
2409 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1; |
2410 | SET_DR_MISALIGNMENT (dr0_info, |
2411 | vect_dr_misalign_for_aligned_access (dr0_info)); |
2412 | if (dump_enabled_p ()) |
2413 | { |
2414 | dump_printf_loc (MSG_NOTE, vect_location, |
2415 | "Alignment of access forced using peeling.\n" ); |
2416 | dump_printf_loc (MSG_NOTE, vect_location, |
2417 | "Peeling for alignment will be applied.\n" ); |
2418 | } |
2419 | |
2420 | /* The inside-loop cost will be accounted for in vectorizable_load |
2421 | and vectorizable_store correctly with adjusted alignments. |
2422 | Drop the body_cst_vec on the floor here. */ |
2423 | return opt_result::success (); |
2424 | } |
2425 | } |
2426 | |
2427 | /* (2) Versioning to force alignment. */ |
2428 | |
2429 | /* Try versioning if: |
2430 | 1) optimize loop for speed and the cost-model is not cheap |
2431 | 2) there is at least one unsupported misaligned data ref with an unknown |
2432 | misalignment, and |
2433 | 3) all misaligned data refs with a known misalignment are supported, and |
2434 | 4) the number of runtime alignment checks is within reason. */ |
2435 | |
2436 | do_versioning |
2437 | = (optimize_loop_nest_for_speed_p (loop) |
2438 | && !loop->inner /* FORNOW */ |
2439 | && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP); |
2440 | |
2441 | if (do_versioning) |
2442 | { |
2443 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2444 | { |
2445 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2446 | if (!vect_relevant_for_alignment_p (dr_info)) |
2447 | continue; |
2448 | |
2449 | stmt_vec_info stmt_info = dr_info->stmt; |
2450 | if (STMT_VINFO_STRIDED_P (stmt_info)) |
2451 | { |
2452 | do_versioning = false; |
2453 | break; |
2454 | } |
2455 | |
2456 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2457 | bool negative = tree_int_cst_compare (DR_STEP (dr), |
2458 | size_zero_node) < 0; |
2459 | poly_int64 off = 0; |
2460 | if (negative) |
2461 | off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
2462 | * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
2463 | int misalignment; |
2464 | if ((misalignment = dr_misalignment (dr_info, vectype, offset: off)) == 0) |
2465 | continue; |
2466 | |
2467 | enum dr_alignment_support supportable_dr_alignment |
2468 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
2469 | misalignment); |
2470 | if (supportable_dr_alignment == dr_unaligned_unsupported) |
2471 | { |
2472 | if (misalignment != DR_MISALIGNMENT_UNKNOWN |
2473 | || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length () |
2474 | >= (unsigned) param_vect_max_version_for_alignment_checks)) |
2475 | { |
2476 | do_versioning = false; |
2477 | break; |
2478 | } |
2479 | |
2480 | /* At present we don't support versioning for alignment |
2481 | with variable VF, since there's no guarantee that the |
2482 | VF is a power of two. We could relax this if we added |
2483 | a way of enforcing a power-of-two size. */ |
2484 | unsigned HOST_WIDE_INT size; |
2485 | if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (const_value: &size)) |
2486 | { |
2487 | do_versioning = false; |
2488 | break; |
2489 | } |
2490 | |
2491 | /* Forcing alignment in the first iteration is no good if |
2492 | we don't keep it across iterations. For now, just disable |
2493 | versioning in this case. |
2494 | ?? We could actually unroll the loop to achieve the required |
2495 | overall step alignment, and forcing the alignment could be |
2496 | done by doing some iterations of the non-vectorized loop. */ |
2497 | if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
2498 | * DR_STEP_ALIGNMENT (dr), |
2499 | DR_TARGET_ALIGNMENT (dr_info))) |
2500 | { |
2501 | do_versioning = false; |
2502 | break; |
2503 | } |
2504 | |
2505 | /* The rightmost bits of an aligned address must be zeros. |
2506 | Construct the mask needed for this test. For example, |
2507 | GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the |
2508 | mask must be 15 = 0xf. */ |
2509 | int mask = size - 1; |
2510 | |
2511 | /* FORNOW: use the same mask to test all potentially unaligned |
2512 | references in the loop. */ |
2513 | if (LOOP_VINFO_PTR_MASK (loop_vinfo) |
2514 | && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask) |
2515 | { |
2516 | do_versioning = false; |
2517 | break; |
2518 | } |
2519 | |
2520 | LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; |
2521 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (obj: stmt_info); |
2522 | } |
2523 | } |
2524 | |
2525 | /* Versioning requires at least one misaligned data reference. */ |
2526 | if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) |
2527 | do_versioning = false; |
2528 | else if (!do_versioning) |
2529 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (size: 0); |
2530 | } |
2531 | |
2532 | if (do_versioning) |
2533 | { |
2534 | const vec<stmt_vec_info> &may_misalign_stmts |
2535 | = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); |
2536 | stmt_vec_info stmt_info; |
2537 | |
2538 | /* It can now be assumed that the data references in the statements |
2539 | in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version |
2540 | of the loop being vectorized. */ |
2541 | FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info) |
2542 | { |
2543 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
2544 | SET_DR_MISALIGNMENT (dr_info, |
2545 | vect_dr_misalign_for_aligned_access (dr_info)); |
2546 | if (dump_enabled_p ()) |
2547 | dump_printf_loc (MSG_NOTE, vect_location, |
2548 | "Alignment of access forced using versioning.\n" ); |
2549 | } |
2550 | |
2551 | if (dump_enabled_p ()) |
2552 | dump_printf_loc (MSG_NOTE, vect_location, |
2553 | "Versioning for alignment will be applied.\n" ); |
2554 | |
2555 | /* Peeling and versioning can't be done together at this time. */ |
2556 | gcc_assert (! (do_peeling && do_versioning)); |
2557 | |
2558 | return opt_result::success (); |
2559 | } |
2560 | |
2561 | /* This point is reached if neither peeling nor versioning is being done. */ |
2562 | gcc_assert (! (do_peeling || do_versioning)); |
2563 | |
2564 | return opt_result::success (); |
2565 | } |
2566 | |
2567 | |
2568 | /* Function vect_analyze_data_refs_alignment |
2569 | |
2570 | Analyze the alignment of the data-references in the loop. |
2571 | Return FALSE if a data reference is found that cannot be vectorized. */ |
2572 | |
2573 | opt_result |
2574 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) |
2575 | { |
2576 | DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment" ); |
2577 | |
2578 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
2579 | struct data_reference *dr; |
2580 | unsigned int i; |
2581 | |
2582 | vect_record_base_alignments (vinfo: loop_vinfo); |
2583 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2584 | { |
2585 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2586 | if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)) |
2587 | { |
2588 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt) |
2589 | && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt) |
2590 | continue; |
2591 | vect_compute_data_ref_alignment (vinfo: loop_vinfo, dr_info, |
2592 | STMT_VINFO_VECTYPE (dr_info->stmt)); |
2593 | } |
2594 | } |
2595 | |
2596 | return opt_result::success (); |
2597 | } |
2598 | |
2599 | |
2600 | /* Analyze alignment of DRs of stmts in NODE. */ |
2601 | |
2602 | static bool |
2603 | vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node) |
2604 | { |
2605 | /* Alignment is maintained in the first element of the group. */ |
2606 | stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
2607 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info); |
2608 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
2609 | tree vectype = SLP_TREE_VECTYPE (node); |
2610 | poly_uint64 vector_alignment |
2611 | = exact_div (a: targetm.vectorize.preferred_vector_alignment (vectype), |
2612 | BITS_PER_UNIT); |
2613 | if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED) |
2614 | vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node)); |
2615 | /* Re-analyze alignment when we're facing a vectorization with a bigger |
2616 | alignment requirement. */ |
2617 | else if (known_lt (dr_info->target_alignment, vector_alignment)) |
2618 | { |
2619 | poly_uint64 old_target_alignment = dr_info->target_alignment; |
2620 | int old_misalignment = dr_info->misalignment; |
2621 | vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node)); |
2622 | /* But keep knowledge about a smaller alignment. */ |
2623 | if (old_misalignment != DR_MISALIGNMENT_UNKNOWN |
2624 | && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN) |
2625 | { |
2626 | dr_info->target_alignment = old_target_alignment; |
2627 | dr_info->misalignment = old_misalignment; |
2628 | } |
2629 | } |
2630 | /* When we ever face unordered target alignments the first one wins in terms |
2631 | of analyzing and the other will become unknown in dr_misalignment. */ |
2632 | return true; |
2633 | } |
2634 | |
2635 | /* Function vect_slp_analyze_instance_alignment |
2636 | |
2637 | Analyze the alignment of the data-references in the SLP instance. |
2638 | Return FALSE if a data reference is found that cannot be vectorized. */ |
2639 | |
2640 | bool |
2641 | vect_slp_analyze_instance_alignment (vec_info *vinfo, |
2642 | slp_instance instance) |
2643 | { |
2644 | DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment" ); |
2645 | |
2646 | slp_tree node; |
2647 | unsigned i; |
2648 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node) |
2649 | if (! vect_slp_analyze_node_alignment (vinfo, node)) |
2650 | return false; |
2651 | |
2652 | if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store |
2653 | && ! vect_slp_analyze_node_alignment |
2654 | (vinfo, SLP_INSTANCE_TREE (instance))) |
2655 | return false; |
2656 | |
2657 | return true; |
2658 | } |
2659 | |
2660 | |
2661 | /* Analyze groups of accesses: check that DR_INFO belongs to a group of |
2662 | accesses of legal size, step, etc. Detect gaps, single element |
2663 | interleaving, and other special cases. Set grouped access info. |
2664 | Collect groups of strided stores for further use in SLP analysis. |
2665 | Worker for vect_analyze_group_access. */ |
2666 | |
static bool
vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  /* Element size in bytes; group sizes and gaps below are counted in
     multiples of it.  */
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
  /* -1 until we know STEP fits a HOST_WIDE_INT.  */
  HOST_WIDE_INT dr_step = -1;
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  if (tree_fits_shwi_p (step))
    {
      dr_step = tree_to_shwi (step);
      /* Check that STEP is a multiple of type size.  Otherwise there is
	 a non-element-sized gap at the end of the group which we
	 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
	 ???  As we can handle non-constant step fine here we should
	 simply remove uses of DR_GROUP_GAP between the last and first
	 element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
	 simply not include that gap.  */
      if ((dr_step % type_size) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Step %T is not a multiple of the element size"
			     " for %T\n" ,
			     step, DR_REF (dr));
	  return false;
	}
      groupsize = absu_hwi (x: dr_step) / type_size;
    }
  else
    /* Non-constant step: group size is unknown for now.  */
    groupsize = 0;

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Check if this DR is a part of interleaving, and is a single
	 element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads. STEP must be a multiple of the type
	 size.  */
      if (DR_IS_READ (dr)
	  && (dr_step % type_size) == 0
	  && groupsize > 0
	  /* This could be UINT_MAX but as we are generating code in a very
	     inefficient way we have to cap earlier.
	     See PR91403 for example.  */
	  && groupsize <= 4096)
	{
	  /* Single-element interleaving: the stmt is its own group leader
	     and the rest of the step is all gap.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
	  DR_GROUP_SIZE (stmt_info) = groupsize;
	  DR_GROUP_GAP (stmt_info) = groupsize - 1;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Detected single element interleaving %T"
			     " step %T\n" ,
			     DR_REF (dr), step);

	  return true;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not consecutive access %G" , stmt_info->stmt);

      if (bb_vinfo)
	{
	  /* Mark the statement as unvectorizable.  */
	  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	  return true;
	}

      /* In a loop, fall back to strided accesses.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n" );
      STMT_VINFO_STRIDED_P (stmt_info) = true;
      return true;
    }

  if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      /* First stmt in the interleaving chain. Check the chain.  */
      stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree prev_init = DR_INIT (data_ref);
      /* GAPS accumulates the interior gaps (in elements) seen so far.  */
      HOST_WIDE_INT diff, gaps = 0;

      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
	{
	  /* We never have the same DR multiple times.  */
	  gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
				DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);

	  data_ref = STMT_VINFO_DATA_REF (next);

	  /* All group members have the same STEP by construction.  */
	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));

	  /* Check that the distance between two accesses is equal to the type
	     size. Otherwise, we have gaps.  */
	  diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
		  - TREE_INT_CST_LOW (prev_init)) / type_size;
	  if (diff < 1 || diff > UINT_MAX)
	    {
	      /* For artificial testcases with array accesses with large
		 constant indices we can run into overflow issues which
		 can end up fooling the groupsize constraint below so
		 check the individual gaps (which are represented as
		 unsigned int) as well.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "interleaved access with gap larger "
				 "than representable\n" );
	      return false;
	    }
	  if (diff != 1)
	    {
	      /* FORNOW: SLP of accesses with gaps is not supported.  */
	      slp_impossible = true;
	      if (DR_IS_WRITE (data_ref))
		{
		  /* Stores with gaps are not supported at all.  */
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "interleaved store with gaps\n" );
		  return false;
		}

	      gaps += diff - 1;
	    }

	  last_accessed_element += diff;

	  /* Store the gap from the previous member of the group. If there is no
	     gap in the access, DR_GROUP_GAP is always 1.  */
	  DR_GROUP_GAP (next) = diff;

	  prev_init = DR_INIT (data_ref);
	  next = DR_GROUP_NEXT_ELEMENT (next);
	  /* Count the number of data-refs in the chain.  */
	  count++;
	}

      /* STEP was not constant: derive the group size from the members
	 seen plus the interior gaps.  */
      if (groupsize == 0)
	groupsize = count + gaps;

      /* This could be UINT_MAX but as we are generating code in a very
	 inefficient way we have to cap earlier.  See PR78699 for example.  */
      if (groupsize > 4096)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "group is too large\n" );
	  return false;
	}

      /* Check that the size of the interleaving is equal to count for stores,
	 i.e., that there are no gaps.  */
      if (groupsize != count
	  && !DR_IS_READ (dr))
	{
	  /* Fall back to handling the store group as strided.  */
	  groupsize = count;
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* If there is a gap after the last load in the group it is the
	 difference between the groupsize and the last accessed
	 element.
	 When there is no gap, this difference should be 0.  */
      DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;

      DR_GROUP_SIZE (stmt_info) = groupsize;
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Detected interleaving " );
	  if (DR_IS_READ (dr))
	    dump_printf (MSG_NOTE, "load " );
	  else if (STMT_VINFO_STRIDED_P (stmt_info))
	    dump_printf (MSG_NOTE, "strided store " );
	  else
	    dump_printf (MSG_NOTE, "store " );
	  dump_printf (MSG_NOTE, "of size %u\n" ,
		       (unsigned)groupsize);
	  dump_printf_loc (MSG_NOTE, vect_location, "\t%G" , stmt_info->stmt);
	  next = DR_GROUP_NEXT_ELEMENT (stmt_info);
	  while (next)
	    {
	      if (DR_GROUP_GAP (next) != 1)
		dump_printf_loc (MSG_NOTE, vect_location,
				 "\t<gap of %d elements>\n" ,
				 DR_GROUP_GAP (next) - 1);
	      dump_printf_loc (MSG_NOTE, vect_location, "\t%G" , next->stmt);
	      next = DR_GROUP_NEXT_ELEMENT (next);
	    }
	  if (DR_GROUP_GAP (stmt_info) != 0)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\t<gap of %d elements>\n" ,
			     DR_GROUP_GAP (stmt_info));
	}

      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyse_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
	{
	  if (loop_vinfo)
	    LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (obj: stmt_info);
	  if (bb_vinfo)
	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (obj: stmt_info);
	}
    }

  return true;
}
2888 | |
2889 | /* Analyze groups of accesses: check that DR_INFO belongs to a group of |
2890 | accesses of legal size, step, etc. Detect gaps, single element |
2891 | interleaving, and other special cases. Set grouped access info. |
2892 | Collect groups of strided stores for further use in SLP analysis. */ |
2893 | |
2894 | static bool |
2895 | vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info) |
2896 | { |
2897 | if (!vect_analyze_group_access_1 (vinfo, dr_info)) |
2898 | { |
2899 | /* Dissolve the group if present. */ |
2900 | stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt); |
2901 | while (stmt_info) |
2902 | { |
2903 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); |
2904 | DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL; |
2905 | DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL; |
2906 | stmt_info = next; |
2907 | } |
2908 | return false; |
2909 | } |
2910 | return true; |
2911 | } |
2912 | |
2913 | /* Analyze the access pattern of the data-reference DR_INFO. |
2914 | In case of non-consecutive accesses call vect_analyze_group_access() to |
2915 | analyze groups of accesses. */ |
2916 | |
static bool
vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  class loop *loop = NULL;

  /* Gather/scatter accesses have no step-based access pattern to
     analyze here.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return true;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* In a loop a DR without a step cannot be analyzed.  */
  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data-ref access in loop\n" );
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      if (!nested_in_vect_loop_p (loop, stmt_info))
	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only - it guarantees absence of
	 loop-carried dependencies between inner loop iterations.  */
      if (loop->safelen < 2)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in inner loop of nest\n" );
	  return false;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      /* Interleaved accesses are not yet supported within outer-loop
	vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in outer loop.\n" );
	  return DR_IS_READ (dr);
	}
    }

  /* Consecutive?  Step equal to the element size (or, for a negative
     step, its negation) means a simple consecutive access.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (t1: step, TYPE_SIZE_UNIT (scalar_type))
	  || (dr_step < 0
	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
	{
	  /* Mark that it is not interleaving.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
	  return true;
	}
    }

  /* Grouped accesses are not supported in the outer loop of a nest.  */
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "grouped access in outer loop.\n" );
      return false;
    }


  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (vinfo, dr_info)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (vinfo, dr_info);
}
3008 | |
3009 | /* Compare two data-references DRA and DRB to group them into chunks |
3010 | suitable for grouping. */ |
3011 | |
3012 | static int |
3013 | dr_group_sort_cmp (const void *dra_, const void *drb_) |
3014 | { |
3015 | dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_); |
3016 | dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_); |
3017 | data_reference_p dra = dra_info->dr; |
3018 | data_reference_p drb = drb_info->dr; |
3019 | int cmp; |
3020 | |
3021 | /* Stabilize sort. */ |
3022 | if (dra == drb) |
3023 | return 0; |
3024 | |
3025 | /* Different group IDs lead never belong to the same group. */ |
3026 | if (dra_info->group != drb_info->group) |
3027 | return dra_info->group < drb_info->group ? -1 : 1; |
3028 | |
3029 | /* Ordering of DRs according to base. */ |
3030 | cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
3031 | DR_BASE_ADDRESS (drb)); |
3032 | if (cmp != 0) |
3033 | return cmp; |
3034 | |
3035 | /* And according to DR_OFFSET. */ |
3036 | cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); |
3037 | if (cmp != 0) |
3038 | return cmp; |
3039 | |
3040 | /* Put reads before writes. */ |
3041 | if (DR_IS_READ (dra) != DR_IS_READ (drb)) |
3042 | return DR_IS_READ (dra) ? -1 : 1; |
3043 | |
3044 | /* Then sort after access size. */ |
3045 | cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), |
3046 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); |
3047 | if (cmp != 0) |
3048 | return cmp; |
3049 | |
3050 | /* And after step. */ |
3051 | cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); |
3052 | if (cmp != 0) |
3053 | return cmp; |
3054 | |
3055 | /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */ |
3056 | cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)); |
3057 | if (cmp == 0) |
3058 | return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1; |
3059 | return cmp; |
3060 | } |
3061 | |
3062 | /* If OP is the result of a conversion, return the unconverted value, |
3063 | otherwise return null. */ |
3064 | |
3065 | static tree |
3066 | strip_conversion (tree op) |
3067 | { |
3068 | if (TREE_CODE (op) != SSA_NAME) |
3069 | return NULL_TREE; |
3070 | gimple *stmt = SSA_NAME_DEF_STMT (op); |
3071 | if (!is_gimple_assign (gs: stmt) |
3072 | || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))) |
3073 | return NULL_TREE; |
3074 | return gimple_assign_rhs1 (gs: stmt); |
3075 | } |
3076 | |
3077 | /* Return true if vectorizable_* routines can handle statements STMT1_INFO |
3078 | and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can |
3079 | be grouped in SLP mode. */ |
3080 | |
3081 | static bool |
3082 | can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, |
3083 | bool allow_slp_p) |
3084 | { |
3085 | if (gimple_assign_single_p (gs: stmt1_info->stmt)) |
3086 | return gimple_assign_single_p (gs: stmt2_info->stmt); |
3087 | |
3088 | gcall *call1 = dyn_cast <gcall *> (p: stmt1_info->stmt); |
3089 | if (call1 && gimple_call_internal_p (gs: call1)) |
3090 | { |
3091 | /* Check for two masked loads or two masked stores. */ |
3092 | gcall *call2 = dyn_cast <gcall *> (p: stmt2_info->stmt); |
3093 | if (!call2 || !gimple_call_internal_p (gs: call2)) |
3094 | return false; |
3095 | internal_fn ifn = gimple_call_internal_fn (gs: call1); |
3096 | if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE) |
3097 | return false; |
3098 | if (ifn != gimple_call_internal_fn (gs: call2)) |
3099 | return false; |
3100 | |
3101 | /* Check that the masks are the same. Cope with casts of masks, |
3102 | like those created by build_mask_conversion. */ |
3103 | tree mask1 = gimple_call_arg (gs: call1, index: 2); |
3104 | tree mask2 = gimple_call_arg (gs: call2, index: 2); |
3105 | if (!operand_equal_p (mask1, mask2, flags: 0) && !allow_slp_p) |
3106 | { |
3107 | mask1 = strip_conversion (op: mask1); |
3108 | if (!mask1) |
3109 | return false; |
3110 | mask2 = strip_conversion (op: mask2); |
3111 | if (!mask2) |
3112 | return false; |
3113 | if (!operand_equal_p (mask1, mask2, flags: 0)) |
3114 | return false; |
3115 | } |
3116 | return true; |
3117 | } |
3118 | |
3119 | return false; |
3120 | } |
3121 | |
3122 | /* Function vect_analyze_data_ref_accesses. |
3123 | |
3124 | Analyze the access pattern of all the data references in the loop. |
3125 | |
3126 | FORNOW: the only access pattern that is considered vectorizable is a |
3127 | simple step 1 (consecutive) access. |
3128 | |
3129 | FORNOW: handle only arrays and pointer accesses. */ |
3130 | |
3131 | opt_result |
3132 | vect_analyze_data_ref_accesses (vec_info *vinfo, |
3133 | vec<int> *dataref_groups) |
3134 | { |
3135 | unsigned int i; |
3136 | vec<data_reference_p> datarefs = vinfo->shared->datarefs; |
3137 | |
3138 | DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses" ); |
3139 | |
3140 | if (datarefs.is_empty ()) |
3141 | return opt_result::success (); |
3142 | |
3143 | /* Sort the array of datarefs to make building the interleaving chains |
3144 | linear. Don't modify the original vector's order, it is needed for |
3145 | determining what dependencies are reversed. */ |
3146 | vec<dr_vec_info *> datarefs_copy; |
3147 | datarefs_copy.create (nelems: datarefs.length ()); |
3148 | for (unsigned i = 0; i < datarefs.length (); i++) |
3149 | { |
3150 | dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]); |
3151 | /* If the caller computed DR grouping use that, otherwise group by |
3152 | basic blocks. */ |
3153 | if (dataref_groups) |
3154 | dr_info->group = (*dataref_groups)[i]; |
3155 | else |
3156 | dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index; |
3157 | datarefs_copy.quick_push (obj: dr_info); |
3158 | } |
3159 | datarefs_copy.qsort (dr_group_sort_cmp); |
3160 | hash_set<stmt_vec_info> to_fixup; |
3161 | |
3162 | /* Build the interleaving chains. */ |
3163 | for (i = 0; i < datarefs_copy.length () - 1;) |
3164 | { |
3165 | dr_vec_info *dr_info_a = datarefs_copy[i]; |
3166 | data_reference_p dra = dr_info_a->dr; |
3167 | int dra_group_id = dr_info_a->group; |
3168 | stmt_vec_info stmtinfo_a = dr_info_a->stmt; |
3169 | stmt_vec_info lastinfo = NULL; |
3170 | if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a) |
3171 | || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)) |
3172 | { |
3173 | ++i; |
3174 | continue; |
3175 | } |
3176 | for (i = i + 1; i < datarefs_copy.length (); ++i) |
3177 | { |
3178 | dr_vec_info *dr_info_b = datarefs_copy[i]; |
3179 | data_reference_p drb = dr_info_b->dr; |
3180 | int drb_group_id = dr_info_b->group; |
3181 | stmt_vec_info stmtinfo_b = dr_info_b->stmt; |
3182 | if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) |
3183 | || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) |
3184 | break; |
3185 | |
3186 | /* ??? Imperfect sorting (non-compatible types, non-modulo |
3187 | accesses, same accesses) can lead to a group to be artificially |
3188 | split here as we don't just skip over those. If it really |
3189 | matters we can push those to a worklist and re-iterate |
3190 | over them. The we can just skip ahead to the next DR here. */ |
3191 | |
3192 | /* DRs in a different DR group should not be put into the same |
3193 | interleaving group. */ |
3194 | if (dra_group_id != drb_group_id) |
3195 | break; |
3196 | |
3197 | /* Check that the data-refs have same first location (except init) |
3198 | and they are both either store or load (not load and store, |
3199 | not masked loads or stores). */ |
3200 | if (DR_IS_READ (dra) != DR_IS_READ (drb) |
3201 | || data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
3202 | DR_BASE_ADDRESS (drb)) != 0 |
3203 | || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 |
3204 | || !can_group_stmts_p (stmt1_info: stmtinfo_a, stmt2_info: stmtinfo_b, allow_slp_p: true)) |
3205 | break; |
3206 | |
3207 | /* Check that the data-refs have the same constant size. */ |
3208 | tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); |
3209 | tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); |
3210 | if (!tree_fits_uhwi_p (sza) |
3211 | || !tree_fits_uhwi_p (szb) |
3212 | || !tree_int_cst_equal (sza, szb)) |
3213 | break; |
3214 | |
3215 | /* Check that the data-refs have the same step. */ |
3216 | if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0) |
3217 | break; |
3218 | |
3219 | /* Check the types are compatible. |
3220 | ??? We don't distinguish this during sorting. */ |
3221 | if (!types_compatible_p (TREE_TYPE (DR_REF (dra)), |
3222 | TREE_TYPE (DR_REF (drb)))) |
3223 | break; |
3224 | |
3225 | /* Check that the DR_INITs are compile-time constants. */ |
3226 | if (!tree_fits_shwi_p (DR_INIT (dra)) |
3227 | || !tree_fits_shwi_p (DR_INIT (drb))) |
3228 | break; |
3229 | |
3230 | /* Different .GOMP_SIMD_LANE calls still give the same lane, |
3231 | just hold extra information. */ |
3232 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a) |
3233 | && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b) |
3234 | && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0) |
3235 | break; |
3236 | |
3237 | /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ |
3238 | HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); |
3239 | HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); |
3240 | HOST_WIDE_INT init_prev |
3241 | = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr)); |
3242 | gcc_assert (init_a <= init_b |
3243 | && init_a <= init_prev |
3244 | && init_prev <= init_b); |
3245 | |
3246 | /* Do not place the same access in the interleaving chain twice. */ |
3247 | if (init_b == init_prev) |
3248 | { |
3249 | gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr)) |
3250 | < gimple_uid (DR_STMT (drb))); |
3251 | /* Simply link in duplicates and fix up the chain below. */ |
3252 | } |
3253 | else |
3254 | { |
3255 | /* If init_b == init_a + the size of the type * k, we have an |
3256 | interleaving, and DRA is accessed before DRB. */ |
3257 | unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza); |
3258 | if (type_size_a == 0 |
3259 | || (((unsigned HOST_WIDE_INT)init_b - init_a) |
3260 | % type_size_a != 0)) |
3261 | break; |
3262 | |
3263 | /* If we have a store, the accesses are adjacent. This splits |
3264 | groups into chunks we support (we don't support vectorization |
3265 | of stores with gaps). */ |
3266 | if (!DR_IS_READ (dra) |
3267 | && (((unsigned HOST_WIDE_INT)init_b - init_prev) |
3268 | != type_size_a)) |
3269 | break; |
3270 | |
3271 | /* If the step (if not zero or non-constant) is smaller than the |
3272 | difference between data-refs' inits this splits groups into |
3273 | suitable sizes. */ |
3274 | if (tree_fits_shwi_p (DR_STEP (dra))) |
3275 | { |
3276 | unsigned HOST_WIDE_INT step |
3277 | = absu_hwi (x: tree_to_shwi (DR_STEP (dra))); |
3278 | if (step != 0 |
3279 | && step <= ((unsigned HOST_WIDE_INT)init_b - init_a)) |
3280 | break; |
3281 | } |
3282 | } |
3283 | |
3284 | if (dump_enabled_p ()) |
3285 | dump_printf_loc (MSG_NOTE, vect_location, |
3286 | DR_IS_READ (dra) |
3287 | ? "Detected interleaving load %T and %T\n" |
3288 | : "Detected interleaving store %T and %T\n" , |
3289 | DR_REF (dra), DR_REF (drb)); |
3290 | |
3291 | /* Link the found element into the group list. */ |
3292 | if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) |
3293 | { |
3294 | DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a; |
3295 | lastinfo = stmtinfo_a; |
3296 | } |
3297 | DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; |
3298 | DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; |
3299 | lastinfo = stmtinfo_b; |
3300 | |
3301 | STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a) |
3302 | = !can_group_stmts_p (stmt1_info: stmtinfo_a, stmt2_info: stmtinfo_b, allow_slp_p: false); |
3303 | |
3304 | if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)) |
3305 | dump_printf_loc (MSG_NOTE, vect_location, |
3306 | "Load suitable for SLP vectorization only.\n" ); |
3307 | |
3308 | if (init_b == init_prev |
3309 | && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) |
3310 | && dump_enabled_p ()) |
3311 | dump_printf_loc (MSG_NOTE, vect_location, |
3312 | "Queuing group with duplicate access for fixup\n" ); |
3313 | } |
3314 | } |
3315 | |
3316 | /* Fixup groups with duplicate entries by splitting it. */ |
3317 | while (1) |
3318 | { |
3319 | hash_set<stmt_vec_info>::iterator it = to_fixup.begin (); |
3320 | if (!(it != to_fixup.end ())) |
3321 | break; |
3322 | stmt_vec_info grp = *it; |
3323 | to_fixup.remove (k: grp); |
3324 | |
3325 | /* Find the earliest duplicate group member. */ |
3326 | unsigned first_duplicate = -1u; |
3327 | stmt_vec_info next, g = grp; |
3328 | while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
3329 | { |
3330 | if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr), |
3331 | DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) |
3332 | && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) |
3333 | first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); |
3334 | g = next; |
3335 | } |
3336 | if (first_duplicate == -1U) |
3337 | continue; |
3338 | |
3339 | /* Then move all stmts after the first duplicate to a new group. |
3340 | Note this is a heuristic but one with the property that *it |
3341 | is fixed up completely. */ |
3342 | g = grp; |
3343 | stmt_vec_info newgroup = NULL, ng = grp; |
3344 | while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
3345 | { |
3346 | if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) |
3347 | { |
3348 | DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); |
3349 | if (!newgroup) |
3350 | newgroup = next; |
3351 | else |
3352 | DR_GROUP_NEXT_ELEMENT (ng) = next; |
3353 | ng = next; |
3354 | DR_GROUP_FIRST_ELEMENT (ng) = newgroup; |
3355 | } |
3356 | else |
3357 | g = DR_GROUP_NEXT_ELEMENT (g); |
3358 | } |
3359 | DR_GROUP_NEXT_ELEMENT (ng) = NULL; |
3360 | |
3361 | /* Fixup the new group which still may contain duplicates. */ |
3362 | to_fixup.add (k: newgroup); |
3363 | } |
3364 | |
3365 | dr_vec_info *dr_info; |
3366 | FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info) |
3367 | { |
3368 | if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) |
3369 | && !vect_analyze_data_ref_access (vinfo, dr_info)) |
3370 | { |
3371 | if (dump_enabled_p ()) |
3372 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3373 | "not vectorized: complicated access pattern.\n" ); |
3374 | |
3375 | if (is_a <bb_vec_info> (p: vinfo)) |
3376 | { |
3377 | /* Mark the statement as not vectorizable. */ |
3378 | STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false; |
3379 | continue; |
3380 | } |
3381 | else |
3382 | { |
3383 | datarefs_copy.release (); |
3384 | return opt_result::failure_at (loc: dr_info->stmt->stmt, |
3385 | fmt: "not vectorized:" |
3386 | " complicated access pattern.\n" ); |
3387 | } |
3388 | } |
3389 | } |
3390 | |
3391 | datarefs_copy.release (); |
3392 | return opt_result::success (); |
3393 | } |
3394 | |
3395 | /* Function vect_vfa_segment_size. |
3396 | |
3397 | Input: |
3398 | DR_INFO: The data reference. |
3399 | LENGTH_FACTOR: segment length to consider. |
3400 | |
3401 | Return a value suitable for the dr_with_seg_len::seg_len field. |
3402 | This is the "distance travelled" by the pointer from the first |
3403 | iteration in the segment to the last. Note that it does not include |
3404 | the size of the access; in effect it only describes the first byte. */ |
3405 | |
3406 | static tree |
3407 | vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor) |
3408 | { |
3409 | length_factor = size_binop (MINUS_EXPR, |
3410 | fold_convert (sizetype, length_factor), |
3411 | size_one_node); |
3412 | return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)), |
3413 | length_factor); |
3414 | } |
3415 | |
3416 | /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)), |
3417 | gives the worst-case number of bytes covered by the segment. */ |
3418 | |
3419 | static unsigned HOST_WIDE_INT |
3420 | vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info) |
3421 | { |
3422 | stmt_vec_info stmt_vinfo = dr_info->stmt; |
3423 | tree ref_type = TREE_TYPE (DR_REF (dr_info->dr)); |
3424 | unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type)); |
3425 | unsigned HOST_WIDE_INT access_size = ref_size; |
3426 | if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo)) |
3427 | { |
3428 | gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo); |
3429 | access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo); |
3430 | } |
3431 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); |
3432 | int misalignment; |
3433 | if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists () |
3434 | && ((misalignment = dr_misalignment (dr_info, vectype)), true) |
3435 | && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment) |
3436 | == dr_explicit_realign_optimized)) |
3437 | { |
3438 | /* We might access a full vector's worth. */ |
3439 | access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size; |
3440 | } |
3441 | return access_size; |
3442 | } |
3443 | |
3444 | /* Get the minimum alignment for all the scalar accesses that DR_INFO |
3445 | describes. */ |
3446 | |
3447 | static unsigned int |
3448 | vect_vfa_align (dr_vec_info *dr_info) |
3449 | { |
3450 | return dr_alignment (dr: dr_info->dr); |
3451 | } |
3452 | |
/* Function vect_compile_time_alias.
3454 | |
3455 | Given data references A and B with equal base and offset, see whether |
3456 | the alias relation can be decided at compilation time. Return 1 if |
3457 | it can and the references alias, 0 if it can and the references do |
3458 | not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A, |
3459 | SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent |
3460 | of dr_with_seg_len::{seg_len,access_size} for A and B. */ |
3461 | |
static int
vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
			 tree segment_length_a, tree segment_length_b,
			 unsigned HOST_WIDE_INT access_size_a,
			 unsigned HOST_WIDE_INT access_size_b)
{
  /* The caller guarantees equal DR_BASE_ADDRESS and DR_OFFSET, so the
     two byte ranges can be compared using the constant DR_INITs alone.  */
  poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
  poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
  poly_uint64 const_length_a;
  poly_uint64 const_length_b;

  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12) */
  if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    {
      const_length_a = (-wi::to_poly_wide (t: segment_length_a)).force_uhwi ();
      offset_a -= const_length_a;
    }
  else
    const_length_a = tree_to_poly_uint64 (segment_length_a);
  if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    {
      const_length_b = (-wi::to_poly_wide (t: segment_length_b)).force_uhwi ();
      offset_b -= const_length_b;
    }
  else
    const_length_b = tree_to_poly_uint64 (segment_length_b);

  /* The segment lengths only describe how far the first byte moves;
     add the access sizes to obtain the full extents.  */
  const_length_a += access_size_a;
  const_length_b += access_size_b;

  /* Overlap for every possible runtime value -> definite alias.  */
  if (ranges_known_overlap_p (pos1: offset_a, size1: const_length_a,
			      pos2: offset_b, size2: const_length_b))
    return 1;

  /* Disjoint for every possible runtime value -> definitely no alias.  */
  if (!ranges_maybe_overlap_p (pos1: offset_a, size1: const_length_a,
			       pos2: offset_b, size2: const_length_b))
    return 0;

  /* Otherwise the answer depends on runtime values.  */
  return -1;
}
3504 | |
3505 | /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH |
3506 | in DDR is >= VF. */ |
3507 | |
3508 | static bool |
3509 | dependence_distance_ge_vf (data_dependence_relation *ddr, |
3510 | unsigned int loop_depth, poly_uint64 vf) |
3511 | { |
3512 | if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE |
3513 | || DDR_NUM_DIST_VECTS (ddr) == 0) |
3514 | return false; |
3515 | |
3516 | /* If the dependence is exact, we should have limited the VF instead. */ |
3517 | gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr)); |
3518 | |
3519 | unsigned int i; |
3520 | lambda_vector dist_v; |
3521 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
3522 | { |
3523 | HOST_WIDE_INT dist = dist_v[loop_depth]; |
3524 | if (dist != 0 |
3525 | && !(dist > 0 && DDR_REVERSED_P (ddr)) |
3526 | && maybe_lt (a: (unsigned HOST_WIDE_INT) abs_hwi (x: dist), b: vf)) |
3527 | return false; |
3528 | } |
3529 | |
3530 | if (dump_enabled_p ()) |
3531 | dump_printf_loc (MSG_NOTE, vect_location, |
3532 | "dependence distance between %T and %T is >= VF\n" , |
3533 | DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr))); |
3534 | |
3535 | return true; |
3536 | } |
3537 | |
3538 | /* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */ |
3539 | |
3540 | static void |
3541 | dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound) |
3542 | { |
3543 | dump_printf (dump_kind, "%s (%T) >= " , |
3544 | lower_bound.unsigned_p ? "unsigned" : "abs" , |
3545 | lower_bound.expr); |
3546 | dump_dec (dump_kind, lower_bound.min_value); |
3547 | } |
3548 | |
3549 | /* Record that the vectorized loop requires the vec_lower_bound described |
3550 | by EXPR, UNSIGNED_P and MIN_VALUE. */ |
3551 | |
static void
vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
			poly_uint64 min_value)
{
  vec<vec_lower_bound> &lower_bounds
    = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
  /* If a bound for EXPR is already recorded, merge the new requirement
     into it instead of adding a duplicate entry.  */
  for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    if (operand_equal_p (lower_bounds[i].expr, expr, flags: 0))
      {
	/* The merged check may treat EXPR as unsigned only if both
	   requirements allow it; the merged bound is the larger one.  */
	unsigned_p &= lower_bounds[i].unsigned_p;
	min_value = upper_bound (a: lower_bounds[i].min_value, b: min_value);
	if (lower_bounds[i].unsigned_p != unsigned_p
	    || maybe_lt (a: lower_bounds[i].min_value, b: min_value))
	  {
	    lower_bounds[i].unsigned_p = unsigned_p;
	    lower_bounds[i].min_value = min_value;
	    if (dump_enabled_p ())
	      {
		dump_printf_loc (MSG_NOTE, vect_location,
				 "updating run-time check to " );
		dump_lower_bound (dump_kind: MSG_NOTE, lower_bound: lower_bounds[i]);
		dump_printf (MSG_NOTE, "\n" );
	      }
	  }
	return;
      }

  /* No existing check for EXPR; record a fresh one.  */
  vec_lower_bound lower_bound (expr, unsigned_p, min_value);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that " );
      dump_lower_bound (dump_kind: MSG_NOTE, lower_bound);
      dump_printf (MSG_NOTE, "\n" );
    }
  LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (obj: lower_bound);
}
3588 | |
3589 | /* Return true if it's unlikely that the step of the vectorized form of DR_INFO |
3590 | will span fewer than GAP bytes. */ |
3591 | |
3592 | static bool |
3593 | vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info, |
3594 | poly_int64 gap) |
3595 | { |
3596 | stmt_vec_info stmt_info = dr_info->stmt; |
3597 | HOST_WIDE_INT count |
3598 | = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
3599 | if (DR_GROUP_FIRST_ELEMENT (stmt_info)) |
3600 | count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); |
3601 | return (estimated_poly_value (x: gap) |
3602 | <= count * vect_get_scalar_dr_size (dr_info)); |
3603 | } |
3604 | |
3605 | /* Return true if we know that there is no alias between DR_INFO_A and |
3606 | DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N. |
3607 | When returning true, set *LOWER_BOUND_OUT to this N. */ |
3608 | |
static bool
vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
				poly_uint64 *lower_bound_out)
{
  /* Check that there is a constant gap of known sign between DR_A
     and DR_B.  */
  data_reference *dr_a = dr_info_a->dr;
  data_reference *dr_b = dr_info_b->dr;
  poly_int64 init_a, init_b;
  /* The accesses must share base, offset and step, so that the only
     difference between their addresses is the compile-time DR_INIT.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), flags: 0)
      || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), flags: 0)
      || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), flags: 0)
      || !poly_int_tree_p (DR_INIT (dr_a), value: &init_a)
      || !poly_int_tree_p (DR_INIT (dr_b), value: &init_b)
      || !ordered_p (a: init_a, b: init_b))
    return false;

  /* Sort DR_A and DR_B by the address they access.  */
  if (maybe_lt (a: init_b, b: init_a))
    {
      std::swap (a&: init_a, b&: init_b);
      std::swap (a&: dr_info_a, b&: dr_info_b);
      std::swap (a&: dr_a, b&: dr_b);
    }

  /* If the two accesses could be dependent within a scalar iteration,
     make sure that we'd retain their order.  */
  if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
      && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
    return false;

  /* There is no alias if abs (DR_STEP) is greater than or equal to
     the bytes spanned by the combination of the two accesses.  */
  *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info: dr_info_b) - init_a;
  return true;
}
3645 | |
3646 | /* Function vect_prune_runtime_alias_test_list. |
3647 | |
3648 | Prune a list of ddrs to be tested at run-time by versioning for alias. |
3649 | Merge several alias checks into one if possible. |
   Return FALSE if resulting list of ddrs is longer than allowed by
3651 | PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */ |
3652 | |
opt_result
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  /* Remember object pairs already queued for an address-inequality
     check so each pair is tested only once.  */
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
  hash_set <tree_pair_hash> compared_objects;

  const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  const vec<vec_object_pair> &check_unequal_addrs
    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree length_factor;

  DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list" );

  /* Step values are irrelevant for aliasing if the number of vector
     iterations is equal to the number of scalar iterations (which can
     happen for fully-SLP loops).  */
  bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);

  if (!vf_one_p)
    {
      /* Convert the checks for nonzero steps into bound tests.  */
      tree value;
      FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
	vect_check_lower_bound (loop_vinfo, expr: value, unsigned_p: true, min_value: 1);
    }

  if (may_alias_ddrs.is_empty ())
    return opt_result::success ();

  comp_alias_ddrs.create (nelems: may_alias_ddrs.length ());

  unsigned int loop_depth
    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
			  LOOP_VINFO_LOOP_NEST (loop_vinfo));

  /* First, we collect all data ref pairs for aliasing checks.  */
  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
      unsigned int align_a, align_b;

      /* Ignore the alias if the VF we chose ended up being no greater
	 than the dependence distance.  */
      if (dependence_distance_ge_vf (ddr, loop_depth, vf: vect_factor))
	continue;

      /* A dependence expressed at object level only needs the objects'
	 addresses to differ; queue an inequality check instead of a
	 segment-overlap check.  */
      if (DDR_OBJECT_A (ddr))
	{
	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
	  if (!compared_objects.add (k: new_pair))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "checking that %T and %T"
				 " have different addresses\n" ,
				 new_pair.first, new_pair.second);
	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (obj: new_pair);
	    }
	  continue;
	}

      dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
      stmt_vec_info stmt_info_a = dr_info_a->stmt;

      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;

      bool preserves_scalar_order_p
	= vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
      bool ignore_step_p
	= (vf_one_p
	   && (preserves_scalar_order_p
	       || operand_equal_p (DR_STEP (dr_info_a->dr),
				   DR_STEP (dr_info_b->dr))));

      /* Skip the pair if inter-iteration dependencies are irrelevant
	 and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
	  && (preserves_scalar_order_p
	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
						 lower_bound_out: &lower_bound)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "no need for alias check between "
			     "%T and %T when VF is 1\n" ,
			     DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	  continue;
	}

      /* See whether we can handle the alias using a bounds check on
	 the step, and whether that's likely to be the best approach.
	 (It might not be, for example, if the minimum step is much larger
	 than the number of bytes handled by one vector iteration.)  */
      if (!ignore_step_p
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
	  && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
					     lower_bound_out: &lower_bound)
	  && (vect_small_gap_p (loop_vinfo, dr_info: dr_info_a, gap: lower_bound)
	      || vect_small_gap_p (loop_vinfo, dr_info: dr_info_b, gap: lower_bound)))
	{
	  bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
			       "%T and %T when the step %T is outside " ,
			       DR_REF (dr_info_a->dr),
			       DR_REF (dr_info_b->dr),
			       DR_STEP (dr_info_a->dr));
	      if (unsigned_p)
		dump_printf (MSG_NOTE, "[0" );
	      else
		{
		  dump_printf (MSG_NOTE, "(" );
		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
		}
	      dump_printf (MSG_NOTE, ", " );
	      dump_dec (MSG_NOTE, lower_bound);
	      dump_printf (MSG_NOTE, ")\n" );
	    }
	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
				  unsigned_p, min_value: lower_bound);
	  continue;
	}

      /* For grouped accesses, use the first element of the group as
	 the representative for segment computation.  */
      stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
      if (dr_group_first_a)
	{
	  stmt_info_a = dr_group_first_a;
	  dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
	}

      stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
      if (dr_group_first_b)
	{
	  stmt_info_b = dr_group_first_b;
	  dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
	}

      /* When steps are irrelevant, each segment degenerates to a single
	 scalar access.  */
      if (ignore_step_p)
	{
	  segment_length_a = size_zero_node;
	  segment_length_b = size_zero_node;
	}
      else
	{
	  if (!operand_equal_p (DR_STEP (dr_info_a->dr),
				DR_STEP (dr_info_b->dr), flags: 0))
	    length_factor = scalar_loop_iters;
	  else
	    length_factor = size_int (vect_factor);
	  segment_length_a = vect_vfa_segment_size (dr_info: dr_info_a, length_factor);
	  segment_length_b = vect_vfa_segment_size (dr_info: dr_info_b, length_factor);
	}
      access_size_a = vect_vfa_access_size (vinfo: loop_vinfo, dr_info: dr_info_a);
      access_size_b = vect_vfa_access_size (vinfo: loop_vinfo, dr_info: dr_info_b);
      align_a = vect_vfa_align (dr_info: dr_info_a);
      align_b = vect_vfa_align (dr_info: dr_info_b);

      /* See whether the alias is known at compilation time.  */
      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
			   DR_BASE_ADDRESS (dr_info_b->dr), flags: 0)
	  && operand_equal_p (DR_OFFSET (dr_info_a->dr),
			      DR_OFFSET (dr_info_b->dr), flags: 0)
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
	  && poly_int_tree_p (t: segment_length_a)
	  && poly_int_tree_p (t: segment_length_b))
	{
	  int res = vect_compile_time_alias (a: dr_info_a, b: dr_info_b,
					     segment_length_a,
					     segment_length_b,
					     access_size_a,
					     access_size_b);
	  if (res >= 0 && dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "can tell at compile time that %T and %T" ,
			       DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	      if (res == 0)
		dump_printf (MSG_NOTE, " do not alias\n" );
	      else
		dump_printf (MSG_NOTE, " alias\n" );
	    }

	  /* Proven no alias: no runtime check needed.  */
	  if (res == 0)
	    continue;

	  /* Proven alias: versioning cannot help; fail outright.  */
	  if (res == 1)
	    return opt_result::failure_at (loc: stmt_info_b->stmt,
					   fmt: "not vectorized:"
					   " compilation time alias: %G%G" ,
					   stmt_info_a->stmt,
					   stmt_info_b->stmt);
	}

      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
			    access_size_a, align_a);
      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
			    access_size_b, align_b);
      /* Canonicalize the order to be the one that's needed for accurate
	 RAW, WAR and WAW flags, in cases where the data references are
	 well-ordered.  The order doesn't really matter otherwise,
	 but we might as well be consistent.  */
      if (get_later_stmt (stmt1_info: stmt_info_a, stmt2_info: stmt_info_b) == stmt_info_a)
	std::swap (a&: dr_a, b&: dr_b);

      dr_with_seg_len_pair_t dr_with_seg_len_pair
	(dr_a, dr_b, (preserves_scalar_order_p
		      ? dr_with_seg_len_pair_t::WELL_ORDERED
		      : dr_with_seg_len_pair_t::REORDERED));

      comp_alias_ddrs.safe_push (obj: dr_with_seg_len_pair);
    }

  /* Merge and minimize the collected segment checks.  */
  prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);

  unsigned int count = (comp_alias_ddrs.length ()
			+ check_unequal_addrs.length ());

  /* Under the very-cheap cost model any runtime check is too expensive.  */
  if (count
      && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
	  == VECT_COST_MODEL_VERY_CHEAP))
    return opt_result::failure_at
      (loc: vect_location, fmt: "would need a runtime alias check\n" );

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "improved number of alias checks from %d to %d\n" ,
		     may_alias_ddrs.length (), count);
  /* The cheap cost model gets a reduced check budget (60%).  */
  unsigned limit = param_vect_max_version_for_alias_checks;
  if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    limit = param_vect_max_version_for_alias_checks * 6 / 10;
  if (count > limit)
    return opt_result::failure_at
      (loc: vect_location,
       fmt: "number of versioning for alias run-time tests exceeds %d "
       "(--param vect-max-version-for-alias-checks)\n" , limit);

  return opt_result::success ();
}
3903 | |
3904 | /* Check whether we can use an internal function for a gather load |
3905 | or scatter store. READ_P is true for loads and false for stores. |
3906 | MASKED_P is true if the load or store is conditional. MEMORY_TYPE is |
3907 | the type of the memory elements being loaded or stored. OFFSET_TYPE |
3908 | is the type of the offset that is being applied to the invariant |
3909 | base address. SCALE is the amount by which the offset should |
3910 | be multiplied *after* it has been converted to address width. |
3911 | |
3912 | Return true if the function is supported, storing the function id in |
3913 | *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ |
3914 | |
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
			  tree vectype, tree memory_type, tree offset_type,
			  int scale, internal_fn *ifn_out,
			  tree *offset_vectype_out)
{
  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
  unsigned int element_bits = vector_element_bits (vectype);
  if (element_bits != memory_bits)
    /* For now the vector elements must be the same width as the
       memory elements.  */
    return false;

  /* Work out which function we need.  */
  internal_fn ifn, alt_ifn, alt_ifn2;
  if (read_p)
    {
      ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
      alt_ifn = IFN_MASK_GATHER_LOAD;
      /* When target supports MASK_LEN_GATHER_LOAD, we always
	 use MASK_LEN_GATHER_LOAD regardless whether len and
	 mask are valid or not.  */
      alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
    }
  else
    {
      ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
      alt_ifn = IFN_MASK_SCATTER_STORE;
      /* When target supports MASK_LEN_SCATTER_STORE, we always
	 use MASK_LEN_SCATTER_STORE regardless whether len and
	 mask are valid or not.  */
      alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
    }

  /* Try the given offset type first, then keep doubling its precision
     until either the target supports the combination or widening can
     no longer help.  */
  for (;;)
    {
      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
      if (!offset_vectype)
	return false;

      /* Test whether the target supports this combination.  */
      if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
						  offset_vectype, scale))
	{
	  *ifn_out = ifn;
	  *offset_vectype_out = offset_vectype;
	  return true;
	}
      /* An unmasked operation can also use the masked variant (with an
	 all-true mask supplied later).  */
      else if (!masked_p
	       && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
							  memory_type,
							  offset_vectype,
							  scale))
	{
	  *ifn_out = alt_ifn;
	  *offset_vectype_out = offset_vectype;
	  return true;
	}
      else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
						       memory_type,
						       offset_vectype, scale))
	{
	  *ifn_out = alt_ifn2;
	  *offset_vectype_out = offset_vectype;
	  return true;
	}

      /* Give up once the offset is at least pointer-wide and at least
	 element-wide; widening further cannot enable new combinations.  */
      if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
	  && TYPE_PRECISION (offset_type) >= element_bits)
	return false;

      offset_type = build_nonstandard_integer_type
	(TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
    }
}
3990 | |
3991 | /* STMT_INFO is a call to an internal gather load or scatter store function. |
3992 | Describe the operation in INFO. */ |
3993 | |
3994 | static void |
3995 | vect_describe_gather_scatter_call (stmt_vec_info stmt_info, |
3996 | gather_scatter_info *info) |
3997 | { |
3998 | gcall *call = as_a <gcall *> (p: stmt_info->stmt); |
3999 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
4000 | data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
4001 | |
4002 | info->ifn = gimple_call_internal_fn (gs: call); |
4003 | info->decl = NULL_TREE; |
4004 | info->base = gimple_call_arg (gs: call, index: 0); |
4005 | info->offset = gimple_call_arg (gs: call, index: 1); |
4006 | info->offset_dt = vect_unknown_def_type; |
4007 | info->offset_vectype = NULL_TREE; |
4008 | info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); |
4009 | info->element_type = TREE_TYPE (vectype); |
4010 | info->memory_type = TREE_TYPE (DR_REF (dr)); |
4011 | } |
4012 | |
/* Return true if a non-affine read or write in STMT_INFO is suitable for a
   gather load or scatter store.  Describe the operation in *INFO if so.
   LOOP_VINFO describes the loop being vectorized; the analysis decomposes
   the address of the data reference into a loop-invariant BASE plus a
   loop-variant SSA_NAME offset scaled by a constant, which is the form
   gather/scatter instructions require.  */

bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
			   gather_scatter_info *info)
{
  HOST_WIDE_INT scale = 1;
  poly_int64 pbitpos, pbitsize;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl = NULL_TREE, base, off;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree memory_type = TREE_TYPE (DR_REF (dr));
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;
  internal_fn ifn;
  tree offset_vectype;
  bool masked_p = false;

  /* See whether this is already a call to a gather/scatter internal function.
     If not, see whether it's a masked load or store.  */
  gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
  if (call && gimple_call_internal_p (gs: call))
    {
      ifn = gimple_call_internal_fn (gs: call);
      if (internal_gather_scatter_fn_p (ifn))
	{
	  vect_describe_gather_scatter_call (stmt_info, info);
	  return true;
	}
      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    }

  /* True if we should aim to use internal functions rather than
     built-in functions.  */
  bool use_ifn_p = (DR_IS_READ (dr)
		    ? supports_vec_gather_load_p (TYPE_MODE (vectype))
		    : supports_vec_scatter_store_p (TYPE_MODE (vectype)));

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (masked_p
      && TREE_CODE (base) == MEM_REF
      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
      && integer_zerop (TREE_OPERAND (base, 1))
      && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
      if (is_gimple_assign (gs: def_stmt)
	  && gimple_assign_rhs_code (gs: def_stmt) == ADDR_EXPR)
	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    }

  /* The gather and scatter builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
			      &punsignedp, &reversep, &pvolatilep);
  if (reversep)
    return false;

  /* PR 107346.  Packed structs can have fields at offsets that are not
     multiples of BITS_PER_UNIT.  Do not use gather/scatters in such cases.  */
  if (!multiple_p (a: pbitpos, BITS_PER_UNIT))
    return false;

  poly_int64 pbytepos = exact_div (a: pbitpos, BITS_PER_UNIT);

  /* Fold any constant MEM_REF offset into OFF and strip the MEM_REF,
     so BASE becomes a plain address.  */
  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  if (off == NULL_TREE)
	    off = wide_int_to_tree (sizetype, cst: mem_ref_offset (base));
	  else
	    off = size_binop (PLUS_EXPR, off,
			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* If base is not loop invariant, either off is 0, then we start with just
     the constant offset in the loop invariant BASE and continue with base
     as OFF, otherwise give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and use that as off, but for now punt.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!integer_zerop (off))
	return false;
      off = base;
      base = size_int (pbytepos);
    }
  /* Otherwise put base + constant offset into the loop invariant BASE
     and continue with OFF.  */
  else
    {
      base = fold_convert (sizetype, base);
      base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
    }

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  Each iteration either strips one
     operation from OFF and continues, or breaks out when no further
     peeling is possible; reaching a CASE_CONVERT can also terminate the
     loop by setting OFFTYPE.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      if (TREE_CODE (off) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);

	  /* A fully loop-invariant offset leaves nothing to vectorize.  */
	  if (expr_invariant_in_loop_p (loop, off))
	    return false;

	  if (gimple_code (g: def_stmt) != GIMPLE_ASSIGN)
	    break;

	  op0 = gimple_assign_rhs1 (gs: def_stmt);
	  code = gimple_assign_rhs_code (gs: def_stmt);
	  op1 = gimple_assign_rhs2 (gs: def_stmt);
	}
      else
	{
	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
	    return false;
	  code = TREE_CODE (off);
	  extract_ops_from_tree (expr: off, code: &code, op0: &op0, op1: &op1);
	}
      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	  /* Move the invariant addend (scaled by the current SCALE) into
	     BASE and keep peeling the variant operand.  */
	  if (expr_invariant_in_loop_p (loop, op0))
	    {
	      add = op0;
	      off = op1;
	    do_add:
	      add = fold_convert (sizetype, add);
	      if (scale != 1)
		add = size_binop (MULT_EXPR, add, size_int (scale));
	      base = size_binop (PLUS_EXPR, base, add);
	      continue;
	    }
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = op1;
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MINUS_EXPR:
	  /* A subtracted invariant becomes a negated addend.  */
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = fold_convert (sizetype, op1);
	      add = size_binop (MINUS_EXPR, size_zero_node, add);
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MULT_EXPR:
	  if (scale == 1 && tree_fits_shwi_p (op1))
	    {
	      int new_scale = tree_to_shwi (op1);
	      /* Only treat this as a scaling operation if the target
		 supports it for at least some offset type.  */
	      if (use_ifn_p
		  && !vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						signed_char_type_node,
						scale: new_scale, ifn_out: &ifn,
						offset_vectype_out: &offset_vectype)
		  && !vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						unsigned_char_type_node,
						scale: new_scale, ifn_out: &ifn,
						offset_vectype_out: &offset_vectype))
		break;
	      scale = new_scale;
	      off = op0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  off = op0;
	  continue;
	CASE_CONVERT:
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;

	  /* Don't include the conversion if the target is happy with
	     the current offset type.  */
	  if (use_ifn_p
	      && TREE_CODE (off) == SSA_NAME
	      && !POINTER_TYPE_P (TREE_TYPE (off))
	      && vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr),
					   masked_p, vectype, memory_type,
					   TREE_TYPE (off), scale, ifn_out: &ifn,
					   offset_vectype_out: &offset_vectype))
	    break;

	  /* Same-precision conversions are transparent; look through.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      continue;
	    }

	  /* Include the conversion if it is widening and we're using
	     the IFN path or the target can handle the converted from
	     offset or the current size is not already the same as the
	     data vector element size.  */
	  if ((TYPE_PRECISION (TREE_TYPE (op0))
	       < TYPE_PRECISION (TREE_TYPE (off)))
	      && (use_ifn_p
		  || (DR_IS_READ (dr)
		      ? (targetm.vectorize.builtin_gather
			 && targetm.vectorize.builtin_gather (vectype,
							      TREE_TYPE (op0),
							      scale))
		      : (targetm.vectorize.builtin_scatter
			 && targetm.vectorize.builtin_scatter (vectype,
							       TREE_TYPE (op0),
							       scale)))
		  || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
				       TYPE_SIZE (TREE_TYPE (vectype)), flags: 0)))
	    {
	      off = op0;
	      offtype = TREE_TYPE (off);
	      STRIP_NOPS (off);
	      continue;
	    }
	  break;
	default:
	  break;
	}
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return false;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  /* Ask the target how the decomposed access can be implemented:
     either an internal function or a target builtin decl.  */
  if (use_ifn_p)
    {
      if (!vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type: offtype, scale,
				     ifn_out: &ifn, offset_vectype_out: &offset_vectype))
	ifn = IFN_LAST;
      decl = NULL_TREE;
    }
  else
    {
      if (DR_IS_READ (dr))
	{
	  if (targetm.vectorize.builtin_gather)
	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
	}
      else
	{
	  if (targetm.vectorize.builtin_scatter)
	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
	}
      ifn = IFN_LAST;
      /* The offset vector type will be read from DECL when needed.  */
      offset_vectype = NULL_TREE;
    }

  info->ifn = ifn;
  info->decl = decl;
  info->base = base;
  info->offset = off;
  info->offset_dt = vect_unknown_def_type;
  info->offset_vectype = offset_vectype;
  info->scale = scale;
  info->element_type = TREE_TYPE (vectype);
  info->memory_type = memory_type;
  return true;
}
4317 | |
/* Find the data references in STMT, analyze them with respect to LOOP and
   append them to DATAREFS.  Return false if datarefs in this stmt cannot
   be handled.  GROUP_ID is recorded in DATAREF_GROUPS (when non-null) for
   each pushed dataref so callers can associate refs with their source
   statement group.  */

opt_result
vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
			       vec<data_reference_p> *datarefs,
			       vec<int> *dataref_groups, int group_id)
{
  /* We can ignore clobbers for dataref analysis - they are removed during
     loop vectorization and BB vectorization checks dependences with a
     stmt walk.  */
  if (gimple_clobber_p (s: stmt))
    return opt_result::success ();

  if (gimple_has_volatile_ops (stmt))
    return opt_result::failure_at (loc: stmt, fmt: "not vectorized: volatile type: %G",
				   stmt);

  if (stmt_can_throw_internal (cfun, stmt))
    return opt_result::failure_at (loc: stmt,
				   fmt: "not vectorized:"
				   " statement can throw an exception: %G",
				   stmt);

  auto_vec<data_reference_p, 2> refs;
  opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
  if (!res)
    return res;

  /* A statement without memory references contributes nothing.  */
  if (refs.is_empty ())
    return opt_result::success ();

  /* Only a single data reference per statement is supported.  */
  if (refs.length () > 1)
    {
      while (!refs.is_empty ())
	free_data_ref (refs.pop ());
      return opt_result::failure_at (loc: stmt,
				     fmt: "not vectorized: more than one "
				     "data ref in stmt: %G", stmt);
    }

  data_reference_p dr = refs.pop ();
  /* Calls can only carry a dataref when they are the masked load/store
     internal functions.  */
  if (gcall *call = dyn_cast <gcall *> (p: stmt))
    if (!gimple_call_internal_p (gs: call)
	|| (gimple_call_internal_fn (gs: call) != IFN_MASK_LOAD
	    && gimple_call_internal_fn (gs: call) != IFN_MASK_STORE))
      {
	free_data_ref (dr);
	return opt_result::failure_at (loc: stmt,
				       fmt: "not vectorized: dr in a call %G", stmt);
      }

  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      free_data_ref (dr);
      return opt_result::failure_at (loc: stmt,
				     fmt: "not vectorized:"
				     " statement is an unsupported"
				     " bitfield access %G", stmt);
    }

  if (DR_BASE_ADDRESS (dr)
      && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    {
      free_data_ref (dr);
      return opt_result::failure_at (loc: stmt,
				     fmt: "not vectorized:"
				     " base addr of dr is a constant\n");
    }

  /* Check whether this may be a SIMD lane access and adjust the
     DR to make it easier for us to handle it.  */
  if (loop
      && loop->simduid
      && (!DR_BASE_ADDRESS (dr)
	  || !DR_OFFSET (dr)
	  || !DR_INIT (dr)
	  || !DR_STEP (dr)))
    {
      /* Re-analyze the ref relative to the innermost loop containing
	 STMT; a SIMD lane access has a constant-zero step there.  */
      struct data_reference *newdr
	= create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
			   DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
      if (DR_BASE_ADDRESS (newdr)
	  && DR_OFFSET (newdr)
	  && DR_INIT (newdr)
	  && DR_STEP (newdr)
	  && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
	  && integer_zerop (DR_STEP (newdr)))
	{
	  tree base_address = DR_BASE_ADDRESS (newdr);
	  tree off = DR_OFFSET (newdr);
	  tree step = ssize_int (1);
	  if (integer_zerop (off)
	      && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
	    {
	      off = TREE_OPERAND (base_address, 1);
	      base_address = TREE_OPERAND (base_address, 0);
	    }
	  STRIP_NOPS (off);
	  /* Peel off a constant element-size factor from the offset.  */
	  if (TREE_CODE (off) == MULT_EXPR
	      && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
	    {
	      step = TREE_OPERAND (off, 1);
	      off = TREE_OPERAND (off, 0);
	      STRIP_NOPS (off);
	    }
	  if (CONVERT_EXPR_P (off)
	      && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
		  < TYPE_PRECISION (TREE_TYPE (off))))
	    off = TREE_OPERAND (off, 0);
	  if (TREE_CODE (off) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (off);
	      /* Look through widening conversion.  */
	      if (is_gimple_assign (gs: def)
		  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
		{
		  tree rhs1 = gimple_assign_rhs1 (gs: def);
		  if (TREE_CODE (rhs1) == SSA_NAME
		      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		      && (TYPE_PRECISION (TREE_TYPE (off))
			  > TYPE_PRECISION (TREE_TYPE (rhs1))))
		    def = SSA_NAME_DEF_STMT (rhs1);
		}
	      /* The lane index must come from .GOMP_SIMD_LANE of the
		 loop's own simduid.  */
	      if (is_gimple_call (gs: def)
		  && gimple_call_internal_p (gs: def)
		  && (gimple_call_internal_fn (gs: def) == IFN_GOMP_SIMD_LANE))
		{
		  tree arg = gimple_call_arg (gs: def, index: 0);
		  tree reft = TREE_TYPE (DR_REF (newdr));
		  gcc_assert (TREE_CODE (arg) == SSA_NAME);
		  arg = SSA_NAME_VAR (arg);
		  if (arg == loop->simduid
		      /* For now.  */
		      && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
		    {
		      DR_BASE_ADDRESS (newdr) = base_address;
		      DR_OFFSET (newdr) = ssize_int (0);
		      DR_STEP (newdr) = step;
		      DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
		      DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
		      /* Mark as simd-lane access; the second argument of
			 .GOMP_SIMD_LANE is encoded as -1 - arg2 in aux.  */
		      tree arg2 = gimple_call_arg (gs: def, index: 1);
		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
		      free_data_ref (dr);
		      datarefs->safe_push (obj: newdr);
		      if (dataref_groups)
			dataref_groups->safe_push (obj: group_id);
		      return opt_result::success ();
		    }
		}
	    }
	}
      free_data_ref (newdr);
    }

  datarefs->safe_push (obj: dr);
  if (dataref_groups)
    dataref_groups->safe_push (obj: group_id);
  return opt_result::success ();
}
4481 | |
/* Function vect_analyze_data_refs.

  Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.

   On success *MIN_VF holds the largest vectorization factor required by any
   dataref's vector type.  On failure, when FATAL is non-null, *FATAL is set
   to false for failures a different vector size might still overcome.  */

opt_result
vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
{
  class loop *loop = NULL;
  unsigned int i;
  struct data_reference *dr;
  tree scalar_type;

  DUMP_VECT_SCOPE ("vect_analyze_data_refs");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo))
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Go through the data-refs, check that the analysis succeeded.  Update
     pointer from stmt_vec_info struct to DR and vectype.  */

  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
      poly_uint64 vf;

      gcc_assert (DR_REF (dr));
      stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
      gcc_assert (!stmt_info->dr_aux.dr);
      stmt_info->dr_aux.dr = dr;
      stmt_info->dr_aux.stmt = stmt_info;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
	  || !DR_STEP (dr))
	{
	  bool maybe_gather
	    = DR_IS_READ (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr));
	  bool maybe_scatter
	    = DR_IS_WRITE (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr));

	  /* If target supports vector gather loads or scatter stores,
	     see if they can be used.  Gather/scatter is only attempted
	     for loop vectorization of non-nested accesses.  */
	  if (is_a <loop_vec_info> (p: vinfo)
	      && !nested_in_vect_loop_p (loop, stmt_info))
	    {
	      if (maybe_gather || maybe_scatter)
		{
		  if (maybe_gather)
		    gatherscatter = GATHER;
		  else
		    gatherscatter = SCATTER;
		}
	    }

	  if (gatherscatter == SG_NONE)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not vectorized: data ref analysis "
				 "failed %G", stmt_info->stmt);
	      if (is_a <bb_vec_info> (p: vinfo))
		{
		  /* In BB vectorization the ref can still participate
		     in dependence analysis, we just can't vectorize it.  */
		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
		  continue;
		}
	      return opt_result::failure_at (loc: stmt_info->stmt,
					     fmt: "not vectorized:"
					     " data ref analysis failed: %G",
					     stmt_info->stmt);
	    }
	}

      /* See if this was detected as SIMD lane access.  The aux values
	 -1..-4 are the encodings stored by vect_find_stmt_data_reference.  */
      if (dr->aux == (void *)-1
	  || dr->aux == (void *)-2
	  || dr->aux == (void *)-3
	  || dr->aux == (void *)-4)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (loc: stmt_info->stmt,
					   fmt: "not vectorized:"
					   " data ref analysis failed: %G",
					   stmt_info->stmt);
	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
	    = -(uintptr_t) dr->aux;
	}

      tree base = get_base_address (DR_REF (dr));
      if (base && VAR_P (base) && DECL_NONALIASED (base))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: base object not addressable "
			     "for stmt: %G", stmt_info->stmt);
	  if (is_a <bb_vec_info> (p: vinfo))
	    {
	      /* In BB vectorization the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  return opt_result::failure_at (loc: stmt_info->stmt,
					 fmt: "not vectorized: base object not"
					 " addressable for stmt: %G",
					 stmt_info->stmt);
	}

      /* A non-constant step means a strided access in the loop case.  */
      if (is_a <loop_vec_info> (p: vinfo)
	  && DR_STEP (dr)
	  && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (loc: stmt_info->stmt,
					   fmt: "not vectorized: "
					   "not suitable for strided load %G",
					   stmt_info->stmt);
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* Update DR field in stmt_vec_info struct.  */

      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to the
	 first location accessed by the inner-loop, and analyze it relative to
	 the outer-loop.  */
      if (loop && nested_in_vect_loop_p (loop, stmt_info))
	{
	  /* Build a reference to the first location accessed by the
	     inner loop: *(BASE + INIT + OFFSET).  By construction,
	     this address must be invariant in the inner loop, so we
	     can consider it as being used in the outer loop.  */
	  tree base = unshare_expr (DR_BASE_ADDRESS (dr));
	  tree offset = unshare_expr (DR_OFFSET (dr));
	  tree init = unshare_expr (DR_INIT (dr));
	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
					  init, offset);
	  tree init_addr = fold_build_pointer_plus (base, init_offset);
	  tree init_ref = build_fold_indirect_ref (init_addr);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "analyze in outer loop: %T\n", init_ref);

	  opt_result res
	    = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
				    init_ref, loop, stmt_info->stmt);
	  if (!res)
	    /* dr_analyze_innermost already explained the failure.  */
	    return res;

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\touter base_address: %T\n"
			     "\touter offset from base address: %T\n"
			     "\touter constant offset from base address: %T\n"
			     "\touter step: %T\n"
			     "\touter base alignment: %d\n\n"
			     "\touter base misalignment: %d\n"
			     "\touter offset alignment: %d\n"
			     "\touter step alignment: %d\n",
			     STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
			     STMT_VINFO_DR_OFFSET (stmt_info),
			     STMT_VINFO_DR_INIT (stmt_info),
			     STMT_VINFO_DR_STEP (stmt_info),
			     STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
			     STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
	}

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: no vectype for stmt: %G",
			       stmt_info->stmt);
	      dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  if (is_a <bb_vec_info> (p: vinfo))
	    {
	      /* No vector type is fine, the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  /* A different vector size might still provide a vectype,
	     so this failure is not fatal.  */
	  if (fatal)
	    *fatal = false;
	  return opt_result::failure_at (loc: stmt_info->stmt,
					 fmt: "not vectorized:"
					 " no vectype for stmt: %G"
					 " scalar_type: %T\n",
					 stmt_info->stmt, scalar_type);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "got vectype for stmt: %G%T\n",
			     stmt_info->stmt, vectype);
	}

      /* Adjust the minimal vectorization factor according to the
	 vector type.  */
      vf = TYPE_VECTOR_SUBPARTS (node: vectype);
      *min_vf = upper_bound (a: *min_vf, b: vf);

      /* Leave the BB vectorizer to pick the vector type later, based on
	 the final dataref group size and SLP node size.  */
      if (is_a <loop_vec_info> (p: vinfo))
	STMT_VINFO_VECTYPE (stmt_info) = vectype;

      if (gatherscatter != SG_NONE)
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_info,
					  loop_vinfo: as_a <loop_vec_info> (p: vinfo),
					  info: &gs_info)
	      || !get_vectype_for_scalar_type (vinfo,
					       TREE_TYPE (gs_info.offset)))
	    {
	      if (fatal)
		*fatal = false;
	      return opt_result::failure_at
			(loc: stmt_info->stmt,
			 fmt: (gatherscatter == GATHER)
			 ? "not vectorized: not suitable for gather load %G"
			 : "not vectorized: not suitable for scatter store %G",
			 stmt_info->stmt);
	    }
	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
	}
    }

  /* We used to stop processing and prune the list here.  Verify we no
     longer need to.  */
  gcc_assert (i == datarefs.length ());

  return opt_result::success ();
}
4747 | |
4748 | |
4749 | /* Function vect_get_new_vect_var. |
4750 | |
4751 | Returns a name for a new variable. The current naming scheme appends the |
4752 | prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to |
4753 | the name of vectorizer generated variables, and appends that to NAME if |
4754 | provided. */ |
4755 | |
4756 | tree |
4757 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) |
4758 | { |
4759 | const char *prefix; |
4760 | tree new_vect_var; |
4761 | |
4762 | switch (var_kind) |
4763 | { |
4764 | case vect_simple_var: |
4765 | prefix = "vect" ; |
4766 | break; |
4767 | case vect_scalar_var: |
4768 | prefix = "stmp" ; |
4769 | break; |
4770 | case vect_mask_var: |
4771 | prefix = "mask" ; |
4772 | break; |
4773 | case vect_pointer_var: |
4774 | prefix = "vectp" ; |
4775 | break; |
4776 | default: |
4777 | gcc_unreachable (); |
4778 | } |
4779 | |
4780 | if (name) |
4781 | { |
4782 | char* tmp = concat (prefix, "_" , name, NULL); |
4783 | new_vect_var = create_tmp_reg (type, tmp); |
4784 | free (ptr: tmp); |
4785 | } |
4786 | else |
4787 | new_vect_var = create_tmp_reg (type, prefix); |
4788 | |
4789 | return new_vect_var; |
4790 | } |
4791 | |
4792 | /* Like vect_get_new_vect_var but return an SSA name. */ |
4793 | |
4794 | tree |
4795 | vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name) |
4796 | { |
4797 | const char *prefix; |
4798 | tree new_vect_var; |
4799 | |
4800 | switch (var_kind) |
4801 | { |
4802 | case vect_simple_var: |
4803 | prefix = "vect" ; |
4804 | break; |
4805 | case vect_scalar_var: |
4806 | prefix = "stmp" ; |
4807 | break; |
4808 | case vect_pointer_var: |
4809 | prefix = "vectp" ; |
4810 | break; |
4811 | default: |
4812 | gcc_unreachable (); |
4813 | } |
4814 | |
4815 | if (name) |
4816 | { |
4817 | char* tmp = concat (prefix, "_" , name, NULL); |
4818 | new_vect_var = make_temp_ssa_name (type, NULL, name: tmp); |
4819 | free (ptr: tmp); |
4820 | } |
4821 | else |
4822 | new_vect_var = make_temp_ssa_name (type, NULL, name: prefix); |
4823 | |
4824 | return new_vect_var; |
4825 | } |
4826 | |
/* Duplicate points-to info on NAME from DR_INFO's data reference.  The
   alignment recorded in the copied info is deliberately invalidated
   because it described only the base pointer, not the full address
   NAME computes.  */

static void
vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
{
  duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
  /* DR_PTR_INFO is for a base SSA name, not including constant or
     variable offsets in the ref so its alignment info does not apply.  */
  mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
}
4837 | |
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT_INFO: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is to be added to the initial address.
   LOOP: Specify relative to which loop-nest should the address be computed.
         For example, when the dataref is in an inner-loop nested in an
	 outer-loop that is now being vectorized, LOOP can be either the
	 outer-loop, or the inner-loop.  The first memory location accessed
	 by the following dataref ('in' points to short):

		for (i=0; i<N; i++)
		   for (j=0; j<M; j++)
		     s += in[i+j]

	 is as follows:
	 if LOOP=i_loop:	&in		(relative to i_loop)
	 if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

tree
vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
				      gimple_seq *new_stmt_list,
				      tree offset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  const char *base_name;
  tree addr_base;
  tree dest;
  gimple_seq seq = NULL;
  tree vect_ptr_type;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);

  tree data_ref_base = unshare_expr (drb->base_address);
  tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, check_outer: true));
  tree init = unshare_expr (drb->init);

  /* For BB vectorization the address is taken directly from the ref
     below, so offset and init are not used.  */
  if (loop_vinfo)
    base_name = get_name (data_ref_base);
  else
    {
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = get_name (DR_REF (dr));
    }

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR,
			    fold_convert (sizetype, base_offset),
			    fold_convert (sizetype, init));

  if (offset)
    {
      offset = fold_convert (sizetype, offset);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, offset);
    }

  /* base + base_offset */
  if (loop_vinfo)
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
  else
    addr_base = build1 (ADDR_EXPR,
			build_pointer_type (TREE_TYPE (DR_REF (dr))),
			/* Strip zero offset components since we don't need
			   them and they can confuse late diagnostics if
			   we CSE them wrongly.  See PR106904 for example.  */
			unshare_expr (strip_zero_offset_components
								(DR_REF (dr))));

  /* Gimplify the address expression into DEST, appending any new
     statements to NEW_STMT_LIST for the caller to insert.  */
  vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
  dest = vect_get_new_vect_var (type: vect_ptr_type, var_kind: vect_pointer_var, name: base_name);
  addr_base = force_gimple_operand (addr_base, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (addr_base) == SSA_NAME
      /* We should only duplicate pointer info to newly created SSA names.  */
      && SSA_NAME_VAR (addr_base) == dest)
    {
      gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
      vect_duplicate_ssa_name_ptr_info (name: addr_base, dr_info);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);

  return addr_base;
}
4940 | |
4941 | |
4942 | /* Function vect_create_data_ref_ptr. |
4943 | |
4944 | Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first |
4945 | location accessed in the loop by STMT_INFO, along with the def-use update |
4946 | chain to appropriately advance the pointer through the loop iterations. |
4947 | Also set aliasing information for the pointer. This pointer is used by |
4948 | the callers to this function to create a memory reference expression for |
4949 | vector load/store access. |
4950 | |
4951 | Input: |
4952 | 1. STMT_INFO: a stmt that references memory. Expected to be of the form |
4953 | GIMPLE_ASSIGN <name, data-ref> or |
4954 | GIMPLE_ASSIGN <data-ref, name>. |
4955 | 2. AGGR_TYPE: the type of the reference, which should be either a vector |
4956 | or an array. |
4957 | 3. AT_LOOP: the loop where the vector memref is to be created. |
4958 | 4. OFFSET (optional): a byte offset to be added to the initial address |
4959 | accessed by the data-ref in STMT_INFO. |
4960 | 5. BSI: location where the new stmts are to be placed if there is no loop |
4961 | 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain |
4962 | pointing to the initial address. |
   7. IV_STEP (optional, defaults to NULL): the amount that should be added
        to the IV during each iteration of the loop.  NULL says to move
        by one copy of AGGR_TYPE up or down, depending on the step of the
        data reference.
4967 | |
4968 | Output: |
4969 | 1. Declare a new ptr to vector_type, and have it point to the base of the |
4970 | data reference (initial addressed accessed by the data reference). |
4971 | For example, for vector of type V8HI, the following code is generated: |
4972 | |
4973 | v8hi *ap; |
4974 | ap = (v8hi *)initial_address; |
4975 | |
      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init] + OFFSET;
4982 | |
4983 | Return the initial_address in INITIAL_ADDRESS. |
4984 | |
4985 | 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also |
4986 | update the pointer in each iteration of the loop. |
4987 | |
4988 | Return the increment stmt that updates the pointer in PTR_INCR. |
4989 | |
4990 | 3. Return the pointer. */ |
4991 | |
tree
vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
			  tree aggr_type, class loop *at_loop, tree offset,
			  tree *initial_address, gimple_stmt_iterator *gsi,
			  gimple **ptr_incr, bool only_init,
			  tree iv_step)
{
  const char *base_name;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  class loop *loop = NULL;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = NULL;
  tree aggr_ptr_type;
  tree aggr_ptr;
  tree new_temp;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree aggr_ptr_init;
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  tree aptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple *incr;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);

  /* When no IV_STEP is supplied the step is derived from
     TYPE_SIZE_UNIT (AGGR_TYPE) below, so the type must be an array
     or a vector for that size to be meaningful.  */
  gcc_assert (iv_step != NULL_TREE
	      || TREE_CODE (aggr_type) == ARRAY_TYPE
	      || TREE_CODE (aggr_type) == VECTOR_TYPE);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      containing_loop = (gimple_bb (g: stmt_info->stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      /* Basic-block vectorization: there is no loop to advance the
	 pointer in, so force ONLY_INIT and report no increment stmt.  */
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = get_name (DR_BASE_ADDRESS (dr));

  if (dump_enabled_p ())
    {
      tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
      dump_printf_loc (MSG_NOTE, vect_location,
		       "create %s-pointer variable to type: %T" ,
		       get_tree_code_name (TREE_CODE (aggr_type)),
		       aggr_type);
      if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing an array ref: " );
      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing a vector ref: " );
      else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing a record based array ref: " );
      else
	dump_printf (MSG_NOTE, "  vectorizing a pointer ref: " );
      dump_printf (MSG_NOTE, "%T\n" , DR_BASE_OBJECT (dr));
    }

  /* (1) Create the new aggregate-pointer variable.
     Vector and array types inherit the alias set of their component
     type by default so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregated data
     reference because it is not addressable.  */
  bool need_ref_all = false;
  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
			      get_alias_set (DR_REF (dr))))
    need_ref_all = true;
  /* Likewise for any of the data references in the stmt group.  */
  else if (DR_GROUP_SIZE (stmt_info) > 1)
    {
      stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
      do
	{
	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
				      get_alias_set (DR_REF (sdr))))
	    {
	      need_ref_all = true;
	      break;
	    }
	  sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
	}
      while (sinfo);
    }
  aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
					       need_ref_all);
  aggr_ptr = vect_get_new_vect_var (type: aggr_ptr_type, var_kind: vect_pointer_var, name: base_name);


  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done be step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
		...
		vp2 = vp1 + step
		goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

	vp0 = &base_addr;
	LOOP:   vp1 = phi(vp0,vp2)
		...
        inner:     vp3 = phi(vp1,vp4)
	           vp4 = vp3 + inner_step
	           if () goto inner
		...
		vp2 = vp1 + step
		if () goto LOOP   */

  /* (2) Calculate the initial address of the aggregate-pointer, and set
     the aggregate-pointer to point to it before the loop.  */

  /* Create: (&(base[init_val]+offset) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (vinfo,
						   stmt_info, new_stmt_list: &new_stmt_list,
						   offset);
  if (new_stmt_list)
    {
      if (pe)
	{
	  /* Loop case: materialize the address computation on the
	     preheader edge; this must not split the edge into a new
	     basic block.  */
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;
  aggr_ptr_init = new_temp;

  /* (3) Handle the updating of the aggregate-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    aptr = aggr_ptr_init;
  else
    {
      /* Accesses to invariant addresses should be handled specially
	 by the caller.  */
      tree step = vect_dr_behavior (vinfo, dr_info)->step;
      gcc_assert (!integer_zerop (step));

      if (iv_step == NULL_TREE)
	{
	  /* The step of the aggregate pointer is the type size,
	     negated for downward accesses.  */
	  iv_step = TYPE_SIZE_UNIT (aggr_type);
	  if (tree_int_cst_sgn (step) == -1)
	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
	}

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (aggr_ptr_init, PLUS_EXPR,
		 fold_convert (aggr_ptr_type, iv_step),
		 aggr_ptr, loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (i: incr_gsi);

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (name: indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (name: indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      /* Use the value of the IV before the increment inside the loop.  */
      aptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return aptr;


  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if exists.  */

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      standard_iv_increment_position (containing_loop, &incr_gsi,
				      &insert_after);
      create_iv (aptr, PLUS_EXPR, fold_convert (aggr_ptr_type, DR_STEP (dr)),
		 aggr_ptr, containing_loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (i: incr_gsi);

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (name: indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (name: indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      return indx_before_incr;
    }
  else
    /* Not reachable: the ONLY_INIT case returned above.  */
    gcc_unreachable ();
}
5218 | |
5219 | |
5220 | /* Function bump_vector_ptr |
5221 | |
5222 | Increment a pointer (to a vector type) by vector-size. If requested, |
5223 | i.e. if PTR-INCR is given, then also connect the new increment stmt |
5224 | to the existing def-use update-chain of the pointer, by modifying |
5225 | the PTR_INCR as illustrated below: |
5226 | |
5227 | The pointer def-use update-chain before this function: |
5228 | DATAREF_PTR = phi (p_0, p_2) |
5229 | .... |
5230 | PTR_INCR: p_2 = DATAREF_PTR + step |
5231 | |
5232 | The pointer def-use update-chain after this function: |
5233 | DATAREF_PTR = phi (p_0, p_2) |
5234 | .... |
5235 | NEW_DATAREF_PTR = DATAREF_PTR + BUMP |
5236 | .... |
5237 | PTR_INCR: p_2 = NEW_DATAREF_PTR + step |
5238 | |
5239 | Input: |
5240 | DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated |
5241 | in the loop. |
5242 | PTR_INCR - optional. The stmt that updates the pointer in each iteration of |
5243 | the loop. The increment amount across iterations is expected |
5244 | to be vector_size. |
5245 | BSI - location where the new update stmt is to be placed. |
5246 | STMT_INFO - the original scalar memory-access stmt that is being vectorized. |
5247 | BUMP - optional. The offset by which to bump the pointer. If not given, |
5248 | the offset is assumed to be vector_size. |
5249 | |
5250 | Output: Return NEW_DATAREF_PTR as illustrated above. |
5251 | |
5252 | */ |
5253 | |
5254 | tree |
5255 | bump_vector_ptr (vec_info *vinfo, |
5256 | tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi, |
5257 | stmt_vec_info stmt_info, tree bump) |
5258 | { |
5259 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
5260 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
5261 | tree update = TYPE_SIZE_UNIT (vectype); |
5262 | gimple *incr_stmt; |
5263 | ssa_op_iter iter; |
5264 | use_operand_p use_p; |
5265 | tree new_dataref_ptr; |
5266 | |
5267 | if (bump) |
5268 | update = bump; |
5269 | |
5270 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
5271 | new_dataref_ptr = copy_ssa_name (var: dataref_ptr); |
5272 | else if (is_gimple_min_invariant (dataref_ptr)) |
5273 | /* When possible avoid emitting a separate increment stmt that will |
5274 | force the addressed object addressable. */ |
5275 | return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr), |
5276 | fold_build2 (MEM_REF, |
5277 | TREE_TYPE (TREE_TYPE (dataref_ptr)), |
5278 | dataref_ptr, |
5279 | fold_convert (ptr_type_node, update))); |
5280 | else |
5281 | new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); |
5282 | incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR, |
5283 | dataref_ptr, update); |
5284 | vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi); |
5285 | /* Fold the increment, avoiding excessive chains use-def chains of |
5286 | those, leading to compile-time issues for passes until the next |
5287 | forwprop pass which would do this as well. */ |
5288 | gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt); |
5289 | if (fold_stmt (&fold_gsi, follow_all_ssa_edges)) |
5290 | { |
5291 | incr_stmt = gsi_stmt (i: fold_gsi); |
5292 | update_stmt (s: incr_stmt); |
5293 | } |
5294 | |
5295 | /* Copy the points-to information if it exists. */ |
5296 | if (DR_PTR_INFO (dr)) |
5297 | { |
5298 | duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr)); |
5299 | mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr)); |
5300 | } |
5301 | |
5302 | if (!ptr_incr) |
5303 | return new_dataref_ptr; |
5304 | |
5305 | /* Update the vector-pointer's cross-iteration increment. */ |
5306 | FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE) |
5307 | { |
5308 | tree use = USE_FROM_PTR (use_p); |
5309 | |
5310 | if (use == dataref_ptr) |
5311 | SET_USE (use_p, new_dataref_ptr); |
5312 | else |
5313 | gcc_assert (operand_equal_p (use, update, 0)); |
5314 | } |
5315 | |
5316 | return new_dataref_ptr; |
5317 | } |
5318 | |
5319 | |
5320 | /* Copy memory reference info such as base/clique from the SRC reference |
5321 | to the DEST MEM_REF. */ |
5322 | |
5323 | void |
5324 | vect_copy_ref_info (tree dest, tree src) |
5325 | { |
5326 | if (TREE_CODE (dest) != MEM_REF) |
5327 | return; |
5328 | |
5329 | tree src_base = src; |
5330 | while (handled_component_p (t: src_base)) |
5331 | src_base = TREE_OPERAND (src_base, 0); |
5332 | if (TREE_CODE (src_base) != MEM_REF |
5333 | && TREE_CODE (src_base) != TARGET_MEM_REF) |
5334 | return; |
5335 | |
5336 | MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base); |
5337 | MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base); |
5338 | } |
5339 | |
5340 | |
5341 | /* Function vect_create_destination_var. |
5342 | |
5343 | Create a new temporary of type VECTYPE. */ |
5344 | |
5345 | tree |
5346 | vect_create_destination_var (tree scalar_dest, tree vectype) |
5347 | { |
5348 | tree vec_dest; |
5349 | const char *name; |
5350 | char *new_name; |
5351 | tree type; |
5352 | enum vect_var_kind kind; |
5353 | |
5354 | kind = vectype |
5355 | ? VECTOR_BOOLEAN_TYPE_P (vectype) |
5356 | ? vect_mask_var |
5357 | : vect_simple_var |
5358 | : vect_scalar_var; |
5359 | type = vectype ? vectype : TREE_TYPE (scalar_dest); |
5360 | |
5361 | gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); |
5362 | |
5363 | name = get_name (scalar_dest); |
5364 | if (name) |
5365 | new_name = xasprintf ("%s_%u" , name, SSA_NAME_VERSION (scalar_dest)); |
5366 | else |
5367 | new_name = xasprintf ("_%u" , SSA_NAME_VERSION (scalar_dest)); |
5368 | vec_dest = vect_get_new_vect_var (type, var_kind: kind, name: new_name); |
5369 | free (ptr: new_name); |
5370 | |
5371 | return vec_dest; |
5372 | } |
5373 | |
5374 | /* Function vect_grouped_store_supported. |
5375 | |
5376 | Returns TRUE if interleave high and interleave low permutations |
5377 | are supported, and FALSE otherwise. */ |
5378 | |
5379 | bool |
5380 | vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) |
5381 | { |
5382 | machine_mode mode = TYPE_MODE (vectype); |
5383 | |
5384 | /* vect_permute_store_chain requires the group size to be equal to 3 or |
5385 | be a power of two. */ |
5386 | if (count != 3 && exact_log2 (x: count) == -1) |
5387 | { |
5388 | if (dump_enabled_p ()) |
5389 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5390 | "the size of the group of accesses" |
5391 | " is not a power of 2 or not eqaul to 3\n" ); |
5392 | return false; |
5393 | } |
5394 | |
5395 | /* Check that the permutation is supported. */ |
5396 | if (VECTOR_MODE_P (mode)) |
5397 | { |
5398 | unsigned int i; |
5399 | if (count == 3) |
5400 | { |
5401 | unsigned int j0 = 0, j1 = 0, j2 = 0; |
5402 | unsigned int i, j; |
5403 | |
5404 | unsigned int nelt; |
5405 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
5406 | { |
5407 | if (dump_enabled_p ()) |
5408 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5409 | "cannot handle groups of 3 stores for" |
5410 | " variable-length vectors\n" ); |
5411 | return false; |
5412 | } |
5413 | |
5414 | vec_perm_builder sel (nelt, nelt, 1); |
5415 | sel.quick_grow (len: nelt); |
5416 | vec_perm_indices indices; |
5417 | for (j = 0; j < 3; j++) |
5418 | { |
5419 | int nelt0 = ((3 - j) * nelt) % 3; |
5420 | int nelt1 = ((3 - j) * nelt + 1) % 3; |
5421 | int nelt2 = ((3 - j) * nelt + 2) % 3; |
5422 | for (i = 0; i < nelt; i++) |
5423 | { |
5424 | if (3 * i + nelt0 < nelt) |
5425 | sel[3 * i + nelt0] = j0++; |
5426 | if (3 * i + nelt1 < nelt) |
5427 | sel[3 * i + nelt1] = nelt + j1++; |
5428 | if (3 * i + nelt2 < nelt) |
5429 | sel[3 * i + nelt2] = 0; |
5430 | } |
5431 | indices.new_vector (sel, 2, nelt); |
5432 | if (!can_vec_perm_const_p (mode, mode, indices)) |
5433 | { |
5434 | if (dump_enabled_p ()) |
5435 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5436 | "permutation op not supported by target.\n" ); |
5437 | return false; |
5438 | } |
5439 | |
5440 | for (i = 0; i < nelt; i++) |
5441 | { |
5442 | if (3 * i + nelt0 < nelt) |
5443 | sel[3 * i + nelt0] = 3 * i + nelt0; |
5444 | if (3 * i + nelt1 < nelt) |
5445 | sel[3 * i + nelt1] = 3 * i + nelt1; |
5446 | if (3 * i + nelt2 < nelt) |
5447 | sel[3 * i + nelt2] = nelt + j2++; |
5448 | } |
5449 | indices.new_vector (sel, 2, nelt); |
5450 | if (!can_vec_perm_const_p (mode, mode, indices)) |
5451 | { |
5452 | if (dump_enabled_p ()) |
5453 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5454 | "permutation op not supported by target.\n" ); |
5455 | return false; |
5456 | } |
5457 | } |
5458 | return true; |
5459 | } |
5460 | else |
5461 | { |
5462 | /* If length is not equal to 3 then only power of 2 is supported. */ |
5463 | gcc_assert (pow2p_hwi (count)); |
5464 | poly_uint64 nelt = GET_MODE_NUNITS (mode); |
5465 | |
5466 | /* The encoding has 2 interleaved stepped patterns. */ |
5467 | if(!multiple_p (a: nelt, b: 2)) |
5468 | return false; |
5469 | vec_perm_builder sel (nelt, 2, 3); |
5470 | sel.quick_grow (len: 6); |
5471 | for (i = 0; i < 3; i++) |
5472 | { |
5473 | sel[i * 2] = i; |
5474 | sel[i * 2 + 1] = i + nelt; |
5475 | } |
5476 | vec_perm_indices indices (sel, 2, nelt); |
5477 | if (can_vec_perm_const_p (mode, mode, indices)) |
5478 | { |
5479 | for (i = 0; i < 6; i++) |
5480 | sel[i] += exact_div (a: nelt, b: 2); |
5481 | indices.new_vector (sel, 2, nelt); |
5482 | if (can_vec_perm_const_p (mode, mode, indices)) |
5483 | return true; |
5484 | } |
5485 | } |
5486 | } |
5487 | |
5488 | if (dump_enabled_p ()) |
5489 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5490 | "permutation op not supported by target.\n" ); |
5491 | return false; |
5492 | } |
5493 | |
5494 | /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors |
5495 | of type VECTYPE. MASKED_P says whether the masked form is needed. */ |
5496 | |
5497 | internal_fn |
5498 | vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, |
5499 | bool masked_p) |
5500 | { |
5501 | if (vect_lanes_optab_supported_p (name: "vec_mask_len_store_lanes" , |
5502 | optab: vec_mask_len_store_lanes_optab, vectype, |
5503 | count)) |
5504 | return IFN_MASK_LEN_STORE_LANES; |
5505 | else if (masked_p) |
5506 | { |
5507 | if (vect_lanes_optab_supported_p (name: "vec_mask_store_lanes" , |
5508 | optab: vec_mask_store_lanes_optab, vectype, |
5509 | count)) |
5510 | return IFN_MASK_STORE_LANES; |
5511 | } |
5512 | else |
5513 | { |
5514 | if (vect_lanes_optab_supported_p (name: "vec_store_lanes" , |
5515 | optab: vec_store_lanes_optab, vectype, count)) |
5516 | return IFN_STORE_LANES; |
5517 | } |
5518 | return IFN_LAST; |
5519 | } |
5520 | |
5521 | |
5522 | /* Function vect_permute_store_chain. |
5523 | |
5524 | Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be |
5525 | a power of 2 or equal to 3, generate interleave_high/low stmts to reorder |
5526 | the data correctly for the stores. Return the final references for stores |
5527 | in RESULT_CHAIN. |
5528 | |
5529 | E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. |
5530 | The input is 4 vectors each containing 8 elements. We assign a number to |
5531 | each element, the input sequence is: |
5532 | |
5533 | 1st vec: 0 1 2 3 4 5 6 7 |
5534 | 2nd vec: 8 9 10 11 12 13 14 15 |
5535 | 3rd vec: 16 17 18 19 20 21 22 23 |
5536 | 4th vec: 24 25 26 27 28 29 30 31 |
5537 | |
5538 | The output sequence should be: |
5539 | |
5540 | 1st vec: 0 8 16 24 1 9 17 25 |
5541 | 2nd vec: 2 10 18 26 3 11 19 27 |
   3rd vec:   4 12 20 28  5 13 21 29
5543 | 4th vec: 6 14 22 30 7 15 23 31 |
5544 | |
5545 | i.e., we interleave the contents of the four vectors in their order. |
5546 | |
5547 | We use interleave_high/low instructions to create such output. The input of |
5548 | each interleave_high/low operation is two vectors: |
5549 | 1st vec 2nd vec |
5550 | 0 1 2 3 4 5 6 7 |
5551 | the even elements of the result vector are obtained left-to-right from the |
5552 | high/low elements of the first vector. The odd elements of the result are |
5553 | obtained left-to-right from the high/low elements of the second vector. |
5554 | The output of interleave_high will be: 0 4 1 5 |
5555 | and of interleave_low: 2 6 3 7 |
5556 | |
5557 | |
5558 | The permutation is done in log LENGTH stages. In each stage interleave_high |
5559 | and interleave_low stmts are created for each pair of vectors in DR_CHAIN, |
5560 | where the first argument is taken from the first half of DR_CHAIN and the |
   second argument from its second half.
5562 | In our example, |
5563 | |
5564 | I1: interleave_high (1st vec, 3rd vec) |
5565 | I2: interleave_low (1st vec, 3rd vec) |
5566 | I3: interleave_high (2nd vec, 4th vec) |
5567 | I4: interleave_low (2nd vec, 4th vec) |
5568 | |
5569 | The output for the first stage is: |
5570 | |
5571 | I1: 0 16 1 17 2 18 3 19 |
5572 | I2: 4 20 5 21 6 22 7 23 |
5573 | I3: 8 24 9 25 10 26 11 27 |
5574 | I4: 12 28 13 29 14 30 15 31 |
5575 | |
5576 | The output of the second stage, i.e. the final result is: |
5577 | |
5578 | I1: 0 8 16 24 1 9 17 25 |
5579 | I2: 2 10 18 26 3 11 19 27 |
   I3:  4 12 20 28  5 13 21 29
5581 | I4: 6 14 22 30 7 15 23 31. */ |
5582 | |
void
vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
			  unsigned int length,
			  stmt_vec_info stmt_info,
			  gimple_stmt_iterator *gsi,
			  vec<tree> *result_chain)
{
  tree vect1, vect2, high, low;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree perm_mask_low, perm_mask_high;
  tree data_ref;
  tree perm3_mask_low, perm3_mask_high;
  unsigned int i, j, n, log_length = exact_log2 (x: length);

  /* Start with the result chain mirroring the input chain; the
     power-of-two case below then permutes it in place over
     LOG_LENGTH stages.  */
  result_chain->quick_grow (len: length);
  memcpy (dest: result_chain->address (), src: dr_chain.address (),
	  n: length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_store_supported ensures that this is constant.  */
      unsigned int nelt = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant ();
      /* J0/J1/J2 track how many elements have been selected so far
	 from each of the three input vectors; they carry over between
	 the three output vectors.  */
      unsigned int j0 = 0, j1 = 0, j2 = 0;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (len: nelt);
      vec_perm_indices indices;
      for (j = 0; j < 3; j++)
	{
	  /* Rotation offsets selecting which of the 3 interleave slots
	     each of the input vectors feeds for output vector J.  */
	  int nelt0 = ((3 - j) * nelt) % 3;
	  int nelt1 = ((3 - j) * nelt + 1) % 3;
	  int nelt2 = ((3 - j) * nelt + 2) % 3;

	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = j0++;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = nelt + j1++;
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = 0;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = 3 * i + nelt0;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = 3 * i + nelt1;
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = nelt + j2++;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  vect1 = dr_chain[0];
	  vect2 = dr_chain[1];

	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {j, nelt, *, j + 1, nelt + j + 1, *,
				   j + 2, nelt + j + 2, *, ...}>  */
	  data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_low" );
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

	  /* Merge the third input vector into the intermediate result.  */
	  vect1 = data_ref;
	  vect2 = dr_chain[2];
	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
				   6, 7, nelt + j + 2, ...}>  */
	  data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_high" );
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_high);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	  (*result_chain)[j] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The interleave-high/low masks are the same in every stage,
	 so build them once up front.  */
      /* The encoding has 2 interleaved stepped patterns.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (node: vectype);
      vec_perm_builder sel (nelt, 2, 3);
      sel.quick_grow (len: 6);
      for (i = 0; i < 3; i++)
	{
	  sel[i * 2] = i;
	  sel[i * 2 + 1] = i + nelt;
	}
	vec_perm_indices indices (sel, 2, nelt);
	perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	for (i = 0; i < 6; i++)
	  sel[i] += exact_div (a: nelt, b: 2);
	indices.new_vector (sel, 2, nelt);
	perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	for (i = 0, n = log_length; i < n; i++)
	  {
	    for (j = 0; j < length/2; j++)
	      {
		vect1 = dr_chain[j];
		vect2 = dr_chain[j+length/2];

		/* Create interleaving stmt:
		   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
							...}>  */
		high = make_temp_ssa_name (type: vectype, NULL, name: "vect_inter_high" );
		perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
						 vect2, perm_mask_high);
		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
		(*result_chain)[2*j] = high;

		/* Create interleaving stmt:
		   low = VEC_PERM_EXPR <vect1, vect2,
					{nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
					 ...}>  */
		low = make_temp_ssa_name (type: vectype, NULL, name: "vect_inter_low" );
		perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
						 vect2, perm_mask_low);
		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
		(*result_chain)[2*j+1] = low;
	      }
	    /* This stage's outputs become the next stage's inputs.  */
	    memcpy (dest: dr_chain.address (), src: result_chain->address (),
		    n: length * sizeof (tree));
	  }
    }
}
5719 | |
5720 | /* Function vect_setup_realignment |
5721 | |
5722 | This function is called when vectorizing an unaligned load using |
5723 | the dr_explicit_realign[_optimized] scheme. |
5724 | This function generates the following code at the loop prolog: |
5725 | |
5726 | p = initial_addr; |
5727 | x msq_init = *(floor(p)); # prolog load |
5728 | realignment_token = call target_builtin; |
5729 | loop: |
5730 | x msq = phi (msq_init, ---) |
5731 | |
5732 | The stmts marked with x are generated only for the case of |
5733 | dr_explicit_realign_optimized. |
5734 | |
5735 | The code above sets up a new (vector) pointer, pointing to the first |
5736 | location accessed by STMT_INFO, and a "floor-aligned" load using that |
5737 | pointer. It also generates code to compute the "realignment-token" |
5738 | (if the relevant target hook was defined), and creates a phi-node at the |
5739 | loop-header bb whose arguments are the result of the prolog-load (created |
5740 | by this function) and the result of a load that takes place in the loop |
5741 | (to be created by the caller to this function). |
5742 | |
5743 | For the case of dr_explicit_realign_optimized: |
5744 | The caller to this function uses the phi-result (msq) to create the |
5745 | realignment code inside the loop, and sets up the missing phi argument, |
5746 | as follows: |
5747 | loop: |
5748 | msq = phi (msq_init, lsq) |
5749 | lsq = *(floor(p')); # load in loop |
5750 | result = realign_load (msq, lsq, realignment_token); |
5751 | |
5752 | For the case of dr_explicit_realign: |
5753 | loop: |
5754 | msq = *(floor(p)); # load in loop |
5755 | p' = p + (VS-1); |
5756 | lsq = *(floor(p')); # load in loop |
5757 | result = realign_load (msq, lsq, realignment_token); |
5758 | |
5759 | Input: |
5760 | STMT_INFO - (scalar) load stmt to be vectorized. This load accesses |
5761 | a memory location that may be unaligned. |
5762 | BSI - place where new code is to be inserted. |
5763 | ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes |
5764 | is used. |
5765 | |
5766 | Output: |
5767 | REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load |
5768 | target hook, if defined. |
5769 | Return value - the result of the loop-header phi node. */ |
5770 | |
tree
vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info,
			gimple_stmt_iterator *gsi, tree *realignment_token,
			enum dr_alignment_support alignment_support_scheme,
			tree init_addr,
			class loop **at_loop)
{
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  class loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (gs: stmt_info->stmt);
  tree vec_dest;
  gimple *inc;
  tree ptr;
  tree data_ref;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gphi *phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = (gimple_bb (g: stmt_info->stmt))->loop_father;
  class loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    }

  /* This helper only implements the two explicit-realignment schemes.  */
  gcc_assert (alignment_support_scheme == dr_explicit_realign
	      || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }


  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
	(tree_int_cst_compare (t1: outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  /* Determine the virtual operand for the preheader load: prefer the
     preheader argument of the loop-header virtual PHI if there is one,
     otherwise fall back to the VUSE of the statement at the insertion
     point.  */
  tree vuse = NULL_TREE;
  if (loop_for_initial_load)
    {
      pe = loop_preheader_edge (loop_for_initial_load);
      if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
	vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
    }
  if (!vuse)
    vuse = gimple_vuse (g: gsi_stmt (i: *gsi));

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader.  */
      gassign *new_stmt;

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (vinfo, stmt_info, aggr_type: vectype,
				      at_loop: loop_for_initial_load, NULL_TREE,
				      initial_address: &init_addr, NULL, ptr_incr: &inc, only_init: true);
      if (TREE_CODE (ptr) == SSA_NAME)
	new_temp = copy_ssa_name (var: ptr);
      else
	new_temp = make_ssa_name (TREE_TYPE (ptr));
      poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
      tree type = TREE_TYPE (ptr);
      /* Floor-align the address: new_temp = ptr & -align (0 - align is
	 the all-ones mask above the alignment bits).  */
      new_stmt = gimple_build_assign
		   (new_temp, BIT_AND_EXPR, ptr,
		    fold_build2 (MINUS_EXPR, type,
				 build_int_cst (type, 0),
				 build_int_cst (type, align)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      vect_copy_ref_info (dest: data_ref, DR_REF (dr));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
      gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
      gimple_set_vuse (g: new_stmt, vuse);
      /* Insert on the preheader edge when we have one, otherwise just
	 before the load at GSI.  */
      if (pe)
	{
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (gs: new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      gcall *new_stmt;
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
      if (!init_addr)
	{
	  /* Generate the INIT_ADDR computation outside LOOP.  */
	  init_addr = vect_create_addr_base_for_vector_ref (vinfo,
							    stmt_info, new_stmt_list: &stmts,
							    NULL_TREE);
	  if (loop)
	    {
	      pe = loop_preheader_edge (loop);
	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
	      gcc_assert (!new_bb);
	    }
	  else
	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	}

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
	vect_create_destination_var (scalar_dest,
				     vectype: gimple_call_return_type (gs: new_stmt));
      new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
      gimple_call_set_lhs (gs: new_stmt, lhs: new_temp);

      if (compute_in_loop)
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
	{
	  /* Generate the misalignment computation outside LOOP.  */
	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}

      *realignment_token = gimple_call_lhs (gs: new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
	 the value of the parameter and no global variables are touched
	 which makes the builtin a "const" function.  Requiring the
	 builtin to have the "const" attribute makes it unnecessary
	 to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* For dr_explicit_realign everything else happens in the loop body;
     the caller only needed the realignment token computed above.  */
  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);


  /* 5. Create msq = phi <msq_init, lsq> in loop.  The loop-latch argument
     (lsq) is filled in later by the caller.  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (var: vec_dest);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}
5999 | |
6000 | |
6001 | /* Function vect_grouped_load_supported. |
6002 | |
6003 | COUNT is the size of the load group (the number of statements plus the |
6004 | number of gaps). SINGLE_ELEMENT_P is true if there is actually |
6005 | only one statement, with a gap of COUNT - 1. |
6006 | |
6007 | Returns true if a suitable permute exists. */ |
6008 | |
6009 | bool |
6010 | vect_grouped_load_supported (tree vectype, bool single_element_p, |
6011 | unsigned HOST_WIDE_INT count) |
6012 | { |
6013 | machine_mode mode = TYPE_MODE (vectype); |
6014 | |
6015 | /* If this is single-element interleaving with an element distance |
6016 | that leaves unused vector loads around punt - we at least create |
6017 | very sub-optimal code in that case (and blow up memory, |
6018 | see PR65518). */ |
6019 | if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype))) |
6020 | { |
6021 | if (dump_enabled_p ()) |
6022 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6023 | "single-element interleaving not supported " |
6024 | "for not adjacent vector loads\n" ); |
6025 | return false; |
6026 | } |
6027 | |
6028 | /* vect_permute_load_chain requires the group size to be equal to 3 or |
6029 | be a power of two. */ |
6030 | if (count != 3 && exact_log2 (x: count) == -1) |
6031 | { |
6032 | if (dump_enabled_p ()) |
6033 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6034 | "the size of the group of accesses" |
6035 | " is not a power of 2 or not equal to 3\n" ); |
6036 | return false; |
6037 | } |
6038 | |
6039 | /* Check that the permutation is supported. */ |
6040 | if (VECTOR_MODE_P (mode)) |
6041 | { |
6042 | unsigned int i, j; |
6043 | if (count == 3) |
6044 | { |
6045 | unsigned int nelt; |
6046 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
6047 | { |
6048 | if (dump_enabled_p ()) |
6049 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6050 | "cannot handle groups of 3 loads for" |
6051 | " variable-length vectors\n" ); |
6052 | return false; |
6053 | } |
6054 | |
6055 | vec_perm_builder sel (nelt, nelt, 1); |
6056 | sel.quick_grow (len: nelt); |
6057 | vec_perm_indices indices; |
6058 | unsigned int k; |
6059 | for (k = 0; k < 3; k++) |
6060 | { |
6061 | for (i = 0; i < nelt; i++) |
6062 | if (3 * i + k < 2 * nelt) |
6063 | sel[i] = 3 * i + k; |
6064 | else |
6065 | sel[i] = 0; |
6066 | indices.new_vector (sel, 2, nelt); |
6067 | if (!can_vec_perm_const_p (mode, mode, indices)) |
6068 | { |
6069 | if (dump_enabled_p ()) |
6070 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6071 | "shuffle of 3 loads is not supported by" |
6072 | " target\n" ); |
6073 | return false; |
6074 | } |
6075 | for (i = 0, j = 0; i < nelt; i++) |
6076 | if (3 * i + k < 2 * nelt) |
6077 | sel[i] = i; |
6078 | else |
6079 | sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); |
6080 | indices.new_vector (sel, 2, nelt); |
6081 | if (!can_vec_perm_const_p (mode, mode, indices)) |
6082 | { |
6083 | if (dump_enabled_p ()) |
6084 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6085 | "shuffle of 3 loads is not supported by" |
6086 | " target\n" ); |
6087 | return false; |
6088 | } |
6089 | } |
6090 | return true; |
6091 | } |
6092 | else |
6093 | { |
6094 | /* If length is not equal to 3 then only power of 2 is supported. */ |
6095 | gcc_assert (pow2p_hwi (count)); |
6096 | poly_uint64 nelt = GET_MODE_NUNITS (mode); |
6097 | |
6098 | /* The encoding has a single stepped pattern. */ |
6099 | vec_perm_builder sel (nelt, 1, 3); |
6100 | sel.quick_grow (len: 3); |
6101 | for (i = 0; i < 3; i++) |
6102 | sel[i] = i * 2; |
6103 | vec_perm_indices indices (sel, 2, nelt); |
6104 | if (can_vec_perm_const_p (mode, mode, indices)) |
6105 | { |
6106 | for (i = 0; i < 3; i++) |
6107 | sel[i] = i * 2 + 1; |
6108 | indices.new_vector (sel, 2, nelt); |
6109 | if (can_vec_perm_const_p (mode, mode, indices)) |
6110 | return true; |
6111 | } |
6112 | } |
6113 | } |
6114 | |
6115 | if (dump_enabled_p ()) |
6116 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6117 | "extract even/odd not supported by target\n" ); |
6118 | return false; |
6119 | } |
6120 | |
6121 | /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors |
6122 | of type VECTYPE. MASKED_P says whether the masked form is needed. */ |
6123 | |
6124 | internal_fn |
6125 | vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, |
6126 | bool masked_p) |
6127 | { |
6128 | if (vect_lanes_optab_supported_p (name: "vec_mask_len_load_lanes" , |
6129 | optab: vec_mask_len_load_lanes_optab, vectype, |
6130 | count)) |
6131 | return IFN_MASK_LEN_LOAD_LANES; |
6132 | else if (masked_p) |
6133 | { |
6134 | if (vect_lanes_optab_supported_p (name: "vec_mask_load_lanes" , |
6135 | optab: vec_mask_load_lanes_optab, vectype, |
6136 | count)) |
6137 | return IFN_MASK_LOAD_LANES; |
6138 | } |
6139 | else |
6140 | { |
6141 | if (vect_lanes_optab_supported_p (name: "vec_load_lanes" , optab: vec_load_lanes_optab, |
6142 | vectype, count)) |
6143 | return IFN_LOAD_LANES; |
6144 | } |
6145 | return IFN_LAST; |
6146 | } |
6147 | |
6148 | /* Function vect_permute_load_chain. |
6149 | |
6150 | Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be |
6151 | a power of 2 or equal to 3, generate extract_even/odd stmts to reorder |
6152 | the input data correctly. Return the final references for loads in |
6153 | RESULT_CHAIN. |
6154 | |
6155 | E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. |
6156 | The input is 4 vectors each containing 8 elements. We assign a number to each |
6157 | element, the input sequence is: |
6158 | |
6159 | 1st vec: 0 1 2 3 4 5 6 7 |
6160 | 2nd vec: 8 9 10 11 12 13 14 15 |
6161 | 3rd vec: 16 17 18 19 20 21 22 23 |
6162 | 4th vec: 24 25 26 27 28 29 30 31 |
6163 | |
6164 | The output sequence should be: |
6165 | |
6166 | 1st vec: 0 4 8 12 16 20 24 28 |
6167 | 2nd vec: 1 5 9 13 17 21 25 29 |
6168 | 3rd vec: 2 6 10 14 18 22 26 30 |
6169 | 4th vec: 3 7 11 15 19 23 27 31 |
6170 | |
6171 | i.e., the first output vector should contain the first elements of each |
6172 | interleaving group, etc. |
6173 | |
6174 | We use extract_even/odd instructions to create such output. The input of |
6175 | each extract_even/odd operation is two vectors |
6176 | 1st vec 2nd vec |
6177 | 0 1 2 3 4 5 6 7 |
6178 | |
6179 | and the output is the vector of extracted even/odd elements. The output of |
6180 | extract_even will be: 0 2 4 6 |
6181 | and of extract_odd: 1 3 5 7 |
6182 | |
6183 | |
6184 | The permutation is done in log LENGTH stages. In each stage extract_even |
6185 | and extract_odd stmts are created for each pair of vectors in DR_CHAIN in |
6186 | their order. In our example, |
6187 | |
6188 | E1: extract_even (1st vec, 2nd vec) |
6189 | E2: extract_odd (1st vec, 2nd vec) |
6190 | E3: extract_even (3rd vec, 4th vec) |
6191 | E4: extract_odd (3rd vec, 4th vec) |
6192 | |
6193 | The output for the first stage will be: |
6194 | |
6195 | E1: 0 2 4 6 8 10 12 14 |
6196 | E2: 1 3 5 7 9 11 13 15 |
6197 | E3: 16 18 20 22 24 26 28 30 |
6198 | E4: 17 19 21 23 25 27 29 31 |
6199 | |
6200 | In order to proceed and create the correct sequence for the next stage (or |
6201 | for the correct output, if the second stage is the last one, as in our |
6202 | example), we first put the output of extract_even operation and then the |
6203 | output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN). |
6204 | The input for the second stage is: |
6205 | |
6206 | 1st vec (E1): 0 2 4 6 8 10 12 14 |
6207 | 2nd vec (E3): 16 18 20 22 24 26 28 30 |
6208 | 3rd vec (E2): 1 3 5 7 9 11 13 15 |
6209 | 4th vec (E4): 17 19 21 23 25 27 29 31 |
6210 | |
6211 | The output of the second stage: |
6212 | |
6213 | E1: 0 4 8 12 16 20 24 28 |
6214 | E2: 2 6 10 14 18 22 26 30 |
6215 | E3: 1 5 9 13 17 21 25 29 |
6216 | E4: 3 7 11 15 19 23 27 31 |
6217 | |
6218 | And RESULT_CHAIN after reordering: |
6219 | |
6220 | 1st vec (E1): 0 4 8 12 16 20 24 28 |
6221 | 2nd vec (E3): 1 5 9 13 17 21 25 29 |
6222 | 3rd vec (E2): 2 6 10 14 18 22 26 30 |
6223 | 4th vec (E4): 3 7 11 15 19 23 27 31. */ |
6224 | |
static void
vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
			 unsigned int length,
			 stmt_vec_info stmt_info,
			 gimple_stmt_iterator *gsi,
			 vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  tree perm3_mask_low, perm3_mask_high;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int i, j, log_length = exact_log2 (x: length);

  /* Seed RESULT_CHAIN with the input vectors; the stages below overwrite
     it in place.  */
  result_chain->quick_grow (len: length);
  memcpy (dest: result_chain->address (), src: dr_chain.address (),
	  n: length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_load_supported ensures that this is constant.  */
      unsigned nelt = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant ();
      unsigned int k;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (len: nelt);
      vec_perm_indices indices;
      /* Build output vector K (K = 0, 1, 2) out of elements K, 3+K,
	 6+K, ... using two successive permutes.  */
      for (k = 0; k < 3; k++)
	{
	  /* Mask gathering elements K, 3+K, ... that live in the first
	     two input vectors; slots beyond them are don't-care (0).  */
	  for (i = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = 3 * i + k;
	    else
	      sel[i] = 0;
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  /* Mask that keeps the elements gathered above and fills the
	     remaining slots from the third input vector.  */
	  for (i = 0, j = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = i;
	    else
	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  first_vect = dr_chain[0];
	  second_vect = dr_chain[1];

	  /* Create interleaving stmt (low part of):
	     low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
								 ...}>  */
	  data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_low" );
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

	  /* Create interleaving stmt (high part of):
	     high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
								  ...}>  */
	  first_vect = data_ref;
	  second_vect = dr_chain[2];
	  data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_high" );
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_high);
	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	  (*result_chain)[k] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has a single stepped pattern.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (node: vectype);
      vec_perm_builder sel (nelt, 1, 3);
      sel.quick_grow (len: 3);
      for (i = 0; i < 3; ++i)
	sel[i] = i * 2;
      vec_perm_indices indices (sel, 2, nelt);
      perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);

      for (i = 0; i < 3; ++i)
	sel[i] = i * 2 + 1;
      indices.new_vector (sel, 2, nelt);
      perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);

      /* log2(LENGTH) stages; each stage halves the interleaving stride
	 by splitting every vector pair into its even and odd elements.  */
      for (i = 0; i < log_length; i++)
	{
	  for (j = 0; j < length; j += 2)
	    {
	      first_vect = dr_chain[j];
	      second_vect = dr_chain[j+1];

	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_perm_even" );
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_even);
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2] = data_ref;

	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_perm_odd" );
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_odd);
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2+length/2] = data_ref;
	    }
	  /* Feed this stage's output back in as the next stage's input.  */
	  memcpy (dest: dr_chain.address (), src: result_chain->address (),
		  n: length * sizeof (tree));
	}
    }
}
6340 | |
6341 | /* Function vect_shift_permute_load_chain. |
6342 | |
6343 | Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate |
6344 | sequence of stmts to reorder the input data accordingly. |
6345 | Return the final references for loads in RESULT_CHAIN. |
   Return true if successful, false otherwise.
6347 | |
6348 | E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8. |
6349 | The input is 3 vectors each containing 8 elements. We assign a |
6350 | number to each element, the input sequence is: |
6351 | |
6352 | 1st vec: 0 1 2 3 4 5 6 7 |
6353 | 2nd vec: 8 9 10 11 12 13 14 15 |
6354 | 3rd vec: 16 17 18 19 20 21 22 23 |
6355 | |
6356 | The output sequence should be: |
6357 | |
6358 | 1st vec: 0 3 6 9 12 15 18 21 |
6359 | 2nd vec: 1 4 7 10 13 16 19 22 |
6360 | 3rd vec: 2 5 8 11 14 17 20 23 |
6361 | |
6362 | We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output. |
6363 | |
6364 | First we shuffle all 3 vectors to get correct elements order: |
6365 | |
6366 | 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5) |
6367 | 2nd vec: ( 8 11 14) ( 9 12 15) (10 13) |
6368 | 3rd vec: (16 19 22) (17 20 23) (18 21) |
6369 | |
6370 | Next we unite and shift vector 3 times: |
6371 | |
6372 | 1st step: |
6373 | shift right by 6 the concatenation of: |
6374 | "1st vec" and "2nd vec" |
6375 | ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13) |
6376 | "2nd vec" and "3rd vec" |
6377 | ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21) |
6378 | "3rd vec" and "1st vec" |
6379 | (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5) |
6380 | | New vectors | |
6381 | |
6382 | So that now new vectors are: |
6383 | |
6384 | 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15) |
6385 | 2nd vec: (10 13) (16 19 22) (17 20 23) |
6386 | 3rd vec: (18 21) ( 0 3 6) ( 1 4 7) |
6387 | |
6388 | 2nd step: |
6389 | shift right by 5 the concatenation of: |
6390 | "1st vec" and "3rd vec" |
6391 | ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7) |
6392 | "2nd vec" and "1st vec" |
6393 | (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15) |
6394 | "3rd vec" and "2nd vec" |
6395 | (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23) |
6396 | | New vectors | |
6397 | |
6398 | So that now new vectors are: |
6399 | |
6400 | 1st vec: ( 9 12 15) (18 21) ( 0 3 6) |
6401 | 2nd vec: (17 20 23) ( 2 5) ( 8 11 14) |
6402 | 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY |
6403 | |
6404 | 3rd step: |
6405 | shift right by 5 the concatenation of: |
6406 | "1st vec" and "1st vec" |
6407 | ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6) |
6408 | shift right by 3 the concatenation of: |
6409 | "2nd vec" and "2nd vec" |
6410 | (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14) |
6411 | | New vectors | |
6412 | |
6413 | So that now all vectors are READY: |
6414 | 1st vec: ( 0 3 6) ( 9 12 15) (18 21) |
6415 | 2nd vec: ( 2 5) ( 8 11 14) (17 20 23) |
6416 | 3rd vec: ( 1 4 7) (10 13) (16 19 22) |
6417 | |
6418 | This algorithm is faster than one in vect_permute_load_chain if: |
   1. "shift of a concatenation" is faster than general permutation.
6420 | This is usually so. |
6421 | 2. The TARGET machine can't execute vector instructions in parallel. |
6422 | This is because each step of the algorithm depends on previous. |
6423 | The algorithm in vect_permute_load_chain is much more parallel. |
6424 | |
6425 | The algorithm is applicable only for LOAD CHAIN LENGTH less than VF. |
6426 | */ |
6427 | |
6428 | static bool |
6429 | vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, |
6430 | unsigned int length, |
6431 | stmt_vec_info stmt_info, |
6432 | gimple_stmt_iterator *gsi, |
6433 | vec<tree> *result_chain) |
6434 | { |
6435 | tree vect[3], vect_shift[3], data_ref, first_vect, second_vect; |
6436 | tree perm2_mask1, perm2_mask2, perm3_mask; |
6437 | tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask; |
6438 | gimple *perm_stmt; |
6439 | |
6440 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6441 | machine_mode vmode = TYPE_MODE (vectype); |
6442 | unsigned int i; |
6443 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
6444 | |
6445 | unsigned HOST_WIDE_INT nelt, vf; |
6446 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nelt) |
6447 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &vf)) |
6448 | /* Not supported for variable-length vectors. */ |
6449 | return false; |
6450 | |
6451 | vec_perm_builder sel (nelt, nelt, 1); |
6452 | sel.quick_grow (len: nelt); |
6453 | |
6454 | result_chain->quick_grow (len: length); |
6455 | memcpy (dest: result_chain->address (), src: dr_chain.address (), |
6456 | n: length * sizeof (tree)); |
6457 | |
6458 | if (pow2p_hwi (x: length) && vf > 4) |
6459 | { |
6460 | unsigned int j, log_length = exact_log2 (x: length); |
6461 | for (i = 0; i < nelt / 2; ++i) |
6462 | sel[i] = i * 2; |
6463 | for (i = 0; i < nelt / 2; ++i) |
6464 | sel[nelt / 2 + i] = i * 2 + 1; |
6465 | vec_perm_indices indices (sel, 2, nelt); |
6466 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6467 | { |
6468 | if (dump_enabled_p ()) |
6469 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6470 | "shuffle of 2 fields structure is not \ |
6471 | supported by target\n" ); |
6472 | return false; |
6473 | } |
6474 | perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices); |
6475 | |
6476 | for (i = 0; i < nelt / 2; ++i) |
6477 | sel[i] = i * 2 + 1; |
6478 | for (i = 0; i < nelt / 2; ++i) |
6479 | sel[nelt / 2 + i] = i * 2; |
6480 | indices.new_vector (sel, 2, nelt); |
6481 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6482 | { |
6483 | if (dump_enabled_p ()) |
6484 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6485 | "shuffle of 2 fields structure is not \ |
6486 | supported by target\n" ); |
6487 | return false; |
6488 | } |
6489 | perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices); |
6490 | |
6491 | /* Generating permutation constant to shift all elements. |
6492 | For vector length 8 it is {4 5 6 7 8 9 10 11}. */ |
6493 | for (i = 0; i < nelt; i++) |
6494 | sel[i] = nelt / 2 + i; |
6495 | indices.new_vector (sel, 2, nelt); |
6496 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6497 | { |
6498 | if (dump_enabled_p ()) |
6499 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6500 | "shift permutation is not supported by target\n" ); |
6501 | return false; |
6502 | } |
6503 | shift1_mask = vect_gen_perm_mask_checked (vectype, indices); |
6504 | |
6505 | /* Generating permutation constant to select vector from 2. |
6506 | For vector length 8 it is {0 1 2 3 12 13 14 15}. */ |
6507 | for (i = 0; i < nelt / 2; i++) |
6508 | sel[i] = i; |
6509 | for (i = nelt / 2; i < nelt; i++) |
6510 | sel[i] = nelt + i; |
6511 | indices.new_vector (sel, 2, nelt); |
6512 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6513 | { |
6514 | if (dump_enabled_p ()) |
6515 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6516 | "select is not supported by target\n" ); |
6517 | return false; |
6518 | } |
6519 | select_mask = vect_gen_perm_mask_checked (vectype, indices); |
6520 | |
6521 | for (i = 0; i < log_length; i++) |
6522 | { |
6523 | for (j = 0; j < length; j += 2) |
6524 | { |
6525 | first_vect = dr_chain[j]; |
6526 | second_vect = dr_chain[j + 1]; |
6527 | |
6528 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle2" ); |
6529 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6530 | first_vect, first_vect, |
6531 | perm2_mask1); |
6532 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6533 | vect[0] = data_ref; |
6534 | |
6535 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle2" ); |
6536 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6537 | second_vect, second_vect, |
6538 | perm2_mask2); |
6539 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6540 | vect[1] = data_ref; |
6541 | |
6542 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift" ); |
6543 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6544 | vect[0], vect[1], shift1_mask); |
6545 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6546 | (*result_chain)[j/2 + length/2] = data_ref; |
6547 | |
6548 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_select" ); |
6549 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6550 | vect[0], vect[1], select_mask); |
6551 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6552 | (*result_chain)[j/2] = data_ref; |
6553 | } |
6554 | memcpy (dest: dr_chain.address (), src: result_chain->address (), |
6555 | n: length * sizeof (tree)); |
6556 | } |
6557 | return true; |
6558 | } |
6559 | if (length == 3 && vf > 2) |
6560 | { |
6561 | unsigned int k = 0, l = 0; |
6562 | |
6563 | /* Generating permutation constant to get all elements in rigth order. |
6564 | For vector length 8 it is {0 3 6 1 4 7 2 5}. */ |
6565 | for (i = 0; i < nelt; i++) |
6566 | { |
6567 | if (3 * k + (l % 3) >= nelt) |
6568 | { |
6569 | k = 0; |
6570 | l += (3 - (nelt % 3)); |
6571 | } |
6572 | sel[i] = 3 * k + (l % 3); |
6573 | k++; |
6574 | } |
6575 | vec_perm_indices indices (sel, 2, nelt); |
6576 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6577 | { |
6578 | if (dump_enabled_p ()) |
6579 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6580 | "shuffle of 3 fields structure is not \ |
6581 | supported by target\n" ); |
6582 | return false; |
6583 | } |
6584 | perm3_mask = vect_gen_perm_mask_checked (vectype, indices); |
6585 | |
6586 | /* Generating permutation constant to shift all elements. |
6587 | For vector length 8 it is {6 7 8 9 10 11 12 13}. */ |
6588 | for (i = 0; i < nelt; i++) |
6589 | sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; |
6590 | indices.new_vector (sel, 2, nelt); |
6591 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6592 | { |
6593 | if (dump_enabled_p ()) |
6594 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6595 | "shift permutation is not supported by target\n" ); |
6596 | return false; |
6597 | } |
6598 | shift1_mask = vect_gen_perm_mask_checked (vectype, indices); |
6599 | |
6600 | /* Generating permutation constant to shift all elements. |
6601 | For vector length 8 it is {5 6 7 8 9 10 11 12}. */ |
6602 | for (i = 0; i < nelt; i++) |
6603 | sel[i] = 2 * (nelt / 3) + 1 + i; |
6604 | indices.new_vector (sel, 2, nelt); |
6605 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6606 | { |
6607 | if (dump_enabled_p ()) |
6608 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6609 | "shift permutation is not supported by target\n" ); |
6610 | return false; |
6611 | } |
6612 | shift2_mask = vect_gen_perm_mask_checked (vectype, indices); |
6613 | |
6614 | /* Generating permutation constant to shift all elements. |
6615 | For vector length 8 it is {3 4 5 6 7 8 9 10}. */ |
6616 | for (i = 0; i < nelt; i++) |
6617 | sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; |
6618 | indices.new_vector (sel, 2, nelt); |
6619 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6620 | { |
6621 | if (dump_enabled_p ()) |
6622 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6623 | "shift permutation is not supported by target\n" ); |
6624 | return false; |
6625 | } |
6626 | shift3_mask = vect_gen_perm_mask_checked (vectype, indices); |
6627 | |
6628 | /* Generating permutation constant to shift all elements. |
6629 | For vector length 8 it is {5 6 7 8 9 10 11 12}. */ |
6630 | for (i = 0; i < nelt; i++) |
6631 | sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; |
6632 | indices.new_vector (sel, 2, nelt); |
6633 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6634 | { |
6635 | if (dump_enabled_p ()) |
6636 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6637 | "shift permutation is not supported by target\n" ); |
6638 | return false; |
6639 | } |
6640 | shift4_mask = vect_gen_perm_mask_checked (vectype, indices); |
6641 | |
6642 | for (k = 0; k < 3; k++) |
6643 | { |
6644 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3" ); |
6645 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6646 | dr_chain[k], dr_chain[k], |
6647 | perm3_mask); |
6648 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6649 | vect[k] = data_ref; |
6650 | } |
6651 | |
6652 | for (k = 0; k < 3; k++) |
6653 | { |
6654 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift1" ); |
6655 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6656 | vect[k % 3], vect[(k + 1) % 3], |
6657 | shift1_mask); |
6658 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6659 | vect_shift[k] = data_ref; |
6660 | } |
6661 | |
6662 | for (k = 0; k < 3; k++) |
6663 | { |
6664 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift2" ); |
6665 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6666 | vect_shift[(4 - k) % 3], |
6667 | vect_shift[(3 - k) % 3], |
6668 | shift2_mask); |
6669 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6670 | vect[k] = data_ref; |
6671 | } |
6672 | |
6673 | (*result_chain)[3 - (nelt % 3)] = vect[2]; |
6674 | |
6675 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift3" ); |
6676 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0], |
6677 | vect[0], shift3_mask); |
6678 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6679 | (*result_chain)[nelt % 3] = data_ref; |
6680 | |
6681 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift4" ); |
6682 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1], |
6683 | vect[1], shift4_mask); |
6684 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6685 | (*result_chain)[0] = data_ref; |
6686 | return true; |
6687 | } |
6688 | return false; |
6689 | } |
6690 | |
6691 | /* Function vect_transform_grouped_load. |
6692 | |
6693 | Given a chain of input interleaved data-refs (in DR_CHAIN), build statements |
6694 | to perform their permutation and ascribe the result vectorized statements to |
6695 | the scalar statements. |
6696 | */ |
6697 | |
6698 | void |
6699 | vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info, |
6700 | vec<tree> dr_chain, |
6701 | int size, gimple_stmt_iterator *gsi) |
6702 | { |
6703 | machine_mode mode; |
6704 | vec<tree> result_chain = vNULL; |
6705 | |
6706 | /* DR_CHAIN contains input data-refs that are a part of the interleaving. |
6707 | RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted |
6708 | vectors, that are ready for vector computation. */ |
6709 | result_chain.create (nelems: size); |
6710 | |
6711 | /* If reassociation width for vector type is 2 or greater target machine can |
6712 | execute 2 or more vector instructions in parallel. Otherwise try to |
6713 | get chain for loads group using vect_shift_permute_load_chain. */ |
6714 | mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); |
6715 | if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1 |
6716 | || pow2p_hwi (x: size) |
6717 | || !vect_shift_permute_load_chain (vinfo, dr_chain, length: size, stmt_info, |
6718 | gsi, result_chain: &result_chain)) |
6719 | vect_permute_load_chain (vinfo, dr_chain, |
6720 | length: size, stmt_info, gsi, result_chain: &result_chain); |
6721 | vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain); |
6722 | result_chain.release (); |
6723 | } |
6724 | |
6725 | /* RESULT_CHAIN contains the output of a group of grouped loads that were |
6726 | generated as part of the vectorization of STMT_INFO. Assign the statement |
6727 | for each vector to the associated scalar statement. */ |
6728 | |
void
vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
				  vec<tree> result_chain)
{
  /* The group leader; the chain of group members hangs off it via
     DR_GROUP_NEXT_ELEMENT.  */
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  stmt_vec_info next_stmt_info = first_stmt_info;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      /* All group members have been assigned a vector statement;
	 any remaining results are not needed.  */
      if (!next_stmt_info)
	break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
	 code elimination pass later.  No need to check for the first stmt in
	 the group, since it always exists.
	 DR_GROUP_GAP is the number of steps in elements from the previous
	 access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
	 correspond to the gaps.  */
      if (next_stmt_info != first_stmt_info
	  && gap_count < DR_GROUP_GAP (next_stmt_info))
	{
	  gap_count++;
	  continue;
	}

      /* ??? The following needs cleanup after the removal of
	 DR_GROUP_SAME_DR_STMT.  NEXT_STMT_INFO is known non-null here
	 (checked at the top of the loop), so this test is redundant.  */
      if (next_stmt_info)
	{
	  gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
	     copies, and we put the new vector statement last.  */
	  STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (obj: new_stmt);

	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	  gap_count = 1;
	}
    }
}
6774 | |
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */
6779 | |
6780 | bool |
6781 | vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment) |
6782 | { |
6783 | if (!VAR_P (decl)) |
6784 | return false; |
6785 | |
6786 | if (decl_in_symtab_p (decl) |
6787 | && !symtab_node::get (decl)->can_increase_alignment_p ()) |
6788 | return false; |
6789 | |
6790 | if (TREE_STATIC (decl)) |
6791 | return (known_le (alignment, |
6792 | (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT)); |
6793 | else |
6794 | return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT)); |
6795 | } |
6796 | |
/* Return whether the data reference DR_INFO is supported with respect to its
   alignment, when vectorized with vector type VECTYPE under the given
   MISALIGNMENT (DR_MISALIGNMENT_UNKNOWN when the misalignment is not
   known at compile time).  */
6802 | |
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
			       tree vectype, int misalignment)
{
  data_reference *dr = dr_info->dr;
  stmt_vec_info stmt_info = dr_info->stmt;
  machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  class loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  /* A known misalignment of zero means the access is aligned.  */
  if (misalignment == 0)
    return dr_aligned;

  /* For now assume all conditional loads/stores support unaligned
     access without any special code.  */
  if (gcall *stmt = dyn_cast <gcall *> (p: stmt_info->stmt))
    if (gimple_call_internal_p (gs: stmt)
	&& (gimple_call_internal_fn (gs: stmt) == IFN_MASK_LOAD
	    || gimple_call_internal_fn (gs: stmt) == IFN_MASK_STORE))
      return dr_unaligned_supported;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop: vect_loop, stmt_info);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that is will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
          for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                           // that the misalignment of the initial address is
                           // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
    } */

  if (DR_IS_READ (dr))
    {
      /* Consider the explicit realignment scheme: it requires target support
	 for vec_realign_load and, when the hook is defined, a mask-for-load
	 builtin.  */
      if (optab_handler (op: vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  /* If we are doing SLP then the accesses need not have the
	     same alignment, instead it depends on the SLP group size.
	     In that case neither realignment variant applies; fall
	     through to the misalignment query below.  */
	  if (loop_vinfo
	      && STMT_SLP_TYPE (stmt_info)
	      && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		  || !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
				  * (DR_GROUP_SIZE
				     (DR_GROUP_FIRST_ELEMENT (stmt_info))),
				  b: TYPE_VECTOR_SUBPARTS (node: vectype)))) 
	    ;
	  /* Outside a loop, or in a nested loop whose scalar step differs
	     from the vector size, the misalignment is not invariant, so
	     only the unoptimized explicit scheme is usable (see the big
	     comment above).  */
	  else if (!loop_vinfo
		   || (nested_in_vect_loop
		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
				    b: GET_MODE_SIZE (TYPE_MODE (vectype)))))
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}
    }

  /* Finally ask the target whether a plain misaligned vector access of
     this mode is supported.  */
  bool is_packed = false;
  tree type = TREE_TYPE (DR_REF (dr));
  if (misalignment == DR_MISALIGNMENT_UNKNOWN)
    /* With unknown misalignment the reference may additionally be packed,
       i.e. not aligned to its own size.  */
    is_packed = not_size_aligned (DR_REF (dr));
  if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
						     is_packed))
    return dr_unaligned_supported;

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
6931 | |