/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  stmt_vec_info stmt_info, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
              || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
                           NULL_TREE, 0, where);
}
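
/* As a minimal usage sketch (a hypothetical caller, not a call site in
   this file): recording one unaligned load in the loop body would be

     unsigned cost = record_stmt_cost (cost_vec, 1, unaligned_load,
                                       stmt_info, vectype, misalign,
                                       vect_body);

   and, if STMT_INFO is a gather/scatter access, the static worker above
   retags the cost as vector_gather_load / vector_scatter_store before
   pushing it onto the cost vector.  */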

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
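
/* For instance, reading index 1 of an array of V4SI vectors emits
   GIMPLE along the lines of (SSA names purely illustrative):

     vect_x.7_23 = vect_array[1];

   i.e. a single aggregate-to-register assignment whose LHS is the new
   SSA name returned to the caller.  */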

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vec_stmt_relevant_p: forcing live pattern stmt "
                             "relevant.\n");
          relevant = vect_used_only_live;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "mark relevant %d, live %d: %G", relevant, live_p,
                         stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
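
/* Illustrative example (hypothetical names): if a widening multiply

     a_5 = (int) b_2 * (int) c_3;

   was replaced by the pattern stmt "patt_6 = b_2 w* c_3", then marking
   A_5 as relevant redirects to PATT_6, and only PATT_6 ends up on
   WORKLIST with the (possibly forced-live) relevance.  */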


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
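
/* For example, in the illustrative scalar loop

     for (i = 0; i < n; i++)
       {
         a[i] = x;      // relevant: has a vdef (alters memory)
         s = s + b[i];  // live if S is read after the loop
       }

   the store is relevant because of its vdef, while the final value of
   S makes its computation live via the loop-closed exit PHI.  */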


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
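
/* E.g. for an illustrative store "a[i_7] = x_5": X_5 is a non-indexing
   use (it must itself be vectorized), whereas I_7 only feeds the
   address computation, so the predicate returns true for X_5 and false
   for I_7.  For internal calls such as IFN_MASK_STORE the mask and the
   stored value play the same non-indexing role.  */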


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
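
/* E.g. a vector add whose second operand is loop-invariant, costed with
   NCOPIES == 2 (illustrative numbers), records

     prologue: 1 * scalar_to_vec   (broadcast of the invariant)
     body:     2 * vector_stmt     (the two vector adds)

   which the target cost hooks later translate into actual costs.  */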


/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
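
/* Worked example (illustrative): a two-step conversion, say char ->
   short -> int, has PWR == 1.  With NCOPIES == 2 the loop above records
   2 stmts on its first iteration and 4 on its second, i.e. 6
   promote/demote (or vector_stmt) operations in the loop body total.  */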

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
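
/* The aggregate-copy case handled above looks like (illustrative
   GIMPLE, with DECL playing the role of D.1234):

     <retval> = D.1234;
     return <retval>;

   found by walking the virtual use-def chain backwards from the
   return, skipping any intermediate clobbers of <retval>.  */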

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial value 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
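
/* For an invariant scalar x_1 and a V4SI vector type this emits, in the
   loop preheader when GSI is NULL (SSA names illustrative):

     cst__5 = {x_1, x_1, x_1, x_1};

   and returns cst__5.  Scalar booleans are first normalized to
   all-zeros/all-ones values of the element type, as handled above.  */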


/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
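
/* E.g. with NCOPIES == 2 (illustrative): for an operand defined inside
   the loop, the two entries pushed onto *VEC_OPRNDS are the LHSs of the
   defining stmt's two vector stmts; for an invariant, a single
   broadcast is created in the preheader and its SSA name is pushed
   twice.  */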


/* Get vectorized definitions for OP0, OP1, OP2 and OP3.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
                   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
                   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
                   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, vec_oprnds0, NULL_TREE,
                     op1, vec_oprnds1, NULL_TREE,
                     op2, vec_oprnds2, NULL_TREE,
                     op3, vec_oprnds3, NULL_TREE);
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && (!(gimple_call_flags (vec_stmt)
                            & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
                          || (gimple_call_lhs (vec_stmt)
                              && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);

          /* The type sizes of both vectype_in and vectype_out should be
             exactly the same when vectype_out isn't participating in the
             optab query.  There is no restriction on the type size when
             vectype_out is part of the optab query.  */
          if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
            return IFN_LAST;

          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
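
/* For example (an illustrative query, not a call site here): a loop
   containing sqrtf calls has CFN_BUILT_IN_SQRTF, whose associated
   internal function is IFN_SQRT; with V4SF as both VECTYPE_OUT and
   VECTYPE_IN this returns IFN_SQRT if the target implements the
   corresponding optab for V4SF, and IFN_LAST otherwise.  */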


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      slp_tree slp_node,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                      memory_access_type,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      internal_fn ifn
        = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
                   : vect_store_lanes_supported (vectype, group_size, true));
      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
                             ? IFN_MASK_LEN_GATHER_LOAD
                             : IFN_MASK_LEN_SCATTER_STORE);
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                  gs_info->memory_type,
                                                  gs_info->offset_vectype,
                                                  gs_info->scale))
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                       gs_info->memory_type,
                                                       gs_info->offset_vectype,
                                                       gs_info->scale))
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 | unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode); |
1584 | vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor); |
1585 | using_partial_vectors_p = true; |
1586 | } |
1587 | else if (targetm.vectorize.get_mask_mode (vecmode).exists (mode: &mask_mode) |
1588 | && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) |
1589 | { |
1590 | nvectors = group_memory_nvectors (group_size * vf, nunits); |
1591 | vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); |
1592 | using_partial_vectors_p = true; |
1593 | } |
1594 | |
1595 | if (!using_partial_vectors_p) |
1596 | { |
1597 | if (dump_enabled_p ()) |
1598 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1599 | "can't operate on partial vectors because the" |
1600 | " target doesn't have the appropriate partial" |
1601 | " vectorization load or store.\n" ); |
1602 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1603 | } |
1604 | } |
1605 | |
1606 | /* Return the mask input to a masked load or store. VEC_MASK is the vectorized |
1607 | form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask |
1608 | that needs to be applied to all loads and stores in a vectorized loop. |
1609 | Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked, |
1610 | otherwise return VEC_MASK & LOOP_MASK. |
1611 | |
1612 | MASK_TYPE is the type of both masks. If new statements are needed, |
1613 | insert them before GSI. */ |
1614 | |
1615 | static tree |
1616 | prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask, |
1617 | tree vec_mask, gimple_stmt_iterator *gsi) |
1618 | { |
1619 | gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); |
1620 | if (!loop_mask) |
1621 | return vec_mask; |
1622 | |
1623 | gcc_assert (TREE_TYPE (loop_mask) == mask_type); |
1624 | |
1625 | if (loop_vinfo->vec_cond_masked_set.contains (k: { vec_mask, loop_mask })) |
1626 | return vec_mask; |
1627 | |
1628 | tree and_res = make_temp_ssa_name (type: mask_type, NULL, name: "vec_mask_and" ); |
1629 | gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, |
1630 | vec_mask, loop_mask); |
1631 | |
1632 | gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); |
1633 | return and_res; |
1634 | } |
1635 | |
1636 | /* Determine whether we can use a gather load or scatter store to vectorize |
1637 | strided load or store STMT_INFO by truncating the current offset to a |
1638 | smaller width. We need to be able to construct an offset vector: |
1639 | |
1640 | { 0, X, X*2, X*3, ... } |
1641 | |
1642 | without loss of precision, where X is STMT_INFO's DR_STEP. |
1643 | |
1644 | Return true if this is possible, describing the gather load or scatter |
1645 | store in GS_INFO. MASKED_P is true if the load or store is conditional. */ |
1646 | |
1647 | static bool |
1648 | vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, |
1649 | loop_vec_info loop_vinfo, bool masked_p, |
1650 | gather_scatter_info *gs_info) |
1651 | { |
1652 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
1653 | data_reference *dr = dr_info->dr; |
1654 | tree step = DR_STEP (dr); |
1655 | if (TREE_CODE (step) != INTEGER_CST) |
1656 | { |
1657 | /* ??? Perhaps we could use range information here? */ |
1658 | if (dump_enabled_p ()) |
1659 | dump_printf_loc (MSG_NOTE, vect_location, |
1660 | "cannot truncate variable step.\n" ); |
1661 | return false; |
1662 | } |
1663 | |
1664 | /* Get the number of bits in an element. */ |
1665 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
1666 | scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); |
1667 | unsigned int element_bits = GET_MODE_BITSIZE (mode: element_mode); |
1668 | |
1669 | /* Set COUNT to the upper limit on the number of elements - 1. |
1670 | Start with the maximum vectorization factor. */ |
1671 | unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1; |
1672 | |
1673 | /* Try lowering COUNT to the number of scalar latch iterations. */ |
1674 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1675 | widest_int max_iters; |
1676 | if (max_loop_iterations (loop, &max_iters) |
1677 | && max_iters < count) |
1678 | count = max_iters.to_shwi (); |
1679 | |
1680 | /* Try scales of 1 and the element size. */ |
1681 | int scales[] = { 1, vect_get_scalar_dr_size (dr_info) }; |
1682 | wi::overflow_type overflow = wi::OVF_NONE; |
1683 | for (int i = 0; i < 2; ++i) |
1684 | { |
1685 | int scale = scales[i]; |
1686 | widest_int factor; |
1687 | if (!wi::multiple_of_p (x: wi::to_widest (t: step), y: scale, sgn: SIGNED, res: &factor)) |
1688 | continue; |
1689 | |
1690 | /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */ |
1691 | widest_int range = wi::mul (x: count, y: factor, sgn: SIGNED, overflow: &overflow); |
1692 | if (overflow) |
1693 | continue; |
1694 | signop sign = range >= 0 ? UNSIGNED : SIGNED; |
1695 | unsigned int min_offset_bits = wi::min_precision (x: range, sgn: sign); |
1696 | |
1697 | /* Find the narrowest viable offset type. */ |
1698 | unsigned int offset_bits = 1U << ceil_log2 (x: min_offset_bits); |
1699 | tree offset_type = build_nonstandard_integer_type (offset_bits, |
1700 | sign == UNSIGNED); |
1701 | |
1702 | /* See whether the target supports the operation with an offset |
1703 | no narrower than OFFSET_TYPE. */ |
1704 | tree memory_type = TREE_TYPE (DR_REF (dr)); |
1705 | if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, |
1706 | vectype, memory_type, offset_type, scale, |
1707 | &gs_info->ifn, &gs_info->offset_vectype) |
1708 | || gs_info->ifn == IFN_LAST) |
1709 | continue; |
1710 | |
1711 | gs_info->decl = NULL_TREE; |
1712 | /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, |
1713 | but we don't need to store that here. */ |
1714 | gs_info->base = NULL_TREE; |
1715 | gs_info->element_type = TREE_TYPE (vectype); |
1716 | gs_info->offset = fold_convert (offset_type, step); |
1717 | gs_info->offset_dt = vect_constant_def; |
1718 | gs_info->scale = scale; |
1719 | gs_info->memory_type = memory_type; |
1720 | return true; |
1721 | } |
1722 | |
1723 | if (overflow && dump_enabled_p ()) |
1724 | dump_printf_loc (MSG_NOTE, vect_location, |
1725 | "truncating gather/scatter offset to %d bits" |
1726 | " might change its value.\n" , element_bits); |
1727 | |
1728 | return false; |
1729 | } |
1730 | |
1731 | /* Return true if we can use gather/scatter internal functions to |
1732 | vectorize STMT_INFO, which is a grouped or strided load or store. |
1733 | MASKED_P is true if load or store is conditional. When returning |
1734 | true, fill in GS_INFO with the information required to perform the |
1735 | operation. */ |
1736 | |
1737 | static bool |
1738 | vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, |
1739 | loop_vec_info loop_vinfo, bool masked_p, |
1740 | gather_scatter_info *gs_info) |
1741 | { |
1742 | if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) |
1743 | || gs_info->ifn == IFN_LAST) |
1744 | return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, |
1745 | masked_p, gs_info); |
1746 | |
1747 | tree old_offset_type = TREE_TYPE (gs_info->offset); |
1748 | tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); |
1749 | |
1750 | gcc_assert (TYPE_PRECISION (new_offset_type) |
1751 | >= TYPE_PRECISION (old_offset_type)); |
1752 | gs_info->offset = fold_convert (new_offset_type, gs_info->offset); |
1753 | |
1754 | if (dump_enabled_p ()) |
1755 | dump_printf_loc (MSG_NOTE, vect_location, |
1756 | "using gather/scatter for strided/grouped access," |
1757 | " scale = %d\n" , gs_info->scale); |
1758 | |
1759 | return true; |
1760 | } |
1761 | |
1762 | /* STMT_INFO is a non-strided load or store, meaning that it accesses |
1763 | elements with a known constant step. Return -1 if that step |
1764 | is negative, 0 if it is zero, and 1 if it is greater than zero. */ |
1765 | |
1766 | static int |
1767 | compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info) |
1768 | { |
1769 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
1770 | return tree_int_cst_compare (t1: vect_dr_behavior (vinfo, dr_info)->step, |
1771 | size_zero_node); |
1772 | } |
1773 | |
1774 | /* If the target supports a permute mask that reverses the elements in |
1775 | a vector of type VECTYPE, return that mask, otherwise return null. */ |
1776 | |
1777 | static tree |
1778 | perm_mask_for_reverse (tree vectype) |
1779 | { |
1780 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
1781 | |
1782 | /* The encoding has a single stepped pattern. */ |
1783 | vec_perm_builder sel (nunits, 1, 3); |
1784 | for (int i = 0; i < 3; ++i) |
1785 | sel.quick_push (obj: nunits - 1 - i); |
1786 | |
1787 | vec_perm_indices indices (sel, 1, nunits); |
1788 | if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), |
1789 | indices)) |
1790 | return NULL_TREE; |
1791 | return vect_gen_perm_mask_checked (vectype, indices); |
1792 | } |
1793 | |
1794 | /* A subroutine of get_load_store_type, with a subset of the same |
1795 | arguments. Handle the case where STMT_INFO is a load or store that |
1796 | accesses consecutive elements with a negative step. Sets *POFFSET |
1797 | to the offset to be applied to the DR for the first access. */ |
1798 | |
1799 | static vect_memory_access_type |
1800 | get_negative_load_store_type (vec_info *vinfo, |
1801 | stmt_vec_info stmt_info, tree vectype, |
1802 | vec_load_store_type vls_type, |
1803 | unsigned int ncopies, poly_int64 *poffset) |
1804 | { |
1805 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
1806 | dr_alignment_support alignment_support_scheme; |
1807 | |
1808 | if (ncopies > 1) |
1809 | { |
1810 | if (dump_enabled_p ()) |
1811 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1812 | "multiple types with negative step.\n" ); |
1813 | return VMAT_ELEMENTWISE; |
1814 | } |
1815 | |
1816 | /* For backward running DRs the first access in vectype actually is |
1817 | N-1 elements before the address of the DR. */ |
1818 | *poffset = ((-TYPE_VECTOR_SUBPARTS (node: vectype) + 1) |
1819 | * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
1820 | |
1821 | int misalignment = dr_misalignment (dr_info, vectype, offset: *poffset); |
1822 | alignment_support_scheme |
1823 | = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment); |
1824 | if (alignment_support_scheme != dr_aligned |
1825 | && alignment_support_scheme != dr_unaligned_supported) |
1826 | { |
1827 | if (dump_enabled_p ()) |
1828 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1829 | "negative step but alignment required.\n" ); |
1830 | *poffset = 0; |
1831 | return VMAT_ELEMENTWISE; |
1832 | } |
1833 | |
1834 | if (vls_type == VLS_STORE_INVARIANT) |
1835 | { |
1836 | if (dump_enabled_p ()) |
1837 | dump_printf_loc (MSG_NOTE, vect_location, |
1838 | "negative step with invariant source;" |
1839 | " no permute needed.\n" ); |
1840 | return VMAT_CONTIGUOUS_DOWN; |
1841 | } |
1842 | |
1843 | if (!perm_mask_for_reverse (vectype)) |
1844 | { |
1845 | if (dump_enabled_p ()) |
1846 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1847 | "negative step and reversing not supported.\n" ); |
1848 | *poffset = 0; |
1849 | return VMAT_ELEMENTWISE; |
1850 | } |
1851 | |
1852 | return VMAT_CONTIGUOUS_REVERSE; |
1853 | } |
1854 | |
1855 | /* STMT_INFO is either a masked or unconditional store. Return the value |
1856 | being stored. */ |
1857 | |
1858 | tree |
1859 | vect_get_store_rhs (stmt_vec_info stmt_info) |
1860 | { |
1861 | if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt)) |
1862 | { |
1863 | gcc_assert (gimple_assign_single_p (assign)); |
1864 | return gimple_assign_rhs1 (gs: assign); |
1865 | } |
1866 | if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt)) |
1867 | { |
1868 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
1869 | int index = internal_fn_stored_value_index (ifn); |
1870 | gcc_assert (index >= 0); |
1871 | return gimple_call_arg (gs: call, index); |
1872 | } |
1873 | gcc_unreachable (); |
1874 | } |
1875 | |
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   with the same vector size as the return vector.  It first checks whether
   the target supports a piece-sized vector mode for the construction; if
   not, it then checks whether a piece-sized scalar mode can be used.  It
   returns NULL_TREE if no usable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */
1889 | |
1890 | static tree |
1891 | vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) |
1892 | { |
1893 | gcc_assert (VECTOR_TYPE_P (vtype)); |
1894 | gcc_assert (known_gt (nelts, 0U)); |
1895 | |
1896 | machine_mode vmode = TYPE_MODE (vtype); |
1897 | if (!VECTOR_MODE_P (vmode)) |
1898 | return NULL_TREE; |
1899 | |
1900 | /* When we are asked to compose the vector from its components let |
1901 | that happen directly. */ |
1902 | if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts)) |
1903 | { |
1904 | *ptype = TREE_TYPE (vtype); |
1905 | return vtype; |
1906 | } |
1907 | |
1908 | poly_uint64 vbsize = GET_MODE_BITSIZE (mode: vmode); |
1909 | unsigned int pbsize; |
1910 | if (constant_multiple_p (a: vbsize, b: nelts, multiple: &pbsize)) |
1911 | { |
1912 | /* First check if vec_init optab supports construction from |
1913 | vector pieces directly. */ |
1914 | scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); |
1915 | poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (mode: elmode); |
1916 | machine_mode rmode; |
1917 | if (related_vector_mode (vmode, elmode, inelts).exists (mode: &rmode) |
1918 | && (convert_optab_handler (op: vec_init_optab, to_mode: vmode, from_mode: rmode) |
1919 | != CODE_FOR_nothing)) |
1920 | { |
1921 | *ptype = build_vector_type (TREE_TYPE (vtype), inelts); |
1922 | return vtype; |
1923 | } |
1924 | |
      /* Otherwise check whether there is an integer type of the same piece
	 size and whether the vec_init optab supports construction from it
	 directly.  */
1927 | if (int_mode_for_size (size: pbsize, limit: 0).exists (mode: &elmode) |
1928 | && related_vector_mode (vmode, elmode, nelts).exists (mode: &rmode) |
1929 | && (convert_optab_handler (op: vec_init_optab, to_mode: rmode, from_mode: elmode) |
1930 | != CODE_FOR_nothing)) |
1931 | { |
1932 | *ptype = build_nonstandard_integer_type (pbsize, 1); |
1933 | return build_vector_type (*ptype, nelts); |
1934 | } |
1935 | } |
1936 | |
1937 | return NULL_TREE; |
1938 | } |
1939 | |
1940 | /* A subroutine of get_load_store_type, with a subset of the same |
1941 | arguments. Handle the case where STMT_INFO is part of a grouped load |
1942 | or store. |
1943 | |
1944 | For stores, the statements in the group are all consecutive |
1945 | and there is no gap at the end. For loads, the statements in the |
1946 | group might not be consecutive; there can be gaps between statements |
1947 | as well as at the end. */ |
1948 | |
1949 | static bool |
1950 | get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, |
1951 | tree vectype, slp_tree slp_node, |
1952 | bool masked_p, vec_load_store_type vls_type, |
1953 | vect_memory_access_type *memory_access_type, |
1954 | poly_int64 *poffset, |
1955 | dr_alignment_support *alignment_support_scheme, |
1956 | int *misalignment, |
1957 | gather_scatter_info *gs_info, |
1958 | internal_fn *lanes_ifn) |
1959 | { |
1960 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
1961 | class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; |
1962 | stmt_vec_info first_stmt_info; |
1963 | unsigned int group_size; |
1964 | unsigned HOST_WIDE_INT gap; |
1965 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1966 | { |
1967 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
1968 | group_size = DR_GROUP_SIZE (first_stmt_info); |
1969 | gap = DR_GROUP_GAP (first_stmt_info); |
1970 | } |
1971 | else |
1972 | { |
1973 | first_stmt_info = stmt_info; |
1974 | group_size = 1; |
1975 | gap = 0; |
1976 | } |
1977 | dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
1978 | bool single_element_p = (stmt_info == first_stmt_info |
1979 | && !DR_GROUP_NEXT_ELEMENT (stmt_info)); |
1980 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
1981 | |
1982 | /* True if the vectorized statements would access beyond the last |
1983 | statement in the group. */ |
1984 | bool overrun_p = false; |
1985 | |
1986 | /* True if we can cope with such overrun by peeling for gaps, so that |
1987 | there is at least one final scalar iteration after the vector loop. */ |
1988 | bool can_overrun_p = (!masked_p |
1989 | && vls_type == VLS_LOAD |
1990 | && loop_vinfo |
1991 | && !loop->inner); |
1992 | |
1993 | /* There can only be a gap at the end of the group if the stride is |
1994 | known at compile time. */ |
1995 | gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0); |
1996 | |
1997 | /* Stores can't yet have gaps. */ |
1998 | gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0); |
1999 | |
2000 | if (slp_node) |
2001 | { |
2002 | /* For SLP vectorization we directly vectorize a subchain |
2003 | without permutation. */ |
2004 | if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
2005 | first_dr_info |
2006 | = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]); |
2007 | if (STMT_VINFO_STRIDED_P (first_stmt_info)) |
2008 | { |
2009 | /* Try to use consecutive accesses of DR_GROUP_SIZE elements, |
2010 | separated by the stride, until we have a complete vector. |
2011 | Fall back to scalar accesses if that isn't possible. */ |
2012 | if (multiple_p (a: nunits, b: group_size)) |
2013 | *memory_access_type = VMAT_STRIDED_SLP; |
2014 | else |
2015 | *memory_access_type = VMAT_ELEMENTWISE; |
2016 | } |
2017 | else |
2018 | { |
2019 | overrun_p = loop_vinfo && gap != 0; |
2020 | if (overrun_p && vls_type != VLS_LOAD) |
2021 | { |
2022 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2023 | "Grouped store with gaps requires" |
2024 | " non-consecutive accesses\n" ); |
2025 | return false; |
2026 | } |
2027 | /* An overrun is fine if the trailing elements are smaller |
2028 | than the alignment boundary B. Every vector access will |
2029 | be a multiple of B and so we are guaranteed to access a |
2030 | non-gap element in the same B-sized block. */ |
2031 | if (overrun_p |
2032 | && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info, |
2033 | vectype) |
2034 | / vect_get_scalar_dr_size (dr_info: first_dr_info))) |
2035 | overrun_p = false; |
2036 | |
2037 | /* If the gap splits the vector in half and the target |
2038 | can do half-vector operations avoid the epilogue peeling |
2039 | by simply loading half of the vector only. Usually |
2040 | the construction with an upper zero half will be elided. */ |
2041 | dr_alignment_support alss; |
2042 | int misalign = dr_misalignment (dr_info: first_dr_info, vectype); |
2043 | tree half_vtype; |
2044 | if (overrun_p |
2045 | && !masked_p |
2046 | && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info, |
2047 | vectype, misalign))) |
2048 | == dr_aligned |
2049 | || alss == dr_unaligned_supported) |
2050 | && known_eq (nunits, (group_size - gap) * 2) |
2051 | && known_eq (nunits, group_size) |
2052 | && (vector_vector_composition_type (vtype: vectype, nelts: 2, ptype: &half_vtype) |
2053 | != NULL_TREE)) |
2054 | overrun_p = false; |
2055 | |
2056 | if (overrun_p && !can_overrun_p) |
2057 | { |
2058 | if (dump_enabled_p ()) |
2059 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2060 | "Peeling for outer loop is not supported\n" ); |
2061 | return false; |
2062 | } |
2063 | int cmp = compare_step_with_zero (vinfo, stmt_info); |
2064 | if (cmp < 0) |
2065 | { |
2066 | if (single_element_p) |
2067 | /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is |
2068 | only correct for single element "interleaving" SLP. */ |
2069 | *memory_access_type = get_negative_load_store_type |
2070 | (vinfo, stmt_info, vectype, vls_type, ncopies: 1, poffset); |
2071 | else |
2072 | { |
2073 | /* Try to use consecutive accesses of DR_GROUP_SIZE elements, |
2074 | separated by the stride, until we have a complete vector. |
2075 | Fall back to scalar accesses if that isn't possible. */ |
2076 | if (multiple_p (a: nunits, b: group_size)) |
2077 | *memory_access_type = VMAT_STRIDED_SLP; |
2078 | else |
2079 | *memory_access_type = VMAT_ELEMENTWISE; |
2080 | } |
2081 | } |
2082 | else if (cmp == 0 && loop_vinfo) |
2083 | { |
2084 | gcc_assert (vls_type == VLS_LOAD); |
2085 | *memory_access_type = VMAT_INVARIANT; |
2086 | /* Invariant accesses perform only component accesses, alignment |
2087 | is irrelevant for them. */ |
2088 | *alignment_support_scheme = dr_unaligned_supported; |
2089 | } |
2090 | else |
2091 | *memory_access_type = VMAT_CONTIGUOUS; |
2092 | |
2093 | /* When we have a contiguous access across loop iterations |
2094 | but the access in the loop doesn't cover the full vector |
2095 | we can end up with no gap recorded but still excess |
2096 | elements accessed, see PR103116. Make sure we peel for |
2097 | gaps if necessary and sufficient and give up if not. |
2098 | |
2099 | If there is a combination of the access not covering the full |
2100 | vector and a gap recorded then we may need to peel twice. */ |
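      /* Arithmetic sketch with made-up numbers: if CNUNITS == 4, CVF == 2,
	 GROUP_SIZE == 3 and GAP == 0 below, then (3 * 2) % 4 + 3 - 0 == 5
	 >= 4, so a single peeled scalar iteration is enough and we can set
	 OVERRUN_P instead of giving up.  */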
2101 | if (loop_vinfo |
2102 | && *memory_access_type == VMAT_CONTIGUOUS |
2103 | && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () |
2104 | && !multiple_p (a: group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
2105 | b: nunits)) |
2106 | { |
2107 | unsigned HOST_WIDE_INT cnunits, cvf; |
2108 | if (!can_overrun_p |
2109 | || !nunits.is_constant (const_value: &cnunits) |
2110 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &cvf) |
2111 | /* Peeling for gaps assumes that a single scalar iteration |
2112 | is enough to make sure the last vector iteration doesn't |
2113 | access excess elements. |
2114 | ??? Enhancements include peeling multiple iterations |
2115 | or using masked loads with a static mask. */ |
2116 | || (group_size * cvf) % cnunits + group_size - gap < cnunits) |
2117 | { |
2118 | if (dump_enabled_p ()) |
2119 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2120 | "peeling for gaps insufficient for " |
2121 | "access\n" ); |
2122 | return false; |
2123 | } |
2124 | overrun_p = true; |
2125 | } |
2126 | } |
2127 | } |
2128 | else |
2129 | { |
2130 | /* We can always handle this case using elementwise accesses, |
2131 | but see if something more efficient is available. */ |
2132 | *memory_access_type = VMAT_ELEMENTWISE; |
2133 | |
2134 | /* If there is a gap at the end of the group then these optimizations |
2135 | would access excess elements in the last iteration. */ |
2136 | bool would_overrun_p = (gap != 0); |
2137 | /* An overrun is fine if the trailing elements are smaller than the |
2138 | alignment boundary B. Every vector access will be a multiple of B |
2139 | and so we are guaranteed to access a non-gap element in the |
2140 | same B-sized block. */ |
2141 | if (would_overrun_p |
2142 | && !masked_p |
2143 | && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info, vectype) |
2144 | / vect_get_scalar_dr_size (dr_info: first_dr_info))) |
2145 | would_overrun_p = false; |
2146 | |
2147 | if (!STMT_VINFO_STRIDED_P (first_stmt_info) |
2148 | && (can_overrun_p || !would_overrun_p) |
2149 | && compare_step_with_zero (vinfo, stmt_info) > 0) |
2150 | { |
2151 | /* First cope with the degenerate case of a single-element |
2152 | vector. */ |
2153 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) |
2154 | ; |
2155 | |
2156 | else |
2157 | { |
2158 | /* Otherwise try using LOAD/STORE_LANES. */ |
2159 | *lanes_ifn |
2160 | = vls_type == VLS_LOAD |
2161 | ? vect_load_lanes_supported (vectype, group_size, masked_p) |
2162 | : vect_store_lanes_supported (vectype, group_size, |
2163 | masked_p); |
2164 | if (*lanes_ifn != IFN_LAST) |
2165 | { |
2166 | *memory_access_type = VMAT_LOAD_STORE_LANES; |
2167 | overrun_p = would_overrun_p; |
2168 | } |
2169 | |
2170 | /* If that fails, try using permuting loads. */ |
2171 | else if (vls_type == VLS_LOAD |
2172 | ? vect_grouped_load_supported (vectype, |
2173 | single_element_p, |
2174 | group_size) |
2175 | : vect_grouped_store_supported (vectype, group_size)) |
2176 | { |
2177 | *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; |
2178 | overrun_p = would_overrun_p; |
2179 | } |
2180 | } |
2181 | } |
2182 | |
    /* As a last resort, try using a gather load or scatter store.
2184 | |
2185 | ??? Although the code can handle all group sizes correctly, |
2186 | it probably isn't a win to use separate strided accesses based |
2187 | on nearby locations. Or, even if it's a win over scalar code, |
2188 | it might not be a win over vectorizing at a lower VF, if that |
2189 | allows us to use contiguous accesses. */ |
2190 | if (*memory_access_type == VMAT_ELEMENTWISE |
2191 | && single_element_p |
2192 | && loop_vinfo |
2193 | && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, |
2194 | masked_p, gs_info)) |
2195 | *memory_access_type = VMAT_GATHER_SCATTER; |
2196 | } |
2197 | |
2198 | if (*memory_access_type == VMAT_GATHER_SCATTER |
2199 | || *memory_access_type == VMAT_ELEMENTWISE) |
2200 | { |
2201 | *alignment_support_scheme = dr_unaligned_supported; |
2202 | *misalignment = DR_MISALIGNMENT_UNKNOWN; |
2203 | } |
2204 | else |
2205 | { |
2206 | *misalignment = dr_misalignment (dr_info: first_dr_info, vectype, offset: *poffset); |
2207 | *alignment_support_scheme |
2208 | = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, |
2209 | *misalignment); |
2210 | } |
2211 | |
2212 | if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) |
2213 | { |
2214 | /* STMT is the leader of the group. Check the operands of all the |
2215 | stmts of the group. */ |
2216 | stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); |
2217 | while (next_stmt_info) |
2218 | { |
2219 | tree op = vect_get_store_rhs (stmt_info: next_stmt_info); |
2220 | enum vect_def_type dt; |
2221 | if (!vect_is_simple_use (op, vinfo, &dt)) |
2222 | { |
2223 | if (dump_enabled_p ()) |
2224 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2225 | "use not simple.\n" ); |
2226 | return false; |
2227 | } |
2228 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
2229 | } |
2230 | } |
2231 | |
2232 | if (overrun_p) |
2233 | { |
2234 | gcc_assert (can_overrun_p); |
2235 | if (dump_enabled_p ()) |
2236 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2237 | "Data access with gaps requires scalar " |
2238 | "epilogue loop\n" ); |
2239 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; |
2240 | } |
2241 | |
2242 | return true; |
2243 | } |
2244 | |
2245 | /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true |
2246 | if there is a memory access type that the vectorized form can use, |
2247 | storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers |
2248 | or scatters, fill in GS_INFO accordingly. In addition |
2249 | *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if |
2250 | the target does not support the alignment scheme. *MISALIGNMENT |
2251 | is set according to the alignment of the access (including |
2252 | DR_MISALIGNMENT_UNKNOWN when it is unknown). |
2253 | |
2254 | SLP says whether we're performing SLP rather than loop vectorization. |
2255 | MASKED_P is true if the statement is conditional on a vectorized mask. |
2256 | VECTYPE is the vector type that the vectorized statements will use. |
2257 | NCOPIES is the number of vector statements that will be needed. */ |
2258 | |
2259 | static bool |
2260 | get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, |
2261 | tree vectype, slp_tree slp_node, |
2262 | bool masked_p, vec_load_store_type vls_type, |
2263 | unsigned int ncopies, |
2264 | vect_memory_access_type *memory_access_type, |
2265 | poly_int64 *poffset, |
2266 | dr_alignment_support *alignment_support_scheme, |
2267 | int *misalignment, |
2268 | gather_scatter_info *gs_info, |
2269 | internal_fn *lanes_ifn) |
2270 | { |
2271 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
2272 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
2273 | *misalignment = DR_MISALIGNMENT_UNKNOWN; |
2274 | *poffset = 0; |
2275 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
2276 | { |
2277 | *memory_access_type = VMAT_GATHER_SCATTER; |
2278 | if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) |
2279 | gcc_unreachable (); |
2280 | /* When using internal functions, we rely on pattern recognition |
2281 | to convert the type of the offset to the type that the target |
2282 | requires, with the result being a call to an internal function. |
2283 | If that failed for some reason (e.g. because another pattern |
2284 | took priority), just handle cases in which the offset already |
2285 | has the right type. */ |
2286 | else if (gs_info->ifn != IFN_LAST |
2287 | && !is_gimple_call (gs: stmt_info->stmt) |
2288 | && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), |
2289 | TREE_TYPE (gs_info->offset_vectype))) |
2290 | { |
2291 | if (dump_enabled_p ()) |
2292 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2293 | "%s offset requires a conversion\n" , |
2294 | vls_type == VLS_LOAD ? "gather" : "scatter" ); |
2295 | return false; |
2296 | } |
2297 | else if (!vect_is_simple_use (gs_info->offset, vinfo, |
2298 | &gs_info->offset_dt, |
2299 | &gs_info->offset_vectype)) |
2300 | { |
2301 | if (dump_enabled_p ()) |
2302 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2303 | "%s index use not simple.\n" , |
2304 | vls_type == VLS_LOAD ? "gather" : "scatter" ); |
2305 | return false; |
2306 | } |
2307 | else if (gs_info->ifn == IFN_LAST && !gs_info->decl) |
2308 | { |
2309 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant () |
2310 | || !TYPE_VECTOR_SUBPARTS (node: gs_info->offset_vectype).is_constant () |
2311 | || !constant_multiple_p (a: TYPE_VECTOR_SUBPARTS |
2312 | (node: gs_info->offset_vectype), |
2313 | b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
2314 | { |
2315 | if (dump_enabled_p ()) |
2316 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2317 | "unsupported vector types for emulated " |
2318 | "gather.\n" ); |
2319 | return false; |
2320 | } |
2321 | } |
2322 | /* Gather-scatter accesses perform only component accesses, alignment |
2323 | is irrelevant for them. */ |
2324 | *alignment_support_scheme = dr_unaligned_supported; |
2325 | } |
2326 | else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node) |
2327 | { |
2328 | if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, |
2329 | masked_p, |
2330 | vls_type, memory_access_type, poffset, |
2331 | alignment_support_scheme, |
2332 | misalignment, gs_info, lanes_ifn)) |
2333 | return false; |
2334 | } |
2335 | else if (STMT_VINFO_STRIDED_P (stmt_info)) |
2336 | { |
2337 | gcc_assert (!slp_node); |
2338 | if (loop_vinfo |
2339 | && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, |
2340 | masked_p, gs_info)) |
2341 | *memory_access_type = VMAT_GATHER_SCATTER; |
2342 | else |
2343 | *memory_access_type = VMAT_ELEMENTWISE; |
2344 | /* Alignment is irrelevant here. */ |
2345 | *alignment_support_scheme = dr_unaligned_supported; |
2346 | } |
2347 | else |
2348 | { |
2349 | int cmp = compare_step_with_zero (vinfo, stmt_info); |
2350 | if (cmp == 0) |
2351 | { |
2352 | gcc_assert (vls_type == VLS_LOAD); |
2353 | *memory_access_type = VMAT_INVARIANT; |
2354 | /* Invariant accesses perform only component accesses, alignment |
2355 | is irrelevant for them. */ |
2356 | *alignment_support_scheme = dr_unaligned_supported; |
2357 | } |
2358 | else |
2359 | { |
2360 | if (cmp < 0) |
2361 | *memory_access_type = get_negative_load_store_type |
2362 | (vinfo, stmt_info, vectype, vls_type, ncopies, poffset); |
2363 | else |
2364 | *memory_access_type = VMAT_CONTIGUOUS; |
2365 | *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info), |
2366 | vectype, offset: *poffset); |
2367 | *alignment_support_scheme |
2368 | = vect_supportable_dr_alignment (vinfo, |
2369 | STMT_VINFO_DR_INFO (stmt_info), |
2370 | vectype, *misalignment); |
2371 | } |
2372 | } |
2373 | |
2374 | if ((*memory_access_type == VMAT_ELEMENTWISE |
2375 | || *memory_access_type == VMAT_STRIDED_SLP) |
2376 | && !nunits.is_constant ()) |
2377 | { |
2378 | if (dump_enabled_p ()) |
2379 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2380 | "Not using elementwise accesses due to variable " |
2381 | "vectorization factor.\n" ); |
2382 | return false; |
2383 | } |
2384 | |
2385 | if (*alignment_support_scheme == dr_unaligned_unsupported) |
2386 | { |
2387 | if (dump_enabled_p ()) |
2388 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2389 | "unsupported unaligned access\n" ); |
2390 | return false; |
2391 | } |
2392 | |
2393 | /* FIXME: At the moment the cost model seems to underestimate the |
2394 | cost of using elementwise accesses. This check preserves the |
2395 | traditional behavior until that can be fixed. */ |
2396 | stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
2397 | if (!first_stmt_info) |
2398 | first_stmt_info = stmt_info; |
2399 | if (*memory_access_type == VMAT_ELEMENTWISE |
2400 | && !STMT_VINFO_STRIDED_P (first_stmt_info) |
2401 | && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) |
2402 | && !DR_GROUP_NEXT_ELEMENT (stmt_info) |
2403 | && !pow2p_hwi (DR_GROUP_SIZE (stmt_info)))) |
2404 | { |
2405 | if (dump_enabled_p ()) |
2406 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2407 | "not falling back to elementwise accesses\n" ); |
2408 | return false; |
2409 | } |
2410 | return true; |
2411 | } |
2412 | |
2413 | /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing |
2414 | conditional operation STMT_INFO. When returning true, store the mask |
2415 | in *MASK, the type of its definition in *MASK_DT_OUT, the type of the |
2416 | vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding |
2417 | to the mask in *MASK_NODE if MASK_NODE is not NULL. */ |
2418 | |
2419 | static bool |
2420 | vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, |
2421 | slp_tree slp_node, unsigned mask_index, |
2422 | tree *mask, slp_tree *mask_node, |
2423 | vect_def_type *mask_dt_out, tree *mask_vectype_out) |
2424 | { |
2425 | enum vect_def_type mask_dt; |
2426 | tree mask_vectype; |
2427 | slp_tree mask_node_1; |
2428 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index, |
2429 | mask, &mask_node_1, &mask_dt, &mask_vectype)) |
2430 | { |
2431 | if (dump_enabled_p ()) |
2432 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2433 | "mask use not simple.\n" ); |
2434 | return false; |
2435 | } |
2436 | |
2437 | if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask))) |
2438 | { |
2439 | if (dump_enabled_p ()) |
2440 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2441 | "mask argument is not a boolean.\n" ); |
2442 | return false; |
2443 | } |
2444 | |
2445 | /* If the caller is not prepared for adjusting an external/constant |
2446 | SLP mask vector type fail. */ |
2447 | if (slp_node |
2448 | && !mask_node |
2449 | && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def) |
2450 | { |
2451 | if (dump_enabled_p ()) |
2452 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2453 | "SLP mask argument is not vectorized.\n" ); |
2454 | return false; |
2455 | } |
2456 | |
2457 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2458 | if (!mask_vectype) |
2459 | mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype), |
2460 | mask_node_1); |
2461 | |
2462 | if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) |
2463 | { |
2464 | if (dump_enabled_p ()) |
2465 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2466 | "could not find an appropriate vector mask type.\n" ); |
2467 | return false; |
2468 | } |
2469 | |
2470 | if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: mask_vectype), |
2471 | b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
2472 | { |
2473 | if (dump_enabled_p ()) |
2474 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2475 | "vector mask type %T" |
2476 | " does not match vector data type %T.\n" , |
2477 | mask_vectype, vectype); |
2478 | |
2479 | return false; |
2480 | } |
2481 | |
2482 | *mask_dt_out = mask_dt; |
2483 | *mask_vectype_out = mask_vectype; |
2484 | if (mask_node) |
2485 | *mask_node = mask_node_1; |
2486 | return true; |
2487 | } |
2488 | |
2489 | /* Return true if stored value is suitable for vectorizing store |
2490 | statement STMT_INFO. When returning true, store the scalar stored |
2491 | in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT, |
2492 | the type of the vectorized store value in |
2493 | *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */ |
2494 | |
2495 | static bool |
2496 | vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, |
2497 | slp_tree slp_node, tree *rhs, slp_tree *rhs_node, |
2498 | vect_def_type *rhs_dt_out, tree *rhs_vectype_out, |
2499 | vec_load_store_type *vls_type_out) |
2500 | { |
2501 | int op_no = 0; |
2502 | if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt)) |
2503 | { |
2504 | if (gimple_call_internal_p (gs: call) |
2505 | && internal_store_fn_p (gimple_call_internal_fn (gs: call))) |
2506 | op_no = internal_fn_stored_value_index (gimple_call_internal_fn (gs: call)); |
2507 | } |
2508 | if (slp_node) |
2509 | op_no = vect_slp_child_index_for_operand |
2510 | (stmt_info->stmt, op: op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
2511 | |
2512 | enum vect_def_type rhs_dt; |
2513 | tree rhs_vectype; |
2514 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no, |
2515 | rhs, rhs_node, &rhs_dt, &rhs_vectype)) |
2516 | { |
2517 | if (dump_enabled_p ()) |
2518 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2519 | "use not simple.\n" ); |
2520 | return false; |
2521 | } |
2522 | |
2523 | /* In the case this is a store from a constant make sure |
2524 | native_encode_expr can handle it. */ |
2525 | if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0) |
2526 | { |
2527 | if (dump_enabled_p ()) |
2528 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2529 | "cannot encode constant as a byte sequence.\n" ); |
2530 | return false; |
2531 | } |
2532 | |
2533 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2534 | if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype)) |
2535 | { |
2536 | if (dump_enabled_p ()) |
2537 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2538 | "incompatible vector types.\n" ); |
2539 | return false; |
2540 | } |
2541 | |
2542 | *rhs_dt_out = rhs_dt; |
2543 | *rhs_vectype_out = rhs_vectype; |
2544 | if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def) |
2545 | *vls_type_out = VLS_STORE_INVARIANT; |
2546 | else |
2547 | *vls_type_out = VLS_STORE; |
2548 | return true; |
2549 | } |
2550 | |
2551 | /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO. |
2552 | Note that we support masks with floating-point type, in which case the |
2553 | floats are interpreted as a bitmask. */ |
2554 | |
2555 | static tree |
2556 | vect_build_all_ones_mask (vec_info *vinfo, |
2557 | stmt_vec_info stmt_info, tree masktype) |
2558 | { |
2559 | if (TREE_CODE (masktype) == INTEGER_TYPE) |
2560 | return build_int_cst (masktype, -1); |
2561 | else if (VECTOR_BOOLEAN_TYPE_P (masktype) |
2562 | || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) |
2563 | { |
2564 | tree mask = build_int_cst (TREE_TYPE (masktype), -1); |
2565 | mask = build_vector_from_val (masktype, mask); |
2566 | return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL); |
2567 | } |
2568 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) |
2569 | { |
2570 | REAL_VALUE_TYPE r; |
2571 | long tmp[6]; |
2572 | for (int j = 0; j < 6; ++j) |
2573 | tmp[j] = -1; |
2574 | real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); |
2575 | tree mask = build_real (TREE_TYPE (masktype), r); |
2576 | mask = build_vector_from_val (masktype, mask); |
2577 | return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL); |
2578 | } |
2579 | gcc_unreachable (); |
2580 | } |
2581 | |
2582 | /* Build an all-zero merge value of type VECTYPE while vectorizing |
2583 | STMT_INFO as a gather load. */ |
2584 | |
2585 | static tree |
2586 | vect_build_zero_merge_argument (vec_info *vinfo, |
2587 | stmt_vec_info stmt_info, tree vectype) |
2588 | { |
2589 | tree merge; |
2590 | if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE) |
2591 | merge = build_int_cst (TREE_TYPE (vectype), 0); |
2592 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype))) |
2593 | { |
2594 | REAL_VALUE_TYPE r; |
2595 | long tmp[6]; |
2596 | for (int j = 0; j < 6; ++j) |
2597 | tmp[j] = 0; |
2598 | real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype))); |
2599 | merge = build_real (TREE_TYPE (vectype), r); |
2600 | } |
2601 | else |
2602 | gcc_unreachable (); |
2603 | merge = build_vector_from_val (vectype, merge); |
2604 | return vect_init_vector (vinfo, stmt_info, val: merge, type: vectype, NULL); |
2605 | } |
2606 | |
2607 | /* Build a gather load call while vectorizing STMT_INFO. Insert new |
2608 | instructions before GSI and add them to VEC_STMT. GS_INFO describes |
2609 | the gather load operation. If the load is conditional, MASK is the |
2610 | vectorized condition, otherwise MASK is null. PTR is the base |
2611 | pointer and OFFSET is the vectorized offset. */ |
2612 | |
2613 | static gimple * |
2614 | vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, |
2615 | gimple_stmt_iterator *gsi, |
2616 | gather_scatter_info *gs_info, |
2617 | tree ptr, tree offset, tree mask) |
2618 | { |
2619 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2620 | tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); |
2621 | tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); |
2622 | tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2623 | /* ptrtype */ arglist = TREE_CHAIN (arglist); |
2624 | tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2625 | tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2626 | tree scaletype = TREE_VALUE (arglist); |
2627 | tree var; |
2628 | gcc_checking_assert (types_compatible_p (srctype, rettype) |
2629 | && (!mask |
2630 | || TREE_CODE (masktype) == INTEGER_TYPE |
2631 | || types_compatible_p (srctype, masktype))); |
2632 | |
2633 | tree op = offset; |
2634 | if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) |
2635 | { |
2636 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), |
2637 | TYPE_VECTOR_SUBPARTS (idxtype))); |
2638 | var = vect_get_new_ssa_name (idxtype, vect_simple_var); |
2639 | op = build1 (VIEW_CONVERT_EXPR, idxtype, op); |
2640 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); |
2641 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2642 | op = var; |
2643 | } |
2644 | |
2645 | tree src_op = NULL_TREE; |
2646 | tree mask_op = NULL_TREE; |
2647 | if (mask) |
2648 | { |
2649 | if (!useless_type_conversion_p (masktype, TREE_TYPE (mask))) |
2650 | { |
2651 | tree utype, optype = TREE_TYPE (mask); |
2652 | if (VECTOR_TYPE_P (masktype) |
2653 | || TYPE_MODE (masktype) == TYPE_MODE (optype)) |
2654 | utype = masktype; |
2655 | else |
2656 | utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); |
2657 | var = vect_get_new_ssa_name (utype, vect_scalar_var); |
2658 | tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask); |
2659 | gassign *new_stmt |
2660 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); |
2661 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2662 | mask_arg = var; |
2663 | if (!useless_type_conversion_p (masktype, utype)) |
2664 | { |
2665 | gcc_assert (TYPE_PRECISION (utype) |
2666 | <= TYPE_PRECISION (masktype)); |
2667 | var = vect_get_new_ssa_name (masktype, vect_scalar_var); |
2668 | new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); |
2669 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2670 | mask_arg = var; |
2671 | } |
2672 | src_op = build_zero_cst (srctype); |
2673 | mask_op = mask_arg; |
2674 | } |
2675 | else |
2676 | { |
2677 | src_op = mask; |
2678 | mask_op = mask; |
2679 | } |
2680 | } |
2681 | else |
2682 | { |
2683 | src_op = vect_build_zero_merge_argument (vinfo, stmt_info, vectype: rettype); |
2684 | mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype); |
2685 | } |
2686 | |
2687 | tree scale = build_int_cst (scaletype, gs_info->scale); |
2688 | gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, |
2689 | mask_op, scale); |
2690 | |
2691 | if (!useless_type_conversion_p (vectype, rettype)) |
2692 | { |
2693 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype), |
2694 | TYPE_VECTOR_SUBPARTS (rettype))); |
2695 | op = vect_get_new_ssa_name (rettype, vect_simple_var); |
2696 | gimple_call_set_lhs (gs: new_stmt, lhs: op); |
2697 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2698 | op = build1 (VIEW_CONVERT_EXPR, vectype, op); |
2699 | new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op); |
2700 | } |
2701 | |
2702 | return new_stmt; |
2703 | } |
2704 | |
2705 | /* Build a scatter store call while vectorizing STMT_INFO. Insert new |
2706 | instructions before GSI. GS_INFO describes the scatter store operation. |
2707 | PTR is the base pointer, OFFSET the vectorized offsets and OPRND the |
2708 | vectorized data to store. |
2709 | If the store is conditional, MASK is the vectorized condition, otherwise |
2710 | MASK is null. */ |
2711 | |
2712 | static gimple * |
2713 | vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, |
2714 | gimple_stmt_iterator *gsi, |
2715 | gather_scatter_info *gs_info, |
2716 | tree ptr, tree offset, tree oprnd, tree mask) |
2717 | { |
2718 | tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); |
2719 | tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); |
2720 | /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist); |
2721 | tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2722 | tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2723 | tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2724 | tree scaletype = TREE_VALUE (arglist); |
2725 | gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE |
2726 | && TREE_CODE (rettype) == VOID_TYPE); |
2727 | |
2728 | tree mask_arg = NULL_TREE; |
2729 | if (mask) |
2730 | { |
2731 | mask_arg = mask; |
2732 | tree optype = TREE_TYPE (mask_arg); |
2733 | tree utype; |
2734 | if (TYPE_MODE (masktype) == TYPE_MODE (optype)) |
2735 | utype = masktype; |
2736 | else |
2737 | utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); |
2738 | tree var = vect_get_new_ssa_name (utype, vect_scalar_var); |
2739 | mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg); |
2740 | gassign *new_stmt |
2741 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); |
2742 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2743 | mask_arg = var; |
2744 | if (!useless_type_conversion_p (masktype, utype)) |
2745 | { |
2746 | gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype)); |
2747 | tree var = vect_get_new_ssa_name (masktype, vect_scalar_var); |
2748 | new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); |
2749 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2750 | mask_arg = var; |
2751 | } |
2752 | } |
2753 | else |
2754 | { |
2755 | mask_arg = build_int_cst (masktype, -1); |
2756 | mask_arg = vect_init_vector (vinfo, stmt_info, val: mask_arg, type: masktype, NULL); |
2757 | } |
2758 | |
2759 | tree src = oprnd; |
2760 | if (!useless_type_conversion_p (srctype, TREE_TYPE (src))) |
2761 | { |
2762 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)), |
2763 | TYPE_VECTOR_SUBPARTS (srctype))); |
2764 | tree var = vect_get_new_ssa_name (srctype, vect_simple_var); |
2765 | src = build1 (VIEW_CONVERT_EXPR, srctype, src); |
2766 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src); |
2767 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2768 | src = var; |
2769 | } |
2770 | |
2771 | tree op = offset; |
2772 | if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) |
2773 | { |
2774 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), |
2775 | TYPE_VECTOR_SUBPARTS (idxtype))); |
2776 | tree var = vect_get_new_ssa_name (idxtype, vect_simple_var); |
2777 | op = build1 (VIEW_CONVERT_EXPR, idxtype, op); |
2778 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); |
2779 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
2780 | op = var; |
2781 | } |
2782 | |
2783 | tree scale = build_int_cst (scaletype, gs_info->scale); |
2784 | gcall *new_stmt |
2785 | = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale); |
2786 | return new_stmt; |
2787 | } |
2788 | |
2789 | /* Prepare the base and offset in GS_INFO for vectorization. |
2790 | Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET |
2791 | to the vectorized offset argument for the first copy of STMT_INFO. |
2792 | STMT_INFO is the statement described by GS_INFO and LOOP is the |
2793 | containing loop. */ |
2794 | |
2795 | static void |
2796 | vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, |
2797 | class loop *loop, stmt_vec_info stmt_info, |
2798 | slp_tree slp_node, gather_scatter_info *gs_info, |
2799 | tree *dataref_ptr, vec<tree> *vec_offset) |
2800 | { |
2801 | gimple_seq stmts = NULL; |
2802 | *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); |
2803 | if (stmts != NULL) |
2804 | { |
2805 | basic_block new_bb; |
2806 | edge pe = loop_preheader_edge (loop); |
2807 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
2808 | gcc_assert (!new_bb); |
2809 | } |
2810 | if (slp_node) |
2811 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset); |
2812 | else |
2813 | { |
2814 | unsigned ncopies |
2815 | = vect_get_num_copies (loop_vinfo, vectype: gs_info->offset_vectype); |
2816 | vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_vinfo: stmt_info, ncopies, |
2817 | op: gs_info->offset, vec_oprnds: vec_offset, |
2818 | vectype: gs_info->offset_vectype); |
2819 | } |
2820 | } |
2821 | |
2822 | /* Prepare to implement a grouped or strided load or store using |
2823 | the gather load or scatter store operation described by GS_INFO. |
2824 | STMT_INFO is the load or store statement. |
2825 | |
2826 | Set *DATAREF_BUMP to the amount that should be added to the base |
2827 | address after each copy of the vectorized statement. Set *VEC_OFFSET |
2828 | to an invariant offset vector in which element I has the value |
2829 | I * DR_STEP / SCALE. */ |

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gimple_stmt_iterator *gsi,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset,
				 vec_loop_lens *loop_lens)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    {
      /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
	 ivtmp_8 = _31 * 16 (step in bytes);
	 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
	 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
      tree loop_len
	= vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
      tree tmp
	= fold_build2 (MULT_EXPR, sizetype,
		       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		       loop_len);
      *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
						GSI_SAME_STMT);
    }
  else
    {
      tree bump
	= size_binop (MULT_EXPR,
		      fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		      size_int (TYPE_VECTOR_SUBPARTS (vectype)));
      *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
    }

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}

/* Prepare the pointer IVs which need to be updated by a variable amount.
   Such a variable amount is the outcome of .SELECT_VL.  In this case, each
   iteration may process a flexible number of elements, as long as that
   number is <= VF elements.

   Return the data-reference bump according to SELECT_VL.
   If new statements are needed, insert them before GSI.  */

static tree
vect_get_loop_variant_data_ptr_increment (
  vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
  vec_loop_lens *loop_lens, dr_vec_info *dr_info,
  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;

  /* Gather/scatter accesses never reach here.  */
  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);

  /* With the SELECT_VL pattern the memory address is adjusted dynamically
     by the .SELECT_VL result.

     The result of .SELECT_VL is the number of elements to be processed
     in each iteration, so the memory address adjustment operation is:

     addr = addr + .SELECT_VL (ARG..) * step;
   */
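  /* For example, with a 4-byte element step this generates IL such as

       _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
       ivtmp_8 = _31 * 4;
       vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;

     (a sketch following the analogous SELECT_VL example earlier in this
     file; the SSA names are illustrative only).  */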
  tree loop_len
    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
  tree len_type = TREE_TYPE (loop_len);
  /* Since the outcome of .SELECT_VL is a number of elements, scale it by
     the step in bytes so that it can be used to bump the variable-amount
     address pointer IVs.  */
  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
			  wide_int_to_tree (len_type, wi::to_widest (step)));
  tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
  gassign *assign = gimple_build_assign (bump, tmp);
  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
  return bump;
}

/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type,
			     vec_loop_lens *loop_lens = nullptr)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
  if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
						     loop_lens, dr_info,
						     memory_access_type);

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}

/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */

static bool
vectorizable_bswap (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    slp_tree *slp_op,
		    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  unsigned ncopies;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
  unsigned word_bytes;
  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
    return false;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);
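  /* For example, for bswap32 on a 16-byte vector (word_bytes == 4) the
     three stepped patterns encode the selector
       { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8, ... }
     with the remaining elements extrapolated by the encoding.  */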

  vec_perm_indices indices (elts, 1, num_bytes);
  machine_mode vmode = TYPE_MODE (char_vectype);
  if (!can_vec_perm_const_p (vmode, vmode, indices))
    return false;

  if (! vec_stmt)
    {
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      record_stmt_cost (cost_vec,
			1, vector_stmt, stmt_info, 0, vect_prologue);
      record_stmt_cost (cost_vec,
			slp_node
			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
			vec_perm, stmt_info, 0, vect_body);
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op, &vec_oprnds);
  /* Arguments are ready.  Create the new vector stmt.  */
  unsigned i;
  tree vop;
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    {
      gimple *new_stmt;
      tree tem = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   char_vectype, vop));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tree tem2 = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
				      tem, tem, bswap_vconst);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tem = make_ssa_name (vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   vectype, tem2));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds.release ();
  return true;
}

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
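/* For example, narrowing V8SI inputs to a V16HI output qualifies when the
   target provides a single pack step (such as VEC_PACK_TRUNC_EXPR) for it,
   whereas a narrowing that would need an intermediate type (a multi-step
   conversion) is rejected.  */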

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  code_helper *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  code_helper code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}

/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_call (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[4]
    = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
	vect_unknown_def_type };
  tree vectypes[ARRAY_SIZE (dt)] = {};
  slp_tree slp_op[ARRAY_SIZE (dt)] = {};
  int ndts = ARRAY_SIZE (dt);
  int ncopies, j;
  auto_vec<tree, 8> vargs;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT_INFO a vectorizable call?  */
  stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 4)
    return false;

  /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
  combined_fn cfn = gimple_call_combined_fn (stmt);
  if (cfn == CFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  int mask_opno = -1;
  if (internal_fn_p (cfn))
    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));

  for (i = 0; i < nargs; i++)
    {
      if ((int) i == mask_opno)
	{
	  if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
				       &op, &slp_op[i], &dt[i], &vectypes[i]))
	    return false;
	  continue;
	}

      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       i, &op, &slp_op[i], &dt[i], &vectypes[i]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vectype_in)
	vectype_in = vectypes[i];
      else if (vectypes[i]
	       && !types_compatible_p (vectypes[i], vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs, infer the vector type
     from the scalar type.  */
  if (!vectype_in)
    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n", rhs_type);

      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }

  if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use emulated vector type for call\n");
      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;
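
  /* For example (a sketch of the lane arithmetic above): a call taking
     V2DF arguments and producing a V4SI result is NARROW, so two argument
     vectors feed each result vector; equal lane counts give NONE; a
     V4SI-argument, V2DF-result call is WIDEN.  */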

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  code_helper convert_code = MAX_TREE_CODES;
  if (cfn != CFN_LAST
      && (modifier == NONE
	  || (modifier == NARROW
	      && simple_integer_narrowing (vectype_out, vectype_in,
					   &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
	fndecl = targetm.vectorize.builtin_vectorized_function
	  (cfn, vectype_out, vectype_in);
      else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
	fndecl = targetm.vectorize.builtin_md_vectorized_function
	  (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else if (modifier == NONE
	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
				   slp_op, vectype_in, cost_vec);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  internal_fn cond_fn = get_conditional_internal_fn (ifn);
  internal_fn cond_len_fn = get_len_internal_fn (ifn);
  int len_opno = internal_fn_len_index (cond_len_fn);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node)
	for (i = 0; i < nargs; ++i)
	  if (!vect_maybe_update_slp_op_vectype (slp_op[i],
						 vectypes[i]
						 ? vectypes[i] : vectype_in))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "incompatible vector types for invariants\n");
	      return false;
	    }
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	record_stmt_cost (cost_vec, ncopies / 2,
			  vec_promote_demote, stmt_info, 0, vect_body);

      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && (reduc_idx >= 0 || mask_opno >= 0))
	{
	  if (reduc_idx >= 0
	      && (cond_fn == IFN_LAST
		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
						      OPTIMIZE_FOR_SPEED))
	      && (cond_len_fn == IFN_LAST
		  || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
						      OPTIMIZE_FOR_SPEED)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else
	    {
	      unsigned int nvectors
		= (slp_node
		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
		   : ncopies);
	      tree scalar_mask = NULL_TREE;
	      if (mask_opno >= 0)
		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
	      if (cond_len_fn != IFN_LAST
		  && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
						     OPTIMIZE_FOR_SPEED))
		vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
				      1);
	      else
		vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
				       scalar_mask);
	    }
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
  bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
  unsigned int vect_nargs = nargs;
  if (len_loop_p)
    {
      if (len_opno >= 0)
	{
	  ifn = cond_len_fn;
	  /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS.  */
	  vect_nargs += 2;
	}
      else if (reduc_idx >= 0)
	gcc_unreachable ();
    }
  else if (masked_loop_p && reduc_idx >= 0)
    {
      ifn = cond_fn;
      vect_nargs += 2;
    }
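
  /* For example (a sketch, assuming the target provides the len variant):
     a conditional call such as IFN_COND_ADD would be replaced here by
     IFN_COND_LEN_ADD with trailing LEN and BIAS operands in a loop that
     uses length-based partial vectors.  */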

  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      vargs.safe_grow (vect_nargs, true);
      auto_vec<vec<tree> > vec_defs (nargs);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (slp_node)
	    {
	      vec<tree> vec_oprnds0;

	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  int varg = 0;
		  if (masked_loop_p && reduc_idx >= 0)
		    {
		      unsigned int vec_num = vec_oprnds0.length ();
		      /* Always true for SLP.  */
		      gcc_assert (ncopies == 1);
		      vargs[varg++] = vect_get_loop_mask (loop_vinfo,
							  gsi, masks, vec_num,
							  vectype_out, i);
		    }
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[varg++] = vec_oprndsk[i];
		    }
		  if (masked_loop_p && reduc_idx >= 0)
		    vargs[varg++] = vargs[reduc_idx + 1];
		  gimple *new_stmt;
		  if (modifier == NARROW)
		    {
		      /* We don't define any narrowing conditional functions
			 at present.  */
		      gcc_assert (mask_opno < 0);
		      tree half_res = make_ssa_name (vectype_in);
		      gcall *call
			= gimple_build_call_internal_vec (ifn, vargs);
		      gimple_call_set_lhs (call, half_res);
		      gimple_call_set_nothrow (call, true);
		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		      if ((i & 1) == 0)
			{
			  prev_res = half_res;
			  continue;
			}
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = vect_gimple_build (new_temp, convert_code,
						    prev_res, half_res);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
		    }
		  else
		    {
		      if (len_opno >= 0 && len_loop_p)
			{
			  unsigned int vec_num = vec_oprnds0.length ();
			  /* Always true for SLP.  */
			  gcc_assert (ncopies == 1);
			  tree len
			    = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
						 vectype_out, i, 1);
			  signed char biasval
			    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
			  tree bias = build_int_cst (intQI_type_node, biasval);
			  vargs[len_opno] = len;
			  vargs[len_opno + 1] = bias;
			}
		      else if (mask_opno >= 0 && masked_loop_p)
			{
			  unsigned int vec_num = vec_oprnds0.length ();
			  /* Always true for SLP.  */
			  gcc_assert (ncopies == 1);
			  tree mask = vect_get_loop_mask (loop_vinfo,
							  gsi, masks, vec_num,
							  vectype_out, i);
			  vargs[mask_opno] = prepare_vec_mask
			    (loop_vinfo, TREE_TYPE (mask), mask,
			     vargs[mask_opno], gsi);
			}

		      gcall *call;
		      if (ifn != IFN_LAST)
			call = gimple_build_call_internal_vec (ifn, vargs);
		      else
			call = gimple_build_call_vec (fndecl, vargs);
		      new_temp = make_ssa_name (vec_dest, call);
		      gimple_call_set_lhs (call, new_temp);
		      gimple_call_set_nothrow (call, true);
		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		      new_stmt = call;
		    }
		  slp_node->push_vec_def (new_stmt);
		}
	      continue;
	    }

	  int varg = 0;
	  if (masked_loop_p && reduc_idx >= 0)
	    vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
						vectype_out, j);
	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_defs.quick_push (vNULL);
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
						 op, &vec_defs[i],
						 vectypes[i]);
		}
	      vargs[varg++] = vec_defs[i][j];
	    }
	  if (masked_loop_p && reduc_idx >= 0)
	    vargs[varg++] = vargs[reduc_idx + 1];

	  if (len_opno >= 0 && len_loop_p)
	    {
	      tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
					    vectype_out, j, 1);
	      signed char biasval
		= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
	      tree bias = build_int_cst (intQI_type_node, biasval);
	      vargs[len_opno] = len;
	      vargs[len_opno + 1] = bias;
	    }
	  else if (mask_opno >= 0 && masked_loop_p)
	    {
	      tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
					      vectype_out, j);
	      vargs[mask_opno]
		= prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
				    vargs[mask_opno], gsi);
	    }
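
	  /* prepare_vec_mask combines (effectively ANDs) the statement's
	     own mask with the loop mask, so lanes that are inactive in the
	     current loop iteration stay inactive in the call (a brief note;
	     see prepare_vec_mask for the details).  */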

	  gimple *new_stmt;
	  if (cfn == CFN_GOMP_SIMD_LANE)
	    {
	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, new_var);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  else if (modifier == NARROW)
	    {
	      /* We don't define any narrowing conditional functions at
		 present.  */
	      gcc_assert (mask_opno < 0);
	      tree half_res = make_ssa_name (vectype_in);
	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
	      gimple_call_set_lhs (call, half_res);
	      gimple_call_set_nothrow (call, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	      if ((j & 1) == 0)
		{
		  prev_res = half_res;
		  continue;
		}
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
					    half_res);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      gcall *call;
	      if (ifn != IFN_LAST)
		call = gimple_build_call_internal_vec (ifn, vargs);
	      else
		call = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, call);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	      new_stmt = call;
	    }

	  if (j == (modifier == NARROW ? 1 : 0))
	    *vec_stmt = new_stmt;
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
      for (i = 0; i < nargs; i++)
	{
	  vec<tree> vec_oprndsi = vec_defs[i];
	  vec_oprndsi.release ();
	}
    }
  else if (modifier == NARROW)
    {
      auto_vec<vec<tree> > vec_defs (nargs);
      /* We don't define any narrowing conditional functions at present.  */
      gcc_assert (mask_opno < 0);
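      /* In this branch each vectorized call consumes two input vectors per
	 scalar argument and the target builtin produces a single result
	 vector with twice as many lanes as each input (a brief note on the
	 operand layout built below).  */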
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      vec<tree> vec_oprnds0;

	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  slp_node->push_vec_def (call);
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_defs.quick_push (vNULL);
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
						 op, &vec_defs[i], vectypes[i]);
		}
	      vec_oprnd0 = vec_defs[i][2*j];
	      vec_oprnd1 = vec_defs[i][2*j+1];

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}

      if (!slp_node)
	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

      for (i = 0; i < nargs; i++)
	{
	  vec<tree> vec_oprndsi = vec_defs[i];
	  vec_oprndsi.release ();
	}
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
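/* For example (a sketch with made-up SSA names), given

     _4 = GOMP_SIMD_LANE (simduid.0);
     _3 = (sizetype) _4;
     _2 = _3 * 4;
     p_1 = &a + _2;

   P_1 is linear within the simd lane with base &a and linear step 4.  */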

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
    }
}

/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  tree vectype;
  poly_uint64 nunits;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
  int masked_call_offset = 0;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE
      && gimple_call_internal_p (stmt, IFN_MASK_CALL))
    {
      fndecl = gimple_call_arg (stmt, 0);
      gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
      fndecl = TREE_OPERAND (fndecl, 0);
      gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
      masked_call_offset = 1;
    }
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt) - masked_call_offset;

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
				: STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
  arginfo.reserve (nargs, true);
  auto_vec<slp_tree> slp_op;
  slp_op.safe_grow_cleared (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      int op_no = i + masked_call_offset;
      if (slp_node)
	op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       op_no, &op, &slp_op[i],
			       &thisarginfo.dt, &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	{
	  /* With SLP we determine the vector type of constants/externals
	     at analysis time, handling conflicts via
	     vect_maybe_update_slp_op_vectype.  At transform time
	     we have a vector type recorded for SLP.  */
	  gcc_assert (!vec_stmt
		      || !slp_node
		      || thisarginfo.vectype != NULL_TREE);
	  if (!vec_stmt)
	    thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
							       TREE_TYPE (op),
							       slp_node);
	}
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
	 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= simd_clone_info.length ()
	  && simd_clone_info[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
	  thisarginfo.op = simd_clone_info[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (simd_clone_info[i * 3 + 3] == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = simd_clone_info[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (!vec_stmt
	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && loop_vinfo
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && !vec_stmt
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
  unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (simd_clone_info.exists ())
    bestn = cgraph_node::get (simd_clone_info[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	unsigned int num_calls;
	/* The number of arguments in the call and the number of parameters
	   in the simdclone should match.  However, when the simdclone is
	   'inbranch', it could have one more parameter than nargs when
	   using an inbranch simdclone to call a non-inbranch call, either
	   in a non-masked loop using an all-true constant mask, or inside
	   a masked loop using its mask.  */
	size_t simd_nargs = n->simdclone->nargs;
	if (!masked_call_offset && n->simdclone->inbranch)
	  simd_nargs--;
	if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
				  &num_calls)
	    || (!n->simdclone->inbranch && (masked_call_offset > 0))
	    || (nargs != simd_nargs))
	  continue;
	if (num_calls != 1)
	  this_badness += exact_log2 (num_calls) * 4096;
	if (n->simdclone->inbranch)
	  this_badness += 8192;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
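	/* E.g. with VF == 8, a 4-lane clone needs two calls per vector
	   iteration and accumulates exact_log2 (2) * 4096 == 4096 badness
	   relative to an 8-lane clone (a worked example of the weights
	   above).  */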
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt,
						     i + masked_call_offset))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		/* While we can create a traditional data vector from
		   an incoming integer mode mask we have no good way to
		   force generate an integer mode mask from a traditional
		   boolean vector input.  */
		if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
		    && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
		  i = -1;
		else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
			 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
		  this_badness += 2048;
		break;
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (masked_call_offset == 0
	    && n->simdclone->inbranch
	    && n->simdclone->nargs > nargs)
	  {
	    gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
			SIMD_CLONE_ARG_TYPE_MASK);
	    /* Penalize using a masked SIMD clone in a non-masked loop that
	       is not in a branch, as we'd have to construct an all-true
	       mask.  */
	    if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
	      this_badness += 64;
	  }
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }

  if (bestn == NULL)
    return false;

  unsigned int num_mask_args = 0;
  if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
    for (i = 0; i < nargs; i++)
      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
	num_mask_args++;

  for (i = 0; i < nargs; i++)
    {
      if ((arginfo[i].dt == vect_constant_def
	   || arginfo[i].dt == vect_external_def)
	  && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
	{
	  tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
						      i + masked_call_offset));
	  arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
							    slp_node);
	  if (arginfo[i].vectype == NULL
	      || !constant_multiple_p (bestn->simdclone->simdlen,
				       TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
	    return false;
	}

      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
	  && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "vector mask arguments are not supported.\n");
	  return false;
	}

      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
	{
	  tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
	  if (bestn->simdclone->mask_mode == VOIDmode)
	    {
	      if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
			    TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
		{
		  /* FORNOW we only have partial support for vector-type masks
		     that can't hold all of simdlen.  */
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				     vect_location,
				     "in-branch vector clones are not yet"
				     " supported for mismatched vector sizes.\n");
		  return false;
		}
	    }
	  else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
	    {
	      if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
		  || maybe_ne (exact_div (bestn->simdclone->simdlen,
					  num_mask_args),
			       TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
		{
		  /* FORNOW we only have partial support for integer-type masks
		     that represent the same number of lanes as the
		     vectorized mask inputs.  */
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				     vect_location,
				     "in-branch vector clones are not yet "
				     "supported for mismatched vector sizes.\n");
		  return false;
		}
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				 vect_location,
				 "in-branch vector clones not supported"
				 " on this target.\n");
	      return false;
	    }
	}
    }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  if (slp_node)
    ncopies = vector_unroll_factor (vf * group_size, nunits);
  else
    ncopies = vector_unroll_factor (vf, nunits);

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node)
	for (unsigned i = 0; i < nargs; ++i)
	  if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "incompatible vector types for invariants\n");
	      return false;
	    }
      /* When the original call is pure or const but the SIMD ABI dictates
	 an aggregate return we will have to use a virtual definition and
	 in a loop eventually even need to add a virtual PHI.  That's
	 not straightforward, so allow this to be fixed up via renaming.  */
      if (gimple_call_lhs (stmt)
	  && !gimple_vdef (stmt)
	  && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
	vinfo->any_known_not_updated_vssa = true;
      /* ??? For SLP code-gen we end up inserting after the last
	 vector argument def rather than at the original call position
	 so automagic virtual operand updating doesn't work.  */
      if (gimple_vuse (stmt) && slp_node)
	vinfo->any_known_not_updated_vssa = true;
      simd_clone_info.safe_push (bestn->decl);
      for (i = 0; i < bestn->simdclone->nargs; i++)
	{
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    default:
	      continue;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
	      {
		simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
		simd_clone_info.safe_push (arginfo[i].op);
		tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
			   ? size_type_node : TREE_TYPE (arginfo[i].op);
		tree ls = build_int_cst (lst, arginfo[i].linear_step);
		simd_clone_info.safe_push (ls);
		tree sll = arginfo[i].simd_lane_linear
			   ? boolean_true_node : boolean_false_node;
		simd_clone_info.safe_push (sll);
	      }
	      break;
	    case SIMD_CLONE_ARG_TYPE_MASK:
	      if (loop_vinfo
		  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
		vect_record_loop_mask (loop_vinfo,
				       &LOOP_VINFO_MASKS (loop_vinfo),
				       ncopies, vectype, op);

	      break;
	    }
	}

      if (!bestn->simdclone->inbranch && loop_vinfo)
	{
	  if (dump_enabled_p ()
	      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "can't use a fully-masked loop because a"
			     " non-masked simd clone was selected.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}

      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
				dt, slp_node, cost_vec); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  auto_vec<vec<tree> > vec_oprnds;
  auto_vec<unsigned> vec_oprnds_i;
  vec_oprnds_i.safe_grow_cleared (nargs, true);
  if (slp_node)
    {
      vec_oprnds.reserve_exact (nargs);
      vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
    }
  else
    vec_oprnds.safe_grow_cleared (nargs, true);
  for (j = 0; j < ncopies; ++j)
    {
      poly_uint64 callee_nelements;
      poly_uint64 caller_nelements;
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i + masked_call_offset);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
	      callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
	      o = vector_unroll_factor (nunits, callee_nelements);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (known_lt (callee_nelements, caller_nelements))
		    {
		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      if (!constant_multiple_p (caller_nelements,
						callee_nelements, &k))
			gcc_unreachable ();

		      gcc_assert ((k & (k - 1)) == 0);
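		      /* E.g. if the caller operates on V8SI while the clone
			 takes V4SI, k == 2 and each caller vector is split
			 into two halves with PREC-bit BIT_FIELD_REFs (a
			 worked example of the code below).  */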
		      if (m == 0)
			{
			  if (!slp_node)
			    vect_get_vec_defs_for_operand (vinfo, stmt_info,
							   ncopies * o / k, op,
							   &vec_oprnds[i]);
			  vec_oprnds_i[i] = 0;
			  vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
			}
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  bitsize_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      gassign *new_stmt
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      if (!constant_multiple_p (callee_nelements,
						caller_nelements, &k))
			gcc_unreachable ();
		      gcc_assert ((k & (k - 1)) == 0);
4375 | vec<constructor_elt, va_gc> *ctor_elts; |
4376 | if (k != 1) |
4377 | vec_alloc (v&: ctor_elts, nelems: k); |
4378 | else |
4379 | ctor_elts = NULL; |
4380 | for (l = 0; l < k; l++) |
4381 | { |
4382 | if (m == 0 && l == 0) |
4383 | { |
4384 | if (!slp_node) |
			      vect_get_vec_defs_for_operand (vinfo, stmt_info,
							     k * o * ncopies,
							     op,
							     &vec_oprnds[i]);
4389 | vec_oprnds_i[i] = 0; |
4390 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4391 | } |
4392 | else |
4393 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4394 | arginfo[i].op = vec_oprnd0; |
4395 | if (k == 1) |
4396 | break; |
4397 | CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, |
4398 | vec_oprnd0); |
4399 | } |
4400 | if (k == 1) |
4401 | if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0), |
4402 | atype)) |
4403 | { |
4404 | vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype, |
4405 | vec_oprnd0); |
4406 | gassign *new_stmt |
			    = gimple_build_assign (make_ssa_name (atype),
						   vec_oprnd0);
			  vect_finish_stmt_generation (vinfo, stmt_info,
						       new_stmt, gsi);
			  vargs.safe_push (gimple_get_lhs (new_stmt));
			}
		      else
			vargs.safe_push (vec_oprnd0);
		    else
		      {
			vec_oprnd0 = build_constructor (atype, ctor_elts);
			gassign *new_stmt
			  = gimple_build_assign (make_ssa_name (atype),
						 vec_oprnd0);
			vect_finish_stmt_generation (vinfo, stmt_info,
						     new_stmt, gsi);
			vargs.safe_push (gimple_assign_lhs (new_stmt));
4424 | } |
4425 | } |
4426 | } |
4427 | break; |
4428 | case SIMD_CLONE_ARG_TYPE_MASK: |
4429 | if (bestn->simdclone->mask_mode == VOIDmode) |
4430 | { |
4431 | atype = bestn->simdclone->args[i].vector_type; |
4432 | tree elt_type = TREE_TYPE (atype); |
4433 | tree one = fold_convert (elt_type, integer_one_node); |
4434 | tree zero = fold_convert (elt_type, integer_zero_node); |
	      callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
	      caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
	      o = vector_unroll_factor (nunits, callee_nelements);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (maybe_lt (callee_nelements, caller_nelements))
4441 | { |
4442 | /* The mask type has fewer elements than simdlen. */ |
4443 | |
4444 | /* FORNOW */ |
4445 | gcc_unreachable (); |
4446 | } |
4447 | else if (known_eq (callee_nelements, caller_nelements)) |
4448 | { |
4449 | /* The SIMD clone function has the same number of |
4450 | elements as the current function. */ |
4451 | if (m == 0) |
4452 | { |
4453 | if (!slp_node) |
			    vect_get_vec_defs_for_operand (vinfo, stmt_info,
							   o * ncopies,
							   op,
							   &vec_oprnds[i]);
4458 | vec_oprnds_i[i] = 0; |
4459 | } |
4460 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4461 | if (loop_vinfo |
4462 | && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
4463 | { |
4464 | vec_loop_masks *loop_masks |
4465 | = &LOOP_VINFO_MASKS (loop_vinfo); |
4466 | tree loop_mask |
4467 | = vect_get_loop_mask (loop_vinfo, gsi, |
4468 | loop_masks, ncopies, |
4469 | vectype, j); |
4470 | vec_oprnd0 |
			    = prepare_vec_mask (loop_vinfo,
						TREE_TYPE (loop_mask),
						loop_mask, vec_oprnd0,
						gsi);
			  loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
								 loop_mask });
4477 | |
4478 | } |
4479 | vec_oprnd0 |
4480 | = build3 (VEC_COND_EXPR, atype, vec_oprnd0, |
4481 | build_vector_from_val (atype, one), |
4482 | build_vector_from_val (atype, zero)); |
4483 | gassign *new_stmt |
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4489 | } |
4490 | else |
4491 | { |
4492 | /* The mask type has more elements than simdlen. */ |
4493 | |
4494 | /* FORNOW */ |
4495 | gcc_unreachable (); |
4496 | } |
4497 | } |
4498 | } |
4499 | else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4500 | { |
4501 | atype = bestn->simdclone->args[i].vector_type; |
4502 | /* Guess the number of lanes represented by atype. */ |
4503 | poly_uint64 atype_subparts |
		= exact_div (bestn->simdclone->simdlen,
			     num_mask_args);
	      o = vector_unroll_factor (nunits, atype_subparts);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (m == 0)
		    {
		      if (!slp_node)
			vect_get_vec_defs_for_operand (vinfo, stmt_info,
						       o * ncopies,
						       op,
						       &vec_oprnds[i]);
		      vec_oprnds_i[i] = 0;
		    }
		  if (maybe_lt (atype_subparts,
				TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4520 | { |
4521 | /* The mask argument has fewer elements than the |
4522 | input vector. */ |
4523 | /* FORNOW */ |
4524 | gcc_unreachable (); |
4525 | } |
4526 | else if (known_eq (atype_subparts, |
4527 | TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))) |
4528 | { |
4529 | /* The vector mask argument matches the input |
4530 | in the number of lanes, but not necessarily |
4531 | in the mode. */ |
4532 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4533 | tree st = lang_hooks.types.type_for_mode |
4534 | (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1); |
4535 | vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st, |
4536 | vec_oprnd0); |
4537 | gassign *new_stmt |
			= gimple_build_assign (make_ssa_name (st),
					       vec_oprnd0);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
		      if (!types_compatible_p (atype, st))
			{
			  new_stmt
			    = gimple_build_assign (make_ssa_name (atype),
						   NOP_EXPR,
						   gimple_assign_lhs (new_stmt));
			  vect_finish_stmt_generation (vinfo, stmt_info,
						       new_stmt, gsi);
			}
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4553 | } |
4554 | else |
4555 | { |
4556 | /* The mask argument has more elements than the |
4557 | input vector. */ |
4558 | /* FORNOW */ |
4559 | gcc_unreachable (); |
4560 | } |
4561 | } |
4562 | } |
4563 | else |
4564 | gcc_unreachable (); |
4565 | break; |
4566 | case SIMD_CLONE_ARG_TYPE_UNIFORM: |
	  vargs.safe_push (op);
4568 | break; |
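	  /* For linear arguments the clone adds lane * step itself, so
	     the vectorized caller only has to supply the per-copy base
	     value: a header PHI (built below for j == 0) advances it by
	     ncopies * nunits * step per loop iteration, and copy J adds
	     j * nunits * step on top of that.  */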
4569 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
4570 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
4571 | if (j == 0) |
4572 | { |
4573 | gimple_seq stmts; |
4574 | arginfo[i].op |
4575 | = force_gimple_operand (unshare_expr (arginfo[i].op), |
4576 | &stmts, true, NULL_TREE); |
4577 | if (stmts != NULL) |
4578 | { |
4579 | basic_block new_bb; |
4580 | edge pe = loop_preheader_edge (loop); |
4581 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
4582 | gcc_assert (!new_bb); |
4583 | } |
4584 | if (arginfo[i].simd_lane_linear) |
4585 | { |
		  vargs.safe_push (arginfo[i].op);
4587 | break; |
4588 | } |
	      tree phi_res = copy_ssa_name (op);
4590 | gphi *new_phi = create_phi_node (phi_res, loop->header); |
4591 | add_phi_arg (new_phi, arginfo[i].op, |
4592 | loop_preheader_edge (loop), UNKNOWN_LOCATION); |
4593 | enum tree_code code |
4594 | = POINTER_TYPE_P (TREE_TYPE (op)) |
4595 | ? POINTER_PLUS_EXPR : PLUS_EXPR; |
4596 | tree type = POINTER_TYPE_P (TREE_TYPE (op)) |
4597 | ? sizetype : TREE_TYPE (op); |
4598 | poly_widest_int cst |
		= wi::mul (bestn->simdclone->args[i].linear_step,
			   ncopies * nunits);
4601 | tree tcst = wide_int_to_tree (type, cst); |
	      tree phi_arg = copy_ssa_name (op);
4603 | gassign *new_stmt |
4604 | = gimple_build_assign (phi_arg, code, phi_res, tcst); |
	      gimple_stmt_iterator si = gsi_after_labels (loop->header);
4606 | gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); |
4607 | add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), |
4608 | UNKNOWN_LOCATION); |
4609 | arginfo[i].op = phi_res; |
	      vargs.safe_push (phi_res);
4611 | } |
4612 | else |
4613 | { |
4614 | enum tree_code code |
4615 | = POINTER_TYPE_P (TREE_TYPE (op)) |
4616 | ? POINTER_PLUS_EXPR : PLUS_EXPR; |
4617 | tree type = POINTER_TYPE_P (TREE_TYPE (op)) |
4618 | ? sizetype : TREE_TYPE (op); |
4619 | poly_widest_int cst |
		= wi::mul (bestn->simdclone->args[i].linear_step,
			   j * nunits);
	      tree tcst = wide_int_to_tree (type, cst);
	      new_temp = make_ssa_name (TREE_TYPE (op));
	      gassign *new_stmt
		= gimple_build_assign (new_temp, code,
				       arginfo[i].op, tcst);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      vargs.safe_push (new_temp);
4629 | } |
4630 | break; |
4631 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
4632 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
4633 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
4634 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
4635 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
4636 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
4637 | default: |
4638 | gcc_unreachable (); |
4639 | } |
4640 | } |
4641 | |
4642 | if (masked_call_offset == 0 |
4643 | && bestn->simdclone->inbranch |
4644 | && bestn->simdclone->nargs > nargs) |
4645 | { |
4646 | unsigned long m, o; |
4647 | size_t mask_i = bestn->simdclone->nargs - 1; |
4648 | tree mask; |
      gcc_assert (bestn->simdclone->args[mask_i].arg_type
		  == SIMD_CLONE_ARG_TYPE_MASK);
4651 | |
4652 | tree masktype = bestn->simdclone->args[mask_i].vector_type; |
      callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4654 | o = vector_unroll_factor (nunits, callee_nelements); |
4655 | for (m = j * o; m < (j + 1) * o; m++) |
4656 | { |
4657 | if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
4658 | { |
4659 | vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo); |
4660 | mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
4661 | ncopies, vectype, j); |
4662 | } |
4663 | else |
4664 | mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype); |
4665 | |
4666 | gassign *new_stmt; |
4667 | if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4668 | { |
4669 | /* This means we are dealing with integer mask modes. |
4670 | First convert to an integer type with the same size as |
4671 | the current vector type. */ |
4672 | unsigned HOST_WIDE_INT intermediate_size |
4673 | = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask))); |
	      tree mid_int_type
		= build_nonstandard_integer_type (intermediate_size, 1);
4676 | mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask); |
4677 | new_stmt |
		= gimple_build_assign (make_ssa_name (mid_int_type),
4679 | mask); |
4680 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
4681 | /* Then zero-extend to the mask mode. */ |
4682 | mask = fold_build1 (NOP_EXPR, masktype, |
4683 | gimple_get_lhs (new_stmt)); |
4684 | } |
4685 | else if (bestn->simdclone->mask_mode == VOIDmode) |
4686 | { |
4687 | tree one = fold_convert (TREE_TYPE (masktype), |
4688 | integer_one_node); |
4689 | tree zero = fold_convert (TREE_TYPE (masktype), |
4690 | integer_zero_node); |
4691 | mask = build3 (VEC_COND_EXPR, masktype, mask, |
4692 | build_vector_from_val (masktype, one), |
4693 | build_vector_from_val (masktype, zero)); |
4694 | } |
4695 | else |
4696 | gcc_unreachable (); |
4697 | |
	  new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
	  vect_finish_stmt_generation (vinfo, stmt_info,
				       new_stmt, gsi);
	  mask = gimple_assign_lhs (new_stmt);
	  vargs.safe_push (mask);
4703 | } |
4704 | } |
4705 | |
4706 | gcall *new_call = gimple_build_call_vec (fndecl, vargs); |
4707 | if (vec_dest) |
4708 | { |
4709 | gcc_assert (ratype |
4710 | || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits)); |
4711 | if (ratype) |
4712 | new_temp = create_tmp_var (ratype); |
4713 | else if (useless_type_conversion_p (vectype, rtype)) |
	new_temp = make_ssa_name (vec_dest, new_call);
      else
	new_temp = make_ssa_name (rtype, new_call);
      gimple_call_set_lhs (new_call, new_temp);
    }
  vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
  gimple *new_stmt = new_call;

  if (vec_dest)
    {
      if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4725 | { |
4726 | unsigned int k, l; |
4727 | poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); |
4728 | poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); |
4729 | k = vector_unroll_factor (nunits, |
4730 | TYPE_VECTOR_SUBPARTS (vectype)); |
4731 | gcc_assert ((k & (k - 1)) == 0); |
4732 | for (l = 0; l < k; l++) |
4733 | { |
4734 | tree t; |
4735 | if (ratype) |
4736 | { |
4737 | t = build_fold_addr_expr (new_temp); |
4738 | t = build2 (MEM_REF, vectype, t, |
4739 | build_int_cst (TREE_TYPE (t), l * bytes)); |
4740 | } |
4741 | else |
4742 | t = build3 (BIT_FIELD_REF, vectype, new_temp, |
4743 | bitsize_int (prec), bitsize_int (l * prec)); |
	      new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	      if (j == 0 && l == 0)
		*vec_stmt = new_stmt;
	      if (slp_node)
		SLP_TREE_VEC_DEFS (slp_node)
		  .quick_push (gimple_assign_lhs (new_stmt));
	      else
		STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	    }

	  if (ratype)
	    vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4758 | continue; |
4759 | } |
      else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
	{
	  unsigned int k;
	  if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
				    TYPE_VECTOR_SUBPARTS (rtype), &k))
	    gcc_unreachable ();
	  gcc_assert ((k & (k - 1)) == 0);
	  if ((j & (k - 1)) == 0)
	    vec_alloc (ret_ctor_elts, k);
4769 | if (ratype) |
4770 | { |
4771 | unsigned int m, o; |
4772 | o = vector_unroll_factor (nunits, |
4773 | TYPE_VECTOR_SUBPARTS (rtype)); |
4774 | for (m = 0; m < o; m++) |
4775 | { |
4776 | tree tem = build4 (ARRAY_REF, rtype, new_temp, |
4777 | size_int (m), NULL_TREE, NULL_TREE); |
		  new_stmt = gimple_build_assign (make_ssa_name (rtype),
						  tem);
		  vect_finish_stmt_generation (vinfo, stmt_info,
					       new_stmt, gsi);
		  CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					  gimple_assign_lhs (new_stmt));
		}
	      vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4786 | } |
4787 | else |
4788 | CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); |
4789 | if ((j & (k - 1)) != k - 1) |
4790 | continue; |
4791 | vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); |
4792 | new_stmt |
	    = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  if ((unsigned) j == k - 1)
	    *vec_stmt = new_stmt;
	  if (slp_node)
	    SLP_TREE_VEC_DEFS (slp_node)
	      .quick_push (gimple_assign_lhs (new_stmt));
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4803 | continue; |
4804 | } |
4805 | else if (ratype) |
4806 | { |
4807 | tree t = build_fold_addr_expr (new_temp); |
4808 | t = build2 (MEM_REF, vectype, t, |
4809 | build_int_cst (TREE_TYPE (t), 0)); |
	  new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
	}
      else if (!useless_type_conversion_p (vectype, rtype))
	{
	  vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
	  new_stmt
	    = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4820 | } |
4821 | } |
4822 | |
4823 | if (j == 0) |
4824 | *vec_stmt = new_stmt; |
4825 | if (slp_node) |
	SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4829 | } |
4830 | |
4831 | for (i = 0; i < nargs; ++i) |
4832 | { |
4833 | vec<tree> oprndsi = vec_oprnds[i]; |
4834 | oprndsi.release (); |
4835 | } |
4836 | vargs.release (); |
4837 | |
4838 | /* Mark the clone as no longer being a candidate for GC. */ |
4839 | bestn->gc_candidate = false; |
4840 | |
  /* The call in STMT might prevent it from being removed in DCE.
     We however cannot remove it here, due to the way the SSA name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */
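  /* E.g. the scalar "lhs_1 = foo (...);" is rewritten below to
     "lhs_1 = 0;" (or to a GIMPLE nop when there is no lhs), which DCE
     can then delete.  */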
4845 | |
4846 | if (slp_node) |
4847 | return true; |
4848 | |
4849 | gimple *new_stmt; |
4850 | if (scalar_dest) |
4851 | { |
4852 | type = TREE_TYPE (scalar_dest); |
      lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4854 | new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); |
4855 | } |
4856 | else |
4857 | new_stmt = gimple_build_nop (); |
4858 | vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt); |
4859 | unlink_stmt_vdef (stmt); |
4860 | |
4861 | return true; |
4862 | } |
4863 | |
4864 | |
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type and result variable are CH,
   OP_TYPE and VEC_DEST, and whose arguments are VEC_OPRND0 and VEC_OPRND1
   (VEC_OPRND1 is used only when OP_TYPE is binary_op).  The new vector
   stmt is to be inserted at GSI.  STMT_INFO is the original scalar stmt
   that we are vectorizing.  */
4873 | |
4874 | static gimple * |
4875 | vect_gen_widened_results_half (vec_info *vinfo, code_helper ch, |
4876 | tree vec_oprnd0, tree vec_oprnd1, int op_type, |
4877 | tree vec_dest, gimple_stmt_iterator *gsi, |
4878 | stmt_vec_info stmt_info) |
4879 | { |
4880 | gimple *new_stmt; |
4881 | tree new_temp; |
4882 | |
4883 | /* Generate half of the widened result: */ |
4884 | if (op_type != binary_op) |
4885 | vec_oprnd1 = NULL; |
4886 | new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1); |
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4890 | |
4891 | return new_stmt; |
4892 | } |
4893 | |
4894 | |
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  When NARROW_SRC_P is true there is still a
   conversion to be done after the narrowing, so do not store the vectors
   in SLP_NODE or in the vector info of the scalar statement (or in the
   STMT_VINFO_RELATED_STMT chain).  */
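/* For example, demoting four V4SI operands to one V16QI result takes two
   rounds of pairwise VEC_PACK_TRUNC_EXPR: the first round packs them into
   two V8HI vectors, and the recursive call packs those into the final
   V16QI.  */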
4900 | |
4901 | static void |
4902 | vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, |
4903 | int multi_step_cvt, |
4904 | stmt_vec_info stmt_info, |
4905 | vec<tree> &vec_dsts, |
4906 | gimple_stmt_iterator *gsi, |
4907 | slp_tree slp_node, code_helper code, |
4908 | bool narrow_src_p) |
4909 | { |
4910 | unsigned int i; |
4911 | tree vop0, vop1, new_tmp, vec_dest; |
4912 | |
4913 | vec_dest = vec_dsts.pop (); |
4914 | |
4915 | for (i = 0; i < vec_oprnds->length (); i += 2) |
4916 | { |
4917 | /* Create demotion operation. */ |
4918 | vop0 = (*vec_oprnds)[i]; |
4919 | vop1 = (*vec_oprnds)[i + 1]; |
4920 | gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1); |
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4924 | if (multi_step_cvt || narrow_src_p) |
4925 | /* Store the resulting vector for next recursive call, |
4926 | or return the resulting vector_tmp for NARROW FLOAT_EXPR. */ |
4927 | (*vec_oprnds)[i/2] = new_tmp; |
4928 | else |
4929 | { |
4930 | /* This is the last step of the conversion sequence. Store the |
4931 | vectors in SLP_NODE or in vector info of the scalar statement |
4932 | (or in STMT_VINFO_RELATED_STMT chain). */ |
4933 | if (slp_node) |
	    slp_node->push_vec_def (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4937 | } |
4938 | } |
4939 | |
4940 | /* For multi-step demotion operations we first generate demotion operations |
4941 | from the source type to the intermediate types, and then combine the |
4942 | results (stored in VEC_OPRNDS) in demotion operation to the destination |
4943 | type. */ |
4944 | if (multi_step_cvt) |
4945 | { |
4946 | /* At each level of recursion we have half of the operands we had at the |
4947 | previous level. */ |
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
					     multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR,
					     narrow_src_p);
    }

  vec_dsts.quick_push (vec_dest);
4957 | } |
4958 | |
4959 | |
4960 | /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 |
4961 | and VEC_OPRNDS1, for a binary operation associated with scalar statement |
4962 | STMT_INFO. For multi-step conversions store the resulting vectors and |
4963 | call the function recursively. */ |
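/* For example, promoting one V16QI operand to HImode elements yields two
   V8HI vectors per input: CH1 (e.g. VEC_UNPACK_LO_EXPR) produces the low
   half and CH2 (e.g. VEC_UNPACK_HI_EXPR) the high half.  */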
4964 | |
4965 | static void |
4966 | vect_create_vectorized_promotion_stmts (vec_info *vinfo, |
4967 | vec<tree> *vec_oprnds0, |
4968 | vec<tree> *vec_oprnds1, |
4969 | stmt_vec_info stmt_info, tree vec_dest, |
4970 | gimple_stmt_iterator *gsi, |
4971 | code_helper ch1, |
4972 | code_helper ch2, int op_type) |
4973 | { |
4974 | int i; |
4975 | tree vop0, vop1, new_tmp1, new_tmp2; |
4976 | gimple *new_stmt1, *new_stmt2; |
4977 | vec<tree> vec_tmp = vNULL; |
4978 | |
  vec_tmp.create (vec_oprnds0->length () * 2);
4980 | FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) |
4981 | { |
4982 | if (op_type == binary_op) |
4983 | vop1 = (*vec_oprnds1)[i]; |
4984 | else |
4985 | vop1 = NULL_TREE; |
4986 | |
4987 | /* Generate the two halves of promotion operation. */ |
      new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
5008 | } |
5009 | |
5010 | vec_oprnds0->release (); |
5011 | *vec_oprnds0 = vec_tmp; |
5012 | } |
5013 | |
5014 | /* Create vectorized promotion stmts for widening stmts using only half the |
5015 | potential vector size for input. */ |
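/* For example, a V4HI + V4HI addition whose result is wanted as V4SI can
   simply convert each half-width input vector to V4SI and do a normal
   V4SI addition, instead of unpacking full-width inputs into two result
   vectors.  */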
5016 | static void |
5017 | vect_create_half_widening_stmts (vec_info *vinfo, |
5018 | vec<tree> *vec_oprnds0, |
5019 | vec<tree> *vec_oprnds1, |
5020 | stmt_vec_info stmt_info, tree vec_dest, |
5021 | gimple_stmt_iterator *gsi, |
5022 | code_helper code1, |
5023 | int op_type) |
5024 | { |
5025 | int i; |
5026 | tree vop0, vop1; |
5027 | gimple *new_stmt1; |
5028 | gimple *new_stmt2; |
5029 | gimple *new_stmt3; |
5030 | vec<tree> vec_tmp = vNULL; |
5031 | |
  vec_tmp.create (vec_oprnds0->length ());
5033 | FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) |
5034 | { |
5035 | tree new_tmp1, new_tmp2, new_tmp3, out_type; |
5036 | |
5037 | gcc_assert (op_type == binary_op); |
5038 | vop1 = (*vec_oprnds1)[i]; |
5039 | |
5040 | /* Widen the first vector input. */ |
5041 | out_type = TREE_TYPE (vec_dest); |
      new_tmp1 = make_ssa_name (out_type);
      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
	{
	  /* Widen the second vector input.  */
	  new_tmp2 = make_ssa_name (out_type);
	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
	  /* Perform the operation with both vector inputs widened.  */
	  new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5053 | } |
5054 | else |
5055 | { |
	  /* Perform the operation with the single vector input widened.  */
5057 | new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1); |
5058 | } |
5059 | |
      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
      gimple_assign_set_lhs (new_stmt3, new_tmp3);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp3);
5066 | } |
5067 | |
5068 | vec_oprnds0->release (); |
5069 | *vec_oprnds0 = vec_tmp; |
5070 | } |
5071 | |
5072 | |
5073 | /* Check if STMT_INFO performs a conversion operation that can be vectorized. |
5074 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
5075 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
5076 | Return true if STMT_INFO is vectorizable in this way. */ |
5077 | |
5078 | static bool |
5079 | vectorizable_conversion (vec_info *vinfo, |
5080 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5081 | gimple **vec_stmt, slp_tree slp_node, |
5082 | stmt_vector_for_cost *cost_vec) |
5083 | { |
5084 | tree vec_dest, cvt_op = NULL_TREE; |
5085 | tree scalar_dest; |
5086 | tree op0, op1 = NULL_TREE; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5088 | tree_code tc1, tc2; |
5089 | code_helper code, code1, code2; |
5090 | code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; |
5091 | tree new_temp; |
5092 | enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; |
5093 | int ndts = 2; |
5094 | poly_uint64 nunits_in; |
5095 | poly_uint64 nunits_out; |
5096 | tree vectype_out, vectype_in; |
5097 | int ncopies, i; |
5098 | tree lhs_type, rhs_type; |
  /* For conversions between floating point and integer there are two
     NARROW cases.  NARROW_SRC is for FLOAT_EXPR and means
     integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
     This is safe when the range of the source integer fits into the
     lower precision.  NARROW_DST is for FIX_TRUNC_EXPR and means
     floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
     For other narrowing conversions NARROW_DST is used by default.  */
5107 | enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier; |
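  /* For example, "float f = (float) ll;" where LL's range is known to fit
     in int is a NARROW_SRC conversion (demote LL to int, then FLOAT_EXPR),
     whereas "short s = (short) f;" is NARROW_DST (FIX_TRUNC_EXPR to int,
     then demote to short).  */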
5108 | vec<tree> vec_oprnds0 = vNULL; |
5109 | vec<tree> vec_oprnds1 = vNULL; |
5110 | tree vop0; |
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5112 | int multi_step_cvt = 0; |
5113 | vec<tree> interm_types = vNULL; |
5114 | tree intermediate_type, cvt_type = NULL_TREE; |
5115 | int op_type; |
5116 | unsigned short fltsz; |
5117 | |
5118 | /* Is STMT a vectorizable conversion? */ |
5119 | |
5120 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
5121 | return false; |
5122 | |
5123 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
5124 | && ! vec_stmt) |
5125 | return false; |
5126 | |
5127 | gimple* stmt = stmt_info->stmt; |
  if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
    return false;

  if (gimple_get_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
    return false;

  if (is_gimple_assign (stmt))
    {
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH ((tree_code) code);
    }
  else if (gimple_call_internal_p (stmt))
    {
      code = gimple_call_internal_fn (stmt);
      op_type = gimple_call_num_args (stmt);
5147 | } |
5148 | else |
5149 | return false; |
5150 | |
5151 | bool widen_arith = (code == WIDEN_MULT_EXPR |
5152 | || code == WIDEN_LSHIFT_EXPR |
5153 | || widening_fn_p (code)); |
5154 | |
5155 | if (!widen_arith |
5156 | && !CONVERT_EXPR_CODE_P (code) |
5157 | && code != FIX_TRUNC_EXPR |
5158 | && code != FLOAT_EXPR) |
5159 | return false; |
5160 | |
5161 | /* Check types of lhs and rhs. */ |
5162 | scalar_dest = gimple_get_lhs (stmt); |
5163 | lhs_type = TREE_TYPE (scalar_dest); |
5164 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
5165 | |
5166 | /* Check the operands of the operation. */ |
5167 | slp_tree slp_op0, slp_op1 = NULL; |
5168 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
5169 | 0, &op0, &slp_op0, &dt[0], &vectype_in)) |
5170 | { |
5171 | if (dump_enabled_p ()) |
5172 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5173 | "use not simple.\n" ); |
5174 | return false; |
5175 | } |
5176 | |
5177 | rhs_type = TREE_TYPE (op0); |
5178 | if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) |
5179 | && !((INTEGRAL_TYPE_P (lhs_type) |
5180 | && INTEGRAL_TYPE_P (rhs_type)) |
5181 | || (SCALAR_FLOAT_TYPE_P (lhs_type) |
5182 | && SCALAR_FLOAT_TYPE_P (rhs_type)))) |
5183 | return false; |
5184 | |
5185 | if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) |
5186 | && ((INTEGRAL_TYPE_P (lhs_type) |
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
5195 | return false; |
5196 | } |
5197 | |
5198 | if (op_type == binary_op) |
5199 | { |
5200 | gcc_assert (code == WIDEN_MULT_EXPR |
5201 | || code == WIDEN_LSHIFT_EXPR |
5202 | || widening_fn_p (code)); |
5203 | |
      op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt)
				    : gimple_call_arg (stmt, 0);
5206 | tree vectype1_in; |
5207 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, |
5208 | &op1, &slp_op1, &dt[1], &vectype1_in)) |
5209 | { |
5210 | if (dump_enabled_p ()) |
5211 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5212 | "use not simple.\n" ); |
5213 | return false; |
5214 | } |
5215 | /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of |
5216 | OP1. */ |
5217 | if (!vectype_in) |
5218 | vectype_in = vectype1_in; |
5219 | } |
5220 | |
5221 | /* If op0 is an external or constant def, infer the vector type |
5222 | from the scalar type. */ |
5223 | if (!vectype_in) |
5224 | vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); |
5225 | if (vec_stmt) |
5226 | gcc_assert (vectype_in); |
5227 | if (!vectype_in) |
5228 | { |
5229 | if (dump_enabled_p ()) |
5230 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5231 | "no vectype for scalar type %T\n" , rhs_type); |
5232 | |
5233 | return false; |
5234 | } |
5235 | |
5236 | if (VECTOR_BOOLEAN_TYPE_P (vectype_out) |
5237 | && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) |
5238 | { |
5239 | if (dump_enabled_p ()) |
5240 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5241 | "can't convert between boolean and non " |
5242 | "boolean vectors %T\n" , rhs_type); |
5243 | |
5244 | return false; |
5245 | } |
5246 | |
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5249 | if (known_eq (nunits_out, nunits_in)) |
5250 | if (widen_arith) |
5251 | modifier = WIDEN; |
5252 | else |
5253 | modifier = NONE; |
  else if (multiple_p (nunits_out, nunits_in))
5255 | modifier = NARROW_DST; |
5256 | else |
5257 | { |
5258 | gcc_checking_assert (multiple_p (nunits_in, nunits_out)); |
5259 | modifier = WIDEN; |
5260 | } |
5261 | |
5262 | /* Multiple types in SLP are handled by creating the appropriate number of |
5263 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
5264 | case of SLP. */ |
5265 | if (slp_node) |
5266 | ncopies = 1; |
5267 | else if (modifier == NARROW_DST) |
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5271 | |
5272 | /* Sanity check: make sure that at least one copy of the vectorized stmt |
5273 | needs to be generated. */ |
5274 | gcc_assert (ncopies >= 1); |
5275 | |
5276 | bool found_mode = false; |
5277 | scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); |
5278 | scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); |
5279 | opt_scalar_mode rhs_mode_iter; |
5280 | |
5281 | /* Supportable by target? */ |
5282 | switch (modifier) |
5283 | { |
5284 | case NONE: |
5285 | if (code != FIX_TRUNC_EXPR |
5286 | && code != FLOAT_EXPR |
5287 | && !CONVERT_EXPR_CODE_P (code)) |
5288 | return false; |
5289 | gcc_assert (code.is_tree_code ()); |
5290 | if (supportable_convert_operation ((tree_code) code, vectype_out, |
5291 | vectype_in, &tc1)) |
5292 | { |
5293 | code1 = tc1; |
5294 | break; |
5295 | } |
5296 | |
5297 | /* For conversions between float and integer types try whether |
5298 | we can use intermediate signed integer types to support the |
5299 | conversion. */ |
      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
	  && (code == FLOAT_EXPR
	      || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
	{
	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5305 | bool float_expr_p = code == FLOAT_EXPR; |
5306 | unsigned short target_size; |
5307 | scalar_mode intermediate_mode; |
5308 | if (demotion) |
5309 | { |
5310 | intermediate_mode = lhs_mode; |
	      target_size = GET_MODE_SIZE (rhs_mode);
	    }
	  else
	    {
	      target_size = GET_MODE_SIZE (lhs_mode);
	      if (!int_mode_for_size
		  (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5318 | goto unsupported; |
5319 | } |
5320 | code1 = float_expr_p ? code : NOP_EXPR; |
5321 | codecvt1 = float_expr_p ? NOP_EXPR : code; |
5322 | opt_scalar_mode mode_iter; |
5323 | FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) |
5324 | { |
5325 | intermediate_mode = mode_iter.require (); |
5326 | |
	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
		break;

	      scalar_mode cvt_mode;
	      if (!int_mode_for_size
		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
		break;

	      cvt_type = build_nonstandard_integer_type
		(GET_MODE_BITSIZE (cvt_mode), 0);
5337 | |
5338 | /* Check if the intermediate type can hold OP0's range. |
5339 | When converting from float to integer this is not necessary |
5340 | because values that do not fit the (smaller) target type are |
5341 | unspecified anyway. */ |
5342 | if (demotion && float_expr_p) |
5343 | { |
5344 | wide_int op_min_value, op_max_value; |
5345 | if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) |
5346 | break; |
5347 | |
5348 | if (cvt_type == NULL_TREE |
		      || (wi::min_precision (op_max_value, SIGNED)
			  > TYPE_PRECISION (cvt_type))
		      || (wi::min_precision (op_min_value, SIGNED)
			  > TYPE_PRECISION (cvt_type)))
		    continue;
		}

	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
	      /* This should only happen for SLP as long as the loop
		 vectorizer only supports same-sized vectors.  */
	      if (cvt_type == NULL_TREE
		  || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5361 | || !supportable_convert_operation ((tree_code) code1, |
5362 | vectype_out, |
5363 | cvt_type, &tc1) |
5364 | || !supportable_convert_operation ((tree_code) codecvt1, |
5365 | cvt_type, |
5366 | vectype_in, &tc2)) |
5367 | continue; |
5368 | |
5369 | found_mode = true; |
5370 | break; |
5371 | } |
5372 | |
5373 | if (found_mode) |
5374 | { |
5375 | multi_step_cvt++; |
	      interm_types.safe_push (cvt_type);
5377 | cvt_type = NULL_TREE; |
5378 | code1 = tc1; |
5379 | codecvt1 = tc2; |
5380 | break; |
5381 | } |
5382 | } |
5383 | /* FALLTHRU */ |
5384 | unsupported: |
5385 | if (dump_enabled_p ()) |
5386 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5387 | "conversion not supported by target.\n" ); |
5388 | return false; |
5389 | |
5390 | case WIDEN: |
5391 | if (known_eq (nunits_in, nunits_out)) |
5392 | { |
5393 | if (!(code.is_tree_code () |
5394 | && supportable_half_widening_operation ((tree_code) code, |
5395 | vectype_out, vectype_in, |
5396 | &tc1))) |
5397 | goto unsupported; |
5398 | code1 = tc1; |
5399 | gcc_assert (!(multi_step_cvt && op_type == binary_op)); |
5400 | break; |
5401 | } |
5402 | if (supportable_widening_operation (vinfo, code, stmt_info, |
5403 | vectype_out, vectype_in, &code1, |
5404 | &code2, &multi_step_cvt, |
5405 | &interm_types)) |
5406 | { |
5407 | /* Binary widening operation can only be supported directly by the |
5408 | architecture. */ |
5409 | gcc_assert (!(multi_step_cvt && op_type == binary_op)); |
5410 | break; |
5411 | } |
5412 | |
5413 | if (code != FLOAT_EXPR |
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5415 | goto unsupported; |
5416 | |
      fltsz = GET_MODE_SIZE (lhs_mode);
5418 | FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode) |
5419 | { |
5420 | rhs_mode = rhs_mode_iter.require (); |
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
5422 | break; |
5423 | |
5424 | cvt_type |
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5426 | cvt_type = get_same_sized_vectype (cvt_type, vectype_in); |
5427 | if (cvt_type == NULL_TREE) |
5428 | goto unsupported; |
5429 | |
	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
5431 | { |
5432 | tc1 = ERROR_MARK; |
5433 | gcc_assert (code.is_tree_code ()); |
5434 | if (!supportable_convert_operation ((tree_code) code, vectype_out, |
5435 | cvt_type, &tc1)) |
5436 | goto unsupported; |
5437 | codecvt1 = tc1; |
5438 | } |
5439 | else if (!supportable_widening_operation (vinfo, code, |
5440 | stmt_info, vectype_out, |
5441 | cvt_type, &codecvt1, |
5442 | &codecvt2, &multi_step_cvt, |
5443 | &interm_types)) |
5444 | continue; |
5445 | else |
5446 | gcc_assert (multi_step_cvt == 0); |
5447 | |
5448 | if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info, |
5449 | cvt_type, |
5450 | vectype_in, &code1, |
5451 | &code2, &multi_step_cvt, |
5452 | &interm_types)) |
5453 | { |
5454 | found_mode = true; |
5455 | break; |
5456 | } |
5457 | } |
5458 | |
5459 | if (!found_mode) |
5460 | goto unsupported; |
5461 | |
      if (GET_MODE_SIZE (rhs_mode) == fltsz)
5463 | codecvt2 = ERROR_MARK; |
5464 | else |
5465 | { |
5466 | multi_step_cvt++; |
	  interm_types.safe_push (cvt_type);
5468 | cvt_type = NULL_TREE; |
5469 | } |
5470 | break; |
5471 | |
5472 | case NARROW_DST: |
5473 | gcc_assert (op_type == unary_op); |
5474 | if (supportable_narrowing_operation (code, vectype_out, vectype_in, |
5475 | &code1, &multi_step_cvt, |
5476 | &interm_types)) |
5477 | break; |
5478 | |
      if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5480 | goto unsupported; |
5481 | |
5482 | if (code == FIX_TRUNC_EXPR) |
5483 | { |
5484 | cvt_type |
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5486 | cvt_type = get_same_sized_vectype (cvt_type, vectype_in); |
5487 | if (cvt_type == NULL_TREE) |
5488 | goto unsupported; |
5489 | if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in, |
5490 | &tc1)) |
5491 | codecvt1 = tc1; |
5492 | else |
5493 | goto unsupported; |
5494 | if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, |
5495 | &code1, &multi_step_cvt, |
5496 | &interm_types)) |
5497 | break; |
5498 | } |
      /* If op0 can be represented with a low-precision integer,
	 truncate it to cvt_type and then do FLOAT_EXPR.  */
5501 | else if (code == FLOAT_EXPR) |
5502 | { |
5503 | wide_int op_min_value, op_max_value; |
5504 | if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) |
5505 | goto unsupported; |
5506 | |
5507 | cvt_type |
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5509 | if (cvt_type == NULL_TREE |
	  || (wi::min_precision (op_max_value, SIGNED)
	      > TYPE_PRECISION (cvt_type))
	  || (wi::min_precision (op_min_value, SIGNED)
5513 | > TYPE_PRECISION (cvt_type))) |
5514 | goto unsupported; |
5515 | |
5516 | cvt_type = get_same_sized_vectype (cvt_type, vectype_out); |
5517 | if (cvt_type == NULL_TREE) |
5518 | goto unsupported; |
5519 | if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in, |
5520 | &code1, &multi_step_cvt, |
5521 | &interm_types)) |
5522 | goto unsupported; |
5523 | if (supportable_convert_operation ((tree_code) code, vectype_out, |
5524 | cvt_type, &tc1)) |
5525 | { |
5526 | codecvt1 = tc1; |
5527 | modifier = NARROW_SRC; |
5528 | break; |
5529 | } |
5530 | } |
5531 | |
5532 | goto unsupported; |
5533 | |
5534 | default: |
5535 | gcc_unreachable (); |
5536 | } |
5537 | |
5538 | if (!vec_stmt) /* transformation not required. */ |
5539 | { |
5540 | if (slp_node |
5541 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in) |
5542 | || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))) |
5543 | { |
5544 | if (dump_enabled_p ()) |
5545 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5546 | "incompatible vector types for invariants\n" ); |
5547 | return false; |
5548 | } |
5549 | DUMP_VECT_SCOPE ("vectorizable_conversion" ); |
5550 | if (modifier == NONE) |
5551 | { |
5552 | STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; |
5553 | vect_model_simple_cost (vinfo, stmt_info, |
5554 | ncopies: ncopies * (1 + multi_step_cvt), |
5555 | dt, ndts, node: slp_node, cost_vec); |
5556 | } |
5557 | else if (modifier == NARROW_SRC || modifier == NARROW_DST) |
5558 | { |
5559 | STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; |
5560 | /* The final packing step produces one vector result per copy. */ |
5561 | unsigned int nvectors |
5562 | = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies); |
	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
					      multi_step_cvt, cost_vec,
5565 | widen_arith); |
5566 | } |
5567 | else |
5568 | { |
5569 | STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; |
5570 | /* The initial unpacking step produces two vector results |
5571 | per copy. MULTI_STEP_CVT is 0 for a single conversion, |
5572 | so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ |
5573 | unsigned int nvectors |
5574 | = (slp_node |
5575 | ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt |
5576 | : ncopies * 2); |
	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
					      multi_step_cvt, cost_vec,
5579 | widen_arith); |
5580 | } |
5581 | interm_types.release (); |
5582 | return true; |
5583 | } |
5584 | |
5585 | /* Transform. */ |
5586 | if (dump_enabled_p ()) |
5587 | dump_printf_loc (MSG_NOTE, vect_location, |
5588 | "transform conversion. ncopies = %d.\n" , ncopies); |
5589 | |
5590 | if (op_type == binary_op) |
5591 | { |
5592 | if (CONSTANT_CLASS_P (op0)) |
5593 | op0 = fold_convert (TREE_TYPE (op1), op0); |
5594 | else if (CONSTANT_CLASS_P (op1)) |
5595 | op1 = fold_convert (TREE_TYPE (op0), op1); |
5596 | } |
5597 | |
5598 | /* In case of multi-step conversion, we first generate conversion operations |
5599 | to the intermediate types, and then from that types to the final one. |
5600 | We create vector destinations for the intermediate type (TYPES) received |
5601 | from supportable_*_operation, and store them in the correct order |
5602 | for future use in vect_create_vectorized_*_stmts (). */ |
5603 | auto_vec<tree> vec_dsts (multi_step_cvt + 1); |
5604 | bool widen_or_narrow_float_p |
5605 | = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC); |
5606 | vec_dest = vect_create_destination_var (scalar_dest, |
5607 | widen_or_narrow_float_p |
5608 | ? cvt_type : vectype_out); |
  vec_dsts.quick_push (vec_dest);
5610 | |
5611 | if (multi_step_cvt) |
5612 | { |
5613 | for (i = interm_types.length () - 1; |
	   interm_types.iterate (i, &intermediate_type); i--)
5615 | { |
5616 | vec_dest = vect_create_destination_var (scalar_dest, |
5617 | intermediate_type); |
	  vec_dsts.quick_push (vec_dest);
5619 | } |
5620 | } |
5621 | |
5622 | if (cvt_type) |
5623 | vec_dest = vect_create_destination_var (scalar_dest, |
5624 | widen_or_narrow_float_p |
5625 | ? vectype_out : cvt_type); |
5626 | |
5627 | int ninputs = 1; |
5628 | if (!slp_node) |
5629 | { |
5630 | if (modifier == WIDEN) |
5631 | ; |
5632 | else if (modifier == NARROW_SRC || modifier == NARROW_DST) |
5633 | { |
5634 | if (multi_step_cvt) |
	    ninputs = vect_pow2 (multi_step_cvt);
5636 | ninputs *= 2; |
5637 | } |
5638 | } |
5639 | |
5640 | switch (modifier) |
5641 | { |
5642 | case NONE: |
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
			 op0, &vec_oprnds0);
      /* When multi_step_cvt, vec_dest holds the intermediate-type operand.  */
5646 | if (multi_step_cvt) |
5647 | { |
5648 | cvt_op = vec_dest; |
5649 | vec_dest = vec_dsts[0]; |
5650 | } |
5651 | |
5652 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5653 | { |
5654 | /* Arguments are ready, create the new vector stmt. */ |
5655 | gimple* new_stmt; |
5656 | if (multi_step_cvt) |
5657 | { |
5658 | gcc_assert (multi_step_cvt == 1); |
5659 | new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0); |
	      new_temp = make_ssa_name (cvt_op, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5663 | vop0 = new_temp; |
5664 | } |
5665 | new_stmt = vect_gimple_build (vec_dest, code1, vop0); |
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5669 | |
5670 | if (slp_node) |
	    slp_node->push_vec_def (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5674 | } |
5675 | break; |
5676 | |
5677 | case WIDEN: |
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
			 op0, &vec_oprnds0,
			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
			 &vec_oprnds1);
5686 | if (code == WIDEN_LSHIFT_EXPR) |
5687 | { |
5688 | int oprnds_size = vec_oprnds0.length (); |
	  vec_oprnds1.create (oprnds_size);
5690 | for (i = 0; i < oprnds_size; ++i) |
	    vec_oprnds1.quick_push (op1);
5692 | } |
5693 | /* Arguments are ready. Create the new vector stmts. */ |
5694 | for (i = multi_step_cvt; i >= 0; i--) |
5695 | { |
5696 | tree this_dest = vec_dsts[i]; |
5697 | code_helper c1 = code1, c2 = code2; |
5698 | if (i == 0 && codecvt2 != ERROR_MARK) |
5699 | { |
5700 | c1 = codecvt1; |
5701 | c2 = codecvt2; |
5702 | } |
5703 | if (known_eq (nunits_out, nunits_in)) |
	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
					     stmt_info, this_dest, gsi, c1,
					     op_type);
	  else
	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
						    &vec_oprnds1, stmt_info,
						    this_dest, gsi,
						    c1, c2, op_type);
5712 | } |
5713 | |
5714 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5715 | { |
5716 | gimple *new_stmt; |
5717 | if (cvt_type) |
5718 | { |
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5722 | } |
5723 | else |
5724 | new_stmt = SSA_NAME_DEF_STMT (vop0); |
5725 | |
5726 | if (slp_node) |
	    slp_node->push_vec_def (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5730 | } |
5731 | break; |
5732 | |
5733 | case NARROW_SRC: |
5734 | case NARROW_DST: |
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
			 op0, &vec_oprnds0);
5741 | /* Arguments are ready. Create the new vector stmts. */ |
5742 | if (cvt_type && modifier == NARROW_DST) |
5743 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5744 | { |
	    new_temp = make_ssa_name (vec_dest);
	    gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5748 | vec_oprnds0[i] = new_temp; |
5749 | } |
5750 | |
      vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
					     multi_step_cvt,
					     stmt_info, vec_dsts, gsi,
					     slp_node, code1,
					     modifier == NARROW_SRC);
5756 | /* After demoting op0 to cvt_type, convert it to dest. */ |
5757 | if (cvt_type && code == FLOAT_EXPR) |
5758 | { |
	  for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
5760 | { |
5761 | /* Arguments are ready, create the new vector stmt. */ |
5762 | gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op); |
5763 | gimple *new_stmt |
5764 | = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]); |
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5768 | |
5769 | /* This is the last step of the conversion sequence. Store the |
5770 | vectors in SLP_NODE or in vector info of the scalar statement |
5771 | (or in STMT_VINFO_RELATED_STMT chain). */ |
5772 | if (slp_node) |
		slp_node->push_vec_def (new_stmt);
	      else
		STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5776 | } |
5777 | } |
5778 | break; |
5779 | } |
5780 | if (!slp_node) |
5781 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
5782 | |
5783 | vec_oprnds0.release (); |
5784 | vec_oprnds1.release (); |
5785 | interm_types.release (); |
5786 | |
5787 | return true; |
5788 | } |
5789 | |
5790 | /* Return true if we can assume from the scalar form of STMT_INFO that |
5791 | neither the scalar nor the vector forms will generate code. STMT_INFO |
5792 | is known not to involve a data reference. */ |
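/* For example, "unsigned int u_2 = (unsigned int) i_1;" for a signed int
   I_1 only reinterprets the bits, so neither its scalar nor its vector
   form requires an instruction.  */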
5793 | |
5794 | bool |
5795 | vect_nop_conversion_p (stmt_vec_info stmt_info) |
5796 | { |
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5798 | if (!stmt) |
5799 | return false; |
5800 | |
  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
5804 | |
5805 | if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) |
5806 | return true; |
5807 | |
5808 | if (CONVERT_EXPR_CODE_P (code)) |
5809 | return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); |
5810 | |
5811 | return false; |
5812 | } |
5813 | |
5814 | /* Function vectorizable_assignment. |
5815 | |
5816 | Check if STMT_INFO performs an assignment (copy) that can be vectorized. |
5817 | If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized |
5818 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
5819 | Return true if STMT_INFO is vectorizable in this way. */ |
5820 | |
5821 | static bool |
5822 | vectorizable_assignment (vec_info *vinfo, |
5823 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5824 | gimple **vec_stmt, slp_tree slp_node, |
5825 | stmt_vector_for_cost *cost_vec) |
5826 | { |
5827 | tree vec_dest; |
5828 | tree scalar_dest; |
5829 | tree op; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5831 | tree new_temp; |
5832 | enum vect_def_type dt[1] = {vect_unknown_def_type}; |
5833 | int ndts = 1; |
5834 | int ncopies; |
5835 | int i; |
5836 | vec<tree> vec_oprnds = vNULL; |
5837 | tree vop; |
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5839 | enum tree_code code; |
5840 | tree vectype_in; |
5841 | |
5842 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
5843 | return false; |
5844 | |
5845 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
5846 | && ! vec_stmt) |
5847 | return false; |
5848 | |
5849 | /* Is vectorizable assignment? */ |
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5851 | if (!stmt) |
5852 | return false; |
5853 | |
  scalar_dest = gimple_assign_lhs (stmt);
5855 | if (TREE_CODE (scalar_dest) != SSA_NAME) |
5856 | return false; |
5857 | |
5858 | if (STMT_VINFO_DATA_REF (stmt_info)) |
5859 | return false; |
5860 | |
  code = gimple_assign_rhs_code (stmt);
  if (!(gimple_assign_single_p (stmt)
5863 | || code == PAREN_EXPR |
5864 | || CONVERT_EXPR_CODE_P (code))) |
5865 | return false; |
5866 | |
5867 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5869 | |
5870 | /* Multiple types in SLP are handled by creating the appropriate number of |
5871 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
5872 | case of SLP. */ |
5873 | if (slp_node) |
5874 | ncopies = 1; |
5875 | else |
5876 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
5877 | |
5878 | gcc_assert (ncopies >= 1); |
5879 | |
5880 | slp_tree slp_op; |
5881 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op, |
5882 | &dt[0], &vectype_in)) |
5883 | { |
5884 | if (dump_enabled_p ()) |
5885 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5886 | "use not simple.\n" ); |
5887 | return false; |
5888 | } |
5889 | if (!vectype_in) |
5890 | vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node); |
5891 | |
5892 | /* We can handle NOP_EXPR conversions that do not change the number |
5893 | of elements or the vector size. */ |
5894 | if ((CONVERT_EXPR_CODE_P (code) |
5895 | || code == VIEW_CONVERT_EXPR) |
5896 | && (!vectype_in |
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5900 | return false; |
5901 | |
5902 | if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in)) |
5903 | { |
5904 | if (dump_enabled_p ()) |
5905 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5906 | "can't convert between boolean and non " |
5907 | "boolean vectors %T\n" , TREE_TYPE (op)); |
5908 | |
5909 | return false; |
5910 | } |
5911 | |
5912 | /* We do not handle bit-precision changes. */ |
5913 | if ((CONVERT_EXPR_CODE_P (code) |
5914 | || code == VIEW_CONVERT_EXPR) |
5915 | && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) |
5916 | && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))) |
5917 | || (INTEGRAL_TYPE_P (TREE_TYPE (op)) |
5918 | && !type_has_mode_precision_p (TREE_TYPE (op)))) |
5919 | /* But a conversion that does not change the bit-pattern is ok. */ |
5920 | && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) |
5921 | && INTEGRAL_TYPE_P (TREE_TYPE (op)) |
5922 | && (((TYPE_PRECISION (TREE_TYPE (scalar_dest)) |
5923 | > TYPE_PRECISION (TREE_TYPE (op))) |
5924 | && TYPE_UNSIGNED (TREE_TYPE (op))) |
5925 | || (TYPE_PRECISION (TREE_TYPE (scalar_dest)) |
5926 | == TYPE_PRECISION (TREE_TYPE (op)))))) |
5927 | { |
5928 | if (dump_enabled_p ()) |
5929 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5930 | "type conversion to/from bit-precision " |
5931 | "unsupported.\n" ); |
5932 | return false; |
5933 | } |
5934 | |
5935 | if (!vec_stmt) /* transformation not required. */ |
5936 | { |
5937 | if (slp_node |
5938 | && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in)) |
5939 | { |
5940 | if (dump_enabled_p ()) |
5941 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5942 | "incompatible vector types for invariants\n" ); |
5943 | return false; |
5944 | } |
5945 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; |
5946 | DUMP_VECT_SCOPE ("vectorizable_assignment" ); |
5947 | if (!vect_nop_conversion_p (stmt_info)) |
	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5949 | cost_vec); |
5950 | return true; |
5951 | } |
5952 | |
5953 | /* Transform. */ |
5954 | if (dump_enabled_p ()) |
5955 | dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n" ); |
5956 | |
5957 | /* Handle def. */ |
5958 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
5959 | |
5960 | /* Handle use. */ |
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5962 | |
  /* Arguments are ready.  Create the new vector stmt.  */
5964 | FOR_EACH_VEC_ELT (vec_oprnds, i, vop) |
5965 | { |
5966 | if (CONVERT_EXPR_CODE_P (code) |
5967 | || code == VIEW_CONVERT_EXPR) |
5968 | vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); |
5969 | gassign *new_stmt = gimple_build_assign (vec_dest, vop); |
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5977 | } |
5978 | if (!slp_node) |
5979 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
5980 | |
5981 | vec_oprnds.release (); |
5982 | return true; |
5983 | } |
5984 | |
5985 | |
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as a shift by a scalar or as a shift by a vector.  */
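
/* For instance (illustrative, target-dependent): many SIMD ISAs can shift
   all lanes by one shared scalar amount in a single instruction, while
   shifting each lane by its own amount needs a separate vector/vector
   pattern; e.g. SSE2 only provides the former for most element widths,
   and per-lane variable shifts arrive with AVX2 for 32/64-bit lanes.
   Either form makes this predicate succeed.  */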
5988 | |
5989 | bool |
5990 | vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type) |
5991 | { |
5992 | |
5993 | machine_mode vec_mode; |
5994 | optab optab; |
5995 | int icode; |
5996 | tree vectype; |
5997 | |
5998 | vectype = get_vectype_for_scalar_type (vinfo, scalar_type); |
5999 | if (!vectype) |
6000 | return false; |
6001 | |
6002 | optab = optab_for_tree_code (code, vectype, optab_scalar); |
6003 | if (!optab |
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6005 | { |
6006 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6007 | if (!optab |
	  || (optab_handler (optab, TYPE_MODE (vectype))
6009 | == CODE_FOR_nothing)) |
6010 | return false; |
6011 | } |
6012 | |
6013 | vec_mode = TYPE_MODE (vectype); |
  icode = (int) optab_handler (optab, vec_mode);
6015 | if (icode == CODE_FOR_nothing) |
6016 | return false; |
6017 | |
6018 | return true; |
6019 | } |
6020 | |
6021 | |
6022 | /* Function vectorizable_shift. |
6023 | |
6024 | Check if STMT_INFO performs a shift operation that can be vectorized. |
6025 | If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized |
6026 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
6027 | Return true if STMT_INFO is vectorizable in this way. */ |
6028 | |
6029 | static bool |
6030 | vectorizable_shift (vec_info *vinfo, |
6031 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
6032 | gimple **vec_stmt, slp_tree slp_node, |
6033 | stmt_vector_for_cost *cost_vec) |
6034 | { |
6035 | tree vec_dest; |
6036 | tree scalar_dest; |
6037 | tree op0, op1 = NULL; |
6038 | tree vec_oprnd1 = NULL_TREE; |
6039 | tree vectype; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6041 | enum tree_code code; |
6042 | machine_mode vec_mode; |
6043 | tree new_temp; |
6044 | optab optab; |
6045 | int icode; |
6046 | machine_mode optab_op2_mode; |
6047 | enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; |
6048 | int ndts = 2; |
6049 | poly_uint64 nunits_in; |
6050 | poly_uint64 nunits_out; |
6051 | tree vectype_out; |
6052 | tree op1_vectype; |
6053 | int ncopies; |
6054 | int i; |
6055 | vec<tree> vec_oprnds0 = vNULL; |
6056 | vec<tree> vec_oprnds1 = vNULL; |
6057 | tree vop0, vop1; |
6058 | unsigned int k; |
6059 | bool scalar_shift_arg = true; |
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6061 | bool incompatible_op1_vectype_p = false; |
6062 | |
6063 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
6064 | return false; |
6065 | |
6066 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
6067 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle |
6068 | && ! vec_stmt) |
6069 | return false; |
6070 | |
  /* Is STMT a vectorizable shift operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6073 | if (!stmt) |
6074 | return false; |
6075 | |
6076 | if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) |
6077 | return false; |
6078 | |
  code = gimple_assign_rhs_code (stmt);
6080 | |
6081 | if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR |
6082 | || code == RROTATE_EXPR)) |
6083 | return false; |
6084 | |
  scalar_dest = gimple_assign_lhs (stmt);
6086 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
6087 | if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))) |
6088 | { |
6089 | if (dump_enabled_p ()) |
6090 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6091 | "bit-precision shifts not supported.\n" ); |
6092 | return false; |
6093 | } |
6094 | |
6095 | slp_tree slp_op0; |
6096 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6097 | 0, &op0, &slp_op0, &dt[0], &vectype)) |
6098 | { |
6099 | if (dump_enabled_p ()) |
6100 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6101 | "use not simple.\n" ); |
6102 | return false; |
6103 | } |
6104 | /* If op0 is an external or constant def, infer the vector type |
6105 | from the scalar type. */ |
6106 | if (!vectype) |
6107 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node); |
6108 | if (vec_stmt) |
6109 | gcc_assert (vectype); |
6110 | if (!vectype) |
6111 | { |
6112 | if (dump_enabled_p ()) |
6113 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6114 | "no vectype for scalar type\n" ); |
6115 | return false; |
6116 | } |
6117 | |
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
6121 | return false; |
6122 | |
6123 | stmt_vec_info op1_def_stmt_info; |
6124 | slp_tree slp_op1; |
6125 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1, |
6126 | &dt[1], &op1_vectype, &op1_def_stmt_info)) |
6127 | { |
6128 | if (dump_enabled_p ()) |
6129 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6130 | "use not simple.\n" ); |
6131 | return false; |
6132 | } |
6133 | |
6134 | /* Multiple types in SLP are handled by creating the appropriate number of |
6135 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
6136 | case of SLP. */ |
6137 | if (slp_node) |
6138 | ncopies = 1; |
6139 | else |
6140 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
6141 | |
6142 | gcc_assert (ncopies >= 1); |
6143 | |
  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
6146 | |
6147 | if ((dt[1] == vect_internal_def |
6148 | || dt[1] == vect_induction_def |
6149 | || dt[1] == vect_nested_cycle) |
6150 | && !slp_node) |
6151 | scalar_shift_arg = false; |
6152 | else if (dt[1] == vect_constant_def |
6153 | || dt[1] == vect_external_def |
6154 | || dt[1] == vect_internal_def) |
6155 | { |
	  /* In SLP we need to check whether the shift count is the same
	     in all the scalar stmts; in loops, a constant or invariant
	     shift count is always a scalar shift.  */
6159 | if (slp_node) |
6160 | { |
6161 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
6162 | stmt_vec_info slpstmt_info; |
6163 | |
6164 | FOR_EACH_VEC_ELT (stmts, k, slpstmt_info) |
6165 | { |
	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6168 | scalar_shift_arg = false; |
6169 | } |
6170 | |
6171 | /* For internal SLP defs we have to make sure we see scalar stmts |
6172 | for all vector elements. |
6173 | ??? For different vectors we could resort to a different |
6174 | scalar shift operand but code-generation below simply always |
6175 | takes the first. */ |
6176 | if (dt[1] == vect_internal_def |
	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
			   stmts.length ()))
6179 | scalar_shift_arg = false; |
6180 | } |
6181 | |
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly, thus give up and use a vector
	 shift.  */
      if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6186 | scalar_shift_arg = false; |
6187 | } |
6188 | else |
6189 | { |
6190 | if (dump_enabled_p ()) |
6191 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6192 | "operand mode requires invariant argument.\n" ); |
6193 | return false; |
6194 | } |
6195 | |
6196 | /* Vector shifted by vector. */ |
6197 | bool was_scalar_shift_arg = scalar_shift_arg; |
6198 | if (!scalar_shift_arg) |
6199 | { |
6200 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6201 | if (dump_enabled_p ()) |
6202 | dump_printf_loc (MSG_NOTE, vect_location, |
6203 | "vector/vector shift/rotate found.\n" ); |
6204 | |
6205 | if (!op1_vectype) |
6206 | op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), |
6207 | slp_op1); |
6208 | incompatible_op1_vectype_p |
6209 | = (op1_vectype == NULL_TREE |
	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
			TYPE_VECTOR_SUBPARTS (vectype))
6212 | || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)); |
6213 | if (incompatible_op1_vectype_p |
6214 | && (!slp_node |
6215 | || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def |
6216 | || slp_op1->refcnt != 1)) |
6217 | { |
6218 | if (dump_enabled_p ()) |
6219 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6220 | "unusable type for last operand in" |
6221 | " vector/vector shift/rotate.\n" ); |
6222 | return false; |
6223 | } |
6224 | } |
6225 | /* See if the machine has a vector shifted by scalar insn and if not |
6226 | then see if it has a vector shifted by vector insn. */ |
6227 | else |
6228 | { |
6229 | optab = optab_for_tree_code (code, vectype, optab_scalar); |
6230 | if (optab |
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6232 | { |
6233 | if (dump_enabled_p ()) |
6234 | dump_printf_loc (MSG_NOTE, vect_location, |
6235 | "vector/scalar shift/rotate found.\n" ); |
6236 | } |
6237 | else |
6238 | { |
6239 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6240 | if (optab |
	      && (optab_handler (optab, TYPE_MODE (vectype))
6242 | != CODE_FOR_nothing)) |
6243 | { |
6244 | scalar_shift_arg = false; |
6245 | |
6246 | if (dump_enabled_p ()) |
6247 | dump_printf_loc (MSG_NOTE, vect_location, |
6248 | "vector/vector shift/rotate found.\n" ); |
6249 | |
6250 | if (!op1_vectype) |
6251 | op1_vectype = get_vectype_for_scalar_type (vinfo, |
6252 | TREE_TYPE (op1), |
6253 | slp_op1); |
6254 | |
	      /* Unlike the other binary operators, shifts/rotates take an
		 int rhs rather than one of the same type as the lhs, so
		 make sure the scalar has the right type if we are dealing
		 with vectors of long long/long/short/char.  */
6259 | incompatible_op1_vectype_p |
6260 | = (!op1_vectype |
6261 | || !tree_nop_conversion_p (TREE_TYPE (vectype), |
6262 | TREE_TYPE (op1))); |
6263 | if (incompatible_op1_vectype_p |
6264 | && dt[1] == vect_internal_def) |
6265 | { |
6266 | if (dump_enabled_p ()) |
6267 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6268 | "unusable type for last operand in" |
6269 | " vector/vector shift/rotate.\n" ); |
6270 | return false; |
6271 | } |
6272 | } |
6273 | } |
6274 | } |
6275 | |
6276 | /* Supportable by target? */ |
6277 | if (!optab) |
6278 | { |
6279 | if (dump_enabled_p ()) |
6280 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6281 | "no optab.\n" ); |
6282 | return false; |
6283 | } |
6284 | vec_mode = TYPE_MODE (vectype); |
  icode = (int) optab_handler (optab, vec_mode);
6286 | if (icode == CODE_FOR_nothing) |
6287 | { |
6288 | if (dump_enabled_p ()) |
6289 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6290 | "op not supported by target.\n" ); |
6291 | return false; |
6292 | } |
6293 | /* vector lowering cannot optimize vector shifts using word arithmetic. */ |
6294 | if (vect_emulated_vector_p (vectype)) |
6295 | return false; |
6296 | |
6297 | if (!vec_stmt) /* transformation not required. */ |
6298 | { |
6299 | if (slp_node |
6300 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) |
6301 | || ((!scalar_shift_arg || dt[1] == vect_internal_def) |
6302 | && (!incompatible_op1_vectype_p |
6303 | || dt[1] == vect_constant_def) |
6304 | && !vect_maybe_update_slp_op_vectype |
6305 | (slp_op1, |
6306 | incompatible_op1_vectype_p ? vectype : op1_vectype)))) |
6307 | { |
6308 | if (dump_enabled_p ()) |
6309 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6310 | "incompatible vector types for invariants\n" ); |
6311 | return false; |
6312 | } |
6313 | /* Now adjust the constant shift amount in place. */ |
6314 | if (slp_node |
6315 | && incompatible_op1_vectype_p |
6316 | && dt[1] == vect_constant_def) |
6317 | { |
6318 | for (unsigned i = 0; |
6319 | i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i) |
6320 | { |
6321 | SLP_TREE_SCALAR_OPS (slp_op1)[i] |
6322 | = fold_convert (TREE_TYPE (vectype), |
6323 | SLP_TREE_SCALAR_OPS (slp_op1)[i]); |
6324 | gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i]) |
6325 | == INTEGER_CST)); |
6326 | } |
6327 | } |
6328 | STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; |
6329 | DUMP_VECT_SCOPE ("vectorizable_shift" ); |
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6332 | return true; |
6333 | } |
6334 | |
6335 | /* Transform. */ |
6336 | |
6337 | if (dump_enabled_p ()) |
6338 | dump_printf_loc (MSG_NOTE, vect_location, |
6339 | "transform binary/unary operation.\n" ); |
6340 | |
6341 | if (incompatible_op1_vectype_p && !slp_node) |
6342 | { |
6343 | gcc_assert (!scalar_shift_arg && was_scalar_shift_arg); |
6344 | op1 = fold_convert (TREE_TYPE (vectype), op1); |
6345 | if (dt[1] != vect_constant_def) |
	op1 = vect_init_vector (vinfo, stmt_info, op1,
				TREE_TYPE (vectype), NULL);
6348 | } |
6349 | |
6350 | /* Handle def. */ |
6351 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6352 | |
6353 | if (scalar_shift_arg && dt[1] != vect_internal_def) |
6354 | { |
6355 | /* Vector shl and shr insn patterns can be defined with scalar |
6356 | operand 2 (shift operand). In this case, use constant or loop |
6357 | invariant op1 directly, without extending it to vector mode |
6358 | first. */ |
6359 | optab_op2_mode = insn_data[icode].operand[2].mode; |
6360 | if (!VECTOR_MODE_P (optab_op2_mode)) |
6361 | { |
6362 | if (dump_enabled_p ()) |
6363 | dump_printf_loc (MSG_NOTE, vect_location, |
6364 | "operand 1 using scalar mode.\n" ); |
6365 | vec_oprnd1 = op1; |
	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
	  vec_oprnds1.quick_push (vec_oprnd1);
6368 | /* Store vec_oprnd1 for every vector stmt to be created. |
6369 | We check during the analysis that all the shift arguments |
6370 | are the same. |
6371 | TODO: Allow different constants for different vector |
6372 | stmts generated for an SLP instance. */ |
6373 | for (k = 0; |
6374 | k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++) |
	    vec_oprnds1.quick_push (vec_oprnd1);
6376 | } |
6377 | } |
6378 | else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p) |
6379 | { |
6380 | if (was_scalar_shift_arg) |
6381 | { |
6382 | /* If the argument was the same in all lanes create |
6383 | the correctly typed vector shift amount directly. */ |
6384 | op1 = fold_convert (TREE_TYPE (vectype), op1); |
	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
				  !loop_vinfo ? gsi : NULL);
	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
					 !loop_vinfo ? gsi : NULL);
	  vec_oprnds1.create (slp_node->vec_stmts_size);
	  for (k = 0; k < slp_node->vec_stmts_size; k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
6392 | } |
6393 | else if (dt[1] == vect_constant_def) |
6394 | /* The constant shift amount has been adjusted in place. */ |
6395 | ; |
6396 | else |
6397 | gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype)); |
6398 | } |
6399 | |
  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kinds of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0,
		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6406 | |
6407 | /* Arguments are ready. Create the new vector stmt. */ |
6408 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
6409 | { |
      /* For internal defs where we need to use a scalar shift arg,
	 extract the first lane.  */
6412 | if (scalar_shift_arg && dt[1] == vect_internal_def) |
6413 | { |
6414 | vop1 = vec_oprnds1[0]; |
6415 | new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1))); |
6416 | gassign *new_stmt |
6417 | = gimple_build_assign (new_temp, |
6418 | build3 (BIT_FIELD_REF, TREE_TYPE (new_temp), |
6419 | vop1, |
6420 | TYPE_SIZE (TREE_TYPE (new_temp)), |
6421 | bitsize_zero_node)); |
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6423 | vop1 = new_temp; |
6424 | } |
6425 | else |
6426 | vop1 = vec_oprnds1[i]; |
6427 | gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); |
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6435 | } |
6436 | |
6437 | if (!slp_node) |
6438 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
6439 | |
6440 | vec_oprnds0.release (); |
6441 | vec_oprnds1.release (); |
6442 | |
6443 | return true; |
6444 | } |
6445 | |
6446 | /* Function vectorizable_operation. |
6447 | |
6448 | Check if STMT_INFO performs a binary, unary or ternary operation that can |
6449 | be vectorized. |
6450 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
6451 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
6452 | Return true if STMT_INFO is vectorizable in this way. */ |
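
/* A minimal illustration (not from the original comment): for

     z_3 = x_1 + y_2;

   the analysis phase checks that the target supports addition on the
   chosen vector type, and the transform phase emits one

     vz = vx + vy;

   statement per vector copy needed.  */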
6453 | |
6454 | static bool |
6455 | vectorizable_operation (vec_info *vinfo, |
6456 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
6457 | gimple **vec_stmt, slp_tree slp_node, |
6458 | stmt_vector_for_cost *cost_vec) |
6459 | { |
6460 | tree vec_dest; |
6461 | tree scalar_dest; |
6462 | tree op0, op1 = NULL_TREE, op2 = NULL_TREE; |
6463 | tree vectype; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6465 | enum tree_code code, orig_code; |
6466 | machine_mode vec_mode; |
6467 | tree new_temp; |
6468 | int op_type; |
6469 | optab optab; |
6470 | bool target_support_p; |
6471 | enum vect_def_type dt[3] |
6472 | = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; |
6473 | int ndts = 3; |
6474 | poly_uint64 nunits_in; |
6475 | poly_uint64 nunits_out; |
6476 | tree vectype_out; |
6477 | int ncopies, vec_num; |
6478 | int i; |
6479 | vec<tree> vec_oprnds0 = vNULL; |
6480 | vec<tree> vec_oprnds1 = vNULL; |
6481 | vec<tree> vec_oprnds2 = vNULL; |
6482 | tree vop0, vop1, vop2; |
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6484 | |
6485 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
6486 | return false; |
6487 | |
6488 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
6489 | && ! vec_stmt) |
6490 | return false; |
6491 | |
  /* Is STMT a vectorizable binary/unary/ternary operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6494 | if (!stmt) |
6495 | return false; |
6496 | |
6497 | /* Loads and stores are handled in vectorizable_{load,store}. */ |
6498 | if (STMT_VINFO_DATA_REF (stmt_info)) |
6499 | return false; |
6500 | |
  orig_code = code = gimple_assign_rhs_code (stmt);
6502 | |
6503 | /* Shifts are handled in vectorizable_shift. */ |
6504 | if (code == LSHIFT_EXPR |
6505 | || code == RSHIFT_EXPR |
6506 | || code == LROTATE_EXPR |
6507 | || code == RROTATE_EXPR) |
6508 | return false; |
6509 | |
6510 | /* Comparisons are handled in vectorizable_comparison. */ |
6511 | if (TREE_CODE_CLASS (code) == tcc_comparison) |
6512 | return false; |
6513 | |
6514 | /* Conditions are handled in vectorizable_condition. */ |
6515 | if (code == COND_EXPR) |
6516 | return false; |
6517 | |
6518 | /* For pointer addition and subtraction, we should use the normal |
6519 | plus and minus for the vector operation. */ |
6520 | if (code == POINTER_PLUS_EXPR) |
6521 | code = PLUS_EXPR; |
6522 | if (code == POINTER_DIFF_EXPR) |
6523 | code = MINUS_EXPR; |
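
  /* E.g. (illustrative): a pointer increment p_2 = p_1 + 16 appears in
     GIMPLE as POINTER_PLUS_EXPR and a pointer difference as
     POINTER_DIFF_EXPR; both are carried out on the vectorized pointer
     values as ordinary integer PLUS/MINUS.  */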
6524 | |
  /* Support only unary, binary and ternary operations.  */
6526 | op_type = TREE_CODE_LENGTH (code); |
6527 | if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) |
6528 | { |
6529 | if (dump_enabled_p ()) |
6530 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6531 | "num. args = %d (not unary/binary/ternary op).\n" , |
6532 | op_type); |
6533 | return false; |
6534 | } |
6535 | |
  scalar_dest = gimple_assign_lhs (stmt);
6537 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
6538 | |
6539 | /* Most operations cannot handle bit-precision types without extra |
6540 | truncations. */ |
6541 | bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); |
6542 | if (!mask_op_p |
6543 | && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) |
      /* The exceptions are bitwise binary operations.  */
6545 | && code != BIT_IOR_EXPR |
6546 | && code != BIT_XOR_EXPR |
6547 | && code != BIT_AND_EXPR) |
6548 | { |
6549 | if (dump_enabled_p ()) |
6550 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6551 | "bit-precision arithmetic not supported.\n" ); |
6552 | return false; |
6553 | } |
6554 | |
6555 | slp_tree slp_op0; |
6556 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6557 | 0, &op0, &slp_op0, &dt[0], &vectype)) |
6558 | { |
6559 | if (dump_enabled_p ()) |
6560 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6561 | "use not simple.\n" ); |
6562 | return false; |
6563 | } |
6564 | bool is_invariant = (dt[0] == vect_external_def |
6565 | || dt[0] == vect_constant_def); |
6566 | /* If op0 is an external or constant def, infer the vector type |
6567 | from the scalar type. */ |
6568 | if (!vectype) |
6569 | { |
6570 | /* For boolean type we cannot determine vectype by |
6571 | invariant value (don't know whether it is a vector |
6572 | of booleans or vector of integers). We use output |
6573 | vectype because operations on boolean don't change |
6574 | type. */ |
6575 | if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0))) |
6576 | { |
6577 | if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest))) |
6578 | { |
6579 | if (dump_enabled_p ()) |
6580 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6581 | "not supported operation on bool value.\n" ); |
6582 | return false; |
6583 | } |
6584 | vectype = vectype_out; |
6585 | } |
6586 | else |
6587 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), |
6588 | slp_node); |
6589 | } |
6590 | if (vec_stmt) |
6591 | gcc_assert (vectype); |
6592 | if (!vectype) |
6593 | { |
6594 | if (dump_enabled_p ()) |
6595 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6596 | "no vectype for scalar type %T\n" , |
6597 | TREE_TYPE (op0)); |
6598 | |
6599 | return false; |
6600 | } |
6601 | |
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
6605 | return false; |
6606 | |
6607 | tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; |
6608 | slp_tree slp_op1 = NULL, slp_op2 = NULL; |
6609 | if (op_type == binary_op || op_type == ternary_op) |
6610 | { |
6611 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6612 | 1, &op1, &slp_op1, &dt[1], &vectype2)) |
6613 | { |
6614 | if (dump_enabled_p ()) |
6615 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6616 | "use not simple.\n" ); |
6617 | return false; |
6618 | } |
6619 | is_invariant &= (dt[1] == vect_external_def |
6620 | || dt[1] == vect_constant_def); |
6621 | if (vectype2 |
	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6623 | return false; |
6624 | } |
6625 | if (op_type == ternary_op) |
6626 | { |
6627 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6628 | 2, &op2, &slp_op2, &dt[2], &vectype3)) |
6629 | { |
6630 | if (dump_enabled_p ()) |
6631 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6632 | "use not simple.\n" ); |
6633 | return false; |
6634 | } |
6635 | is_invariant &= (dt[2] == vect_external_def |
6636 | || dt[2] == vect_constant_def); |
6637 | if (vectype3 |
	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6639 | return false; |
6640 | } |
6641 | |
6642 | /* Multiple types in SLP are handled by creating the appropriate number of |
6643 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
6644 | case of SLP. */ |
6645 | if (slp_node) |
6646 | { |
6647 | ncopies = 1; |
6648 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
6649 | } |
6650 | else |
6651 | { |
6652 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
6653 | vec_num = 1; |
6654 | } |
6655 | |
6656 | gcc_assert (ncopies >= 1); |
6657 | |
6658 | /* Reject attempts to combine mask types with nonmask types, e.g. if |
6659 | we have an AND between a (nonmask) boolean loaded from memory and |
6660 | a (mask) boolean result of a comparison. |
6661 | |
6662 | TODO: We could easily fix these cases up using pattern statements. */ |
6663 | if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p |
6664 | || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) |
6665 | || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) |
6666 | { |
6667 | if (dump_enabled_p ()) |
6668 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6669 | "mixed mask and nonmask vector types\n" ); |
6670 | return false; |
6671 | } |
6672 | |
6673 | /* Supportable by target? */ |
6674 | |
6675 | vec_mode = TYPE_MODE (vectype); |
6676 | if (code == MULT_HIGHPART_EXPR) |
6677 | target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)); |
6678 | else |
6679 | { |
6680 | optab = optab_for_tree_code (code, vectype, optab_default); |
6681 | if (!optab) |
6682 | { |
6683 | if (dump_enabled_p ()) |
6684 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6685 | "no optab.\n" ); |
6686 | return false; |
6687 | } |
      target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6689 | || optab_libfunc (optab, vec_mode)); |
6690 | } |
6691 | |
6692 | bool using_emulated_vectors_p = vect_emulated_vector_p (vectype); |
6693 | if (!target_support_p || using_emulated_vectors_p) |
6694 | { |
6695 | if (dump_enabled_p ()) |
6696 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6697 | "op not supported by target.\n" ); |
      /* When vec_mode is not a vector mode and we verified that ops we
	 do not have to lower (like AND) are natively supported, let
	 those through even when the mode isn't word_mode.  For ops we
	 do have to lower, the lowering code assumes we are dealing
	 with word_mode.  */
      if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
	    || !target_support_p)
	   && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6706 | /* Check only during analysis. */ |
6707 | || (!vec_stmt && !vect_can_vectorize_without_simd_p (code))) |
6708 | { |
6709 | if (dump_enabled_p ()) |
6710 | dump_printf (MSG_NOTE, "using word mode not possible.\n" ); |
6711 | return false; |
6712 | } |
6713 | if (dump_enabled_p ()) |
6714 | dump_printf_loc (MSG_NOTE, vect_location, |
6715 | "proceeding using word mode.\n" ); |
6716 | using_emulated_vectors_p = true; |
6717 | } |
6718 | |
6719 | int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); |
6720 | vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); |
6721 | vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); |
6722 | internal_fn cond_fn = get_conditional_internal_fn (code); |
6723 | internal_fn cond_len_fn = get_conditional_len_internal_fn (code); |
6724 | |
6725 | /* If operating on inactive elements could generate spurious traps, |
6726 | we need to restrict the operation to active lanes. Note that this |
6727 | specifically doesn't apply to unhoisted invariants, since they |
6728 | operate on the same value for every lane. |
6729 | |
6730 | Similarly, if this operation is part of a reduction, a fully-masked |
6731 | loop should only change the active lanes of the reduction chain, |
6732 | keeping the inactive lanes as-is. */ |
6733 | bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt)) |
6734 | || reduc_idx >= 0); |
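
  /* Example (illustrative): with partial vectors, a division x_1 / y_2
     could trap on an inactive lane where y is zero, so it has to be
     emitted as a conditional internal function (e.g. .COND_DIV) that
     operates on active lanes only; a plain addition cannot trap and
     needs no such masking.  */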
6735 | |
6736 | if (!vec_stmt) /* transformation not required. */ |
6737 | { |
6738 | if (loop_vinfo |
6739 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
6740 | && mask_out_inactive) |
6741 | { |
6742 | if (cond_len_fn != IFN_LAST |
6743 | && direct_internal_fn_supported_p (cond_len_fn, vectype, |
6744 | OPTIMIZE_FOR_SPEED)) |
6745 | vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype, |
6746 | 1); |
6747 | else if (cond_fn != IFN_LAST |
6748 | && direct_internal_fn_supported_p (cond_fn, vectype, |
6749 | OPTIMIZE_FOR_SPEED)) |
6750 | vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, |
6751 | vectype, NULL); |
6752 | else |
6753 | { |
6754 | if (dump_enabled_p ()) |
6755 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6756 | "can't use a fully-masked loop because no" |
6757 | " conditional operation is available.\n" ); |
6758 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
6759 | } |
6760 | } |
6761 | |
6762 | /* Put types on constant and invariant SLP children. */ |
6763 | if (slp_node |
6764 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) |
6765 | || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) |
6766 | || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))) |
6767 | { |
6768 | if (dump_enabled_p ()) |
6769 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6770 | "incompatible vector types for invariants\n" ); |
6771 | return false; |
6772 | } |
6773 | |
6774 | STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; |
6775 | DUMP_VECT_SCOPE ("vectorizable_operation" ); |
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
6778 | if (using_emulated_vectors_p) |
6779 | { |
6780 | /* The above vect_model_simple_cost call handles constants |
6781 | in the prologue and (mis-)costs one of the stmts as |
6782 | vector stmt. See below for the actual lowering that will |
6783 | be applied. */ |
6784 | unsigned n |
6785 | = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies; |
6786 | switch (code) |
6787 | { |
6788 | case PLUS_EXPR: |
6789 | n *= 5; |
6790 | break; |
6791 | case MINUS_EXPR: |
6792 | n *= 6; |
6793 | break; |
6794 | case NEGATE_EXPR: |
6795 | n *= 4; |
6796 | break; |
6797 | default: |
6798 | /* Bit operations do not have extra cost and are accounted |
6799 | as vector stmt by vect_model_simple_cost. */ |
6800 | n = 0; |
6801 | break; |
6802 | } |
6803 | if (n != 0) |
6804 | { |
6805 | /* We also need to materialize two large constants. */ |
	  record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
			    0, vect_prologue);
	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
			    0, vect_body);
6810 | } |
6811 | } |
6812 | return true; |
6813 | } |
6814 | |
6815 | /* Transform. */ |
6816 | |
6817 | if (dump_enabled_p ()) |
6818 | dump_printf_loc (MSG_NOTE, vect_location, |
6819 | "transform binary/unary operation.\n" ); |
6820 | |
6821 | bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
6822 | bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); |
6823 | |
6824 | /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as |
6825 | vectors with unsigned elements, but the result is signed. So, we |
     need to compute the MINUS_EXPR into a vectype temporary and
6827 | VIEW_CONVERT_EXPR it into the final vectype_out result. */ |
6828 | tree vec_cvt_dest = NULL_TREE; |
6829 | if (orig_code == POINTER_DIFF_EXPR) |
6830 | { |
6831 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6832 | vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); |
6833 | } |
6834 | /* Handle def. */ |
6835 | else |
6836 | vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
6837 | |
6838 | /* In case the vectorization factor (VF) is bigger than the number |
6839 | of elements that we can fit in a vectype (nunits), we have to generate |
6840 | more than one vector stmt - i.e - we need to "unroll" the |
6841 | vector stmt by a factor VF/nunits. In doing so, we record a pointer |
6842 | from one copy of the vector stmt to the next, in the field |
6843 | STMT_VINFO_RELATED_STMT. This is necessary in order to allow following |
6844 | stages to find the correct vector defs to be used when vectorizing |
6845 | stmts that use the defs of the current stmt. The example below |
6846 | illustrates the vectorization process when VF=16 and nunits=4 (i.e., |
6847 | we need to create 4 vectorized stmts): |
6848 | |
6849 | before vectorization: |
6850 | RELATED_STMT VEC_STMT |
6851 | S1: x = memref - - |
6852 | S2: z = x + 1 - - |
6853 | |
6854 | step 1: vectorize stmt S1 (done in vectorizable_load. See more details |
6855 | there): |
6856 | RELATED_STMT VEC_STMT |
6857 | VS1_0: vx0 = memref0 VS1_1 - |
6858 | VS1_1: vx1 = memref1 VS1_2 - |
6859 | VS1_2: vx2 = memref2 VS1_3 - |
6860 | VS1_3: vx3 = memref3 - - |
6861 | S1: x = load - VS1_0 |
6862 | S2: z = x + 1 - - |
6863 | |
6864 | step2: vectorize stmt S2 (done here): |
6865 | To vectorize stmt S2 we first need to find the relevant vector |
6866 | def for the first operand 'x'. This is, as usual, obtained from |
6867 | the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt |
6868 | that defines 'x' (S1). This way we find the stmt VS1_0, and the |
6869 | relevant vector def 'vx0'. Having found 'vx0' we can generate |
6870 | the vector stmt VS2_0, and as usual, record it in the |
6871 | STMT_VINFO_VEC_STMT of stmt S2. |
6872 | When creating the second copy (VS2_1), we obtain the relevant vector |
6873 | def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of |
6874 | stmt VS1_0. This way we find the stmt VS1_1 and the relevant |
6875 | vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a |
6876 | pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. |
6877 | Similarly when creating stmts VS2_2 and VS2_3. This is the resulting |
6878 | chain of stmts and pointers: |
6879 | RELATED_STMT VEC_STMT |
6880 | VS1_0: vx0 = memref0 VS1_1 - |
6881 | VS1_1: vx1 = memref1 VS1_2 - |
6882 | VS1_2: vx2 = memref2 VS1_3 - |
6883 | VS1_3: vx3 = memref3 - - |
6884 | S1: x = load - VS1_0 |
6885 | VS2_0: vz0 = vx0 + v1 VS2_1 - |
6886 | VS2_1: vz1 = vx1 + v1 VS2_2 - |
6887 | VS2_2: vz2 = vx2 + v1 VS2_3 - |
6888 | VS2_3: vz3 = vx3 + v1 - - |
6889 | S2: z = x + 1 - VS2_0 */ |
6890 | |
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6893 | /* Arguments are ready. Create the new vector stmt. */ |
6894 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
6895 | { |
6896 | gimple *new_stmt = NULL; |
6897 | vop1 = ((op_type == binary_op || op_type == ternary_op) |
6898 | ? vec_oprnds1[i] : NULL_TREE); |
6899 | vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); |
6900 | if (using_emulated_vectors_p |
6901 | && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)) |
6902 | { |
6903 | /* Lower the operation. This follows vector lowering. */ |
6904 | unsigned int width = vector_element_bits (vectype); |
6905 | tree inner_type = TREE_TYPE (vectype); |
6906 | tree word_type |
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6908 | HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type)); |
6909 | tree low_bits = build_replicated_int_cst (word_type, width, max >> 1); |
6910 | tree high_bits |
6911 | = build_replicated_int_cst (word_type, width, max & ~(max >> 1)); |
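	  /* Illustrative values: for 8-bit elements in a 64-bit word,
	     MAX is 0xff, so LOW_BITS replicates 0x7f into every byte and
	     HIGH_BITS replicates 0x80.  Clearing the top bit of each lane
	     before the word-mode add/subtract keeps carries/borrows from
	     crossing lane boundaries; the top bits are patched back in
	     via the XOR with SIGNS below.  */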
	  tree wvop0 = make_ssa_name (word_type);
	  new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
					  build1 (VIEW_CONVERT_EXPR,
						  word_type, vop0));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6917 | tree result_low, signs; |
6918 | if (code == PLUS_EXPR || code == MINUS_EXPR) |
6919 | { |
	      tree wvop1 = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
					      build1 (VIEW_CONVERT_EXPR,
						      word_type, vop1));
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      signs = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (signs,
					      BIT_XOR_EXPR, wvop0, wvop1);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      tree b_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (b_low,
					      BIT_AND_EXPR, wvop1, low_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      tree a_low = make_ssa_name (word_type);
	      if (code == PLUS_EXPR)
		new_stmt = gimple_build_assign (a_low,
						BIT_AND_EXPR, wvop0, low_bits);
	      else
		new_stmt = gimple_build_assign (a_low,
						BIT_IOR_EXPR, wvop0, high_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      if (code == MINUS_EXPR)
		{
		  new_stmt = gimple_build_assign (NULL_TREE,
						  BIT_NOT_EXPR, signs);
		  signs = make_ssa_name (word_type);
		  gimple_assign_set_lhs (new_stmt, signs);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		}
	      new_stmt = gimple_build_assign (NULL_TREE,
					      BIT_AND_EXPR, signs, high_bits);
	      signs = make_ssa_name (word_type);
	      gimple_assign_set_lhs (new_stmt, signs);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      result_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6957 | } |
6958 | else |
6959 | { |
	      tree a_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (a_low,
					      BIT_AND_EXPR, wvop0, low_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      signs = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      new_stmt = gimple_build_assign (NULL_TREE,
					      BIT_AND_EXPR, signs, high_bits);
	      signs = make_ssa_name (word_type);
	      gimple_assign_set_lhs (new_stmt, signs);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      result_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (result_low,
					      MINUS_EXPR, high_bits, a_low);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
					  signs);
	  result_low = make_ssa_name (word_type);
	  gimple_assign_set_lhs (new_stmt, result_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
					  build1 (VIEW_CONVERT_EXPR,
						  vectype, result_low));
	  new_temp = make_ssa_name (vectype);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6988 | } |
6989 | else if ((masked_loop_p || len_loop_p) && mask_out_inactive) |
6990 | { |
6991 | tree mask; |
6992 | if (masked_loop_p) |
6993 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
6994 | vec_num * ncopies, vectype, i); |
6995 | else |
6996 | /* Dummy mask. */ |
6997 | mask = build_minus_one_cst (truth_type_for (vectype)); |
6998 | auto_vec<tree> vops (6); |
	  vops.quick_push (mask);
	  vops.quick_push (vop0);
	  if (vop1)
	    vops.quick_push (vop1);
	  if (vop2)
	    vops.quick_push (vop2);
7005 | if (reduc_idx >= 0) |
7006 | { |
7007 | /* Perform the operation on active elements only and take |
7008 | inactive elements from the reduction chain input. */ |
7009 | gcc_assert (!vop2); |
	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7011 | } |
7012 | else |
7013 | { |
7014 | auto else_value = targetm.preferred_else_value |
7015 | (cond_fn, vectype, vops.length () - 1, &vops[1]); |
	      vops.quick_push (else_value);
7017 | } |
7018 | if (len_loop_p) |
7019 | { |
7020 | tree len = vect_get_loop_len (loop_vinfo, gsi, lens, |
7021 | vec_num * ncopies, vectype, i, 1); |
7022 | signed char biasval |
7023 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
7024 | tree bias = build_int_cst (intQI_type_node, biasval); |
	      vops.quick_push (len);
	      vops.quick_push (bias);
7027 | } |
7028 | gcall *call |
7029 | = gimple_build_call_internal_vec (masked_loop_p ? cond_fn |
7030 | : cond_len_fn, |
7031 | vops); |
	  new_temp = make_ssa_name (vec_dest, call);
	  gimple_call_set_lhs (call, new_temp);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7036 | new_stmt = call; |
7037 | } |
7038 | else |
7039 | { |
7040 | tree mask = NULL_TREE; |
	  /* When combining two masks, check if either of them is elsewhere
	     combined with a loop mask; if so, we can mark that the new
	     combined mask doesn't need to be combined with a loop mask
	     again.  */
7044 | if (masked_loop_p |
7045 | && code == BIT_AND_EXPR |
7046 | && VECTOR_BOOLEAN_TYPE_P (vectype)) |
7047 | { |
	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
								 ncopies }))
7050 | { |
7051 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
7052 | vec_num * ncopies, vectype, i); |
7053 | |
		  vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
					   vop0, gsi);
7056 | } |
7057 | |
	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
								 ncopies }))
7060 | { |
7061 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
7062 | vec_num * ncopies, vectype, i); |
7063 | |
		  vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
					   vop1, gsi);
7066 | } |
7067 | } |
7068 | |
7069 | new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2); |
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7073 | if (using_emulated_vectors_p) |
7074 | suppress_warning (new_stmt, OPT_Wvector_operation_performance); |
7075 | |
7076 | /* Enter the combined value into the vector cond hash so we don't |
7077 | AND it with a loop mask again. */ |
7078 | if (mask) |
	    loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7080 | } |
7081 | |
7082 | if (vec_cvt_dest) |
7083 | { |
7084 | new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); |
7085 | new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, |
7086 | new_temp); |
	  new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info,
				       new_stmt, gsi);
7091 | } |
7092 | |
7093 | if (slp_node) |
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7097 | } |
7098 | |
7099 | if (!slp_node) |
7100 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7101 | |
7102 | vec_oprnds0.release (); |
7103 | vec_oprnds1.release (); |
7104 | vec_oprnds2.release (); |
7105 | |
7106 | return true; |
7107 | } |
7108 | |
7109 | /* A helper function to ensure data reference DR_INFO's base alignment. */ |
7110 | |
7111 | static void |
7112 | ensure_base_align (dr_vec_info *dr_info) |
7113 | { |
  /* Alignment is only analyzed for the first element of a DR group;
     use that to determine the base alignment we need to enforce.  */
7116 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)) |
7117 | dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt)); |
7118 | |
7119 | gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED); |
7120 | |
7121 | if (dr_info->base_misaligned) |
7122 | { |
7123 | tree base_decl = dr_info->base_decl; |
7124 | |
      /* We should only be able to increase the alignment of a base object
	 if we know what its new alignment should be at compile time.  */
7127 | unsigned HOST_WIDE_INT align_base_to = |
7128 | DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT; |
7129 | |
      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
7132 | else if (DECL_ALIGN (base_decl) < align_base_to) |
7133 | { |
7134 | SET_DECL_ALIGN (base_decl, align_base_to); |
7135 | DECL_USER_ALIGN (base_decl) = 1; |
7136 | } |
7137 | dr_info->base_misaligned = false; |
7138 | } |
7139 | } |
7140 | |
7141 | |
7142 | /* Function get_group_alias_ptr_type. |
7143 | |
7144 | Return the alias type for the group starting at FIRST_STMT_INFO. */ |
7145 | |
7146 | static tree |
7147 | get_group_alias_ptr_type (stmt_vec_info first_stmt_info) |
7148 | { |
7149 | struct data_reference *first_dr, *next_dr; |
7150 | |
7151 | first_dr = STMT_VINFO_DATA_REF (first_stmt_info); |
7152 | stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info); |
7153 | while (next_stmt_info) |
7154 | { |
7155 | next_dr = STMT_VINFO_DATA_REF (next_stmt_info); |
7156 | if (get_alias_set (DR_REF (first_dr)) |
7157 | != get_alias_set (DR_REF (next_dr))) |
7158 | { |
7159 | if (dump_enabled_p ()) |
7160 | dump_printf_loc (MSG_NOTE, vect_location, |
7161 | "conflicting alias set types.\n" ); |
7162 | return ptr_type_node; |
7163 | } |
7164 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
7165 | } |
7166 | return reference_alias_ptr_type (DR_REF (first_dr)); |
7167 | } |
7168 | |
7169 | |
7170 | /* Function scan_operand_equal_p. |
7171 | |
7172 | Helper function for check_scan_store. Compare two references |
7173 | with .GOMP_SIMD_LANE bases. */ |
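
/* Illustrative note (an assumption about typical input, not from the
   original comment): two occurrences of the same "omp simd array"
   access, e.g. D.2042[_25], may reach this function with their offsets
   expressed differently (one still wrapped in a widening conversion or
   a "* step" multiplication folded into the address arithmetic); the
   code below peels those wrappers off before comparing bases, offsets
   and steps for equality.  */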
7174 | |
7175 | static bool |
7176 | scan_operand_equal_p (tree ref1, tree ref2) |
7177 | { |
7178 | tree ref[2] = { ref1, ref2 }; |
7179 | poly_int64 bitsize[2], bitpos[2]; |
7180 | tree offset[2], base[2]; |
7181 | for (int i = 0; i < 2; ++i) |
7182 | { |
7183 | machine_mode mode; |
7184 | int unsignedp, reversep, volatilep = 0; |
7185 | base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i], |
7186 | &offset[i], &mode, &unsignedp, |
7187 | &reversep, &volatilep); |
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7189 | return false; |
7190 | if (TREE_CODE (base[i]) == MEM_REF |
7191 | && offset[i] == NULL_TREE |
7192 | && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME) |
7193 | { |
7194 | gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0)); |
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7197 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR |
7198 | && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME) |
7199 | { |
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
7201 | return false; |
7202 | base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); |
	      offset[i] = gimple_assign_rhs2 (def_stmt);
7204 | } |
7205 | } |
7206 | } |
7207 | |
  if (!operand_equal_p (base[0], base[1], 0))
7209 | return false; |
  if (maybe_ne (bitsize[0], bitsize[1]))
7211 | return false; |
7212 | if (offset[0] != offset[1]) |
7213 | { |
7214 | if (!offset[0] || !offset[1]) |
7215 | return false; |
      if (!operand_equal_p (offset[0], offset[1], 0))
7217 | { |
7218 | tree step[2]; |
7219 | for (int i = 0; i < 2; ++i) |
7220 | { |
7221 | step[i] = integer_one_node; |
7222 | if (TREE_CODE (offset[i]) == SSA_NAME) |
7223 | { |
7224 | gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); |
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
7232 | } |
7233 | } |
7234 | else if (TREE_CODE (offset[i]) == MULT_EXPR) |
7235 | { |
7236 | step[i] = TREE_OPERAND (offset[i], 1); |
7237 | offset[i] = TREE_OPERAND (offset[i], 0); |
7238 | } |
7239 | tree rhs1 = NULL_TREE; |
7240 | if (TREE_CODE (offset[i]) == SSA_NAME) |
7241 | { |
7242 | gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); |
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
7245 | } |
7246 | else if (CONVERT_EXPR_P (offset[i])) |
7247 | rhs1 = TREE_OPERAND (offset[i], 0); |
7248 | if (rhs1 |
7249 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
7250 | && INTEGRAL_TYPE_P (TREE_TYPE (offset[i])) |
7251 | && (TYPE_PRECISION (TREE_TYPE (offset[i])) |
7252 | >= TYPE_PRECISION (TREE_TYPE (rhs1)))) |
7253 | offset[i] = rhs1; |
7254 | } |
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
7257 | return false; |
7258 | } |
7259 | } |
7260 | return true; |
7261 | } |
7262 | |
7263 | |
7264 | enum scan_store_kind { |
7265 | /* Normal permutation. */ |
7266 | scan_store_kind_perm, |
7267 | |
7268 | /* Whole vector left shift permutation with zero init. */ |
7269 | scan_store_kind_lshift_zero, |
7270 | |
7271 | /* Whole vector left shift permutation and VEC_COND_EXPR. */ |
7272 | scan_store_kind_lshift_cond |
7273 | }; |
7274 | |
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise the exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR, if nonnull, is filled with one enum scan_store_kind
   per step, describing which operation to perform at that step.  */
7281 | |
7282 | static int |
7283 | scan_store_can_perm_p (tree vectype, tree init, |
7284 | vec<enum scan_store_kind> *use_whole_vector = NULL) |
7285 | { |
7286 | enum machine_mode vec_mode = TYPE_MODE (vectype); |
7287 | unsigned HOST_WIDE_INT nunits; |
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7289 | return -1; |
  int units_log2 = exact_log2 (nunits);
7291 | if (units_log2 <= 0) |
7292 | return -1; |
7293 | |
7294 | int i; |
7295 | enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm; |
7296 | for (i = 0; i <= units_log2; ++i) |
7297 | { |
7298 | unsigned HOST_WIDE_INT j, k; |
7299 | enum scan_store_kind kind = scan_store_kind_perm; |
7300 | vec_perm_builder sel (nunits, nunits, 1); |
      sel.quick_grow (nunits);
7302 | if (i == units_log2) |
7303 | { |
7304 | for (j = 0; j < nunits; ++j) |
7305 | sel[j] = nunits - 1; |
7306 | } |
7307 | else |
7308 | { |
7309 | for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) |
7310 | sel[j] = j; |
7311 | for (k = 0; j < nunits; ++j, ++k) |
7312 | sel[j] = nunits + k; |
7313 | } |
7314 | vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); |
7315 | if (!can_vec_perm_const_p (vec_mode, vec_mode, indices)) |
7316 | { |
7317 | if (i == units_log2) |
7318 | return -1; |
7319 | |
7320 | if (whole_vector_shift_kind == scan_store_kind_perm) |
7321 | { |
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7323 | return -1; |
7324 | whole_vector_shift_kind = scan_store_kind_lshift_zero; |
	      /* Whole vector shifts shift in zeros, so if init is an
		 all-zeros constant, there is no need to do anything
		 further.  */
7327 | if ((TREE_CODE (init) != INTEGER_CST |
7328 | && TREE_CODE (init) != REAL_CST) |
7329 | || !initializer_zerop (init)) |
7330 | { |
7331 | tree masktype = truth_type_for (vectype); |
7332 | if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) |
7333 | return -1; |
7334 | whole_vector_shift_kind = scan_store_kind_lshift_cond; |
7335 | } |
7336 | } |
7337 | kind = whole_vector_shift_kind; |
7338 | } |
7339 | if (use_whole_vector) |
7340 | { |
7341 | if (kind != scan_store_kind_perm && use_whole_vector->is_empty ()) |
	    use_whole_vector->safe_grow_cleared (i, true);
7343 | if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ()) |
	    use_whole_vector->safe_push (kind);
7345 | } |
7346 | } |
7347 | |
7348 | return units_log2; |
7349 | } |
7350 | |
7351 | |
7352 | /* Function check_scan_store. |
7353 | |
7354 | Check magic stores for #pragma omp scan {in,ex}clusive reductions. */ |
7355 | |
7356 | static bool |
7357 | check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, |
7358 | enum vect_def_type rhs_dt, bool slp, tree mask, |
7359 | vect_memory_access_type memory_access_type) |
7360 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7362 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
7363 | tree ref_type; |
7364 | |
7365 | gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); |
7366 | if (slp |
7367 | || mask |
7368 | || memory_access_type != VMAT_CONTIGUOUS |
7369 | || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR |
7370 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) |
7371 | || loop_vinfo == NULL |
7372 | || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
7373 | || STMT_VINFO_GROUPED_ACCESS (stmt_info) |
7374 | || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info)) |
7375 | || !integer_zerop (DR_INIT (dr_info->dr)) |
7376 | || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr))) |
7377 | || !alias_sets_conflict_p (get_alias_set (vectype), |
7378 | get_alias_set (TREE_TYPE (ref_type)))) |
7379 | { |
7380 | if (dump_enabled_p ()) |
7381 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7382 | "unsupported OpenMP scan store.\n" ); |
7383 | return false; |
7384 | } |
7385 | |
  /* We need to pattern match code built by OpenMP lowering and simplified
     by subsequent optimizations into something we can handle.
7388 | #pragma omp simd reduction(inscan,+:r) |
7389 | for (...) |
7390 | { |
7391 | r += something (); |
7392 | #pragma omp scan inclusive (r) |
7393 | use (r); |
7394 | } |
7395 | shall have body with: |
7396 | // Initialization for input phase, store the reduction initializer: |
7397 | _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); |
7398 | _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); |
7399 | D.2042[_21] = 0; |
7400 | // Actual input phase: |
7401 | ... |
7402 | r.0_5 = D.2042[_20]; |
7403 | _6 = _4 + r.0_5; |
7404 | D.2042[_20] = _6; |
7405 | // Initialization for scan phase: |
7406 | _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2); |
7407 | _26 = D.2043[_25]; |
7408 | _27 = D.2042[_25]; |
7409 | _28 = _26 + _27; |
7410 | D.2043[_25] = _28; |
7411 | D.2042[_25] = _28; |
7412 | // Actual scan phase: |
7413 | ... |
7414 | r.1_8 = D.2042[_20]; |
7415 | ... |
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++-ish user
     defined reduction, while still letting the vectorizer pattern
     recognize it and turn it into the appropriate vectorized scan.
7423 | |
7424 | For exclusive scan, this is slightly different: |
7425 | #pragma omp simd reduction(inscan,+:r) |
7426 | for (...) |
7427 | { |
7428 | use (r); |
7429 | #pragma omp scan exclusive (r) |
7430 | r += something (); |
7431 | } |
7432 | shall have body with: |
7433 | // Initialization for input phase, store the reduction initializer: |
7434 | _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); |
7435 | _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); |
7436 | D.2042[_21] = 0; |
7437 | // Actual input phase: |
7438 | ... |
7439 | r.0_5 = D.2042[_20]; |
7440 | _6 = _4 + r.0_5; |
7441 | D.2042[_20] = _6; |
7442 | // Initialization for scan phase: |
7443 | _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); |
7444 | _26 = D.2043[_25]; |
7445 | D.2044[_25] = _26; |
7446 | _27 = D.2042[_25]; |
7447 | _28 = _26 + _27; |
7448 | D.2043[_25] = _28; |
7449 | // Actual scan phase: |
7450 | ... |
7451 | r.1_8 = D.2044[_20]; |
7452 | ... */ |
7453 | |
7454 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) |
7455 | { |
7456 | /* Match the D.2042[_21] = 0; store above. Just require that |
7457 | it is a constant or external definition store. */ |
7458 | if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def) |
7459 | { |
7460 | fail_init: |
7461 | if (dump_enabled_p ()) |
7462 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7463 | "unsupported OpenMP scan initializer store.\n" ); |
7464 | return false; |
7465 | } |
7466 | |
7467 | if (! loop_vinfo->scan_map) |
7468 | loop_vinfo->scan_map = new hash_map<tree, tree>; |
7469 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7470 | tree &cached = loop_vinfo->scan_map->get_or_insert (k: var); |
7471 | if (cached) |
7472 | goto fail_init; |
7473 | cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info)); |
7474 | |
7475 | /* These stores can be vectorized normally. */ |
7476 | return true; |
7477 | } |
7478 | |
7479 | if (rhs_dt != vect_internal_def) |
7480 | { |
7481 | fail: |
7482 | if (dump_enabled_p ()) |
7483 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7484 | "unsupported OpenMP scan combiner pattern.\n" ); |
7485 | return false; |
7486 | } |
7487 | |
7488 | gimple *stmt = STMT_VINFO_STMT (stmt_info); |
7489 | tree rhs = gimple_assign_rhs1 (gs: stmt); |
7490 | if (TREE_CODE (rhs) != SSA_NAME) |
7491 | goto fail; |
7492 | |
7493 | gimple *other_store_stmt = NULL; |
7494 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7495 | bool inscan_var_store |
7496 | = lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var)) != NULL; |
7497 | |
7498 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7499 | { |
7500 | if (!inscan_var_store) |
7501 | { |
7502 | use_operand_p use_p; |
7503 | imm_use_iterator iter; |
7504 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7505 | { |
7506 | gimple *use_stmt = USE_STMT (use_p); |
7507 | if (use_stmt == stmt || is_gimple_debug (gs: use_stmt)) |
7508 | continue; |
7509 | if (gimple_bb (g: use_stmt) != gimple_bb (g: stmt) |
7510 | || !is_gimple_assign (gs: use_stmt) |
7511 | || gimple_assign_rhs_class (gs: use_stmt) != GIMPLE_BINARY_RHS |
7512 | || other_store_stmt |
7513 | || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) |
7514 | goto fail; |
7515 | other_store_stmt = use_stmt; |
7516 | } |
7517 | if (other_store_stmt == NULL) |
7518 | goto fail; |
7519 | rhs = gimple_assign_lhs (gs: other_store_stmt); |
7520 | if (!single_imm_use (var: rhs, use_p: &use_p, stmt: &other_store_stmt)) |
7521 | goto fail; |
7522 | } |
7523 | } |
7524 | else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) |
7525 | { |
7526 | use_operand_p use_p; |
7527 | imm_use_iterator iter; |
7528 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7529 | { |
7530 | gimple *use_stmt = USE_STMT (use_p); |
7531 | if (use_stmt == stmt || is_gimple_debug (gs: use_stmt)) |
7532 | continue; |
7533 | if (other_store_stmt) |
7534 | goto fail; |
7535 | other_store_stmt = use_stmt; |
7536 | } |
7537 | } |
7538 | else |
7539 | goto fail; |
7540 | |
7541 | gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); |
7542 | if (gimple_bb (g: def_stmt) != gimple_bb (g: stmt) |
7543 | || !is_gimple_assign (gs: def_stmt) |
7544 | || gimple_assign_rhs_class (gs: def_stmt) != GIMPLE_BINARY_RHS) |
7545 | goto fail; |
7546 | |
7547 | enum tree_code code = gimple_assign_rhs_code (gs: def_stmt); |
7548 | /* For pointer addition, we should use the normal plus for the vector |
7549 | operation. */ |
7550 | switch (code) |
7551 | { |
7552 | case POINTER_PLUS_EXPR: |
7553 | code = PLUS_EXPR; |
7554 | break; |
7555 | case MULT_HIGHPART_EXPR: |
7556 | goto fail; |
7557 | default: |
7558 | break; |
7559 | } |
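  /* The scan combiner has to be a commutative binary operation, since the
     matching below may swap its operands and the vectorized sequence
     combines the partial results in a different order.  */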
7560 | if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code)) |
7561 | goto fail; |
7562 | |
7563 | tree rhs1 = gimple_assign_rhs1 (gs: def_stmt); |
7564 | tree rhs2 = gimple_assign_rhs2 (gs: def_stmt); |
7565 | if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) |
7566 | goto fail; |
7567 | |
7568 | gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); |
7569 | gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); |
7570 | if (gimple_bb (g: load1_stmt) != gimple_bb (g: stmt) |
7571 | || !gimple_assign_load_p (load1_stmt) |
7572 | || gimple_bb (g: load2_stmt) != gimple_bb (g: stmt) |
7573 | || !gimple_assign_load_p (load2_stmt)) |
7574 | goto fail; |
7575 | |
7576 | stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); |
7577 | stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); |
7578 | if (load1_stmt_info == NULL |
7579 | || load2_stmt_info == NULL |
7580 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) |
7581 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) |
7582 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) |
7583 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) |
7584 | goto fail; |
7585 | |
7586 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) |
7587 | { |
7588 | dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); |
7589 | if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR |
7590 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) |
7591 | goto fail; |
7592 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); |
7593 | tree lrhs; |
7594 | if (lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var1))) |
7595 | lrhs = rhs1; |
7596 | else |
7597 | lrhs = rhs2; |
7598 | use_operand_p use_p; |
7599 | imm_use_iterator iter; |
7600 | FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) |
7601 | { |
7602 | gimple *use_stmt = USE_STMT (use_p); |
7603 | if (use_stmt == def_stmt || is_gimple_debug (gs: use_stmt)) |
7604 | continue; |
7605 | if (other_store_stmt) |
7606 | goto fail; |
7607 | other_store_stmt = use_stmt; |
7608 | } |
7609 | } |
7610 | |
7611 | if (other_store_stmt == NULL) |
7612 | goto fail; |
7613 | if (gimple_bb (g: other_store_stmt) != gimple_bb (g: stmt) |
7614 | || !gimple_store_p (gs: other_store_stmt)) |
7615 | goto fail; |
7616 | |
7617 | stmt_vec_info other_store_stmt_info |
7618 | = loop_vinfo->lookup_stmt (other_store_stmt); |
7619 | if (other_store_stmt_info == NULL |
7620 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) |
7621 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) |
7622 | goto fail; |
7623 | |
7624 | gimple *stmt1 = stmt; |
7625 | gimple *stmt2 = other_store_stmt; |
7626 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
7627 | std::swap (a&: stmt1, b&: stmt2); |
7628 | if (scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1), |
7629 | ref2: gimple_assign_rhs1 (gs: load2_stmt))) |
7630 | { |
7631 | std::swap (a&: rhs1, b&: rhs2); |
7632 | std::swap (a&: load1_stmt, b&: load2_stmt); |
7633 | std::swap (a&: load1_stmt_info, b&: load2_stmt_info); |
7634 | } |
7635 | if (!scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1), |
7636 | ref2: gimple_assign_rhs1 (gs: load1_stmt))) |
7637 | goto fail; |
7638 | |
7639 | tree var3 = NULL_TREE; |
7640 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 |
7641 | && !scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt2), |
7642 | ref2: gimple_assign_rhs1 (gs: load2_stmt))) |
7643 | goto fail; |
7644 | else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7645 | { |
7646 | dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); |
7647 | if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR |
7648 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) |
7649 | goto fail; |
7650 | var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); |
7651 | if (!lookup_attribute (attr_name: "omp simd array" , DECL_ATTRIBUTES (var3)) |
7652 | || lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var3)) |
7653 | || lookup_attribute (attr_name: "omp simd inscan exclusive" , |
7654 | DECL_ATTRIBUTES (var3))) |
7655 | goto fail; |
7656 | } |
7657 | |
7658 | dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); |
7659 | if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR |
7660 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0))) |
7661 | goto fail; |
7662 | |
7663 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7664 | tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0); |
7665 | if (!lookup_attribute (attr_name: "omp simd array" , DECL_ATTRIBUTES (var1)) |
7666 | || !lookup_attribute (attr_name: "omp simd array" , DECL_ATTRIBUTES (var2)) |
7667 | || (!lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var1))) |
7668 | == (!lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var2)))) |
7669 | goto fail; |
7670 | |
7671 | if (lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var1))) |
7672 | std::swap (a&: var1, b&: var2); |
7673 | |
7674 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7675 | { |
7676 | if (!lookup_attribute (attr_name: "omp simd inscan exclusive" , |
7677 | DECL_ATTRIBUTES (var1))) |
7678 | goto fail; |
7679 | var1 = var3; |
7680 | } |
7681 | |
7682 | if (loop_vinfo->scan_map == NULL) |
7683 | goto fail; |
7684 | tree *init = loop_vinfo->scan_map->get (k: var1); |
7685 | if (init == NULL) |
7686 | goto fail; |
7687 | |
  /* The IL is as expected; now check whether we can actually vectorize it.
7689 | Inclusive scan: |
7690 | _26 = D.2043[_25]; |
7691 | _27 = D.2042[_25]; |
7692 | _28 = _26 + _27; |
7693 | D.2043[_25] = _28; |
7694 | D.2042[_25] = _28; |
7695 | should be vectorized as (where _40 is the vectorized rhs |
7696 | from the D.2042[_21] = 0; store): |
7697 | _30 = MEM <vector(8) int> [(int *)&D.2043]; |
7698 | _31 = MEM <vector(8) int> [(int *)&D.2042]; |
7699 | _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7700 | _33 = _31 + _32; |
7701 | // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; |
7702 | _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; |
7703 | _35 = _33 + _34; |
7704 | // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7705 | // _31[1]+.._31[4], ... _31[4]+.._31[7] }; |
7706 | _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; |
7707 | _37 = _35 + _36; |
7708 | // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7709 | // _31[0]+.._31[4], ... _31[0]+.._31[7] }; |
7710 | _38 = _30 + _37; |
7711 | _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; |
7712 | MEM <vector(8) int> [(int *)&D.2043] = _39; |
7713 | MEM <vector(8) int> [(int *)&D.2042] = _38; |
7714 | Exclusive scan: |
7715 | _26 = D.2043[_25]; |
7716 | D.2044[_25] = _26; |
7717 | _27 = D.2042[_25]; |
7718 | _28 = _26 + _27; |
7719 | D.2043[_25] = _28; |
7720 | should be vectorized as (where _40 is the vectorized rhs |
7721 | from the D.2042[_21] = 0; store): |
7722 | _30 = MEM <vector(8) int> [(int *)&D.2043]; |
7723 | _31 = MEM <vector(8) int> [(int *)&D.2042]; |
7724 | _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7725 | _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7726 | _34 = _32 + _33; |
7727 | // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], |
7728 | // _31[3]+_31[4], ... _31[5]+.._31[6] }; |
7729 | _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; |
7730 | _36 = _34 + _35; |
7731 | // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7732 | // _31[1]+.._31[4], ... _31[3]+.._31[6] }; |
7733 | _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>; |
7734 | _38 = _36 + _37; |
7735 | // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7736 | // _31[0]+.._31[4], ... _31[0]+.._31[6] }; |
7737 | _39 = _30 + _38; |
7738 | _50 = _31 + _39; |
7739 | _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>; |
7740 | MEM <vector(8) int> [(int *)&D.2044] = _39; |
7741 | MEM <vector(8) int> [(int *)&D.2042] = _51; */ |
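  /* Finally verify that the target supports the combining operation on the
     vector type as well as the permutations or whole vector shifts needed
     at each step.  */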
7742 | enum machine_mode vec_mode = TYPE_MODE (vectype); |
7743 | optab optab = optab_for_tree_code (code, vectype, optab_default); |
7744 | if (!optab || optab_handler (op: optab, mode: vec_mode) == CODE_FOR_nothing) |
7745 | goto fail; |
7746 | |
7747 | int units_log2 = scan_store_can_perm_p (vectype, init: *init); |
7748 | if (units_log2 == -1) |
7749 | goto fail; |
7750 | |
7751 | return true; |
7752 | } |
7753 | |
7754 | |
7755 | /* Function vectorizable_scan_store. |
7756 | |
   Helper of vectorizable_store, taking the same arguments.
7758 | Handle only the transformation, checking is done in check_scan_store. */ |
7759 | |
7760 | static bool |
7761 | vectorizable_scan_store (vec_info *vinfo, |
7762 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
7763 | gimple **vec_stmt, int ncopies) |
7764 | { |
7765 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
7766 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
7767 | tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); |
7768 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
7769 | |
7770 | if (dump_enabled_p ()) |
7771 | dump_printf_loc (MSG_NOTE, vect_location, |
7772 | "transform scan store. ncopies = %d\n" , ncopies); |
7773 | |
7774 | gimple *stmt = STMT_VINFO_STMT (stmt_info); |
7775 | tree rhs = gimple_assign_rhs1 (gs: stmt); |
7776 | gcc_assert (TREE_CODE (rhs) == SSA_NAME); |
7777 | |
7778 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7779 | bool inscan_var_store |
7780 | = lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var)) != NULL; |
7781 | |
7782 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
7783 | { |
7784 | use_operand_p use_p; |
7785 | imm_use_iterator iter; |
7786 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7787 | { |
7788 | gimple *use_stmt = USE_STMT (use_p); |
7789 | if (use_stmt == stmt || is_gimple_debug (gs: use_stmt)) |
7790 | continue; |
7791 | rhs = gimple_assign_lhs (gs: use_stmt); |
7792 | break; |
7793 | } |
7794 | } |
7795 | |
7796 | gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); |
7797 | enum tree_code code = gimple_assign_rhs_code (gs: def_stmt); |
7798 | if (code == POINTER_PLUS_EXPR) |
7799 | code = PLUS_EXPR; |
7800 | gcc_assert (TREE_CODE_LENGTH (code) == binary_op |
7801 | && commutative_tree_code (code)); |
7802 | tree rhs1 = gimple_assign_rhs1 (gs: def_stmt); |
7803 | tree rhs2 = gimple_assign_rhs2 (gs: def_stmt); |
7804 | gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME); |
7805 | gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); |
7806 | gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); |
7807 | stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); |
7808 | stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); |
7809 | dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); |
7810 | dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); |
7811 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); |
7812 | tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); |
7813 | |
7814 | if (lookup_attribute (attr_name: "omp simd inscan" , DECL_ATTRIBUTES (var1))) |
7815 | { |
7816 | std::swap (a&: rhs1, b&: rhs2); |
7817 | std::swap (a&: var1, b&: var2); |
7818 | std::swap (a&: load1_dr_info, b&: load2_dr_info); |
7819 | } |
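  /* After the swap, VAR1 is the plain "omp simd array" variable whose
     initializer store rhs was cached in scan_map by check_scan_store.  */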
7820 | |
7821 | tree *init = loop_vinfo->scan_map->get (k: var1); |
7822 | gcc_assert (init); |
7823 | |
7824 | unsigned HOST_WIDE_INT nunits; |
7825 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits)) |
7826 | gcc_unreachable (); |
7827 | auto_vec<enum scan_store_kind, 16> use_whole_vector; |
7828 | int units_log2 = scan_store_can_perm_p (vectype, init: *init, use_whole_vector: &use_whole_vector); |
7829 | gcc_assert (units_log2 > 0); |
7830 | auto_vec<tree, 16> perms; |
7831 | perms.quick_grow (len: units_log2 + 1); |
7832 | tree zero_vec = NULL_TREE, masktype = NULL_TREE; |
7833 | for (int i = 0; i <= units_log2; ++i) |
7834 | { |
7835 | unsigned HOST_WIDE_INT j, k; |
7836 | vec_perm_builder sel (nunits, nunits, 1); |
7837 | sel.quick_grow (len: nunits); |
7838 | if (i == units_log2) |
7839 | for (j = 0; j < nunits; ++j) |
7840 | sel[j] = nunits - 1; |
7841 | else |
7842 | { |
7843 | for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) |
7844 | sel[j] = j; |
7845 | for (k = 0; j < nunits; ++j, ++k) |
7846 | sel[j] = nunits + k; |
7847 | } |
7848 | vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); |
7849 | if (!use_whole_vector.is_empty () |
7850 | && use_whole_vector[i] != scan_store_kind_perm) |
7851 | { |
7852 | if (zero_vec == NULL_TREE) |
7853 | zero_vec = build_zero_cst (vectype); |
7854 | if (masktype == NULL_TREE |
7855 | && use_whole_vector[i] == scan_store_kind_lshift_cond) |
7856 | masktype = truth_type_for (vectype); |
7857 | perms[i] = vect_gen_perm_mask_any (vectype, indices); |
7858 | } |
7859 | else |
7860 | perms[i] = vect_gen_perm_mask_checked (vectype, indices); |
7861 | } |
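  /* PERMS[0 .. UNITS_LOG2-1] now hold the shift-by-2^i selectors and
     PERMS[UNITS_LOG2] the broadcast of the last lane.  */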
7862 | |
7863 | tree vec_oprnd1 = NULL_TREE; |
7864 | tree vec_oprnd2 = NULL_TREE; |
7865 | tree vec_oprnd3 = NULL_TREE; |
7866 | tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); |
7867 | tree dataref_offset = build_int_cst (ref_type, 0); |
7868 | tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, |
7869 | aggr_type: vectype, memory_access_type: VMAT_CONTIGUOUS); |
7870 | tree ldataref_ptr = NULL_TREE; |
7871 | tree orig = NULL_TREE; |
7872 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
7873 | ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); |
7874 | auto_vec<tree> vec_oprnds1; |
7875 | auto_vec<tree> vec_oprnds2; |
7876 | auto_vec<tree> vec_oprnds3; |
7877 | vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies, |
7878 | op0: *init, vec_oprnds0: &vec_oprnds1, |
7879 | op1: ldataref_ptr == NULL ? rhs1 : NULL, vec_oprnds1: &vec_oprnds2, |
7880 | op2: rhs2, vec_oprnds2: &vec_oprnds3); |
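  /* VEC_OPRNDS1 holds the vectorized initializer, VEC_OPRNDS2 the
     vectorized rhs1 (unless LDATAREF_PTR is set, in which case it is
     loaded explicitly in the loop below) and VEC_OPRNDS3 the vectorized
     rhs2.  */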
7881 | for (int j = 0; j < ncopies; j++) |
7882 | { |
7883 | vec_oprnd1 = vec_oprnds1[j]; |
7884 | if (ldataref_ptr == NULL) |
7885 | vec_oprnd2 = vec_oprnds2[j]; |
7886 | vec_oprnd3 = vec_oprnds3[j]; |
7887 | if (j == 0) |
7888 | orig = vec_oprnd3; |
7889 | else if (!inscan_var_store) |
7890 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
7891 | |
7892 | if (ldataref_ptr) |
7893 | { |
7894 | vec_oprnd2 = make_ssa_name (var: vectype); |
7895 | tree data_ref = fold_build2 (MEM_REF, vectype, |
7896 | unshare_expr (ldataref_ptr), |
7897 | dataref_offset); |
7898 | vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); |
7899 | gimple *g = gimple_build_assign (vec_oprnd2, data_ref); |
7900 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7901 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7902 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7903 | } |
7904 | |
7905 | tree v = vec_oprnd2; |
7906 | for (int i = 0; i < units_log2; ++i) |
7907 | { |
7908 | tree new_temp = make_ssa_name (var: vectype); |
7909 | gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, |
7910 | (zero_vec |
7911 | && (use_whole_vector[i] |
7912 | != scan_store_kind_perm)) |
7913 | ? zero_vec : vec_oprnd1, v, |
7914 | perms[i]); |
7915 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7916 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7917 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7918 | |
7919 | if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond) |
7920 | { |
	      /* The whole vector shift shifted in zeros, but if *init
		 is not initializer_zerop, we need to replace those elements
		 with the corresponding elements from vec_oprnd1.  */
7924 | tree_vector_builder vb (masktype, nunits, 1); |
7925 | for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k) |
7926 | vb.quick_push (obj: k < (HOST_WIDE_INT_1U << i) |
7927 | ? boolean_false_node : boolean_true_node); |
7928 | |
7929 | tree new_temp2 = make_ssa_name (var: vectype); |
7930 | g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (), |
7931 | new_temp, vec_oprnd1); |
7932 | vect_finish_stmt_generation (vinfo, stmt_info, |
7933 | vec_stmt: g, gsi); |
7934 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7935 | new_temp = new_temp2; |
7936 | } |
7937 | |
7938 | /* For exclusive scan, perform the perms[i] permutation once |
7939 | more. */ |
7940 | if (i == 0 |
7941 | && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 |
7942 | && v == vec_oprnd2) |
7943 | { |
7944 | v = new_temp; |
7945 | --i; |
7946 | continue; |
7947 | } |
7948 | |
7949 | tree new_temp2 = make_ssa_name (var: vectype); |
7950 | g = gimple_build_assign (new_temp2, code, v, new_temp); |
7951 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7952 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7953 | |
7954 | v = new_temp2; |
7955 | } |
7956 | |
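      /* V now holds the within-vector prefix scan; combine it with ORIG,
	 the carry-in (the vectorized rhs2 load for the first copy, the
	 broadcast of the last lane for subsequent ones).  */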
7957 | tree new_temp = make_ssa_name (var: vectype); |
7958 | gimple *g = gimple_build_assign (new_temp, code, orig, v); |
7959 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7960 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7961 | |
7962 | tree last_perm_arg = new_temp; |
      /* For exclusive scan, new_temp computed above is the exclusive scan
	 prefix sum.  Turn it into the inclusive prefix sum for the
	 broadcast of the last element into orig.  */
7966 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7967 | { |
7968 | last_perm_arg = make_ssa_name (var: vectype); |
7969 | g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); |
7970 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7971 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7972 | } |
7973 | |
7974 | orig = make_ssa_name (var: vectype); |
7975 | g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, |
7976 | last_perm_arg, perms[units_log2]); |
7977 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7978 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7979 | |
7980 | if (!inscan_var_store) |
7981 | { |
7982 | tree data_ref = fold_build2 (MEM_REF, vectype, |
7983 | unshare_expr (dataref_ptr), |
7984 | dataref_offset); |
7985 | vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); |
7986 | g = gimple_build_assign (data_ref, new_temp); |
7987 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
7988 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
7989 | } |
7990 | } |
7991 | |
7992 | if (inscan_var_store) |
7993 | for (int j = 0; j < ncopies; j++) |
7994 | { |
7995 | if (j != 0) |
7996 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
7997 | |
7998 | tree data_ref = fold_build2 (MEM_REF, vectype, |
7999 | unshare_expr (dataref_ptr), |
8000 | dataref_offset); |
8001 | vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); |
8002 | gimple *g = gimple_build_assign (data_ref, orig); |
8003 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
8004 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g); |
8005 | } |
8006 | return true; |
8007 | } |
8008 | |
8009 | |
8010 | /* Function vectorizable_store. |
8011 | |
   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8013 | that can be vectorized. |
8014 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
8015 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
8016 | Return true if STMT_INFO is vectorizable in this way. */ |
8017 | |
8018 | static bool |
8019 | vectorizable_store (vec_info *vinfo, |
8020 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
8021 | gimple **vec_stmt, slp_tree slp_node, |
8022 | stmt_vector_for_cost *cost_vec) |
8023 | { |
8024 | tree data_ref; |
8025 | tree vec_oprnd = NULL_TREE; |
8026 | tree elem_type; |
8027 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
8028 | class loop *loop = NULL; |
8029 | machine_mode vec_mode; |
8030 | tree dummy; |
8031 | enum vect_def_type rhs_dt = vect_unknown_def_type; |
8032 | enum vect_def_type mask_dt = vect_unknown_def_type; |
8033 | tree dataref_ptr = NULL_TREE; |
8034 | tree dataref_offset = NULL_TREE; |
8035 | gimple *ptr_incr = NULL; |
8036 | int ncopies; |
8037 | int j; |
8038 | stmt_vec_info first_stmt_info; |
8039 | bool grouped_store; |
8040 | unsigned int group_size, i; |
8041 | bool slp = (slp_node != NULL); |
8042 | unsigned int vec_num; |
8043 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
8044 | tree aggr_type; |
8045 | gather_scatter_info gs_info; |
8046 | poly_uint64 vf; |
8047 | vec_load_store_type vls_type; |
8048 | tree ref_type; |
8049 | |
8050 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
8051 | return false; |
8052 | |
8053 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
8054 | && ! vec_stmt) |
8055 | return false; |
8056 | |
8057 | /* Is vectorizable store? */ |
8058 | |
8059 | tree mask = NULL_TREE, mask_vectype = NULL_TREE; |
8060 | slp_tree mask_node = NULL; |
8061 | if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt)) |
8062 | { |
8063 | tree scalar_dest = gimple_assign_lhs (gs: assign); |
8064 | if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR |
8065 | && is_pattern_stmt_p (stmt_info)) |
8066 | scalar_dest = TREE_OPERAND (scalar_dest, 0); |
8067 | if (TREE_CODE (scalar_dest) != ARRAY_REF |
8068 | && TREE_CODE (scalar_dest) != BIT_FIELD_REF |
8069 | && TREE_CODE (scalar_dest) != INDIRECT_REF |
8070 | && TREE_CODE (scalar_dest) != COMPONENT_REF |
8071 | && TREE_CODE (scalar_dest) != IMAGPART_EXPR |
8072 | && TREE_CODE (scalar_dest) != REALPART_EXPR |
8073 | && TREE_CODE (scalar_dest) != MEM_REF) |
8074 | return false; |
8075 | } |
8076 | else |
8077 | { |
8078 | gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt); |
8079 | if (!call || !gimple_call_internal_p (gs: call)) |
8080 | return false; |
8081 | |
8082 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
8083 | if (!internal_store_fn_p (ifn)) |
8084 | return false; |
8085 | |
8086 | int mask_index = internal_fn_mask_index (ifn); |
8087 | if (mask_index >= 0 && slp_node) |
8088 | mask_index = vect_slp_child_index_for_operand |
8089 | (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
8090 | if (mask_index >= 0 |
8091 | && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, |
8092 | mask: &mask, mask_node: &mask_node, mask_dt_out: &mask_dt, |
8093 | mask_vectype_out: &mask_vectype)) |
8094 | return false; |
8095 | } |
8096 | |
8097 | /* Cannot have hybrid store SLP -- that would mean storing to the |
8098 | same location twice. */ |
8099 | gcc_assert (slp == PURE_SLP_STMT (stmt_info)); |
8100 | |
8101 | tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; |
8102 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
8103 | |
8104 | if (loop_vinfo) |
8105 | { |
8106 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
8107 | vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
8108 | } |
8109 | else |
8110 | vf = 1; |
8111 | |
8112 | /* Multiple types in SLP are handled by creating the appropriate number of |
8113 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
8114 | case of SLP. */ |
8115 | if (slp) |
8116 | ncopies = 1; |
8117 | else |
8118 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
8119 | |
8120 | gcc_assert (ncopies >= 1); |
8121 | |
8122 | /* FORNOW. This restriction should be relaxed. */ |
8123 | if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1) |
8124 | { |
8125 | if (dump_enabled_p ()) |
8126 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8127 | "multiple types in nested loop.\n" ); |
8128 | return false; |
8129 | } |
8130 | |
8131 | tree op; |
8132 | slp_tree op_node; |
8133 | if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, |
8134 | rhs: &op, rhs_node: &op_node, rhs_dt_out: &rhs_dt, rhs_vectype_out: &rhs_vectype, vls_type_out: &vls_type)) |
8135 | return false; |
8136 | |
8137 | elem_type = TREE_TYPE (vectype); |
8138 | vec_mode = TYPE_MODE (vectype); |
8139 | |
8140 | if (!STMT_VINFO_DATA_REF (stmt_info)) |
8141 | return false; |
8142 | |
8143 | vect_memory_access_type memory_access_type; |
8144 | enum dr_alignment_support alignment_support_scheme; |
8145 | int misalignment; |
8146 | poly_int64 poffset; |
8147 | internal_fn lanes_ifn; |
8148 | if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type, |
8149 | ncopies, memory_access_type: &memory_access_type, poffset: &poffset, |
8150 | alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info, |
8151 | lanes_ifn: &lanes_ifn)) |
8152 | return false; |
8153 | |
8154 | if (mask) |
8155 | { |
8156 | if (memory_access_type == VMAT_CONTIGUOUS) |
8157 | { |
8158 | if (!VECTOR_MODE_P (vec_mode) |
8159 | || !can_vec_mask_load_store_p (vec_mode, |
8160 | TYPE_MODE (mask_vectype), false)) |
8161 | return false; |
8162 | } |
8163 | else if (memory_access_type != VMAT_LOAD_STORE_LANES |
8164 | && (memory_access_type != VMAT_GATHER_SCATTER |
8165 | || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) |
8166 | { |
8167 | if (dump_enabled_p ()) |
8168 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8169 | "unsupported access type for masked store.\n" ); |
8170 | return false; |
8171 | } |
8172 | else if (memory_access_type == VMAT_GATHER_SCATTER |
8173 | && gs_info.ifn == IFN_LAST |
8174 | && !gs_info.decl) |
8175 | { |
8176 | if (dump_enabled_p ()) |
8177 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8178 | "unsupported masked emulated scatter.\n" ); |
8179 | return false; |
8180 | } |
8181 | } |
8182 | else |
8183 | { |
      /* FORNOW.  In some cases we can vectorize even if the data type is
	 not supported (e.g. array initialization with 0).  */
8186 | if (optab_handler (op: mov_optab, mode: vec_mode) == CODE_FOR_nothing) |
8187 | return false; |
8188 | } |
8189 | |
8190 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; |
8191 | grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
8192 | && memory_access_type != VMAT_GATHER_SCATTER |
8193 | && (slp || memory_access_type != VMAT_CONTIGUOUS)); |
8194 | if (grouped_store) |
8195 | { |
8196 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
8197 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
8198 | group_size = DR_GROUP_SIZE (first_stmt_info); |
8199 | } |
8200 | else |
8201 | { |
8202 | first_stmt_info = stmt_info; |
8203 | first_dr_info = dr_info; |
8204 | group_size = vec_num = 1; |
8205 | } |
8206 | |
8207 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt) |
8208 | { |
8209 | if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask, |
8210 | memory_access_type)) |
8211 | return false; |
8212 | } |
8213 | |
8214 | bool costing_p = !vec_stmt; |
8215 | if (costing_p) /* transformation not required. */ |
8216 | { |
8217 | STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; |
8218 | |
8219 | if (loop_vinfo |
8220 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
8221 | check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, |
8222 | vls_type, group_size, |
8223 | memory_access_type, gs_info: &gs_info, |
8224 | scalar_mask: mask); |
8225 | |
8226 | if (slp_node |
8227 | && (!vect_maybe_update_slp_op_vectype (op_node, vectype) |
8228 | || (mask |
8229 | && !vect_maybe_update_slp_op_vectype (mask_node, |
8230 | mask_vectype)))) |
8231 | { |
8232 | if (dump_enabled_p ()) |
8233 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8234 | "incompatible vector types for invariants\n" ); |
8235 | return false; |
8236 | } |
8237 | |
8238 | if (dump_enabled_p () |
8239 | && memory_access_type != VMAT_ELEMENTWISE |
8240 | && memory_access_type != VMAT_GATHER_SCATTER |
8241 | && alignment_support_scheme != dr_aligned) |
8242 | dump_printf_loc (MSG_NOTE, vect_location, |
8243 | "Vectorizing an unaligned access.\n" ); |
8244 | |
8245 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; |
8246 | |
      /* As function vect_transform_stmt shows, for interleaving stores
	 the whole chain is vectorized when the last store in the chain
	 is reached; the other stores in the group are skipped.  So we
	 only want to cost one store per group here.  Since it is not
	 trivial to get at the last one and costing the first one is
	 equivalent, use the first one instead.  */
8253 | if (grouped_store |
8254 | && !slp |
8255 | && first_stmt_info != stmt_info) |
8256 | return true; |
8257 | } |
8258 | gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); |
8259 | |
8260 | /* Transform. */ |
8261 | |
8262 | ensure_base_align (dr_info); |
8263 | |
8264 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) |
8265 | { |
8266 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS); |
8267 | gcc_assert (!slp); |
8268 | if (costing_p) |
8269 | { |
8270 | unsigned int inside_cost = 0, prologue_cost = 0; |
8271 | if (vls_type == VLS_STORE_INVARIANT) |
8272 | prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, |
8273 | stmt_info, misalign: 0, where: vect_prologue); |
8274 | vect_get_store_cost (vinfo, stmt_info, ncopies, |
8275 | alignment_support_scheme, misalignment, |
8276 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
8277 | |
8278 | if (dump_enabled_p ()) |
8279 | dump_printf_loc (MSG_NOTE, vect_location, |
8280 | "vect_model_store_cost: inside_cost = %d, " |
8281 | "prologue_cost = %d .\n" , |
8282 | inside_cost, prologue_cost); |
8283 | |
8284 | return true; |
8285 | } |
8286 | return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies); |
8287 | } |
8288 | |
8289 | if (grouped_store) |
8290 | { |
8291 | /* FORNOW */ |
8292 | gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); |
8293 | |
8294 | if (slp) |
8295 | { |
8296 | grouped_store = false; |
8297 | /* VEC_NUM is the number of vect stmts to be created for this |
8298 | group. */ |
8299 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8300 | first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
8301 | gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info) |
8302 | == first_stmt_info); |
8303 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
8304 | op = vect_get_store_rhs (stmt_info: first_stmt_info); |
8305 | } |
8306 | else |
8307 | /* VEC_NUM is the number of vect stmts to be created for this |
8308 | group. */ |
8309 | vec_num = group_size; |
8310 | |
8311 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
8312 | } |
8313 | else |
8314 | ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); |
8315 | |
8316 | if (!costing_p && dump_enabled_p ()) |
8317 | dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n" , |
8318 | ncopies); |
8319 | |
  /* Check if we need to update the prologue cost for an invariant,
     and update it accordingly if so.  If it's not an interleaving
     store, we can just check vls_type; but if it is one, we need to
     check the def_type of the stored value, since the current
     vls_type is only valid for first_stmt_info.  */
8326 | auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs) |
8327 | { |
8328 | gcc_assert (costing_p); |
8329 | if (slp) |
8330 | return; |
8331 | if (grouped_store) |
8332 | { |
8333 | gcc_assert (store_rhs); |
8334 | enum vect_def_type cdt; |
8335 | gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt)); |
8336 | if (cdt != vect_constant_def && cdt != vect_external_def) |
8337 | return; |
8338 | } |
8339 | else if (vls_type != VLS_STORE_INVARIANT) |
8340 | return; |
8341 | *prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info, |
8342 | misalign: 0, where: vect_prologue); |
8343 | }; |
8344 | |
8345 | if (memory_access_type == VMAT_ELEMENTWISE |
8346 | || memory_access_type == VMAT_STRIDED_SLP) |
8347 | { |
8348 | unsigned inside_cost = 0, prologue_cost = 0; |
8349 | gimple_stmt_iterator incr_gsi; |
8350 | bool insert_after; |
8351 | gimple *incr; |
8352 | tree offvar; |
8353 | tree ivstep; |
8354 | tree running_off; |
8355 | tree stride_base, stride_step, alias_off; |
8356 | tree vec_oprnd = NULL_TREE; |
8357 | tree dr_offset; |
8358 | unsigned int g; |
8359 | /* Checked by get_load_store_type. */ |
8360 | unsigned int const_nunits = nunits.to_constant (); |
8361 | |
8362 | gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); |
8363 | gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); |
8364 | |
8365 | dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info); |
8366 | stride_base |
8367 | = fold_build_pointer_plus |
8368 | (DR_BASE_ADDRESS (first_dr_info->dr), |
8369 | size_binop (PLUS_EXPR, |
8370 | convert_to_ptrofftype (dr_offset), |
8371 | convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); |
8372 | stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); |
8373 | |
8374 | /* For a store with loop-invariant (but other than power-of-2) |
8375 | stride (i.e. not a grouped access) like so: |
8376 | |
8377 | for (i = 0; i < n; i += stride) |
8378 | array[i] = ...; |
8379 | |
8380 | we generate a new induction variable and new stores from |
8381 | the components of the (vectorized) rhs: |
8382 | |
8383 | for (j = 0; ; j += VF*stride) |
8384 | vectemp = ...; |
8385 | tmp1 = vectemp[0]; |
8386 | array[j] = tmp1; |
8387 | tmp2 = vectemp[1]; |
8388 | array[j + stride] = tmp2; |
8389 | ... |
8390 | */ |
8391 | |
8392 | unsigned nstores = const_nunits; |
8393 | unsigned lnel = 1; |
8394 | tree ltype = elem_type; |
8395 | tree lvectype = vectype; |
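      /* NSTORES stores of LNEL elements each, of type LTYPE, are emitted
	 per vector; LVECTYPE is the type the vector is punned to before
	 the pieces are extracted.  */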
8396 | if (slp) |
8397 | { |
8398 | if (group_size < const_nunits |
8399 | && const_nunits % group_size == 0) |
8400 | { |
8401 | nstores = const_nunits / group_size; |
8402 | lnel = group_size; |
8403 | ltype = build_vector_type (elem_type, group_size); |
8404 | lvectype = vectype; |
8405 | |
	      /* First check whether the vec_extract optab supports direct
		 extraction of GROUP_SIZE-element sub-vectors; if it does
		 not, use the integer-mode fallback below.  */
8408 | scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); |
8409 | machine_mode vmode; |
8410 | if (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
8411 | || !related_vector_mode (TYPE_MODE (vectype), elmode, |
8412 | group_size).exists (mode: &vmode) |
8413 | || (convert_optab_handler (op: vec_extract_optab, |
8414 | TYPE_MODE (vectype), from_mode: vmode) |
8415 | == CODE_FOR_nothing)) |
8416 | { |
8417 | /* Try to avoid emitting an extract of vector elements |
8418 | by performing the extracts using an integer type of the |
8419 | same size, extracting from a vector of those and then |
8420 | re-interpreting it as the original vector type if |
8421 | supported. */ |
8422 | unsigned lsize |
8423 | = group_size * GET_MODE_BITSIZE (mode: elmode); |
8424 | unsigned int lnunits = const_nunits / group_size; |
	      /* If we can't construct such a vector, fall back to
		 element extracts from the original vector type and
		 element-size stores.  */
8428 | if (int_mode_for_size (size: lsize, limit: 0).exists (mode: &elmode) |
8429 | && VECTOR_MODE_P (TYPE_MODE (vectype)) |
8430 | && related_vector_mode (TYPE_MODE (vectype), elmode, |
8431 | lnunits).exists (mode: &vmode) |
8432 | && (convert_optab_handler (op: vec_extract_optab, |
8433 | to_mode: vmode, from_mode: elmode) |
8434 | != CODE_FOR_nothing)) |
8435 | { |
8436 | nstores = lnunits; |
8437 | lnel = group_size; |
8438 | ltype = build_nonstandard_integer_type (lsize, 1); |
8439 | lvectype = build_vector_type (ltype, nstores); |
8440 | } |
	      /* Else fall back to vector extraction anyway.
		 Emitting fewer stores matters more than avoiding spilling
		 of the vector we extract from.  Compared to the
		 construction case in vectorizable_load, no store-forwarding
		 issue exists here for reasonable architectures.  */
8446 | } |
8447 | } |
8448 | else if (group_size >= const_nunits |
8449 | && group_size % const_nunits == 0) |
8450 | { |
8451 | int mis_align = dr_misalignment (dr_info: first_dr_info, vectype); |
8452 | dr_alignment_support dr_align |
8453 | = vect_supportable_dr_alignment (vinfo, dr_info, vectype, |
8454 | mis_align); |
8455 | if (dr_align == dr_aligned |
8456 | || dr_align == dr_unaligned_supported) |
8457 | { |
8458 | nstores = 1; |
8459 | lnel = const_nunits; |
8460 | ltype = vectype; |
8461 | lvectype = vectype; |
8462 | alignment_support_scheme = dr_align; |
8463 | misalignment = mis_align; |
8464 | } |
8465 | } |
8466 | ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); |
8467 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8468 | } |
8469 | |
8470 | if (!costing_p) |
8471 | { |
8472 | ivstep = stride_step; |
8473 | ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, |
8474 | build_int_cst (TREE_TYPE (ivstep), vf)); |
8475 | |
8476 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
8477 | |
8478 | stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); |
8479 | ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); |
8480 | create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi, |
8481 | insert_after, &offvar, NULL); |
8482 | incr = gsi_stmt (i: incr_gsi); |
8483 | |
8484 | stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); |
8485 | } |
8486 | |
8487 | alias_off = build_int_cst (ref_type, 0); |
8488 | stmt_vec_info next_stmt_info = first_stmt_info; |
8489 | auto_vec<tree> vec_oprnds (ncopies); |
      /* When costing adjacent vector stores, we'd like to cost them once
	 using their total number instead of costing each one individually.  */
8492 | unsigned int n_adjacent_stores = 0; |
8493 | for (g = 0; g < group_size; g++) |
8494 | { |
8495 | running_off = offvar; |
8496 | if (!costing_p) |
8497 | { |
8498 | if (g) |
8499 | { |
8500 | tree size = TYPE_SIZE_UNIT (ltype); |
8501 | tree pos |
8502 | = fold_build2 (MULT_EXPR, sizetype, size_int (g), size); |
8503 | tree newoff = copy_ssa_name (var: running_off, NULL); |
8504 | incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, |
8505 | running_off, pos); |
8506 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi); |
8507 | running_off = newoff; |
8508 | } |
8509 | } |
8510 | if (!slp) |
8511 | op = vect_get_store_rhs (stmt_info: next_stmt_info); |
8512 | if (!costing_p) |
8513 | vect_get_vec_defs (vinfo, stmt_info: next_stmt_info, slp_node, ncopies, op0: op, |
8514 | vec_oprnds0: &vec_oprnds); |
8515 | else |
8516 | update_prologue_cost (&prologue_cost, op); |
8517 | unsigned int group_el = 0; |
8518 | unsigned HOST_WIDE_INT |
8519 | elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
8520 | for (j = 0; j < ncopies; j++) |
8521 | { |
8522 | if (!costing_p) |
8523 | { |
8524 | vec_oprnd = vec_oprnds[j]; |
8525 | /* Pun the vector to extract from if necessary. */ |
8526 | if (lvectype != vectype) |
8527 | { |
8528 | tree tem = make_ssa_name (var: lvectype); |
8529 | tree cvt |
8530 | = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd); |
8531 | gimple *pun = gimple_build_assign (tem, cvt); |
8532 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: pun, gsi); |
8533 | vec_oprnd = tem; |
8534 | } |
8535 | } |
8536 | for (i = 0; i < nstores; i++) |
8537 | { |
8538 | if (costing_p) |
8539 | { |
		  /* A vector extraction is only needed when there is
		     more than one store.  */
8542 | if (nstores > 1) |
8543 | inside_cost |
8544 | += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_to_scalar, |
8545 | stmt_info, misalign: 0, where: vect_body); |
		  /* Treat a single-lane vector store as a scalar store,
		     to avoid ICEs like PR110776.  */
8548 | if (VECTOR_TYPE_P (ltype) |
8549 | && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U)) |
8550 | n_adjacent_stores++; |
8551 | else |
8552 | inside_cost |
8553 | += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_store, |
8554 | stmt_info, misalign: 0, where: vect_body); |
8555 | continue; |
8556 | } |
8557 | tree newref, newoff; |
8558 | gimple *incr, *assign; |
8559 | tree size = TYPE_SIZE (ltype); |
8560 | /* Extract the i'th component. */ |
8561 | tree pos = fold_build2 (MULT_EXPR, bitsizetype, |
8562 | bitsize_int (i), size); |
8563 | tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd, |
8564 | size, pos); |
8565 | |
8566 | elem = force_gimple_operand_gsi (gsi, elem, true, |
8567 | NULL_TREE, true, |
8568 | GSI_SAME_STMT); |
8569 | |
8570 | tree this_off = build_int_cst (TREE_TYPE (alias_off), |
8571 | group_el * elsz); |
8572 | newref = build2 (MEM_REF, ltype, |
8573 | running_off, this_off); |
8574 | vect_copy_ref_info (newref, DR_REF (first_dr_info->dr)); |
8575 | |
8576 | /* And store it to *running_off. */ |
8577 | assign = gimple_build_assign (newref, elem); |
8578 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: assign, gsi); |
8579 | |
8580 | group_el += lnel; |
8581 | if (! slp |
8582 | || group_el == group_size) |
8583 | { |
8584 | newoff = copy_ssa_name (var: running_off, NULL); |
8585 | incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, |
8586 | running_off, stride_step); |
8587 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi); |
8588 | |
8589 | running_off = newoff; |
8590 | group_el = 0; |
8591 | } |
8592 | if (g == group_size - 1 |
8593 | && !slp) |
8594 | { |
8595 | if (j == 0 && i == 0) |
8596 | *vec_stmt = assign; |
8597 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: assign); |
8598 | } |
8599 | } |
8600 | } |
8601 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
8602 | vec_oprnds.truncate(size: 0); |
8603 | if (slp) |
8604 | break; |
8605 | } |
8606 | |
8607 | if (costing_p) |
8608 | { |
8609 | if (n_adjacent_stores > 0) |
8610 | vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores, |
8611 | alignment_support_scheme, misalignment, |
8612 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
8613 | if (dump_enabled_p ()) |
8614 | dump_printf_loc (MSG_NOTE, vect_location, |
8615 | "vect_model_store_cost: inside_cost = %d, " |
8616 | "prologue_cost = %d .\n" , |
8617 | inside_cost, prologue_cost); |
8618 | } |
8619 | |
8620 | return true; |
8621 | } |
8622 | |
8623 | gcc_assert (alignment_support_scheme); |
8624 | vec_loop_masks *loop_masks |
8625 | = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
8626 | ? &LOOP_VINFO_MASKS (loop_vinfo) |
8627 | : NULL); |
8628 | vec_loop_lens *loop_lens |
8629 | = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) |
8630 | ? &LOOP_VINFO_LENS (loop_vinfo) |
8631 | : NULL); |
8632 | |
  /* We shouldn't use a length-based approach if the loop is fully masked.  */
8634 | gcc_assert (!loop_lens || !loop_masks); |
8635 | |
8636 | /* Targets with store-lane instructions must not require explicit |
8637 | realignment. vect_supportable_dr_alignment always returns either |
8638 | dr_aligned or dr_unaligned_supported for masked operations. */ |
8639 | gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES |
8640 | && !mask |
8641 | && !loop_masks) |
8642 | || alignment_support_scheme == dr_aligned |
8643 | || alignment_support_scheme == dr_unaligned_supported); |
8644 | |
8645 | tree offset = NULL_TREE; |
8646 | if (!known_eq (poffset, 0)) |
8647 | offset = size_int (poffset); |
8648 | |
8649 | tree bump; |
8650 | tree vec_offset = NULL_TREE; |
8651 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
8652 | { |
8653 | aggr_type = NULL_TREE; |
8654 | bump = NULL_TREE; |
8655 | } |
8656 | else if (memory_access_type == VMAT_GATHER_SCATTER) |
8657 | { |
8658 | aggr_type = elem_type; |
8659 | if (!costing_p) |
8660 | vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, gs_info: &gs_info, |
8661 | dataref_bump: &bump, vec_offset: &vec_offset, loop_lens); |
8662 | } |
8663 | else |
8664 | { |
8665 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
8666 | aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); |
8667 | else |
8668 | aggr_type = vectype; |
8669 | bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, |
8670 | memory_access_type, loop_lens); |
8671 | } |
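  /* BUMP, when non-NULL, is the amount by which DATAREF_PTR is advanced
     between copies; for true gathers/scatters the per-lane offsets come
     from GS_INFO instead.  */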
8672 | |
8673 | if (mask && !costing_p) |
8674 | LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; |
8675 | |
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. we need to "unroll" the
     vector stmt by a factor of VF/nunits.  */
8680 | |
8681 | /* In case of interleaving (non-unit grouped access): |
8682 | |
8683 | S1: &base + 2 = x2 |
8684 | S2: &base = x0 |
8685 | S3: &base + 1 = x1 |
8686 | S4: &base + 3 = x3 |
8687 | |
8688 | We create vectorized stores starting from base address (the access of the |
8689 | first stmt in the chain (S2 in the above example), when the last store stmt |
8690 | of the chain (S4) is reached: |
8691 | |
8692 | VS1: &base = vx2 |
8693 | VS2: &base + vec_size*1 = vx0 |
8694 | VS3: &base + vec_size*2 = vx1 |
8695 | VS4: &base + vec_size*3 = vx3 |
8696 | |
8697 | Then permutation statements are generated: |
8698 | |
8699 | VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > |
8700 | VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > |
8701 | ... |
8702 | |
8703 | And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts |
8704 | (the order of the data-refs in the output of vect_permute_store_chain |
8705 | corresponds to the order of scalar stmts in the interleaving chain - see |
8706 | the documentation of vect_permute_store_chain()). |
8707 | |
8708 | In case of both multiple types and interleaving, above vector stores and |
8709 | permutation stmts are created for every copy. The result vector stmts are |
8710 | put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding |
8711 | STMT_VINFO_RELATED_STMT for the next copies. |
8712 | */ |
8713 | |
8714 | auto_vec<tree> dr_chain (group_size); |
8715 | auto_vec<tree> vec_masks; |
8716 | tree vec_mask = NULL; |
8717 | auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size); |
8718 | for (i = 0; i < group_size; i++) |
8719 | gvec_oprnds.quick_push (obj: new auto_vec<tree> (ncopies)); |
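  /* GVEC_OPRNDS[I] will hold the NCOPIES vectorized defs of the value
     stored by the I'th statement in the group.  */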
8720 | |
8721 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
8722 | { |
8723 | gcc_assert (!slp && grouped_store); |
8724 | unsigned inside_cost = 0, prologue_cost = 0; |
      /* When costing adjacent vector stores, we'd like to cost them once
	 using their total number instead of costing each one individually.  */
8727 | unsigned int n_adjacent_stores = 0; |
8728 | for (j = 0; j < ncopies; j++) |
8729 | { |
8730 | gimple *new_stmt; |
8731 | if (j == 0) |
8732 | { |
8733 | /* For interleaved stores we collect vectorized defs for all |
8734 | the stores in the group in DR_CHAIN. DR_CHAIN is then used |
8735 | as an input to vect_permute_store_chain(). */ |
8736 | stmt_vec_info next_stmt_info = first_stmt_info; |
8737 | for (i = 0; i < group_size; i++) |
8738 | { |
8739 | /* Since gaps are not supported for interleaved stores, |
8740 | DR_GROUP_SIZE is the exact number of stmts in the |
		     chain.  Therefore, NEXT_STMT_INFO can't be NULL.  */
8742 | op = vect_get_store_rhs (stmt_info: next_stmt_info); |
8743 | if (costing_p) |
8744 | update_prologue_cost (&prologue_cost, op); |
8745 | else |
8746 | { |
8747 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info, |
8748 | ncopies, op, |
8749 | vec_oprnds: gvec_oprnds[i]); |
8750 | vec_oprnd = (*gvec_oprnds[i])[0]; |
8751 | dr_chain.quick_push (obj: vec_oprnd); |
8752 | } |
8753 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
8754 | } |
8755 | |
8756 | if (!costing_p) |
8757 | { |
8758 | if (mask) |
8759 | { |
8760 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, |
8761 | op: mask, vec_oprnds: &vec_masks, |
8762 | vectype: mask_vectype); |
8763 | vec_mask = vec_masks[0]; |
8764 | } |
8765 | |
	      /* We should have caught mismatched types earlier.  */
8767 | gcc_assert ( |
8768 | useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd))); |
8769 | dataref_ptr |
8770 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, |
8771 | aggr_type, NULL, offset, &dummy, |
8772 | gsi, &ptr_incr, false, bump); |
8773 | } |
8774 | } |
8775 | else if (!costing_p) |
8776 | { |
8777 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
8778 | /* DR_CHAIN is then used as an input to |
8779 | vect_permute_store_chain(). */ |
8780 | for (i = 0; i < group_size; i++) |
8781 | { |
8782 | vec_oprnd = (*gvec_oprnds[i])[j]; |
8783 | dr_chain[i] = vec_oprnd; |
8784 | } |
8785 | if (mask) |
8786 | vec_mask = vec_masks[j]; |
8787 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
8788 | stmt_info, bump); |
8789 | } |
8790 | |
8791 | if (costing_p) |
8792 | { |
8793 | n_adjacent_stores += vec_num; |
8794 | continue; |
8795 | } |
8796 | |
8797 | /* Get an array into which we can store the individual vectors. */ |
8798 | tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num); |
8799 | |
8800 | /* Invalidate the current contents of VEC_ARRAY. This should |
8801 | become an RTL clobber too, which prevents the vector registers |
8802 | from being upward-exposed. */ |
8803 | vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array); |
8804 | |
8805 | /* Store the individual vectors into the array. */ |
8806 | for (i = 0; i < vec_num; i++) |
8807 | { |
8808 | vec_oprnd = dr_chain[i]; |
8809 | write_vector_array (vinfo, stmt_info, gsi, vect: vec_oprnd, array: vec_array, |
8810 | n: i); |
8811 | } |
8812 | |
8813 | tree final_mask = NULL; |
8814 | tree final_len = NULL; |
8815 | tree bias = NULL; |
8816 | if (loop_masks) |
8817 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
8818 | ncopies, vectype, j); |
8819 | if (vec_mask) |
8820 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask, |
8821 | vec_mask, gsi); |
8822 | |
8823 | if (lanes_ifn == IFN_MASK_LEN_STORE_LANES) |
8824 | { |
8825 | if (loop_lens) |
8826 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
8827 | ncopies, vectype, j, 1); |
8828 | else |
8829 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
8830 | signed char biasval |
8831 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
8832 | bias = build_int_cst (intQI_type_node, biasval); |
8833 | if (!final_mask) |
8834 | { |
8835 | mask_vectype = truth_type_for (vectype); |
8836 | final_mask = build_minus_one_cst (mask_vectype); |
8837 | } |
8838 | } |
8839 | |
8840 | gcall *call; |
8841 | if (final_len && final_mask) |
8842 | { |
8843 | /* Emit: |
8844 | MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, |
8845 | LEN, BIAS, VEC_ARRAY). */ |
8846 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
8847 | tree alias_ptr = build_int_cst (ref_type, align); |
8848 | call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6, |
8849 | dataref_ptr, alias_ptr, |
8850 | final_mask, final_len, bias, |
8851 | vec_array); |
8852 | } |
8853 | else if (final_mask) |
8854 | { |
8855 | /* Emit: |
8856 | MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, |
8857 | VEC_ARRAY). */ |
8858 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
8859 | tree alias_ptr = build_int_cst (ref_type, align); |
8860 | call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, |
8861 | dataref_ptr, alias_ptr, |
8862 | final_mask, vec_array); |
8863 | } |
8864 | else |
8865 | { |
8866 | /* Emit: |
8867 | MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ |
8868 | data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type); |
8869 | call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); |
8870 | gimple_call_set_lhs (gs: call, lhs: data_ref); |
8871 | } |
8872 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
8873 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
8874 | new_stmt = call; |
8875 | |
8876 | /* Record that VEC_ARRAY is now dead. */ |
8877 | vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array); |
8878 | if (j == 0) |
8879 | *vec_stmt = new_stmt; |
8880 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
8881 | } |
8882 | |
8883 | if (costing_p) |
8884 | { |
8885 | if (n_adjacent_stores > 0) |
8886 | vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores, |
8887 | alignment_support_scheme, misalignment, |
8888 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
8889 | if (dump_enabled_p ()) |
8890 | dump_printf_loc (MSG_NOTE, vect_location, |
8891 | "vect_model_store_cost: inside_cost = %d, " |
8892 | "prologue_cost = %d .\n" , |
8893 | inside_cost, prologue_cost); |
8894 | } |
8895 | |
8896 | return true; |
8897 | } |
8898 | |
8899 | if (memory_access_type == VMAT_GATHER_SCATTER) |
8900 | { |
8901 | gcc_assert (!grouped_store); |
8902 | auto_vec<tree> vec_offsets; |
8903 | unsigned int inside_cost = 0, prologue_cost = 0; |
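      /* A sketch of the statement this path emits per vector copy when
	 the target provides a scatter internal function (masked variant
	 shown; a sketch only, other forms add LEN/BIAS or drop the mask):

	     .MASK_SCATTER_STORE (dataref_ptr, vec_offset, scale,
				  vec_oprnd, final_mask);

	 i.e. lane K of VEC_OPRND is stored at
	 DATAREF_PTR + VEC_OFFSET[K] * SCALE when mask lane K is set.  */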
8904 | for (j = 0; j < ncopies; j++) |
8905 | { |
8906 | gimple *new_stmt; |
8907 | if (j == 0) |
8908 | { |
8909 | if (costing_p && vls_type == VLS_STORE_INVARIANT) |
8910 | prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, |
8911 | stmt_info, misalign: 0, where: vect_prologue); |
8912 | else if (!costing_p) |
8913 | { |
8914 | /* Since the store is not grouped, DR_GROUP_SIZE is 1, and |
8915 | DR_CHAIN is of size 1. */ |
8916 | gcc_assert (group_size == 1); |
8917 | if (slp_node) |
8918 | vect_get_slp_defs (op_node, gvec_oprnds[0]); |
8919 | else |
8920 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: first_stmt_info, |
8921 | ncopies, op, vec_oprnds: gvec_oprnds[0]); |
8922 | if (mask) |
8923 | { |
8924 | if (slp_node) |
8925 | vect_get_slp_defs (mask_node, &vec_masks); |
8926 | else |
8927 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
8928 | ncopies, |
8929 | op: mask, vec_oprnds: &vec_masks, |
8930 | vectype: mask_vectype); |
8931 | } |
8932 | |
8933 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
8934 | vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, |
8935 | slp_node, gs_info: &gs_info, |
8936 | dataref_ptr: &dataref_ptr, vec_offset: &vec_offsets); |
8937 | else |
8938 | dataref_ptr |
8939 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, |
8940 | aggr_type, NULL, offset, |
8941 | &dummy, gsi, &ptr_incr, false, |
8942 | bump); |
8943 | } |
8944 | } |
8945 | else if (!costing_p) |
8946 | { |
8947 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
8948 | if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
8949 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
8950 | gsi, stmt_info, bump); |
8951 | } |
8952 | |
8953 | new_stmt = NULL; |
8954 | for (i = 0; i < vec_num; ++i) |
8955 | { |
8956 | if (!costing_p) |
8957 | { |
8958 | vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i]; |
8959 | if (mask) |
8960 | vec_mask = vec_masks[vec_num * j + i]; |
		  /* We should have caught mismatched types earlier.  */
8962 | gcc_assert (useless_type_conversion_p (vectype, |
8963 | TREE_TYPE (vec_oprnd))); |
8964 | } |
8965 | unsigned HOST_WIDE_INT align; |
8966 | tree final_mask = NULL_TREE; |
8967 | tree final_len = NULL_TREE; |
8968 | tree bias = NULL_TREE; |
8969 | if (!costing_p) |
8970 | { |
8971 | if (loop_masks) |
8972 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, |
8973 | loop_masks, ncopies, |
8974 | vectype, j); |
8975 | if (vec_mask) |
8976 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, |
8977 | loop_mask: final_mask, vec_mask, gsi); |
8978 | } |
8979 | |
8980 | if (gs_info.ifn != IFN_LAST) |
8981 | { |
8982 | if (costing_p) |
8983 | { |
8984 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
8985 | inside_cost |
8986 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
8987 | stmt_info, misalign: 0, where: vect_body); |
8988 | continue; |
8989 | } |
8990 | |
8991 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
8992 | vec_offset = vec_offsets[vec_num * j + i]; |
8993 | tree scale = size_int (gs_info.scale); |
8994 | |
8995 | if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE) |
8996 | { |
8997 | if (loop_lens) |
8998 | final_len = vect_get_loop_len (loop_vinfo, gsi, |
8999 | loop_lens, ncopies, |
9000 | vectype, j, 1); |
9001 | else |
9002 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
9003 | signed char biasval |
9004 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
9005 | bias = build_int_cst (intQI_type_node, biasval); |
9006 | if (!final_mask) |
9007 | { |
9008 | mask_vectype = truth_type_for (vectype); |
9009 | final_mask = build_minus_one_cst (mask_vectype); |
9010 | } |
9011 | } |
9012 | |
9013 | gcall *call; |
9014 | if (final_len && final_mask) |
9015 | call = gimple_build_call_internal |
9016 | (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, |
9017 | vec_offset, scale, vec_oprnd, final_mask, |
9018 | final_len, bias); |
9019 | else if (final_mask) |
9020 | call = gimple_build_call_internal |
9021 | (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, |
9022 | vec_offset, scale, vec_oprnd, final_mask); |
9023 | else |
9024 | call = gimple_build_call_internal (IFN_SCATTER_STORE, 4, |
9025 | dataref_ptr, vec_offset, |
9026 | scale, vec_oprnd); |
9027 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9028 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9029 | new_stmt = call; |
9030 | } |
9031 | else if (gs_info.decl) |
9032 | { |
9033 | /* The builtin decls path for scatter is legacy, x86 only. */ |
9034 | gcc_assert (nunits.is_constant () |
9035 | && (!final_mask |
9036 | || SCALAR_INT_MODE_P |
9037 | (TYPE_MODE (TREE_TYPE (final_mask))))); |
9038 | if (costing_p) |
9039 | { |
9040 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
9041 | inside_cost |
9042 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
9043 | stmt_info, misalign: 0, where: vect_body); |
9044 | continue; |
9045 | } |
9046 | poly_uint64 offset_nunits |
9047 | = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype); |
9048 | if (known_eq (nunits, offset_nunits)) |
9049 | { |
9050 | new_stmt = vect_build_one_scatter_store_call |
9051 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9052 | ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i], |
9053 | oprnd: vec_oprnd, mask: final_mask); |
9054 | vect_finish_stmt_generation (vinfo, stmt_info, |
9055 | vec_stmt: new_stmt, gsi); |
9056 | } |
9057 | else if (known_eq (nunits, offset_nunits * 2)) |
9058 | { |
		      /* We have an offset vector with half the number of
			 lanes but the builtins will store full vectype
			 data from the lower lanes.  */
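		      /* For example, with NUNITS == 8 the selector built
			 below is { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves
			 the high half of VEC_OPRND into the low lanes for
			 the second builtin call.  */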
9062 | new_stmt = vect_build_one_scatter_store_call |
9063 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9064 | ptr: dataref_ptr, |
9065 | offset: vec_offsets[2 * vec_num * j + 2 * i], |
9066 | oprnd: vec_oprnd, mask: final_mask); |
9067 | vect_finish_stmt_generation (vinfo, stmt_info, |
9068 | vec_stmt: new_stmt, gsi); |
9069 | int count = nunits.to_constant (); |
9070 | vec_perm_builder sel (count, count, 1); |
9071 | sel.quick_grow (len: count); |
9072 | for (int i = 0; i < count; ++i) |
9073 | sel[i] = i | (count / 2); |
9074 | vec_perm_indices indices (sel, 2, count); |
9075 | tree perm_mask |
9076 | = vect_gen_perm_mask_checked (vectype, indices); |
9077 | new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR, |
9078 | vec_oprnd, vec_oprnd, |
9079 | perm_mask); |
9080 | vec_oprnd = make_ssa_name (var: vectype); |
9081 | gimple_set_lhs (new_stmt, vec_oprnd); |
9082 | vect_finish_stmt_generation (vinfo, stmt_info, |
9083 | vec_stmt: new_stmt, gsi); |
9084 | if (final_mask) |
9085 | { |
9086 | new_stmt = gimple_build_assign (NULL_TREE, |
9087 | VEC_UNPACK_HI_EXPR, |
9088 | final_mask); |
9089 | final_mask = make_ssa_name |
9090 | (var: truth_type_for (gs_info.offset_vectype)); |
9091 | gimple_set_lhs (new_stmt, final_mask); |
9092 | vect_finish_stmt_generation (vinfo, stmt_info, |
9093 | vec_stmt: new_stmt, gsi); |
9094 | } |
9095 | new_stmt = vect_build_one_scatter_store_call |
9096 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9097 | ptr: dataref_ptr, |
9098 | offset: vec_offsets[2 * vec_num * j + 2 * i + 1], |
9099 | oprnd: vec_oprnd, mask: final_mask); |
9100 | vect_finish_stmt_generation (vinfo, stmt_info, |
9101 | vec_stmt: new_stmt, gsi); |
9102 | } |
9103 | else if (known_eq (nunits * 2, offset_nunits)) |
9104 | { |
		      /* We have an offset vector with double the number of
			 lanes.  Select the low/high part accordingly.  */
9107 | vec_offset = vec_offsets[(vec_num * j + i) / 2]; |
9108 | if ((vec_num * j + i) & 1) |
9109 | { |
9110 | int count = offset_nunits.to_constant (); |
9111 | vec_perm_builder sel (count, count, 1); |
9112 | sel.quick_grow (len: count); |
9113 | for (int i = 0; i < count; ++i) |
9114 | sel[i] = i | (count / 2); |
9115 | vec_perm_indices indices (sel, 2, count); |
9116 | tree perm_mask = vect_gen_perm_mask_checked |
9117 | (TREE_TYPE (vec_offset), indices); |
9118 | new_stmt = gimple_build_assign (NULL_TREE, |
9119 | VEC_PERM_EXPR, |
9120 | vec_offset, |
9121 | vec_offset, |
9122 | perm_mask); |
9123 | vec_offset = make_ssa_name (TREE_TYPE (vec_offset)); |
9124 | gimple_set_lhs (new_stmt, vec_offset); |
9125 | vect_finish_stmt_generation (vinfo, stmt_info, |
9126 | vec_stmt: new_stmt, gsi); |
9127 | } |
9128 | new_stmt = vect_build_one_scatter_store_call |
9129 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9130 | ptr: dataref_ptr, offset: vec_offset, |
9131 | oprnd: vec_oprnd, mask: final_mask); |
9132 | vect_finish_stmt_generation (vinfo, stmt_info, |
9133 | vec_stmt: new_stmt, gsi); |
9134 | } |
9135 | else |
9136 | gcc_unreachable (); |
9137 | } |
9138 | else |
9139 | { |
9140 | /* Emulated scatter. */ |
9141 | gcc_assert (!final_mask); |
9142 | if (costing_p) |
9143 | { |
9144 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
		      /* For an emulated scatter, N offset vector element
			 extracts (we assume the scalar scaling and the
			 ptr + offset add are consumed by the store).  */
9148 | inside_cost |
9149 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar, |
9150 | stmt_info, misalign: 0, where: vect_body); |
9151 | /* N scalar stores plus extracting the elements. */ |
9152 | inside_cost |
9153 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar, |
9154 | stmt_info, misalign: 0, where: vect_body); |
9155 | inside_cost |
9156 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
9157 | stmt_info, misalign: 0, where: vect_body); |
9158 | continue; |
9159 | } |
9160 | |
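		  /* A sketch of the per-lane code built below for the
		     emulated scatter (unmasked; K ranges over the lanes):

			 off_K = (sizetype) BIT_FIELD_REF <vec_offset, K> * scale;
			 ptr_K = dataref_ptr + off_K;
			 MEM <elt_type> [ptr_K] = BIT_FIELD_REF <vec_oprnd, K>;  */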
9161 | unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); |
9162 | unsigned HOST_WIDE_INT const_offset_nunits |
9163 | = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype).to_constant (); |
9164 | vec<constructor_elt, va_gc> *ctor_elts; |
9165 | vec_alloc (v&: ctor_elts, nelems: const_nunits); |
9166 | gimple_seq stmts = NULL; |
9167 | tree elt_type = TREE_TYPE (vectype); |
9168 | unsigned HOST_WIDE_INT elt_size |
9169 | = tree_to_uhwi (TYPE_SIZE (elt_type)); |
9170 | /* We support offset vectors with more elements |
9171 | than the data vector for now. */ |
9172 | unsigned HOST_WIDE_INT factor |
9173 | = const_offset_nunits / const_nunits; |
9174 | vec_offset = vec_offsets[(vec_num * j + i) / factor]; |
9175 | unsigned elt_offset = (j % factor) * const_nunits; |
9176 | tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); |
9177 | tree scale = size_int (gs_info.scale); |
9178 | align = get_object_alignment (DR_REF (first_dr_info->dr)); |
9179 | tree ltype = build_aligned_type (TREE_TYPE (vectype), align); |
9180 | for (unsigned k = 0; k < const_nunits; ++k) |
9181 | { |
9182 | /* Compute the offsetted pointer. */ |
9183 | tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type), |
9184 | bitsize_int (k + elt_offset)); |
9185 | tree idx |
9186 | = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type, |
9187 | ops: vec_offset, TYPE_SIZE (idx_type), ops: boff); |
9188 | idx = gimple_convert (seq: &stmts, sizetype, op: idx); |
9189 | idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype, |
9190 | ops: idx, ops: scale); |
9191 | tree ptr |
9192 | = gimple_build (seq: &stmts, code: PLUS_EXPR, |
9193 | TREE_TYPE (dataref_ptr), |
9194 | ops: dataref_ptr, ops: idx); |
9195 | ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr); |
9196 | /* Extract the element to be stored. */ |
9197 | tree elt |
9198 | = gimple_build (seq: &stmts, code: BIT_FIELD_REF, |
9199 | TREE_TYPE (vectype), |
9200 | ops: vec_oprnd, TYPE_SIZE (elt_type), |
9201 | bitsize_int (k * elt_size)); |
9202 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
9203 | stmts = NULL; |
9204 | tree ref |
9205 | = build2 (MEM_REF, ltype, ptr, |
9206 | build_int_cst (ref_type, 0)); |
9207 | new_stmt = gimple_build_assign (ref, elt); |
9208 | vect_finish_stmt_generation (vinfo, stmt_info, |
9209 | vec_stmt: new_stmt, gsi); |
9210 | } |
9211 | if (slp) |
9212 | slp_node->push_vec_def (def: new_stmt); |
9213 | } |
9214 | } |
9215 | if (!slp && !costing_p) |
9216 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
9217 | } |
9218 | |
9219 | if (!slp && !costing_p) |
9220 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
9221 | |
9222 | if (costing_p && dump_enabled_p ()) |
9223 | dump_printf_loc (MSG_NOTE, vect_location, |
9224 | "vect_model_store_cost: inside_cost = %d, " |
9225 | "prologue_cost = %d .\n" , |
9226 | inside_cost, prologue_cost); |
9227 | |
9228 | return true; |
9229 | } |
9230 | |
9231 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS |
9232 | || memory_access_type == VMAT_CONTIGUOUS_DOWN |
9233 | || memory_access_type == VMAT_CONTIGUOUS_PERMUTE |
9234 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE); |
9235 | |
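  /* A sketch of the statements the remaining contiguous cases emit per
     vector, depending on partial-vector support (not exhaustive):

	 MEM <vectype> [dataref_ptr] = vec_oprnd;
	 .MASK_STORE (dataref_ptr, align, final_mask, vec_oprnd);
	 .LEN_STORE (dataref_ptr, align, final_len, bias, vec_oprnd);

     with a reversing VEC_PERM_EXPR applied to VEC_OPRND first for
     VMAT_CONTIGUOUS_REVERSE.  */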
9236 | unsigned inside_cost = 0, prologue_cost = 0; |
  /* For costing some adjacent vector stores, we'd like to cost with
     the total number of them once instead of costing each one by one.  */
9239 | unsigned int n_adjacent_stores = 0; |
9240 | auto_vec<tree> result_chain (group_size); |
9241 | auto_vec<tree, 1> vec_oprnds; |
9242 | for (j = 0; j < ncopies; j++) |
9243 | { |
9244 | gimple *new_stmt; |
9245 | if (j == 0) |
9246 | { |
9247 | if (slp && !costing_p) |
9248 | { |
9249 | /* Get vectorized arguments for SLP_NODE. */ |
9250 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: 1, op0: op, |
9251 | vec_oprnds0: &vec_oprnds, op1: mask, vec_oprnds1: &vec_masks); |
9252 | vec_oprnd = vec_oprnds[0]; |
9253 | if (mask) |
9254 | vec_mask = vec_masks[0]; |
9255 | } |
9256 | else |
9257 | { |
9258 | /* For interleaved stores we collect vectorized defs for all the |
9259 | stores in the group in DR_CHAIN. DR_CHAIN is then used as an |
9260 | input to vect_permute_store_chain(). |
9261 | |
9262 | If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN |
9263 | is of size 1. */ |
9264 | stmt_vec_info next_stmt_info = first_stmt_info; |
9265 | for (i = 0; i < group_size; i++) |
9266 | { |
		  /* Since gaps are not supported for interleaved stores,
		     DR_GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT_INFO can't be NULL.  If there is
		     no interleaving, DR_GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
9272 | op = vect_get_store_rhs (stmt_info: next_stmt_info); |
9273 | if (costing_p) |
9274 | update_prologue_cost (&prologue_cost, op); |
9275 | else |
9276 | { |
9277 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info, |
9278 | ncopies, op, |
9279 | vec_oprnds: gvec_oprnds[i]); |
9280 | vec_oprnd = (*gvec_oprnds[i])[0]; |
9281 | dr_chain.quick_push (obj: vec_oprnd); |
9282 | } |
9283 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9284 | } |
9285 | if (mask && !costing_p) |
9286 | { |
9287 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, |
9288 | op: mask, vec_oprnds: &vec_masks, |
9289 | vectype: mask_vectype); |
9290 | vec_mask = vec_masks[0]; |
9291 | } |
9292 | } |
9293 | |
      /* We should have caught mismatched types earlier.  */
9295 | gcc_assert (costing_p |
9296 | || useless_type_conversion_p (vectype, |
9297 | TREE_TYPE (vec_oprnd))); |
9298 | bool simd_lane_access_p |
9299 | = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; |
9300 | if (!costing_p |
9301 | && simd_lane_access_p |
9302 | && !loop_masks |
9303 | && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR |
9304 | && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) |
9305 | && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info)) |
9306 | && integer_zerop (DR_INIT (first_dr_info->dr)) |
9307 | && alias_sets_conflict_p (get_alias_set (aggr_type), |
9308 | get_alias_set (TREE_TYPE (ref_type)))) |
9309 | { |
9310 | dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); |
9311 | dataref_offset = build_int_cst (ref_type, 0); |
9312 | } |
9313 | else if (!costing_p) |
9314 | dataref_ptr |
9315 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
9316 | simd_lane_access_p ? loop : NULL, |
9317 | offset, &dummy, gsi, &ptr_incr, |
9318 | simd_lane_access_p, bump); |
9319 | } |
9320 | else if (!costing_p) |
9321 | { |
9322 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
9323 | /* DR_CHAIN is then used as an input to vect_permute_store_chain(). |
9324 | If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is |
9325 | of size 1. */ |
9326 | for (i = 0; i < group_size; i++) |
9327 | { |
9328 | vec_oprnd = (*gvec_oprnds[i])[j]; |
9329 | dr_chain[i] = vec_oprnd; |
9330 | } |
9331 | if (mask) |
9332 | vec_mask = vec_masks[j]; |
9333 | if (dataref_offset) |
9334 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
9335 | else |
9336 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
9337 | stmt_info, bump); |
9338 | } |
9339 | |
9340 | new_stmt = NULL; |
9341 | if (grouped_store) |
9342 | { |
9343 | /* Permute. */ |
9344 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
9345 | if (costing_p) |
9346 | { |
9347 | int group_size = DR_GROUP_SIZE (first_stmt_info); |
9348 | int nstmts = ceil_log2 (x: group_size) * group_size; |
9349 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm, |
9350 | stmt_info, misalign: 0, where: vect_body); |
9351 | if (dump_enabled_p ()) |
9352 | dump_printf_loc (MSG_NOTE, vect_location, |
9353 | "vect_model_store_cost: " |
9354 | "strided group_size = %d .\n" , |
9355 | group_size); |
9356 | } |
9357 | else |
9358 | vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, |
9359 | gsi, &result_chain); |
9360 | } |
9361 | |
9362 | stmt_vec_info next_stmt_info = first_stmt_info; |
9363 | for (i = 0; i < vec_num; i++) |
9364 | { |
9365 | if (!costing_p) |
9366 | { |
9367 | if (slp) |
9368 | vec_oprnd = vec_oprnds[i]; |
9369 | else if (grouped_store) |
9370 | /* For grouped stores vectorized defs are interleaved in |
9371 | vect_permute_store_chain(). */ |
9372 | vec_oprnd = result_chain[i]; |
9373 | } |
9374 | |
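	  /* For VMAT_CONTIGUOUS_REVERSE the operand is reversed before
	     the store with a VEC_PERM_EXPR whose mask is e.g.
	     { 3, 2, 1, 0 } for a four-lane vector.  */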
9375 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
9376 | { |
9377 | if (costing_p) |
9378 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm, |
9379 | stmt_info, misalign: 0, where: vect_body); |
9380 | else |
9381 | { |
9382 | tree perm_mask = perm_mask_for_reverse (vectype); |
9383 | tree perm_dest = vect_create_destination_var ( |
9384 | vect_get_store_rhs (stmt_info), vectype); |
9385 | tree new_temp = make_ssa_name (var: perm_dest); |
9386 | |
9387 | /* Generate the permute statement. */ |
9388 | gimple *perm_stmt |
9389 | = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, |
9390 | vec_oprnd, perm_mask); |
9391 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, |
9392 | gsi); |
9393 | |
9394 | perm_stmt = SSA_NAME_DEF_STMT (new_temp); |
9395 | vec_oprnd = new_temp; |
9396 | } |
9397 | } |
9398 | |
9399 | if (costing_p) |
9400 | { |
9401 | n_adjacent_stores++; |
9402 | |
9403 | if (!slp) |
9404 | { |
9405 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9406 | if (!next_stmt_info) |
9407 | break; |
9408 | } |
9409 | |
9410 | continue; |
9411 | } |
9412 | |
9413 | tree final_mask = NULL_TREE; |
9414 | tree final_len = NULL_TREE; |
9415 | tree bias = NULL_TREE; |
9416 | if (loop_masks) |
9417 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
9418 | vec_num * ncopies, vectype, |
9419 | vec_num * j + i); |
9420 | if (slp && vec_mask) |
9421 | vec_mask = vec_masks[i]; |
9422 | if (vec_mask) |
9423 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask, |
9424 | vec_mask, gsi); |
9425 | |
9426 | if (i > 0) |
9427 | /* Bump the vector pointer. */ |
9428 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
9429 | stmt_info, bump); |
9430 | |
9431 | unsigned misalign; |
9432 | unsigned HOST_WIDE_INT align; |
9433 | align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); |
9434 | if (alignment_support_scheme == dr_aligned) |
9435 | misalign = 0; |
9436 | else if (misalignment == DR_MISALIGNMENT_UNKNOWN) |
9437 | { |
9438 | align = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info)); |
9439 | misalign = 0; |
9440 | } |
9441 | else |
9442 | misalign = misalignment; |
9443 | if (dataref_offset == NULL_TREE |
9444 | && TREE_CODE (dataref_ptr) == SSA_NAME) |
9445 | set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, |
9446 | misalign); |
9447 | align = least_bit_hwi (x: misalign | align); |
9448 | |
	  /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid.  */
9450 | machine_mode vmode = TYPE_MODE (vectype); |
9451 | machine_mode new_vmode = vmode; |
9452 | internal_fn partial_ifn = IFN_LAST; |
9453 | if (loop_lens) |
9454 | { |
9455 | opt_machine_mode new_ovmode |
9456 | = get_len_load_store_mode (vmode, false, &partial_ifn); |
9457 | new_vmode = new_ovmode.require (); |
9458 | unsigned factor |
9459 | = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); |
9460 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
9461 | vec_num * ncopies, vectype, |
9462 | vec_num * j + i, factor); |
9463 | } |
9464 | else if (final_mask) |
9465 | { |
9466 | if (!can_vec_mask_load_store_p ( |
9467 | vmode, TYPE_MODE (TREE_TYPE (final_mask)), false, |
9468 | &partial_ifn)) |
9469 | gcc_unreachable (); |
9470 | } |
9471 | |
9472 | if (partial_ifn == IFN_MASK_LEN_STORE) |
9473 | { |
9474 | if (!final_len) |
9475 | { |
	      /* Pass the VF value to the 'len' argument of
		 MASK_LEN_STORE if LOOP_LENS is invalid.  */
9478 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
9479 | } |
9480 | if (!final_mask) |
9481 | { |
	      /* Pass an all-ones value to the 'mask' argument of
		 MASK_LEN_STORE if FINAL_MASK is invalid.  */
9484 | mask_vectype = truth_type_for (vectype); |
9485 | final_mask = build_minus_one_cst (mask_vectype); |
9486 | } |
9487 | } |
9488 | if (final_len) |
9489 | { |
9490 | signed char biasval |
9491 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
9492 | |
9493 | bias = build_int_cst (intQI_type_node, biasval); |
9494 | } |
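	  /* The number of lanes actually stored is FINAL_LEN + BIAS;
	     most targets use a zero bias, while e.g. s390's vector
	     store-with-length instruction expects a highest-byte index
	     and therefore a bias of -1.  */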
9495 | |
9496 | /* Arguments are ready. Create the new vector stmt. */ |
9497 | if (final_len) |
9498 | { |
9499 | gcall *call; |
9500 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
	  /* A conversion is needed if the vector mode is wrapped as VnQI.  */
9502 | if (vmode != new_vmode) |
9503 | { |
9504 | tree new_vtype |
9505 | = build_vector_type_for_mode (unsigned_intQI_type_node, |
9506 | new_vmode); |
9507 | tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var); |
9508 | vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd); |
9509 | gassign *new_stmt |
9510 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd); |
9511 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
9512 | vec_oprnd = var; |
9513 | } |
9514 | |
9515 | if (partial_ifn == IFN_MASK_LEN_STORE) |
9516 | call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6, |
9517 | dataref_ptr, ptr, final_mask, |
9518 | final_len, bias, vec_oprnd); |
9519 | else |
9520 | call = gimple_build_call_internal (IFN_LEN_STORE, 5, |
9521 | dataref_ptr, ptr, final_len, |
9522 | bias, vec_oprnd); |
9523 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9524 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9525 | new_stmt = call; |
9526 | } |
9527 | else if (final_mask) |
9528 | { |
9529 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
9530 | gcall *call |
9531 | = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr, |
9532 | ptr, final_mask, vec_oprnd); |
9533 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9534 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9535 | new_stmt = call; |
9536 | } |
9537 | else |
9538 | { |
9539 | data_ref |
9540 | = fold_build2 (MEM_REF, vectype, dataref_ptr, |
9541 | dataref_offset ? dataref_offset |
9542 | : build_int_cst (ref_type, 0)); |
9543 | if (alignment_support_scheme == dr_aligned) |
9544 | ; |
9545 | else |
9546 | TREE_TYPE (data_ref) |
9547 | = build_aligned_type (TREE_TYPE (data_ref), |
9548 | align * BITS_PER_UNIT); |
9549 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
9550 | new_stmt = gimple_build_assign (data_ref, vec_oprnd); |
9551 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
9552 | } |
9553 | |
9554 | if (slp) |
9555 | continue; |
9556 | |
9557 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9558 | if (!next_stmt_info) |
9559 | break; |
9560 | } |
9561 | if (!slp && !costing_p) |
9562 | { |
9563 | if (j == 0) |
9564 | *vec_stmt = new_stmt; |
9565 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
9566 | } |
9567 | } |
9568 | |
9569 | if (costing_p) |
9570 | { |
9571 | if (n_adjacent_stores > 0) |
9572 | vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores, |
9573 | alignment_support_scheme, misalignment, |
9574 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
9575 | |
      /* When vectorizing a store into the function result, assign a
	 penalty if the function returns in a multi-register location.
	 In this case we assume we'll end up having to spill the vector
	 result and do piecewise loads as a conservative estimate.  */
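      /* For example, a vector result stored into a 16-byte struct that
	 is returned in two general registers would otherwise look like
	 a single cheap move; assume a spill of the vector plus NREGS
	 scalar loads instead (a heuristic, not an exact model).  */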
9580 | tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref); |
9581 | if (base |
9582 | && (TREE_CODE (base) == RESULT_DECL |
9583 | || (DECL_P (base) && cfun_returns (decl: base))) |
9584 | && !aggregate_value_p (base, cfun->decl)) |
9585 | { |
9586 | rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1); |
9587 | /* ??? Handle PARALLEL in some way. */ |
9588 | if (REG_P (reg)) |
9589 | { |
9590 | int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg)); |
	      /* Assume that a single reg-reg move is possible and cheap;
		 do not account for the vector to general purpose register
		 move cost.  */
9593 | if (nregs > 1) |
9594 | { |
9595 | /* Spill. */ |
9596 | prologue_cost |
9597 | += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_store, |
9598 | stmt_info, misalign: 0, where: vect_epilogue); |
9599 | /* Loads. */ |
9600 | prologue_cost |
9601 | += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies * nregs, kind: scalar_load, |
9602 | stmt_info, misalign: 0, where: vect_epilogue); |
9603 | } |
9604 | } |
9605 | } |
9606 | if (dump_enabled_p ()) |
9607 | dump_printf_loc (MSG_NOTE, vect_location, |
9608 | "vect_model_store_cost: inside_cost = %d, " |
9609 | "prologue_cost = %d .\n" , |
9610 | inside_cost, prologue_cost); |
9611 | } |
9612 | |
9613 | return true; |
9614 | } |
9615 | |
9616 | /* Given a vector type VECTYPE, turns permutation SEL into the equivalent |
9617 | VECTOR_CST mask. No checks are made that the target platform supports the |
9618 | mask, so callers may wish to test can_vec_perm_const_p separately, or use |
9619 | vect_gen_perm_mask_checked. */ |
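/* For example, reversing a four-lane vector uses SEL = { 3, 2, 1, 0 },
   and the mask returned below is then the ssizetype VECTOR_CST
   { 3, 2, 1, 0 }.  */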
9620 | |
9621 | tree |
9622 | vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) |
9623 | { |
9624 | tree mask_type; |
9625 | |
9626 | poly_uint64 nunits = sel.length (); |
9627 | gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype))); |
9628 | |
9629 | mask_type = build_vector_type (ssizetype, nunits); |
9630 | return vec_perm_indices_to_tree (mask_type, sel); |
9631 | } |
9632 | |
9633 | /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p, |
9634 | i.e. that the target supports the pattern _for arbitrary input vectors_. */ |
9635 | |
9636 | tree |
9637 | vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) |
9638 | { |
9639 | machine_mode vmode = TYPE_MODE (vectype); |
9640 | gcc_assert (can_vec_perm_const_p (vmode, vmode, sel)); |
9641 | return vect_gen_perm_mask_any (vectype, sel); |
9642 | } |
9643 | |
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X
   and Y using the permutation mask MASK_VEC, insert them at *GSI and
   return the permuted vector variable.  */
9648 | |
9649 | static tree |
9650 | permute_vec_elements (vec_info *vinfo, |
9651 | tree x, tree y, tree mask_vec, stmt_vec_info stmt_info, |
9652 | gimple_stmt_iterator *gsi) |
9653 | { |
9654 | tree vectype = TREE_TYPE (x); |
9655 | tree perm_dest, data_ref; |
9656 | gimple *perm_stmt; |
9657 | |
9658 | tree scalar_dest = gimple_get_lhs (stmt_info->stmt); |
9659 | if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME) |
9660 | perm_dest = vect_create_destination_var (scalar_dest, vectype); |
9661 | else |
9662 | perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL); |
9663 | data_ref = make_ssa_name (var: perm_dest); |
9664 | |
9665 | /* Generate the permute statement. */ |
9666 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec); |
9667 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, gsi); |
9668 | |
9669 | return data_ref; |
9670 | } |
9671 | |
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop
   LOOP, inserting them on the loop's preheader edge.  Returns true if
   we were successful in doing so (and thus STMT_INFO can then be
   moved), otherwise returns false.  HOIST_P indicates whether we
   actually want to hoist the definitions of all SSA uses; it is false
   when we are only costing.  */
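/* For example, if STMT_INFO is the invariant load x = a[n_1] and
   n_1 = m_2 + 1 is defined inside LOOP while m_2 is not, the definition
   of n_1 can be moved to the preheader; a second level of in-loop
   definitions makes us give up instead.  */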
9677 | |
9678 | static bool |
9679 | hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p) |
9680 | { |
9681 | ssa_op_iter i; |
9682 | tree op; |
9683 | bool any = false; |
9684 | |
9685 | FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE) |
9686 | { |
9687 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
9688 | if (!gimple_nop_p (g: def_stmt) |
9689 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt))) |
9690 | { |
	  /* Make sure we don't need to recurse.  While we could do so in
	     simple cases, for more complex use webs we don't have an easy
	     way to preserve stmt order so as to fulfil dependencies
	     within them.  */
9695 | tree op2; |
9696 | ssa_op_iter i2; |
9697 | if (gimple_code (g: def_stmt) == GIMPLE_PHI) |
9698 | return false; |
9699 | FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE) |
9700 | { |
9701 | gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2); |
9702 | if (!gimple_nop_p (g: def_stmt2) |
9703 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt2))) |
9704 | return false; |
9705 | } |
9706 | any = true; |
9707 | } |
9708 | } |
9709 | |
9710 | if (!any) |
9711 | return true; |
9712 | |
9713 | if (!hoist_p) |
9714 | return true; |
9715 | |
9716 | FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE) |
9717 | { |
9718 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
9719 | if (!gimple_nop_p (g: def_stmt) |
9720 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt))) |
9721 | { |
9722 | gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); |
9723 | gsi_remove (&gsi, false); |
9724 | gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt); |
9725 | } |
9726 | } |
9727 | |
9728 | return true; |
9729 | } |
9730 | |
9731 | /* vectorizable_load. |
9732 | |
   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9734 | that can be vectorized. |
9735 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
9736 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
9737 | Return true if STMT_INFO is vectorizable in this way. */ |
9738 | |
9739 | static bool |
9740 | vectorizable_load (vec_info *vinfo, |
9741 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
9742 | gimple **vec_stmt, slp_tree slp_node, |
9743 | stmt_vector_for_cost *cost_vec) |
9744 | { |
9745 | tree scalar_dest; |
9746 | tree vec_dest = NULL; |
9747 | tree data_ref = NULL; |
9748 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
9749 | class loop *loop = NULL; |
9750 | class loop *containing_loop = gimple_bb (g: stmt_info->stmt)->loop_father; |
9751 | bool nested_in_vect_loop = false; |
9752 | tree elem_type; |
9753 | /* Avoid false positive uninitialized warning, see PR110652. */ |
9754 | tree new_temp = NULL_TREE; |
9755 | machine_mode mode; |
9756 | tree dummy; |
9757 | tree dataref_ptr = NULL_TREE; |
9758 | tree dataref_offset = NULL_TREE; |
9759 | gimple *ptr_incr = NULL; |
9760 | int ncopies; |
9761 | int i, j; |
9762 | unsigned int group_size; |
9763 | poly_uint64 group_gap_adj; |
9764 | tree msq = NULL_TREE, lsq; |
9765 | tree realignment_token = NULL_TREE; |
9766 | gphi *phi = NULL; |
9767 | vec<tree> dr_chain = vNULL; |
9768 | bool grouped_load = false; |
9769 | stmt_vec_info first_stmt_info; |
9770 | stmt_vec_info first_stmt_info_for_drptr = NULL; |
9771 | bool compute_in_loop = false; |
9772 | class loop *at_loop; |
9773 | int vec_num; |
9774 | bool slp = (slp_node != NULL); |
9775 | bool slp_perm = false; |
9776 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
9777 | poly_uint64 vf; |
9778 | tree aggr_type; |
9779 | gather_scatter_info gs_info; |
9780 | tree ref_type; |
9781 | enum vect_def_type mask_dt = vect_unknown_def_type; |
9782 | |
9783 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
9784 | return false; |
9785 | |
9786 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
9787 | && ! vec_stmt) |
9788 | return false; |
9789 | |
9790 | if (!STMT_VINFO_DATA_REF (stmt_info)) |
9791 | return false; |
9792 | |
9793 | tree mask = NULL_TREE, mask_vectype = NULL_TREE; |
9794 | int mask_index = -1; |
9795 | slp_tree slp_op = NULL; |
9796 | if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt)) |
9797 | { |
9798 | scalar_dest = gimple_assign_lhs (gs: assign); |
9799 | if (TREE_CODE (scalar_dest) != SSA_NAME) |
9800 | return false; |
9801 | |
9802 | tree_code code = gimple_assign_rhs_code (gs: assign); |
9803 | if (code != ARRAY_REF |
9804 | && code != BIT_FIELD_REF |
9805 | && code != INDIRECT_REF |
9806 | && code != COMPONENT_REF |
9807 | && code != IMAGPART_EXPR |
9808 | && code != REALPART_EXPR |
9809 | && code != MEM_REF |
9810 | && TREE_CODE_CLASS (code) != tcc_declaration) |
9811 | return false; |
9812 | } |
9813 | else |
9814 | { |
9815 | gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt); |
9816 | if (!call || !gimple_call_internal_p (gs: call)) |
9817 | return false; |
9818 | |
9819 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
9820 | if (!internal_load_fn_p (ifn)) |
9821 | return false; |
9822 | |
9823 | scalar_dest = gimple_call_lhs (gs: call); |
9824 | if (!scalar_dest) |
9825 | return false; |
9826 | |
9827 | mask_index = internal_fn_mask_index (ifn); |
9828 | if (mask_index >= 0 && slp_node) |
9829 | mask_index = vect_slp_child_index_for_operand |
9830 | (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
9831 | if (mask_index >= 0 |
9832 | && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, |
9833 | mask: &mask, mask_node: &slp_op, mask_dt_out: &mask_dt, mask_vectype_out: &mask_vectype)) |
9834 | return false; |
9835 | } |
9836 | |
9837 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9838 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9839 | |
9840 | if (loop_vinfo) |
9841 | { |
9842 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
9843 | nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); |
9844 | vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
9845 | } |
9846 | else |
9847 | vf = 1; |
9848 | |
9849 | /* Multiple types in SLP are handled by creating the appropriate number of |
9850 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
9851 | case of SLP. */ |
9852 | if (slp) |
9853 | ncopies = 1; |
9854 | else |
9855 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9856 | |
9857 | gcc_assert (ncopies >= 1); |
9858 | |
9859 | /* FORNOW. This restriction should be relaxed. */ |
9860 | if (nested_in_vect_loop && ncopies > 1) |
9861 | { |
9862 | if (dump_enabled_p ()) |
9863 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9864 | "multiple types in nested loop.\n" ); |
9865 | return false; |
9866 | } |
9867 | |
9868 | /* Invalidate assumptions made by dependence analysis when vectorization |
9869 | on the unrolled body effectively re-orders stmts. */ |
9870 | if (ncopies > 1 |
9871 | && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 |
9872 | && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
9873 | STMT_VINFO_MIN_NEG_DIST (stmt_info))) |
9874 | { |
9875 | if (dump_enabled_p ()) |
9876 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9877 | "cannot perform implicit CSE when unrolling " |
9878 | "with negative dependence distance\n" ); |
9879 | return false; |
9880 | } |
9881 | |
9882 | elem_type = TREE_TYPE (vectype); |
9883 | mode = TYPE_MODE (vectype); |
9884 | |
  /* FORNOW.  In some cases we can vectorize even if the data-type is
     not supported (e.g. data copies).  */
9887 | if (optab_handler (op: mov_optab, mode) == CODE_FOR_nothing) |
9888 | { |
9889 | if (dump_enabled_p ()) |
9890 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9891 | "Aligned load, but unsupported type.\n" ); |
9892 | return false; |
9893 | } |
9894 | |
9895 | /* Check if the load is a part of an interleaving chain. */ |
9896 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
9897 | { |
9898 | grouped_load = true; |
9899 | /* FORNOW */ |
9900 | gcc_assert (!nested_in_vect_loop); |
9901 | gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
9902 | |
9903 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
9904 | group_size = DR_GROUP_SIZE (first_stmt_info); |
9905 | |
9906 | /* Refuse non-SLP vectorization of SLP-only groups. */ |
9907 | if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) |
9908 | { |
9909 | if (dump_enabled_p ()) |
9910 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9911 | "cannot vectorize load in non-SLP mode.\n" ); |
9912 | return false; |
9913 | } |
9914 | |
9915 | /* Invalidate assumptions made by dependence analysis when vectorization |
9916 | on the unrolled body effectively re-orders stmts. */ |
9917 | if (!PURE_SLP_STMT (stmt_info) |
9918 | && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 |
9919 | && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
9920 | STMT_VINFO_MIN_NEG_DIST (stmt_info))) |
9921 | { |
9922 | if (dump_enabled_p ()) |
9923 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9924 | "cannot perform implicit CSE when performing " |
9925 | "group loads with negative dependence distance\n" ); |
9926 | return false; |
9927 | } |
9928 | } |
9929 | else |
9930 | group_size = 1; |
9931 | |
9932 | if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
9933 | { |
9934 | slp_perm = true; |
9935 | |
9936 | if (!loop_vinfo) |
9937 | { |
9938 | /* In BB vectorization we may not actually use a loaded vector |
9939 | accessing elements in excess of DR_GROUP_SIZE. */ |
9940 | stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
9941 | group_info = DR_GROUP_FIRST_ELEMENT (group_info); |
9942 | unsigned HOST_WIDE_INT nunits; |
9943 | unsigned j, k, maxk = 0; |
9944 | FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k) |
9945 | if (k > maxk) |
9946 | maxk = k; |
9947 | tree vectype = SLP_TREE_VECTYPE (slp_node); |
9948 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits) |
9949 | || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1))) |
9950 | { |
9951 | if (dump_enabled_p ()) |
9952 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9953 | "BB vectorization with gaps at the end of " |
9954 | "a load is not supported\n" ); |
9955 | return false; |
9956 | } |
9957 | } |
9958 | |
9959 | auto_vec<tree> tem; |
9960 | unsigned n_perms; |
9961 | if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf, |
9962 | true, &n_perms)) |
9963 | { |
9964 | if (dump_enabled_p ()) |
9965 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
9966 | vect_location, |
9967 | "unsupported load permutation\n" ); |
9968 | return false; |
9969 | } |
9970 | } |
9971 | |
9972 | vect_memory_access_type memory_access_type; |
9973 | enum dr_alignment_support alignment_support_scheme; |
9974 | int misalignment; |
9975 | poly_int64 poffset; |
9976 | internal_fn lanes_ifn; |
9977 | if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type: VLS_LOAD, |
9978 | ncopies, memory_access_type: &memory_access_type, poffset: &poffset, |
9979 | alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info, |
9980 | lanes_ifn: &lanes_ifn)) |
9981 | return false; |
9982 | |
9983 | if (mask) |
9984 | { |
9985 | if (memory_access_type == VMAT_CONTIGUOUS) |
9986 | { |
9987 | machine_mode vec_mode = TYPE_MODE (vectype); |
9988 | if (!VECTOR_MODE_P (vec_mode) |
9989 | || !can_vec_mask_load_store_p (vec_mode, |
9990 | TYPE_MODE (mask_vectype), true)) |
9991 | return false; |
9992 | } |
9993 | else if (memory_access_type != VMAT_LOAD_STORE_LANES |
9994 | && memory_access_type != VMAT_GATHER_SCATTER) |
9995 | { |
9996 | if (dump_enabled_p ()) |
9997 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9998 | "unsupported access type for masked load.\n" ); |
9999 | return false; |
10000 | } |
10001 | else if (memory_access_type == VMAT_GATHER_SCATTER |
10002 | && gs_info.ifn == IFN_LAST |
10003 | && !gs_info.decl) |
10004 | { |
10005 | if (dump_enabled_p ()) |
10006 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10007 | "unsupported masked emulated gather.\n" ); |
10008 | return false; |
10009 | } |
10010 | } |
10011 | |
10012 | bool costing_p = !vec_stmt; |
10013 | |
10014 | if (costing_p) /* transformation not required. */ |
10015 | { |
10016 | if (slp_node |
10017 | && mask |
10018 | && !vect_maybe_update_slp_op_vectype (slp_op, |
10019 | mask_vectype)) |
10020 | { |
10021 | if (dump_enabled_p ()) |
10022 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10023 | "incompatible vector types for invariants\n" ); |
10024 | return false; |
10025 | } |
10026 | |
10027 | if (!slp) |
10028 | STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; |
10029 | |
10030 | if (loop_vinfo |
10031 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
10032 | check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, |
10033 | vls_type: VLS_LOAD, group_size, |
10034 | memory_access_type, gs_info: &gs_info, |
10035 | scalar_mask: mask); |
10036 | |
10037 | if (dump_enabled_p () |
10038 | && memory_access_type != VMAT_ELEMENTWISE |
10039 | && memory_access_type != VMAT_GATHER_SCATTER |
10040 | && alignment_support_scheme != dr_aligned) |
10041 | dump_printf_loc (MSG_NOTE, vect_location, |
10042 | "Vectorizing an unaligned access.\n" ); |
10043 | |
10044 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10045 | vinfo->any_known_not_updated_vssa = true; |
10046 | |
10047 | STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; |
10048 | } |
10049 | |
10050 | if (!slp) |
10051 | gcc_assert (memory_access_type |
10052 | == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); |
10053 | |
10054 | if (dump_enabled_p () && !costing_p) |
10055 | dump_printf_loc (MSG_NOTE, vect_location, |
10056 | "transform load. ncopies = %d\n" , ncopies); |
10057 | |
10058 | /* Transform. */ |
10059 | |
10060 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; |
10061 | ensure_base_align (dr_info); |
10062 | |
10063 | if (memory_access_type == VMAT_INVARIANT) |
10064 | { |
10065 | gcc_assert (!grouped_load && !mask && !bb_vinfo); |
10066 | /* If we have versioned for aliasing or the loop doesn't |
10067 | have any data dependencies that would preclude this, |
10068 | then we are sure this is a loop invariant load and |
10069 | thus we can insert it on the preheader edge. |
	 TODO: hoist_defs_of_uses should ideally be computed
	 once at analysis time, remembered, and used at
	 transform time.  */
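      /* A sketch for an invariant load x_1 = *p_2 in the loop body:
	 when hoisting is safe the scalar load is moved to the
	 preheader and the vector is built there once:

	     preheader:
	       x_1 = *p_2;
	       vect_cst = { x_1, x_1, ... };  */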
10073 | bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) |
10074 | && !nested_in_vect_loop |
10075 | && hoist_defs_of_uses (stmt_info, loop, hoist_p: !costing_p)); |
10076 | if (costing_p) |
10077 | { |
10078 | enum vect_cost_model_location cost_loc |
10079 | = hoist_p ? vect_prologue : vect_body; |
10080 | unsigned int cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load, |
10081 | stmt_info, misalign: 0, where: cost_loc); |
10082 | cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info, misalign: 0, |
10083 | where: cost_loc); |
10084 | unsigned int prologue_cost = hoist_p ? cost : 0; |
10085 | unsigned int inside_cost = hoist_p ? 0 : cost; |
10086 | if (dump_enabled_p ()) |
10087 | dump_printf_loc (MSG_NOTE, vect_location, |
10088 | "vect_model_load_cost: inside_cost = %d, " |
10089 | "prologue_cost = %d .\n" , |
10090 | inside_cost, prologue_cost); |
10091 | return true; |
10092 | } |
10093 | if (hoist_p) |
10094 | { |
10095 | gassign *stmt = as_a <gassign *> (p: stmt_info->stmt); |
10096 | if (dump_enabled_p ()) |
10097 | dump_printf_loc (MSG_NOTE, vect_location, |
10098 | "hoisting out of the vectorized loop: %G" , |
10099 | (gimple *) stmt); |
10100 | scalar_dest = copy_ssa_name (var: scalar_dest); |
10101 | tree rhs = unshare_expr (gimple_assign_rhs1 (gs: stmt)); |
10102 | edge pe = loop_preheader_edge (loop); |
10103 | gphi *vphi = get_virtual_phi (loop->header); |
10104 | tree vuse; |
10105 | if (vphi) |
10106 | vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe); |
10107 | else |
10108 | vuse = gimple_vuse (g: gsi_stmt (i: *gsi)); |
10109 | gimple *new_stmt = gimple_build_assign (scalar_dest, rhs); |
10110 | gimple_set_vuse (g: new_stmt, vuse); |
10111 | gsi_insert_on_edge_immediate (pe, new_stmt); |
10112 | } |
10113 | /* These copies are all equivalent. */ |
10114 | if (hoist_p) |
10115 | new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest, |
10116 | type: vectype, NULL); |
10117 | else |
10118 | { |
10119 | gimple_stmt_iterator gsi2 = *gsi; |
10120 | gsi_next (i: &gsi2); |
10121 | new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest, |
10122 | type: vectype, gsi: &gsi2); |
10123 | } |
10124 | gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); |
10125 | if (slp) |
10126 | for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) |
10127 | slp_node->push_vec_def (def: new_stmt); |
10128 | else |
10129 | { |
10130 | for (j = 0; j < ncopies; ++j) |
10131 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
10132 | *vec_stmt = new_stmt; |
10133 | } |
10134 | return true; |
10135 | } |
10136 | |
10137 | if (memory_access_type == VMAT_ELEMENTWISE |
10138 | || memory_access_type == VMAT_STRIDED_SLP) |
10139 | { |
10140 | gimple_stmt_iterator incr_gsi; |
10141 | bool insert_after; |
10142 | tree offvar; |
10143 | tree ivstep; |
10144 | tree running_off; |
10145 | vec<constructor_elt, va_gc> *v = NULL; |
10146 | tree stride_base, stride_step, alias_off; |
10147 | /* Checked by get_load_store_type. */ |
10148 | unsigned int const_nunits = nunits.to_constant (); |
10149 | unsigned HOST_WIDE_INT cst_offset = 0; |
10150 | tree dr_offset; |
10151 | unsigned int inside_cost = 0; |
10152 | |
10153 | gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)); |
10154 | gcc_assert (!nested_in_vect_loop); |
10155 | |
10156 | if (grouped_load) |
10157 | { |
10158 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
10159 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
10160 | } |
10161 | else |
10162 | { |
10163 | first_stmt_info = stmt_info; |
10164 | first_dr_info = dr_info; |
10165 | } |
10166 | |
10167 | if (slp && grouped_load) |
10168 | { |
10169 | group_size = DR_GROUP_SIZE (first_stmt_info); |
10170 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
10171 | } |
10172 | else |
10173 | { |
10174 | if (grouped_load) |
10175 | cst_offset |
10176 | = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))) |
10177 | * vect_get_place_in_interleaving_chain (stmt_info, |
10178 | first_stmt_info)); |
10179 | group_size = 1; |
10180 | ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); |
10181 | } |
10182 | |
10183 | if (!costing_p) |
10184 | { |
10185 | dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info); |
10186 | stride_base = fold_build_pointer_plus ( |
10187 | DR_BASE_ADDRESS (first_dr_info->dr), |
10188 | size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset), |
10189 | convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); |
10190 | stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); |
10191 | |
10192 | /* For a load with loop-invariant (but other than power-of-2) |
10193 | stride (i.e. not a grouped access) like so: |
10194 | |
10195 | for (i = 0; i < n; i += stride) |
10196 | ... = array[i]; |
10197 | |
10198 | we generate a new induction variable and new accesses to |
10199 | form a new vector (or vectors, depending on ncopies): |
10200 | |
10201 | for (j = 0; ; j += VF*stride) |
10202 | tmp1 = array[j]; |
10203 | tmp2 = array[j + stride]; |
10204 | ... |
10205 | vectemp = {tmp1, tmp2, ...} |
10206 | */ |
10207 | |
10208 | ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step, |
10209 | build_int_cst (TREE_TYPE (stride_step), vf)); |
10210 | |
10211 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
10212 | |
10213 | stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); |
10214 | ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); |
10215 | create_iv (stride_base, PLUS_EXPR, ivstep, NULL, |
10216 | loop, &incr_gsi, insert_after, |
10217 | &offvar, NULL); |
10218 | |
10219 | stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); |
10220 | } |
10221 | |
10222 | running_off = offvar; |
10223 | alias_off = build_int_cst (ref_type, 0); |
10224 | int nloads = const_nunits; |
10225 | int lnel = 1; |
10226 | tree ltype = TREE_TYPE (vectype); |
10227 | tree lvectype = vectype; |
10228 | auto_vec<tree> dr_chain; |
10229 | if (memory_access_type == VMAT_STRIDED_SLP) |
10230 | { |
10231 | if (group_size < const_nunits) |
10232 | { |
10233 | /* First check if vec_init optab supports construction from vector |
10234 | elts directly. Otherwise avoid emitting a constructor of |
10235 | vector elements by performing the loads using an integer type |
10236 | of the same size, constructing a vector of those and then |
10237 | re-interpreting it as the original vector type. This avoids a |
10238 | huge runtime penalty due to the general inability to perform |
10239 | store forwarding from smaller stores to a larger load. */ |
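	      /* For example, with a V8QI vectype and GROUP_SIZE == 2,
		 four HImode loads can be combined into a V4HI vector
		 that is then VIEW_CONVERTed back to V8QI (an
		 illustration; the modes chosen depend on the target).  */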
10240 | tree ptype; |
10241 | tree vtype |
10242 | = vector_vector_composition_type (vtype: vectype, |
10243 | nelts: const_nunits / group_size, |
10244 | ptype: &ptype); |
10245 | if (vtype != NULL_TREE) |
10246 | { |
10247 | nloads = const_nunits / group_size; |
10248 | lnel = group_size; |
10249 | lvectype = vtype; |
10250 | ltype = ptype; |
10251 | } |
10252 | } |
10253 | else |
10254 | { |
10255 | nloads = 1; |
10256 | lnel = const_nunits; |
10257 | ltype = vectype; |
10258 | } |
10259 | ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); |
10260 | } |
  /* Load vector(1) scalar_type if the vectype is a single-element vector.  */
10262 | else if (nloads == 1) |
10263 | ltype = vectype; |
10264 | |
10265 | if (slp) |
10266 | { |
10267 | /* For SLP permutation support we need to load the whole group, |
10268 | not only the number of vector stmts the permutation result |
10269 | fits in. */ |
10270 | if (slp_perm) |
10271 | { |
10272 | /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for |
10273 | variable VF. */ |
10274 | unsigned int const_vf = vf.to_constant (); |
10275 | ncopies = CEIL (group_size * const_vf, const_nunits); |
10276 | dr_chain.create (nelems: ncopies); |
10277 | } |
10278 | else |
10279 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10280 | } |
10281 | unsigned int group_el = 0; |
10282 | unsigned HOST_WIDE_INT |
10283 | elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
10284 | unsigned int n_groups = 0; |
10285 | /* For costing some adjacent vector loads, we'd like to cost with |
10286 | the total number of them once instead of cost each one by one. */ |
10287 | unsigned int n_adjacent_loads = 0; |
10288 | for (j = 0; j < ncopies; j++) |
10289 | { |
10290 | if (nloads > 1 && !costing_p) |
10291 | vec_alloc (v, nelems: nloads); |
10292 | gimple *new_stmt = NULL; |
10293 | for (i = 0; i < nloads; i++) |
10294 | { |
10295 | if (costing_p) |
10296 | { |
10297 | /* For VMAT_ELEMENTWISE, just cost it as scalar_load to |
10298 | avoid ICE, see PR110776. */ |
10299 | if (VECTOR_TYPE_P (ltype) |
10300 | && memory_access_type != VMAT_ELEMENTWISE) |
10301 | n_adjacent_loads++; |
10302 | else |
10303 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load, |
10304 | stmt_info, misalign: 0, where: vect_body); |
10305 | continue; |
10306 | } |
10307 | tree this_off = build_int_cst (TREE_TYPE (alias_off), |
10308 | group_el * elsz + cst_offset); |
10309 | tree data_ref = build2 (MEM_REF, ltype, running_off, this_off); |
10310 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
	  new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10313 | if (nloads > 1) |
10314 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
10315 | gimple_assign_lhs (new_stmt)); |
10316 | |
10317 | group_el += lnel; |
10318 | if (! slp |
10319 | || group_el == group_size) |
10320 | { |
10321 | n_groups++; |
10322 | /* When doing SLP make sure to not load elements from |
10323 | the next vector iteration, those will not be accessed |
10324 | so just use the last element again. See PR107451. */ |
10325 | if (!slp || known_lt (n_groups, vf)) |
10326 | { |
		  tree newoff = copy_ssa_name (running_off);
10328 | gimple *incr |
10329 | = gimple_build_assign (newoff, POINTER_PLUS_EXPR, |
10330 | running_off, stride_step); |
		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10332 | running_off = newoff; |
10333 | } |
10334 | group_el = 0; |
10335 | } |
10336 | } |
10337 | |
10338 | if (nloads > 1) |
10339 | { |
10340 | if (costing_p) |
	    inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
					     stmt_info, 0, vect_body);
10343 | else |
10344 | { |
10345 | tree vec_inv = build_constructor (lvectype, v); |
	      new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
					   lvectype, gsi);
10348 | new_stmt = SSA_NAME_DEF_STMT (new_temp); |
10349 | if (lvectype != vectype) |
10350 | { |
10351 | new_stmt |
		    = gimple_build_assign (make_ssa_name (vectype),
10353 | VIEW_CONVERT_EXPR, |
10354 | build1 (VIEW_CONVERT_EXPR, |
10355 | vectype, new_temp)); |
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
10358 | } |
10359 | } |
10360 | } |
10361 | |
10362 | if (!costing_p) |
10363 | { |
10364 | if (slp) |
10365 | { |
10366 | if (slp_perm) |
		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10368 | else |
		slp_node->push_vec_def (new_stmt);
10370 | } |
10371 | else |
10372 | { |
10373 | if (j == 0) |
10374 | *vec_stmt = new_stmt; |
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10376 | } |
10377 | } |
10378 | } |
10379 | if (slp_perm) |
10380 | { |
10381 | unsigned n_perms; |
10382 | if (costing_p) |
10383 | { |
10384 | unsigned n_loads; |
10385 | vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf, |
10386 | true, &n_perms, &n_loads); |
	  inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
					   first_stmt_info, 0, vect_body);
10389 | } |
10390 | else |
10391 | vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf, |
10392 | false, &n_perms); |
10393 | } |
10394 | |
10395 | if (costing_p) |
10396 | { |
10397 | if (n_adjacent_loads > 0) |
	vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
			    alignment_support_scheme, misalignment, false,
			    &inside_cost, nullptr, cost_vec, cost_vec,
			    true);
10402 | if (dump_enabled_p ()) |
10403 | dump_printf_loc (MSG_NOTE, vect_location, |
10404 | "vect_model_load_cost: inside_cost = %u, " |
10405 | "prologue_cost = 0 .\n" , |
10406 | inside_cost); |
10407 | } |
10408 | |
10409 | return true; |
10410 | } |
10411 | |
10412 | if (memory_access_type == VMAT_GATHER_SCATTER |
10413 | || (!slp && memory_access_type == VMAT_CONTIGUOUS)) |
10414 | grouped_load = false; |
10415 | |
10416 | if (grouped_load |
10417 | || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())) |
10418 | { |
10419 | if (grouped_load) |
10420 | { |
10421 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
10422 | group_size = DR_GROUP_SIZE (first_stmt_info); |
10423 | } |
10424 | else |
10425 | { |
10426 | first_stmt_info = stmt_info; |
10427 | group_size = 1; |
10428 | } |
10429 | /* For SLP vectorization we directly vectorize a subchain |
10430 | without permutation. */ |
10431 | if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
10432 | first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
10433 | /* For BB vectorization always use the first stmt to base |
10434 | the data ref pointer on. */ |
10435 | if (bb_vinfo) |
10436 | first_stmt_info_for_drptr |
10437 | = vect_find_first_scalar_stmt_in_slp (slp_node); |
10438 | |
10439 | /* Check if the chain of loads is already vectorized. */ |
10440 | if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () |
10441 | /* For SLP we would need to copy over SLP_TREE_VEC_DEFS. |
10442 | ??? But we can only do so if there is exactly one |
10443 | as we have no way to get at the rest. Leave the CSE |
10444 | opportunity alone. |
10445 | ??? With the group load eventually participating |
10446 | in multiple different permutations (having multiple |
10447 | slp nodes which refer to the same group) the CSE |
10448 | is even wrong code. See PR56270. */ |
10449 | && !slp) |
10450 | { |
10451 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
10452 | return true; |
10453 | } |
10454 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
10455 | group_gap_adj = 0; |
10456 | |
10457 | /* VEC_NUM is the number of vect stmts to be created for this group. */ |
10458 | if (slp) |
10459 | { |
10460 | grouped_load = false; |
10461 | /* If an SLP permutation is from N elements to N elements, |
10462 | and if one vector holds a whole number of N, we can load |
10463 | the inputs to the permutation in the same way as an |
10464 | unpermuted sequence. In other cases we need to load the |
10465 | whole group, not only the number of vector stmts the |
10466 | permutation result fits in. */ |
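	  /* For example, with group_size == SLP_TREE_LANES == 4 and
	     nunits 8 the permutation stays within each loaded vector and
	     the unpermuted path below applies; with group_size 3, VF 4
	     and nunits 4 we instead need vec_num = CEIL (3 * 4, 4) = 3
	     vectors to cover the whole group.  */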
10467 | unsigned scalar_lanes = SLP_TREE_LANES (slp_node); |
10468 | if (slp_perm |
10469 | && (group_size != scalar_lanes |
	      || !multiple_p (nunits, group_size)))
10471 | { |
10472 | /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for |
10473 | variable VF; see vect_transform_slp_perm_load. */ |
10474 | unsigned int const_vf = vf.to_constant (); |
10475 | unsigned int const_nunits = nunits.to_constant (); |
10476 | vec_num = CEIL (group_size * const_vf, const_nunits); |
10477 | group_gap_adj = vf * group_size - nunits * vec_num; |
10478 | } |
10479 | else |
10480 | { |
10481 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10482 | group_gap_adj |
10483 | = group_size - scalar_lanes; |
10484 | } |
10485 | } |
10486 | else |
10487 | vec_num = group_size; |
10488 | |
10489 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
10490 | } |
10491 | else |
10492 | { |
10493 | first_stmt_info = stmt_info; |
10494 | first_dr_info = dr_info; |
10495 | group_size = vec_num = 1; |
10496 | group_gap_adj = 0; |
10497 | ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); |
10498 | if (slp) |
10499 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10500 | } |
10501 | |
10502 | gcc_assert (alignment_support_scheme); |
10503 | vec_loop_masks *loop_masks |
10504 | = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
10505 | ? &LOOP_VINFO_MASKS (loop_vinfo) |
10506 | : NULL); |
10507 | vec_loop_lens *loop_lens |
10508 | = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) |
10509 | ? &LOOP_VINFO_LENS (loop_vinfo) |
10510 | : NULL); |
10511 | |
10512 | /* Shouldn't go with length-based approach if fully masked. */ |
10513 | gcc_assert (!loop_lens || !loop_masks); |
10514 | |
10515 | /* Targets with store-lane instructions must not require explicit |
10516 | realignment. vect_supportable_dr_alignment always returns either |
10517 | dr_aligned or dr_unaligned_supported for masked operations. */ |
10518 | gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES |
10519 | && !mask |
10520 | && !loop_masks) |
10521 | || alignment_support_scheme == dr_aligned |
10522 | || alignment_support_scheme == dr_unaligned_supported); |
10523 | |
10524 | /* In case the vectorization factor (VF) is bigger than the number |
10525 | of elements that we can fit in a vectype (nunits), we have to generate |
10526 | more than one vector stmt - i.e - we need to "unroll" the |
10527 | vector stmt by a factor VF/nunits. In doing so, we record a pointer |
10528 | from one copy of the vector stmt to the next, in the field |
10529 | STMT_VINFO_RELATED_STMT. This is necessary in order to allow following |
10530 | stages to find the correct vector defs to be used when vectorizing |
10531 | stmts that use the defs of the current stmt. The example below |
10532 | illustrates the vectorization process when VF=16 and nunits=4 (i.e., we |
10533 | need to create 4 vectorized stmts): |
10534 | |
10535 | before vectorization: |
10536 | RELATED_STMT VEC_STMT |
10537 | S1: x = memref - - |
10538 | S2: z = x + 1 - - |
10539 | |
10540 | step 1: vectorize stmt S1: |
10541 | We first create the vector stmt VS1_0, and, as usual, record a |
10542 | pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. |
10543 | Next, we create the vector stmt VS1_1, and record a pointer to |
10544 | it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. |
10545 | Similarly, for VS1_2 and VS1_3. This is the resulting chain of |
10546 | stmts and pointers: |
10547 | RELATED_STMT VEC_STMT |
10548 | VS1_0: vx0 = memref0 VS1_1 - |
10549 | VS1_1: vx1 = memref1 VS1_2 - |
10550 | VS1_2: vx2 = memref2 VS1_3 - |
10551 | VS1_3: vx3 = memref3 - - |
10552 | S1: x = load - VS1_0 |
10553 | S2: z = x + 1 - - |
10554 | */ |
10555 | |
10556 | /* In case of interleaving (non-unit grouped access): |
10557 | |
10558 | S1: x2 = &base + 2 |
10559 | S2: x0 = &base |
10560 | S3: x1 = &base + 1 |
10561 | S4: x3 = &base + 3 |
10562 | |
10563 | Vectorized loads are created in the order of memory accesses |
10564 | starting from the access of the first stmt of the chain: |
10565 | |
10566 | VS1: vx0 = &base |
10567 | VS2: vx1 = &base + vec_size*1 |
10568 | VS3: vx3 = &base + vec_size*2 |
10569 | VS4: vx4 = &base + vec_size*3 |
10570 | |
10571 | Then permutation statements are generated: |
10572 | |
10573 | VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > |
10574 | VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > |
10575 | ... |
10576 | |
10577 | And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts |
10578 | (the order of the data-refs in the output of vect_permute_load_chain |
10579 | corresponds to the order of scalar stmts in the interleaving chain - see |
10580 | the documentation of vect_permute_load_chain()). |
10581 | The generation of permutation stmts and recording them in |
10582 | STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). |
10583 | |
10584 | In case of both multiple types and interleaving, the vector loads and |
10585 | permutation stmts above are created for every copy. The result vector |
10586 | stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the |
10587 | corresponding STMT_VINFO_RELATED_STMT for the next copies. */ |
10588 | |
10589 | /* If the data reference is aligned (dr_aligned) or potentially unaligned |
10590 | on a target that supports unaligned accesses (dr_unaligned_supported) |
10591 | we generate the following code: |
10592 | p = initial_addr; |
10593 | indx = 0; |
10594 | loop { |
10595 | p = p + indx * vectype_size; |
10596 | vec_dest = *(p); |
10597 | indx = indx + 1; |
10598 | } |
10599 | |
10600 | Otherwise, the data reference is potentially unaligned on a target that |
10601 | does not support unaligned accesses (dr_explicit_realign_optimized) - |
10602 | then generate the following code, in which the data in each iteration is |
10603 | obtained by two vector loads, one from the previous iteration, and one |
10604 | from the current iteration: |
10605 | p1 = initial_addr; |
10606 | msq_init = *(floor(p1)) |
10607 | p2 = initial_addr + VS - 1; |
10608 | realignment_token = call target_builtin; |
10609 | indx = 0; |
10610 | loop { |
10611 | p2 = p2 + indx * vectype_size |
10612 | lsq = *(floor(p2)) |
10613 | vec_dest = realign_load (msq, lsq, realignment_token) |
10614 | indx = indx + 1; |
10615 | msq = lsq; |
10616 | } */ |
10617 | |
10618 | /* If the misalignment remains the same throughout the execution of the |
10619 | loop, we can create the init_addr and permutation mask at the loop |
10620 | preheader. Otherwise, it needs to be created inside the loop. |
10621 | This can only occur when vectorizing memory accesses in the inner-loop |
10622 | nested within an outer-loop that is being vectorized. */ |
10623 | |
10624 | if (nested_in_vect_loop |
10625 | && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr), |
		      GET_MODE_SIZE (TYPE_MODE (vectype))))
10627 | { |
10628 | gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); |
10629 | compute_in_loop = true; |
10630 | } |
10631 | |
10632 | bool diff_first_stmt_info |
10633 | = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr; |
10634 | |
10635 | tree offset = NULL_TREE; |
10636 | if ((alignment_support_scheme == dr_explicit_realign_optimized |
10637 | || alignment_support_scheme == dr_explicit_realign) |
10638 | && !compute_in_loop) |
10639 | { |
10640 | /* If we have different first_stmt_info, we can't set up realignment |
10641 | here, since we can't guarantee first_stmt_info DR has been |
10642 | initialized yet, use first_stmt_info_for_drptr DR by bumping the |
10643 | distance from first_stmt_info DR instead as below. */ |
10644 | if (!costing_p) |
10645 | { |
10646 | if (!diff_first_stmt_info) |
10647 | msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, |
10648 | &realignment_token, |
10649 | alignment_support_scheme, NULL_TREE, |
10650 | &at_loop); |
10651 | if (alignment_support_scheme == dr_explicit_realign_optimized) |
10652 | { |
10653 | phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq)); |
10654 | offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), |
10655 | size_one_node); |
10656 | gcc_assert (!first_stmt_info_for_drptr); |
10657 | } |
10658 | } |
10659 | } |
10660 | else |
10661 | at_loop = loop; |
10662 | |
10663 | if (!known_eq (poffset, 0)) |
10664 | offset = (offset |
10665 | ? size_binop (PLUS_EXPR, offset, size_int (poffset)) |
10666 | : size_int (poffset)); |
10667 | |
10668 | tree bump; |
10669 | tree vec_offset = NULL_TREE; |
10670 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10671 | { |
10672 | aggr_type = NULL_TREE; |
10673 | bump = NULL_TREE; |
10674 | } |
10675 | else if (memory_access_type == VMAT_GATHER_SCATTER) |
10676 | { |
10677 | aggr_type = elem_type; |
10678 | if (!costing_p) |
	vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
					 &bump, &vec_offset, loop_lens);
10681 | } |
10682 | else |
10683 | { |
10684 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10685 | aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); |
10686 | else |
10687 | aggr_type = vectype; |
10688 | bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, |
10689 | memory_access_type, loop_lens); |
10690 | } |
10691 | |
10692 | auto_vec<tree> vec_offsets; |
10693 | auto_vec<tree> vec_masks; |
10694 | if (mask && !costing_p) |
10695 | { |
10696 | if (slp_node) |
10697 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], |
10698 | &vec_masks); |
10699 | else |
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
				       &vec_masks, mask_vectype);
10702 | } |
10703 | |
10704 | tree vec_mask = NULL_TREE; |
10705 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10706 | { |
10707 | gcc_assert (alignment_support_scheme == dr_aligned |
10708 | || alignment_support_scheme == dr_unaligned_supported); |
10709 | gcc_assert (grouped_load && !slp); |
10710 | |
10711 | unsigned int inside_cost = 0, prologue_cost = 0; |
      /* For costing some adjacent vector loads, we'd like to cost them
	 once with their total number instead of costing each one by one.  */
10714 | unsigned int n_adjacent_loads = 0; |
10715 | for (j = 0; j < ncopies; j++) |
10716 | { |
10717 | if (costing_p) |
10718 | { |
10719 | /* An IFN_LOAD_LANES will load all its vector results, |
10720 | regardless of which ones we actually need. Account |
10721 | for the cost of unused results. */ |
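	      /* Illustrative example: if DR_GROUP_SIZE is 4 but only
		 three group members remain in the chain, GAPS below ends
		 up as 1 and the load cost of one unused vector is
		 added.  */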
10722 | if (first_stmt_info == stmt_info) |
10723 | { |
10724 | unsigned int gaps = DR_GROUP_SIZE (first_stmt_info); |
10725 | stmt_vec_info next_stmt_info = first_stmt_info; |
10726 | do |
10727 | { |
10728 | gaps -= 1; |
10729 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
10730 | } |
10731 | while (next_stmt_info); |
10732 | if (gaps) |
10733 | { |
10734 | if (dump_enabled_p ()) |
10735 | dump_printf_loc (MSG_NOTE, vect_location, |
10736 | "vect_model_load_cost: %d " |
10737 | "unused vectors.\n" , |
10738 | gaps); |
		      vect_get_load_cost (vinfo, stmt_info, gaps,
					  alignment_support_scheme,
					  misalignment, false, &inside_cost,
					  &prologue_cost, cost_vec, cost_vec,
					  true);
10744 | } |
10745 | } |
10746 | n_adjacent_loads++; |
10747 | continue; |
10748 | } |
10749 | |
10750 | /* 1. Create the vector or array pointer update chain. */ |
10751 | if (j == 0) |
10752 | dataref_ptr |
10753 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
10754 | at_loop, offset, &dummy, gsi, |
10755 | &ptr_incr, false, bump); |
10756 | else |
10757 | { |
10758 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
10759 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
10760 | stmt_info, bump); |
10761 | } |
10762 | if (mask) |
10763 | vec_mask = vec_masks[j]; |
10764 | |
	  tree vec_array = create_vector_array (vectype, vec_num);
10766 | |
10767 | tree final_mask = NULL_TREE; |
10768 | tree final_len = NULL_TREE; |
10769 | tree bias = NULL_TREE; |
10770 | if (loop_masks) |
10771 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
10772 | ncopies, vectype, j); |
10773 | if (vec_mask) |
	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
					   vec_mask, gsi);
10776 | |
10777 | if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES) |
10778 | { |
10779 | if (loop_lens) |
10780 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
10781 | ncopies, vectype, j, 1); |
10782 | else |
10783 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
10784 | signed char biasval |
10785 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
10786 | bias = build_int_cst (intQI_type_node, biasval); |
10787 | if (!final_mask) |
10788 | { |
10789 | mask_vectype = truth_type_for (vectype); |
10790 | final_mask = build_minus_one_cst (mask_vectype); |
10791 | } |
10792 | } |
10793 | |
10794 | gcall *call; |
10795 | if (final_len && final_mask) |
10796 | { |
10797 | /* Emit: |
10798 | VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, |
10799 | VEC_MASK, LEN, BIAS). */ |
10800 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
10801 | tree alias_ptr = build_int_cst (ref_type, align); |
10802 | call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5, |
10803 | dataref_ptr, alias_ptr, |
10804 | final_mask, final_len, bias); |
10805 | } |
10806 | else if (final_mask) |
10807 | { |
10808 | /* Emit: |
10809 | VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, |
10810 | VEC_MASK). */ |
10811 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
10812 | tree alias_ptr = build_int_cst (ref_type, align); |
10813 | call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, |
10814 | dataref_ptr, alias_ptr, |
10815 | final_mask); |
10816 | } |
10817 | else |
10818 | { |
10819 | /* Emit: |
10820 | VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ |
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10822 | call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); |
10823 | } |
	  gimple_call_set_lhs (call, vec_array);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10827 | |
	  dr_chain.create (vec_num);
10829 | /* Extract each vector into an SSA_NAME. */ |
10830 | for (i = 0; i < vec_num; i++) |
10831 | { |
	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
10835 | } |
10836 | |
10837 | /* Record the mapping between SSA_NAMEs and statements. */ |
10838 | vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain); |
10839 | |
10840 | /* Record that VEC_ARRAY is now dead. */ |
	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10842 | |
10843 | dr_chain.release (); |
10844 | |
10845 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
10846 | } |
10847 | |
10848 | if (costing_p) |
10849 | { |
10850 | if (n_adjacent_loads > 0) |
	    vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
				alignment_support_scheme, misalignment, false,
				&inside_cost, &prologue_cost, cost_vec,
				cost_vec, true);
10855 | if (dump_enabled_p ()) |
10856 | dump_printf_loc (MSG_NOTE, vect_location, |
10857 | "vect_model_load_cost: inside_cost = %u, " |
10858 | "prologue_cost = %u .\n" , |
10859 | inside_cost, prologue_cost); |
10860 | } |
10861 | |
10862 | return true; |
10863 | } |
10864 | |
10865 | if (memory_access_type == VMAT_GATHER_SCATTER) |
10866 | { |
10867 | gcc_assert (alignment_support_scheme == dr_aligned |
10868 | || alignment_support_scheme == dr_unaligned_supported); |
10869 | gcc_assert (!grouped_load && !slp_perm); |
10870 | |
10871 | unsigned int inside_cost = 0, prologue_cost = 0; |
10872 | for (j = 0; j < ncopies; j++) |
10873 | { |
10874 | /* 1. Create the vector or array pointer update chain. */ |
10875 | if (j == 0 && !costing_p) |
10876 | { |
10877 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
		vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
					     slp_node, &gs_info, &dataref_ptr,
					     &vec_offsets);
10881 | else |
10882 | dataref_ptr |
10883 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
10884 | at_loop, offset, &dummy, gsi, |
10885 | &ptr_incr, false, bump); |
10886 | } |
10887 | else if (!costing_p) |
10888 | { |
10889 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
10890 | if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10891 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
10892 | gsi, stmt_info, bump); |
10893 | } |
10894 | |
10895 | gimple *new_stmt = NULL; |
10896 | for (i = 0; i < vec_num; i++) |
10897 | { |
10898 | tree final_mask = NULL_TREE; |
10899 | tree final_len = NULL_TREE; |
10900 | tree bias = NULL_TREE; |
10901 | if (!costing_p) |
10902 | { |
10903 | if (mask) |
10904 | vec_mask = vec_masks[vec_num * j + i]; |
10905 | if (loop_masks) |
10906 | final_mask |
10907 | = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
10908 | vec_num * ncopies, vectype, |
10909 | vec_num * j + i); |
10910 | if (vec_mask) |
		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
					       final_mask, vec_mask, gsi);
10913 | |
10914 | if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10915 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
10916 | gsi, stmt_info, bump); |
10917 | } |
10918 | |
10919 | /* 2. Create the vector-load in the loop. */ |
10920 | unsigned HOST_WIDE_INT align; |
10921 | if (gs_info.ifn != IFN_LAST) |
10922 | { |
10923 | if (costing_p) |
10924 | { |
		      unsigned int cnunits = vect_nunits_for_cost (vectype);
		      inside_cost
			= record_stmt_cost (cost_vec, cnunits, scalar_load,
					    stmt_info, 0, vect_body);
10929 | continue; |
10930 | } |
10931 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10932 | vec_offset = vec_offsets[vec_num * j + i]; |
10933 | tree zero = build_zero_cst (vectype); |
10934 | tree scale = size_int (gs_info.scale); |
10935 | |
10936 | if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD) |
10937 | { |
10938 | if (loop_lens) |
10939 | final_len |
10940 | = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
10941 | vec_num * ncopies, vectype, |
10942 | vec_num * j + i, 1); |
10943 | else |
10944 | final_len |
10945 | = build_int_cst (sizetype, |
				     TYPE_VECTOR_SUBPARTS (vectype));
10947 | signed char biasval |
10948 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
10949 | bias = build_int_cst (intQI_type_node, biasval); |
10950 | if (!final_mask) |
10951 | { |
10952 | mask_vectype = truth_type_for (vectype); |
10953 | final_mask = build_minus_one_cst (mask_vectype); |
10954 | } |
10955 | } |
10956 | |
10957 | gcall *call; |
10958 | if (final_len && final_mask) |
10959 | call |
10960 | = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7, |
10961 | dataref_ptr, vec_offset, |
10962 | scale, zero, final_mask, |
10963 | final_len, bias); |
10964 | else if (final_mask) |
10965 | call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, |
10966 | dataref_ptr, vec_offset, |
10967 | scale, zero, final_mask); |
10968 | else |
10969 | call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, |
10970 | dataref_ptr, vec_offset, |
10971 | scale, zero); |
		  gimple_call_set_nothrow (call, true);
10973 | new_stmt = call; |
10974 | data_ref = NULL_TREE; |
10975 | } |
10976 | else if (gs_info.decl) |
10977 | { |
10978 | /* The builtin decls path for gather is legacy, x86 only. */ |
10979 | gcc_assert (!final_len && nunits.is_constant ()); |
10980 | if (costing_p) |
10981 | { |
		      unsigned int cnunits = vect_nunits_for_cost (vectype);
		      inside_cost
			= record_stmt_cost (cost_vec, cnunits, scalar_load,
					    stmt_info, 0, vect_body);
10986 | continue; |
10987 | } |
10988 | poly_uint64 offset_nunits |
		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
10990 | if (known_eq (nunits, offset_nunits)) |
10991 | { |
10992 | new_stmt = vect_build_one_gather_load_call |
			  (vinfo, stmt_info, gsi, &gs_info,
			   dataref_ptr, vec_offsets[vec_num * j + i],
			   final_mask);
10996 | data_ref = NULL_TREE; |
10997 | } |
10998 | else if (known_eq (nunits, offset_nunits * 2)) |
10999 | { |
		      /* We have an offset vector with half the number of
			 lanes but the builtins will produce full vectype
			 data with just the lower lanes filled.  */
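		      /* Sketch, assuming nunits 8 and offset_nunits 4:
			 two builtin calls each fill lanes 0-3 of a
			 full-width result, and the final VEC_PERM_EXPR
			 below composes them with the selector
			 { 0, 1, 2, 3, 8, 9, 10, 11 }.  */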
11003 | new_stmt = vect_build_one_gather_load_call |
			  (vinfo, stmt_info, gsi, &gs_info,
			   dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
			   final_mask);
		      tree low = make_ssa_name (vectype);
		      gimple_set_lhs (new_stmt, low);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
11011 | |
		      /* Now put the upper half of final_mask into the
			 lower half.  */
11013 | if (final_mask |
11014 | && !SCALAR_INT_MODE_P |
11015 | (TYPE_MODE (TREE_TYPE (final_mask)))) |
11016 | { |
11017 | int count = nunits.to_constant (); |
11018 | vec_perm_builder sel (count, count, 1); |
			  sel.quick_grow (count);
11020 | for (int i = 0; i < count; ++i) |
11021 | sel[i] = i | (count / 2); |
11022 | vec_perm_indices indices (sel, 2, count); |
11023 | tree perm_mask = vect_gen_perm_mask_checked |
			    (TREE_TYPE (final_mask), indices);
11025 | new_stmt = gimple_build_assign (NULL_TREE, |
11026 | VEC_PERM_EXPR, |
11027 | final_mask, |
11028 | final_mask, |
11029 | perm_mask); |
11030 | final_mask = make_ssa_name (TREE_TYPE (final_mask)); |
11031 | gimple_set_lhs (new_stmt, final_mask); |
			  vect_finish_stmt_generation (vinfo, stmt_info,
						       new_stmt, gsi);
11034 | } |
11035 | else if (final_mask) |
11036 | { |
11037 | new_stmt = gimple_build_assign (NULL_TREE, |
11038 | VEC_UNPACK_HI_EXPR, |
11039 | final_mask); |
11040 | final_mask = make_ssa_name |
			    (truth_type_for (gs_info.offset_vectype));
11042 | gimple_set_lhs (new_stmt, final_mask); |
			  vect_finish_stmt_generation (vinfo, stmt_info,
						       new_stmt, gsi);
11045 | } |
11046 | |
11047 | new_stmt = vect_build_one_gather_load_call |
			  (vinfo, stmt_info, gsi, &gs_info,
			   dataref_ptr,
			   vec_offsets[2 * vec_num * j + 2 * i + 1],
			   final_mask);
		      tree high = make_ssa_name (vectype);
		      gimple_set_lhs (new_stmt, high);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
11056 | |
		      /* Compose low + high.  */
11058 | int count = nunits.to_constant (); |
11059 | vec_perm_builder sel (count, count, 1); |
		      sel.quick_grow (count);
11061 | for (int i = 0; i < count; ++i) |
11062 | sel[i] = i < count / 2 ? i : i + count / 2; |
11063 | vec_perm_indices indices (sel, 2, count); |
11064 | tree perm_mask |
			= vect_gen_perm_mask_checked (vectype, indices);
11066 | new_stmt = gimple_build_assign (NULL_TREE, |
11067 | VEC_PERM_EXPR, |
11068 | low, high, perm_mask); |
11069 | data_ref = NULL_TREE; |
11070 | } |
11071 | else if (known_eq (nunits * 2, offset_nunits)) |
11072 | { |
		      /* We have an offset vector with double the number of
			 lanes.  Select the low/high part accordingly.  */
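		      /* E.g. with nunits 2 and offset_nunits 4 one offset
			 vector serves two data vectors: even copies use it
			 as-is, odd copies first move the upper half down
			 with a VEC_PERM_EXPR selector of { 2, 3, 2, 3 }.  */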
11075 | vec_offset = vec_offsets[(vec_num * j + i) / 2]; |
11076 | if ((vec_num * j + i) & 1) |
11077 | { |
11078 | int count = offset_nunits.to_constant (); |
11079 | vec_perm_builder sel (count, count, 1); |
			  sel.quick_grow (count);
11081 | for (int i = 0; i < count; ++i) |
11082 | sel[i] = i | (count / 2); |
11083 | vec_perm_indices indices (sel, 2, count); |
11084 | tree perm_mask = vect_gen_perm_mask_checked |
			    (TREE_TYPE (vec_offset), indices);
11086 | new_stmt = gimple_build_assign (NULL_TREE, |
11087 | VEC_PERM_EXPR, |
11088 | vec_offset, |
11089 | vec_offset, |
11090 | perm_mask); |
11091 | vec_offset = make_ssa_name (TREE_TYPE (vec_offset)); |
11092 | gimple_set_lhs (new_stmt, vec_offset); |
			  vect_finish_stmt_generation (vinfo, stmt_info,
						       new_stmt, gsi);
11095 | } |
11096 | new_stmt = vect_build_one_gather_load_call |
			  (vinfo, stmt_info, gsi, &gs_info,
			   dataref_ptr, vec_offset, final_mask);
11099 | data_ref = NULL_TREE; |
11100 | } |
11101 | else |
11102 | gcc_unreachable (); |
11103 | } |
11104 | else |
11105 | { |
11106 | /* Emulated gather-scatter. */ |
11107 | gcc_assert (!final_mask); |
11108 | unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); |
11109 | if (costing_p) |
11110 | { |
		      /* For emulated gathers, N offset vector element
			 extracts (we assume the scale and offset addition
			 are consumed by the load).  */
		      inside_cost = record_stmt_cost (cost_vec, const_nunits,
						      vec_to_scalar, stmt_info,
						      0, vect_body);
		      /* N scalar loads plus gathering them into a
			 vector.  */
		      inside_cost
			= record_stmt_cost (cost_vec, const_nunits, scalar_load,
					    stmt_info, 0, vect_body);
		      inside_cost
			= record_stmt_cost (cost_vec, 1, vec_construct,
					    stmt_info, 0, vect_body);
11124 | continue; |
11125 | } |
11126 | unsigned HOST_WIDE_INT const_offset_nunits |
		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11128 | .to_constant (); |
11129 | vec<constructor_elt, va_gc> *ctor_elts; |
		  vec_alloc (ctor_elts, const_nunits);
11131 | gimple_seq stmts = NULL; |
11132 | /* We support offset vectors with more elements |
11133 | than the data vector for now. */ |
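		  /* E.g. const_offset_nunits 8 with const_nunits 4 gives
		     FACTOR 2: consecutive data vectors share one offset
		     vector, alternating between its lower and upper
		     const_nunits elements.  */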
11134 | unsigned HOST_WIDE_INT factor |
11135 | = const_offset_nunits / const_nunits; |
11136 | vec_offset = vec_offsets[(vec_num * j + i) / factor]; |
11137 | unsigned elt_offset = (j % factor) * const_nunits; |
11138 | tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); |
11139 | tree scale = size_int (gs_info.scale); |
11140 | align = get_object_alignment (DR_REF (first_dr_info->dr)); |
11141 | tree ltype = build_aligned_type (TREE_TYPE (vectype), align); |
11142 | for (unsigned k = 0; k < const_nunits; ++k) |
11143 | { |
11144 | tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type), |
11145 | bitsize_int (k + elt_offset)); |
11146 | tree idx |
			= gimple_build (&stmts, BIT_FIELD_REF, idx_type,
					vec_offset, TYPE_SIZE (idx_type), boff);
		      idx = gimple_convert (&stmts, sizetype, idx);
		      idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
					  scale);
		      tree ptr = gimple_build (&stmts, PLUS_EXPR,
					       TREE_TYPE (dataref_ptr),
					       dataref_ptr, idx);
		      ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11156 | tree elt = make_ssa_name (TREE_TYPE (vectype)); |
11157 | tree ref = build2 (MEM_REF, ltype, ptr, |
11158 | build_int_cst (ref_type, 0)); |
11159 | new_stmt = gimple_build_assign (elt, ref); |
		      gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11161 | gimple_seq_add_stmt (&stmts, new_stmt); |
11162 | CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt); |
11163 | } |
11164 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
11165 | new_stmt = gimple_build_assign ( |
11166 | NULL_TREE, build_constructor (vectype, ctor_elts)); |
11167 | data_ref = NULL_TREE; |
11168 | } |
11169 | |
11170 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11171 | /* DATA_REF is null if we've already built the statement. */ |
11172 | if (data_ref) |
11173 | { |
11174 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11175 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
11176 | } |
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11180 | |
11181 | /* Store vector loads in the corresponding SLP_NODE. */ |
11182 | if (slp) |
		slp_node->push_vec_def (new_stmt);
11184 | } |
11185 | |
11186 | if (!slp && !costing_p) |
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11188 | } |
11189 | |
11190 | if (!slp && !costing_p) |
11191 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11192 | |
11193 | if (costing_p && dump_enabled_p ()) |
11194 | dump_printf_loc (MSG_NOTE, vect_location, |
11195 | "vect_model_load_cost: inside_cost = %u, " |
11196 | "prologue_cost = %u .\n" , |
11197 | inside_cost, prologue_cost); |
11198 | return true; |
11199 | } |
11200 | |
11201 | poly_uint64 group_elt = 0; |
11202 | unsigned int inside_cost = 0, prologue_cost = 0; |
  /* For costing some adjacent vector loads, we'd like to cost them
     once with their total number instead of costing each one by one.  */
11205 | unsigned int n_adjacent_loads = 0; |
11206 | for (j = 0; j < ncopies; j++) |
11207 | { |
11208 | /* 1. Create the vector or array pointer update chain. */ |
11209 | if (j == 0 && !costing_p) |
11210 | { |
11211 | bool simd_lane_access_p |
11212 | = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; |
11213 | if (simd_lane_access_p |
11214 | && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR |
11215 | && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) |
	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11217 | && integer_zerop (DR_INIT (first_dr_info->dr)) |
11218 | && alias_sets_conflict_p (get_alias_set (aggr_type), |
11219 | get_alias_set (TREE_TYPE (ref_type))) |
11220 | && (alignment_support_scheme == dr_aligned |
11221 | || alignment_support_scheme == dr_unaligned_supported)) |
11222 | { |
11223 | dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); |
11224 | dataref_offset = build_int_cst (ref_type, 0); |
11225 | } |
11226 | else if (diff_first_stmt_info) |
11227 | { |
11228 | dataref_ptr |
11229 | = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr, |
11230 | aggr_type, at_loop, offset, &dummy, |
11231 | gsi, &ptr_incr, simd_lane_access_p, |
11232 | bump); |
11233 | /* Adjust the pointer by the difference to first_stmt. */ |
11234 | data_reference_p ptrdr |
11235 | = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); |
11236 | tree diff |
11237 | = fold_convert (sizetype, |
11238 | size_binop (MINUS_EXPR, |
11239 | DR_INIT (first_dr_info->dr), |
11240 | DR_INIT (ptrdr))); |
11241 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11242 | stmt_info, diff); |
11243 | if (alignment_support_scheme == dr_explicit_realign) |
11244 | { |
11245 | msq = vect_setup_realignment (vinfo, |
11246 | first_stmt_info_for_drptr, gsi, |
11247 | &realignment_token, |
11248 | alignment_support_scheme, |
11249 | dataref_ptr, &at_loop); |
11250 | gcc_assert (!compute_in_loop); |
11251 | } |
11252 | } |
11253 | else |
11254 | dataref_ptr |
11255 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
11256 | at_loop, |
11257 | offset, &dummy, gsi, &ptr_incr, |
11258 | simd_lane_access_p, bump); |
11259 | } |
11260 | else if (!costing_p) |
11261 | { |
11262 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
11263 | if (dataref_offset) |
11264 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, |
11265 | bump); |
11266 | else |
11267 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11268 | stmt_info, bump); |
11269 | } |
11270 | |
11271 | if (grouped_load || slp_perm) |
	dr_chain.create (vec_num);
11273 | |
11274 | gimple *new_stmt = NULL; |
11275 | for (i = 0; i < vec_num; i++) |
11276 | { |
11277 | tree final_mask = NULL_TREE; |
11278 | tree final_len = NULL_TREE; |
11279 | tree bias = NULL_TREE; |
11280 | if (!costing_p) |
11281 | { |
11282 | if (mask) |
11283 | vec_mask = vec_masks[vec_num * j + i]; |
11284 | if (loop_masks) |
11285 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
11286 | vec_num * ncopies, vectype, |
11287 | vec_num * j + i); |
11288 | if (vec_mask) |
	      final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
					     final_mask, vec_mask, gsi);
11291 | |
11292 | if (i > 0) |
11293 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
11294 | gsi, stmt_info, bump); |
11295 | } |
11296 | |
11297 | /* 2. Create the vector-load in the loop. */ |
11298 | switch (alignment_support_scheme) |
11299 | { |
11300 | case dr_aligned: |
11301 | case dr_unaligned_supported: |
11302 | { |
11303 | if (costing_p) |
11304 | break; |
11305 | |
11306 | unsigned int misalign; |
11307 | unsigned HOST_WIDE_INT align; |
11308 | align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); |
11309 | if (alignment_support_scheme == dr_aligned) |
11310 | misalign = 0; |
11311 | else if (misalignment == DR_MISALIGNMENT_UNKNOWN) |
11312 | { |
11313 | align |
		  = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11315 | misalign = 0; |
11316 | } |
11317 | else |
11318 | misalign = misalignment; |
11319 | if (dataref_offset == NULL_TREE |
11320 | && TREE_CODE (dataref_ptr) == SSA_NAME) |
11321 | set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, |
11322 | misalign); |
	    align = least_bit_hwi (misalign | align);
11324 | |
11325 | /* Compute IFN when LOOP_LENS or final_mask valid. */ |
11326 | machine_mode vmode = TYPE_MODE (vectype); |
11327 | machine_mode new_vmode = vmode; |
11328 | internal_fn partial_ifn = IFN_LAST; |
11329 | if (loop_lens) |
11330 | { |
11331 | opt_machine_mode new_ovmode |
11332 | = get_len_load_store_mode (vmode, true, &partial_ifn); |
11333 | new_vmode = new_ovmode.require (); |
11334 | unsigned factor |
11335 | = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); |
11336 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
11337 | vec_num * ncopies, vectype, |
11338 | vec_num * j + i, factor); |
11339 | } |
11340 | else if (final_mask) |
11341 | { |
11342 | if (!can_vec_mask_load_store_p ( |
11343 | vmode, TYPE_MODE (TREE_TYPE (final_mask)), true, |
11344 | &partial_ifn)) |
11345 | gcc_unreachable (); |
11346 | } |
11347 | |
11348 | if (partial_ifn == IFN_MASK_LEN_LOAD) |
11349 | { |
11350 | if (!final_len) |
11351 | { |
11352 | /* Pass VF value to 'len' argument of |
11353 | MASK_LEN_LOAD if LOOP_LENS is invalid. */ |
11354 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
11355 | } |
11356 | if (!final_mask) |
11357 | { |
11358 | /* Pass all ones value to 'mask' argument of |
11359 | MASK_LEN_LOAD if final_mask is invalid. */ |
11360 | mask_vectype = truth_type_for (vectype); |
11361 | final_mask = build_minus_one_cst (mask_vectype); |
11362 | } |
11363 | } |
11364 | if (final_len) |
11365 | { |
11366 | signed char biasval |
11367 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
11368 | |
11369 | bias = build_int_cst (intQI_type_node, biasval); |
11370 | } |
11371 | |
11372 | if (final_len) |
11373 | { |
11374 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
11375 | gcall *call; |
11376 | if (partial_ifn == IFN_MASK_LEN_LOAD) |
11377 | call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5, |
11378 | dataref_ptr, ptr, |
11379 | final_mask, final_len, |
11380 | bias); |
11381 | else |
11382 | call = gimple_build_call_internal (IFN_LEN_LOAD, 4, |
11383 | dataref_ptr, ptr, |
11384 | final_len, bias); |
		gimple_call_set_nothrow (call, true);
11386 | new_stmt = call; |
11387 | data_ref = NULL_TREE; |
11388 | |
11389 | /* Need conversion if it's wrapped with VnQI. */ |
11390 | if (vmode != new_vmode) |
11391 | { |
11392 | tree new_vtype = build_vector_type_for_mode ( |
11393 | unsigned_intQI_type_node, new_vmode); |
11394 | tree var |
11395 | = vect_get_new_ssa_name (new_vtype, vect_simple_var); |
11396 | gimple_set_lhs (call, var); |
		    vect_finish_stmt_generation (vinfo, stmt_info, call,
						 gsi);
11399 | tree op = build1 (VIEW_CONVERT_EXPR, vectype, var); |
11400 | new_stmt = gimple_build_assign (vec_dest, |
11401 | VIEW_CONVERT_EXPR, op); |
11402 | } |
11403 | } |
11404 | else if (final_mask) |
11405 | { |
11406 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
11407 | gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3, |
11408 | dataref_ptr, ptr, |
11409 | final_mask); |
		gimple_call_set_nothrow (call, true);
11411 | new_stmt = call; |
11412 | data_ref = NULL_TREE; |
11413 | } |
11414 | else |
11415 | { |
11416 | tree ltype = vectype; |
11417 | tree new_vtype = NULL_TREE; |
11418 | unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info); |
11419 | unsigned int vect_align |
		= vect_known_alignment_in_bytes (first_dr_info, vectype);
11421 | unsigned int scalar_dr_size |
		= vect_get_scalar_dr_size (first_dr_info);
11423 | /* If there's no peeling for gaps but we have a gap |
11424 | with slp loads then load the lower half of the |
11425 | vector only. See get_group_load_store_type for |
11426 | when we apply this optimization. */ |
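		  /* E.g. nunits == group_size == 4 with a gap of 2: only
		     the first two elements are accessed, so we load just
		     a half vector and fill the remaining lanes with zeros
		     in the CONSTRUCTOR built further below.  */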
11427 | if (slp |
11428 | && loop_vinfo |
11429 | && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0 |
11430 | && known_eq (nunits, (group_size - gap) * 2) |
11431 | && known_eq (nunits, group_size) |
11432 | && gap >= (vect_align / scalar_dr_size)) |
11433 | { |
11434 | tree half_vtype; |
11435 | new_vtype |
		      = vector_vector_composition_type (vectype, 2,
							&half_vtype);
11438 | if (new_vtype != NULL_TREE) |
11439 | ltype = half_vtype; |
11440 | } |
11441 | tree offset |
11442 | = (dataref_offset ? dataref_offset |
11443 | : build_int_cst (ref_type, 0)); |
11444 | if (ltype != vectype |
11445 | && memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11446 | { |
11447 | unsigned HOST_WIDE_INT gap_offset |
11448 | = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type)); |
11449 | tree gapcst = build_int_cst (ref_type, gap_offset); |
11450 | offset = size_binop (PLUS_EXPR, offset, gapcst); |
11451 | } |
11452 | data_ref |
11453 | = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); |
11454 | if (alignment_support_scheme == dr_aligned) |
11455 | ; |
11456 | else |
11457 | TREE_TYPE (data_ref) |
11458 | = build_aligned_type (TREE_TYPE (data_ref), |
11459 | align * BITS_PER_UNIT); |
11460 | if (ltype != vectype) |
11461 | { |
11462 | vect_copy_ref_info (data_ref, |
11463 | DR_REF (first_dr_info->dr)); |
		    tree tem = make_ssa_name (ltype);
11465 | new_stmt = gimple_build_assign (tem, data_ref); |
		    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
						 gsi);
11468 | data_ref = NULL; |
11469 | vec<constructor_elt, va_gc> *v; |
		    vec_alloc (v, 2);
11471 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11472 | { |
11473 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
11474 | build_zero_cst (ltype)); |
11475 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); |
11476 | } |
11477 | else |
11478 | { |
11479 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); |
11480 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
11481 | build_zero_cst (ltype)); |
11482 | } |
11483 | gcc_assert (new_vtype != NULL_TREE); |
11484 | if (new_vtype == vectype) |
11485 | new_stmt = gimple_build_assign ( |
11486 | vec_dest, build_constructor (vectype, v)); |
11487 | else |
11488 | { |
			tree new_vname = make_ssa_name (new_vtype);
11490 | new_stmt = gimple_build_assign ( |
11491 | new_vname, build_constructor (new_vtype, v)); |
			vect_finish_stmt_generation (vinfo, stmt_info,
						     new_stmt, gsi);
11494 | new_stmt = gimple_build_assign ( |
11495 | vec_dest, |
11496 | build1 (VIEW_CONVERT_EXPR, vectype, new_vname)); |
11497 | } |
11498 | } |
11499 | } |
11500 | break; |
11501 | } |
11502 | case dr_explicit_realign: |
11503 | { |
11504 | if (costing_p) |
11505 | break; |
11506 | tree ptr, bump; |
11507 | |
11508 | tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
11509 | |
11510 | if (compute_in_loop) |
11511 | msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, |
11512 | &realignment_token, |
11513 | dr_explicit_realign, |
11514 | dataref_ptr, NULL); |
11515 | |
11516 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
	      ptr = copy_ssa_name (dataref_ptr);
11518 | else |
11519 | ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); |
11520 | // For explicit realign the target alignment should be |
11521 | // known at compile time. |
11522 | unsigned HOST_WIDE_INT align |
11523 | = DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); |
11524 | new_stmt = gimple_build_assign ( |
11525 | ptr, BIT_AND_EXPR, dataref_ptr, |
11526 | build_int_cst (TREE_TYPE (dataref_ptr), |
11527 | -(HOST_WIDE_INT) align)); |
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11529 | data_ref |
11530 | = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0)); |
11531 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11532 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11533 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	    gimple_assign_set_lhs (new_stmt, new_temp);
11536 | gimple_move_vops (new_stmt, stmt_info->stmt); |
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11538 | msq = new_temp; |
11539 | |
11540 | bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type)); |
11541 | bump = size_binop (MINUS_EXPR, bump, size_one_node); |
11542 | ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info, |
11543 | bump); |
11544 | new_stmt = gimple_build_assign ( |
11545 | NULL_TREE, BIT_AND_EXPR, ptr, |
11546 | build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); |
11547 | if (TREE_CODE (ptr) == SSA_NAME) |
	      ptr = copy_ssa_name (ptr, new_stmt);
11549 | else |
	      ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
	    gimple_assign_set_lhs (new_stmt, ptr);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11553 | data_ref |
11554 | = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0)); |
11555 | break; |
11556 | } |
11557 | case dr_explicit_realign_optimized: |
11558 | { |
11559 | if (costing_p) |
11560 | break; |
11561 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
	    new_temp = copy_ssa_name (dataref_ptr);
11563 | else |
11564 | new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); |
11565 | // We should only be doing this if we know the target |
11566 | // alignment at compile time. |
11567 | unsigned HOST_WIDE_INT align |
11568 | = DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); |
11569 | new_stmt = gimple_build_assign ( |
11570 | new_temp, BIT_AND_EXPR, dataref_ptr, |
11571 | build_int_cst (TREE_TYPE (dataref_ptr), |
11572 | -(HOST_WIDE_INT) align)); |
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11574 | data_ref = build2 (MEM_REF, vectype, new_temp, |
11575 | build_int_cst (ref_type, 0)); |
11576 | break; |
11577 | } |
11578 | default: |
11579 | gcc_unreachable (); |
11580 | } |
11581 | |
11582 | /* One common place to cost the above vect load for different |
11583 | alignment support schemes. */ |
11584 | if (costing_p) |
11585 | { |
	      /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only
		 need to take care of the first stmt, whose stmt_info is
		 first_stmt_info; iterating vec_num times on it covers the
		 cost for the remaining group members, which is consistent
		 with the transform.  The prologue cost for realign only
		 needs to be counted once for the whole group.  */
11592 | bool first_stmt_info_p = first_stmt_info == stmt_info; |
11593 | bool add_realign_cost = first_stmt_info_p && i == 0; |
11594 | if (memory_access_type == VMAT_CONTIGUOUS |
11595 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE |
11596 | || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE |
11597 | && (!grouped_load || first_stmt_info_p))) |
11598 | { |
11599 | /* Leave realign cases alone to keep them simple. */ |
11600 | if (alignment_support_scheme == dr_explicit_realign_optimized |
11601 | || alignment_support_scheme == dr_explicit_realign) |
		vect_get_load_cost (vinfo, stmt_info, 1,
				    alignment_support_scheme, misalignment,
				    add_realign_cost, &inside_cost,
				    &prologue_cost, cost_vec, cost_vec,
				    true);
11607 | else |
11608 | n_adjacent_loads++; |
11609 | } |
11610 | } |
11611 | else |
11612 | { |
11613 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11614 | /* DATA_REF is null if we've already built the statement. */ |
11615 | if (data_ref) |
11616 | { |
11617 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11618 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
11619 | } |
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11623 | } |
11624 | |
11625 | /* 3. Handle explicit realignment if necessary/supported. |
11626 | Create in loop: |
11627 | vec_dest = realign_load (msq, lsq, realignment_token) */ |
11628 | if (!costing_p |
11629 | && (alignment_support_scheme == dr_explicit_realign_optimized |
11630 | || alignment_support_scheme == dr_explicit_realign)) |
11631 | { |
	      lsq = gimple_assign_lhs (new_stmt);
11633 | if (!realignment_token) |
11634 | realignment_token = dataref_ptr; |
11635 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11636 | new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq, |
11637 | lsq, realignment_token); |
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11641 | |
11642 | if (alignment_support_scheme == dr_explicit_realign_optimized) |
11643 | { |
11644 | gcc_assert (phi); |
11645 | if (i == vec_num - 1 && j == ncopies - 1) |
11646 | add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), |
11647 | UNKNOWN_LOCATION); |
11648 | msq = lsq; |
11649 | } |
11650 | } |
11651 | |
11652 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11653 | { |
11654 | if (costing_p) |
		inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
						stmt_info, 0, vect_body);
11657 | else |
11658 | { |
11659 | tree perm_mask = perm_mask_for_reverse (vectype); |
		  new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
						   perm_mask, stmt_info, gsi);
11662 | new_stmt = SSA_NAME_DEF_STMT (new_temp); |
11663 | } |
11664 | } |
11665 | |
11666 | /* Collect vector loads and later create their permutation in |
11667 | vect_transform_grouped_load (). */ |
11668 | if (!costing_p && (grouped_load || slp_perm)) |
	    dr_chain.quick_push (new_temp);
11670 | |
11671 | /* Store vector loads in the corresponding SLP_NODE. */ |
11672 | if (!costing_p && slp && !slp_perm) |
	    slp_node->push_vec_def (new_stmt);
11674 | |
	  /* With SLP permutation we load the gaps as well; without it
	     we need to skip the gaps after we manage to fully load all
	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
11678 | group_elt += nunits; |
11679 | if (!costing_p |
	      && maybe_ne (group_gap_adj, 0U)
11681 | && !slp_perm |
11682 | && known_eq (group_elt, group_size - group_gap_adj)) |
11683 | { |
11684 | poly_wide_int bump_val |
11685 | = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); |
11686 | if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) |
11687 | == -1) |
11688 | bump_val = -bump_val; |
	      tree bump = wide_int_to_tree (sizetype, bump_val);
11690 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11691 | stmt_info, bump); |
11692 | group_elt = 0; |
11693 | } |
11694 | } |
11695 | /* Bump the vector pointer to account for a gap or for excess |
11696 | elements loaded for a permuted SLP load. */ |
11697 | if (!costing_p |
	  && maybe_ne (group_gap_adj, 0U)
11699 | && slp_perm) |
11700 | { |
11701 | poly_wide_int bump_val |
11702 | = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); |
11703 | if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1) |
11704 | bump_val = -bump_val; |
	  tree bump = wide_int_to_tree (sizetype, bump_val);
11706 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11707 | stmt_info, bump); |
11708 | } |
11709 | |
11710 | if (slp && !slp_perm) |
11711 | continue; |
11712 | |
11713 | if (slp_perm) |
11714 | { |
11715 | unsigned n_perms; |
11716 | /* For SLP we know we've seen all possible uses of dr_chain so |
11717 | direct vect_transform_slp_perm_load to DCE the unused parts. |
11718 | ??? This is a hack to prevent compile-time issues as seen |
11719 | in PR101120 and friends. */ |
11720 | if (costing_p) |
11721 | { |
11722 | vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf, |
11723 | true, &n_perms, nullptr); |
	      inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
					      stmt_info, 0, vect_body);
11726 | } |
11727 | else |
11728 | { |
11729 | bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, |
11730 | gsi, vf, false, &n_perms, |
11731 | nullptr, true); |
11732 | gcc_assert (ok); |
11733 | } |
11734 | } |
11735 | else |
11736 | { |
11737 | if (grouped_load) |
11738 | { |
11739 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
11740 | /* We assume that the cost of a single load-lanes instruction |
11741 | is equivalent to the cost of DR_GROUP_SIZE separate loads. |
11742 | If a grouped access is instead being provided by a |
11743 | load-and-permute operation, include the cost of the |
11744 | permutes. */ |
11745 | if (costing_p && first_stmt_info == stmt_info) |
11746 | { |
11747 | /* Uses even and odd extract operations or shuffle
11748 | operations for each needed permute. */
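/* A worked example (our illustration, not from the sources): for
   DR_GROUP_SIZE == 4, separating the interleaved elements takes
   ceil_log2 (4) == 2 levels of even/odd extracts, each level
   touching all 4 vectors, so nstmts == 2 * 4 == 8 vec_perm
   operations are costed below.  */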
11749 | int group_size = DR_GROUP_SIZE (first_stmt_info); |
11750 | int nstmts = ceil_log2 (group_size) * group_size;
11751 | inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11752 | stmt_info, 0, vect_body);
11753 |
11754 | if (dump_enabled_p ())
11755 | dump_printf_loc (MSG_NOTE, vect_location,
11756 | "vect_model_load_cost:"
11757 | "strided group_size = %d .\n",
11758 | group_size);
11759 | } |
11760 | else if (!costing_p) |
11761 | { |
11762 | vect_transform_grouped_load (vinfo, stmt_info, dr_chain, |
11763 | group_size, gsi); |
11764 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11765 | } |
11766 | } |
11767 | else if (!costing_p) |
11768 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11769 | } |
11770 | dr_chain.release (); |
11771 | } |
11772 | if (!slp && !costing_p) |
11773 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11774 | |
11775 | if (costing_p) |
11776 | { |
11777 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS |
11778 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE |
11779 | || memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
11780 | if (n_adjacent_loads > 0) |
11781 | vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11782 | alignment_support_scheme, misalignment, false,
11783 | &inside_cost, &prologue_cost, cost_vec, cost_vec,
11784 | true);
11785 | if (dump_enabled_p ())
11786 | dump_printf_loc (MSG_NOTE, vect_location,
11787 | "vect_model_load_cost: inside_cost = %u, "
11788 | "prologue_cost = %u .\n",
11789 | inside_cost, prologue_cost); |
11790 | } |
11791 | |
11792 | return true; |
11793 | } |
11794 | |
11795 | /* Function vect_is_simple_cond. |
11796 | |
11797 | Input: |
11798 | VINFO - the vectorization info for the code being vectorized.
11799 | COND - Condition that is checked for simple use. |
11800 | |
11801 | Output: |
11802 | *COMP_VECTYPE - the vector type for the comparison. |
11803 | *DTS - The def types for the arguments of the comparison |
11804 | |
11805 | Returns whether a COND can be vectorized. Checks whether |
11806 | condition operands are supportable using vect_is_simple_use. */
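/* For example (an illustrative sketch, not from the sources): for
   x = a_1 < b_2 ? c_3 : d_4, COND is the tree a_1 < b_2 and
   *COMP_VECTYPE is set to the vector type of a_1/b_2; for a mask
   COND such as x = m_5 ? c_3 : d_4, m_5 must be a scalar boolean
   SSA name and *COMP_VECTYPE a vector boolean (mask) type.  */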
11807 | |
11808 | static bool |
11809 | vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info, |
11810 | slp_tree slp_node, tree *comp_vectype, |
11811 | enum vect_def_type *dts, tree vectype) |
11812 | { |
11813 | tree lhs, rhs; |
11814 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
11815 | slp_tree slp_op; |
11816 | |
11817 | /* Mask case. */ |
11818 | if (TREE_CODE (cond) == SSA_NAME |
11819 | && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond))) |
11820 | { |
11821 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond, |
11822 | &slp_op, &dts[0], comp_vectype) |
11823 | || !*comp_vectype |
11824 | || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype)) |
11825 | return false; |
11826 | return true; |
11827 | } |
11828 | |
11829 | if (!COMPARISON_CLASS_P (cond)) |
11830 | return false; |
11831 | |
11832 | lhs = TREE_OPERAND (cond, 0); |
11833 | rhs = TREE_OPERAND (cond, 1); |
11834 | |
11835 | if (TREE_CODE (lhs) == SSA_NAME) |
11836 | { |
11837 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, |
11838 | &lhs, &slp_op, &dts[0], &vectype1)) |
11839 | return false; |
11840 | } |
11841 | else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST |
11842 | || TREE_CODE (lhs) == FIXED_CST) |
11843 | dts[0] = vect_constant_def; |
11844 | else |
11845 | return false; |
11846 | |
11847 | if (TREE_CODE (rhs) == SSA_NAME) |
11848 | { |
11849 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, |
11850 | &rhs, &slp_op, &dts[1], &vectype2)) |
11851 | return false; |
11852 | } |
11853 | else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST |
11854 | || TREE_CODE (rhs) == FIXED_CST) |
11855 | dts[1] = vect_constant_def; |
11856 | else |
11857 | return false; |
11858 | |
11859 | if (vectype1 && vectype2 |
11860 | && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11861 | TYPE_VECTOR_SUBPARTS (vectype2)))
11862 | return false; |
11863 | |
11864 | *comp_vectype = vectype1 ? vectype1 : vectype2; |
11865 | /* Invariant comparison. */ |
11866 | if (! *comp_vectype) |
11867 | { |
11868 | tree scalar_type = TREE_TYPE (lhs); |
11869 | if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) |
11870 | *comp_vectype = truth_type_for (vectype); |
11871 | else |
11872 | { |
11873 | /* If we can widen the comparison to match vectype do so. */ |
11874 | if (INTEGRAL_TYPE_P (scalar_type) |
11875 | && !slp_node |
11876 | && tree_int_cst_lt (TYPE_SIZE (scalar_type), |
11877 | TYPE_SIZE (TREE_TYPE (vectype)))) |
11878 | scalar_type = build_nonstandard_integer_type |
11879 | (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type)); |
11880 | *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, |
11881 | slp_node); |
11882 | } |
11883 | } |
11884 | |
11885 | return true; |
11886 | } |
11887 | |
11888 | /* vectorizable_condition. |
11889 | |
11890 | Check if STMT_INFO is a conditional modify expression that can be vectorized.
11891 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
11892 | stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it |
11893 | at GSI. |
11894 | |
11895 | When STMT_INFO is vectorized as a nested cycle, for_reduction is true. |
11896 | |
11897 | Return true if STMT_INFO is vectorizable in this way. */ |
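/* For instance (an illustrative sketch of ours, not taken from the
   sources or testsuite):

     for (i = 0; i < n; i++)
       x[i] = a[i] < b[i] ? y[i] : z[i];

   is transformed into something like

     vect_cmp_8 = vect_a_5 < vect_b_6;
     vect_x_9 = VEC_COND_EXPR <vect_cmp_8, vect_y_3, vect_z_4>;  */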
11898 | |
11899 | static bool |
11900 | vectorizable_condition (vec_info *vinfo, |
11901 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
11902 | gimple **vec_stmt, |
11903 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
11904 | { |
11905 | tree scalar_dest = NULL_TREE; |
11906 | tree vec_dest = NULL_TREE; |
11907 | tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; |
11908 | tree then_clause, else_clause; |
11909 | tree comp_vectype = NULL_TREE; |
11910 | tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; |
11911 | tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; |
11912 | tree vec_compare; |
11913 | tree new_temp; |
11914 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11915 | enum vect_def_type dts[4]
11916 | = {vect_unknown_def_type, vect_unknown_def_type,
11917 | vect_unknown_def_type, vect_unknown_def_type};
11918 | int ndts = 4;
11919 | int ncopies;
11920 | int vec_num;
11921 | enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11922 | int i;
11923 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11924 | vec<tree> vec_oprnds0 = vNULL; |
11925 | vec<tree> vec_oprnds1 = vNULL; |
11926 | vec<tree> vec_oprnds2 = vNULL; |
11927 | vec<tree> vec_oprnds3 = vNULL; |
11928 | tree vec_cmp_type; |
11929 | bool masked = false; |
11930 | |
11931 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
11932 | return false; |
11933 | |
11934 | /* Is this a vectorizable conditional operation? */
11935 | gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11936 | if (!stmt)
11937 | return false;
11938 |
11939 | code = gimple_assign_rhs_code (stmt);
11940 | if (code != COND_EXPR) |
11941 | return false; |
11942 | |
11943 | stmt_vec_info reduc_info = NULL; |
11944 | int reduc_index = -1; |
11945 | vect_reduction_type reduction_type = TREE_CODE_REDUCTION; |
11946 | bool for_reduction |
11947 | = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; |
11948 | if (for_reduction) |
11949 | { |
11950 | if (slp_node) |
11951 | return false; |
11952 | reduc_info = info_for_reduction (vinfo, stmt_info); |
11953 | reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); |
11954 | reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); |
11955 | gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION |
11956 | || reduc_index != -1); |
11957 | } |
11958 | else |
11959 | { |
11960 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) |
11961 | return false; |
11962 | } |
11963 | |
11964 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
11965 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
11966 | |
11967 | if (slp_node) |
11968 | { |
11969 | ncopies = 1; |
11970 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
11971 | } |
11972 | else |
11973 | { |
11974 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
11975 | vec_num = 1; |
11976 | } |
11977 | |
11978 | gcc_assert (ncopies >= 1); |
11979 | if (for_reduction && ncopies > 1) |
11980 | return false; /* FORNOW */ |
11981 | |
11982 | cond_expr = gimple_assign_rhs1 (stmt);
11983 |
11984 | if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11985 | &comp_vectype, &dts[0], vectype)
11986 | || !comp_vectype) |
11987 | return false; |
11988 | |
11989 | unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0; |
11990 | slp_tree then_slp_node, else_slp_node; |
11991 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust, |
11992 | &then_clause, &then_slp_node, &dts[2], &vectype1)) |
11993 | return false; |
11994 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust, |
11995 | &else_clause, &else_slp_node, &dts[3], &vectype2)) |
11996 | return false; |
11997 | |
11998 | if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) |
11999 | return false; |
12000 | |
12001 | if (vectype2 && !useless_type_conversion_p (vectype, vectype2)) |
12002 | return false; |
12003 | |
12004 | masked = !COMPARISON_CLASS_P (cond_expr); |
12005 | vec_cmp_type = truth_type_for (comp_vectype); |
12006 | |
12007 | if (vec_cmp_type == NULL_TREE) |
12008 | return false; |
12009 | |
12010 | cond_code = TREE_CODE (cond_expr); |
12011 | if (!masked) |
12012 | { |
12013 | cond_expr0 = TREE_OPERAND (cond_expr, 0); |
12014 | cond_expr1 = TREE_OPERAND (cond_expr, 1); |
12015 | } |
12016 | |
12017 | /* For conditional reductions, the "then" value needs to be the candidate |
12018 | value calculated by this iteration while the "else" value needs to be |
12019 | the result carried over from previous iterations. If the COND_EXPR |
12020 | is the other way around, we need to swap it. */ |
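/* For example (an illustrative sketch, not from the sources): for a
   conditional reduction written as

     res_1 = a_2 < b_3 ? res_0 : val_4;

   the carried-over value res_0 sits in the "then" position, so the
   comparison is inverted to a_2 >= b_3 (or the mask negated) and the
   clauses are swapped, yielding the canonical
   "candidate ? new value : carried value" form.  */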
12021 | bool must_invert_cmp_result = false; |
12022 | if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) |
12023 | { |
12024 | if (masked) |
12025 | must_invert_cmp_result = true; |
12026 | else |
12027 | { |
12028 | bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); |
12029 | tree_code new_code = invert_tree_comparison (cond_code, honor_nans); |
12030 | if (new_code == ERROR_MARK) |
12031 | must_invert_cmp_result = true; |
12032 | else |
12033 | { |
12034 | cond_code = new_code; |
12035 | /* Make sure we don't accidentally use the old condition. */ |
12036 | cond_expr = NULL_TREE; |
12037 | } |
12038 | } |
12039 | std::swap (then_clause, else_clause);
12040 | } |
12041 | |
12042 | if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) |
12043 | { |
12044 | /* Boolean values may have another representation in vectors |
12045 | and therefore we prefer bit operations over comparison for |
12046 | them (which also works for scalar masks). We store opcodes |
12047 | to use in bitop1 and bitop2. Statement is vectorized as |
12048 | BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) |
12049 | depending on bitop1 and bitop2 arity. */ |
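/* For example (our illustration, not from the sources): with boolean
   (mask) operands, a > b is rewritten as a & ~b, i.e. bitop1 is
   BIT_NOT_EXPR applied to rhs2 and bitop2 is BIT_AND_EXPR combining
   the result with rhs1; a == b is handled as the inverse of a ^ b,
   either via a BIT_NOT_EXPR or by swapping the then/else clauses
   below.  */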
12050 | switch (cond_code) |
12051 | { |
12052 | case GT_EXPR: |
12053 | bitop1 = BIT_NOT_EXPR; |
12054 | bitop2 = BIT_AND_EXPR; |
12055 | break; |
12056 | case GE_EXPR: |
12057 | bitop1 = BIT_NOT_EXPR; |
12058 | bitop2 = BIT_IOR_EXPR; |
12059 | break; |
12060 | case LT_EXPR: |
12061 | bitop1 = BIT_NOT_EXPR; |
12062 | bitop2 = BIT_AND_EXPR; |
12063 | std::swap (cond_expr0, cond_expr1);
12064 | break;
12065 | case LE_EXPR:
12066 | bitop1 = BIT_NOT_EXPR;
12067 | bitop2 = BIT_IOR_EXPR;
12068 | std::swap (cond_expr0, cond_expr1);
12069 | break; |
12070 | case NE_EXPR: |
12071 | bitop1 = BIT_XOR_EXPR; |
12072 | break; |
12073 | case EQ_EXPR: |
12074 | bitop1 = BIT_XOR_EXPR; |
12075 | bitop2 = BIT_NOT_EXPR; |
12076 | break; |
12077 | default: |
12078 | return false; |
12079 | } |
12080 | cond_code = SSA_NAME; |
12081 | } |
12082 | |
12083 | if (TREE_CODE_CLASS (cond_code) == tcc_comparison |
12084 | && reduction_type == EXTRACT_LAST_REDUCTION |
12085 | && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code)) |
12086 | { |
12087 | if (dump_enabled_p ()) |
12088 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12089 | "reduction comparison operation not supported.\n" ); |
12090 | return false; |
12091 | } |
12092 | |
12093 | if (!vec_stmt) |
12094 | { |
12095 | if (bitop1 != NOP_EXPR) |
12096 | { |
12097 | machine_mode mode = TYPE_MODE (comp_vectype); |
12098 | optab optab; |
12099 | |
12100 | optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); |
12101 | if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12102 | return false;
12103 |
12104 | if (bitop2 != NOP_EXPR)
12105 | {
12106 | optab = optab_for_tree_code (bitop2, comp_vectype,
12107 | optab_default);
12108 | if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12109 | return false; |
12110 | } |
12111 | } |
12112 | |
12113 | vect_cost_for_stmt kind = vector_stmt; |
12114 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12115 | /* Count one reduction-like operation per vector. */ |
12116 | kind = vec_to_scalar; |
12117 | else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code) |
12118 | && (masked |
12119 | || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, |
12120 | cond_code) |
12121 | || !expand_vec_cond_expr_p (vectype, vec_cmp_type, |
12122 | ERROR_MARK)))) |
12123 | return false; |
12124 | |
12125 | if (slp_node |
12126 | && (!vect_maybe_update_slp_op_vectype |
12127 | (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype) |
12128 | || (op_adjust == 1 |
12129 | && !vect_maybe_update_slp_op_vectype |
12130 | (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype)) |
12131 | || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype) |
12132 | || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))) |
12133 | { |
12134 | if (dump_enabled_p ()) |
12135 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12136 | "incompatible vector types for invariants\n" ); |
12137 | return false; |
12138 | } |
12139 | |
12140 | if (loop_vinfo && for_reduction |
12141 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
12142 | { |
12143 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12144 | { |
12145 | if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
12146 | vectype, OPTIMIZE_FOR_SPEED)) |
12147 | vect_record_loop_len (loop_vinfo, |
12148 | &LOOP_VINFO_LENS (loop_vinfo), |
12149 | ncopies * vec_num, vectype, 1); |
12150 | else |
12151 | vect_record_loop_mask (loop_vinfo, |
12152 | &LOOP_VINFO_MASKS (loop_vinfo), |
12153 | ncopies * vec_num, vectype, NULL); |
12154 | } |
12155 | /* Extra inactive lanes should be safe for vect_nested_cycle. */ |
12156 | else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) |
12157 | { |
12158 | if (dump_enabled_p ()) |
12159 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12160 | "conditional reduction prevents the use" |
12161 | " of partial vectors.\n" ); |
12162 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
12163 | } |
12164 | } |
12165 | |
12166 | STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; |
12167 | vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12168 | cost_vec, kind);
12169 | return true;
12170 | }
12171 |
12172 | /* Transform. */
12173 |
12174 | /* Handle def. */
12175 | scalar_dest = gimple_assign_lhs (stmt);
12176 | if (reduction_type != EXTRACT_LAST_REDUCTION) |
12177 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
12178 | |
12179 | bool swap_cond_operands = false; |
12180 | |
12181 | /* See whether another part of the vectorized code applies a loop |
12182 | mask to the condition, or to its inverse. */ |
12183 | |
12184 | vec_loop_masks *masks = NULL; |
12185 | vec_loop_lens *lens = NULL; |
12186 | if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
12187 | { |
12188 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12189 | lens = &LOOP_VINFO_LENS (loop_vinfo); |
12190 | } |
12191 | else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
12192 | { |
12193 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12194 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12195 | else |
12196 | { |
12197 | scalar_cond_masked_key cond (cond_expr, ncopies); |
12198 | if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12199 | masks = &LOOP_VINFO_MASKS (loop_vinfo);
12200 | else
12201 | {
12202 | bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12203 | tree_code orig_code = cond.code;
12204 | cond.code = invert_tree_comparison (cond.code, honor_nans);
12205 | if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12206 | { |
12207 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12208 | cond_code = cond.code; |
12209 | swap_cond_operands = true; |
12210 | } |
12211 | else |
12212 | { |
12213 | /* Try the inverse of the current mask. We check if the |
12214 | inverse mask is live and if so we generate a negate of |
12215 | the current mask such that we still honor NaNs. */ |
12216 | cond.inverted_p = true; |
12217 | cond.code = orig_code; |
12218 | if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12219 | { |
12220 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12221 | cond_code = cond.code; |
12222 | swap_cond_operands = true; |
12223 | must_invert_cmp_result = true; |
12224 | } |
12225 | } |
12226 | } |
12227 | } |
12228 | } |
12229 | |
12230 | /* Handle cond expr. */ |
12231 | if (masked) |
12232 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
12233 | cond_expr, &vec_oprnds0, comp_vectype,
12234 | then_clause, &vec_oprnds2, vectype,
12235 | reduction_type != EXTRACT_LAST_REDUCTION
12236 | ? else_clause : NULL, &vec_oprnds3, vectype);
12237 | else
12238 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12239 | cond_expr0, &vec_oprnds0, comp_vectype,
12240 | cond_expr1, &vec_oprnds1, comp_vectype,
12241 | then_clause, &vec_oprnds2, vectype,
12242 | reduction_type != EXTRACT_LAST_REDUCTION
12243 | ? else_clause : NULL, &vec_oprnds3, vectype);
12244 | |
12245 | /* Arguments are ready. Create the new vector stmt. */ |
12246 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) |
12247 | { |
12248 | vec_then_clause = vec_oprnds2[i]; |
12249 | if (reduction_type != EXTRACT_LAST_REDUCTION) |
12250 | vec_else_clause = vec_oprnds3[i]; |
12251 | |
12252 | if (swap_cond_operands) |
12253 | std::swap (vec_then_clause, vec_else_clause);
12254 | |
12255 | if (masked) |
12256 | vec_compare = vec_cond_lhs; |
12257 | else |
12258 | { |
12259 | vec_cond_rhs = vec_oprnds1[i]; |
12260 | if (bitop1 == NOP_EXPR) |
12261 | { |
12262 | gimple_seq stmts = NULL; |
12263 | vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12264 | vec_cond_lhs, vec_cond_rhs);
12265 | gsi_insert_before (gsi, stmts, GSI_SAME_STMT); |
12266 | } |
12267 | else |
12268 | { |
12269 | new_temp = make_ssa_name (vec_cmp_type);
12270 | gassign *new_stmt;
12271 | if (bitop1 == BIT_NOT_EXPR)
12272 | new_stmt = gimple_build_assign (new_temp, bitop1,
12273 | vec_cond_rhs);
12274 | else
12275 | new_stmt
12276 | = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12277 | vec_cond_rhs);
12278 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12279 | if (bitop2 == NOP_EXPR) |
12280 | vec_compare = new_temp; |
12281 | else if (bitop2 == BIT_NOT_EXPR |
12282 | && reduction_type != EXTRACT_LAST_REDUCTION) |
12283 | { |
12284 | /* Instead of doing ~x ? y : z do x ? z : y. */ |
12285 | vec_compare = new_temp; |
12286 | std::swap (vec_then_clause, vec_else_clause);
12287 | }
12288 | else
12289 | {
12290 | vec_compare = make_ssa_name (vec_cmp_type);
12291 | if (bitop2 == BIT_NOT_EXPR)
12292 | new_stmt
12293 | = gimple_build_assign (vec_compare, bitop2, new_temp);
12294 | else
12295 | new_stmt
12296 | = gimple_build_assign (vec_compare, bitop2,
12297 | vec_cond_lhs, new_temp);
12298 | vect_finish_stmt_generation (vinfo, stmt_info,
12299 | new_stmt, gsi);
12300 | } |
12301 | } |
12302 | } |
12303 | |
12304 | /* If we decided to apply a loop mask to the result of the vector |
12305 | comparison, AND the comparison with the mask now. Later passes |
12306 | should then be able to reuse the AND results between multiple
12307 | vector statements. |
12308 | |
12309 | For example: |
12310 | for (int i = 0; i < 100; ++i) |
12311 | x[i] = y[i] ? z[i] : 10; |
12312 | |
12313 | results in following optimized GIMPLE: |
12314 | |
12315 | mask__35.8_43 = vect__4.7_41 != { 0, ... }; |
12316 | vec_mask_and_46 = loop_mask_40 & mask__35.8_43; |
12317 | _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B]; |
12318 | vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46); |
12319 | vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46, |
12320 | vect_iftmp.11_47, { 10, ... }>; |
12321 | |
12322 | instead of using masked and unmasked forms of
12323 | vec != { 0, ... } (masked in the MASK_LOAD, |
12324 | unmasked in the VEC_COND_EXPR). */ |
12325 | |
12326 | /* Force vec_compare to be an SSA_NAME rather than a comparison, |
12327 | in cases where that's necessary. */ |
12328 | |
12329 | tree len = NULL_TREE, bias = NULL_TREE; |
12330 | if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION) |
12331 | { |
12332 | if (!is_gimple_val (vec_compare)) |
12333 | { |
12334 | tree vec_compare_name = make_ssa_name (vec_cmp_type);
12335 | gassign *new_stmt = gimple_build_assign (vec_compare_name,
12336 | vec_compare);
12337 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12338 | vec_compare = vec_compare_name;
12339 | }
12340 |
12341 | if (must_invert_cmp_result)
12342 | {
12343 | tree vec_compare_name = make_ssa_name (vec_cmp_type);
12344 | gassign *new_stmt = gimple_build_assign (vec_compare_name,
12345 | BIT_NOT_EXPR,
12346 | vec_compare);
12347 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12348 | vec_compare = vec_compare_name; |
12349 | } |
12350 | |
12351 | if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
12352 | vectype, OPTIMIZE_FOR_SPEED)) |
12353 | { |
12354 | if (lens) |
12355 | { |
12356 | len = vect_get_loop_len (loop_vinfo, gsi, lens, |
12357 | vec_num * ncopies, vectype, i, 1); |
12358 | signed char biasval |
12359 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
12360 | bias = build_int_cst (intQI_type_node, biasval); |
12361 | } |
12362 | else |
12363 | { |
12364 | len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
12365 | bias = build_int_cst (intQI_type_node, 0); |
12366 | } |
12367 | } |
12368 | if (masks) |
12369 | { |
12370 | tree loop_mask |
12371 | = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies, |
12372 | vectype, i); |
12373 | tree tmp2 = make_ssa_name (vec_cmp_type);
12374 | gassign *g
12375 | = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12376 | loop_mask);
12377 | vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12378 | vec_compare = tmp2; |
12379 | } |
12380 | } |
12381 | |
12382 | gimple *new_stmt; |
12383 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12384 | { |
12385 | gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt; |
12386 | tree lhs = gimple_get_lhs (old_stmt); |
12387 | if (len) |
12388 | new_stmt = gimple_build_call_internal |
12389 | (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare, |
12390 | vec_then_clause, len, bias); |
12391 | else |
12392 | new_stmt = gimple_build_call_internal |
12393 | (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, |
12394 | vec_then_clause); |
12395 | gimple_call_set_lhs (new_stmt, lhs);
12396 | SSA_NAME_DEF_STMT (lhs) = new_stmt;
12397 | if (old_stmt == gsi_stmt (*gsi))
12398 | vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12399 | else |
12400 | { |
12401 | /* In this case we're moving the definition to later in the |
12402 | block. That doesn't matter because the only uses of the |
12403 | lhs are in phi statements. */ |
12404 | gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); |
12405 | gsi_remove (&old_gsi, true); |
12406 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12407 | }
12408 | }
12409 | else
12410 | {
12411 | new_temp = make_ssa_name (vec_dest);
12412 | new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12413 | vec_then_clause, vec_else_clause);
12414 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12415 | }
12416 | if (slp_node)
12417 | slp_node->push_vec_def (new_stmt);
12418 | else
12419 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12420 | } |
12421 | |
12422 | if (!slp_node) |
12423 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
12424 | |
12425 | vec_oprnds0.release (); |
12426 | vec_oprnds1.release (); |
12427 | vec_oprnds2.release (); |
12428 | vec_oprnds3.release (); |
12429 | |
12430 | return true; |
12431 | } |
12432 | |
12433 | /* Helper of vectorizable_comparison. |
12434 | |
12435 | Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12436 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
12437 | comparison, put it in VEC_STMT, and insert it at GSI. |
12438 | |
12439 | Return true if STMT_INFO is vectorizable in this way. */ |
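/* For instance (an illustrative sketch, not from the sources): for

     mask_1 = a_2 > b_3;

   with vector operands this builds a vector comparison producing a
   boolean (mask) vector, e.g. vect_mask = vect_a > vect_b, or the
   equivalent bit operations when the operands are themselves masks.  */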
12440 | |
12441 | static bool |
12442 | vectorizable_comparison_1 (vec_info *vinfo, tree vectype, |
12443 | stmt_vec_info stmt_info, tree_code code, |
12444 | gimple_stmt_iterator *gsi, gimple **vec_stmt, |
12445 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12446 | { |
12447 | tree lhs, rhs1, rhs2; |
12448 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
12449 | tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; |
12450 | tree new_temp; |
12451 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12452 | enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12453 | int ndts = 2;
12454 | poly_uint64 nunits;
12455 | int ncopies;
12456 | enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12457 | int i;
12458 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12459 | vec<tree> vec_oprnds0 = vNULL; |
12460 | vec<tree> vec_oprnds1 = vNULL; |
12461 | tree mask_type; |
12462 | tree mask; |
12463 | |
12464 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
12465 | return false; |
12466 | |
12467 | if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)) |
12468 | return false; |
12469 | |
12470 | mask_type = vectype; |
12471 | nunits = TYPE_VECTOR_SUBPARTS (vectype);
12472 | |
12473 | if (slp_node) |
12474 | ncopies = 1; |
12475 | else |
12476 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
12477 | |
12478 | gcc_assert (ncopies >= 1); |
12479 | |
12480 | if (TREE_CODE_CLASS (code) != tcc_comparison) |
12481 | return false; |
12482 | |
12483 | slp_tree slp_rhs1, slp_rhs2; |
12484 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
12485 | 0, &rhs1, &slp_rhs1, &dts[0], &vectype1)) |
12486 | return false; |
12487 | |
12488 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
12489 | 1, &rhs2, &slp_rhs2, &dts[1], &vectype2)) |
12490 | return false; |
12491 | |
12492 | if (vectype1 && vectype2 |
12493 | && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12494 | TYPE_VECTOR_SUBPARTS (vectype2)))
12495 | return false; |
12496 | |
12497 | vectype = vectype1 ? vectype1 : vectype2; |
12498 | |
12499 | /* Invariant comparison. */ |
12500 | if (!vectype) |
12501 | { |
12502 | if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) |
12503 | vectype = mask_type; |
12504 | else |
12505 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), |
12506 | slp_node); |
12507 | if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12508 | return false;
12509 | }
12510 | else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12511 | return false; |
12512 | |
12513 | /* Can't compare mask and non-mask types. */ |
12514 | if (vectype1 && vectype2 |
12515 | && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2))) |
12516 | return false; |
12517 | |
12518 | /* Boolean values may have another representation in vectors |
12519 | and therefore we prefer bit operations over comparison for |
12520 | them (which also works for scalar masks). We store opcodes |
12521 | to use in bitop1 and bitop2. Statement is vectorized as |
12522 | BITOP2 (rhs1 BITOP1 rhs2) or |
12523 | rhs1 BITOP2 (BITOP1 rhs2) |
12524 | depending on bitop1 and bitop2 arity. */ |
12525 | bool swap_p = false; |
12526 | if (VECTOR_BOOLEAN_TYPE_P (vectype)) |
12527 | { |
12528 | if (code == GT_EXPR) |
12529 | { |
12530 | bitop1 = BIT_NOT_EXPR; |
12531 | bitop2 = BIT_AND_EXPR; |
12532 | } |
12533 | else if (code == GE_EXPR) |
12534 | { |
12535 | bitop1 = BIT_NOT_EXPR; |
12536 | bitop2 = BIT_IOR_EXPR; |
12537 | } |
12538 | else if (code == LT_EXPR) |
12539 | { |
12540 | bitop1 = BIT_NOT_EXPR; |
12541 | bitop2 = BIT_AND_EXPR; |
12542 | swap_p = true; |
12543 | } |
12544 | else if (code == LE_EXPR) |
12545 | { |
12546 | bitop1 = BIT_NOT_EXPR; |
12547 | bitop2 = BIT_IOR_EXPR; |
12548 | swap_p = true; |
12549 | } |
12550 | else |
12551 | { |
12552 | bitop1 = BIT_XOR_EXPR; |
12553 | if (code == EQ_EXPR) |
12554 | bitop2 = BIT_NOT_EXPR; |
12555 | } |
12556 | } |
12557 | |
12558 | if (!vec_stmt) |
12559 | { |
12560 | if (bitop1 == NOP_EXPR) |
12561 | { |
12562 | if (!expand_vec_cmp_expr_p (vectype, mask_type, code)) |
12563 | return false; |
12564 | } |
12565 | else |
12566 | { |
12567 | machine_mode mode = TYPE_MODE (vectype); |
12568 | optab optab; |
12569 | |
12570 | optab = optab_for_tree_code (bitop1, vectype, optab_default); |
12571 | if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12572 | return false;
12573 |
12574 | if (bitop2 != NOP_EXPR)
12575 | {
12576 | optab = optab_for_tree_code (bitop2, vectype, optab_default);
12577 | if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12578 | return false; |
12579 | } |
12580 | } |
12581 | |
12582 | /* Put types on constant and invariant SLP children. */ |
12583 | if (slp_node |
12584 | && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype) |
12585 | || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))) |
12586 | { |
12587 | if (dump_enabled_p ()) |
12588 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12589 | "incompatible vector types for invariants\n" ); |
12590 | return false; |
12591 | } |
12592 | |
12593 | vect_model_simple_cost (vinfo, stmt_info, |
12594 | ncopies: ncopies * (1 + (bitop2 != NOP_EXPR)), |
12595 | dt: dts, ndts, node: slp_node, cost_vec); |
12596 | return true; |
12597 | } |
12598 | |
12599 | /* Transform. */ |
12600 | |
12601 | /* Handle def. */ |
12602 | lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info)); |
12603 | mask = vect_create_destination_var (lhs, mask_type); |
12604 | |
12605 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12606 | rhs1, &vec_oprnds0, vectype,
12607 | rhs2, &vec_oprnds1, vectype);
12608 | if (swap_p)
12609 | std::swap (vec_oprnds0, vec_oprnds1);
12610 | |
12611 | /* Arguments are ready. Create the new vector stmt. */ |
12612 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) |
12613 | { |
12614 | gimple *new_stmt; |
12615 | vec_rhs2 = vec_oprnds1[i]; |
12616 | |
12617 | new_temp = make_ssa_name (mask);
12618 | if (bitop1 == NOP_EXPR)
12619 | {
12620 | new_stmt = gimple_build_assign (new_temp, code,
12621 | vec_rhs1, vec_rhs2);
12622 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12623 | } |
12624 | else |
12625 | { |
12626 | if (bitop1 == BIT_NOT_EXPR) |
12627 | new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2); |
12628 | else |
12629 | new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1, |
12630 | vec_rhs2); |
12631 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12632 | if (bitop2 != NOP_EXPR)
12633 | {
12634 | tree res = make_ssa_name (mask);
12635 | if (bitop2 == BIT_NOT_EXPR)
12636 | new_stmt = gimple_build_assign (res, bitop2, new_temp);
12637 | else
12638 | new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12639 | new_temp);
12640 | vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12641 | }
12642 | }
12643 | if (slp_node)
12644 | slp_node->push_vec_def (new_stmt);
12645 | else
12646 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12647 | } |
12648 | |
12649 | if (!slp_node) |
12650 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
12651 | |
12652 | vec_oprnds0.release (); |
12653 | vec_oprnds1.release (); |
12654 | |
12655 | return true; |
12656 | } |
12657 | |
12658 | /* vectorizable_comparison. |
12659 | |
12660 | Check if STMT_INFO is a comparison expression that can be vectorized.
12661 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
12662 | comparison, put it in VEC_STMT, and insert it at GSI. |
12663 | |
12664 | Return true if STMT_INFO is vectorizable in this way. */ |
12665 | |
12666 | static bool |
12667 | vectorizable_comparison (vec_info *vinfo, |
12668 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
12669 | gimple **vec_stmt, |
12670 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12671 | { |
12672 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12673 |
12674 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12675 | return false;
12676 |
12677 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12678 | return false;
12679 |
12680 | gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12681 | if (!stmt)
12682 | return false;
12683 |
12684 | enum tree_code code = gimple_assign_rhs_code (stmt);
12685 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
12686 | if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, |
12687 | vec_stmt, slp_node, cost_vec)) |
12688 | return false; |
12689 | |
12690 | if (!vec_stmt) |
12691 | STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; |
12692 | |
12693 | return true; |
12694 | } |
12695 | |
12696 | /* If SLP_NODE is nonnull, return true if vectorizable_live_operation |
12697 | can handle all live statements in the node. Otherwise return true |
12698 | if STMT_INFO is not live or if vectorizable_live_operation can handle it. |
12699 | VEC_STMT_P is as for vectorizable_live_operation. */ |
12700 | |
12701 | static bool |
12702 | can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info, |
12703 | slp_tree slp_node, slp_instance slp_node_instance, |
12704 | bool vec_stmt_p, |
12705 | stmt_vector_for_cost *cost_vec) |
12706 | { |
12707 | if (slp_node) |
12708 | { |
12709 | stmt_vec_info slp_stmt_info; |
12710 | unsigned int i; |
12711 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info) |
12712 | { |
12713 | if (STMT_VINFO_LIVE_P (slp_stmt_info) |
12714 | && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node, |
12715 | slp_node_instance, i, |
12716 | vec_stmt_p, cost_vec)) |
12717 | return false; |
12718 | } |
12719 | } |
12720 | else if (STMT_VINFO_LIVE_P (stmt_info) |
12721 | && !vectorizable_live_operation (vinfo, stmt_info, |
12722 | slp_node, slp_node_instance, -1, |
12723 | vec_stmt_p, cost_vec)) |
12724 | return false; |
12725 | |
12726 | return true; |
12727 | } |
12728 | |
12729 | /* Make sure the statement is vectorizable. */ |
12730 | |
12731 | opt_result |
12732 | vect_analyze_stmt (vec_info *vinfo, |
12733 | stmt_vec_info stmt_info, bool *need_to_vectorize, |
12734 | slp_tree node, slp_instance node_instance, |
12735 | stmt_vector_for_cost *cost_vec) |
12736 | { |
12737 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12738 | enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12739 | bool ok;
12740 | gimple_seq pattern_def_seq;
12741 |
12742 | if (dump_enabled_p ())
12743 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12744 | stmt_info->stmt);
12745 |
12746 | if (gimple_has_volatile_ops (stmt_info->stmt))
12747 | return opt_result::failure_at (stmt_info->stmt,
12748 | "not vectorized:"
12749 | " stmt has volatile operands: %G\n",
12750 | stmt_info->stmt);
12751 | |
12752 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
12753 | && node == NULL |
12754 | && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) |
12755 | { |
12756 | gimple_stmt_iterator si; |
12757 | |
12758 | for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12759 | {
12760 | stmt_vec_info pattern_def_stmt_info
12761 | = vinfo->lookup_stmt (gsi_stmt (si));
12762 | if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) |
12763 | || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) |
12764 | { |
12765 | /* Analyze def stmt of STMT if it's a pattern stmt. */ |
12766 | if (dump_enabled_p ()) |
12767 | dump_printf_loc (MSG_NOTE, vect_location, |
12768 | "==> examining pattern def statement: %G" , |
12769 | pattern_def_stmt_info->stmt); |
12770 | |
12771 | opt_result res |
12772 | = vect_analyze_stmt (vinfo, stmt_info: pattern_def_stmt_info, |
12773 | need_to_vectorize, node, node_instance, |
12774 | cost_vec); |
12775 | if (!res) |
12776 | return res; |
12777 | } |
12778 | } |
12779 | } |
12780 | |
12781 | /* Skip stmts that do not need to be vectorized. In loops this is expected |
12782 | to include: |
12783 | - the COND_EXPR which is the loop exit condition |
12784 | - any LABEL_EXPRs in the loop |
12785 | - computations that are used only for array indexing or loop control. |
12786 | In basic blocks we only analyze statements that are a part of some SLP |
12787 | instance, therefore, all the statements are relevant. |
12788 | |
12789 | A pattern statement needs to be analyzed instead of the original statement
12790 | if the original statement is not relevant. Otherwise, we analyze both
12791 | statements. In basic blocks we are called from some SLP instance
12792 | traversal, so don't analyze pattern stmts here; the pattern stmts
12793 | are already part of the SLP instance. */
12794 | |
12795 | stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
12796 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
12797 | && !STMT_VINFO_LIVE_P (stmt_info)) |
12798 | { |
12799 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
12800 | && pattern_stmt_info |
12801 | && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) |
12802 | || STMT_VINFO_LIVE_P (pattern_stmt_info))) |
12803 | { |
12804 | /* Analyze PATTERN_STMT instead of the original stmt. */ |
12805 | stmt_info = pattern_stmt_info; |
12806 | if (dump_enabled_p ()) |
12807 | dump_printf_loc (MSG_NOTE, vect_location, |
12808 | "==> examining pattern statement: %G" , |
12809 | stmt_info->stmt); |
12810 | } |
12811 | else |
12812 | { |
12813 | if (dump_enabled_p ()) |
12814 | dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n" ); |
12815 | |
12816 | return opt_result::success (); |
12817 | } |
12818 | } |
12819 | else if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
12820 | && node == NULL |
12821 | && pattern_stmt_info |
12822 | && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) |
12823 | || STMT_VINFO_LIVE_P (pattern_stmt_info))) |
12824 | { |
12825 | /* Analyze PATTERN_STMT too. */ |
12826 | if (dump_enabled_p ()) |
12827 | dump_printf_loc (MSG_NOTE, vect_location, |
12828 | "==> examining pattern statement: %G" , |
12829 | pattern_stmt_info->stmt); |
12830 | |
12831 | opt_result res |
12832 | = vect_analyze_stmt (vinfo, stmt_info: pattern_stmt_info, need_to_vectorize, node, |
12833 | node_instance, cost_vec); |
12834 | if (!res) |
12835 | return res; |
12836 | } |
12837 | |
12838 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) |
12839 | { |
12840 | case vect_internal_def: |
12841 | break; |
12842 | |
12843 | case vect_reduction_def: |
12844 | case vect_nested_cycle: |
12845 | gcc_assert (!bb_vinfo |
12846 | && (relevance == vect_used_in_outer |
12847 | || relevance == vect_used_in_outer_by_reduction |
12848 | || relevance == vect_used_by_reduction |
12849 | || relevance == vect_unused_in_scope |
12850 | || relevance == vect_used_only_live)); |
12851 | break; |
12852 | |
12853 | case vect_induction_def: |
12854 | case vect_first_order_recurrence: |
12855 | gcc_assert (!bb_vinfo); |
12856 | break; |
12857 | |
12858 | case vect_constant_def: |
12859 | case vect_external_def: |
12860 | case vect_unknown_def_type: |
12861 | default: |
12862 | gcc_unreachable (); |
12863 | } |
12864 | |
12865 | tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); |
12866 | if (node) |
12867 | STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node); |
12868 | |
12869 | if (STMT_VINFO_RELEVANT_P (stmt_info)) |
12870 | { |
12871 | gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12872 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12873 | || (call && gimple_call_lhs (call) == NULL_TREE));
12874 | *need_to_vectorize = true;
12875 | }
12876 |
12877 | if (PURE_SLP_STMT (stmt_info) && !node)
12878 | {
12879 | if (dump_enabled_p ())
12880 | dump_printf_loc (MSG_NOTE, vect_location,
12881 | "handled only by SLP analysis\n");
12882 | return opt_result::success (); |
12883 | } |
12884 | |
12885 | ok = true; |
12886 | if (!bb_vinfo |
12887 | && (STMT_VINFO_RELEVANT_P (stmt_info) |
12888 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) |
12889 | /* Prefer vectorizable_call over vectorizable_simd_clone_call so |
12890 | -mveclibabi= takes preference over library functions with |
12891 | the simd attribute. */ |
12892 | ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12893 | || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12894 | cost_vec)
12895 | || vectorizable_conversion (vinfo, stmt_info,
12896 | NULL, NULL, node, cost_vec)
12897 | || vectorizable_operation (vinfo, stmt_info,
12898 | NULL, NULL, node, cost_vec)
12899 | || vectorizable_assignment (vinfo, stmt_info,
12900 | NULL, NULL, node, cost_vec)
12901 | || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12902 | || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12903 | || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12904 | node, node_instance, cost_vec)
12905 | || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12906 | NULL, node, cost_vec)
12907 | || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12908 | || vectorizable_condition (vinfo, stmt_info,
12909 | NULL, NULL, node, cost_vec)
12910 | || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12911 | cost_vec)
12912 | || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12913 | stmt_info, NULL, node)
12914 | || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12915 | stmt_info, NULL, node, cost_vec));
12916 | else |
12917 | { |
12918 | if (bb_vinfo) |
12919 | ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12920 | || vectorizable_simd_clone_call (vinfo, stmt_info,
12921 | NULL, NULL, node, cost_vec)
12922 | || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12923 | cost_vec)
12924 | || vectorizable_shift (vinfo, stmt_info,
12925 | NULL, NULL, node, cost_vec)
12926 | || vectorizable_operation (vinfo, stmt_info,
12927 | NULL, NULL, node, cost_vec)
12928 | || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12929 | cost_vec)
12930 | || vectorizable_load (vinfo, stmt_info,
12931 | NULL, NULL, node, cost_vec)
12932 | || vectorizable_store (vinfo, stmt_info,
12933 | NULL, NULL, node, cost_vec)
12934 | || vectorizable_condition (vinfo, stmt_info,
12935 | NULL, NULL, node, cost_vec)
12936 | || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12937 | cost_vec)
12938 | || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12939 | } |
12940 | |
12941 | if (node) |
12942 | STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; |
12943 | |
12944 | if (!ok) |
12945 | return opt_result::failure_at (stmt_info->stmt,
12946 | "not vectorized:"
12947 | " relevant stmt not supported: %G",
12948 | stmt_info->stmt);
12949 | |
12950 | /* Stmts that are (also) "live" (i.e. used outside the loop)
12951 | need extra handling, except for vectorizable reductions. */
12952 | if (!bb_vinfo |
12953 | && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type |
12954 | && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type |
12955 | && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12956 | stmt_info, node, node_instance,
12957 | false, cost_vec))
12958 | return opt_result::failure_at (stmt_info->stmt,
12959 | "not vectorized:"
12960 | " live stmt not supported: %G",
12961 | stmt_info->stmt);
12962 | |
12963 | return opt_result::success (); |
12964 | } |
12965 | |
12966 | |
12967 | /* Function vect_transform_stmt. |
12968 | |
12969 | Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */ |
12970 | |
12971 | bool |
12972 | vect_transform_stmt (vec_info *vinfo, |
12973 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
12974 | slp_tree slp_node, slp_instance slp_node_instance) |
12975 | { |
12976 | bool is_store = false; |
12977 | gimple *vec_stmt = NULL; |
12978 | bool done; |
12979 | |
12980 | gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info)); |
12981 | |
12982 | tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); |
12983 | if (slp_node) |
12984 | STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); |
12985 | |
12986 | switch (STMT_VINFO_TYPE (stmt_info)) |
12987 | { |
12988 | case type_demotion_vec_info_type: |
12989 | case type_promotion_vec_info_type: |
12990 | case type_conversion_vec_info_type: |
12991 | done = vectorizable_conversion (vinfo, stmt_info,
12992 | gsi, &vec_stmt, slp_node, NULL);
12993 | gcc_assert (done);
12994 | break;
12995 |
12996 | case induc_vec_info_type:
12997 | done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12998 | stmt_info, &vec_stmt, slp_node,
12999 | NULL);
13000 | gcc_assert (done); |
13001 | break; |
13002 | |
13003 | case shift_vec_info_type: |
13004 | done = vectorizable_shift (vinfo, stmt_info,
13005 | gsi, &vec_stmt, slp_node, NULL);
13006 | gcc_assert (done);
13007 | break;
13008 |
13009 | case op_vec_info_type:
13010 | done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13011 | NULL); |
13012 | gcc_assert (done); |
13013 | break; |
13014 | |
13015 | case assignment_vec_info_type: |
13016 | done = vectorizable_assignment (vinfo, stmt_info,
13017 | gsi, &vec_stmt, slp_node, NULL);
13018 | gcc_assert (done);
13019 | break;
13020 |
13021 | case load_vec_info_type:
13022 | done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13023 | NULL); |
13024 | gcc_assert (done); |
13025 | break; |
13026 | |
13027 | case store_vec_info_type: |
13028 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
13029 | && !slp_node |
13030 | && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info)) |
13031 | < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)))) |
13032 | /* In case of interleaving, the whole chain is vectorized when the |
13033 | last store in the chain is reached. Store stmts before the last |
13034 | one are skipped, and their vec_stmt_info shouldn't be freed
13035 | meanwhile. */ |
13036 | ; |
13037 | else |
13038 | { |
13039 | done = vectorizable_store (vinfo, stmt_info,
13040 | gsi, &vec_stmt, slp_node, NULL);
13041 | gcc_assert (done); |
13042 | is_store = true; |
13043 | } |
13044 | break; |
13045 | |
13046 | case condition_vec_info_type: |
13047 | done = vectorizable_condition (vinfo, stmt_info,
13048 | gsi, &vec_stmt, slp_node, NULL);
13049 | gcc_assert (done);
13050 | break;
13051 |
13052 | case comparison_vec_info_type:
13053 | done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13054 | slp_node, NULL);
13055 | gcc_assert (done);
13056 | break;
13057 |
13058 | case call_vec_info_type:
13059 | done = vectorizable_call (vinfo, stmt_info,
13060 | gsi, &vec_stmt, slp_node, NULL);
13061 | break;
13062 |
13063 | case call_simd_clone_vec_info_type:
13064 | done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13065 | slp_node, NULL); |
13066 | break; |
13067 | |
13068 | case reduc_vec_info_type: |
13069 | done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13070 | gsi, &vec_stmt, slp_node);
13071 | gcc_assert (done);
13072 | break;
13073 |
13074 | case cycle_phi_info_type:
13075 | done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13076 | &vec_stmt, slp_node, slp_node_instance);
13077 | gcc_assert (done);
13078 | break;
13079 |
13080 | case lc_phi_info_type:
13081 | done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13082 | stmt_info, &vec_stmt, slp_node);
13083 | gcc_assert (done);
13084 | break;
13085 |
13086 | case recurr_info_type:
13087 | done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13088 | stmt_info, &vec_stmt, slp_node, NULL); |
13089 | gcc_assert (done); |
13090 | break; |
13091 | |
13092 | case phi_info_type: |
13093 | done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL); |
13094 | gcc_assert (done); |
13095 | break; |
13096 | |
13097 | default: |
13098 | if (!STMT_VINFO_LIVE_P (stmt_info)) |
13099 | { |
13100 | if (dump_enabled_p ()) |
13101 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
13102 | "stmt not supported.\n" ); |
13103 | gcc_unreachable (); |
13104 | } |
13105 | done = true; |
13106 | } |
13107 | |
13108 | if (!slp_node && vec_stmt) |
13109 | gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ()); |
13110 | |
13111 | if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type) |
13112 | { |
13113 | /* Handle stmts whose DEF is used outside the loop-nest that is |
13114 | being vectorized. */ |
13115 | done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13116 | slp_node_instance, true, NULL);
13117 | gcc_assert (done); |
13118 | } |
13119 | |
13120 | if (slp_node) |
13121 | STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; |
13122 | |
13123 | return is_store; |
13124 | } |
13125 | |
13126 | |
13127 | /* Remove a group of stores (for SLP or interleaving), free their |
13128 | stmt_vec_info. */ |
13129 | |
13130 | void |
13131 | vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info) |
13132 | { |
13133 | stmt_vec_info next_stmt_info = first_stmt_info; |
13134 | |
13135 | while (next_stmt_info) |
13136 | { |
13137 | stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
13138 | next_stmt_info = vect_orig_stmt (next_stmt_info);
13139 | /* Free the attached stmt_vec_info and remove the stmt. */ |
13140 | vinfo->remove_stmt (next_stmt_info); |
13141 | next_stmt_info = tmp; |
13142 | } |
13143 | } |
13144 | |
13145 | /* If NUNITS is nonzero, return a vector type that contains NUNITS |
13146 | elements of type SCALAR_TYPE, or null if the target doesn't support |
13147 | such a type. |
13148 | |
13149 | If NUNITS is zero, return a vector type that contains elements of |
13150 | type SCALAR_TYPE, choosing whichever vector size the target prefers. |
13151 | |
13152 | If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode |
13153 | for this vectorization region and want to "autodetect" the best choice. |
13154 | Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE |
13155 | and we want the new type to be interoperable with it. PREVAILING_MODE |
13156 | in this case can be a scalar integer mode or a vector mode; when it |
13157 | is a vector mode, the function acts like a tree-level version of |
13158 | related_vector_mode. */ |
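/* A usage sketch (ours, not from the sources): with PREVAILING_MODE
   V16QImode on a target with 128-bit vectors, asking for 4 units of
   "int" would typically yield a V4SImode vector type, while passing
   VOIDmode and 0 units lets the target pick its preferred vector
   size for the element mode.  */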
13159 | |
13160 | tree |
13161 | get_related_vectype_for_scalar_type (machine_mode prevailing_mode, |
13162 | tree scalar_type, poly_uint64 nunits) |
13163 | { |
13164 | tree orig_scalar_type = scalar_type; |
13165 | scalar_mode inner_mode; |
13166 | machine_mode simd_mode; |
13167 | tree vectype; |
13168 | |
13169 | if ((!INTEGRAL_TYPE_P (scalar_type) |
13170 | && !POINTER_TYPE_P (scalar_type) |
13171 | && !SCALAR_FLOAT_TYPE_P (scalar_type)) |
13172 | || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13173 | && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13174 | return NULL_TREE;
13175 |
13176 | unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13177 | |
13178 | /* Interoperability between modes requires one to be a constant multiple |
13179 | of the other, so that the number of vectors required for each operation |
13180 | is a compile-time constant. */ |
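/* E.g. (our illustration): a 4-element vector of 8-byte doubles
   (32 bytes) is compatible with a 16-byte prevailing mode, since each
   operation then needs exactly two 16-byte vectors; 24 bytes against
   16 would be rejected because neither size divides the other.  */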
13181 | if (prevailing_mode != VOIDmode |
13182 | && !constant_multiple_p (nunits * nbytes,
13183 | GET_MODE_SIZE (prevailing_mode))
13184 | && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13185 | nunits * nbytes))
13186 | return NULL_TREE; |
13187 | |
13188 | /* For vector types of elements whose mode precision doesn't |
13189 | match their type's precision we use an element type of mode
13190 | precision. The vectorization routines will have to make sure |
13191 | they support the proper result truncation/extension. |
13192 | We also make sure to build vector types with INTEGER_TYPE |
13193 | component type only. */ |
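/* For example (an illustrative sketch): a C "bool" typically has
   QImode but TYPE_PRECISION 1, so the element type used is the 8-bit
   unsigned INTEGER_TYPE covering QImode's full precision rather than
   the boolean type itself.  */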
13194 | if (INTEGRAL_TYPE_P (scalar_type) |
13195 | && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13196 | || TREE_CODE (scalar_type) != INTEGER_TYPE))
13197 | scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13198 | TYPE_UNSIGNED (scalar_type)); |
13199 | |
13200 | /* We shouldn't end up building VECTOR_TYPEs of non-scalar components. |
13201 | When the component mode passes the above test simply use a type |
13202 | corresponding to that mode. The theory is that any use that |
13203 | would cause problems with this will disable vectorization anyway. */ |
13204 | else if (!SCALAR_FLOAT_TYPE_P (scalar_type) |
13205 | && !INTEGRAL_TYPE_P (scalar_type)) |
13206 | scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1); |
13207 | |
13208 | /* We can't build a vector type of elements with alignment bigger than |
13209 | their size. */ |
13210 | else if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) |
13211 | scalar_type = lang_hooks.types.type_for_mode (inner_mode, |
13212 | TYPE_UNSIGNED (scalar_type)); |
13213 | |
13214 | /* If we fell back to using the mode, fail if there was
13215 | no scalar type for it. */
13216 | if (scalar_type == NULL_TREE) |
13217 | return NULL_TREE; |
13218 | |
13219 | /* If no prevailing mode was supplied, use the mode the target prefers. |
13220 | Otherwise lookup a vector mode based on the prevailing mode. */ |
13221 | if (prevailing_mode == VOIDmode) |
13222 | { |
13223 | gcc_assert (known_eq (nunits, 0U)); |
13224 | simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); |
13225 | if (SCALAR_INT_MODE_P (simd_mode)) |
13226 | { |
13227 | /* Traditional behavior is not to take the integer mode |
13228 | literally, but simply to use it as a way of determining |
13229 | the vector size. It is up to mode_for_vector to decide |
13230 | what the TYPE_MODE should be. |
13231 | |
13232 | Note that nunits == 1 is allowed in order to support single |
13233 | element vector types. */ |
          if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
              || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13236 | return NULL_TREE; |
13237 | } |
13238 | } |
13239 | else if (SCALAR_INT_MODE_P (prevailing_mode) |
           || !related_vector_mode (prevailing_mode,
                                    inner_mode, nunits).exists (&simd_mode))
13242 | { |
13243 | /* Fall back to using mode_for_vector, mostly in the hope of being |
13244 | able to use an integer mode. */ |
      if (known_eq (nunits, 0U)
          && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13247 | return NULL_TREE; |
13248 | |
      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13250 | return NULL_TREE; |
13251 | } |
13252 | |
13253 | vectype = build_vector_type_for_mode (scalar_type, simd_mode); |
13254 | |
13255 | /* In cases where the mode was chosen by mode_for_vector, check that |
13256 | the target actually supports the chosen mode, or that it at least |
13257 | allows the vector mode to be replaced by a like-sized integer. */ |
13258 | if (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
13259 | && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) |
13260 | return NULL_TREE; |
13261 | |
13262 | /* Re-attach the address-space qualifier if we canonicalized the scalar |
13263 | type. */ |
13264 | if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype)) |
13265 | return build_qualified_type |
13266 | (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type))); |
13267 | |
13268 | return vectype; |
13269 | } |
13270 | |
13271 | /* Function get_vectype_for_scalar_type. |
13272 | |
13273 | Returns the vector type corresponding to SCALAR_TYPE as supported |
13274 | by the target. If GROUP_SIZE is nonzero and we're performing BB |
13275 | vectorization, make sure that the number of elements in the vector |
13276 | is no bigger than GROUP_SIZE. */ |
13277 | |
13278 | tree |
13279 | get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13280 | unsigned int group_size) |
13281 | { |
13282 | /* For BB vectorization, we should always have a group size once we've |
13283 | constructed the SLP tree; the only valid uses of zero GROUP_SIZEs |
13284 | are tentative requests during things like early data reference |
13285 | analysis and pattern recognition. */ |
  if (is_a <bb_vec_info> (vinfo))
13287 | gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); |
13288 | else |
13289 | group_size = 0; |
13290 | |
  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
                                                      scalar_type);
13293 | if (vectype && vinfo->vector_mode == VOIDmode) |
13294 | vinfo->vector_mode = TYPE_MODE (vectype); |
13295 | |
13296 | /* Register the natural choice of vector type, before the group size |
13297 | has been applied. */ |
13298 | if (vectype) |
13299 | vinfo->used_vector_modes.add (TYPE_MODE (vectype)); |
13300 | |
13301 | /* If the natural choice of vector type doesn't satisfy GROUP_SIZE, |
13302 | try again with an explicit number of elements. */ |
13303 | if (vectype |
13304 | && group_size |
13305 | && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size)) |
13306 | { |
13307 | /* Start with the biggest number of units that fits within |
13308 | GROUP_SIZE and halve it until we find a valid vector type. |
13309 | Usually either the first attempt will succeed or all will |
13310 | fail (in the latter case because GROUP_SIZE is too small |
13311 | for the target), but it's possible that a target could have |
13312 | a hole between supported vector types. |
13313 | |
13314 | If GROUP_SIZE is not a power of 2, this has the effect of |
13315 | trying the largest power of 2 that fits within the group, |
13316 | even though the group is not a multiple of that vector size. |
13317 | The BB vectorizer will then try to carve up the group into |
13318 | smaller pieces. */ |
      unsigned int nunits = 1 << floor_log2 (group_size);
13320 | do |
13321 | { |
          vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
                                                         scalar_type, nunits);
13324 | nunits /= 2; |
13325 | } |
13326 | while (nunits > 1 && !vectype); |
13327 | } |
13328 | |
13329 | return vectype; |
13330 | } |
13331 | |
13332 | /* Return the vector type corresponding to SCALAR_TYPE as supported |
13333 | by the target. NODE, if nonnull, is the SLP tree node that will |
13334 | use the returned vector type. */ |
13335 | |
13336 | tree |
13337 | get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) |
13338 | { |
13339 | unsigned int group_size = 0; |
13340 | if (node) |
13341 | group_size = SLP_TREE_LANES (node); |
13342 | return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); |
13343 | } |
13344 | |
13345 | /* Function get_mask_type_for_scalar_type. |
13346 | |
   Returns the mask type corresponding to the result of a comparison
   of vectors of the specified SCALAR_TYPE, as supported by the target.
13349 | If GROUP_SIZE is nonzero and we're performing BB vectorization, |
13350 | make sure that the number of elements in the vector is no bigger |
13351 | than GROUP_SIZE. */ |
13352 | |
13353 | tree |
13354 | get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13355 | unsigned int group_size) |
13356 | { |
13357 | tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); |
13358 | |
13359 | if (!vectype) |
13360 | return NULL; |
13361 | |
13362 | return truth_type_for (vectype); |
13363 | } |
13364 | |
13365 | /* Function get_mask_type_for_scalar_type. |
13366 | |
   Returns the mask type corresponding to the result of a comparison
   of vectors of the specified SCALAR_TYPE, as supported by the target.
13369 | NODE, if nonnull, is the SLP tree node that will use the returned |
13370 | vector type. */ |
13371 | |
13372 | tree |
13373 | get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13374 | slp_tree node) |
13375 | { |
13376 | tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); |
13377 | |
13378 | if (!vectype) |
13379 | return NULL; |
13380 | |
13381 | return truth_type_for (vectype); |
13382 | } |
13383 | |
13384 | /* Function get_same_sized_vectype |
13385 | |
   Returns a vector type with element type corresponding to SCALAR_TYPE
   and the same size as VECTOR_TYPE, if supported by the target.  */
13388 | |
13389 | tree |
13390 | get_same_sized_vectype (tree scalar_type, tree vector_type) |
13391 | { |
13392 | if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) |
13393 | return truth_type_for (vector_type); |
13394 | |
13395 | poly_uint64 nunits; |
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13398 | return NULL_TREE; |
13399 | |
13400 | return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type), |
13401 | scalar_type, nunits); |
13402 | } |
13403 | |
/* Return true if replacing VINFO->vector_mode with VECTOR_MODE
13405 | would not change the chosen vector modes. */ |
13406 | |
13407 | bool |
13408 | vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode) |
13409 | { |
13410 | for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin (); |
13411 | i != vinfo->used_vector_modes.end (); ++i) |
13412 | if (!VECTOR_MODE_P (*i) |
13413 | || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i) |
13414 | return false; |
13415 | return true; |
13416 | } |
13417 | |
13418 | /* Function vect_is_simple_use. |
13419 | |
13420 | Input: |
13421 | VINFO - the vect info of the loop or basic block that is being vectorized. |
13422 | OPERAND - operand in the loop or bb. |
13423 | Output: |
13424 | DEF_STMT_INFO_OUT (optional) - information about the defining stmt in |
13425 | case OPERAND is an SSA_NAME that is defined in the vectorizable region |
13426 | DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME; |
13427 | the definition could be anywhere in the function |
13428 | DT - the type of definition |
13429 | |
13430 | Returns whether a stmt with OPERAND can be vectorized. |
13431 | For loops, supportable operands are constants, loop invariants, and operands |
13432 | that are defined by the current iteration of the loop. Unsupportable |
13433 | operands are those that are defined by a previous iteration of the loop (as |
13434 | is the case in reduction/induction computations). |
13435 | For basic blocks, supportable operands are constants and bb invariants. |
13436 | For now, operands defined outside the basic block are not supported. */ |
13437 | |
13438 | bool |
13439 | vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, |
13440 | stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out) |
13441 | { |
13442 | if (def_stmt_info_out) |
13443 | *def_stmt_info_out = NULL; |
13444 | if (def_stmt_out) |
13445 | *def_stmt_out = NULL; |
13446 | *dt = vect_unknown_def_type; |
13447 | |
13448 | if (dump_enabled_p ()) |
13449 | { |
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
13452 | if (TREE_CODE (operand) == SSA_NAME |
13453 | && !SSA_NAME_IS_DEFAULT_DEF (operand)) |
13454 | dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0); |
13455 | else |
13456 | dump_generic_expr (MSG_NOTE, TDF_SLIM, operand); |
13457 | } |
13458 | |
13459 | if (CONSTANT_CLASS_P (operand)) |
13460 | *dt = vect_constant_def; |
13461 | else if (is_gimple_min_invariant (operand)) |
13462 | *dt = vect_external_def; |
13463 | else if (TREE_CODE (operand) != SSA_NAME) |
13464 | *dt = vect_unknown_def_type; |
13465 | else if (SSA_NAME_IS_DEFAULT_DEF (operand)) |
13466 | *dt = vect_external_def; |
13467 | else |
13468 | { |
13469 | gimple *def_stmt = SSA_NAME_DEF_STMT (operand); |
13470 | stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand); |
13471 | if (!stmt_vinfo) |
13472 | *dt = vect_external_def; |
13473 | else |
13474 | { |
          stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13476 | def_stmt = stmt_vinfo->stmt; |
13477 | *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); |
13478 | if (def_stmt_info_out) |
13479 | *def_stmt_info_out = stmt_vinfo; |
13480 | } |
13481 | if (def_stmt_out) |
13482 | *def_stmt_out = def_stmt; |
13483 | } |
13484 | |
13485 | if (dump_enabled_p ()) |
13486 | { |
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_first_order_recurrence:
          dump_printf (MSG_NOTE, "first order recurrence\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
13521 | } |
13522 | |
13523 | if (*dt == vect_unknown_def_type) |
13524 | { |
13525 | if (dump_enabled_p ()) |
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
13528 | return false; |
13529 | } |
13530 | |
13531 | return true; |
13532 | } |
13533 | |
13534 | /* Function vect_is_simple_use. |
13535 | |
13536 | Same as vect_is_simple_use but also determines the vector operand |
13537 | type of OPERAND and stores it to *VECTYPE. If the definition of |
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */
13542 | |
13543 | bool |
13544 | vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, |
13545 | tree *vectype, stmt_vec_info *def_stmt_info_out, |
13546 | gimple **def_stmt_out) |
13547 | { |
13548 | stmt_vec_info def_stmt_info; |
13549 | gimple *def_stmt; |
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13551 | return false; |
13552 | |
13553 | if (def_stmt_out) |
13554 | *def_stmt_out = def_stmt; |
13555 | if (def_stmt_info_out) |
13556 | *def_stmt_info_out = def_stmt_info; |
13557 | |
13558 | /* Now get a vector type if the def is internal, otherwise supply |
13559 | NULL_TREE and leave it up to the caller to figure out a proper |
13560 | type for the use stmt. */ |
13561 | if (*dt == vect_internal_def |
13562 | || *dt == vect_induction_def |
13563 | || *dt == vect_reduction_def |
13564 | || *dt == vect_double_reduction_def |
13565 | || *dt == vect_nested_cycle |
13566 | || *dt == vect_first_order_recurrence) |
13567 | { |
13568 | *vectype = STMT_VINFO_VECTYPE (def_stmt_info); |
13569 | gcc_assert (*vectype != NULL_TREE); |
13570 | if (dump_enabled_p ()) |
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_is_simple_use: vectype %T\n", *vectype);
13573 | } |
13574 | else if (*dt == vect_uninitialized_def |
13575 | || *dt == vect_constant_def |
13576 | || *dt == vect_external_def) |
13577 | *vectype = NULL_TREE; |
13578 | else |
13579 | gcc_unreachable (); |
13580 | |
13581 | return true; |
13582 | } |
13583 | |
13584 | /* Function vect_is_simple_use. |
13585 | |
13586 | Same as vect_is_simple_use but determines the operand by operand |
13587 | position OPERAND from either STMT or SLP_NODE, filling in *OP |
13588 | and *SLP_DEF (when SLP_NODE is not NULL). */ |
13589 | |
13590 | bool |
13591 | vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, |
13592 | unsigned operand, tree *op, slp_tree *slp_def, |
13593 | enum vect_def_type *dt, |
13594 | tree *vectype, stmt_vec_info *def_stmt_info_out) |
13595 | { |
13596 | if (slp_node) |
13597 | { |
13598 | slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand]; |
13599 | *slp_def = child; |
13600 | *vectype = SLP_TREE_VECTYPE (child); |
13601 | if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
13602 | { |
13603 | *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt); |
          return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13605 | } |
13606 | else |
13607 | { |
13608 | if (def_stmt_info_out) |
13609 | *def_stmt_info_out = NULL; |
13610 | *op = SLP_TREE_SCALAR_OPS (child)[0]; |
13611 | *dt = SLP_TREE_DEF_TYPE (child); |
13612 | return true; |
13613 | } |
13614 | } |
13615 | else |
13616 | { |
13617 | *slp_def = NULL; |
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13619 | { |
          if (gimple_assign_rhs_code (ass) == COND_EXPR
13621 | && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass))) |
13622 | { |
13623 | if (operand < 2) |
13624 | *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand); |
13625 | else |
                *op = gimple_op (ass, operand);
13627 | } |
          else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13629 | *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0); |
13630 | else |
            *op = gimple_op (ass, operand + 1);
13632 | } |
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
        *op = gimple_call_arg (call, operand);
13635 | else |
13636 | gcc_unreachable (); |
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13638 | } |
13639 | } |
13640 | |
/* If OP is not NULL and is external or constant, update its vector
13642 | type with VECTYPE. Returns true if successful or false if not, |
13643 | for example when conflicting vector types are present. */ |
13644 | |
13645 | bool |
13646 | vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype) |
13647 | { |
13648 | if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def) |
13649 | return true; |
13650 | if (SLP_TREE_VECTYPE (op)) |
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
13654 | if (VECTOR_BOOLEAN_TYPE_P (vectype) |
13655 | && SLP_TREE_DEF_TYPE (op) == vect_external_def) |
13656 | return false; |
13657 | SLP_TREE_VECTYPE (op) = vectype; |
13658 | return true; |
13659 | } |
13660 | |
13661 | /* Function supportable_widening_operation |
13662 | |
13663 | Check whether an operation represented by the code CODE is a |
13664 | widening operation that is supported by the target platform in |
13665 | vector form (i.e., when operating on arguments of type VECTYPE_IN |
13666 | producing a result of type VECTYPE_OUT). |
13667 | |
13668 | Widening operations we currently support are NOP (CONVERT), FLOAT, |
13669 | FIX_TRUNC and WIDEN_MULT. This function checks if these operations |
13670 | are supported by the target platform either directly (via vector |
13671 | tree-codes), or via target builtins. |
13672 | |
13673 | Output: |
13674 | - CODE1 and CODE2 are codes of vector operations to be used when |
13675 | vectorizing the operation, if available. |
13676 | - MULTI_STEP_CVT determines the number of required intermediate steps in |
13677 | case of multi-step conversion (like char->short->int - in that case |
13678 | MULTI_STEP_CVT will be 1). |
13679 | - INTERM_TYPES contains the intermediate type required to perform the |
13680 | widening operation (short in the above example). */ |
13681 | |
13682 | bool |
13683 | supportable_widening_operation (vec_info *vinfo, |
13684 | code_helper code, |
13685 | stmt_vec_info stmt_info, |
13686 | tree vectype_out, tree vectype_in, |
13687 | code_helper *code1, |
13688 | code_helper *code2, |
13689 | int *multi_step_cvt, |
13690 | vec<tree> *interm_types) |
13691 | { |
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13693 | class loop *vect_loop = NULL; |
13694 | machine_mode vec_mode; |
13695 | enum insn_code icode1, icode2; |
13696 | optab optab1 = unknown_optab, optab2 = unknown_optab; |
13697 | tree vectype = vectype_in; |
13698 | tree wide_vectype = vectype_out; |
13699 | tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES; |
13700 | int i; |
13701 | tree prev_type, intermediate_type; |
13702 | machine_mode intermediate_mode, prev_mode; |
13703 | optab optab3, optab4; |
13704 | |
13705 | *multi_step_cvt = 0; |
13706 | if (loop_info) |
13707 | vect_loop = LOOP_VINFO_LOOP (loop_info); |
13708 | |
13709 | switch (code.safe_as_tree_code ()) |
13710 | { |
13711 | case MAX_TREE_CODES: |
13712 | /* Don't set c1 and c2 if code is not a tree_code. */ |
13713 | break; |
13714 | |
13715 | case WIDEN_MULT_EXPR: |
13716 | /* The result of a vectorized widening operation usually requires |
13717 | two vectors (because the widened results do not fit into one vector). |
13718 | The generated vector results would normally be expected to be |
13719 | generated in the same order as in the original scalar computation, |
13720 | i.e. if 8 results are generated in each vector iteration, they are |
13721 | to be organized as follows: |
13722 | vect1: [res1,res2,res3,res4], |
13723 | vect2: [res5,res6,res7,res8]. |
13724 | |
13725 | However, in the special case that the result of the widening |
13726 | operation is used in a reduction computation only, the order doesn't |
13727 | matter (because when vectorizing a reduction we change the order of |
13728 | the computation). Some targets can take advantage of this and |
13729 | generate more efficient code. For example, targets like Altivec, |
13730 | that support widen_mult using a sequence of {mult_even,mult_odd} |
13731 | generate the following vectors: |
13732 | vect1: [res1,res3,res5,res7], |
13733 | vect2: [res2,res4,res6,res8]. |
13734 | |
13735 | When vectorizing outer-loops, we execute the inner-loop sequentially |
13736 | (each vectorized inner-loop iteration contributes to VF outer-loop |
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
13740 | /* TODO: Another case in which order doesn't *really* matter is when we |
13741 | widen and then contract again, e.g. (short)((int)x * y >> 8). |
13742 | Normally, pack_trunc performs an even/odd permute, whereas the |
13743 | repack from an even/odd expansion would be an interleave, which |
13744 | would be significantly simpler for e.g. AVX2. */ |
13745 | /* In any case, in order to avoid duplicating the code below, recurse |
13746 | on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values |
13747 | are properly set up for the caller. If we fail, we'll continue with |
13748 | a VEC_WIDEN_MULT_LO/HI_EXPR check. */ |
13749 | if (vect_loop |
13750 | && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction |
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
          && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13753 | stmt_info, vectype_out, |
13754 | vectype_in, code1, |
13755 | code2, multi_step_cvt, |
13756 | interm_types)) |
13757 | { |
13758 | /* Elements in a vector with vect_used_by_reduction property cannot |
13759 | be reordered if the use chain with this property does not have the |
             same operation.  One such example is s += a * b, where elements
13761 | in a and b cannot be reordered. Here we check if the vector defined |
13762 | by STMT is only directly used in the reduction statement. */ |
          tree lhs = gimple_assign_lhs (stmt_info->stmt);
13764 | stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs); |
13765 | if (use_stmt_info |
13766 | && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def) |
13767 | return true; |
13768 | } |
13769 | c1 = VEC_WIDEN_MULT_LO_EXPR; |
13770 | c2 = VEC_WIDEN_MULT_HI_EXPR; |
13771 | break; |
13772 | |
13773 | case DOT_PROD_EXPR: |
13774 | c1 = DOT_PROD_EXPR; |
13775 | c2 = DOT_PROD_EXPR; |
13776 | break; |
13777 | |
13778 | case SAD_EXPR: |
13779 | c1 = SAD_EXPR; |
13780 | c2 = SAD_EXPR; |
13781 | break; |
13782 | |
13783 | case VEC_WIDEN_MULT_EVEN_EXPR: |
13784 | /* Support the recursion induced just above. */ |
13785 | c1 = VEC_WIDEN_MULT_EVEN_EXPR; |
13786 | c2 = VEC_WIDEN_MULT_ODD_EXPR; |
13787 | break; |
13788 | |
13789 | case WIDEN_LSHIFT_EXPR: |
13790 | c1 = VEC_WIDEN_LSHIFT_LO_EXPR; |
13791 | c2 = VEC_WIDEN_LSHIFT_HI_EXPR; |
13792 | break; |
13793 | |
13794 | CASE_CONVERT: |
13795 | c1 = VEC_UNPACK_LO_EXPR; |
13796 | c2 = VEC_UNPACK_HI_EXPR; |
13797 | break; |
13798 | |
13799 | case FLOAT_EXPR: |
13800 | c1 = VEC_UNPACK_FLOAT_LO_EXPR; |
13801 | c2 = VEC_UNPACK_FLOAT_HI_EXPR; |
13802 | break; |
13803 | |
13804 | case FIX_TRUNC_EXPR: |
13805 | c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR; |
13806 | c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR; |
13807 | break; |
13808 | |
13809 | default: |
13810 | gcc_unreachable (); |
13811 | } |
13812 | |
13813 | if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR) |
    std::swap (c1, c2);
13815 | |
13816 | if (code == FIX_TRUNC_EXPR) |
13817 | { |
      /* The signedness is determined from the output operand.  */
13819 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
13820 | optab2 = optab_for_tree_code (c2, vectype_out, optab_default); |
13821 | } |
13822 | else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()) |
13823 | && VECTOR_BOOLEAN_TYPE_P (wide_vectype) |
13824 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
13825 | && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) |
13826 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) |
13827 | { |
13828 | /* If the input and result modes are the same, a different optab |
13829 | is needed where we pass in the number of units in vectype. */ |
13830 | optab1 = vec_unpacks_sbool_lo_optab; |
13831 | optab2 = vec_unpacks_sbool_hi_optab; |
13832 | } |
13833 | |
13834 | vec_mode = TYPE_MODE (vectype); |
13835 | if (widening_fn_p (code)) |
13836 | { |
13837 | /* If this is an internal fn then we must check whether the target |
13838 | supports either a low-high split or an even-odd split. */ |
      internal_fn ifn = as_internal_fn ((combined_fn) code);
13840 | |
13841 | internal_fn lo, hi, even, odd; |
13842 | lookup_hilo_internal_fn (ifn, &lo, &hi); |
      *code1 = as_combined_fn (lo);
      *code2 = as_combined_fn (hi);
13845 | optab1 = direct_internal_fn_optab (lo, {vectype, vectype}); |
13846 | optab2 = direct_internal_fn_optab (hi, {vectype, vectype}); |
13847 | |
13848 | /* If we don't support low-high, then check for even-odd. */ |
13849 | if (!optab1 |
          || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
          || !optab2
          || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13853 | { |
13854 | lookup_evenodd_internal_fn (ifn, &even, &odd); |
          *code1 = as_combined_fn (even);
          *code2 = as_combined_fn (odd);
13857 | optab1 = direct_internal_fn_optab (even, {vectype, vectype}); |
13858 | optab2 = direct_internal_fn_optab (odd, {vectype, vectype}); |
13859 | } |
13860 | } |
13861 | else if (code.is_tree_code ()) |
13862 | { |
13863 | if (code == FIX_TRUNC_EXPR) |
13864 | { |
          /* The signedness is determined from the output operand.  */
13866 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
13867 | optab2 = optab_for_tree_code (c2, vectype_out, optab_default); |
13868 | } |
13869 | else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ()) |
13870 | && VECTOR_BOOLEAN_TYPE_P (wide_vectype) |
13871 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
13872 | && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) |
13873 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) |
13874 | { |
13875 | /* If the input and result modes are the same, a different optab |
13876 | is needed where we pass in the number of units in vectype. */ |
13877 | optab1 = vec_unpacks_sbool_lo_optab; |
13878 | optab2 = vec_unpacks_sbool_hi_optab; |
13879 | } |
13880 | else |
13881 | { |
13882 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
13883 | optab2 = optab_for_tree_code (c2, vectype, optab_default); |
13884 | } |
13885 | *code1 = c1; |
13886 | *code2 = c2; |
13887 | } |
13888 | |
13889 | if (!optab1 || !optab2) |
13890 | return false; |
13891 | |
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13894 | return false; |
13895 | |
13896 | |
13897 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) |
13898 | && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) |
13899 | { |
13900 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
13901 | return true; |
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add an additional check for the number of elements.  */
13905 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), |
13906 | TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) |
13907 | return true; |
13908 | } |
13909 | |
13910 | /* Check if it's a multi-step conversion that can be done using intermediate |
13911 | types. */ |
13912 | |
13913 | prev_type = vectype; |
13914 | prev_mode = vec_mode; |
13915 | |
13916 | if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())) |
13917 | return false; |
13918 | |
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
13923 | interm_types->create (MAX_INTERM_CVT_STEPS); |
13924 | for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) |
13925 | { |
13926 | intermediate_mode = insn_data[icode1].operand[0].mode; |
13927 | if (VECTOR_BOOLEAN_TYPE_P (prev_type)) |
13928 | intermediate_type |
13929 | = vect_halve_mask_nunits (prev_type, intermediate_mode); |
13930 | else if (VECTOR_MODE_P (intermediate_mode)) |
13931 | { |
13932 | tree intermediate_element_type |
13933 | = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode), |
13934 | TYPE_UNSIGNED (prev_type)); |
13935 | intermediate_type |
13936 | = build_vector_type_for_mode (intermediate_element_type, |
13937 | intermediate_mode); |
13938 | } |
13939 | else |
13940 | intermediate_type |
13941 | = lang_hooks.types.type_for_mode (intermediate_mode, |
13942 | TYPE_UNSIGNED (prev_type)); |
13943 | |
13944 | if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) |
13945 | && VECTOR_BOOLEAN_TYPE_P (prev_type) |
13946 | && intermediate_mode == prev_mode |
13947 | && SCALAR_INT_MODE_P (prev_mode)) |
13948 | { |
13949 | /* If the input and result modes are the same, a different optab |
13950 | is needed where we pass in the number of units in vectype. */ |
13951 | optab3 = vec_unpacks_sbool_lo_optab; |
13952 | optab4 = vec_unpacks_sbool_hi_optab; |
13953 | } |
13954 | else |
13955 | { |
13956 | optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); |
13957 | optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); |
13958 | } |
13959 | |
13960 | if (!optab3 || !optab4 |
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
13968 | == CODE_FOR_nothing)) |
13969 | break; |
13970 | |
      interm_types->quick_push (intermediate_type);
13972 | (*multi_step_cvt)++; |
13973 | |
13974 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) |
13975 | && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) |
13976 | { |
13977 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
13978 | return true; |
13979 | if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), |
13980 | TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) |
13981 | return true; |
13982 | } |
13983 | |
13984 | prev_type = intermediate_type; |
13985 | prev_mode = intermediate_mode; |
13986 | } |
13987 | |
13988 | interm_types->release (); |
13989 | return false; |
13990 | } |
13991 | |
13992 | |
13993 | /* Function supportable_narrowing_operation |
13994 | |
13995 | Check whether an operation represented by the code CODE is a |
13996 | narrowing operation that is supported by the target platform in |
13997 | vector form (i.e., when operating on arguments of type VECTYPE_IN |
13998 | and producing a result of type VECTYPE_OUT). |
13999 | |
14000 | Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC |
14001 | and FLOAT. This function checks if these operations are supported by |
14002 | the target platform directly via vector tree-codes. |
14003 | |
14004 | Output: |
14005 | - CODE1 is the code of a vector operation to be used when |
14006 | vectorizing the operation, if available. |
14007 | - MULTI_STEP_CVT determines the number of required intermediate steps in |
14008 | case of multi-step conversion (like int->short->char - in that case |
14009 | MULTI_STEP_CVT will be 1). |
14010 | - INTERM_TYPES contains the intermediate type required to perform the |
14011 | narrowing operation (short in the above example). */ |
14012 | |
14013 | bool |
14014 | supportable_narrowing_operation (code_helper code, |
14015 | tree vectype_out, tree vectype_in, |
14016 | code_helper *code1, int *multi_step_cvt, |
14017 | vec<tree> *interm_types) |
14018 | { |
14019 | machine_mode vec_mode; |
14020 | enum insn_code icode1; |
14021 | optab optab1, interm_optab; |
14022 | tree vectype = vectype_in; |
14023 | tree narrow_vectype = vectype_out; |
14024 | enum tree_code c1; |
14025 | tree intermediate_type, prev_type; |
14026 | machine_mode intermediate_mode, prev_mode; |
14027 | int i; |
14028 | unsigned HOST_WIDE_INT n_elts; |
14029 | bool uns; |
14030 | |
14031 | if (!code.is_tree_code ()) |
14032 | return false; |
14033 | |
14034 | *multi_step_cvt = 0; |
14035 | switch ((tree_code) code) |
14036 | { |
14037 | CASE_CONVERT: |
14038 | c1 = VEC_PACK_TRUNC_EXPR; |
14039 | if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype) |
14040 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
14041 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype)) |
          && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14043 | && n_elts < BITS_PER_UNIT) |
14044 | optab1 = vec_pack_sbool_trunc_optab; |
14045 | else |
14046 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
14047 | break; |
14048 | |
14049 | case FIX_TRUNC_EXPR: |
14050 | c1 = VEC_PACK_FIX_TRUNC_EXPR; |
      /* The signedness is determined from the output operand.  */
14052 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
14053 | break; |
14054 | |
14055 | case FLOAT_EXPR: |
14056 | c1 = VEC_PACK_FLOAT_EXPR; |
14057 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
14058 | break; |
14059 | |
14060 | default: |
14061 | gcc_unreachable (); |
14062 | } |
14063 | |
14064 | if (!optab1) |
14065 | return false; |
14066 | |
14067 | vec_mode = TYPE_MODE (vectype); |
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14069 | return false; |
14070 | |
14071 | *code1 = c1; |
14072 | |
14073 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) |
14074 | { |
14075 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14076 | return true; |
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add an additional check for the number of elements.  */
14080 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, |
14081 | TYPE_VECTOR_SUBPARTS (narrow_vectype))) |
14082 | return true; |
14083 | } |
14084 | |
14085 | if (code == FLOAT_EXPR) |
14086 | return false; |
14087 | |
14088 | /* Check if it's a multi-step conversion that can be done using intermediate |
14089 | types. */ |
14090 | prev_mode = vec_mode; |
14091 | prev_type = vectype; |
14092 | if (code == FIX_TRUNC_EXPR) |
14093 | uns = TYPE_UNSIGNED (vectype_out); |
14094 | else |
14095 | uns = TYPE_UNSIGNED (vectype); |
14096 | |
14097 | /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer |
14098 | conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more |
14099 | costly than signed. */ |
14100 | if (code == FIX_TRUNC_EXPR && uns) |
14101 | { |
14102 | enum insn_code icode2; |
14103 | |
14104 | intermediate_type |
14105 | = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0); |
14106 | interm_optab |
14107 | = optab_for_tree_code (c1, intermediate_type, optab_default); |
14108 | if (interm_optab != unknown_optab |
      && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
14110 | && insn_data[icode1].operand[0].mode |
14111 | == insn_data[icode2].operand[0].mode) |
14112 | { |
14113 | uns = false; |
14114 | optab1 = interm_optab; |
14115 | icode1 = icode2; |
14116 | } |
14117 | } |
14118 | |
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
14122 | interm_types->create (MAX_INTERM_CVT_STEPS); |
14123 | for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) |
14124 | { |
14125 | intermediate_mode = insn_data[icode1].operand[0].mode; |
14126 | if (VECTOR_BOOLEAN_TYPE_P (prev_type)) |
14127 | intermediate_type |
14128 | = vect_double_mask_nunits (prev_type, intermediate_mode); |
14129 | else |
14130 | intermediate_type |
14131 | = lang_hooks.types.type_for_mode (intermediate_mode, uns); |
14132 | if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) |
14133 | && VECTOR_BOOLEAN_TYPE_P (prev_type) |
14134 | && SCALAR_INT_MODE_P (prev_mode) |
        && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14136 | && n_elts < BITS_PER_UNIT) |
14137 | interm_optab = vec_pack_sbool_trunc_optab; |
14138 | else |
14139 | interm_optab |
14140 | = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type, |
14141 | optab_default); |
14142 | if (!interm_optab |
        || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
        || insn_data[icode1].operand[0].mode != intermediate_mode
        || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14146 | == CODE_FOR_nothing)) |
14147 | break; |
14148 | |
      interm_types->quick_push (intermediate_type);
14150 | (*multi_step_cvt)++; |
14151 | |
14152 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) |
14153 | { |
14154 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14155 | return true; |
14156 | if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2, |
14157 | TYPE_VECTOR_SUBPARTS (narrow_vectype))) |
14158 | return true; |
14159 | } |
14160 | |
14161 | prev_mode = intermediate_mode; |
14162 | prev_type = intermediate_type; |
14163 | optab1 = interm_optab; |
14164 | } |
14165 | |
14166 | interm_types->release (); |
14167 | return false; |
14168 | } |
14169 | |
14170 | /* Generate and return a vector mask of MASK_TYPE such that |
14171 | mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I. |
14172 | Add the statements to SEQ. */ |
14173 | |
14174 | tree |
14175 | vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index, |
14176 | tree end_index, const char *name) |
14177 | { |
14178 | tree cmp_type = TREE_TYPE (start_index); |
14179 | gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, |
14180 | cmp_type, mask_type, |
14181 | OPTIMIZE_FOR_SPEED)); |
14182 | gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3, |
14183 | start_index, end_index, |
14184 | build_zero_cst (mask_type)); |
14185 | tree tmp; |
14186 | if (name) |
    tmp = make_temp_ssa_name (mask_type, NULL, name);
14188 | else |
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
14191 | gimple_seq_add_stmt (seq, call); |
14192 | return tmp; |
14193 | } |
14194 | |
14195 | /* Generate a vector mask of type MASK_TYPE for which index I is false iff |
14196 | J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */ |
14197 | |
14198 | tree |
14199 | vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, |
14200 | tree end_index) |
14201 | { |
14202 | tree tmp = vect_gen_while (seq, mask_type, start_index, end_index); |
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14204 | } |
14205 | |
14206 | /* Try to compute the vector types required to vectorize STMT_INFO, |
14207 | returning true on success and false if vectorization isn't possible. |
14208 | If GROUP_SIZE is nonzero and we're performing BB vectorization, |
   make sure that the number of elements in the vectors is no bigger
14210 | than GROUP_SIZE. |
14211 | |
14212 | On success: |
14213 | |
14214 | - Set *STMT_VECTYPE_OUT to: |
14215 | - NULL_TREE if the statement doesn't need to be vectorized; |
14216 | - the equivalent of STMT_VINFO_VECTYPE otherwise. |
14217 | |
14218 | - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum |
14219 | number of units needed to vectorize STMT_INFO, or NULL_TREE if the |
14220 | statement does not help to determine the overall number of units. */ |
14221 | |
14222 | opt_result |
14223 | vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, |
14224 | tree *stmt_vectype_out, |
14225 | tree *nunits_vectype_out, |
14226 | unsigned int group_size) |
14227 | { |
14228 | gimple *stmt = stmt_info->stmt; |
14229 | |
14230 | /* For BB vectorization, we should always have a group size once we've |
14231 | constructed the SLP tree; the only valid uses of zero GROUP_SIZEs |
14232 | are tentative requests during things like early data reference |
14233 | analysis and pattern recognition. */ |
  if (is_a <bb_vec_info> (vinfo))
14235 | gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); |
14236 | else |
14237 | group_size = 0; |
14238 | |
14239 | *stmt_vectype_out = NULL_TREE; |
14240 | *nunits_vectype_out = NULL_TREE; |
14241 | |
14242 | if (gimple_get_lhs (stmt) == NULL_TREE |
14243 | /* MASK_STORE has no lhs, but is ok. */ |
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14245 | { |
      if (is_a <gcall *> (stmt))
14247 | { |
14248 | /* Ignore calls with no lhs. These must be calls to |
14249 | #pragma omp simd functions, and what vectorization factor |
14250 | it really needs can't be determined until |
14251 | vectorizable_simd_clone_call. */ |
14252 | if (dump_enabled_p ()) |
            dump_printf_loc (MSG_NOTE, vect_location,
                             "defer to SIMD clone analysis.\n");
14255 | return opt_result::success (); |
14256 | } |
14257 | |
      return opt_result::failure_at (stmt,
                                     "not vectorized: irregular stmt.%G", stmt);
14260 | } |
14261 | |
14262 | tree vectype; |
14263 | tree scalar_type = NULL_TREE; |
14264 | if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) |
14265 | { |
14266 | vectype = STMT_VINFO_VECTYPE (stmt_info); |
14267 | if (dump_enabled_p ()) |
        dump_printf_loc (MSG_NOTE, vect_location,
                         "precomputed vectype: %T\n", vectype);
14270 | } |
14271 | else if (vect_use_mask_type_p (stmt_info)) |
14272 | { |
14273 | unsigned int precision = stmt_info->mask_precision; |
14274 | scalar_type = build_nonstandard_integer_type (precision, 1); |
14275 | vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); |
14276 | if (!vectype) |
        return opt_result::failure_at (stmt, "not vectorized: unsupported"
                                       " data-type %T\n", scalar_type);
14279 | if (dump_enabled_p ()) |
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14281 | } |
14282 | else |
14283 | { |
14284 | if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) |
14285 | scalar_type = TREE_TYPE (DR_REF (dr)); |
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14287 | scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); |
14288 | else |
14289 | scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); |
14290 | |
14291 | if (dump_enabled_p ()) |
14292 | { |
          if (group_size)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type (group size %d):"
                             " %T\n", group_size, scalar_type);
          else
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type: %T\n", scalar_type);
14300 | } |
14301 | vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); |
14302 | if (!vectype) |
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
14306 | scalar_type); |
14307 | |
14308 | if (dump_enabled_p ()) |
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14310 | } |
14311 | |
14312 | if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type))) |
    return opt_result::failure_at (stmt,
                                   "not vectorized: vector stmt in loop:%G",
14315 | stmt); |
14316 | |
14317 | *stmt_vectype_out = vectype; |
14318 | |
14319 | /* Don't try to compute scalar types if the stmt produces a boolean |
14320 | vector; use the existing vector type instead. */ |
14321 | tree nunits_vectype = vectype; |
14322 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14323 | { |
14324 | /* The number of units is set according to the smallest scalar |
14325 | type (or the largest vector size, but we only support one |
14326 | vector size per vectorization). */ |
14327 | scalar_type = vect_get_smallest_scalar_type (stmt_info, |
14328 | TREE_TYPE (vectype)); |
14329 | if (scalar_type != TREE_TYPE (vectype)) |
14330 | { |
14331 | if (dump_enabled_p ()) |
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for smallest scalar type: %T\n",
                             scalar_type);
14335 | nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, |
14336 | group_size); |
14337 | if (!nunits_vectype) |
            return opt_result::failure_at
              (stmt, "not vectorized: unsupported data-type %T\n",
14340 | scalar_type); |
14341 | if (dump_enabled_p ()) |
            dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
                             nunits_vectype);
14344 | } |
14345 | } |
14346 | |
  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
                   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
                                   "Not vectorized: Incompatible number "
                                   "of vector subparts between %T and %T\n",
14352 | nunits_vectype, *stmt_vectype_out); |
14353 | |
14354 | if (dump_enabled_p ()) |
14355 | { |
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
14359 | } |
14360 | |
14361 | *nunits_vectype_out = nunits_vectype; |
14362 | return opt_result::success (); |
14363 | } |
14364 | |
14365 | /* Generate and return statement sequence that sets vector length LEN that is: |
14366 | |
14367 | min_of_start_and_end = min (START_INDEX, END_INDEX); |
14368 | left_len = END_INDEX - min_of_start_and_end; |
14369 | rhs = min (left_len, LEN_LIMIT); |
14370 | LEN = rhs; |
14371 | |
14372 | Note: the cost of the code generated by this function is modeled |
14373 | by vect_estimate_min_profitable_iters, so changes here may need |
14374 | corresponding changes there. */ |
14375 | |
14376 | gimple_seq |
14377 | vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit) |
14378 | { |
14379 | gimple_seq stmts = NULL; |
14380 | tree len_type = TREE_TYPE (len); |
14381 | gcc_assert (TREE_TYPE (start_index) == len_type); |
14382 | |
  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14386 | gimple* stmt = gimple_build_assign (len, rhs); |
14387 | gimple_seq_add_stmt (&stmts, stmt); |
14388 | |
14389 | return stmts; |
14390 | } |
14391 | |
14392 | |