1/* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "ssa.h"
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
35#include "dumpfile.h"
36#include "alias.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "tree-eh.h"
40#include "gimplify.h"
41#include "gimple-iterator.h"
42#include "gimplify-me.h"
43#include "tree-cfg.h"
44#include "tree-ssa-loop-manip.h"
45#include "cfgloop.h"
46#include "explow.h"
47#include "tree-ssa-loop.h"
48#include "tree-scalar-evolution.h"
49#include "tree-vectorizer.h"
50#include "builtins.h"
51#include "internal-fn.h"
52#include "tree-vector-builder.h"
53#include "vec-perm-indices.h"
54#include "gimple-range.h"
55#include "tree-ssa-loop-niter.h"
56#include "gimple-fold.h"
57#include "regs.h"
58#include "attribs.h"
59#include "optabs-libfuncs.h"
60
61/* For lang_hooks.types.type_for_mode. */
62#include "langhooks.h"
63
64/* Return the vectorized type for the given statement. */
65
66tree
67stmt_vectype (class _stmt_vec_info *stmt_info)
68{
69 return STMT_VINFO_VECTYPE (stmt_info);
70}
71
72/* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74bool
75stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
76{
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
81
82 if (!loop_vinfo)
83 return false;
84
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
86
87 return (bb->loop_father == loop->inner);
88}
89
90/* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
93
94static unsigned
95record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
100{
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
107
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
111
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114}
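
/* For illustration only (not part of the original sources): a typical caller
   records one unaligned vector load in the loop body and gets back the
   generic cost estimate, e.g.

     unsigned est = record_stmt_cost (cost_vec, 1, unaligned_load,
				      stmt_info, vectype, misalign, vect_body);

   where cost_vec, vectype and misalign stand for the caller's own values.
   The entry pushed onto the cost vector is what the target cost model
   later consumes; EST is only a preliminary estimate.  */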
115
116unsigned
117record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
121{
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
124}
125
126unsigned
127record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
131{
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
134}
135
136unsigned
137record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
140{
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
145}
146
147/* Return a variable of type ELEM_TYPE[NELEMS]. */
148
149static tree
150create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
151{
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
154}
155
156/* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
160
161static tree
162read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
165{
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
168
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
175
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
180
181 return vect_name;
182}
183
184/* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
187
188static void
189write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
192{
193 tree array_ref;
194 gimple *new_stmt;
195
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
199
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202}
203
204/* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
207
208static tree
209create_array_ref (tree type, tree ptr, tree alias_ptr_type)
210{
211 tree mem_ref;
212
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
217}
218
219/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
221
222static void
223vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
225{
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229}
230
231/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
232
233/* Function vect_mark_relevant.
234
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236
237static void
238vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
240{
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
243
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
248
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
254 {
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
259
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
264
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
270
271 if (live_p && relevant == vect_unused_in_scope)
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
278 }
279
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
284 }
285
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
289
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
292 {
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
297 }
298
299 worklist->safe_push (stmt_info);
300}
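
/* Illustrative sketch (not part of the original sources): if the scalar
   statement

       S:  x_1 = (short) t_2;

   has been recognized as part of a pattern and replaced by a pattern
   statement

       S': x_1' = t_2 w* c_3;

   then marking S redirects to S', so S' is what ends up on the worklist,
   and a live-but-otherwise-unused S' is promoted to vect_used_only_live.  */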
301
302
303/* Function is_simple_and_all_uses_invariant
304
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
306
307bool
308is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
310{
311 tree op;
312 ssa_op_iter iter;
313
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
317
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
319 {
320 enum vect_def_type dt = vect_uninitialized_def;
321
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
323 {
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
328 }
329
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
332 }
333 return true;
334}
335
336/* Function vect_stmt_relevant_p.
337
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
340
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - it is a control stmt in the loop (except for the exit condition).
345
346 CHECKME: what other side effects would the vectorizer allow? */
347
348static bool
349vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
351{
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
357
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
360
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
365
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
370 {
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
375 }
376
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
379 {
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
381 {
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
384 {
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
387
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
391
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
396
397 *live_p = true;
398 }
399 }
400 }
401
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
404 {
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
409 }
410
411 return (*live_p || *relevant);
412}
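
/* Example for the checks above (illustration only):

     loop:
       a[i_1] = x_2;	  <-- has a vdef, so *relevant = vect_used_in_scope
       s_3 = s_4 + x_2;
     exit:
       res_5 = PHI <s_3>  <-- use of s_3 outside the loop, so the statement
			      defining s_3 gets *live_p = true

   A statement that is neither live nor relevant (e.g. one feeding only
   address computations) keeps vect_unused_in_scope.  */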
413
414
415/* Function exist_non_indexing_operands_for_use_p
416
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
419
420static bool
421exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
422{
423 tree operand;
424
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
430
431 /* STMT has a data_ref. FORNOW this means that it is one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
436
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
440
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
443
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
446 {
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
449 {
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
462 }
463 return false;
464 }
465
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
471
472 if (operand == use)
473 return true;
474
475 return false;
476}
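
/* Example (illustration only): for the statement

       a[i_1] = x_2;

   USE == x_2 is the stored value, so it is a non-indexing operand and the
   function returns true, while USE == i_1 only feeds the address
   computation, so the function returns false and i_1's def-stmt need not
   be marked relevant on this statement's account.  */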
477
478
479/*
480 Function process_use.
481
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
489
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
503
504 Return true if everything is as expected. Return false otherwise. */
505
506static opt_result
507process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
510{
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
513
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
518
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
523
524 if (!dstmt_vinfo)
525 return opt_result::success ();
526
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
529
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
538 {
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
544 }
545
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
554 {
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
558
559 switch (relevant)
560 {
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
565
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
570
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
575
576 case vect_used_in_scope:
577 break;
578
579 default:
580 gcc_unreachable ();
581 }
582 }
583
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
586 ...
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
592 {
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
596
597 switch (relevant)
598 {
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
604
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
609
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
613
614 default:
615 gcc_unreachable ();
616 }
617 }
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
628 {
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
633 }
634
635
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
638}
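
/* Worked example for case 3b above (illustration only): with

     inner-loop:	d_1 = ...
     outer-loop tail:	sum_2 = d_1 + sum_3;	# marked vect_used_by_reduction

   the inner-loop definition of d_1 is re-marked as
   vect_used_in_outer_by_reduction before being pushed onto the worklist.  */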
639
640
641/* Function vect_mark_stmts_to_be_vectorized.
642
643 Not all stmts in the loop need to be vectorized. For example:
644
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
649
650 3. j = j + 1
651
652 Stmts 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
654
655 This pass detects such stmts. */
656
657opt_result
658vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
659{
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
668
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
670
671 auto_vec<stmt_vec_info, 64> worklist;
672
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
675 {
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
678 {
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
683
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
686 }
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
688 {
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
695
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
698 }
699 }
700
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
703 {
704 use_operand_p use_p;
705 ssa_op_iter iter;
706
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
711
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
719
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
727
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
729 {
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
739
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
747
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
755
756 default:
757 break;
758 }
759
760 if (is_pattern_stmt_p (stmt_vinfo))
761 {
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
766 {
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
769
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
772 {
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
783 }
784 for (; i < gimple_num_ops (assign); i++)
785 {
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
788 {
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
794 }
795 }
796 }
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
798 {
799 for (i = 0; i < gimple_call_num_args (call); i++)
800 {
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
807 }
808 }
809 }
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
812 {
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
819 }
820
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
822 {
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
830 {
831 if (fatal)
832 *fatal = false;
833 return res;
834 }
835 }
836 } /* while worklist */
837
838 return opt_result::success ();
839}
840
841/* Function vect_model_simple_cost.
842
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
846
847static void
848vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
855{
856 int inside_cost = 0, prologue_cost = 0;
857
858 gcc_assert (cost_vec != NULL);
859
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
863
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand into a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
872
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
876
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
881}
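
/* Cost sketch (illustration only): in the non-SLP case with NCOPIES == 2
   and DT[] describing one constant and one internal operand, the calls
   above record

     prologue: 1 * scalar_to_vec   (broadcast of the constant operand)
     body:     2 * KIND            (normally vector_stmt)

   and the actual numbers come from the target's cost hooks.  */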
882
883
884/* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
891
892static void
893vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
898{
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
901
902 for (i = 0; i < pwr + 1; i++)
903 {
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
909 }
910
911 /* FORNOW: Assuming a maximum of 2 args per stmt. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
916
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
921}
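
/* Cost sketch (illustration only): for a two-step promotion (PWR == 1)
   with NCOPIES == 2 and WIDEN_ARITH false, the loop above records
   2 + 4 = 6 vec_promote_demote statements in the loop body, plus one
   prologue vector_stmt for each constant or external operand.  */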
922
923/* Returns true if the current function returns DECL. */
924
925static bool
926cfun_returns (tree decl)
927{
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
931 {
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
941 do
942 {
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
944 }
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
950 }
951 return false;
952}
953
954/* Calculate cost of DR's memory access. */
955void
956vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
961{
962 switch (alignment_support_scheme)
963 {
964 case dr_aligned:
965 {
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
969
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
974 }
975
976 case dr_unaligned_supported:
977 {
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
987 }
988
989 case dr_unaligned_unsupported:
990 {
991 *inside_cost = VECT_MAX_COST;
992
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
997 }
998
999 default:
1000 gcc_unreachable ();
1001 }
1002}
1003
1004/* Calculate cost of DR's memory access. */
1005void
1006vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1014{
1015 switch (alignment_support_scheme)
1016 {
1017 case dr_aligned:
1018 {
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1021
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1025
1026 break;
1027 }
1028 case dr_unaligned_supported:
1029 {
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1034
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1039
1040 break;
1041 }
1042 case dr_explicit_realign:
1043 {
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1048
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1055
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1059
1060 break;
1061 }
1062 case dr_explicit_realign_optimized:
1063 {
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1068
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1075
1076 if (add_realign_cost && record_prologue_costs)
1077 {
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1085 }
1086
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1091
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1096
1097 break;
1098 }
1099
1100 case dr_unaligned_unsupported:
1101 {
1102 *inside_cost = VECT_MAX_COST;
1103
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1108 }
1109
1110 default:
1111 gcc_unreachable ();
1112 }
1113}
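
/* Cost sketch (illustration only): for NCOPIES == 2 with
   dr_explicit_realign, the code above records 4 vector_load and
   2 vec_perm statements in the loop body, plus one vector_stmt if the
   target provides builtin_mask_for_load.  */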
1114
1115/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1117
1118static void
1119vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1121{
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1126
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1130}
1131
1132/* Function vect_init_vector.
1133
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
1141
1142tree
1143vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1145{
1146 gimple *init_stmt;
1147 tree new_temp;
1148
1149 /* We abuse this function to push something to an SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1151 {
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1154 {
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1158 {
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1161
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1165 {
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1171 }
1172 }
1173 else
1174 {
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1185 {
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1189 }
1190 }
1191 }
1192 val = build_vector_from_val (type, val);
1193 }
1194
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1199}
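
/* Example of the emitted initialization (illustration only): for a scalar
   VAL of type int and TYPE == vector(4) int, the function builds

     cst_1 = {val_2, val_2, val_2, val_2};

   and places the assignment either before GSI or on the loop preheader
   edge, returning cst_1 as the vector def.  */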
1200
1201
1202/* Function vect_get_vec_defs_for_operand.
1203
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1206
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1209
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1213
1214void
1215vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1218{
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1223
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1227
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1234
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1237 {
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1240
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1248
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1253 }
1254 else
1255 {
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1261 }
1262}
1263
1264
1265/* Get vectorized definitions for OP0 and OP1. */
1266
1267void
1268vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1274{
1275 if (slp_node)
1276 {
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1285 }
1286 else
1287 {
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1300 }
1301}
1302
1303void
1304vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1310{
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1316}
1317
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and, if it can throw, add it to STMT_INFO's EH region. */
1321
1322static void
1323vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1325{
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1328
1329 if (stmt_info)
1330 {
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1332
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1339 }
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1342}
1343
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Also set VEC_STMT's
1346 location and EH information. */
1347
1348void
1349vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1351{
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1354
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1357
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1359}
1360
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Also set VEC_STMT's location and EH information. */
1363
1364void
1365vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1368{
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1370
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1373 {
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1377 {
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1393 {
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1397 }
1398 }
1399 }
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1402}
1403
1404/* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1408
1409static internal_fn
1410vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1412{
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1419 {
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1422 {
1423 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1424 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1425 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1426
1427 /* The type sizes of vectype_in and vectype_out must be exactly
1428 the same when vectype_out does not take part in the optab query;
1429 there is no size restriction when vectype_out is part of the
1430 optab query. */
1431 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1432 return IFN_LAST;
1433
1434 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1435 OPTIMIZE_FOR_SPEED))
1436 return ifn;
1437 }
1438 }
1439 return IFN_LAST;
1440}
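
/* Usage sketch (illustration only): for a call to the sqrtf built-in
   vectorized with vectype_out == vectype_in == vector(4) float,
   associated_internal_fn yields IFN_SQRT, and the function returns
   IFN_SQRT if direct_internal_fn_supported_p reports support for that
   vector type, or IFN_LAST otherwise.  */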
1441
1442
1443static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1444 gimple_stmt_iterator *);
1445
1446/* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1450
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1459
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
1462 types. */
1463
1464static void
1465check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1466 slp_tree slp_node,
1467 vec_load_store_type vls_type,
1468 int group_size,
1469 vect_memory_access_type
1470 memory_access_type,
1471 gather_scatter_info *gs_info,
1472 tree scalar_mask)
1473{
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type == VMAT_INVARIANT)
1476 return;
1477
1478 unsigned int nvectors;
1479 if (slp_node)
1480 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1481 else
1482 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1483
1484 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1485 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1486 machine_mode vecmode = TYPE_MODE (vectype);
1487 bool is_load = (vls_type == VLS_LOAD);
1488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1489 {
1490 internal_fn ifn
1491 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1492 : vect_store_lanes_supported (vectype, group_size, true));
1493 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1494 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1495 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1497 scalar_mask);
1498 else
1499 {
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1506 }
1507 return;
1508 }
1509
1510 if (memory_access_type == VMAT_GATHER_SCATTER)
1511 {
1512 internal_fn ifn = (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE);
1515 internal_fn len_ifn = (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1519 gs_info->memory_type,
1520 gs_info->offset_vectype,
1521 gs_info->scale))
1522 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1524 gs_info->memory_type,
1525 gs_info->offset_vectype,
1526 gs_info->scale))
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1530 {
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1537 }
1538 return;
1539 }
1540
1541 if (memory_access_type != VMAT_CONTIGUOUS
1542 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1543 {
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1551 return;
1552 }
1553
1554 if (!VECTOR_MODE_P (vecmode))
1555 {
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1561 return;
1562 }
1563
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1568 {
1569 unsigned int nvectors;
1570 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1571 return nvectors;
1572 gcc_unreachable ();
1573 };
1574
1575 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1577 machine_mode mask_mode;
1578 machine_mode vmode;
1579 bool using_partial_vectors_p = false;
1580 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1581 {
1582 nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1584 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1585 using_partial_vectors_p = true;
1586 }
1587 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1588 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1589 {
1590 nvectors = group_memory_nvectors (group_size * vf, nunits);
1591 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1592 using_partial_vectors_p = true;
1593 }
1594
1595 if (!using_partial_vectors_p)
1596 {
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1603 }
1604}
1605
1606/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1611
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
1614
1615static tree
1616prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1617 tree vec_mask, gimple_stmt_iterator *gsi)
1618{
1619 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1620 if (!loop_mask)
1621 return vec_mask;
1622
1623 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1624
1625 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1626 return vec_mask;
1627
1628 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1629 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1630 vec_mask, loop_mask);
1631
1632 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1633 return and_res;
1634}
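
/* Example of the emitted statement (illustration only): when both masks
   are present and the pair has not been cached as already-masked, the
   result is

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   inserted before GSI; callers then use vec_mask_and_1 as the final
   load/store mask.  */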
1635
1636/* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1639
1640 { 0, X, X*2, X*3, ... }
1641
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1643
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1646
1647static bool
1648vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1649 loop_vec_info loop_vinfo, bool masked_p,
1650 gather_scatter_info *gs_info)
1651{
1652 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1653 data_reference *dr = dr_info->dr;
1654 tree step = DR_STEP (dr);
1655 if (TREE_CODE (step) != INTEGER_CST)
1656 {
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE, vect_location,
1660 "cannot truncate variable step.\n");
1661 return false;
1662 }
1663
1664 /* Get the number of bits in an element. */
1665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1666 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1667 unsigned int element_bits = GET_MODE_BITSIZE (mode: element_mode);
1668
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1672
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 widest_int max_iters;
1676 if (max_loop_iterations (loop, &max_iters)
1677 && max_iters < count)
1678 count = max_iters.to_shwi ();
1679
1680 /* Try scales of 1 and the element size. */
1681 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1682 wi::overflow_type overflow = wi::OVF_NONE;
1683 for (int i = 0; i < 2; ++i)
1684 {
1685 int scale = scales[i];
1686 widest_int factor;
1687 if (!wi::multiple_of_p (x: wi::to_widest (t: step), y: scale, sgn: SIGNED, res: &factor))
1688 continue;
1689
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range = wi::mul (x: count, y: factor, sgn: SIGNED, overflow: &overflow);
1692 if (overflow)
1693 continue;
1694 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1695 unsigned int min_offset_bits = wi::min_precision (x: range, sgn: sign);
1696
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits = 1U << ceil_log2 (x: min_offset_bits);
1699 tree offset_type = build_nonstandard_integer_type (offset_bits,
1700 sign == UNSIGNED);
1701
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type = TREE_TYPE (DR_REF (dr));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1706 vectype, memory_type, offset_type, scale,
1707 &gs_info->ifn, &gs_info->offset_vectype)
1708 || gs_info->ifn == IFN_LAST)
1709 continue;
1710
1711 gs_info->decl = NULL_TREE;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info->base = NULL_TREE;
1715 gs_info->element_type = TREE_TYPE (vectype);
1716 gs_info->offset = fold_convert (offset_type, step);
1717 gs_info->offset_dt = vect_constant_def;
1718 gs_info->scale = scale;
1719 gs_info->memory_type = memory_type;
1720 return true;
1721 }
1722
1723 if (overflow && dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits);
1727
1728 return false;
1729}
1730
1731/* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733   MASKED_P is true if the load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1735 operation. */
1736
1737static bool
1738vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1739 loop_vec_info loop_vinfo, bool masked_p,
1740 gather_scatter_info *gs_info)
1741{
1742 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1743 || gs_info->ifn == IFN_LAST)
1744 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1745 masked_p, gs_info);
1746
1747 tree old_offset_type = TREE_TYPE (gs_info->offset);
1748 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1749
1750 gcc_assert (TYPE_PRECISION (new_offset_type)
1751 >= TYPE_PRECISION (old_offset_type));
1752 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1753
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info->scale);
1758
1759 return true;
1760}
1761
1762/* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1765
1766static int
1767compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1768{
1769 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1770 return tree_int_cst_compare (t1: vect_dr_behavior (vinfo, dr_info)->step,
1771 size_zero_node);
1772}
1773
1774/* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
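
/* For example, for V4SI the reversing permutation is { 3, 2, 1, 0 }. Only
   the first three elements are encoded explicitly (a single stepped
   pattern), which also covers variable-length vectors such as VNx4SI,
   where the selector is { N-1, N-2, N-3, ... }. */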
1776
1777static tree
1778perm_mask_for_reverse (tree vectype)
1779{
1780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
1781
1782 /* The encoding has a single stepped pattern. */
1783 vec_perm_builder sel (nunits, 1, 3);
1784 for (int i = 0; i < 3; ++i)
1785 sel.quick_push (obj: nunits - 1 - i);
1786
1787 vec_perm_indices indices (sel, 1, nunits);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1789 indices))
1790 return NULL_TREE;
1791 return vect_gen_perm_mask_checked (vectype, indices);
1792}
1793
1794/* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
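
/* An illustrative example: when loading a[i] with i decreasing and VECTYPE
   V4SI, the first vector access covers a[i-3] .. a[i], so *POFFSET is set
   to -3 * 4 bytes and the loaded vector is put into the required order
   using the mask from perm_mask_for_reverse (VMAT_CONTIGUOUS_REVERSE).
   If that permutation or the required alignment is not available, the
   access falls back to VMAT_ELEMENTWISE. */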
1798
1799static vect_memory_access_type
1800get_negative_load_store_type (vec_info *vinfo,
1801 stmt_vec_info stmt_info, tree vectype,
1802 vec_load_store_type vls_type,
1803 unsigned int ncopies, poly_int64 *poffset)
1804{
1805 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1806 dr_alignment_support alignment_support_scheme;
1807
1808 if (ncopies > 1)
1809 {
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE;
1814 }
1815
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset = ((-TYPE_VECTOR_SUBPARTS (node: vectype) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1820
1821 int misalignment = dr_misalignment (dr_info, vectype, offset: *poffset);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1824 if (alignment_support_scheme != dr_aligned
1825 && alignment_support_scheme != dr_unaligned_supported)
1826 {
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1829 "negative step but alignment required.\n");
1830 *poffset = 0;
1831 return VMAT_ELEMENTWISE;
1832 }
1833
1834 if (vls_type == VLS_STORE_INVARIANT)
1835 {
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN;
1841 }
1842
1843 if (!perm_mask_for_reverse (vectype))
1844 {
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "negative step and reversing not supported.\n");
1848 *poffset = 0;
1849 return VMAT_ELEMENTWISE;
1850 }
1851
1852 return VMAT_CONTIGUOUS_REVERSE;
1853}
1854
1855/* STMT_INFO is either a masked or unconditional store. Return the value
1856 being stored. */
1857
1858tree
1859vect_get_store_rhs (stmt_vec_info stmt_info)
1860{
1861 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
1862 {
1863 gcc_assert (gimple_assign_single_p (assign));
1864 return gimple_assign_rhs1 (gs: assign);
1865 }
1866 if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt))
1867 {
1868 internal_fn ifn = gimple_call_internal_fn (gs: call);
1869 int index = internal_fn_stored_value_index (ifn);
1870 gcc_assert (index >= 0);
1871 return gimple_call_arg (gs: call, index);
1872 }
1873 gcc_unreachable ();
1874}
1875
1876/* Function VECTOR_VECTOR_COMPOSITION_TYPE
1877
1878   This function returns a vector type which can be composed with NELTS pieces,
1879   whose type is recorded in PTYPE. VTYPE should be a vector type and have the
1880   same vector size as the return vector. It first checks whether the target
1881   supports a vector mode of the piece size for the construction; if not, it
1882   checks whether a scalar mode of the piece size can be used instead. It
1883   returns NULL_TREE if it fails to find a usable composition.
1884
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1888 - NULL_TREE. */
1889
1890static tree
1891vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1892{
1893 gcc_assert (VECTOR_TYPE_P (vtype));
1894 gcc_assert (known_gt (nelts, 0U));
1895
1896 machine_mode vmode = TYPE_MODE (vtype);
1897 if (!VECTOR_MODE_P (vmode))
1898 return NULL_TREE;
1899
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1903 {
1904 *ptype = TREE_TYPE (vtype);
1905 return vtype;
1906 }
1907
1908 poly_uint64 vbsize = GET_MODE_BITSIZE (mode: vmode);
1909 unsigned int pbsize;
1910 if (constant_multiple_p (a: vbsize, b: nelts, multiple: &pbsize))
1911 {
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1915 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (mode: elmode);
1916 machine_mode rmode;
1917 if (related_vector_mode (vmode, elmode, inelts).exists (mode: &rmode)
1918 && (convert_optab_handler (op: vec_init_optab, to_mode: vmode, from_mode: rmode)
1919 != CODE_FOR_nothing))
1920 {
1921 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1922 return vtype;
1923 }
1924
1925 /* Otherwise check if exists an integer type of the same piece size and
1926 if vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (size: pbsize, limit: 0).exists (mode: &elmode)
1928 && related_vector_mode (vmode, elmode, nelts).exists (mode: &rmode)
1929 && (convert_optab_handler (op: vec_init_optab, to_mode: rmode, from_mode: elmode)
1930 != CODE_FOR_nothing))
1931 {
1932 *ptype = build_nonstandard_integer_type (pbsize, 1);
1933 return build_vector_type (*ptype, nelts);
1934 }
1935 }
1936
1937 return NULL_TREE;
1938}
1939
1940/* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1942 or store.
1943
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
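
/* As an outline of the gap handling below: when the vector accesses would
   read past the final scalar element of the group (a nonzero GAP, or a
   contiguous access that does not cover a whole number of vectors), the
   code decides whether the overrun is harmless because of known alignment,
   can be avoided by loading only half a vector, or requires peeling a
   scalar epilogue iteration (LOOP_VINFO_PEELING_FOR_GAPS), and gives up
   if none of these apply. */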
1948
1949static bool
1950get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1951 tree vectype, slp_tree slp_node,
1952 bool masked_p, vec_load_store_type vls_type,
1953 vect_memory_access_type *memory_access_type,
1954 poly_int64 *poffset,
1955 dr_alignment_support *alignment_support_scheme,
1956 int *misalignment,
1957 gather_scatter_info *gs_info,
1958 internal_fn *lanes_ifn)
1959{
1960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
1961 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1962 stmt_vec_info first_stmt_info;
1963 unsigned int group_size;
1964 unsigned HOST_WIDE_INT gap;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1966 {
1967 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1968 group_size = DR_GROUP_SIZE (first_stmt_info);
1969 gap = DR_GROUP_GAP (first_stmt_info);
1970 }
1971 else
1972 {
1973 first_stmt_info = stmt_info;
1974 group_size = 1;
1975 gap = 0;
1976 }
1977 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1978 bool single_element_p = (stmt_info == first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1980 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
1981
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p = false;
1985
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p = (!masked_p
1989 && vls_type == VLS_LOAD
1990 && loop_vinfo
1991 && !loop->inner);
1992
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1996
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
1999
2000 if (slp_node)
2001 {
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2005 first_dr_info
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2008 {
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (a: nunits, b: group_size))
2013 *memory_access_type = VMAT_STRIDED_SLP;
2014 else
2015 *memory_access_type = VMAT_ELEMENTWISE;
2016 }
2017 else
2018 {
2019 overrun_p = loop_vinfo && gap != 0;
2020 if (overrun_p && vls_type != VLS_LOAD)
2021 {
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2025 return false;
2026 }
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2031 if (overrun_p
2032 && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info,
2033 vectype)
2034 / vect_get_scalar_dr_size (dr_info: first_dr_info)))
2035 overrun_p = false;
2036
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss;
2042 int misalign = dr_misalignment (dr_info: first_dr_info, vectype);
2043 tree half_vtype;
2044 if (overrun_p
2045 && !masked_p
2046 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2047 vectype, misalign)))
2048 == dr_aligned
2049 || alss == dr_unaligned_supported)
2050 && known_eq (nunits, (group_size - gap) * 2)
2051 && known_eq (nunits, group_size)
2052 && (vector_vector_composition_type (vtype: vectype, nelts: 2, ptype: &half_vtype)
2053 != NULL_TREE))
2054 overrun_p = false;
2055
2056 if (overrun_p && !can_overrun_p)
2057 {
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "Peeling for outer loop is not supported\n");
2061 return false;
2062 }
2063 int cmp = compare_step_with_zero (vinfo, stmt_info);
2064 if (cmp < 0)
2065 {
2066 if (single_element_p)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type = get_negative_load_store_type
2070 (vinfo, stmt_info, vectype, vls_type, ncopies: 1, poffset);
2071 else
2072 {
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (a: nunits, b: group_size))
2077 *memory_access_type = VMAT_STRIDED_SLP;
2078 else
2079 *memory_access_type = VMAT_ELEMENTWISE;
2080 }
2081 }
2082 else if (cmp == 0 && loop_vinfo)
2083 {
2084 gcc_assert (vls_type == VLS_LOAD);
2085 *memory_access_type = VMAT_INVARIANT;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme = dr_unaligned_supported;
2089 }
2090 else
2091 *memory_access_type = VMAT_CONTIGUOUS;
2092
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2098
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2101 if (loop_vinfo
2102 && *memory_access_type == VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2104 && !multiple_p (a: group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2105 b: nunits))
2106 {
2107 unsigned HOST_WIDE_INT cnunits, cvf;
2108 if (!can_overrun_p
2109 || !nunits.is_constant (const_value: &cnunits)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &cvf)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2117 {
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "peeling for gaps insufficient for "
2121 "access\n");
2122 return false;
2123 }
2124 overrun_p = true;
2125 }
2126 }
2127 }
2128 else
2129 {
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type = VMAT_ELEMENTWISE;
2133
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p = (gap != 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2141 if (would_overrun_p
2142 && !masked_p
2143 && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info, vectype)
2144 / vect_get_scalar_dr_size (dr_info: first_dr_info)))
2145 would_overrun_p = false;
2146
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2148 && (can_overrun_p || !would_overrun_p)
2149 && compare_step_with_zero (vinfo, stmt_info) > 0)
2150 {
2151 /* First cope with the degenerate case of a single-element
2152 vector. */
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2154 ;
2155
2156 else
2157 {
2158 /* Otherwise try using LOAD/STORE_LANES. */
2159 *lanes_ifn
2160 = vls_type == VLS_LOAD
2161 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2162 : vect_store_lanes_supported (vectype, group_size,
2163 masked_p);
2164 if (*lanes_ifn != IFN_LAST)
2165 {
2166 *memory_access_type = VMAT_LOAD_STORE_LANES;
2167 overrun_p = would_overrun_p;
2168 }
2169
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type == VLS_LOAD
2172 ? vect_grouped_load_supported (vectype,
2173 single_element_p,
2174 group_size)
2175 : vect_grouped_store_supported (vectype, group_size))
2176 {
2177 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2178 overrun_p = would_overrun_p;
2179 }
2180 }
2181 }
2182
2183      /* As a last resort, try using a gather load or scatter store.
2184
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type == VMAT_ELEMENTWISE
2191 && single_element_p
2192 && loop_vinfo
2193 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2194 masked_p, gs_info))
2195 *memory_access_type = VMAT_GATHER_SCATTER;
2196 }
2197
2198 if (*memory_access_type == VMAT_GATHER_SCATTER
2199 || *memory_access_type == VMAT_ELEMENTWISE)
2200 {
2201 *alignment_support_scheme = dr_unaligned_supported;
2202 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2203 }
2204 else
2205 {
2206 *misalignment = dr_misalignment (dr_info: first_dr_info, vectype, offset: *poffset);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2209 *misalignment);
2210 }
2211
2212 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2213 {
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2217 while (next_stmt_info)
2218 {
2219 tree op = vect_get_store_rhs (stmt_info: next_stmt_info);
2220 enum vect_def_type dt;
2221 if (!vect_is_simple_use (op, vinfo, &dt))
2222 {
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2225 "use not simple.\n");
2226 return false;
2227 }
2228 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2229 }
2230 }
2231
2232 if (overrun_p)
2233 {
2234 gcc_assert (can_overrun_p);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2240 }
2241
2242 return true;
2243}
2244
2245/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2253
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
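
/* As a rough illustration of the classification done below: a consecutive
   access with positive step is VMAT_CONTIGUOUS, a consecutive access with
   negative step is handled by get_negative_load_store_type (typically
   VMAT_CONTIGUOUS_REVERSE), a zero step is VMAT_INVARIANT (loads only),
   an explicit gather/scatter or a strided access that can use one is
   VMAT_GATHER_SCATTER, and the remaining strided cases fall back to
   VMAT_ELEMENTWISE or VMAT_STRIDED_SLP. */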
2258
2259static bool
2260get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2261 tree vectype, slp_tree slp_node,
2262 bool masked_p, vec_load_store_type vls_type,
2263 unsigned int ncopies,
2264 vect_memory_access_type *memory_access_type,
2265 poly_int64 *poffset,
2266 dr_alignment_support *alignment_support_scheme,
2267 int *misalignment,
2268 gather_scatter_info *gs_info,
2269 internal_fn *lanes_ifn)
2270{
2271 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
2272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
2273 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2274 *poffset = 0;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2276 {
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2279 gcc_unreachable ();
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info->ifn != IFN_LAST
2287 && !is_gimple_call (gs: stmt_info->stmt)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2289 TREE_TYPE (gs_info->offset_vectype)))
2290 {
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "%s offset requires a conversion\n",
2294 vls_type == VLS_LOAD ? "gather" : "scatter");
2295 return false;
2296 }
2297 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2298 &gs_info->offset_dt,
2299 &gs_info->offset_vectype))
2300 {
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "%s index use not simple.\n",
2304 vls_type == VLS_LOAD ? "gather" : "scatter");
2305 return false;
2306 }
2307 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2308 {
2309 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (node: gs_info->offset_vectype).is_constant ()
2311 || !constant_multiple_p (a: TYPE_VECTOR_SUBPARTS
2312 (node: gs_info->offset_vectype),
2313 b: TYPE_VECTOR_SUBPARTS (node: vectype)))
2314 {
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "unsupported vector types for emulated "
2318 "gather.\n");
2319 return false;
2320 }
2321 }
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme = dr_unaligned_supported;
2325 }
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2327 {
2328 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2329 masked_p,
2330 vls_type, memory_access_type, poffset,
2331 alignment_support_scheme,
2332 misalignment, gs_info, lanes_ifn))
2333 return false;
2334 }
2335 else if (STMT_VINFO_STRIDED_P (stmt_info))
2336 {
2337 gcc_assert (!slp_node);
2338 if (loop_vinfo
2339 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2340 masked_p, gs_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme = dr_unaligned_supported;
2346 }
2347 else
2348 {
2349 int cmp = compare_step_with_zero (vinfo, stmt_info);
2350 if (cmp == 0)
2351 {
2352 gcc_assert (vls_type == VLS_LOAD);
2353 *memory_access_type = VMAT_INVARIANT;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2357 }
2358 else
2359 {
2360 if (cmp < 0)
2361 *memory_access_type = get_negative_load_store_type
2362 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2363 else
2364 *memory_access_type = VMAT_CONTIGUOUS;
2365 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2366 vectype, offset: *poffset);
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info),
2370 vectype, *misalignment);
2371 }
2372 }
2373
2374 if ((*memory_access_type == VMAT_ELEMENTWISE
2375 || *memory_access_type == VMAT_STRIDED_SLP)
2376 && !nunits.is_constant ())
2377 {
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2382 return false;
2383 }
2384
2385 if (*alignment_support_scheme == dr_unaligned_unsupported)
2386 {
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "unsupported unaligned access\n");
2390 return false;
2391 }
2392
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2397 if (!first_stmt_info)
2398 first_stmt_info = stmt_info;
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2401 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2404 {
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not falling back to elementwise accesses\n");
2408 return false;
2409 }
2410 return true;
2411}
2412
2413/* Return true if the boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2418
2419static bool
2420vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2421 slp_tree slp_node, unsigned mask_index,
2422 tree *mask, slp_tree *mask_node,
2423 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2424{
2425 enum vect_def_type mask_dt;
2426 tree mask_vectype;
2427 slp_tree mask_node_1;
2428 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2429 mask, &mask_node_1, &mask_dt, &mask_vectype))
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask use not simple.\n");
2434 return false;
2435 }
2436
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2438 {
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "mask argument is not a boolean.\n");
2442 return false;
2443 }
2444
2445  /* If the caller is not prepared for adjusting an external/constant
2446     SLP mask vector type, fail. */
2447 if (slp_node
2448 && !mask_node
2449 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2450 {
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "SLP mask argument is not vectorized.\n");
2454 return false;
2455 }
2456
2457 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 if (!mask_vectype)
2459 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2460 mask_node_1);
2461
2462 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2463 {
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "could not find an appropriate vector mask type.\n");
2467 return false;
2468 }
2469
2470 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: mask_vectype),
2471 b: TYPE_VECTOR_SUBPARTS (node: vectype)))
2472 {
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype, vectype);
2478
2479 return false;
2480 }
2481
2482 *mask_dt_out = mask_dt;
2483 *mask_vectype_out = mask_vectype;
2484 if (mask_node)
2485 *mask_node = mask_node_1;
2486 return true;
2487}
2488
2489/* Return true if the stored value is suitable for vectorizing store
2490   statement STMT_INFO. When returning true, store the scalar being stored
2491   in *RHS and its SLP node in *RHS_NODE, the type of the definition in
2492   *RHS_DT_OUT, the type of the vectorized store value in
2493   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2494
2495static bool
2496vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2497 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2498 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2499 vec_load_store_type *vls_type_out)
2500{
2501 int op_no = 0;
2502 if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt))
2503 {
2504 if (gimple_call_internal_p (gs: call)
2505 && internal_store_fn_p (gimple_call_internal_fn (gs: call)))
2506 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (gs: call));
2507 }
2508 if (slp_node)
2509 op_no = vect_slp_child_index_for_operand
2510 (stmt_info->stmt, op: op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2511
2512 enum vect_def_type rhs_dt;
2513 tree rhs_vectype;
2514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2515 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "use not simple.\n");
2520 return false;
2521 }
2522
2523  /* In case this is a store from a constant, make sure
2524     native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2526 {
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "cannot encode constant as a byte sequence.\n");
2530 return false;
2531 }
2532
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2535 {
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2540 }
2541
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2549}
2550
2551/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
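
/* For example, for a boolean or integer-element MASKTYPE the result is
   simply a vector of -1 (all bits set); for a float-element MASKTYPE such
   as V2DF the all-ones bit pattern is reinterpreted as a floating-point
   value via real_from_target, so the mask still has every bit set. */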
2554
2555static tree
2556vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2558{
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2562 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2563 {
2564 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL);
2567 }
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2569 {
2570 REAL_VALUE_TYPE r;
2571 long tmp[6];
2572 for (int j = 0; j < 6; ++j)
2573 tmp[j] = -1;
2574 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2575 tree mask = build_real (TREE_TYPE (masktype), r);
2576 mask = build_vector_from_val (masktype, mask);
2577 return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL);
2578 }
2579 gcc_unreachable ();
2580}
2581
2582/* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2584
2585static tree
2586vect_build_zero_merge_argument (vec_info *vinfo,
2587 stmt_vec_info stmt_info, tree vectype)
2588{
2589 tree merge;
2590 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2591 merge = build_int_cst (TREE_TYPE (vectype), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2593 {
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = 0;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2599 merge = build_real (TREE_TYPE (vectype), r);
2600 }
2601 else
2602 gcc_unreachable ();
2603 merge = build_vector_from_val (vectype, merge);
2604 return vect_init_vector (vinfo, stmt_info, val: merge, type: vectype, NULL);
2605}
2606
2607/* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
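
/* Sketch of the emitted sequence (the SSA names are hypothetical): for a
   builtin-based gather, GS_INFO->decl is called as

     _6 = DECL (src, ptr, offset, mask, scale);

   where OFFSET and MASK may first need VIEW_CONVERT_EXPRs to the types the
   builtin expects, and the result may need a VIEW_CONVERT_EXPR back to the
   statement's VECTYPE. */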
2612
2613static gimple *
2614vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2615 gimple_stmt_iterator *gsi,
2616 gather_scatter_info *gs_info,
2617 tree ptr, tree offset, tree mask)
2618{
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2621 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2622 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2623 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2624 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2625 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree scaletype = TREE_VALUE (arglist);
2627 tree var;
2628 gcc_checking_assert (types_compatible_p (srctype, rettype)
2629 && (!mask
2630 || TREE_CODE (masktype) == INTEGER_TYPE
2631 || types_compatible_p (srctype, masktype)));
2632
2633 tree op = offset;
2634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2635 {
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2637 TYPE_VECTOR_SUBPARTS (idxtype)));
2638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2640 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2641 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2642 op = var;
2643 }
2644
2645 tree src_op = NULL_TREE;
2646 tree mask_op = NULL_TREE;
2647 if (mask)
2648 {
2649 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2650 {
2651 tree utype, optype = TREE_TYPE (mask);
2652 if (VECTOR_TYPE_P (masktype)
2653 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2654 utype = masktype;
2655 else
2656 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2657 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2658 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2659 gassign *new_stmt
2660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2661 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2662 mask_arg = var;
2663 if (!useless_type_conversion_p (masktype, utype))
2664 {
2665 gcc_assert (TYPE_PRECISION (utype)
2666 <= TYPE_PRECISION (masktype));
2667 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2668 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2669 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2670 mask_arg = var;
2671 }
2672 src_op = build_zero_cst (srctype);
2673 mask_op = mask_arg;
2674 }
2675 else
2676 {
2677 src_op = mask;
2678 mask_op = mask;
2679 }
2680 }
2681 else
2682 {
2683 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, vectype: rettype);
2684 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2685 }
2686
2687 tree scale = build_int_cst (scaletype, gs_info->scale);
2688 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2689 mask_op, scale);
2690
2691 if (!useless_type_conversion_p (vectype, rettype))
2692 {
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2694 TYPE_VECTOR_SUBPARTS (rettype)));
2695 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2696 gimple_call_set_lhs (gs: new_stmt, lhs: op);
2697 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2698 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2699 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2700 }
2701
2702 return new_stmt;
2703}
2704
2705/* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI. GS_INFO describes the scatter store operation.
2707 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2708 vectorized data to store.
2709 If the store is conditional, MASK is the vectorized condition, otherwise
2710 MASK is null. */
2711
2712static gimple *
2713vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2714 gimple_stmt_iterator *gsi,
2715 gather_scatter_info *gs_info,
2716 tree ptr, tree offset, tree oprnd, tree mask)
2717{
2718 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2719 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2720 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2721 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2722 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2723 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2724 tree scaletype = TREE_VALUE (arglist);
2725 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2726 && TREE_CODE (rettype) == VOID_TYPE);
2727
2728 tree mask_arg = NULL_TREE;
2729 if (mask)
2730 {
2731 mask_arg = mask;
2732 tree optype = TREE_TYPE (mask_arg);
2733 tree utype;
2734 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2735 utype = masktype;
2736 else
2737 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2738 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2739 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2740 gassign *new_stmt
2741 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2742 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2743 mask_arg = var;
2744 if (!useless_type_conversion_p (masktype, utype))
2745 {
2746 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2747 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2748 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2749 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2750 mask_arg = var;
2751 }
2752 }
2753 else
2754 {
2755 mask_arg = build_int_cst (masktype, -1);
2756 mask_arg = vect_init_vector (vinfo, stmt_info, val: mask_arg, type: masktype, NULL);
2757 }
2758
2759 tree src = oprnd;
2760 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2761 {
2762 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2763 TYPE_VECTOR_SUBPARTS (srctype)));
2764 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2765 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2766 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2767 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2768 src = var;
2769 }
2770
2771 tree op = offset;
2772 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2773 {
2774 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2775 TYPE_VECTOR_SUBPARTS (idxtype)));
2776 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2777 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2778 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2779 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2780 op = var;
2781 }
2782
2783 tree scale = build_int_cst (scaletype, gs_info->scale);
2784 gcall *new_stmt
2785 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2786 return new_stmt;
2787}
2788
2789/* Prepare the base and offset in GS_INFO for vectorization.
2790 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2791 to the vectorized offset argument for the first copy of STMT_INFO.
2792 STMT_INFO is the statement described by GS_INFO and LOOP is the
2793 containing loop. */
2794
2795static void
2796vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2797 class loop *loop, stmt_vec_info stmt_info,
2798 slp_tree slp_node, gather_scatter_info *gs_info,
2799 tree *dataref_ptr, vec<tree> *vec_offset)
2800{
2801 gimple_seq stmts = NULL;
2802 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2803 if (stmts != NULL)
2804 {
2805 basic_block new_bb;
2806 edge pe = loop_preheader_edge (loop);
2807 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2808 gcc_assert (!new_bb);
2809 }
2810 if (slp_node)
2811 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2812 else
2813 {
2814 unsigned ncopies
2815 = vect_get_num_copies (loop_vinfo, vectype: gs_info->offset_vectype);
2816 vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_vinfo: stmt_info, ncopies,
2817 op: gs_info->offset, vec_oprnds: vec_offset,
2818 vectype: gs_info->offset_vectype);
2819 }
2820}
2821
2822/* Prepare to implement a grouped or strided load or store using
2823 the gather load or scatter store operation described by GS_INFO.
2824 STMT_INFO is the load or store statement.
2825
2826 Set *DATAREF_BUMP to the amount that should be added to the base
2827 address after each copy of the vectorized statement. Set *VEC_OFFSET
2828 to an invariant offset vector in which element I has the value
2829 I * DR_STEP / SCALE. */
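
/* An illustrative example: with DR_STEP == 16 bytes and SCALE == 4, X is 4
   and *VEC_OFFSET becomes the invariant series { 0, 4, 8, 12, ... } (built
   with VEC_SERIES_EXPR below), while *DATAREF_BUMP is
   16 * TYPE_VECTOR_SUBPARTS (VECTYPE) bytes per copy, or
   16 * .SELECT_VL (...) bytes when SELECT_VL is in use. */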
2830
2831static void
2832vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2833 loop_vec_info loop_vinfo,
2834 gimple_stmt_iterator *gsi,
2835 gather_scatter_info *gs_info,
2836 tree *dataref_bump, tree *vec_offset,
2837 vec_loop_lens *loop_lens)
2838{
2839 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2841
2842 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2843 {
2844 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2845 ivtmp_8 = _31 * 16 (step in bytes);
2846 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2847 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2848 tree loop_len
2849 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2850 tree tmp
2851 = fold_build2 (MULT_EXPR, sizetype,
2852 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2853 loop_len);
2854 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2855 GSI_SAME_STMT);
2856 }
2857 else
2858 {
2859 tree bump
2860 = size_binop (MULT_EXPR,
2861 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2862 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2863 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2864 }
2865
2866 /* The offset given in GS_INFO can have pointer type, so use the element
2867 type of the vector instead. */
2868 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2869
2870 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2871 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2872 ssize_int (gs_info->scale));
2873 step = fold_convert (offset_type, step);
2874
2875 /* Create {0, X, X*2, X*3, ...}. */
2876 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2877 build_zero_cst (offset_type), step);
2878 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2879}
2880
2881/* Prepare the pointer IVs which need to be updated by a variable amount.
2882   That variable amount is the outcome of .SELECT_VL, which allows each
2883   iteration to process a flexible number of elements, as long as that
2884   number is <= VF.
2885
2886   Return the data reference increment according to SELECT_VL.
2887   If new statements are needed, insert them before GSI. */
2888
2889static tree
2890vect_get_loop_variant_data_ptr_increment (
2891 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2892 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2893 vect_memory_access_type memory_access_type)
2894{
2895 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: vinfo);
2896 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2897
2898 /* gather/scatter never reach here. */
2899 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2900
2901  /* When the SELECT_VL pattern is in use, we dynamically adjust
2902     the memory address by the .SELECT_VL result.
2903
2904     The result of .SELECT_VL is the number of elements to be
2905     processed in each iteration. So the memory address
2906     adjustment operation should be:
2907
2908     addr = addr + .SELECT_VL (ARG..) * step;
2909   */
2910 tree loop_len
2911 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2912 tree len_type = TREE_TYPE (loop_len);
2913  /* Since the outcome of .SELECT_VL is a number of elements, scale it
2914     by the step in bytes so that it can be used to adjust the
2915     variable-amount address pointer IVs. */
2916 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2917 wide_int_to_tree (len_type, wi::to_widest (step)));
2918 tree bump = make_temp_ssa_name (type: len_type, NULL, name: "ivtmp");
2919 gassign *assign = gimple_build_assign (bump, tmp);
2920 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2921 return bump;
2922}
2923
2924/* Return the amount that should be added to a vector pointer to move
2925 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2926 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2927 vectorization. */
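
/* For example: a VMAT_CONTIGUOUS access with AGGR_TYPE V8HI gets a 16-byte
   increment, negated when the step of the data reference is negative;
   VMAT_INVARIANT accesses get a zero increment; and when SELECT_VL is in
   use the increment is computed at run time (see
   vect_get_loop_variant_data_ptr_increment above). */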
2928
2929static tree
2930vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2931 dr_vec_info *dr_info, tree aggr_type,
2932 vect_memory_access_type memory_access_type,
2933 vec_loop_lens *loop_lens = nullptr)
2934{
2935 if (memory_access_type == VMAT_INVARIANT)
2936 return size_zero_node;
2937
2938 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: vinfo);
2939 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2940 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2941 loop_lens, dr_info,
2942 memory_access_type);
2943
2944 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2945 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2946 if (tree_int_cst_sgn (step) == -1)
2947 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2948 return iv_step;
2949}
2950
2951/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
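
/* As an illustration of the strategy used below, for __builtin_bswap32 on
   a V4SI vector: view the vector as V16QI, apply a VEC_PERM_EXPR with the
   byte selector

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   to swap the bytes within each 4-byte word, and view the result back as
   V4SI. */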
2952
2953static bool
2954vectorizable_bswap (vec_info *vinfo,
2955 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2956 gimple **vec_stmt, slp_tree slp_node,
2957 slp_tree *slp_op,
2958 tree vectype_in, stmt_vector_for_cost *cost_vec)
2959{
2960 tree op, vectype;
2961 gcall *stmt = as_a <gcall *> (p: stmt_info->stmt);
2962 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
2963 unsigned ncopies;
2964
2965 op = gimple_call_arg (gs: stmt, index: 0);
2966 vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
2968
2969 /* Multiple types in SLP are handled by creating the appropriate number of
2970 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2971 case of SLP. */
2972 if (slp_node)
2973 ncopies = 1;
2974 else
2975 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2976
2977 gcc_assert (ncopies >= 1);
2978
2979 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2980 if (! char_vectype)
2981 return false;
2982
2983 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (node: char_vectype);
2984 unsigned word_bytes;
2985 if (!constant_multiple_p (a: num_bytes, b: nunits, multiple: &word_bytes))
2986 return false;
2987
2988 /* The encoding uses one stepped pattern for each byte in the word. */
2989 vec_perm_builder elts (num_bytes, word_bytes, 3);
2990 for (unsigned i = 0; i < 3; ++i)
2991 for (unsigned j = 0; j < word_bytes; ++j)
2992 elts.quick_push (obj: (i + 1) * word_bytes - j - 1);
2993
2994 vec_perm_indices indices (elts, 1, num_bytes);
2995 machine_mode vmode = TYPE_MODE (char_vectype);
2996 if (!can_vec_perm_const_p (vmode, vmode, indices))
2997 return false;
2998
2999 if (! vec_stmt)
3000 {
3001 if (slp_node
3002 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3003 {
3004 if (dump_enabled_p ())
3005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3006 "incompatible vector types for invariants\n");
3007 return false;
3008 }
3009
3010 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3011 DUMP_VECT_SCOPE ("vectorizable_bswap");
3012 record_stmt_cost (body_cost_vec: cost_vec,
3013 count: 1, kind: vector_stmt, stmt_info, misalign: 0, where: vect_prologue);
3014 record_stmt_cost (body_cost_vec: cost_vec,
3015 count: slp_node
3016 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3017 kind: vec_perm, stmt_info, misalign: 0, where: vect_body);
3018 return true;
3019 }
3020
3021 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3022
3023 /* Transform. */
3024 vec<tree> vec_oprnds = vNULL;
3025 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3026 op0: op, vec_oprnds0: &vec_oprnds);
3027  /* Arguments are ready. Create the new vector stmt. */
3028 unsigned i;
3029 tree vop;
3030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3031 {
3032 gimple *new_stmt;
3033 tree tem = make_ssa_name (var: char_vectype);
3034 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3035 char_vectype, vop));
3036 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3037 tree tem2 = make_ssa_name (var: char_vectype);
3038 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3039 tem, tem, bswap_vconst);
3040 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3041 tem = make_ssa_name (var: vectype);
3042 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3043 vectype, tem2));
3044 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3045 if (slp_node)
3046 slp_node->push_vec_def (def: new_stmt);
3047 else
3048 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3049 }
3050
3051 if (!slp_node)
3052 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3053
3054 vec_oprnds.release ();
3055 return true;
3056}
3057
3058/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3059 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3060 in a single step. On success, store the binary pack code in
3061 *CONVERT_CODE. */
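
/* For example (a sketch): narrowing V4SI inputs to a V8HI result can be
   done with a single VEC_PACK_TRUNC_EXPR (two V4SI vectors packed into one
   V8HI), so *CONVERT_CODE is set to that code; narrowings that would need
   more than one step (multi_step_cvt != 0) are rejected. */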
3062
3063static bool
3064simple_integer_narrowing (tree vectype_out, tree vectype_in,
3065 code_helper *convert_code)
3066{
3067 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3068 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3069 return false;
3070
3071 code_helper code;
3072 int multi_step_cvt = 0;
3073 auto_vec <tree, 8> interm_types;
3074 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3075 &code, &multi_step_cvt, &interm_types)
3076 || multi_step_cvt)
3077 return false;
3078
3079 *convert_code = code;
3080 return true;
3081}
3082
3083/* Function vectorizable_call.
3084
3085 Check if STMT_INFO performs a function call that can be vectorized.
3086 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3087 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3088 Return true if STMT_INFO is vectorizable in this way. */
3089
3090static bool
3091vectorizable_call (vec_info *vinfo,
3092 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3093 gimple **vec_stmt, slp_tree slp_node,
3094 stmt_vector_for_cost *cost_vec)
3095{
3096 gcall *stmt;
3097 tree vec_dest;
3098 tree scalar_dest;
3099 tree op;
3100 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3101 tree vectype_out, vectype_in;
3102 poly_uint64 nunits_in;
3103 poly_uint64 nunits_out;
3104 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
3105 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
3106 tree fndecl, new_temp, rhs_type;
3107 enum vect_def_type dt[4]
3108 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3109 vect_unknown_def_type };
3110 tree vectypes[ARRAY_SIZE (dt)] = {};
3111 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3112 int ndts = ARRAY_SIZE (dt);
3113 int ncopies, j;
3114 auto_vec<tree, 8> vargs;
3115 enum { NARROW, NONE, WIDEN } modifier;
3116 size_t i, nargs;
3117 tree lhs;
3118
3119 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3120 return false;
3121
3122 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3123 && ! vec_stmt)
3124 return false;
3125
3126 /* Is STMT_INFO a vectorizable call? */
3127 stmt = dyn_cast <gcall *> (p: stmt_info->stmt);
3128 if (!stmt)
3129 return false;
3130
3131 if (gimple_call_internal_p (gs: stmt)
3132 && (internal_load_fn_p (gimple_call_internal_fn (gs: stmt))
3133 || internal_store_fn_p (gimple_call_internal_fn (gs: stmt))))
3134 /* Handled by vectorizable_load and vectorizable_store. */
3135 return false;
3136
3137 if (gimple_call_lhs (gs: stmt) == NULL_TREE
3138 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3139 return false;
3140
3141 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3142
3143 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3144
3145 /* Process function arguments. */
3146 rhs_type = NULL_TREE;
3147 vectype_in = NULL_TREE;
3148 nargs = gimple_call_num_args (gs: stmt);
3149
3150  /* Bail out if the function has more than four arguments; we do not have
3151     interesting builtin functions to vectorize with more than two arguments,
3152     except for fma. Having no arguments is also not good. */
3153 if (nargs == 0 || nargs > 4)
3154 return false;
3155
3156 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3157 combined_fn cfn = gimple_call_combined_fn (stmt);
3158 if (cfn == CFN_GOMP_SIMD_LANE)
3159 {
3160 nargs = 0;
3161 rhs_type = unsigned_type_node;
3162 }
3163
3164 int mask_opno = -1;
3165 if (internal_fn_p (code: cfn))
3166 mask_opno = internal_fn_mask_index (as_internal_fn (code: cfn));
3167
3168 for (i = 0; i < nargs; i++)
3169 {
3170 if ((int) i == mask_opno)
3171 {
3172 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index: mask_opno,
3173 mask: &op, mask_node: &slp_op[i], mask_dt_out: &dt[i], mask_vectype_out: &vectypes[i]))
3174 return false;
3175 continue;
3176 }
3177
3178 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3179 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3180 {
3181 if (dump_enabled_p ())
3182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3183 "use not simple.\n");
3184 return false;
3185 }
3186
3187 /* We can only handle calls with arguments of the same type. */
3188 if (rhs_type
3189 && !types_compatible_p (type1: rhs_type, TREE_TYPE (op)))
3190 {
3191 if (dump_enabled_p ())
3192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3193 "argument types differ.\n");
3194 return false;
3195 }
3196 if (!rhs_type)
3197 rhs_type = TREE_TYPE (op);
3198
3199 if (!vectype_in)
3200 vectype_in = vectypes[i];
3201 else if (vectypes[i]
3202 && !types_compatible_p (type1: vectypes[i], type2: vectype_in))
3203 {
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "argument vector types differ.\n");
3207 return false;
3208 }
3209 }
3210 /* If all arguments are external or constant defs, infer the vector type
3211 from the scalar type. */
3212 if (!vectype_in)
3213 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3214 if (vec_stmt)
3215 gcc_assert (vectype_in);
3216 if (!vectype_in)
3217 {
3218 if (dump_enabled_p ())
3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3220 "no vectype for scalar type %T\n", rhs_type);
3221
3222 return false;
3223 }
3224
3225 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3226 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3227 {
3228 if (dump_enabled_p ())
3229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3230 "mixed mask and nonmask vector types\n");
3231 return false;
3232 }
3233
3234 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3235 {
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3238 "use emulated vector type for call\n");
3239 return false;
3240 }
3241
3242 /* FORNOW */
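  /* The call is handled directly when the input and output vectors have the
     same number of lanes (NONE), by combining two input vectors per result
     when the output has twice as many lanes (NARROW), or by producing two
     result vectors per input when the input has twice as many lanes (WIDEN).
     E.g. (types purely illustrative) V4SI -> V4SF is NONE, V4SI -> V8HI is
     NARROW and V8HI -> V4SI is WIDEN.  */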
3243 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype_in);
3244 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
3245 if (known_eq (nunits_in * 2, nunits_out))
3246 modifier = NARROW;
3247 else if (known_eq (nunits_out, nunits_in))
3248 modifier = NONE;
3249 else if (known_eq (nunits_out * 2, nunits_in))
3250 modifier = WIDEN;
3251 else
3252 return false;
3253
3254 /* We only handle functions that do not read or clobber memory. */
3255 if (gimple_vuse (g: stmt))
3256 {
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3259 "function reads from or writes to memory.\n");
3260 return false;
3261 }
3262
3263  /* For now, we only vectorize functions if a target-specific builtin
3264     is available.  TODO -- in some cases, it might be profitable to
3265     insert the calls for pieces of the vector, in order to be able
3266     to vectorize other operations in the loop.  */
3267 fndecl = NULL_TREE;
3268 internal_fn ifn = IFN_LAST;
3269 tree callee = gimple_call_fndecl (gs: stmt);
3270
3271 /* First try using an internal function. */
3272 code_helper convert_code = MAX_TREE_CODES;
3273 if (cfn != CFN_LAST
3274 && (modifier == NONE
3275 || (modifier == NARROW
3276 && simple_integer_narrowing (vectype_out, vectype_in,
3277 convert_code: &convert_code))))
3278 ifn = vectorizable_internal_function (cfn, fndecl: callee, vectype_out,
3279 vectype_in);
3280
3281 /* If that fails, try asking for a target-specific built-in function. */
3282 if (ifn == IFN_LAST)
3283 {
3284 if (cfn != CFN_LAST)
3285 fndecl = targetm.vectorize.builtin_vectorized_function
3286 (cfn, vectype_out, vectype_in);
3287 else if (callee && fndecl_built_in_p (node: callee, klass: BUILT_IN_MD))
3288 fndecl = targetm.vectorize.builtin_md_vectorized_function
3289 (callee, vectype_out, vectype_in);
3290 }
3291
3292 if (ifn == IFN_LAST && !fndecl)
3293 {
3294 if (cfn == CFN_GOMP_SIMD_LANE
3295 && !slp_node
3296 && loop_vinfo
3297 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3298 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3299 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3300 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3301 {
3302 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3303 { 0, 1, 2, ... vf - 1 } vector. */
3304 gcc_assert (nargs == 0);
3305 }
3306 else if (modifier == NONE
3307 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3309 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3310 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3311 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3312 slp_op, vectype_in, cost_vec);
3313 else
3314 {
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 "function is not vectorizable.\n");
3318 return false;
3319 }
3320 }
3321
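  /* For a NARROW call lowered to a target builtin each call consumes two
     input vectors and produces one result vector, so the copy count is
     derived from the output vector type; in the internal-fn narrowing path
     each copy instead produces an intermediate in the input vector type and
     pairs of intermediates are combined afterwards, so the input type is
     used.  */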
3322 if (slp_node)
3323 ncopies = 1;
3324 else if (modifier == NARROW && ifn == IFN_LAST)
3325 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_out);
3326 else
3327 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
3328
3329 /* Sanity check: make sure that at least one copy of the vectorized stmt
3330 needs to be generated. */
3331 gcc_assert (ncopies >= 1);
3332
3333 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3334 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3335 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3336 int len_opno = internal_fn_len_index (cond_len_fn);
3337 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3338 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3339 if (!vec_stmt) /* transformation not required. */
3340 {
3341 if (slp_node)
3342 for (i = 0; i < nargs; ++i)
3343 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3344 vectypes[i]
3345 ? vectypes[i] : vectype_in))
3346 {
3347 if (dump_enabled_p ())
3348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3349 "incompatible vector types for invariants\n");
3350 return false;
3351 }
3352 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3353 DUMP_VECT_SCOPE ("vectorizable_call");
3354 vect_model_simple_cost (vinfo, stmt_info,
3355 ncopies, dt, ndts, node: slp_node, cost_vec);
3356 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3357 record_stmt_cost (body_cost_vec: cost_vec, count: ncopies / 2,
3358 kind: vec_promote_demote, stmt_info, misalign: 0, where: vect_body);
3359
3360 if (loop_vinfo
3361 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3362 && (reduc_idx >= 0 || mask_opno >= 0))
3363 {
3364 if (reduc_idx >= 0
3365 && (cond_fn == IFN_LAST
3366 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3367 OPTIMIZE_FOR_SPEED))
3368 && (cond_len_fn == IFN_LAST
3369 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3370 OPTIMIZE_FOR_SPEED)))
3371 {
3372 if (dump_enabled_p ())
3373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3374 "can't use a fully-masked loop because no"
3375 " conditional operation is available.\n");
3376 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3377 }
3378 else
3379 {
3380 unsigned int nvectors
3381 = (slp_node
3382 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3383 : ncopies);
3384 tree scalar_mask = NULL_TREE;
3385 if (mask_opno >= 0)
3386 scalar_mask = gimple_call_arg (gs: stmt_info->stmt, index: mask_opno);
3387 if (cond_len_fn != IFN_LAST
3388 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3389 OPTIMIZE_FOR_SPEED))
3390 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3391 1);
3392 else
3393 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3394 scalar_mask);
3395 }
3396 }
3397 return true;
3398 }
3399
3400 /* Transform. */
3401
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3404
3405 /* Handle def. */
3406 scalar_dest = gimple_call_lhs (gs: stmt);
3407 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3408
3409 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3410 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3411 unsigned int vect_nargs = nargs;
3412 if (len_loop_p)
3413 {
3414 if (len_opno >= 0)
3415 {
3416 ifn = cond_len_fn;
3417	  /* COND_* -> COND_LEN_* takes two extra arguments: LEN and BIAS.  */
3418 vect_nargs += 2;
3419 }
3420 else if (reduc_idx >= 0)
3421 gcc_unreachable ();
3422 }
3423 else if (masked_loop_p && reduc_idx >= 0)
3424 {
3425 ifn = cond_fn;
3426 vect_nargs += 2;
3427 }
3428
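  /* E.g. (illustrative) a call participating in a reduction inside a
     fully-masked loop is emitted below as
       COND_FMA (loop_mask, a, b, c, else_value)
     where the else value re-uses the reduction input operand; with
     length-based partial vectors the COND_LEN_* form additionally carries
     the LEN and BIAS operands.  */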
3429 if (modifier == NONE || ifn != IFN_LAST)
3430 {
3431 tree prev_res = NULL_TREE;
3432 vargs.safe_grow (len: vect_nargs, exact: true);
3433 auto_vec<vec<tree> > vec_defs (nargs);
3434 for (j = 0; j < ncopies; ++j)
3435 {
3436 /* Build argument list for the vectorized call. */
3437 if (slp_node)
3438 {
3439 vec<tree> vec_oprnds0;
3440
3441 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3442 vec_oprnds0 = vec_defs[0];
3443
3444 /* Arguments are ready. Create the new vector stmt. */
3445 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3446 {
3447 int varg = 0;
3448 if (masked_loop_p && reduc_idx >= 0)
3449 {
3450 unsigned int vec_num = vec_oprnds0.length ();
3451 /* Always true for SLP. */
3452 gcc_assert (ncopies == 1);
3453 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3454 gsi, masks, vec_num,
3455 vectype_out, i);
3456 }
3457 size_t k;
3458 for (k = 0; k < nargs; k++)
3459 {
3460 vec<tree> vec_oprndsk = vec_defs[k];
3461 vargs[varg++] = vec_oprndsk[i];
3462 }
3463 if (masked_loop_p && reduc_idx >= 0)
3464 vargs[varg++] = vargs[reduc_idx + 1];
3465 gimple *new_stmt;
3466 if (modifier == NARROW)
3467 {
3468 /* We don't define any narrowing conditional functions
3469 at present. */
3470 gcc_assert (mask_opno < 0);
3471 tree half_res = make_ssa_name (var: vectype_in);
3472 gcall *call
3473 = gimple_build_call_internal_vec (ifn, vargs);
3474 gimple_call_set_lhs (gs: call, lhs: half_res);
3475 gimple_call_set_nothrow (s: call, nothrow_p: true);
3476 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3477 if ((i & 1) == 0)
3478 {
3479 prev_res = half_res;
3480 continue;
3481 }
3482 new_temp = make_ssa_name (var: vec_dest);
3483 new_stmt = vect_gimple_build (new_temp, convert_code,
3484 prev_res, half_res);
3485 vect_finish_stmt_generation (vinfo, stmt_info,
3486 vec_stmt: new_stmt, gsi);
3487 }
3488 else
3489 {
3490 if (len_opno >= 0 && len_loop_p)
3491 {
3492 unsigned int vec_num = vec_oprnds0.length ();
3493 /* Always true for SLP. */
3494 gcc_assert (ncopies == 1);
3495 tree len
3496 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3497 vectype_out, i, 1);
3498 signed char biasval
3499 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3500 tree bias = build_int_cst (intQI_type_node, biasval);
3501 vargs[len_opno] = len;
3502 vargs[len_opno + 1] = bias;
3503 }
3504 else if (mask_opno >= 0 && masked_loop_p)
3505 {
3506 unsigned int vec_num = vec_oprnds0.length ();
3507 /* Always true for SLP. */
3508 gcc_assert (ncopies == 1);
3509 tree mask = vect_get_loop_mask (loop_vinfo,
3510 gsi, masks, vec_num,
3511 vectype_out, i);
3512 vargs[mask_opno] = prepare_vec_mask
3513 (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
3514 vec_mask: vargs[mask_opno], gsi);
3515 }
3516
3517 gcall *call;
3518 if (ifn != IFN_LAST)
3519 call = gimple_build_call_internal_vec (ifn, vargs);
3520 else
3521 call = gimple_build_call_vec (fndecl, vargs);
3522 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3523 gimple_call_set_lhs (gs: call, lhs: new_temp);
3524 gimple_call_set_nothrow (s: call, nothrow_p: true);
3525 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3526 new_stmt = call;
3527 }
3528 slp_node->push_vec_def (def: new_stmt);
3529 }
3530 continue;
3531 }
3532
3533 int varg = 0;
3534 if (masked_loop_p && reduc_idx >= 0)
3535 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3536 vectype_out, j);
3537 for (i = 0; i < nargs; i++)
3538 {
3539 op = gimple_call_arg (gs: stmt, index: i);
3540 if (j == 0)
3541 {
3542 vec_defs.quick_push (obj: vNULL);
3543 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
3544 op, vec_oprnds: &vec_defs[i],
3545 vectype: vectypes[i]);
3546 }
3547 vargs[varg++] = vec_defs[i][j];
3548 }
3549 if (masked_loop_p && reduc_idx >= 0)
3550 vargs[varg++] = vargs[reduc_idx + 1];
3551
3552 if (len_opno >= 0 && len_loop_p)
3553 {
3554 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3555 vectype_out, j, 1);
3556 signed char biasval
3557 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3558 tree bias = build_int_cst (intQI_type_node, biasval);
3559 vargs[len_opno] = len;
3560 vargs[len_opno + 1] = bias;
3561 }
3562 else if (mask_opno >= 0 && masked_loop_p)
3563 {
3564 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3565 vectype_out, j);
3566 vargs[mask_opno]
3567 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
3568 vec_mask: vargs[mask_opno], gsi);
3569 }
3570
3571 gimple *new_stmt;
3572 if (cfn == CFN_GOMP_SIMD_LANE)
3573 {
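	      /* Copy J of the vectorized lane index is the constant vector
		 { J*nunits_out, J*nunits_out + 1, ..., (J+1)*nunits_out - 1 },
		 e.g. { 4, 5, 6, 7 } for the second V4SI copy (illustrative).  */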
3574 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3575 tree new_var
3576 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3577 gimple *init_stmt = gimple_build_assign (new_var, cst);
3578 vect_init_vector_1 (vinfo, stmt_vinfo: stmt_info, new_stmt: init_stmt, NULL);
3579 new_temp = make_ssa_name (var: vec_dest);
3580 new_stmt = gimple_build_assign (new_temp, new_var);
3581 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3582 }
3583 else if (modifier == NARROW)
3584 {
3585 /* We don't define any narrowing conditional functions at
3586 present. */
3587 gcc_assert (mask_opno < 0);
3588 tree half_res = make_ssa_name (var: vectype_in);
3589 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3590 gimple_call_set_lhs (gs: call, lhs: half_res);
3591 gimple_call_set_nothrow (s: call, nothrow_p: true);
3592 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3593 if ((j & 1) == 0)
3594 {
3595 prev_res = half_res;
3596 continue;
3597 }
3598 new_temp = make_ssa_name (var: vec_dest);
3599 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3600 half_res);
3601 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3602 }
3603 else
3604 {
3605 gcall *call;
3606 if (ifn != IFN_LAST)
3607 call = gimple_build_call_internal_vec (ifn, vargs);
3608 else
3609 call = gimple_build_call_vec (fndecl, vargs);
3610 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3611 gimple_call_set_lhs (gs: call, lhs: new_temp);
3612 gimple_call_set_nothrow (s: call, nothrow_p: true);
3613 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3614 new_stmt = call;
3615 }
3616
3617 if (j == (modifier == NARROW ? 1 : 0))
3618 *vec_stmt = new_stmt;
3619 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3620 }
3621 for (i = 0; i < nargs; i++)
3622 {
3623 vec<tree> vec_oprndsi = vec_defs[i];
3624 vec_oprndsi.release ();
3625 }
3626 }
3627 else if (modifier == NARROW)
3628 {
3629 auto_vec<vec<tree> > vec_defs (nargs);
3630 /* We don't define any narrowing conditional functions at present. */
3631 gcc_assert (mask_opno < 0);
3632 for (j = 0; j < ncopies; ++j)
3633 {
3634 /* Build argument list for the vectorized call. */
3635 if (j == 0)
3636 vargs.create (nelems: nargs * 2);
3637 else
3638 vargs.truncate (size: 0);
3639
3640 if (slp_node)
3641 {
3642 vec<tree> vec_oprnds0;
3643
3644 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3645 vec_oprnds0 = vec_defs[0];
3646
3647 /* Arguments are ready. Create the new vector stmt. */
3648 for (i = 0; vec_oprnds0.iterate (ix: i, ptr: &vec_oprnd0); i += 2)
3649 {
3650 size_t k;
3651 vargs.truncate (size: 0);
3652 for (k = 0; k < nargs; k++)
3653 {
3654 vec<tree> vec_oprndsk = vec_defs[k];
3655 vargs.quick_push (obj: vec_oprndsk[i]);
3656 vargs.quick_push (obj: vec_oprndsk[i + 1]);
3657 }
3658 gcall *call;
3659 if (ifn != IFN_LAST)
3660 call = gimple_build_call_internal_vec (ifn, vargs);
3661 else
3662 call = gimple_build_call_vec (fndecl, vargs);
3663 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3664 gimple_call_set_lhs (gs: call, lhs: new_temp);
3665 gimple_call_set_nothrow (s: call, nothrow_p: true);
3666 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3667 slp_node->push_vec_def (def: call);
3668 }
3669 continue;
3670 }
3671
3672 for (i = 0; i < nargs; i++)
3673 {
3674 op = gimple_call_arg (gs: stmt, index: i);
3675 if (j == 0)
3676 {
3677 vec_defs.quick_push (obj: vNULL);
3678 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies: 2 * ncopies,
3679 op, vec_oprnds: &vec_defs[i], vectype: vectypes[i]);
3680 }
3681 vec_oprnd0 = vec_defs[i][2*j];
3682 vec_oprnd1 = vec_defs[i][2*j+1];
3683
3684 vargs.quick_push (obj: vec_oprnd0);
3685 vargs.quick_push (obj: vec_oprnd1);
3686 }
3687
3688 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3689 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
3690 gimple_call_set_lhs (gs: new_stmt, lhs: new_temp);
3691 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3692
3693 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3694 }
3695
3696 if (!slp_node)
3697 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3698
3699 for (i = 0; i < nargs; i++)
3700 {
3701 vec<tree> vec_oprndsi = vec_defs[i];
3702 vec_oprndsi.release ();
3703 }
3704 }
3705 else
3706 /* No current target implements this case. */
3707 return false;
3708
3709 vargs.release ();
3710
3711  /* The call in STMT might prevent it from being removed in DCE.
3712     We cannot, however, remove it here, due to the way the SSA name
3713     it defines is mapped to the new definition.  So just replace the
3714     RHS of the statement with something harmless.  */
3715
3716 if (slp_node)
3717 return true;
3718
3719 stmt_info = vect_orig_stmt (stmt_info);
3720 lhs = gimple_get_lhs (stmt_info->stmt);
3721
3722 gassign *new_stmt
3723 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3724 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3725
3726 return true;
3727}
3728
3729
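/* Per-argument information collected by vectorizable_simd_clone_call: the
   argument's vector type and scalar value, its linear step and alignment
   where known, its def kind, and whether it is linear within a SIMD lane
   only (see vect_simd_lane_linear below).  */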
3730struct simd_call_arg_info
3731{
3732 tree vectype;
3733 tree op;
3734 HOST_WIDE_INT linear_step;
3735 enum vect_def_type dt;
3736 unsigned int align;
3737 bool simd_lane_linear;
3738};
3739
3740/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3741   is linear within a SIMD lane (but not within the whole loop), note it in
3742   *ARGINFO.  */
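/* A typical match (illustrative GIMPLE):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = (sizetype) _1;
     _3 = _2 * 8;
     op_4 = &array + _3;

   which gives base &array and linear step 8.  */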
3743
3744static void
3745vect_simd_lane_linear (tree op, class loop *loop,
3746 struct simd_call_arg_info *arginfo)
3747{
3748 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3749
3750 if (!is_gimple_assign (gs: def_stmt)
3751 || gimple_assign_rhs_code (gs: def_stmt) != POINTER_PLUS_EXPR
3752 || !is_gimple_min_invariant (gimple_assign_rhs1 (gs: def_stmt)))
3753 return;
3754
3755 tree base = gimple_assign_rhs1 (gs: def_stmt);
3756 HOST_WIDE_INT linear_step = 0;
3757 tree v = gimple_assign_rhs2 (gs: def_stmt);
3758 while (TREE_CODE (v) == SSA_NAME)
3759 {
3760 tree t;
3761 def_stmt = SSA_NAME_DEF_STMT (v);
3762 if (is_gimple_assign (gs: def_stmt))
3763 switch (gimple_assign_rhs_code (gs: def_stmt))
3764 {
3765 case PLUS_EXPR:
3766 t = gimple_assign_rhs2 (gs: def_stmt);
3767 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3768 return;
3769 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3770 v = gimple_assign_rhs1 (gs: def_stmt);
3771 continue;
3772 case MULT_EXPR:
3773 t = gimple_assign_rhs2 (gs: def_stmt);
3774 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3775 return;
3776 linear_step = tree_to_shwi (t);
3777 v = gimple_assign_rhs1 (gs: def_stmt);
3778 continue;
3779 CASE_CONVERT:
3780 t = gimple_assign_rhs1 (gs: def_stmt);
3781 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3782 || (TYPE_PRECISION (TREE_TYPE (v))
3783 < TYPE_PRECISION (TREE_TYPE (t))))
3784 return;
3785 if (!linear_step)
3786 linear_step = 1;
3787 v = t;
3788 continue;
3789 default:
3790 return;
3791 }
3792 else if (gimple_call_internal_p (gs: def_stmt, fn: IFN_GOMP_SIMD_LANE)
3793 && loop->simduid
3794 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3795 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3796 == loop->simduid))
3797 {
3798 if (!linear_step)
3799 linear_step = 1;
3800 arginfo->linear_step = linear_step;
3801 arginfo->op = base;
3802 arginfo->simd_lane_linear = true;
3803 return;
3804 }
3805 }
3806}
3807
3808/* Function vectorizable_simd_clone_call.
3809
3810 Check if STMT_INFO performs a function call that can be vectorized
3811 by calling a simd clone of the function.
3812 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3813 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3814 Return true if STMT_INFO is vectorizable in this way. */
3815
3816static bool
3817vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3818 gimple_stmt_iterator *gsi,
3819 gimple **vec_stmt, slp_tree slp_node,
3820 stmt_vector_for_cost *)
3821{
3822 tree vec_dest;
3823 tree scalar_dest;
3824 tree op, type;
3825 tree vec_oprnd0 = NULL_TREE;
3826 tree vectype;
3827 poly_uint64 nunits;
3828 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
3829 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
3830 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3831 tree fndecl, new_temp;
3832 int ncopies, j;
3833 auto_vec<simd_call_arg_info> arginfo;
3834 vec<tree> vargs = vNULL;
3835 size_t i, nargs;
3836 tree lhs, rtype, ratype;
3837 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3838 int masked_call_offset = 0;
3839
3840 /* Is STMT a vectorizable call? */
3841 gcall *stmt = dyn_cast <gcall *> (p: stmt_info->stmt);
3842 if (!stmt)
3843 return false;
3844
3845 fndecl = gimple_call_fndecl (gs: stmt);
3846 if (fndecl == NULL_TREE
3847 && gimple_call_internal_p (gs: stmt, fn: IFN_MASK_CALL))
3848 {
3849 fndecl = gimple_call_arg (gs: stmt, index: 0);
3850 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3851 fndecl = TREE_OPERAND (fndecl, 0);
3852 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3853 masked_call_offset = 1;
3854 }
3855 if (fndecl == NULL_TREE)
3856 return false;
3857
3858 struct cgraph_node *node = cgraph_node::get (decl: fndecl);
3859 if (node == NULL || node->simd_clones == NULL)
3860 return false;
3861
3862 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3863 return false;
3864
3865 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3866 && ! vec_stmt)
3867 return false;
3868
3869 if (gimple_call_lhs (gs: stmt)
3870 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3871 return false;
3872
3873 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3874
3875 vectype = STMT_VINFO_VECTYPE (stmt_info);
3876
3877 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3878 return false;
3879
3880 /* Process function arguments. */
3881 nargs = gimple_call_num_args (gs: stmt) - masked_call_offset;
3882
3883 /* Bail out if the function has zero arguments. */
3884 if (nargs == 0)
3885 return false;
3886
3887 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3888 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3889 arginfo.reserve (nelems: nargs, exact: true);
3890 auto_vec<slp_tree> slp_op;
3891 slp_op.safe_grow_cleared (len: nargs);
3892
3893 for (i = 0; i < nargs; i++)
3894 {
3895 simd_call_arg_info thisarginfo;
3896 affine_iv iv;
3897
3898 thisarginfo.linear_step = 0;
3899 thisarginfo.align = 0;
3900 thisarginfo.op = NULL_TREE;
3901 thisarginfo.simd_lane_linear = false;
3902
3903 int op_no = i + masked_call_offset;
3904 if (slp_node)
3905 op_no = vect_slp_child_index_for_operand (stmt, op: op_no, false);
3906 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3907 op_no, &op, &slp_op[i],
3908 &thisarginfo.dt, &thisarginfo.vectype)
3909 || thisarginfo.dt == vect_uninitialized_def)
3910 {
3911 if (dump_enabled_p ())
3912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3913 "use not simple.\n");
3914 return false;
3915 }
3916
3917 if (thisarginfo.dt == vect_constant_def
3918 || thisarginfo.dt == vect_external_def)
3919 {
3920 /* With SLP we determine the vector type of constants/externals
3921 at analysis time, handling conflicts via
3922 vect_maybe_update_slp_op_vectype. At transform time
3923 we have a vector type recorded for SLP. */
3924 gcc_assert (!vec_stmt
3925 || !slp_node
3926 || thisarginfo.vectype != NULL_TREE);
3927 if (!vec_stmt)
3928 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3929 TREE_TYPE (op),
3930 slp_node);
3931 }
3932 else
3933 gcc_assert (thisarginfo.vectype != NULL_TREE);
3934
3935      /* For linear arguments, the analysis phase should have saved
3936	 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO.  */
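      /* That vector is laid out as: element 0 holds the selected clone's
	 FUNCTION_DECL, and for each linear argument I elements 3*I + 1,
	 3*I + 2 and 3*I + 3 hold its base, step and simd-lane-linear flag
	 respectively.  */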
3937 if (i * 3 + 4 <= simd_clone_info.length ()
3938 && simd_clone_info[i * 3 + 2])
3939 {
3940 gcc_assert (vec_stmt);
3941 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3942 thisarginfo.op = simd_clone_info[i * 3 + 1];
3943 thisarginfo.simd_lane_linear
3944 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
3945 /* If loop has been peeled for alignment, we need to adjust it. */
3946 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3947 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3948 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3949 {
3950 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3951 tree step = simd_clone_info[i * 3 + 2];
3952 tree opt = TREE_TYPE (thisarginfo.op);
3953 bias = fold_convert (TREE_TYPE (step), bias);
3954 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3955 thisarginfo.op
3956 = fold_build2 (POINTER_TYPE_P (opt)
3957 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3958 thisarginfo.op, bias);
3959 }
3960 }
3961 else if (!vec_stmt
3962 && thisarginfo.dt != vect_constant_def
3963 && thisarginfo.dt != vect_external_def
3964 && loop_vinfo
3965 && TREE_CODE (op) == SSA_NAME
3966 && simple_iv (loop, loop_containing_stmt (stmt), op,
3967 &iv, false)
3968 && tree_fits_shwi_p (iv.step))
3969 {
3970 thisarginfo.linear_step = tree_to_shwi (iv.step);
3971 thisarginfo.op = iv.base;
3972 }
3973 else if ((thisarginfo.dt == vect_constant_def
3974 || thisarginfo.dt == vect_external_def)
3975 && POINTER_TYPE_P (TREE_TYPE (op)))
3976 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3977 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3978 linear too. */
3979 if (POINTER_TYPE_P (TREE_TYPE (op))
3980 && !thisarginfo.linear_step
3981 && !vec_stmt
3982 && thisarginfo.dt != vect_constant_def
3983 && thisarginfo.dt != vect_external_def
3984 && loop_vinfo
3985 && TREE_CODE (op) == SSA_NAME)
3986 vect_simd_lane_linear (op, loop, arginfo: &thisarginfo);
3987
3988 arginfo.quick_push (obj: thisarginfo);
3989 }
3990
3991 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
3992 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
3993 unsigned int badness = 0;
3994 struct cgraph_node *bestn = NULL;
3995 if (simd_clone_info.exists ())
3996 bestn = cgraph_node::get (decl: simd_clone_info[0]);
3997 else
3998 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3999 n = n->simdclone->next_clone)
4000 {
4001 unsigned int this_badness = 0;
4002 unsigned int num_calls;
4003	 /* The number of arguments in the call and the number of parameters in
4004	    the simdclone should match.  However, when the simdclone is
4005	    'inbranch', it could have one more parameter than nargs when using
4006	    an inbranch simdclone to call a non-inbranch call, either in a
4007	    non-masked loop using an all-true constant mask, or inside a masked
4008	    loop using its mask.  */
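	 /* BADNESS is a heuristic score and lower is better: the weights
	    below penalize, roughly in decreasing order, clones that need
	    several calls per vector iteration, inbranch clones, clones the
	    target rates as less suitable, and per-argument mismatches such
	    as invariant or linear operands passed in vector form or
	    mask-mode conversions.  */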
4009 size_t simd_nargs = n->simdclone->nargs;
4010 if (!masked_call_offset && n->simdclone->inbranch)
4011 simd_nargs--;
4012 if (!constant_multiple_p (a: vf * group_size, b: n->simdclone->simdlen,
4013 multiple: &num_calls)
4014 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4015 || (nargs != simd_nargs))
4016 continue;
4017 if (num_calls != 1)
4018 this_badness += exact_log2 (x: num_calls) * 4096;
4019 if (n->simdclone->inbranch)
4020 this_badness += 8192;
4021 int target_badness = targetm.simd_clone.usable (n);
4022 if (target_badness < 0)
4023 continue;
4024 this_badness += target_badness * 512;
4025 for (i = 0; i < nargs; i++)
4026 {
4027 switch (n->simdclone->args[i].arg_type)
4028 {
4029 case SIMD_CLONE_ARG_TYPE_VECTOR:
4030 if (!useless_type_conversion_p
4031 (n->simdclone->args[i].orig_type,
4032 TREE_TYPE (gimple_call_arg (stmt,
4033 i + masked_call_offset))))
4034 i = -1;
4035 else if (arginfo[i].dt == vect_constant_def
4036 || arginfo[i].dt == vect_external_def
4037 || arginfo[i].linear_step)
4038 this_badness += 64;
4039 break;
4040 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4041 if (arginfo[i].dt != vect_constant_def
4042 && arginfo[i].dt != vect_external_def)
4043 i = -1;
4044 break;
4045 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4047 if (arginfo[i].dt == vect_constant_def
4048 || arginfo[i].dt == vect_external_def
4049 || (arginfo[i].linear_step
4050 != n->simdclone->args[i].linear_step))
4051 i = -1;
4052 break;
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4058 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4059 /* FORNOW */
4060 i = -1;
4061 break;
4062 case SIMD_CLONE_ARG_TYPE_MASK:
4063	      /* While we can create a traditional data vector from
4064		 an incoming integer-mode mask, we have no good way to
4065		 force-generate an integer-mode mask from a traditional
4066		 boolean vector input.  */
4067 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4068 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4069 i = -1;
4070 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4071 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4072 this_badness += 2048;
4073 break;
4074 }
4075 if (i == (size_t) -1)
4076 break;
4077 if (n->simdclone->args[i].alignment > arginfo[i].align)
4078 {
4079 i = -1;
4080 break;
4081 }
4082 if (arginfo[i].align)
4083 this_badness += (exact_log2 (x: arginfo[i].align)
4084 - exact_log2 (x: n->simdclone->args[i].alignment));
4085 }
4086 if (i == (size_t) -1)
4087 continue;
4088 if (masked_call_offset == 0
4089 && n->simdclone->inbranch
4090 && n->simdclone->nargs > nargs)
4091 {
4092 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4093 SIMD_CLONE_ARG_TYPE_MASK);
4094	  /* Penalize using a masked SIMD clone when the call is not in a branch
4095	     and the loop is not fully masked, as an all-true mask must be built.  */
4096 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4097 this_badness += 64;
4098 }
4099 if (bestn == NULL || this_badness < badness)
4100 {
4101 bestn = n;
4102 badness = this_badness;
4103 }
4104 }
4105
4106 if (bestn == NULL)
4107 return false;
4108
4109 unsigned int num_mask_args = 0;
4110 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4111 for (i = 0; i < nargs; i++)
4112 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4113 num_mask_args++;
4114
4115 for (i = 0; i < nargs; i++)
4116 {
4117 if ((arginfo[i].dt == vect_constant_def
4118 || arginfo[i].dt == vect_external_def)
4119 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4120 {
4121 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4122 i + masked_call_offset));
4123 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4124 slp_node);
4125 if (arginfo[i].vectype == NULL
4126 || !constant_multiple_p (a: bestn->simdclone->simdlen,
4127 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4128 return false;
4129 }
4130
4131 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4132 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4133 {
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "vector mask arguments are not supported.\n");
4137 return false;
4138 }
4139
4140 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4141 {
4142 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4143 if (bestn->simdclone->mask_mode == VOIDmode)
4144 {
4145 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: clone_arg_vectype),
4146 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4147 {
4148 /* FORNOW we only have partial support for vector-type masks
4149 that can't hold all of simdlen. */
4150 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4152 vect_location,
4153 "in-branch vector clones are not yet"
4154 " supported for mismatched vector sizes.\n");
4155 return false;
4156 }
4157 }
4158 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4159 {
4160 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4161 || maybe_ne (a: exact_div (a: bestn->simdclone->simdlen,
4162 b: num_mask_args),
4163 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4164 {
4165 /* FORNOW we only have partial support for integer-type masks
4166 that represent the same number of lanes as the
4167 vectorized mask inputs. */
4168 if (dump_enabled_p ())
4169 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4170 vect_location,
4171 "in-branch vector clones are not yet "
4172 "supported for mismatched vector sizes.\n");
4173 return false;
4174 }
4175 }
4176 else
4177 {
4178 if (dump_enabled_p ())
4179 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4180 vect_location,
4181 "in-branch vector clones not supported"
4182 " on this target.\n");
4183 return false;
4184 }
4185 }
4186 }
4187
4188 fndecl = bestn->decl;
4189 nunits = bestn->simdclone->simdlen;
4190 if (slp_node)
4191 ncopies = vector_unroll_factor (vf * group_size, nunits);
4192 else
4193 ncopies = vector_unroll_factor (vf, nunits);
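  /* E.g. (illustrative) with a vectorization factor of 16 and a clone
     simdlen of 4, ncopies == 4 calls of the clone are emitted; for SLP the
     group size scales the required lane count accordingly.  */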
4194
4195  /* If the function isn't const, only allow it in simd loops where the
4196     user has asserted that at least nunits consecutive iterations can be
4197     performed using SIMD instructions.  */
4198 if ((loop == NULL || maybe_lt (a: (unsigned) loop->safelen, b: nunits))
4199 && gimple_vuse (g: stmt))
4200 return false;
4201
4202 /* Sanity check: make sure that at least one copy of the vectorized stmt
4203 needs to be generated. */
4204 gcc_assert (ncopies >= 1);
4205
4206 if (!vec_stmt) /* transformation not required. */
4207 {
4208 if (slp_node)
4209 for (unsigned i = 0; i < nargs; ++i)
4210 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4211 {
4212 if (dump_enabled_p ())
4213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4214 "incompatible vector types for invariants\n");
4215 return false;
4216 }
4217      /* When the original call is pure or const but the SIMD ABI dictates
4218	 an aggregate return, we will have to use a virtual definition and,
4219	 in a loop, eventually even need to add a virtual PHI.  That's not
4220	 straightforward, so allow fixing this up via renaming.  */
4221 if (gimple_call_lhs (gs: stmt)
4222 && !gimple_vdef (g: stmt)
4223 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4224 vinfo->any_known_not_updated_vssa = true;
4225 /* ??? For SLP code-gen we end up inserting after the last
4226 vector argument def rather than at the original call position
4227 so automagic virtual operand updating doesn't work. */
4228 if (gimple_vuse (g: stmt) && slp_node)
4229 vinfo->any_known_not_updated_vssa = true;
4230 simd_clone_info.safe_push (obj: bestn->decl);
4231 for (i = 0; i < bestn->simdclone->nargs; i++)
4232 {
4233 switch (bestn->simdclone->args[i].arg_type)
4234 {
4235 default:
4236 continue;
4237 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4238 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4239 {
4240 simd_clone_info.safe_grow_cleared (len: i * 3 + 1, exact: true);
4241 simd_clone_info.safe_push (obj: arginfo[i].op);
4242 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4243 ? size_type_node : TREE_TYPE (arginfo[i].op);
4244 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4245 simd_clone_info.safe_push (obj: ls);
4246 tree sll = arginfo[i].simd_lane_linear
4247 ? boolean_true_node : boolean_false_node;
4248 simd_clone_info.safe_push (obj: sll);
4249 }
4250 break;
4251 case SIMD_CLONE_ARG_TYPE_MASK:
4252 if (loop_vinfo
4253 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4254 vect_record_loop_mask (loop_vinfo,
4255 &LOOP_VINFO_MASKS (loop_vinfo),
4256 ncopies, vectype, op);
4257
4258 break;
4259 }
4260 }
4261
4262 if (!bestn->simdclone->inbranch && loop_vinfo)
4263 {
4264 if (dump_enabled_p ()
4265 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4266 dump_printf_loc (MSG_NOTE, vect_location,
4267 "can't use a fully-masked loop because a"
4268 " non-masked simd clone was selected.\n");
4269 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4270 }
4271
4272 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4273 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4274/* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4275 dt, slp_node, cost_vec); */
4276 return true;
4277 }
4278
4279 /* Transform. */
4280
4281 if (dump_enabled_p ())
4282 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4283
4284 /* Handle def. */
4285 scalar_dest = gimple_call_lhs (gs: stmt);
4286 vec_dest = NULL_TREE;
4287 rtype = NULL_TREE;
4288 ratype = NULL_TREE;
4289 if (scalar_dest)
4290 {
4291 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4292 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4293 if (TREE_CODE (rtype) == ARRAY_TYPE)
4294 {
4295 ratype = rtype;
4296 rtype = TREE_TYPE (ratype);
4297 }
4298 }
4299
4300 auto_vec<vec<tree> > vec_oprnds;
4301 auto_vec<unsigned> vec_oprnds_i;
4302 vec_oprnds_i.safe_grow_cleared (len: nargs, exact: true);
4303 if (slp_node)
4304 {
4305 vec_oprnds.reserve_exact (nelems: nargs);
4306 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4307 }
4308 else
4309 vec_oprnds.safe_grow_cleared (len: nargs, exact: true);
4310 for (j = 0; j < ncopies; ++j)
4311 {
4312 poly_uint64 callee_nelements;
4313 poly_uint64 caller_nelements;
4314 /* Build argument list for the vectorized call. */
4315 if (j == 0)
4316 vargs.create (nelems: nargs);
4317 else
4318 vargs.truncate (size: 0);
4319
4320 for (i = 0; i < nargs; i++)
4321 {
4322 unsigned int k, l, m, o;
4323 tree atype;
4324 op = gimple_call_arg (gs: stmt, index: i + masked_call_offset);
4325 switch (bestn->simdclone->args[i].arg_type)
4326 {
4327 case SIMD_CLONE_ARG_TYPE_VECTOR:
4328 atype = bestn->simdclone->args[i].vector_type;
4329 caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype);
4330 callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype);
4331 o = vector_unroll_factor (nunits, callee_nelements);
4332 for (m = j * o; m < (j + 1) * o; m++)
4333 {
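		  /* When the clone takes narrower vectors than the caller
		     provides (e.g. caller V8SI vs. clone V4SI, illustrative),
		     each caller vector is split into K pieces with
		     BIT_FIELD_REF; in the converse case K caller vectors are
		     glued together into one argument with a CONSTRUCTOR.  */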
4334 if (known_lt (callee_nelements, caller_nelements))
4335 {
4336 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4337 if (!constant_multiple_p (a: caller_nelements,
4338 b: callee_nelements, multiple: &k))
4339 gcc_unreachable ();
4340
4341 gcc_assert ((k & (k - 1)) == 0);
4342 if (m == 0)
4343 {
4344 if (!slp_node)
4345 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4346 ncopies: ncopies * o / k, op,
4347 vec_oprnds: &vec_oprnds[i]);
4348 vec_oprnds_i[i] = 0;
4349 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4350 }
4351 else
4352 {
4353 vec_oprnd0 = arginfo[i].op;
4354 if ((m & (k - 1)) == 0)
4355 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4356 }
4357 arginfo[i].op = vec_oprnd0;
4358 vec_oprnd0
4359 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4360 bitsize_int (prec),
4361 bitsize_int ((m & (k - 1)) * prec));
4362 gassign *new_stmt
4363 = gimple_build_assign (make_ssa_name (var: atype),
4364 vec_oprnd0);
4365 vect_finish_stmt_generation (vinfo, stmt_info,
4366 vec_stmt: new_stmt, gsi);
4367 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4368 }
4369 else
4370 {
4371 if (!constant_multiple_p (a: callee_nelements,
4372 b: caller_nelements, multiple: &k))
4373 gcc_unreachable ();
4374 gcc_assert ((k & (k - 1)) == 0);
4375 vec<constructor_elt, va_gc> *ctor_elts;
4376 if (k != 1)
4377 vec_alloc (v&: ctor_elts, nelems: k);
4378 else
4379 ctor_elts = NULL;
4380 for (l = 0; l < k; l++)
4381 {
4382 if (m == 0 && l == 0)
4383 {
4384 if (!slp_node)
4385 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4386 ncopies: k * o * ncopies,
4387 op,
4388 vec_oprnds: &vec_oprnds[i]);
4389 vec_oprnds_i[i] = 0;
4390 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4391 }
4392 else
4393 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4394 arginfo[i].op = vec_oprnd0;
4395 if (k == 1)
4396 break;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4398 vec_oprnd0);
4399 }
4400 if (k == 1)
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4402 atype))
4403 {
4404 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4405 vec_oprnd0);
4406 gassign *new_stmt
4407 = gimple_build_assign (make_ssa_name (var: atype),
4408 vec_oprnd0);
4409 vect_finish_stmt_generation (vinfo, stmt_info,
4410 vec_stmt: new_stmt, gsi);
4411 vargs.safe_push (obj: gimple_get_lhs (new_stmt));
4412 }
4413 else
4414 vargs.safe_push (obj: vec_oprnd0);
4415 else
4416 {
4417 vec_oprnd0 = build_constructor (atype, ctor_elts);
4418 gassign *new_stmt
4419 = gimple_build_assign (make_ssa_name (var: atype),
4420 vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info,
4422 vec_stmt: new_stmt, gsi);
4423 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4424 }
4425 }
4426 }
4427 break;
4428 case SIMD_CLONE_ARG_TYPE_MASK:
4429 if (bestn->simdclone->mask_mode == VOIDmode)
4430 {
4431 atype = bestn->simdclone->args[i].vector_type;
4432 tree elt_type = TREE_TYPE (atype);
4433 tree one = fold_convert (elt_type, integer_one_node);
4434 tree zero = fold_convert (elt_type, integer_zero_node);
4435 callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype);
4436 caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype);
4437 o = vector_unroll_factor (nunits, callee_nelements);
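	      /* A boolean mask operand is materialized as a data vector of
		 the clone's mask element type, i.e. (sketch)
		   mask_arg = VEC_COND_EXPR <mask, { 1, ... }, { 0, ... }>
		 where in a fully-masked loop MASK is first combined with
		 the loop mask.  */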
4438 for (m = j * o; m < (j + 1) * o; m++)
4439 {
4440 if (maybe_lt (a: callee_nelements, b: caller_nelements))
4441 {
4442 /* The mask type has fewer elements than simdlen. */
4443
4444 /* FORNOW */
4445 gcc_unreachable ();
4446 }
4447 else if (known_eq (callee_nelements, caller_nelements))
4448 {
4449 /* The SIMD clone function has the same number of
4450 elements as the current function. */
4451 if (m == 0)
4452 {
4453 if (!slp_node)
4454 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4455 ncopies: o * ncopies,
4456 op,
4457 vec_oprnds: &vec_oprnds[i]);
4458 vec_oprnds_i[i] = 0;
4459 }
4460 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4461 if (loop_vinfo
4462 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4463 {
4464 vec_loop_masks *loop_masks
4465 = &LOOP_VINFO_MASKS (loop_vinfo);
4466 tree loop_mask
4467 = vect_get_loop_mask (loop_vinfo, gsi,
4468 loop_masks, ncopies,
4469 vectype, j);
4470 vec_oprnd0
4471 = prepare_vec_mask (loop_vinfo,
4472 TREE_TYPE (loop_mask),
4473 loop_mask, vec_mask: vec_oprnd0,
4474 gsi);
4475 loop_vinfo->vec_cond_masked_set.add (k: { vec_oprnd0,
4476 loop_mask });
4477
4478 }
4479 vec_oprnd0
4480 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4481 build_vector_from_val (atype, one),
4482 build_vector_from_val (atype, zero));
4483 gassign *new_stmt
4484 = gimple_build_assign (make_ssa_name (var: atype),
4485 vec_oprnd0);
4486 vect_finish_stmt_generation (vinfo, stmt_info,
4487 vec_stmt: new_stmt, gsi);
4488 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4489 }
4490 else
4491 {
4492 /* The mask type has more elements than simdlen. */
4493
4494 /* FORNOW */
4495 gcc_unreachable ();
4496 }
4497 }
4498 }
4499 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4500 {
4501 atype = bestn->simdclone->args[i].vector_type;
4502 /* Guess the number of lanes represented by atype. */
4503 poly_uint64 atype_subparts
4504 = exact_div (a: bestn->simdclone->simdlen,
4505 b: num_mask_args);
4506 o = vector_unroll_factor (nunits, atype_subparts);
4507 for (m = j * o; m < (j + 1) * o; m++)
4508 {
4509 if (m == 0)
4510 {
4511 if (!slp_node)
4512 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4513 ncopies: o * ncopies,
4514 op,
4515 vec_oprnds: &vec_oprnds[i]);
4516 vec_oprnds_i[i] = 0;
4517 }
4518 if (maybe_lt (a: atype_subparts,
4519 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4520 {
4521 /* The mask argument has fewer elements than the
4522 input vector. */
4523 /* FORNOW */
4524 gcc_unreachable ();
4525 }
4526 else if (known_eq (atype_subparts,
4527 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4528 {
4529 /* The vector mask argument matches the input
4530 in the number of lanes, but not necessarily
4531 in the mode. */
4532 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4533 tree st = lang_hooks.types.type_for_mode
4534 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4535 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4536 vec_oprnd0);
4537 gassign *new_stmt
4538 = gimple_build_assign (make_ssa_name (var: st),
4539 vec_oprnd0);
4540 vect_finish_stmt_generation (vinfo, stmt_info,
4541 vec_stmt: new_stmt, gsi);
4542 if (!types_compatible_p (type1: atype, type2: st))
4543 {
4544 new_stmt
4545 = gimple_build_assign (make_ssa_name (var: atype),
4546 NOP_EXPR,
4547 gimple_assign_lhs
4548 (gs: new_stmt));
4549 vect_finish_stmt_generation (vinfo, stmt_info,
4550 vec_stmt: new_stmt, gsi);
4551 }
4552 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4553 }
4554 else
4555 {
4556 /* The mask argument has more elements than the
4557 input vector. */
4558 /* FORNOW */
4559 gcc_unreachable ();
4560 }
4561 }
4562 }
4563 else
4564 gcc_unreachable ();
4565 break;
4566 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4567 vargs.safe_push (obj: op);
4568 break;
4569 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4570 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4571 if (j == 0)
4572 {
4573 gimple_seq stmts;
4574 arginfo[i].op
4575 = force_gimple_operand (unshare_expr (arginfo[i].op),
4576 &stmts, true, NULL_TREE);
4577 if (stmts != NULL)
4578 {
4579 basic_block new_bb;
4580 edge pe = loop_preheader_edge (loop);
4581 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4582 gcc_assert (!new_bb);
4583 }
4584 if (arginfo[i].simd_lane_linear)
4585 {
4586 vargs.safe_push (obj: arginfo[i].op);
4587 break;
4588 }
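		  /* Otherwise materialize the linear argument as an IV of the
		     vector loop (sketch):
		       phi_res = PHI <base (preheader), phi_arg (latch)>
		       phi_arg = phi_res + step * ncopies * simdlen
		     passing PHI_RES, advanced by J * simdlen * step for the
		     later copies, to the clone.  */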
4589 tree phi_res = copy_ssa_name (var: op);
4590 gphi *new_phi = create_phi_node (phi_res, loop->header);
4591 add_phi_arg (new_phi, arginfo[i].op,
4592 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4593 enum tree_code code
4594 = POINTER_TYPE_P (TREE_TYPE (op))
4595 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4596 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4597 ? sizetype : TREE_TYPE (op);
4598 poly_widest_int cst
4599 = wi::mul (a: bestn->simdclone->args[i].linear_step,
4600 b: ncopies * nunits);
4601 tree tcst = wide_int_to_tree (type, cst);
4602 tree phi_arg = copy_ssa_name (var: op);
4603 gassign *new_stmt
4604 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4605 gimple_stmt_iterator si = gsi_after_labels (bb: loop->header);
4606 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4607 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4608 UNKNOWN_LOCATION);
4609 arginfo[i].op = phi_res;
4610 vargs.safe_push (obj: phi_res);
4611 }
4612 else
4613 {
4614 enum tree_code code
4615 = POINTER_TYPE_P (TREE_TYPE (op))
4616 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4617 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4618 ? sizetype : TREE_TYPE (op);
4619 poly_widest_int cst
4620 = wi::mul (a: bestn->simdclone->args[i].linear_step,
4621 b: j * nunits);
4622 tree tcst = wide_int_to_tree (type, cst);
4623 new_temp = make_ssa_name (TREE_TYPE (op));
4624 gassign *new_stmt
4625 = gimple_build_assign (new_temp, code,
4626 arginfo[i].op, tcst);
4627 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4628 vargs.safe_push (obj: new_temp);
4629 }
4630 break;
4631 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4632 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4633 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4634 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4635 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4636 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4637 default:
4638 gcc_unreachable ();
4639 }
4640 }
4641
4642 if (masked_call_offset == 0
4643 && bestn->simdclone->inbranch
4644 && bestn->simdclone->nargs > nargs)
4645 {
4646 unsigned long m, o;
4647 size_t mask_i = bestn->simdclone->nargs - 1;
4648 tree mask;
4649 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4650 SIMD_CLONE_ARG_TYPE_MASK);
4651
4652 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4653 callee_nelements = TYPE_VECTOR_SUBPARTS (node: masktype);
4654 o = vector_unroll_factor (nunits, callee_nelements);
4655 for (m = j * o; m < (j + 1) * o; m++)
4656 {
4657 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4658 {
4659 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4660 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4661 ncopies, vectype, j);
4662 }
4663 else
4664 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4665
4666 gassign *new_stmt;
4667 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4668 {
4669 /* This means we are dealing with integer mask modes.
4670 First convert to an integer type with the same size as
4671 the current vector type. */
4672 unsigned HOST_WIDE_INT intermediate_size
4673 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4674 tree mid_int_type =
4675 build_nonstandard_integer_type (intermediate_size, 1);
4676 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4677 new_stmt
4678 = gimple_build_assign (make_ssa_name (var: mid_int_type),
4679 mask);
4680 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4681 /* Then zero-extend to the mask mode. */
4682 mask = fold_build1 (NOP_EXPR, masktype,
4683 gimple_get_lhs (new_stmt));
4684 }
4685 else if (bestn->simdclone->mask_mode == VOIDmode)
4686 {
4687 tree one = fold_convert (TREE_TYPE (masktype),
4688 integer_one_node);
4689 tree zero = fold_convert (TREE_TYPE (masktype),
4690 integer_zero_node);
4691 mask = build3 (VEC_COND_EXPR, masktype, mask,
4692 build_vector_from_val (masktype, one),
4693 build_vector_from_val (masktype, zero));
4694 }
4695 else
4696 gcc_unreachable ();
4697
4698 new_stmt = gimple_build_assign (make_ssa_name (var: masktype), mask);
4699 vect_finish_stmt_generation (vinfo, stmt_info,
4700 vec_stmt: new_stmt, gsi);
4701 mask = gimple_assign_lhs (gs: new_stmt);
4702 vargs.safe_push (obj: mask);
4703 }
4704 }
4705
4706 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4707 if (vec_dest)
4708 {
4709 gcc_assert (ratype
4710 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4711 if (ratype)
4712 new_temp = create_tmp_var (ratype);
4713 else if (useless_type_conversion_p (vectype, rtype))
4714 new_temp = make_ssa_name (var: vec_dest, stmt: new_call);
4715 else
4716 new_temp = make_ssa_name (var: rtype, stmt: new_call);
4717 gimple_call_set_lhs (gs: new_call, lhs: new_temp);
4718 }
4719 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_call, gsi);
4720 gimple *new_stmt = new_call;
4721
4722 if (vec_dest)
4723 {
4724 if (!multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits))
4725 {
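	      /* The clone returns more lanes than one caller-side vector
		 holds, so split its single return value into K pieces,
		 e.g. (illustrative) a V8SI return feeding V4SI statements is
		 extracted with two BIT_FIELD_REFs, or with MEM_REFs when the
		 simd ABI returns an array.  The converse case below instead
		 gathers several clone results into a CONSTRUCTOR.  */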
4726 unsigned int k, l;
4727 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4728 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4729 k = vector_unroll_factor (nunits,
4730 TYPE_VECTOR_SUBPARTS (vectype));
4731 gcc_assert ((k & (k - 1)) == 0);
4732 for (l = 0; l < k; l++)
4733 {
4734 tree t;
4735 if (ratype)
4736 {
4737 t = build_fold_addr_expr (new_temp);
4738 t = build2 (MEM_REF, vectype, t,
4739 build_int_cst (TREE_TYPE (t), l * bytes));
4740 }
4741 else
4742 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4743 bitsize_int (prec), bitsize_int (l * prec));
4744 new_stmt = gimple_build_assign (make_ssa_name (var: vectype), t);
4745 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4746
4747 if (j == 0 && l == 0)
4748 *vec_stmt = new_stmt;
4749 if (slp_node)
4750 SLP_TREE_VEC_DEFS (slp_node)
4751 .quick_push (obj: gimple_assign_lhs (gs: new_stmt));
4752 else
4753 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4754 }
4755
4756 if (ratype)
4757 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4758 continue;
4759 }
4760 else if (!multiple_p (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype)))
4761 {
4762 unsigned int k;
4763 if (!constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype),
4764 b: TYPE_VECTOR_SUBPARTS (node: rtype), multiple: &k))
4765 gcc_unreachable ();
4766 gcc_assert ((k & (k - 1)) == 0);
4767 if ((j & (k - 1)) == 0)
4768 vec_alloc (v&: ret_ctor_elts, nelems: k);
4769 if (ratype)
4770 {
4771 unsigned int m, o;
4772 o = vector_unroll_factor (nunits,
4773 TYPE_VECTOR_SUBPARTS (rtype));
4774 for (m = 0; m < o; m++)
4775 {
4776 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4777 size_int (m), NULL_TREE, NULL_TREE);
4778 new_stmt = gimple_build_assign (make_ssa_name (var: rtype),
4779 tem);
4780 vect_finish_stmt_generation (vinfo, stmt_info,
4781 vec_stmt: new_stmt, gsi);
4782 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4783 gimple_assign_lhs (new_stmt));
4784 }
4785 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4786 }
4787 else
4788 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4789 if ((j & (k - 1)) != k - 1)
4790 continue;
4791 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4792 new_stmt
4793 = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0);
4794 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4795
4796 if ((unsigned) j == k - 1)
4797 *vec_stmt = new_stmt;
4798 if (slp_node)
4799 SLP_TREE_VEC_DEFS (slp_node)
4800 .quick_push (obj: gimple_assign_lhs (gs: new_stmt));
4801 else
4802 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4803 continue;
4804 }
4805 else if (ratype)
4806 {
4807 tree t = build_fold_addr_expr (new_temp);
4808 t = build2 (MEM_REF, vectype, t,
4809 build_int_cst (TREE_TYPE (t), 0));
4810 new_stmt = gimple_build_assign (make_ssa_name (var: vec_dest), t);
4811 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4812 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4813 }
4814 else if (!useless_type_conversion_p (vectype, rtype))
4815 {
4816 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4817 new_stmt
4818 = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0);
4819 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4820 }
4821 }
4822
4823 if (j == 0)
4824 *vec_stmt = new_stmt;
4825 if (slp_node)
4826 SLP_TREE_VEC_DEFS (slp_node).quick_push (obj: gimple_get_lhs (new_stmt));
4827 else
4828 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4829 }
4830
4831 for (i = 0; i < nargs; ++i)
4832 {
4833 vec<tree> oprndsi = vec_oprnds[i];
4834 oprndsi.release ();
4835 }
4836 vargs.release ();
4837
4838 /* Mark the clone as no longer being a candidate for GC. */
4839 bestn->gc_candidate = false;
4840
4841  /* The call in STMT might prevent it from being removed in DCE.
4842     We cannot, however, remove it here, due to the way the SSA name
4843     it defines is mapped to the new definition.  So just replace the
4844     RHS of the statement with something harmless.  */
4845
4846 if (slp_node)
4847 return true;
4848
4849 gimple *new_stmt;
4850 if (scalar_dest)
4851 {
4852 type = TREE_TYPE (scalar_dest);
4853 lhs = gimple_call_lhs (gs: vect_orig_stmt (stmt_info)->stmt);
4854 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4855 }
4856 else
4857 new_stmt = gimple_build_nop ();
4858 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4859 unlink_stmt_vdef (stmt);
4860
4861 return true;
4862}
4863
4864
4865/* Function vect_gen_widened_results_half
4866
4867   Create a vector stmt whose code is CH, whose operands are VEC_OPRND0 and
4868   VEC_OPRND1 (OP_TYPE says whether the operation is unary or binary), and
4869   whose result variable is VEC_DEST.  The new vector stmt is inserted at
4870   GSI.  When CH names an internal function rather than a tree code, a call
4871   to that internal function is built instead of an assignment.
4872   STMT_INFO is the original scalar stmt that we are vectorizing.  */
4873
4874static gimple *
4875vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4876 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4877 tree vec_dest, gimple_stmt_iterator *gsi,
4878 stmt_vec_info stmt_info)
4879{
4880 gimple *new_stmt;
4881 tree new_temp;
4882
4883 /* Generate half of the widened result: */
4884 if (op_type != binary_op)
4885 vec_oprnd1 = NULL;
4886 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4887 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
4888 gimple_set_lhs (new_stmt, new_temp);
4889 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4890
4891 return new_stmt;
4892}
4893
4894
4895/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4896   For multi-step conversions store the resulting vectors and call the function
4897   recursively.  When NARROW_SRC_P is true, there is still a conversion after
4898   narrowing, so don't store the vectors in the SLP_NODE or in the vector info
4899   of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain).  */
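/* For example (illustrative), a two-step demotion from int to char first
   packs pairs of V4SI operands into V8HI vectors and then, recursing once
   with VEC_PACK_TRUNC_EXPR, packs pairs of those into V16QI results.  */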
4900
4901static void
4902vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4903 int multi_step_cvt,
4904 stmt_vec_info stmt_info,
4905 vec<tree> &vec_dsts,
4906 gimple_stmt_iterator *gsi,
4907 slp_tree slp_node, code_helper code,
4908 bool narrow_src_p)
4909{
4910 unsigned int i;
4911 tree vop0, vop1, new_tmp, vec_dest;
4912
4913 vec_dest = vec_dsts.pop ();
4914
4915 for (i = 0; i < vec_oprnds->length (); i += 2)
4916 {
4917 /* Create demotion operation. */
4918 vop0 = (*vec_oprnds)[i];
4919 vop1 = (*vec_oprnds)[i + 1];
4920 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4921 new_tmp = make_ssa_name (var: vec_dest, stmt: new_stmt);
4922 gimple_set_lhs (new_stmt, new_tmp);
4923 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4924 if (multi_step_cvt || narrow_src_p)
4925 /* Store the resulting vector for the next recursive call,
4926 or leave it in VEC_OPRNDS for the caller's NARROW FLOAT_EXPR step. */
4927 (*vec_oprnds)[i/2] = new_tmp;
4928 else
4929 {
4930 /* This is the last step of the conversion sequence. Store the
4931 vectors in SLP_NODE or in vector info of the scalar statement
4932 (or in STMT_VINFO_RELATED_STMT chain). */
4933 if (slp_node)
4934 slp_node->push_vec_def (def: new_stmt);
4935 else
4936 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4937 }
4938 }
4939
4940 /* For multi-step demotion operations we first generate demotion operations
4941 from the source type to the intermediate types, and then combine the
4942 results (stored in VEC_OPRNDS) in demotion operation to the destination
4943 type. */
4944 if (multi_step_cvt)
4945 {
4946 /* At each level of recursion we have half of the operands we had at the
4947 previous level. */
4948 vec_oprnds->truncate (size: (i+1)/2);
4949 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4950 multi_step_cvt: multi_step_cvt - 1,
4951 stmt_info, vec_dsts, gsi,
4952 slp_node, code: VEC_PACK_TRUNC_EXPR,
4953 narrow_src_p);
4954 }
4955
4956 vec_dsts.quick_push (obj: vec_dest);
4957}
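/* A rough standalone model (plain C++ with invented names, not GIMPLE) of
   the halving done by each recursive step: adjacent pairs of input vectors
   are packed into single vectors of half-width elements, so a two-step
   int -> char demotion proceeds 4 x V4SI -> 2 x V8HI -> 1 x V16QI.

     #include <stddef.h>
     #include <stdint.h>
     #include <vector>

     // One VEC_PACK_TRUNC_EXPR-style step: pack pairs of int32 vectors
     // into half as many int16 vectors, truncating each element.
     static std::vector<std::vector<int16_t> >
     pack_trunc_step (const std::vector<std::vector<int32_t> > &ops)
     {
       std::vector<std::vector<int16_t> > out;
       for (size_t i = 0; i + 1 < ops.size (); i += 2)
         {
           std::vector<int16_t> packed;
           for (int32_t x : ops[i])
             packed.push_back ((int16_t) x);
           for (int32_t x : ops[i + 1])
             packed.push_back ((int16_t) x);
           out.push_back (packed);
         }
       return out;
     }
*/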
4958
4959
4960/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4961 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4962 STMT_INFO. For multi-step conversions store the resulting vectors and
4963 call the function recursively. */
4964
4965static void
4966vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4967 vec<tree> *vec_oprnds0,
4968 vec<tree> *vec_oprnds1,
4969 stmt_vec_info stmt_info, tree vec_dest,
4970 gimple_stmt_iterator *gsi,
4971 code_helper ch1,
4972 code_helper ch2, int op_type)
4973{
4974 int i;
4975 tree vop0, vop1, new_tmp1, new_tmp2;
4976 gimple *new_stmt1, *new_stmt2;
4977 vec<tree> vec_tmp = vNULL;
4978
4979 vec_tmp.create (nelems: vec_oprnds0->length () * 2);
4980 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4981 {
4982 if (op_type == binary_op)
4983 vop1 = (*vec_oprnds1)[i];
4984 else
4985 vop1 = NULL_TREE;
4986
4987 /* Generate the two halves of promotion operation. */
4988 new_stmt1 = vect_gen_widened_results_half (vinfo, ch: ch1, vec_oprnd0: vop0, vec_oprnd1: vop1,
4989 op_type, vec_dest, gsi,
4990 stmt_info);
4991 new_stmt2 = vect_gen_widened_results_half (vinfo, ch: ch2, vec_oprnd0: vop0, vec_oprnd1: vop1,
4992 op_type, vec_dest, gsi,
4993 stmt_info);
4994 if (is_gimple_call (gs: new_stmt1))
4995 {
4996 new_tmp1 = gimple_call_lhs (gs: new_stmt1);
4997 new_tmp2 = gimple_call_lhs (gs: new_stmt2);
4998 }
4999 else
5000 {
5001 new_tmp1 = gimple_assign_lhs (gs: new_stmt1);
5002 new_tmp2 = gimple_assign_lhs (gs: new_stmt2);
5003 }
5004
5005 /* Store the results for the next step. */
5006 vec_tmp.quick_push (obj: new_tmp1);
5007 vec_tmp.quick_push (obj: new_tmp2);
5008 }
5009
5010 vec_oprnds0->release ();
5011 *vec_oprnds0 = vec_tmp;
5012}
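/* Schematically (a hedged sketch with invented names, mirroring the
   widen_halves model above rather than calling the real API): each pass
   replaces every input vector by its two widened halves, so the operand
   list doubles per step, e.g. 1 x V16QI -> 2 x V8HI -> 4 x V4SI for a
   two-step char -> int promotion.

     #include <stddef.h>
     #include <stdint.h>
     #include <vector>

     // One promotion step applied to a whole operand list.
     static std::vector<std::vector<int32_t> >
     promote_step (const std::vector<std::vector<int16_t> > &ops)
     {
       std::vector<std::vector<int32_t> > out;
       for (const std::vector<int16_t> &op : ops)
         {
           size_t half = op.size () / 2;
           out.push_back (std::vector<int32_t> (op.begin (),
                                                op.begin () + half));
           out.push_back (std::vector<int32_t> (op.begin () + half,
                                                op.end ()));
         }
       return out;
     }
*/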
5013
5014/* Create vectorized promotion stmts for widening stmts using only half the
5015 potential vector size for input. */
5016static void
5017vect_create_half_widening_stmts (vec_info *vinfo,
5018 vec<tree> *vec_oprnds0,
5019 vec<tree> *vec_oprnds1,
5020 stmt_vec_info stmt_info, tree vec_dest,
5021 gimple_stmt_iterator *gsi,
5022 code_helper code1,
5023 int op_type)
5024{
5025 int i;
5026 tree vop0, vop1;
5027 gimple *new_stmt1;
5028 gimple *new_stmt2;
5029 gimple *new_stmt3;
5030 vec<tree> vec_tmp = vNULL;
5031
5032 vec_tmp.create (nelems: vec_oprnds0->length ());
5033 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5034 {
5035 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5036
5037 gcc_assert (op_type == binary_op);
5038 vop1 = (*vec_oprnds1)[i];
5039
5040 /* Widen the first vector input. */
5041 out_type = TREE_TYPE (vec_dest);
5042 new_tmp1 = make_ssa_name (var: out_type);
5043 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5044 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt1, gsi);
5045 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5046 {
5047 /* Widen the second vector input. */
5048 new_tmp2 = make_ssa_name (var: out_type);
5049 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5050 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt2, gsi);
5051 /* Perform the operation. With both vector inputs widened. */
5052 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5053 }
5054 else
5055 {
5056 /* Perform the operation. With the single vector input widened. */
5057 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5058 }
5059
5060 new_tmp3 = make_ssa_name (var: vec_dest, stmt: new_stmt3);
5061 gimple_assign_set_lhs (gs: new_stmt3, lhs: new_tmp3);
5062 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt3, gsi);
5063
5064 /* Store the results for the next step. */
5065 vec_tmp.quick_push (obj: new_tmp3);
5066 }
5067
5068 vec_oprnds0->release ();
5069 *vec_oprnds0 = vec_tmp;
5070}
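/* Roughly (a standalone C++ sketch with an invented name, not the
   vectorizer's types): both inputs keep their lane count but are widened
   element-wise first, and the operation is then carried out at the wider
   type, e.g. a half-width V4HI x V4HI widening multiply becomes two
   V4HI -> V4SI conversions plus an ordinary V4SI multiply.

     #include <stddef.h>
     #include <stdint.h>
     #include <vector>

     // Per-lane model of the three stmts built above for a multiply.
     static std::vector<int32_t>
     half_widen_mult (const std::vector<int16_t> &a,
                      const std::vector<int16_t> &b)
     {
       std::vector<int32_t> r (a.size ());
       for (size_t i = 0; i < a.size (); ++i)
         r[i] = (int32_t) a[i] * (int32_t) b[i];  // widen, widen, mult
       return r;
     }
*/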
5071
5072
5073/* Check if STMT_INFO performs a conversion operation that can be vectorized.
5074 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5075 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5076 Return true if STMT_INFO is vectorizable in this way. */
5077
5078static bool
5079vectorizable_conversion (vec_info *vinfo,
5080 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5081 gimple **vec_stmt, slp_tree slp_node,
5082 stmt_vector_for_cost *cost_vec)
5083{
5084 tree vec_dest, cvt_op = NULL_TREE;
5085 tree scalar_dest;
5086 tree op0, op1 = NULL_TREE;
5087 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
5088 tree_code tc1, tc2;
5089 code_helper code, code1, code2;
5090 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5091 tree new_temp;
5092 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5093 int ndts = 2;
5094 poly_uint64 nunits_in;
5095 poly_uint64 nunits_out;
5096 tree vectype_out, vectype_in;
5097 int ncopies, i;
5098 tree lhs_type, rhs_type;
5099 /* For conversions between floating point and integer there are two NARROW
5100 cases. NARROW_SRC is for FLOAT_EXPR and means
5101 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5102 This is safe when the range of the source integer fits into the lower
5103 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5104 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5105 For other narrowing conversions NARROW_DST is used by
5106 default. */
5107 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
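 /* A scalar view of the two NARROW cases (hedged illustration with
    invented function names; for NARROW_SRC it assumes the range check
    on the source value succeeds):

      // NARROW_SRC: float f = (float) l; with l known to fit in int,
      // vectorized as a demotion of l followed by int -> float.
      float narrow_src (long long l) { return (float) (int) l; }

      // NARROW_DST: short s = (short) d; vectorized as a double -> int
      // FIX_TRUNC followed by a demotion to short.
      short narrow_dst (double d) { return (short) (int) d; }
 */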
5108 vec<tree> vec_oprnds0 = vNULL;
5109 vec<tree> vec_oprnds1 = vNULL;
5110 tree vop0;
5111 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
5112 int multi_step_cvt = 0;
5113 vec<tree> interm_types = vNULL;
5114 tree intermediate_type, cvt_type = NULL_TREE;
5115 int op_type;
5116 unsigned short fltsz;
5117
5118 /* Is STMT a vectorizable conversion? */
5119
5120 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5121 return false;
5122
5123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5124 && ! vec_stmt)
5125 return false;
5126
5127 gimple* stmt = stmt_info->stmt;
5128 if (!(is_gimple_assign (gs: stmt) || is_gimple_call (gs: stmt)))
5129 return false;
5130
5131 if (gimple_get_lhs (stmt) == NULL_TREE
5132 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5133 return false;
5137
5138 if (is_gimple_assign (gs: stmt))
5139 {
5140 code = gimple_assign_rhs_code (gs: stmt);
5141 op_type = TREE_CODE_LENGTH ((tree_code) code);
5142 }
5143 else if (gimple_call_internal_p (gs: stmt))
5144 {
5145 code = gimple_call_internal_fn (gs: stmt);
5146 op_type = gimple_call_num_args (gs: stmt);
5147 }
5148 else
5149 return false;
5150
5151 bool widen_arith = (code == WIDEN_MULT_EXPR
5152 || code == WIDEN_LSHIFT_EXPR
5153 || widening_fn_p (code));
5154
5155 if (!widen_arith
5156 && !CONVERT_EXPR_CODE_P (code)
5157 && code != FIX_TRUNC_EXPR
5158 && code != FLOAT_EXPR)
5159 return false;
5160
5161 /* Check types of lhs and rhs. */
5162 scalar_dest = gimple_get_lhs (stmt);
5163 lhs_type = TREE_TYPE (scalar_dest);
5164 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5165
5166 /* Check the operands of the operation. */
5167 slp_tree slp_op0, slp_op1 = NULL;
5168 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5169 0, &op0, &slp_op0, &dt[0], &vectype_in))
5170 {
5171 if (dump_enabled_p ())
5172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5173 "use not simple.\n");
5174 return false;
5175 }
5176
5177 rhs_type = TREE_TYPE (op0);
5178 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5179 && !((INTEGRAL_TYPE_P (lhs_type)
5180 && INTEGRAL_TYPE_P (rhs_type))
5181 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5182 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5183 return false;
5184
5185 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5186 && ((INTEGRAL_TYPE_P (lhs_type)
5187 && !type_has_mode_precision_p (t: lhs_type))
5188 || (INTEGRAL_TYPE_P (rhs_type)
5189 && !type_has_mode_precision_p (t: rhs_type))))
5190 {
5191 if (dump_enabled_p ())
5192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5193 "type conversion to/from bit-precision unsupported."
5194 "\n");
5195 return false;
5196 }
5197
5198 if (op_type == binary_op)
5199 {
5200 gcc_assert (code == WIDEN_MULT_EXPR
5201 || code == WIDEN_LSHIFT_EXPR
5202 || widening_fn_p (code));
5203
5204 op1 = is_gimple_assign (gs: stmt) ? gimple_assign_rhs2 (gs: stmt) :
5205 gimple_call_arg (gs: stmt, index: 0);
5206 tree vectype1_in;
5207 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5208 &op1, &slp_op1, &dt[1], &vectype1_in))
5209 {
5210 if (dump_enabled_p ())
5211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5212 "use not simple.\n");
5213 return false;
5214 }
5215 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5216 OP1. */
5217 if (!vectype_in)
5218 vectype_in = vectype1_in;
5219 }
5220
5221 /* If op0 is an external or constant def, infer the vector type
5222 from the scalar type. */
5223 if (!vectype_in)
5224 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5225 if (vec_stmt)
5226 gcc_assert (vectype_in);
5227 if (!vectype_in)
5228 {
5229 if (dump_enabled_p ())
5230 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5231 "no vectype for scalar type %T\n", rhs_type);
5232
5233 return false;
5234 }
5235
5236 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5237 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5238 {
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5241 "can't convert between boolean and non "
5242 "boolean vectors %T\n", rhs_type);
5243
5244 return false;
5245 }
5246
5247 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype_in);
5248 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
5249 if (known_eq (nunits_out, nunits_in))
5250 if (widen_arith)
5251 modifier = WIDEN;
5252 else
5253 modifier = NONE;
5254 else if (multiple_p (a: nunits_out, b: nunits_in))
5255 modifier = NARROW_DST;
5256 else
5257 {
5258 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5259 modifier = WIDEN;
5260 }
5261
5262 /* Multiple types in SLP are handled by creating the appropriate number of
5263 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5264 case of SLP. */
5265 if (slp_node)
5266 ncopies = 1;
5267 else if (modifier == NARROW_DST)
5268 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_out);
5269 else
5270 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
5271
5272 /* Sanity check: make sure that at least one copy of the vectorized stmt
5273 needs to be generated. */
5274 gcc_assert (ncopies >= 1);
5275
5276 bool found_mode = false;
5277 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5278 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5279 opt_scalar_mode rhs_mode_iter;
5280
5281 /* Supportable by target? */
5282 switch (modifier)
5283 {
5284 case NONE:
5285 if (code != FIX_TRUNC_EXPR
5286 && code != FLOAT_EXPR
5287 && !CONVERT_EXPR_CODE_P (code))
5288 return false;
5289 gcc_assert (code.is_tree_code ());
5290 if (supportable_convert_operation ((tree_code) code, vectype_out,
5291 vectype_in, &tc1))
5292 {
5293 code1 = tc1;
5294 break;
5295 }
5296
5297 /* For conversions between float and integer types try whether
5298 we can use intermediate signed integer types to support the
5299 conversion. */
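 /* A scalar view of what is being checked (hedged, with invented function
    names, and assuming the target lacks the direct conversion): a
    same-lane-count double -> char conversion can go through a signed int,
    and likewise short -> double.

      // FIX_TRUNC via an intermediate signed int, then narrow.
      char fix_via_int (double d) { return (char) (int) d; }

      // Widen to a signed int first, then FLOAT.
      double float_via_int (short s) { return (double) (int) s; }
 */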
5300 if (GET_MODE_SIZE (mode: lhs_mode) != GET_MODE_SIZE (mode: rhs_mode)
5301 && (code == FLOAT_EXPR ||
5302 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5303 {
5304 bool demotion = GET_MODE_SIZE (mode: rhs_mode) > GET_MODE_SIZE (mode: lhs_mode);
5305 bool float_expr_p = code == FLOAT_EXPR;
5306 unsigned short target_size;
5307 scalar_mode intermediate_mode;
5308 if (demotion)
5309 {
5310 intermediate_mode = lhs_mode;
5311 target_size = GET_MODE_SIZE (mode: rhs_mode);
5312 }
5313 else
5314 {
5315 target_size = GET_MODE_SIZE (mode: lhs_mode);
5316 if (!int_mode_for_size
5317 (size: GET_MODE_BITSIZE (mode: rhs_mode), limit: 0).exists (mode: &intermediate_mode))
5318 goto unsupported;
5319 }
5320 code1 = float_expr_p ? code : NOP_EXPR;
5321 codecvt1 = float_expr_p ? NOP_EXPR : code;
5322 opt_scalar_mode mode_iter;
5323 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5324 {
5325 intermediate_mode = mode_iter.require ();
5326
5327 if (GET_MODE_SIZE (mode: intermediate_mode) > target_size)
5328 break;
5329
5330 scalar_mode cvt_mode;
5331 if (!int_mode_for_size
5332 (size: GET_MODE_BITSIZE (mode: intermediate_mode), limit: 0).exists (mode: &cvt_mode))
5333 break;
5334
5335 cvt_type = build_nonstandard_integer_type
5336 (GET_MODE_BITSIZE (mode: cvt_mode), 0);
5337
5338 /* Check if the intermediate type can hold OP0's range.
5339 When converting from float to integer this is not necessary
5340 because values that do not fit the (smaller) target type are
5341 unspecified anyway. */
5342 if (demotion && float_expr_p)
5343 {
5344 wide_int op_min_value, op_max_value;
5345 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5346 break;
5347
5348 if (cvt_type == NULL_TREE
5349 || (wi::min_precision (x: op_max_value, sgn: SIGNED)
5350 > TYPE_PRECISION (cvt_type))
5351 || (wi::min_precision (x: op_min_value, sgn: SIGNED)
5352 > TYPE_PRECISION (cvt_type)))
5353 continue;
5354 }
5355
5356 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5357 /* This should only happen for SLP as long as the loop vectorizer
5358 only supports same-sized vectors. */
5359 if (cvt_type == NULL_TREE
5360 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: cvt_type), b: nunits_in)
5361 || !supportable_convert_operation ((tree_code) code1,
5362 vectype_out,
5363 cvt_type, &tc1)
5364 || !supportable_convert_operation ((tree_code) codecvt1,
5365 cvt_type,
5366 vectype_in, &tc2))
5367 continue;
5368
5369 found_mode = true;
5370 break;
5371 }
5372
5373 if (found_mode)
5374 {
5375 multi_step_cvt++;
5376 interm_types.safe_push (obj: cvt_type);
5377 cvt_type = NULL_TREE;
5378 code1 = tc1;
5379 codecvt1 = tc2;
5380 break;
5381 }
5382 }
5383 /* FALLTHRU */
5384 unsupported:
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "conversion not supported by target.\n");
5388 return false;
5389
5390 case WIDEN:
5391 if (known_eq (nunits_in, nunits_out))
5392 {
5393 if (!(code.is_tree_code ()
5394 && supportable_half_widening_operation ((tree_code) code,
5395 vectype_out, vectype_in,
5396 &tc1)))
5397 goto unsupported;
5398 code1 = tc1;
5399 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5400 break;
5401 }
5402 if (supportable_widening_operation (vinfo, code, stmt_info,
5403 vectype_out, vectype_in, &code1,
5404 &code2, &multi_step_cvt,
5405 &interm_types))
5406 {
5407 /* Binary widening operation can only be supported directly by the
5408 architecture. */
5409 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5410 break;
5411 }
5412
5413 if (code != FLOAT_EXPR
5414 || GET_MODE_SIZE (mode: lhs_mode) <= GET_MODE_SIZE (mode: rhs_mode))
5415 goto unsupported;
5416
5417 fltsz = GET_MODE_SIZE (mode: lhs_mode);
5418 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5419 {
5420 rhs_mode = rhs_mode_iter.require ();
5421 if (GET_MODE_SIZE (mode: rhs_mode) > fltsz)
5422 break;
5423
5424 cvt_type
5425 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0);
5426 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5427 if (cvt_type == NULL_TREE)
5428 goto unsupported;
5429
5430 if (GET_MODE_SIZE (mode: rhs_mode) == fltsz)
5431 {
5432 tc1 = ERROR_MARK;
5433 gcc_assert (code.is_tree_code ());
5434 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5435 cvt_type, &tc1))
5436 goto unsupported;
5437 codecvt1 = tc1;
5438 }
5439 else if (!supportable_widening_operation (vinfo, code,
5440 stmt_info, vectype_out,
5441 cvt_type, &codecvt1,
5442 &codecvt2, &multi_step_cvt,
5443 &interm_types))
5444 continue;
5445 else
5446 gcc_assert (multi_step_cvt == 0);
5447
5448 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5449 cvt_type,
5450 vectype_in, &code1,
5451 &code2, &multi_step_cvt,
5452 &interm_types))
5453 {
5454 found_mode = true;
5455 break;
5456 }
5457 }
5458
5459 if (!found_mode)
5460 goto unsupported;
5461
5462 if (GET_MODE_SIZE (mode: rhs_mode) == fltsz)
5463 codecvt2 = ERROR_MARK;
5464 else
5465 {
5466 multi_step_cvt++;
5467 interm_types.safe_push (obj: cvt_type);
5468 cvt_type = NULL_TREE;
5469 }
5470 break;
5471
5472 case NARROW_DST:
5473 gcc_assert (op_type == unary_op);
5474 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5475 &code1, &multi_step_cvt,
5476 &interm_types))
5477 break;
5478
5479 if (GET_MODE_SIZE (mode: lhs_mode) >= GET_MODE_SIZE (mode: rhs_mode))
5480 goto unsupported;
5481
5482 if (code == FIX_TRUNC_EXPR)
5483 {
5484 cvt_type
5485 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0);
5486 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5487 if (cvt_type == NULL_TREE)
5488 goto unsupported;
5489 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5490 &tc1))
5491 codecvt1 = tc1;
5492 else
5493 goto unsupported;
5494 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5495 &code1, &multi_step_cvt,
5496 &interm_types))
5497 break;
5498 }
5499 /* If op0 can be represented in a lower-precision integer type,
5500 truncate it to cvt_type and then do the FLOAT_EXPR. */
5501 else if (code == FLOAT_EXPR)
5502 {
5503 wide_int op_min_value, op_max_value;
5504 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5505 goto unsupported;
5506
5507 cvt_type
5508 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: lhs_mode), 0);
5509 if (cvt_type == NULL_TREE
5510 || (wi::min_precision (x: op_max_value, sgn: SIGNED)
5511 > TYPE_PRECISION (cvt_type))
5512 || (wi::min_precision (x: op_min_value, sgn: SIGNED)
5513 > TYPE_PRECISION (cvt_type)))
5514 goto unsupported;
5515
5516 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5517 if (cvt_type == NULL_TREE)
5518 goto unsupported;
5519 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5520 &code1, &multi_step_cvt,
5521 &interm_types))
5522 goto unsupported;
5523 if (supportable_convert_operation ((tree_code) code, vectype_out,
5524 cvt_type, &tc1))
5525 {
5526 codecvt1 = tc1;
5527 modifier = NARROW_SRC;
5528 break;
5529 }
5530 }
5531
5532 goto unsupported;
5533
5534 default:
5535 gcc_unreachable ();
5536 }
5537
5538 if (!vec_stmt) /* transformation not required. */
5539 {
5540 if (slp_node
5541 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5542 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5543 {
5544 if (dump_enabled_p ())
5545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5546 "incompatible vector types for invariants\n");
5547 return false;
5548 }
5549 DUMP_VECT_SCOPE ("vectorizable_conversion");
5550 if (modifier == NONE)
5551 {
5552 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5553 vect_model_simple_cost (vinfo, stmt_info,
5554 ncopies: ncopies * (1 + multi_step_cvt),
5555 dt, ndts, node: slp_node, cost_vec);
5556 }
5557 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5558 {
5559 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5560 /* The final packing step produces one vector result per copy. */
5561 unsigned int nvectors
5562 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5563 vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors,
5564 pwr: multi_step_cvt, cost_vec,
5565 widen_arith);
5566 }
5567 else
5568 {
5569 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5570 /* The initial unpacking step produces two vector results
5571 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5572 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5573 unsigned int nvectors
5574 = (slp_node
5575 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5576 : ncopies * 2);
5577 vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors,
5578 pwr: multi_step_cvt, cost_vec,
5579 widen_arith);
5580 }
5581 interm_types.release ();
5582 return true;
5583 }
5584
5585 /* Transform. */
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_NOTE, vect_location,
5588 "transform conversion. ncopies = %d.\n", ncopies);
5589
5590 if (op_type == binary_op)
5591 {
5592 if (CONSTANT_CLASS_P (op0))
5593 op0 = fold_convert (TREE_TYPE (op1), op0);
5594 else if (CONSTANT_CLASS_P (op1))
5595 op1 = fold_convert (TREE_TYPE (op0), op1);
5596 }
5597
5598 /* In case of multi-step conversion, we first generate conversion operations
5599 to the intermediate types, and then from those types to the final one.
5600 We create vector destinations for the intermediate types (TYPES) received
5601 from supportable_*_operation, and store them in the correct order
5602 for future use in vect_create_vectorized_*_stmts (). */
5603 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5604 bool widen_or_narrow_float_p
5605 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5606 vec_dest = vect_create_destination_var (scalar_dest,
5607 widen_or_narrow_float_p
5608 ? cvt_type : vectype_out);
5609 vec_dsts.quick_push (obj: vec_dest);
5610
5611 if (multi_step_cvt)
5612 {
5613 for (i = interm_types.length () - 1;
5614 interm_types.iterate (ix: i, ptr: &intermediate_type); i--)
5615 {
5616 vec_dest = vect_create_destination_var (scalar_dest,
5617 intermediate_type);
5618 vec_dsts.quick_push (obj: vec_dest);
5619 }
5620 }
5621
5622 if (cvt_type)
5623 vec_dest = vect_create_destination_var (scalar_dest,
5624 widen_or_narrow_float_p
5625 ? vectype_out : cvt_type);
5626
5627 int ninputs = 1;
5628 if (!slp_node)
5629 {
5630 if (modifier == WIDEN)
5631 ;
5632 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5633 {
5634 if (multi_step_cvt)
5635 ninputs = vect_pow2 (x: multi_step_cvt);
5636 ninputs *= 2;
5637 }
5638 }
5639
5640 switch (modifier)
5641 {
5642 case NONE:
5643 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5644 op0, vec_oprnds0: &vec_oprnds0);
5645 /* When MULTI_STEP_CVT is set, VEC_DEST is the intermediate-type operand. */
5646 if (multi_step_cvt)
5647 {
5648 cvt_op = vec_dest;
5649 vec_dest = vec_dsts[0];
5650 }
5651
5652 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5653 {
5654 /* Arguments are ready, create the new vector stmt. */
5655 gimple* new_stmt;
5656 if (multi_step_cvt)
5657 {
5658 gcc_assert (multi_step_cvt == 1);
5659 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5660 new_temp = make_ssa_name (var: cvt_op, stmt: new_stmt);
5661 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
5662 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5663 vop0 = new_temp;
5664 }
5665 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5666 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
5667 gimple_set_lhs (new_stmt, new_temp);
5668 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5669
5670 if (slp_node)
5671 slp_node->push_vec_def (def: new_stmt);
5672 else
5673 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5674 }
5675 break;
5676
5677 case WIDEN:
5678 /* In case the vectorization factor (VF) is bigger than the number
5679 of elements that we can fit in a vectype (nunits), we have to
5680 generate more than one vector stmt - i.e. we need to "unroll"
5681 the vector stmt by a factor of VF/nunits. */
5682 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs,
5683 op0, vec_oprnds0: &vec_oprnds0,
5684 op1: code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5685 vec_oprnds1: &vec_oprnds1);
5686 if (code == WIDEN_LSHIFT_EXPR)
5687 {
5688 int oprnds_size = vec_oprnds0.length ();
5689 vec_oprnds1.create (nelems: oprnds_size);
5690 for (i = 0; i < oprnds_size; ++i)
5691 vec_oprnds1.quick_push (obj: op1);
5692 }
5693 /* Arguments are ready. Create the new vector stmts. */
5694 for (i = multi_step_cvt; i >= 0; i--)
5695 {
5696 tree this_dest = vec_dsts[i];
5697 code_helper c1 = code1, c2 = code2;
5698 if (i == 0 && codecvt2 != ERROR_MARK)
5699 {
5700 c1 = codecvt1;
5701 c2 = codecvt2;
5702 }
5703 if (known_eq (nunits_out, nunits_in))
5704 vect_create_half_widening_stmts (vinfo, vec_oprnds0: &vec_oprnds0, vec_oprnds1: &vec_oprnds1,
5705 stmt_info, vec_dest: this_dest, gsi, code1: c1,
5706 op_type);
5707 else
5708 vect_create_vectorized_promotion_stmts (vinfo, vec_oprnds0: &vec_oprnds0,
5709 vec_oprnds1: &vec_oprnds1, stmt_info,
5710 vec_dest: this_dest, gsi,
5711 ch1: c1, ch2: c2, op_type);
5712 }
5713
5714 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5715 {
5716 gimple *new_stmt;
5717 if (cvt_type)
5718 {
5719 new_temp = make_ssa_name (var: vec_dest);
5720 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5721 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5722 }
5723 else
5724 new_stmt = SSA_NAME_DEF_STMT (vop0);
5725
5726 if (slp_node)
5727 slp_node->push_vec_def (def: new_stmt);
5728 else
5729 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5730 }
5731 break;
5732
5733 case NARROW_SRC:
5734 case NARROW_DST:
5735 /* In case the vectorization factor (VF) is bigger than the number
5736 of elements that we can fit in a vectype (nunits), we have to
5737 generate more than one vector stmt - i.e. we need to "unroll"
5738 the vector stmt by a factor of VF/nunits. */
5739 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs,
5740 op0, vec_oprnds0: &vec_oprnds0);
5741 /* Arguments are ready. Create the new vector stmts. */
5742 if (cvt_type && modifier == NARROW_DST)
5743 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5744 {
5745 new_temp = make_ssa_name (var: vec_dest);
5746 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5747 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5748 vec_oprnds0[i] = new_temp;
5749 }
5750
5751 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds: &vec_oprnds0,
5752 multi_step_cvt,
5753 stmt_info, vec_dsts, gsi,
5754 slp_node, code: code1,
5755 narrow_src_p: modifier == NARROW_SRC);
5756 /* After demoting op0 to cvt_type, convert it to dest. */
5757 if (cvt_type && code == FLOAT_EXPR)
5758 {
5759 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5760 {
5761 /* Arguments are ready, create the new vector stmt. */
5762 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5763 gimple *new_stmt
5764 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5765 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
5766 gimple_set_lhs (new_stmt, new_temp);
5767 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5768
5769 /* This is the last step of the conversion sequence. Store the
5770 vectors in SLP_NODE or in vector info of the scalar statement
5771 (or in STMT_VINFO_RELATED_STMT chain). */
5772 if (slp_node)
5773 slp_node->push_vec_def (def: new_stmt);
5774 else
5775 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5776 }
5777 }
5778 break;
5779 }
5780 if (!slp_node)
5781 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5782
5783 vec_oprnds0.release ();
5784 vec_oprnds1.release ();
5785 interm_types.release ();
5786
5787 return true;
5788}
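/* Hedged examples (invented function names; the actual vector modes
   depend on the target) of scalar loops whose conversion stmts the
   function above handles:

     // NONE: int -> float, same lane count in and out.
     void f0 (float *r, const int *a, int n)
     { for (int i = 0; i < n; i++) r[i] = (float) a[i]; }

     // WIDEN: short -> int, each input vector yields two output vectors.
     void f1 (int *r, const short *a, int n)
     { for (int i = 0; i < n; i++) r[i] = a[i]; }

     // NARROW_DST: int -> short, two input vectors packed into one.
     void f2 (short *r, const int *a, int n)
     { for (int i = 0; i < n; i++) r[i] = (short) a[i]; }
*/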
5789
5790/* Return true if we can assume from the scalar form of STMT_INFO that
5791 neither the scalar nor the vector forms will generate code. STMT_INFO
5792 is known not to involve a data reference. */
5793
5794bool
5795vect_nop_conversion_p (stmt_vec_info stmt_info)
5796{
5797 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
5798 if (!stmt)
5799 return false;
5800
5801 tree lhs = gimple_assign_lhs (gs: stmt);
5802 tree_code code = gimple_assign_rhs_code (gs: stmt);
5803 tree rhs = gimple_assign_rhs1 (gs: stmt);
5804
5805 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5806 return true;
5807
5808 if (CONVERT_EXPR_CODE_P (code))
5809 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5810
5811 return false;
5812}
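/* For example (scalar view, assuming 32-bit int, with an invented
   function name): the cast in

     int as_signed (unsigned int x) { return (int) x; }

   changes only the signedness, not the precision, so neither the scalar
   nor the vector form needs any instruction, whereas a cast to short
   would. */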
5813
5814/* Function vectorizable_assignment.
5815
5816 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5817 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5818 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5819 Return true if STMT_INFO is vectorizable in this way. */
5820
5821static bool
5822vectorizable_assignment (vec_info *vinfo,
5823 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5824 gimple **vec_stmt, slp_tree slp_node,
5825 stmt_vector_for_cost *cost_vec)
5826{
5827 tree vec_dest;
5828 tree scalar_dest;
5829 tree op;
5830 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
5831 tree new_temp;
5832 enum vect_def_type dt[1] = {vect_unknown_def_type};
5833 int ndts = 1;
5834 int ncopies;
5835 int i;
5836 vec<tree> vec_oprnds = vNULL;
5837 tree vop;
5838 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
5839 enum tree_code code;
5840 tree vectype_in;
5841
5842 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5843 return false;
5844
5845 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5846 && ! vec_stmt)
5847 return false;
5848
5849 /* Is vectorizable assignment? */
5850 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
5851 if (!stmt)
5852 return false;
5853
5854 scalar_dest = gimple_assign_lhs (gs: stmt);
5855 if (TREE_CODE (scalar_dest) != SSA_NAME)
5856 return false;
5857
5858 if (STMT_VINFO_DATA_REF (stmt_info))
5859 return false;
5860
5861 code = gimple_assign_rhs_code (gs: stmt);
5862 if (!(gimple_assign_single_p (gs: stmt)
5863 || code == PAREN_EXPR
5864 || CONVERT_EXPR_CODE_P (code)))
5865 return false;
5866
5867 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5868 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
5869
5870 /* Multiple types in SLP are handled by creating the appropriate number of
5871 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5872 case of SLP. */
5873 if (slp_node)
5874 ncopies = 1;
5875 else
5876 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5877
5878 gcc_assert (ncopies >= 1);
5879
5880 slp_tree slp_op;
5881 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5882 &dt[0], &vectype_in))
5883 {
5884 if (dump_enabled_p ())
5885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5886 "use not simple.\n");
5887 return false;
5888 }
5889 if (!vectype_in)
5890 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5891
5892 /* We can handle NOP_EXPR conversions that do not change the number
5893 of elements or the vector size. */
5894 if ((CONVERT_EXPR_CODE_P (code)
5895 || code == VIEW_CONVERT_EXPR)
5896 && (!vectype_in
5897 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype_in), b: nunits)
5898 || maybe_ne (a: GET_MODE_SIZE (TYPE_MODE (vectype)),
5899 b: GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5900 return false;
5901
5902 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5903 {
5904 if (dump_enabled_p ())
5905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5906 "can't convert between boolean and non "
5907 "boolean vectors %T\n", TREE_TYPE (op));
5908
5909 return false;
5910 }
5911
5912 /* We do not handle bit-precision changes. */
5913 if ((CONVERT_EXPR_CODE_P (code)
5914 || code == VIEW_CONVERT_EXPR)
5915 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5916 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5917 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5918 && !type_has_mode_precision_p (TREE_TYPE (op))))
5919 /* But a conversion that does not change the bit-pattern is ok. */
5920 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5921 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5922 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5923 > TYPE_PRECISION (TREE_TYPE (op)))
5924 && TYPE_UNSIGNED (TREE_TYPE (op)))
5925 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5926 == TYPE_PRECISION (TREE_TYPE (op))))))
5927 {
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5930 "type conversion to/from bit-precision "
5931 "unsupported.\n");
5932 return false;
5933 }
5934
5935 if (!vec_stmt) /* transformation not required. */
5936 {
5937 if (slp_node
5938 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5939 {
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5942 "incompatible vector types for invariants\n");
5943 return false;
5944 }
5945 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5946 DUMP_VECT_SCOPE ("vectorizable_assignment");
5947 if (!vect_nop_conversion_p (stmt_info))
5948 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, node: slp_node,
5949 cost_vec);
5950 return true;
5951 }
5952
5953 /* Transform. */
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5956
5957 /* Handle def. */
5958 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5959
5960 /* Handle use. */
5961 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op0: op, vec_oprnds0: &vec_oprnds);
5962
5963 /* Arguments are ready. Create the new vector stmt. */
5964 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5965 {
5966 if (CONVERT_EXPR_CODE_P (code)
5967 || code == VIEW_CONVERT_EXPR)
5968 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5969 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5970 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
5971 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
5972 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5973 if (slp_node)
5974 slp_node->push_vec_def (def: new_stmt);
5975 else
5976 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5977 }
5978 if (!slp_node)
5979 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5980
5981 vec_oprnds.release ();
5982 return true;
5983}
5984
5985
5986/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5987 either as shift by a scalar or by a vector. */
5988
5989bool
5990vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5991{
5992
5993 machine_mode vec_mode;
5994 optab optab;
5995 int icode;
5996 tree vectype;
5997
5998 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5999 if (!vectype)
6000 return false;
6001
6002 optab = optab_for_tree_code (code, vectype, optab_scalar);
6003 if (!optab
6004 || optab_handler (op: optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6005 {
6006 optab = optab_for_tree_code (code, vectype, optab_vector);
6007 if (!optab
6008 || (optab_handler (op: optab, TYPE_MODE (vectype))
6009 == CODE_FOR_nothing))
6010 return false;
6011 }
6012
6013 vec_mode = TYPE_MODE (vectype);
6014 icode = (int) optab_handler (op: optab, mode: vec_mode);
6015 if (icode == CODE_FOR_nothing)
6016 return false;
6017
6018 return true;
6019}
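/* A standalone sketch (plain C++ with invented names, not the optab
   machinery) of the two shift flavours queried above: optab_scalar
   shifts every lane by one common amount, optab_vector shifts each lane
   by its own amount.

     #include <stddef.h>
     #include <stdint.h>
     #include <vector>

     // Vector/scalar shift: all lanes use the same shift amount.
     static void
     shl_by_scalar (std::vector<uint32_t> &v, unsigned amount)
     {
       for (uint32_t &x : v)
         x <<= amount;
     }

     // Vector/vector shift: lane i uses amounts[i].
     static void
     shl_by_vector (std::vector<uint32_t> &v,
                    const std::vector<uint32_t> &amounts)
     {
       for (size_t i = 0; i < v.size (); ++i)
         v[i] <<= amounts[i];
     }
*/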
6020
6021
6022/* Function vectorizable_shift.
6023
6024 Check if STMT_INFO performs a shift operation that can be vectorized.
6025 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6026 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6027 Return true if STMT_INFO is vectorizable in this way. */
6028
6029static bool
6030vectorizable_shift (vec_info *vinfo,
6031 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6032 gimple **vec_stmt, slp_tree slp_node,
6033 stmt_vector_for_cost *cost_vec)
6034{
6035 tree vec_dest;
6036 tree scalar_dest;
6037 tree op0, op1 = NULL;
6038 tree vec_oprnd1 = NULL_TREE;
6039 tree vectype;
6040 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6041 enum tree_code code;
6042 machine_mode vec_mode;
6043 tree new_temp;
6044 optab optab;
6045 int icode;
6046 machine_mode optab_op2_mode;
6047 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6048 int ndts = 2;
6049 poly_uint64 nunits_in;
6050 poly_uint64 nunits_out;
6051 tree vectype_out;
6052 tree op1_vectype;
6053 int ncopies;
6054 int i;
6055 vec<tree> vec_oprnds0 = vNULL;
6056 vec<tree> vec_oprnds1 = vNULL;
6057 tree vop0, vop1;
6058 unsigned int k;
6059 bool scalar_shift_arg = true;
6060 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
6061 bool incompatible_op1_vectype_p = false;
6062
6063 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6064 return false;
6065
6066 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6067 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6068 && ! vec_stmt)
6069 return false;
6070
6071 /* Is STMT a vectorizable binary/unary operation? */
6072 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
6073 if (!stmt)
6074 return false;
6075
6076 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6077 return false;
6078
6079 code = gimple_assign_rhs_code (gs: stmt);
6080
6081 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6082 || code == RROTATE_EXPR))
6083 return false;
6084
6085 scalar_dest = gimple_assign_lhs (gs: stmt);
6086 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6087 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6088 {
6089 if (dump_enabled_p ())
6090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6091 "bit-precision shifts not supported.\n");
6092 return false;
6093 }
6094
6095 slp_tree slp_op0;
6096 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6097 0, &op0, &slp_op0, &dt[0], &vectype))
6098 {
6099 if (dump_enabled_p ())
6100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6101 "use not simple.\n");
6102 return false;
6103 }
6104 /* If op0 is an external or constant def, infer the vector type
6105 from the scalar type. */
6106 if (!vectype)
6107 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6108 if (vec_stmt)
6109 gcc_assert (vectype);
6110 if (!vectype)
6111 {
6112 if (dump_enabled_p ())
6113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6114 "no vectype for scalar type\n");
6115 return false;
6116 }
6117
6118 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
6119 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype);
6120 if (maybe_ne (a: nunits_out, b: nunits_in))
6121 return false;
6122
6123 stmt_vec_info op1_def_stmt_info;
6124 slp_tree slp_op1;
6125 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6126 &dt[1], &op1_vectype, &op1_def_stmt_info))
6127 {
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "use not simple.\n");
6131 return false;
6132 }
6133
6134 /* Multiple types in SLP are handled by creating the appropriate number of
6135 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6136 case of SLP. */
6137 if (slp_node)
6138 ncopies = 1;
6139 else
6140 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6141
6142 gcc_assert (ncopies >= 1);
6143
6144 /* Determine whether the shift amount is a vector or a scalar. If the
6145 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6146
6147 if ((dt[1] == vect_internal_def
6148 || dt[1] == vect_induction_def
6149 || dt[1] == vect_nested_cycle)
6150 && !slp_node)
6151 scalar_shift_arg = false;
6152 else if (dt[1] == vect_constant_def
6153 || dt[1] == vect_external_def
6154 || dt[1] == vect_internal_def)
6155 {
6156 /* In SLP, we need to check whether the shift count is the same
6157 in all the scalar stmts; in loops, if it is a constant or
6158 invariant, it is always a scalar shift. */
6159 if (slp_node)
6160 {
6161 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6162 stmt_vec_info slpstmt_info;
6163
6164 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6165 {
6166 gassign *slpstmt = as_a <gassign *> (p: slpstmt_info->stmt);
6167 if (!operand_equal_p (gimple_assign_rhs2 (gs: slpstmt), op1, flags: 0))
6168 scalar_shift_arg = false;
6169 }
6170
6171 /* For internal SLP defs we have to make sure we see scalar stmts
6172 for all vector elements.
6173 ??? For different vectors we could resort to a different
6174 scalar shift operand but code-generation below simply always
6175 takes the first. */
6176 if (dt[1] == vect_internal_def
6177 && maybe_ne (a: nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6178 b: stmts.length ()))
6179 scalar_shift_arg = false;
6180 }
6181
6182 /* If the shift amount is computed by a pattern stmt we cannot
6183 use the scalar amount directly thus give up and use a vector
6184 shift. */
6185 if (op1_def_stmt_info && is_pattern_stmt_p (stmt_info: op1_def_stmt_info))
6186 scalar_shift_arg = false;
6187 }
6188 else
6189 {
6190 if (dump_enabled_p ())
6191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6192 "operand mode requires invariant argument.\n");
6193 return false;
6194 }
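 /* Concretely (a hedged illustration with invented function names): in

      void shift_by_invariant (int *a, const int *b, int s, int n)
      { for (int i = 0; i < n; i++) a[i] = b[i] << s; }

    the shift amount is the same scalar for the whole loop, whereas in

      void shift_by_vector (int *a, const int *b, const int *c, int n)
      { for (int i = 0; i < n; i++) a[i] = b[i] << c[i]; }

    it is an internal def and a vector/vector shift has to be used.  */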
6195
6196 /* Vector shifted by vector. */
6197 bool was_scalar_shift_arg = scalar_shift_arg;
6198 if (!scalar_shift_arg)
6199 {
6200 optab = optab_for_tree_code (code, vectype, optab_vector);
6201 if (dump_enabled_p ())
6202 dump_printf_loc (MSG_NOTE, vect_location,
6203 "vector/vector shift/rotate found.\n");
6204
6205 if (!op1_vectype)
6206 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6207 slp_op1);
6208 incompatible_op1_vectype_p
6209 = (op1_vectype == NULL_TREE
6210 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: op1_vectype),
6211 b: TYPE_VECTOR_SUBPARTS (node: vectype))
6212 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6213 if (incompatible_op1_vectype_p
6214 && (!slp_node
6215 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6216 || slp_op1->refcnt != 1))
6217 {
6218 if (dump_enabled_p ())
6219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6220 "unusable type for last operand in"
6221 " vector/vector shift/rotate.\n");
6222 return false;
6223 }
6224 }
6225 /* See if the machine has a vector shifted by scalar insn and if not
6226 then see if it has a vector shifted by vector insn. */
6227 else
6228 {
6229 optab = optab_for_tree_code (code, vectype, optab_scalar);
6230 if (optab
6231 && optab_handler (op: optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6232 {
6233 if (dump_enabled_p ())
6234 dump_printf_loc (MSG_NOTE, vect_location,
6235 "vector/scalar shift/rotate found.\n");
6236 }
6237 else
6238 {
6239 optab = optab_for_tree_code (code, vectype, optab_vector);
6240 if (optab
6241 && (optab_handler (op: optab, TYPE_MODE (vectype))
6242 != CODE_FOR_nothing))
6243 {
6244 scalar_shift_arg = false;
6245
6246 if (dump_enabled_p ())
6247 dump_printf_loc (MSG_NOTE, vect_location,
6248 "vector/vector shift/rotate found.\n");
6249
6250 if (!op1_vectype)
6251 op1_vectype = get_vectype_for_scalar_type (vinfo,
6252 TREE_TYPE (op1),
6253 slp_op1);
6254
6255 /* Unlike the other binary operators, shifts/rotates have
6256 the rhs being int, instead of the same type as the lhs,
6257 so make sure the scalar is the right type if we are
6258 dealing with vectors of long long/long/short/char. */
6259 incompatible_op1_vectype_p
6260 = (!op1_vectype
6261 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6262 TREE_TYPE (op1)));
6263 if (incompatible_op1_vectype_p
6264 && dt[1] == vect_internal_def)
6265 {
6266 if (dump_enabled_p ())
6267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6268 "unusable type for last operand in"
6269 " vector/vector shift/rotate.\n");
6270 return false;
6271 }
6272 }
6273 }
6274 }
6275
6276 /* Supportable by target? */
6277 if (!optab)
6278 {
6279 if (dump_enabled_p ())
6280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6281 "no optab.\n");
6282 return false;
6283 }
6284 vec_mode = TYPE_MODE (vectype);
6285 icode = (int) optab_handler (op: optab, mode: vec_mode);
6286 if (icode == CODE_FOR_nothing)
6287 {
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "op not supported by target.\n");
6291 return false;
6292 }
6293 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6294 if (vect_emulated_vector_p (vectype))
6295 return false;
6296
6297 if (!vec_stmt) /* transformation not required. */
6298 {
6299 if (slp_node
6300 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6301 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6302 && (!incompatible_op1_vectype_p
6303 || dt[1] == vect_constant_def)
6304 && !vect_maybe_update_slp_op_vectype
6305 (slp_op1,
6306 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6307 {
6308 if (dump_enabled_p ())
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6310 "incompatible vector types for invariants\n");
6311 return false;
6312 }
6313 /* Now adjust the constant shift amount in place. */
6314 if (slp_node
6315 && incompatible_op1_vectype_p
6316 && dt[1] == vect_constant_def)
6317 {
6318 for (unsigned i = 0;
6319 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6320 {
6321 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6322 = fold_convert (TREE_TYPE (vectype),
6323 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6324 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6325 == INTEGER_CST));
6326 }
6327 }
6328 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6329 DUMP_VECT_SCOPE ("vectorizable_shift");
6330 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6331 ndts: scalar_shift_arg ? 1 : ndts, node: slp_node, cost_vec);
6332 return true;
6333 }
6334
6335 /* Transform. */
6336
6337 if (dump_enabled_p ())
6338 dump_printf_loc (MSG_NOTE, vect_location,
6339 "transform binary/unary operation.\n");
6340
6341 if (incompatible_op1_vectype_p && !slp_node)
6342 {
6343 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6344 op1 = fold_convert (TREE_TYPE (vectype), op1);
6345 if (dt[1] != vect_constant_def)
6346 op1 = vect_init_vector (vinfo, stmt_info, val: op1,
6347 TREE_TYPE (vectype), NULL);
6348 }
6349
6350 /* Handle def. */
6351 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6352
6353 if (scalar_shift_arg && dt[1] != vect_internal_def)
6354 {
6355 /* Vector shl and shr insn patterns can be defined with scalar
6356 operand 2 (shift operand). In this case, use constant or loop
6357 invariant op1 directly, without extending it to vector mode
6358 first. */
6359 optab_op2_mode = insn_data[icode].operand[2].mode;
6360 if (!VECTOR_MODE_P (optab_op2_mode))
6361 {
6362 if (dump_enabled_p ())
6363 dump_printf_loc (MSG_NOTE, vect_location,
6364 "operand 1 using scalar mode.\n");
6365 vec_oprnd1 = op1;
6366 vec_oprnds1.create (nelems: slp_node ? slp_node->vec_stmts_size : ncopies);
6367 vec_oprnds1.quick_push (obj: vec_oprnd1);
6368 /* Store vec_oprnd1 for every vector stmt to be created.
6369 We check during the analysis that all the shift arguments
6370 are the same.
6371 TODO: Allow different constants for different vector
6372 stmts generated for an SLP instance. */
6373 for (k = 0;
6374 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6375 vec_oprnds1.quick_push (obj: vec_oprnd1);
6376 }
6377 }
6378 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6379 {
6380 if (was_scalar_shift_arg)
6381 {
6382 /* If the argument was the same in all lanes, create
6383 the correctly typed vector shift amount directly. */
6384 op1 = fold_convert (TREE_TYPE (vectype), op1);
6385 op1 = vect_init_vector (vinfo, stmt_info, val: op1, TREE_TYPE (vectype),
6386 gsi: !loop_vinfo ? gsi : NULL);
6387 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, val: op1, type: vectype,
6388 gsi: !loop_vinfo ? gsi : NULL);
6389 vec_oprnds1.create (nelems: slp_node->vec_stmts_size);
6390 for (k = 0; k < slp_node->vec_stmts_size; k++)
6391 vec_oprnds1.quick_push (obj: vec_oprnd1);
6392 }
6393 else if (dt[1] == vect_constant_def)
6394 /* The constant shift amount has been adjusted in place. */
6395 ;
6396 else
6397 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6398 }
6399
6400 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6401 (a special case for certain kind of vector shifts); otherwise,
6402 operand 1 should be of a vector type (the usual case). */
6403 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6404 op0, vec_oprnds0: &vec_oprnds0,
6405 op1: vec_oprnd1 ? NULL_TREE : op1, vec_oprnds1: &vec_oprnds1);
6406
6407 /* Arguments are ready. Create the new vector stmt. */
6408 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6409 {
6410 /* For internal defs where we need to use a scalar shift arg,
6411 extract the first lane. */
6412 if (scalar_shift_arg && dt[1] == vect_internal_def)
6413 {
6414 vop1 = vec_oprnds1[0];
6415 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6416 gassign *new_stmt
6417 = gimple_build_assign (new_temp,
6418 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6419 vop1,
6420 TYPE_SIZE (TREE_TYPE (new_temp)),
6421 bitsize_zero_node));
6422 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6423 vop1 = new_temp;
6424 }
6425 else
6426 vop1 = vec_oprnds1[i];
6427 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6428 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
6429 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
6430 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6431 if (slp_node)
6432 slp_node->push_vec_def (def: new_stmt);
6433 else
6434 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
6435 }
6436
6437 if (!slp_node)
6438 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6439
6440 vec_oprnds0.release ();
6441 vec_oprnds1.release ();
6442
6443 return true;
6444}
6445
6446/* Function vectorizable_operation.
6447
6448 Check if STMT_INFO performs a binary, unary or ternary operation that can
6449 be vectorized.
6450 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6451 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6452 Return true if STMT_INFO is vectorizable in this way. */
6453
6454static bool
6455vectorizable_operation (vec_info *vinfo,
6456 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6457 gimple **vec_stmt, slp_tree slp_node,
6458 stmt_vector_for_cost *cost_vec)
6459{
6460 tree vec_dest;
6461 tree scalar_dest;
6462 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6463 tree vectype;
6464 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6465 enum tree_code code, orig_code;
6466 machine_mode vec_mode;
6467 tree new_temp;
6468 int op_type;
6469 optab optab;
6470 bool target_support_p;
6471 enum vect_def_type dt[3]
6472 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6473 int ndts = 3;
6474 poly_uint64 nunits_in;
6475 poly_uint64 nunits_out;
6476 tree vectype_out;
6477 int ncopies, vec_num;
6478 int i;
6479 vec<tree> vec_oprnds0 = vNULL;
6480 vec<tree> vec_oprnds1 = vNULL;
6481 vec<tree> vec_oprnds2 = vNULL;
6482 tree vop0, vop1, vop2;
6483 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
6484
6485 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6486 return false;
6487
6488 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6489 && ! vec_stmt)
6490 return false;
6491
6492 /* Is STMT a vectorizable binary/unary operation? */
6493 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
6494 if (!stmt)
6495 return false;
6496
6497 /* Loads and stores are handled in vectorizable_{load,store}. */
6498 if (STMT_VINFO_DATA_REF (stmt_info))
6499 return false;
6500
6501 orig_code = code = gimple_assign_rhs_code (gs: stmt);
6502
6503 /* Shifts are handled in vectorizable_shift. */
6504 if (code == LSHIFT_EXPR
6505 || code == RSHIFT_EXPR
6506 || code == LROTATE_EXPR
6507 || code == RROTATE_EXPR)
6508 return false;
6509
6510 /* Comparisons are handled in vectorizable_comparison. */
6511 if (TREE_CODE_CLASS (code) == tcc_comparison)
6512 return false;
6513
6514 /* Conditions are handled in vectorizable_condition. */
6515 if (code == COND_EXPR)
6516 return false;
6517
6518 /* For pointer addition and subtraction, we should use the normal
6519 plus and minus for the vector operation. */
6520 if (code == POINTER_PLUS_EXPR)
6521 code = PLUS_EXPR;
6522 if (code == POINTER_DIFF_EXPR)
6523 code = MINUS_EXPR;
6524
6525 /* Support only unary, binary and ternary operations. */
6526 op_type = TREE_CODE_LENGTH (code);
6527 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6528 {
6529 if (dump_enabled_p ())
6530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6531 "num. args = %d (not unary/binary/ternary op).\n",
6532 op_type);
6533 return false;
6534 }
6535
6536 scalar_dest = gimple_assign_lhs (gs: stmt);
6537 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6538
6539 /* Most operations cannot handle bit-precision types without extra
6540 truncations. */
6541 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6542 if (!mask_op_p
6543 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6544 /* Exception are bitwise binary operations. */
6545 && code != BIT_IOR_EXPR
6546 && code != BIT_XOR_EXPR
6547 && code != BIT_AND_EXPR)
6548 {
6549 if (dump_enabled_p ())
6550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6551 "bit-precision arithmetic not supported.\n");
6552 return false;
6553 }
6554
6555 slp_tree slp_op0;
6556 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6557 0, &op0, &slp_op0, &dt[0], &vectype))
6558 {
6559 if (dump_enabled_p ())
6560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6561 "use not simple.\n");
6562 return false;
6563 }
6564 bool is_invariant = (dt[0] == vect_external_def
6565 || dt[0] == vect_constant_def);
6566 /* If op0 is an external or constant def, infer the vector type
6567 from the scalar type. */
6568 if (!vectype)
6569 {
6570 /* For a boolean type we cannot determine the vectype from
6571 an invariant value (we don't know whether it is a vector
6572 of booleans or a vector of integers). We use the output
6573 vectype because operations on booleans don't change the
6574 type. */
6575 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6576 {
6577 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6578 {
6579 if (dump_enabled_p ())
6580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6581 "not supported operation on bool value.\n");
6582 return false;
6583 }
6584 vectype = vectype_out;
6585 }
6586 else
6587 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6588 slp_node);
6589 }
6590 if (vec_stmt)
6591 gcc_assert (vectype);
6592 if (!vectype)
6593 {
6594 if (dump_enabled_p ())
6595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6596 "no vectype for scalar type %T\n",
6597 TREE_TYPE (op0));
6598
6599 return false;
6600 }
6601
6602 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
6603 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype);
6604 if (maybe_ne (a: nunits_out, b: nunits_in))
6605 return false;
6606
6607 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6608 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6609 if (op_type == binary_op || op_type == ternary_op)
6610 {
6611 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6612 1, &op1, &slp_op1, &dt[1], &vectype2))
6613 {
6614 if (dump_enabled_p ())
6615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6616 "use not simple.\n");
6617 return false;
6618 }
6619 is_invariant &= (dt[1] == vect_external_def
6620 || dt[1] == vect_constant_def);
6621 if (vectype2
6622 && maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
6623 return false;
6624 }
6625 if (op_type == ternary_op)
6626 {
6627 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6628 2, &op2, &slp_op2, &dt[2], &vectype3))
6629 {
6630 if (dump_enabled_p ())
6631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6632 "use not simple.\n");
6633 return false;
6634 }
6635 is_invariant &= (dt[2] == vect_external_def
6636 || dt[2] == vect_constant_def);
6637 if (vectype3
6638 && maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype3)))
6639 return false;
6640 }
6641
6642 /* Multiple types in SLP are handled by creating the appropriate number of
6643 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6644 case of SLP. */
6645 if (slp_node)
6646 {
6647 ncopies = 1;
6648 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6649 }
6650 else
6651 {
6652 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6653 vec_num = 1;
6654 }
6655
6656 gcc_assert (ncopies >= 1);
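  /* A rough worked example with hypothetical numbers: for a loop with
     vectorization factor 8 and a vectype holding 4 lanes, the non-SLP
     path computes ncopies = 8 / 4 = 2, i.e. two copies of the vector
     stmt per scalar stmt, whereas in the SLP path ncopies stays 1 and
     the multiplicity lives in SLP_TREE_NUMBER_OF_VEC_STMTS.  */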
6657
6658 /* Reject attempts to combine mask types with nonmask types, e.g. if
6659 we have an AND between a (nonmask) boolean loaded from memory and
6660 a (mask) boolean result of a comparison.
6661
6662 TODO: We could easily fix these cases up using pattern statements. */
6663 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6664 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6665 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6666 {
6667 if (dump_enabled_p ())
6668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6669 "mixed mask and nonmask vector types\n");
6670 return false;
6671 }
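  /* An illustrative scalar source for the rejected mixture (names are
     made up): with _Bool *b and int *x, *y, the expression
     b[i] & (x[i] < y[i]) would AND a nonmask operand (the boolean
     loaded as data) with a mask operand (the comparison result).  */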
6672
6673 /* Supportable by target? */
6674
6675 vec_mode = TYPE_MODE (vectype);
6676 if (code == MULT_HIGHPART_EXPR)
6677 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6678 else
6679 {
6680 optab = optab_for_tree_code (code, vectype, optab_default);
6681 if (!optab)
6682 {
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6685 "no optab.\n");
6686 return false;
6687 }
6688 target_support_p = (optab_handler (op: optab, mode: vec_mode) != CODE_FOR_nothing
6689 || optab_libfunc (optab, vec_mode));
6690 }
6691
6692 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6693 if (!target_support_p || using_emulated_vectors_p)
6694 {
6695 if (dump_enabled_p ())
6696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6697 "op not supported by target.\n");
      /* When vec_mode is not a vector mode and we verified that ops we
	 do not have to lower (like AND) are natively supported, let
	 those through even when the mode isn't word_mode.  For ops we
	 do have to lower, the lowering code assumes we are dealing
	 with word_mode.  */
6703 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6704 || !target_support_p)
6705 && maybe_ne (a: GET_MODE_SIZE (mode: vec_mode), UNITS_PER_WORD))
6706 /* Check only during analysis. */
6707 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6708 {
6709 if (dump_enabled_p ())
6710 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6711 return false;
6712 }
6713 if (dump_enabled_p ())
6714 dump_printf_loc (MSG_NOTE, vect_location,
6715 "proceeding using word mode.\n");
6716 using_emulated_vectors_p = true;
6717 }
6718
6719 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6720 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6721 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6722 internal_fn cond_fn = get_conditional_internal_fn (code);
6723 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
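  /* For example, for code == PLUS_EXPR these resolve to the conditional
     internal functions whose per-lane semantics are roughly

       .COND_ADD (mask, a, b, else)            ->  mask ? a + b : else
       .COND_LEN_ADD (mask, a, b, else, ...)   ->  likewise, with lanes
						   beyond the current
						   length also inactive

     which is what allows a partially-vectorized iteration to leave
     inactive lanes untouched.  */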
6724
6725 /* If operating on inactive elements could generate spurious traps,
6726 we need to restrict the operation to active lanes. Note that this
6727 specifically doesn't apply to unhoisted invariants, since they
6728 operate on the same value for every lane.
6729
6730 Similarly, if this operation is part of a reduction, a fully-masked
6731 loop should only change the active lanes of the reduction chain,
6732 keeping the inactive lanes as-is. */
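  /* For illustration (not from the surrounding IL): in a fully-masked
     vectorization of

       for (i = 0; i < n; i++)
	 q[i] = a[i] / b[i];

     the inactive tail lanes of the division could trap on a zero in b[],
     so the operation must be limited to the active lanes; an operation
     whose operands are all invariant computes the same value in every
     lane and cannot trap only on inactive ones.  */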
6733 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6734 || reduc_idx >= 0);
6735
6736 if (!vec_stmt) /* transformation not required. */
6737 {
6738 if (loop_vinfo
6739 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6740 && mask_out_inactive)
6741 {
6742 if (cond_len_fn != IFN_LAST
6743 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6744 OPTIMIZE_FOR_SPEED))
6745 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6746 1);
6747 else if (cond_fn != IFN_LAST
6748 && direct_internal_fn_supported_p (cond_fn, vectype,
6749 OPTIMIZE_FOR_SPEED))
6750 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6751 vectype, NULL);
6752 else
6753 {
6754 if (dump_enabled_p ())
6755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6756 "can't use a fully-masked loop because no"
6757 " conditional operation is available.\n");
6758 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6759 }
6760 }
6761
6762 /* Put types on constant and invariant SLP children. */
6763 if (slp_node
6764 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6765 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6766 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6767 {
6768 if (dump_enabled_p ())
6769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6770 "incompatible vector types for invariants\n");
6771 return false;
6772 }
6773
6774 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6775 DUMP_VECT_SCOPE ("vectorizable_operation");
6776 vect_model_simple_cost (vinfo, stmt_info,
6777 ncopies, dt, ndts, node: slp_node, cost_vec);
6778 if (using_emulated_vectors_p)
6779 {
6780 /* The above vect_model_simple_cost call handles constants
6781 in the prologue and (mis-)costs one of the stmts as
6782 vector stmt. See below for the actual lowering that will
6783 be applied. */
6784 unsigned n
6785 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6786 switch (code)
6787 {
6788 case PLUS_EXPR:
6789 n *= 5;
6790 break;
6791 case MINUS_EXPR:
6792 n *= 6;
6793 break;
6794 case NEGATE_EXPR:
6795 n *= 4;
6796 break;
6797 default:
6798 /* Bit operations do not have extra cost and are accounted
6799 as vector stmt by vect_model_simple_cost. */
6800 n = 0;
6801 break;
6802 }
6803 if (n != 0)
6804 {
6805 /* We also need to materialize two large constants. */
6806 record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_stmt, stmt_info,
6807 misalign: 0, where: vect_prologue);
6808 record_stmt_cost (body_cost_vec: cost_vec, count: n, kind: scalar_stmt, stmt_info,
6809 misalign: 0, where: vect_body);
6810 }
6811 }
6812 return true;
6813 }
6814
6815 /* Transform. */
6816
6817 if (dump_enabled_p ())
6818 dump_printf_loc (MSG_NOTE, vect_location,
6819 "transform binary/unary operation.\n");
6820
6821 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6822 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6823
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So we
     need to compute the MINUS_EXPR into a vectype temporary and
     VIEW_CONVERT_EXPR it to the final vectype_out result.  */
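  /* A minimal sketch with hypothetical SSA names: for d = p - q on
     pointers, the subtraction is carried out in the unsigned pointer
     vectype and the result is then reinterpreted, roughly

       vect_t_1 = vect_p - vect_q;
       vect_d_2 = VIEW_CONVERT_EXPR<vectype_out>(vect_t_1);

     where vect_p, vect_q and the temporaries are illustrative names.  */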
6828 tree vec_cvt_dest = NULL_TREE;
6829 if (orig_code == POINTER_DIFF_EXPR)
6830 {
6831 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6832 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6833 }
6834 /* Handle def. */
6835 else
6836 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6837
6838 /* In case the vectorization factor (VF) is bigger than the number
6839 of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt, i.e., we need to "unroll" the
     vector stmt by a factor of VF/nunits.  In doing so, we record a pointer
6842 from one copy of the vector stmt to the next, in the field
6843 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6844 stages to find the correct vector defs to be used when vectorizing
6845 stmts that use the defs of the current stmt. The example below
6846 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6847 we need to create 4 vectorized stmts):
6848
6849 before vectorization:
6850 RELATED_STMT VEC_STMT
6851 S1: x = memref - -
6852 S2: z = x + 1 - -
6853
6854 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6855 there):
6856 RELATED_STMT VEC_STMT
6857 VS1_0: vx0 = memref0 VS1_1 -
6858 VS1_1: vx1 = memref1 VS1_2 -
6859 VS1_2: vx2 = memref2 VS1_3 -
6860 VS1_3: vx3 = memref3 - -
6861 S1: x = load - VS1_0
6862 S2: z = x + 1 - -
6863
6864 step2: vectorize stmt S2 (done here):
6865 To vectorize stmt S2 we first need to find the relevant vector
6866 def for the first operand 'x'. This is, as usual, obtained from
6867 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6868 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6869 relevant vector def 'vx0'. Having found 'vx0' we can generate
6870 the vector stmt VS2_0, and as usual, record it in the
6871 STMT_VINFO_VEC_STMT of stmt S2.
6872 When creating the second copy (VS2_1), we obtain the relevant vector
6873 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6874 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6875 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6876 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6877 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6878 chain of stmts and pointers:
6879 RELATED_STMT VEC_STMT
6880 VS1_0: vx0 = memref0 VS1_1 -
6881 VS1_1: vx1 = memref1 VS1_2 -
6882 VS1_2: vx2 = memref2 VS1_3 -
6883 VS1_3: vx3 = memref3 - -
6884 S1: x = load - VS1_0
6885 VS2_0: vz0 = vx0 + v1 VS2_1 -
6886 VS2_1: vz1 = vx1 + v1 VS2_2 -
6887 VS2_2: vz2 = vx2 + v1 VS2_3 -
6888 VS2_3: vz3 = vx3 + v1 - -
6889 S2: z = x + 1 - VS2_0 */
6890
6891 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6892 op0, vec_oprnds0: &vec_oprnds0, op1, vec_oprnds1: &vec_oprnds1, op2, vec_oprnds2: &vec_oprnds2);
6893 /* Arguments are ready. Create the new vector stmt. */
6894 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6895 {
6896 gimple *new_stmt = NULL;
6897 vop1 = ((op_type == binary_op || op_type == ternary_op)
6898 ? vec_oprnds1[i] : NULL_TREE);
6899 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6900 if (using_emulated_vectors_p
6901 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6902 {
6903 /* Lower the operation. This follows vector lowering. */
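	  /* For illustration, a scalar C equivalent of the word-mode
	     lowering of an emulated element-wise addition (uint64_t and
	     the 8-bit lane width are assumptions, not necessarily what
	     the code below ends up using):

	       uint64_t
	       swar_add8 (uint64_t a, uint64_t b)
	       {
		 uint64_t low_bits  = 0x7f7f7f7f7f7f7f7fULL;
		 uint64_t high_bits = 0x8080808080808080ULL;
		 uint64_t signs = (a ^ b) & high_bits;
		 uint64_t sum_low = (a & low_bits) + (b & low_bits);
		 return sum_low ^ signs;
	       }

	     Adding only the low bits cannot carry across lane boundaries,
	     and the lane sign bits are patched up separately with XOR.  */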
6904 unsigned int width = vector_element_bits (vectype);
6905 tree inner_type = TREE_TYPE (vectype);
6906 tree word_type
6907 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: word_mode), 1);
6908 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6909 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6910 tree high_bits
6911 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6912 tree wvop0 = make_ssa_name (var: word_type);
6913 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6914 build1 (VIEW_CONVERT_EXPR,
6915 word_type, vop0));
6916 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6917 tree result_low, signs;
6918 if (code == PLUS_EXPR || code == MINUS_EXPR)
6919 {
6920 tree wvop1 = make_ssa_name (var: word_type);
6921 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6922 build1 (VIEW_CONVERT_EXPR,
6923 word_type, vop1));
6924 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6925 signs = make_ssa_name (var: word_type);
6926 new_stmt = gimple_build_assign (signs,
6927 BIT_XOR_EXPR, wvop0, wvop1);
6928 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6929 tree b_low = make_ssa_name (var: word_type);
6930 new_stmt = gimple_build_assign (b_low,
6931 BIT_AND_EXPR, wvop1, low_bits);
6932 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6933 tree a_low = make_ssa_name (var: word_type);
6934 if (code == PLUS_EXPR)
6935 new_stmt = gimple_build_assign (a_low,
6936 BIT_AND_EXPR, wvop0, low_bits);
6937 else
6938 new_stmt = gimple_build_assign (a_low,
6939 BIT_IOR_EXPR, wvop0, high_bits);
6940 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6941 if (code == MINUS_EXPR)
6942 {
6943 new_stmt = gimple_build_assign (NULL_TREE,
6944 BIT_NOT_EXPR, signs);
6945 signs = make_ssa_name (var: word_type);
6946 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
6947 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6948 }
6949 new_stmt = gimple_build_assign (NULL_TREE,
6950 BIT_AND_EXPR, signs, high_bits);
6951 signs = make_ssa_name (var: word_type);
6952 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
6953 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6954 result_low = make_ssa_name (var: word_type);
6955 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6956 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6957 }
6958 else
6959 {
6960 tree a_low = make_ssa_name (var: word_type);
6961 new_stmt = gimple_build_assign (a_low,
6962 BIT_AND_EXPR, wvop0, low_bits);
6963 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6964 signs = make_ssa_name (var: word_type);
6965 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6966 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6967 new_stmt = gimple_build_assign (NULL_TREE,
6968 BIT_AND_EXPR, signs, high_bits);
6969 signs = make_ssa_name (var: word_type);
6970 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
6971 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6972 result_low = make_ssa_name (var: word_type);
6973 new_stmt = gimple_build_assign (result_low,
6974 MINUS_EXPR, high_bits, a_low);
6975 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6976 }
6977 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6978 signs);
6979 result_low = make_ssa_name (var: word_type);
6980 gimple_assign_set_lhs (gs: new_stmt, lhs: result_low);
6981 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6982 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6983 build1 (VIEW_CONVERT_EXPR,
6984 vectype, result_low));
6985 new_temp = make_ssa_name (var: vectype);
6986 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
6987 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6988 }
6989 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
6990 {
6991 tree mask;
6992 if (masked_loop_p)
6993 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
6994 vec_num * ncopies, vectype, i);
6995 else
6996 /* Dummy mask. */
6997 mask = build_minus_one_cst (truth_type_for (vectype));
6998 auto_vec<tree> vops (6);
6999 vops.quick_push (obj: mask);
7000 vops.quick_push (obj: vop0);
7001 if (vop1)
7002 vops.quick_push (obj: vop1);
7003 if (vop2)
7004 vops.quick_push (obj: vop2);
7005 if (reduc_idx >= 0)
7006 {
7007 /* Perform the operation on active elements only and take
7008 inactive elements from the reduction chain input. */
7009 gcc_assert (!vop2);
7010 vops.quick_push (obj: reduc_idx == 1 ? vop1 : vop0);
7011 }
7012 else
7013 {
7014 auto else_value = targetm.preferred_else_value
7015 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7016 vops.quick_push (obj: else_value);
7017 }
7018 if (len_loop_p)
7019 {
7020 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7021 vec_num * ncopies, vectype, i, 1);
7022 signed char biasval
7023 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7024 tree bias = build_int_cst (intQI_type_node, biasval);
7025 vops.quick_push (obj: len);
7026 vops.quick_push (obj: bias);
7027 }
7028 gcall *call
7029 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7030 : cond_len_fn,
7031 vops);
7032 new_temp = make_ssa_name (var: vec_dest, stmt: call);
7033 gimple_call_set_lhs (gs: call, lhs: new_temp);
7034 gimple_call_set_nothrow (s: call, nothrow_p: true);
7035 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
7036 new_stmt = call;
7037 }
7038 else
7039 {
7040 tree mask = NULL_TREE;
	  /* When combining two masks, check if either of them has elsewhere
	     been combined with a loop mask; if so, we can mark the new
	     combined mask as not needing to be combined with a loop mask
	     again.  */
7044 if (masked_loop_p
7045 && code == BIT_AND_EXPR
7046 && VECTOR_BOOLEAN_TYPE_P (vectype))
7047 {
7048 if (loop_vinfo->scalar_cond_masked_set.contains (k: { op0,
7049 ncopies}))
7050 {
7051 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7052 vec_num * ncopies, vectype, i);
7053
7054 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
7055 vec_mask: vop0, gsi);
7056 }
7057
7058 if (loop_vinfo->scalar_cond_masked_set.contains (k: { op1,
7059 ncopies }))
7060 {
7061 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7062 vec_num * ncopies, vectype, i);
7063
7064 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
7065 vec_mask: vop1, gsi);
7066 }
7067 }
7068
7069 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7070 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
7071 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
7072 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7073 if (using_emulated_vectors_p)
7074 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7075
7076 /* Enter the combined value into the vector cond hash so we don't
7077 AND it with a loop mask again. */
7078 if (mask)
7079 loop_vinfo->vec_cond_masked_set.add (k: { new_temp, mask });
7080 }
7081
7082 if (vec_cvt_dest)
7083 {
7084 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7085 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7086 new_temp);
7087 new_temp = make_ssa_name (var: vec_cvt_dest, stmt: new_stmt);
7088 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
7089 vect_finish_stmt_generation (vinfo, stmt_info,
7090 vec_stmt: new_stmt, gsi);
7091 }
7092
7093 if (slp_node)
7094 slp_node->push_vec_def (def: new_stmt);
7095 else
7096 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
7097 }
7098
7099 if (!slp_node)
7100 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7101
7102 vec_oprnds0.release ();
7103 vec_oprnds1.release ();
7104 vec_oprnds2.release ();
7105
7106 return true;
7107}
7108
7109/* A helper function to ensure data reference DR_INFO's base alignment. */
7110
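/* For illustration (hypothetical numbers): if the target wants vector
   accesses aligned to 16 bytes and the only obstacle is the base decl,
   e.g. a file-scope "static int a[64];" with default alignment, raising
   its DECL_ALIGN (or its symtab alignment) to 16 bytes allows the
   vectorized accesses to be emitted as aligned.  */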
7111static void
7112ensure_base_align (dr_vec_info *dr_info)
7113{
  /* Alignment is only analyzed for the first element of a DR group;
     use that to look at the base alignment we need to enforce.  */
7116 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7117 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7118
7119 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7120
7121 if (dr_info->base_misaligned)
7122 {
7123 tree base_decl = dr_info->base_decl;
7124
7125 // We should only be able to increase the alignment of a base object if
7126 // we know what its new alignment should be at compile time.
7127 unsigned HOST_WIDE_INT align_base_to =
7128 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7129
7130 if (decl_in_symtab_p (decl: base_decl))
7131 symtab_node::get (decl: base_decl)->increase_alignment (align: align_base_to);
7132 else if (DECL_ALIGN (base_decl) < align_base_to)
7133 {
7134 SET_DECL_ALIGN (base_decl, align_base_to);
7135 DECL_USER_ALIGN (base_decl) = 1;
7136 }
7137 dr_info->base_misaligned = false;
7138 }
7139}
7140
7141
7142/* Function get_group_alias_ptr_type.
7143
7144 Return the alias type for the group starting at FIRST_STMT_INFO. */
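/* For instance (hypothetical layout), a group interleaving stores to an
   int field and to a float field of the same struct has members whose
   alias sets differ; falling back to ptr_type_node makes the combined
   vector access alias everything, which is conservatively correct.  */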
7145
7146static tree
7147get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7148{
7149 struct data_reference *first_dr, *next_dr;
7150
7151 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7152 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7153 while (next_stmt_info)
7154 {
7155 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7156 if (get_alias_set (DR_REF (first_dr))
7157 != get_alias_set (DR_REF (next_dr)))
7158 {
7159 if (dump_enabled_p ())
7160 dump_printf_loc (MSG_NOTE, vect_location,
7161 "conflicting alias set types.\n");
7162 return ptr_type_node;
7163 }
7164 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7165 }
7166 return reference_alias_ptr_type (DR_REF (first_dr));
7167}
7168
7169
7170/* Function scan_operand_equal_p.
7171
7172 Helper function for check_scan_store. Compare two references
7173 with .GOMP_SIMD_LANE bases. */
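/* For example (names in the style of the IL comments below), offsets
   written as _25 * 4 and (sizetype) _25 * 4 are both normalized to the
   SSA name _25 with step 4 before being compared, so the same
   .GOMP_SIMD_LANE index spelled slightly differently in the two
   references still compares equal.  */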
7174
7175static bool
7176scan_operand_equal_p (tree ref1, tree ref2)
7177{
7178 tree ref[2] = { ref1, ref2 };
7179 poly_int64 bitsize[2], bitpos[2];
7180 tree offset[2], base[2];
7181 for (int i = 0; i < 2; ++i)
7182 {
7183 machine_mode mode;
7184 int unsignedp, reversep, volatilep = 0;
7185 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7186 &offset[i], &mode, &unsignedp,
7187 &reversep, &volatilep);
7188 if (reversep || volatilep || maybe_ne (a: bitpos[i], b: 0))
7189 return false;
7190 if (TREE_CODE (base[i]) == MEM_REF
7191 && offset[i] == NULL_TREE
7192 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7193 {
7194 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7195 if (is_gimple_assign (gs: def_stmt)
7196 && gimple_assign_rhs_code (gs: def_stmt) == POINTER_PLUS_EXPR
7197 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7198 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7199 {
7200 if (maybe_ne (a: mem_ref_offset (base[i]), b: 0))
7201 return false;
7202 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7203 offset[i] = gimple_assign_rhs2 (gs: def_stmt);
7204 }
7205 }
7206 }
7207
7208 if (!operand_equal_p (base[0], base[1], flags: 0))
7209 return false;
7210 if (maybe_ne (a: bitsize[0], b: bitsize[1]))
7211 return false;
7212 if (offset[0] != offset[1])
7213 {
7214 if (!offset[0] || !offset[1])
7215 return false;
7216 if (!operand_equal_p (offset[0], offset[1], flags: 0))
7217 {
7218 tree step[2];
7219 for (int i = 0; i < 2; ++i)
7220 {
7221 step[i] = integer_one_node;
7222 if (TREE_CODE (offset[i]) == SSA_NAME)
7223 {
7224 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7225 if (is_gimple_assign (gs: def_stmt)
7226 && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR
7227 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7228 == INTEGER_CST))
7229 {
7230 step[i] = gimple_assign_rhs2 (gs: def_stmt);
7231 offset[i] = gimple_assign_rhs1 (gs: def_stmt);
7232 }
7233 }
7234 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7235 {
7236 step[i] = TREE_OPERAND (offset[i], 1);
7237 offset[i] = TREE_OPERAND (offset[i], 0);
7238 }
7239 tree rhs1 = NULL_TREE;
7240 if (TREE_CODE (offset[i]) == SSA_NAME)
7241 {
7242 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7243 if (gimple_assign_cast_p (s: def_stmt))
7244 rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7245 }
7246 else if (CONVERT_EXPR_P (offset[i]))
7247 rhs1 = TREE_OPERAND (offset[i], 0);
7248 if (rhs1
7249 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7250 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7251 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7252 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7253 offset[i] = rhs1;
7254 }
7255 if (!operand_equal_p (offset[0], offset[1], flags: 0)
7256 || !operand_equal_p (step[0], step[1], flags: 0))
7257 return false;
7258 }
7259 }
7260 return true;
7261}
7262
7263
7264enum scan_store_kind {
7265 /* Normal permutation. */
7266 scan_store_kind_perm,
7267
7268 /* Whole vector left shift permutation with zero init. */
7269 scan_store_kind_lshift_zero,
7270
7271 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7272 scan_store_kind_lshift_cond
7273};
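/* E.g. a whole-vector shift (vec_shl) always shifts in zero lanes; that
   matches the permutation with the scan initializer only when the
   initializer itself is zero, otherwise the shifted-in lanes must be
   patched with a VEC_COND_EXPR, which is what distinguishes the last
   two kinds above.  */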
7274
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise the exact log2 of vectype's nunits.
   If nonnull, USE_WHOLE_VECTOR is filled with the enum scan_store_kind
   operation to perform at each step.  */
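/* A worked example with a hypothetical 4-lane vector (units_log2 == 2);
   the selectors built by the loop below are

     i == 0:	      { 0, 4, 5, 6 }   1 lane of input 1, then input 2
     i == 1:	      { 0, 1, 4, 5 }   2 lanes of input 1, then input 2
     i == 2 (last):   { 3, 3, 3, 3 }   broadcast of the last lane

   where lane indices >= nunits select from the second permutation
   input.  */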
7281
7282static int
7283scan_store_can_perm_p (tree vectype, tree init,
7284 vec<enum scan_store_kind> *use_whole_vector = NULL)
7285{
7286 enum machine_mode vec_mode = TYPE_MODE (vectype);
7287 unsigned HOST_WIDE_INT nunits;
7288 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits))
7289 return -1;
7290 int units_log2 = exact_log2 (x: nunits);
7291 if (units_log2 <= 0)
7292 return -1;
7293
7294 int i;
7295 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7296 for (i = 0; i <= units_log2; ++i)
7297 {
7298 unsigned HOST_WIDE_INT j, k;
7299 enum scan_store_kind kind = scan_store_kind_perm;
7300 vec_perm_builder sel (nunits, nunits, 1);
7301 sel.quick_grow (len: nunits);
7302 if (i == units_log2)
7303 {
7304 for (j = 0; j < nunits; ++j)
7305 sel[j] = nunits - 1;
7306 }
7307 else
7308 {
7309 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7310 sel[j] = j;
7311 for (k = 0; j < nunits; ++j, ++k)
7312 sel[j] = nunits + k;
7313 }
7314 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7315 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7316 {
7317 if (i == units_log2)
7318 return -1;
7319
7320 if (whole_vector_shift_kind == scan_store_kind_perm)
7321 {
7322 if (optab_handler (op: vec_shl_optab, mode: vec_mode) == CODE_FOR_nothing)
7323 return -1;
7324 whole_vector_shift_kind = scan_store_kind_lshift_zero;
		  /* Whole vector shifts shift in zeros, so if init is an
		     all-zero constant, there is no need to do anything
		     further.  */
7327 if ((TREE_CODE (init) != INTEGER_CST
7328 && TREE_CODE (init) != REAL_CST)
7329 || !initializer_zerop (init))
7330 {
7331 tree masktype = truth_type_for (vectype);
7332 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7333 return -1;
7334 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7335 }
7336 }
7337 kind = whole_vector_shift_kind;
7338 }
7339 if (use_whole_vector)
7340 {
7341 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7342 use_whole_vector->safe_grow_cleared (len: i, exact: true);
7343 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7344 use_whole_vector->safe_push (obj: kind);
7345 }
7346 }
7347
7348 return units_log2;
7349}
7350
7351
7352/* Function check_scan_store.
7353
7354 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7355
7356static bool
7357check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7358 enum vect_def_type rhs_dt, bool slp, tree mask,
7359 vect_memory_access_type memory_access_type)
7360{
7361 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
7362 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7363 tree ref_type;
7364
7365 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7366 if (slp
7367 || mask
7368 || memory_access_type != VMAT_CONTIGUOUS
7369 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7370 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7371 || loop_vinfo == NULL
7372 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7373 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7374 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7375 || !integer_zerop (DR_INIT (dr_info->dr))
7376 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7377 || !alias_sets_conflict_p (get_alias_set (vectype),
7378 get_alias_set (TREE_TYPE (ref_type))))
7379 {
7380 if (dump_enabled_p ())
7381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7382 "unsupported OpenMP scan store.\n");
7383 return false;
7384 }
7385
  /* We need to pattern match code built by OpenMP lowering and simplified
     by subsequent optimizations into something we can handle.
7388 #pragma omp simd reduction(inscan,+:r)
7389 for (...)
7390 {
7391 r += something ();
7392 #pragma omp scan inclusive (r)
7393 use (r);
7394 }
7395 shall have body with:
7396 // Initialization for input phase, store the reduction initializer:
7397 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7398 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7399 D.2042[_21] = 0;
7400 // Actual input phase:
7401 ...
7402 r.0_5 = D.2042[_20];
7403 _6 = _4 + r.0_5;
7404 D.2042[_20] = _6;
7405 // Initialization for scan phase:
7406 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7407 _26 = D.2043[_25];
7408 _27 = D.2042[_25];
7409 _28 = _26 + _27;
7410 D.2043[_25] = _28;
7411 D.2042[_25] = _28;
7412 // Actual scan phase:
7413 ...
7414 r.1_8 = D.2042[_20];
7415 ...
7416 The "omp simd array" variable D.2042 holds the privatized copy used
7417 inside of the loop and D.2043 is another one that holds copies of
7418 the current original list item. The separate GOMP_SIMD_LANE ifn
7419 kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++-ish
     user-defined reduction, but still allow the vectorizer to pattern
     recognize it and turn it into the appropriate vectorized scan.
7423
7424 For exclusive scan, this is slightly different:
7425 #pragma omp simd reduction(inscan,+:r)
7426 for (...)
7427 {
7428 use (r);
7429 #pragma omp scan exclusive (r)
7430 r += something ();
7431 }
7432 shall have body with:
7433 // Initialization for input phase, store the reduction initializer:
7434 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7435 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7436 D.2042[_21] = 0;
7437 // Actual input phase:
7438 ...
7439 r.0_5 = D.2042[_20];
7440 _6 = _4 + r.0_5;
7441 D.2042[_20] = _6;
7442 // Initialization for scan phase:
7443 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7444 _26 = D.2043[_25];
7445 D.2044[_25] = _26;
7446 _27 = D.2042[_25];
7447 _28 = _26 + _27;
7448 D.2043[_25] = _28;
7449 // Actual scan phase:
7450 ...
7451 r.1_8 = D.2044[_20];
7452 ... */
7453
7454 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7455 {
7456 /* Match the D.2042[_21] = 0; store above. Just require that
7457 it is a constant or external definition store. */
7458 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7459 {
7460 fail_init:
7461 if (dump_enabled_p ())
7462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7463 "unsupported OpenMP scan initializer store.\n");
7464 return false;
7465 }
7466
7467 if (! loop_vinfo->scan_map)
7468 loop_vinfo->scan_map = new hash_map<tree, tree>;
7469 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7470 tree &cached = loop_vinfo->scan_map->get_or_insert (k: var);
7471 if (cached)
7472 goto fail_init;
7473 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7474
7475 /* These stores can be vectorized normally. */
7476 return true;
7477 }
7478
7479 if (rhs_dt != vect_internal_def)
7480 {
7481 fail:
7482 if (dump_enabled_p ())
7483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7484 "unsupported OpenMP scan combiner pattern.\n");
7485 return false;
7486 }
7487
7488 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7489 tree rhs = gimple_assign_rhs1 (gs: stmt);
7490 if (TREE_CODE (rhs) != SSA_NAME)
7491 goto fail;
7492
7493 gimple *other_store_stmt = NULL;
7494 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7495 bool inscan_var_store
7496 = lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7497
7498 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7499 {
7500 if (!inscan_var_store)
7501 {
7502 use_operand_p use_p;
7503 imm_use_iterator iter;
7504 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7505 {
7506 gimple *use_stmt = USE_STMT (use_p);
7507 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7508 continue;
7509 if (gimple_bb (g: use_stmt) != gimple_bb (g: stmt)
7510 || !is_gimple_assign (gs: use_stmt)
7511 || gimple_assign_rhs_class (gs: use_stmt) != GIMPLE_BINARY_RHS
7512 || other_store_stmt
7513 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7514 goto fail;
7515 other_store_stmt = use_stmt;
7516 }
7517 if (other_store_stmt == NULL)
7518 goto fail;
7519 rhs = gimple_assign_lhs (gs: other_store_stmt);
7520 if (!single_imm_use (var: rhs, use_p: &use_p, stmt: &other_store_stmt))
7521 goto fail;
7522 }
7523 }
7524 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7525 {
7526 use_operand_p use_p;
7527 imm_use_iterator iter;
7528 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7529 {
7530 gimple *use_stmt = USE_STMT (use_p);
7531 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7532 continue;
7533 if (other_store_stmt)
7534 goto fail;
7535 other_store_stmt = use_stmt;
7536 }
7537 }
7538 else
7539 goto fail;
7540
7541 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7542 if (gimple_bb (g: def_stmt) != gimple_bb (g: stmt)
7543 || !is_gimple_assign (gs: def_stmt)
7544 || gimple_assign_rhs_class (gs: def_stmt) != GIMPLE_BINARY_RHS)
7545 goto fail;
7546
7547 enum tree_code code = gimple_assign_rhs_code (gs: def_stmt);
7548 /* For pointer addition, we should use the normal plus for the vector
7549 operation. */
7550 switch (code)
7551 {
7552 case POINTER_PLUS_EXPR:
7553 code = PLUS_EXPR;
7554 break;
7555 case MULT_HIGHPART_EXPR:
7556 goto fail;
7557 default:
7558 break;
7559 }
7560 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7561 goto fail;
7562
7563 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7564 tree rhs2 = gimple_assign_rhs2 (gs: def_stmt);
7565 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7566 goto fail;
7567
7568 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7569 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7570 if (gimple_bb (g: load1_stmt) != gimple_bb (g: stmt)
7571 || !gimple_assign_load_p (load1_stmt)
7572 || gimple_bb (g: load2_stmt) != gimple_bb (g: stmt)
7573 || !gimple_assign_load_p (load2_stmt))
7574 goto fail;
7575
7576 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7577 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7578 if (load1_stmt_info == NULL
7579 || load2_stmt_info == NULL
7580 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7581 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7582 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7583 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7584 goto fail;
7585
7586 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7587 {
7588 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7589 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7590 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7591 goto fail;
7592 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7593 tree lrhs;
7594 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7595 lrhs = rhs1;
7596 else
7597 lrhs = rhs2;
7598 use_operand_p use_p;
7599 imm_use_iterator iter;
7600 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7601 {
7602 gimple *use_stmt = USE_STMT (use_p);
7603 if (use_stmt == def_stmt || is_gimple_debug (gs: use_stmt))
7604 continue;
7605 if (other_store_stmt)
7606 goto fail;
7607 other_store_stmt = use_stmt;
7608 }
7609 }
7610
7611 if (other_store_stmt == NULL)
7612 goto fail;
7613 if (gimple_bb (g: other_store_stmt) != gimple_bb (g: stmt)
7614 || !gimple_store_p (gs: other_store_stmt))
7615 goto fail;
7616
7617 stmt_vec_info other_store_stmt_info
7618 = loop_vinfo->lookup_stmt (other_store_stmt);
7619 if (other_store_stmt_info == NULL
7620 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7621 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7622 goto fail;
7623
7624 gimple *stmt1 = stmt;
7625 gimple *stmt2 = other_store_stmt;
7626 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7627 std::swap (a&: stmt1, b&: stmt2);
7628 if (scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1),
7629 ref2: gimple_assign_rhs1 (gs: load2_stmt)))
7630 {
7631 std::swap (a&: rhs1, b&: rhs2);
7632 std::swap (a&: load1_stmt, b&: load2_stmt);
7633 std::swap (a&: load1_stmt_info, b&: load2_stmt_info);
7634 }
7635 if (!scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1),
7636 ref2: gimple_assign_rhs1 (gs: load1_stmt)))
7637 goto fail;
7638
7639 tree var3 = NULL_TREE;
7640 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7641 && !scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt2),
7642 ref2: gimple_assign_rhs1 (gs: load2_stmt)))
7643 goto fail;
7644 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7645 {
7646 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7647 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7648 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7649 goto fail;
7650 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7651 if (!lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var3))
7652 || lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var3))
7653 || lookup_attribute (attr_name: "omp simd inscan exclusive",
7654 DECL_ATTRIBUTES (var3)))
7655 goto fail;
7656 }
7657
7658 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7659 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7660 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7661 goto fail;
7662
7663 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7664 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7665 if (!lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var1))
7666 || !lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var2))
7667 || (!lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7668 == (!lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var2))))
7669 goto fail;
7670
7671 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7672 std::swap (a&: var1, b&: var2);
7673
7674 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7675 {
7676 if (!lookup_attribute (attr_name: "omp simd inscan exclusive",
7677 DECL_ATTRIBUTES (var1)))
7678 goto fail;
7679 var1 = var3;
7680 }
7681
7682 if (loop_vinfo->scan_map == NULL)
7683 goto fail;
7684 tree *init = loop_vinfo->scan_map->get (k: var1);
7685 if (init == NULL)
7686 goto fail;
7687
  /* The IL is as expected; now check if we can actually vectorize it.
7689 Inclusive scan:
7690 _26 = D.2043[_25];
7691 _27 = D.2042[_25];
7692 _28 = _26 + _27;
7693 D.2043[_25] = _28;
7694 D.2042[_25] = _28;
7695 should be vectorized as (where _40 is the vectorized rhs
7696 from the D.2042[_21] = 0; store):
7697 _30 = MEM <vector(8) int> [(int *)&D.2043];
7698 _31 = MEM <vector(8) int> [(int *)&D.2042];
7699 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7700 _33 = _31 + _32;
7701 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7702 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7703 _35 = _33 + _34;
7704 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7705 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7706 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7707 _37 = _35 + _36;
7708 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7709 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7710 _38 = _30 + _37;
7711 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7712 MEM <vector(8) int> [(int *)&D.2043] = _39;
7713 MEM <vector(8) int> [(int *)&D.2042] = _38;
7714 Exclusive scan:
7715 _26 = D.2043[_25];
7716 D.2044[_25] = _26;
7717 _27 = D.2042[_25];
7718 _28 = _26 + _27;
7719 D.2043[_25] = _28;
7720 should be vectorized as (where _40 is the vectorized rhs
7721 from the D.2042[_21] = 0; store):
7722 _30 = MEM <vector(8) int> [(int *)&D.2043];
7723 _31 = MEM <vector(8) int> [(int *)&D.2042];
7724 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7725 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7726 _34 = _32 + _33;
7727 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7728 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7729 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7730 _36 = _34 + _35;
7731 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7732 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7733 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7734 _38 = _36 + _37;
7735 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7736 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7737 _39 = _30 + _38;
7738 _50 = _31 + _39;
7739 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7740 MEM <vector(8) int> [(int *)&D.2044] = _39;
7741 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7742 enum machine_mode vec_mode = TYPE_MODE (vectype);
7743 optab optab = optab_for_tree_code (code, vectype, optab_default);
7744 if (!optab || optab_handler (op: optab, mode: vec_mode) == CODE_FOR_nothing)
7745 goto fail;
7746
7747 int units_log2 = scan_store_can_perm_p (vectype, init: *init);
7748 if (units_log2 == -1)
7749 goto fail;
7750
7751 return true;
7752}
7753
7754
7755/* Function vectorizable_scan_store.
7756
   Helper of vectorizable_store; takes the same arguments as
   vectorizable_store.  Handle only the transformation; checking is done
   in check_scan_store.  */
7759
7760static bool
7761vectorizable_scan_store (vec_info *vinfo,
7762 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7763 gimple **vec_stmt, int ncopies)
7764{
7765 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
7766 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7767 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7768 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7769
7770 if (dump_enabled_p ())
7771 dump_printf_loc (MSG_NOTE, vect_location,
7772 "transform scan store. ncopies = %d\n", ncopies);
7773
7774 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7775 tree rhs = gimple_assign_rhs1 (gs: stmt);
7776 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7777
7778 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7779 bool inscan_var_store
7780 = lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7781
7782 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7783 {
7784 use_operand_p use_p;
7785 imm_use_iterator iter;
7786 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7787 {
7788 gimple *use_stmt = USE_STMT (use_p);
7789 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7790 continue;
7791 rhs = gimple_assign_lhs (gs: use_stmt);
7792 break;
7793 }
7794 }
7795
7796 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7797 enum tree_code code = gimple_assign_rhs_code (gs: def_stmt);
7798 if (code == POINTER_PLUS_EXPR)
7799 code = PLUS_EXPR;
7800 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7801 && commutative_tree_code (code));
7802 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7803 tree rhs2 = gimple_assign_rhs2 (gs: def_stmt);
7804 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7805 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7806 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7807 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7808 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7809 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7810 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7811 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7812 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7813
7814 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7815 {
7816 std::swap (a&: rhs1, b&: rhs2);
7817 std::swap (a&: var1, b&: var2);
7818 std::swap (a&: load1_dr_info, b&: load2_dr_info);
7819 }
7820
7821 tree *init = loop_vinfo->scan_map->get (k: var1);
7822 gcc_assert (init);
7823
7824 unsigned HOST_WIDE_INT nunits;
7825 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits))
7826 gcc_unreachable ();
7827 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7828 int units_log2 = scan_store_can_perm_p (vectype, init: *init, use_whole_vector: &use_whole_vector);
7829 gcc_assert (units_log2 > 0);
7830 auto_vec<tree, 16> perms;
7831 perms.quick_grow (len: units_log2 + 1);
7832 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7833 for (int i = 0; i <= units_log2; ++i)
7834 {
7835 unsigned HOST_WIDE_INT j, k;
7836 vec_perm_builder sel (nunits, nunits, 1);
7837 sel.quick_grow (len: nunits);
7838 if (i == units_log2)
7839 for (j = 0; j < nunits; ++j)
7840 sel[j] = nunits - 1;
7841 else
7842 {
7843 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7844 sel[j] = j;
7845 for (k = 0; j < nunits; ++j, ++k)
7846 sel[j] = nunits + k;
7847 }
7848 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7849 if (!use_whole_vector.is_empty ()
7850 && use_whole_vector[i] != scan_store_kind_perm)
7851 {
7852 if (zero_vec == NULL_TREE)
7853 zero_vec = build_zero_cst (vectype);
7854 if (masktype == NULL_TREE
7855 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7856 masktype = truth_type_for (vectype);
7857 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7858 }
7859 else
7860 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7861 }
7862
7863 tree vec_oprnd1 = NULL_TREE;
7864 tree vec_oprnd2 = NULL_TREE;
7865 tree vec_oprnd3 = NULL_TREE;
7866 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7867 tree dataref_offset = build_int_cst (ref_type, 0);
7868 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7869 aggr_type: vectype, memory_access_type: VMAT_CONTIGUOUS);
7870 tree ldataref_ptr = NULL_TREE;
7871 tree orig = NULL_TREE;
7872 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7873 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7874 auto_vec<tree> vec_oprnds1;
7875 auto_vec<tree> vec_oprnds2;
7876 auto_vec<tree> vec_oprnds3;
7877 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7878 op0: *init, vec_oprnds0: &vec_oprnds1,
7879 op1: ldataref_ptr == NULL ? rhs1 : NULL, vec_oprnds1: &vec_oprnds2,
7880 op2: rhs2, vec_oprnds2: &vec_oprnds3);
7881 for (int j = 0; j < ncopies; j++)
7882 {
7883 vec_oprnd1 = vec_oprnds1[j];
7884 if (ldataref_ptr == NULL)
7885 vec_oprnd2 = vec_oprnds2[j];
7886 vec_oprnd3 = vec_oprnds3[j];
7887 if (j == 0)
7888 orig = vec_oprnd3;
7889 else if (!inscan_var_store)
7890 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7891
7892 if (ldataref_ptr)
7893 {
7894 vec_oprnd2 = make_ssa_name (var: vectype);
7895 tree data_ref = fold_build2 (MEM_REF, vectype,
7896 unshare_expr (ldataref_ptr),
7897 dataref_offset);
7898 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7899 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7900 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7901 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7902 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7903 }
7904
7905 tree v = vec_oprnd2;
7906 for (int i = 0; i < units_log2; ++i)
7907 {
7908 tree new_temp = make_ssa_name (var: vectype);
7909 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7910 (zero_vec
7911 && (use_whole_vector[i]
7912 != scan_store_kind_perm))
7913 ? zero_vec : vec_oprnd1, v,
7914 perms[i]);
7915 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7916 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7917 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7918
7919 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7920 {
	      /* The whole-vector shift shifted in zero bits; if *init
		 is not initializer_zerop, we need to replace those elements
		 with elements from vec_oprnd1.  */
7924 tree_vector_builder vb (masktype, nunits, 1);
7925 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7926 vb.quick_push (obj: k < (HOST_WIDE_INT_1U << i)
7927 ? boolean_false_node : boolean_true_node);
7928
7929 tree new_temp2 = make_ssa_name (var: vectype);
7930 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7931 new_temp, vec_oprnd1);
7932 vect_finish_stmt_generation (vinfo, stmt_info,
7933 vec_stmt: g, gsi);
7934 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7935 new_temp = new_temp2;
7936 }
7937
7938 /* For exclusive scan, perform the perms[i] permutation once
7939 more. */
7940 if (i == 0
7941 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7942 && v == vec_oprnd2)
7943 {
7944 v = new_temp;
7945 --i;
7946 continue;
7947 }
7948
7949 tree new_temp2 = make_ssa_name (var: vectype);
7950 g = gimple_build_assign (new_temp2, code, v, new_temp);
7951 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7953
7954 v = new_temp2;
7955 }
7956
7957 tree new_temp = make_ssa_name (var: vectype);
7958 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7959 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7960 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7961
7962 tree last_perm_arg = new_temp;
      /* For exclusive scan, new_temp computed above is the exclusive scan
	 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
	 of the last element into orig.  */
7966 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7967 {
7968 last_perm_arg = make_ssa_name (var: vectype);
7969 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7970 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7971 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7972 }
7973
7974 orig = make_ssa_name (var: vectype);
7975 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7976 last_perm_arg, perms[units_log2]);
7977 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7978 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7979
7980 if (!inscan_var_store)
7981 {
7982 tree data_ref = fold_build2 (MEM_REF, vectype,
7983 unshare_expr (dataref_ptr),
7984 dataref_offset);
7985 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7986 g = gimple_build_assign (data_ref, new_temp);
7987 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7988 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7989 }
7990 }
7991
7992 if (inscan_var_store)
7993 for (int j = 0; j < ncopies; j++)
7994 {
7995 if (j != 0)
7996 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7997
7998 tree data_ref = fold_build2 (MEM_REF, vectype,
7999 unshare_expr (dataref_ptr),
8000 dataref_offset);
8001 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8002 gimple *g = gimple_build_assign (data_ref, orig);
8003 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8004 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8005 }
8006 return true;
8007}
8008
8009
8010/* Function vectorizable_store.
8011
   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8013 that can be vectorized.
8014 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8015 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8016 Return true if STMT_INFO is vectorizable in this way. */
8017
8018static bool
8019vectorizable_store (vec_info *vinfo,
8020 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8021 gimple **vec_stmt, slp_tree slp_node,
8022 stmt_vector_for_cost *cost_vec)
8023{
8024 tree data_ref;
8025 tree vec_oprnd = NULL_TREE;
8026 tree elem_type;
8027 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
8028 class loop *loop = NULL;
8029 machine_mode vec_mode;
8030 tree dummy;
8031 enum vect_def_type rhs_dt = vect_unknown_def_type;
8032 enum vect_def_type mask_dt = vect_unknown_def_type;
8033 tree dataref_ptr = NULL_TREE;
8034 tree dataref_offset = NULL_TREE;
8035 gimple *ptr_incr = NULL;
8036 int ncopies;
8037 int j;
8038 stmt_vec_info first_stmt_info;
8039 bool grouped_store;
8040 unsigned int group_size, i;
8041 bool slp = (slp_node != NULL);
8042 unsigned int vec_num;
8043 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
8044 tree aggr_type;
8045 gather_scatter_info gs_info;
8046 poly_uint64 vf;
8047 vec_load_store_type vls_type;
8048 tree ref_type;
8049
8050 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8051 return false;
8052
8053 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8054 && ! vec_stmt)
8055 return false;
8056
8057 /* Is vectorizable store? */
8058
8059 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8060 slp_tree mask_node = NULL;
8061 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
8062 {
8063 tree scalar_dest = gimple_assign_lhs (gs: assign);
8064 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8065 && is_pattern_stmt_p (stmt_info))
8066 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8067 if (TREE_CODE (scalar_dest) != ARRAY_REF
8068 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8069 && TREE_CODE (scalar_dest) != INDIRECT_REF
8070 && TREE_CODE (scalar_dest) != COMPONENT_REF
8071 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8072 && TREE_CODE (scalar_dest) != REALPART_EXPR
8073 && TREE_CODE (scalar_dest) != MEM_REF)
8074 return false;
8075 }
8076 else
8077 {
8078 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
8079 if (!call || !gimple_call_internal_p (gs: call))
8080 return false;
8081
8082 internal_fn ifn = gimple_call_internal_fn (gs: call);
8083 if (!internal_store_fn_p (ifn))
8084 return false;
8085
8086 int mask_index = internal_fn_mask_index (ifn);
8087 if (mask_index >= 0 && slp_node)
8088 mask_index = vect_slp_child_index_for_operand
8089 (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8090 if (mask_index >= 0
8091 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8092 mask: &mask, mask_node: &mask_node, mask_dt_out: &mask_dt,
8093 mask_vectype_out: &mask_vectype))
8094 return false;
8095 }
8096
8097 /* Cannot have hybrid store SLP -- that would mean storing to the
8098 same location twice. */
8099 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8100
8101 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8102 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
8103
8104 if (loop_vinfo)
8105 {
8106 loop = LOOP_VINFO_LOOP (loop_vinfo);
8107 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8108 }
8109 else
8110 vf = 1;
8111
8112 /* Multiple types in SLP are handled by creating the appropriate number of
8113 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8114 case of SLP. */
8115 if (slp)
8116 ncopies = 1;
8117 else
8118 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8119
8120 gcc_assert (ncopies >= 1);
8121
8122 /* FORNOW. This restriction should be relaxed. */
8123 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8124 {
8125 if (dump_enabled_p ())
8126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8127 "multiple types in nested loop.\n");
8128 return false;
8129 }
8130
8131 tree op;
8132 slp_tree op_node;
8133 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8134 rhs: &op, rhs_node: &op_node, rhs_dt_out: &rhs_dt, rhs_vectype_out: &rhs_vectype, vls_type_out: &vls_type))
8135 return false;
8136
8137 elem_type = TREE_TYPE (vectype);
8138 vec_mode = TYPE_MODE (vectype);
8139
8140 if (!STMT_VINFO_DATA_REF (stmt_info))
8141 return false;
8142
8143 vect_memory_access_type memory_access_type;
8144 enum dr_alignment_support alignment_support_scheme;
8145 int misalignment;
8146 poly_int64 poffset;
8147 internal_fn lanes_ifn;
8148 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type,
8149 ncopies, memory_access_type: &memory_access_type, poffset: &poffset,
8150 alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info,
8151 lanes_ifn: &lanes_ifn))
8152 return false;
8153
8154 if (mask)
8155 {
8156 if (memory_access_type == VMAT_CONTIGUOUS)
8157 {
8158 if (!VECTOR_MODE_P (vec_mode)
8159 || !can_vec_mask_load_store_p (vec_mode,
8160 TYPE_MODE (mask_vectype), false))
8161 return false;
8162 }
8163 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8164 && (memory_access_type != VMAT_GATHER_SCATTER
8165 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8166 {
8167 if (dump_enabled_p ())
8168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8169 "unsupported access type for masked store.\n");
8170 return false;
8171 }
8172 else if (memory_access_type == VMAT_GATHER_SCATTER
8173 && gs_info.ifn == IFN_LAST
8174 && !gs_info.decl)
8175 {
8176 if (dump_enabled_p ())
8177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8178 "unsupported masked emulated scatter.\n");
8179 return false;
8180 }
8181 }
8182 else
8183 {
      /* FORNOW. In some cases we can vectorize even if the data type is not
	 supported (e.g. array initialization with 0).  */
8186 if (optab_handler (op: mov_optab, mode: vec_mode) == CODE_FOR_nothing)
8187 return false;
8188 }
8189
8190 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8191 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8192 && memory_access_type != VMAT_GATHER_SCATTER
8193 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8194 if (grouped_store)
8195 {
8196 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8197 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8198 group_size = DR_GROUP_SIZE (first_stmt_info);
8199 }
8200 else
8201 {
8202 first_stmt_info = stmt_info;
8203 first_dr_info = dr_info;
8204 group_size = vec_num = 1;
8205 }
8206
8207 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8208 {
8209 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8210 memory_access_type))
8211 return false;
8212 }
8213
8214 bool costing_p = !vec_stmt;
8215 if (costing_p) /* transformation not required. */
8216 {
8217 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8218
8219 if (loop_vinfo
8220 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8221 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8222 vls_type, group_size,
8223 memory_access_type, gs_info: &gs_info,
8224 scalar_mask: mask);
8225
8226 if (slp_node
8227 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8228 || (mask
8229 && !vect_maybe_update_slp_op_vectype (mask_node,
8230 mask_vectype))))
8231 {
8232 if (dump_enabled_p ())
8233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8234 "incompatible vector types for invariants\n");
8235 return false;
8236 }
8237
8238 if (dump_enabled_p ()
8239 && memory_access_type != VMAT_ELEMENTWISE
8240 && memory_access_type != VMAT_GATHER_SCATTER
8241 && alignment_support_scheme != dr_aligned)
8242 dump_printf_loc (MSG_NOTE, vect_location,
8243 "Vectorizing an unaligned access.\n");
8244
8245 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8246
8247 /* As function vect_transform_stmt shows, for interleaving stores
8248 the whole chain is vectorized only when the last store in the chain
8249 is reached; the other stores in the group are skipped. So we
8250 only want to cost one store per group here. Since it is not trivial
8251 to get hold of the last store, and costing the first one is
8252 equivalent, use the first one instead. */
8253 if (grouped_store
8254 && !slp
8255 && first_stmt_info != stmt_info)
8256 return true;
8257 }
8258 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8259
8260 /* Transform. */
8261
8262 ensure_base_align (dr_info);
8263
8264 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8265 {
8266 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8267 gcc_assert (!slp);
8268 if (costing_p)
8269 {
8270 unsigned int inside_cost = 0, prologue_cost = 0;
8271 if (vls_type == VLS_STORE_INVARIANT)
8272 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec,
8273 stmt_info, misalign: 0, where: vect_prologue);
8274 vect_get_store_cost (vinfo, stmt_info, ncopies,
8275 alignment_support_scheme, misalignment,
8276 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8277
8278 if (dump_enabled_p ())
8279 dump_printf_loc (MSG_NOTE, vect_location,
8280 "vect_model_store_cost: inside_cost = %d, "
8281 "prologue_cost = %d .\n",
8282 inside_cost, prologue_cost);
8283
8284 return true;
8285 }
8286 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8287 }
8288
8289 if (grouped_store)
8290 {
8291 /* FORNOW */
8292 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8293
8294 if (slp)
8295 {
8296 grouped_store = false;
8297 /* VEC_NUM is the number of vect stmts to be created for this
8298 group. */
8299 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8300 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8301 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8302 == first_stmt_info);
8303 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8304 op = vect_get_store_rhs (stmt_info: first_stmt_info);
8305 }
8306 else
8307 /* VEC_NUM is the number of vect stmts to be created for this
8308 group. */
8309 vec_num = group_size;
8310
8311 ref_type = get_group_alias_ptr_type (first_stmt_info);
8312 }
8313 else
8314 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8315
8316 if (!costing_p && dump_enabled_p ())
8317 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8318 ncopies);
8319
8320 /* Check whether we need to update the prologue cost for an invariant
8321 operand, and update it accordingly if so. If this is not an
8322 interleaving store, we can just check vls_type; but if it is an
8323 interleaving store, we need to check the def_type of the stored
8324 value, since the current vls_type only describes
8325 first_stmt_info. */
8326 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8327 {
8328 gcc_assert (costing_p);
8329 if (slp)
8330 return;
8331 if (grouped_store)
8332 {
8333 gcc_assert (store_rhs);
8334 enum vect_def_type cdt;
8335 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8336 if (cdt != vect_constant_def && cdt != vect_external_def)
8337 return;
8338 }
8339 else if (vls_type != VLS_STORE_INVARIANT)
8340 return;
8341 *prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info,
8342 misalign: 0, where: vect_prologue);
8343 };
8344
8345 if (memory_access_type == VMAT_ELEMENTWISE
8346 || memory_access_type == VMAT_STRIDED_SLP)
8347 {
8348 unsigned inside_cost = 0, prologue_cost = 0;
8349 gimple_stmt_iterator incr_gsi;
8350 bool insert_after;
8351 gimple *incr;
8352 tree offvar;
8353 tree ivstep;
8354 tree running_off;
8355 tree stride_base, stride_step, alias_off;
8356 tree vec_oprnd = NULL_TREE;
8357 tree dr_offset;
8358 unsigned int g;
8359 /* Checked by get_load_store_type. */
8360 unsigned int const_nunits = nunits.to_constant ();
8361
8362 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8363 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8364
8365 dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info);
8366 stride_base
8367 = fold_build_pointer_plus
8368 (DR_BASE_ADDRESS (first_dr_info->dr),
8369 size_binop (PLUS_EXPR,
8370 convert_to_ptrofftype (dr_offset),
8371 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8372 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8373
8374 /* For a store with a loop-invariant stride (but one other than a
8375 power of 2), i.e. not a grouped access, like so:
8376
8377 for (i = 0; i < n; i += stride)
8378 array[i] = ...;
8379
8380 we generate a new induction variable and new stores from
8381 the components of the (vectorized) rhs:
8382
8383 for (j = 0; ; j += VF*stride)
8384 vectemp = ...;
8385 tmp1 = vectemp[0];
8386 array[j] = tmp1;
8387 tmp2 = vectemp[1];
8388 array[j + stride] = tmp2;
8389 ...
8390 */
8391
8392 unsigned nstores = const_nunits;
8393 unsigned lnel = 1;
8394 tree ltype = elem_type;
8395 tree lvectype = vectype;
8396 if (slp)
8397 {
8398 if (group_size < const_nunits
8399 && const_nunits % group_size == 0)
8400 {
8401 nstores = const_nunits / group_size;
8402 lnel = group_size;
8403 ltype = build_vector_type (elem_type, group_size);
8404 lvectype = vectype;
8405
8406 /* First check whether the vec_extract optab supports extracting
8407 such sub-vectors directly; if not, try the fallbacks below. */
8408 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8409 machine_mode vmode;
8410 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8411 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8412 group_size).exists (mode: &vmode)
8413 || (convert_optab_handler (op: vec_extract_optab,
8414 TYPE_MODE (vectype), from_mode: vmode)
8415 == CODE_FOR_nothing))
8416 {
8417 /* Try to avoid emitting extracts of vector elements by
8418 performing the extracts using an integer type of the
8419 same size, extracting from a vector of those and then
8420 re-interpreting the result as the original vector type,
8421 if supported. */
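 /* For instance (illustrative only): when storing groups of two
 HImode elements out of a V8HI vector and V2HI extraction is not
 supported, we can instead view the vector as V4SI and extract
 and store SImode values. */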
8422 unsigned lsize
8423 = group_size * GET_MODE_BITSIZE (mode: elmode);
8424 unsigned int lnunits = const_nunits / group_size;
8425 /* If we can't construct such a vector, fall back to
8426 element extracts from the original vector type and
8427 element-size stores. */
8428 if (int_mode_for_size (size: lsize, limit: 0).exists (mode: &elmode)
8429 && VECTOR_MODE_P (TYPE_MODE (vectype))
8430 && related_vector_mode (TYPE_MODE (vectype), elmode,
8431 lnunits).exists (mode: &vmode)
8432 && (convert_optab_handler (op: vec_extract_optab,
8433 to_mode: vmode, from_mode: elmode)
8434 != CODE_FOR_nothing))
8435 {
8436 nstores = lnunits;
8437 lnel = group_size;
8438 ltype = build_nonstandard_integer_type (lsize, 1);
8439 lvectype = build_vector_type (ltype, nstores);
8440 }
8441 /* Otherwise fall back to vector extraction anyway.
8442 Fewer stores are more important than avoiding spilling
8443 of the vector we extract from. Compared to the
8444 construction case in vectorizable_load, no store-forwarding
8445 issue exists here for reasonable archs. */
8446 }
8447 }
8448 else if (group_size >= const_nunits
8449 && group_size % const_nunits == 0)
8450 {
8451 int mis_align = dr_misalignment (dr_info: first_dr_info, vectype);
8452 dr_alignment_support dr_align
8453 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8454 mis_align);
8455 if (dr_align == dr_aligned
8456 || dr_align == dr_unaligned_supported)
8457 {
8458 nstores = 1;
8459 lnel = const_nunits;
8460 ltype = vectype;
8461 lvectype = vectype;
8462 alignment_support_scheme = dr_align;
8463 misalignment = mis_align;
8464 }
8465 }
8466 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8467 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8468 }
8469
8470 if (!costing_p)
8471 {
8472 ivstep = stride_step;
8473 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8474 build_int_cst (TREE_TYPE (ivstep), vf));
8475
8476 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8477
8478 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8479 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8480 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8481 insert_after, &offvar, NULL);
8482 incr = gsi_stmt (i: incr_gsi);
8483
8484 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8485 }
8486
8487 alias_off = build_int_cst (ref_type, 0);
8488 stmt_vec_info next_stmt_info = first_stmt_info;
8489 auto_vec<tree> vec_oprnds (ncopies);
8490 /* For costing some adjacent vector stores, we'd like to cost them
8491 once with their total number instead of costing each one by one. */
8492 unsigned int n_adjacent_stores = 0;
8493 for (g = 0; g < group_size; g++)
8494 {
8495 running_off = offvar;
8496 if (!costing_p)
8497 {
8498 if (g)
8499 {
8500 tree size = TYPE_SIZE_UNIT (ltype);
8501 tree pos
8502 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8503 tree newoff = copy_ssa_name (var: running_off, NULL);
8504 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8505 running_off, pos);
8506 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
8507 running_off = newoff;
8508 }
8509 }
8510 if (!slp)
8511 op = vect_get_store_rhs (stmt_info: next_stmt_info);
8512 if (!costing_p)
8513 vect_get_vec_defs (vinfo, stmt_info: next_stmt_info, slp_node, ncopies, op0: op,
8514 vec_oprnds0: &vec_oprnds);
8515 else
8516 update_prologue_cost (&prologue_cost, op);
8517 unsigned int group_el = 0;
8518 unsigned HOST_WIDE_INT
8519 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8520 for (j = 0; j < ncopies; j++)
8521 {
8522 if (!costing_p)
8523 {
8524 vec_oprnd = vec_oprnds[j];
8525 /* Pun the vector to extract from if necessary. */
8526 if (lvectype != vectype)
8527 {
8528 tree tem = make_ssa_name (var: lvectype);
8529 tree cvt
8530 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8531 gimple *pun = gimple_build_assign (tem, cvt);
8532 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: pun, gsi);
8533 vec_oprnd = tem;
8534 }
8535 }
8536 for (i = 0; i < nstores; i++)
8537 {
8538 if (costing_p)
8539 {
8540 /* We only need vector extraction when there is more
8541 than one store. */
8542 if (nstores > 1)
8543 inside_cost
8544 += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_to_scalar,
8545 stmt_info, misalign: 0, where: vect_body);
8546 /* Treat a single-lane vector type store as a scalar
8547 store to avoid ICEs like PR110776. */
8548 if (VECTOR_TYPE_P (ltype)
8549 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8550 n_adjacent_stores++;
8551 else
8552 inside_cost
8553 += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_store,
8554 stmt_info, misalign: 0, where: vect_body);
8555 continue;
8556 }
8557 tree newref, newoff;
8558 gimple *incr, *assign;
8559 tree size = TYPE_SIZE (ltype);
8560 /* Extract the i'th component. */
8561 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8562 bitsize_int (i), size);
8563 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8564 size, pos);
8565
8566 elem = force_gimple_operand_gsi (gsi, elem, true,
8567 NULL_TREE, true,
8568 GSI_SAME_STMT);
8569
8570 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8571 group_el * elsz);
8572 newref = build2 (MEM_REF, ltype,
8573 running_off, this_off);
8574 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8575
8576 /* And store it to *running_off. */
8577 assign = gimple_build_assign (newref, elem);
8578 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: assign, gsi);
8579
8580 group_el += lnel;
8581 if (! slp
8582 || group_el == group_size)
8583 {
8584 newoff = copy_ssa_name (var: running_off, NULL);
8585 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8586 running_off, stride_step);
8587 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
8588
8589 running_off = newoff;
8590 group_el = 0;
8591 }
8592 if (g == group_size - 1
8593 && !slp)
8594 {
8595 if (j == 0 && i == 0)
8596 *vec_stmt = assign;
8597 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: assign);
8598 }
8599 }
8600 }
8601 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8602 vec_oprnds.truncate(size: 0);
8603 if (slp)
8604 break;
8605 }
8606
8607 if (costing_p)
8608 {
8609 if (n_adjacent_stores > 0)
8610 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
8611 alignment_support_scheme, misalignment,
8612 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8613 if (dump_enabled_p ())
8614 dump_printf_loc (MSG_NOTE, vect_location,
8615 "vect_model_store_cost: inside_cost = %d, "
8616 "prologue_cost = %d .\n",
8617 inside_cost, prologue_cost);
8618 }
8619
8620 return true;
8621 }
8622
8623 gcc_assert (alignment_support_scheme);
8624 vec_loop_masks *loop_masks
8625 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8626 ? &LOOP_VINFO_MASKS (loop_vinfo)
8627 : NULL);
8628 vec_loop_lens *loop_lens
8629 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8630 ? &LOOP_VINFO_LENS (loop_vinfo)
8631 : NULL);
8632
8633 /* Don't use the length-based approach if the loop is fully masked. */
8634 gcc_assert (!loop_lens || !loop_masks);
8635
8636 /* Targets with store-lane instructions must not require explicit
8637 realignment. vect_supportable_dr_alignment always returns either
8638 dr_aligned or dr_unaligned_supported for masked operations. */
8639 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8640 && !mask
8641 && !loop_masks)
8642 || alignment_support_scheme == dr_aligned
8643 || alignment_support_scheme == dr_unaligned_supported);
8644
8645 tree offset = NULL_TREE;
8646 if (!known_eq (poffset, 0))
8647 offset = size_int (poffset);
8648
8649 tree bump;
8650 tree vec_offset = NULL_TREE;
8651 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8652 {
8653 aggr_type = NULL_TREE;
8654 bump = NULL_TREE;
8655 }
8656 else if (memory_access_type == VMAT_GATHER_SCATTER)
8657 {
8658 aggr_type = elem_type;
8659 if (!costing_p)
8660 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, gs_info: &gs_info,
8661 dataref_bump: &bump, vec_offset: &vec_offset, loop_lens);
8662 }
8663 else
8664 {
8665 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8666 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8667 else
8668 aggr_type = vectype;
8669 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8670 memory_access_type, loop_lens);
8671 }
8672
8673 if (mask && !costing_p)
8674 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8675
8676 /* In case the vectorization factor (VF) is bigger than the number
8677 of elements that we can fit in a vectype (nunits), we have to
8678 generate more than one vector stmt - i.e., we need to "unroll" the
8679 vector stmt by a factor of VF/nunits. */
8680
8681 /* In case of interleaving (non-unit grouped access):
8682
8683 S1: &base + 2 = x2
8684 S2: &base = x0
8685 S3: &base + 1 = x1
8686 S4: &base + 3 = x3
8687
8688 We create vectorized stores starting from base address (the access of the
8689 first stmt in the chain (S2 in the above example), when the last store stmt
8690 of the chain (S4) is reached:
8691
8692 VS1: &base = vx2
8693 VS2: &base + vec_size*1 = vx0
8694 VS3: &base + vec_size*2 = vx1
8695 VS4: &base + vec_size*3 = vx3
8696
8697 Then permutation statements are generated:
8698
8699 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8700 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8701 ...
8702
8703 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8704 (the order of the data-refs in the output of vect_permute_store_chain
8705 corresponds to the order of scalar stmts in the interleaving chain - see
8706 the documentation of vect_permute_store_chain()).
8707
8708 In case of both multiple types and interleaving, above vector stores and
8709 permutation stmts are created for every copy. The result vector stmts are
8710 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8711 STMT_VINFO_RELATED_STMT for the next copies.
8712 */
8713
8714 auto_vec<tree> dr_chain (group_size);
8715 auto_vec<tree> vec_masks;
8716 tree vec_mask = NULL;
8717 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8718 for (i = 0; i < group_size; i++)
8719 gvec_oprnds.quick_push (obj: new auto_vec<tree> (ncopies));
8720
8721 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8722 {
8723 gcc_assert (!slp && grouped_store);
8724 unsigned inside_cost = 0, prologue_cost = 0;
8725 /* For costing some adjacent vector stores, we'd like to cost them
8726 once with their total number instead of costing each one by one. */
8727 unsigned int n_adjacent_stores = 0;
8728 for (j = 0; j < ncopies; j++)
8729 {
8730 gimple *new_stmt;
8731 if (j == 0)
8732 {
8733 /* For interleaved stores we collect vectorized defs for all
8734 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8735 as an input to vect_permute_store_chain(). */
8736 stmt_vec_info next_stmt_info = first_stmt_info;
8737 for (i = 0; i < group_size; i++)
8738 {
8739 /* Since gaps are not supported for interleaved stores,
8740 DR_GROUP_SIZE is the exact number of stmts in the
8741 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8742 op = vect_get_store_rhs (stmt_info: next_stmt_info);
8743 if (costing_p)
8744 update_prologue_cost (&prologue_cost, op);
8745 else
8746 {
8747 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info,
8748 ncopies, op,
8749 vec_oprnds: gvec_oprnds[i]);
8750 vec_oprnd = (*gvec_oprnds[i])[0];
8751 dr_chain.quick_push (obj: vec_oprnd);
8752 }
8753 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8754 }
8755
8756 if (!costing_p)
8757 {
8758 if (mask)
8759 {
8760 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
8761 op: mask, vec_oprnds: &vec_masks,
8762 vectype: mask_vectype);
8763 vec_mask = vec_masks[0];
8764 }
8765
8766 /* We should have caught mismatched types earlier. */
8767 gcc_assert (
8768 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8769 dataref_ptr
8770 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8771 aggr_type, NULL, offset, &dummy,
8772 gsi, &ptr_incr, false, bump);
8773 }
8774 }
8775 else if (!costing_p)
8776 {
8777 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8778 /* DR_CHAIN is then used as an input to
8779 vect_permute_store_chain(). */
8780 for (i = 0; i < group_size; i++)
8781 {
8782 vec_oprnd = (*gvec_oprnds[i])[j];
8783 dr_chain[i] = vec_oprnd;
8784 }
8785 if (mask)
8786 vec_mask = vec_masks[j];
8787 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8788 stmt_info, bump);
8789 }
8790
8791 if (costing_p)
8792 {
8793 n_adjacent_stores += vec_num;
8794 continue;
8795 }
8796
8797 /* Get an array into which we can store the individual vectors. */
8798 tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num);
8799
8800 /* Invalidate the current contents of VEC_ARRAY. This should
8801 become an RTL clobber too, which prevents the vector registers
8802 from being upward-exposed. */
8803 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
8804
8805 /* Store the individual vectors into the array. */
8806 for (i = 0; i < vec_num; i++)
8807 {
8808 vec_oprnd = dr_chain[i];
8809 write_vector_array (vinfo, stmt_info, gsi, vect: vec_oprnd, array: vec_array,
8810 n: i);
8811 }
8812
8813 tree final_mask = NULL;
8814 tree final_len = NULL;
8815 tree bias = NULL;
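 /* Compute the mask under which this store-lanes operation executes:
 the loop control mask (for a fully-masked loop) and/or the user
 mask of a masked store, combined by prepare_vec_mask when both
 are present (conceptually FINAL_MASK = LOOP_MASK & VEC_MASK). */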
8816 if (loop_masks)
8817 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8818 ncopies, vectype, j);
8819 if (vec_mask)
8820 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
8821 vec_mask, gsi);
8822
8823 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8824 {
8825 if (loop_lens)
8826 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8827 ncopies, vectype, j, 1);
8828 else
8829 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8830 signed char biasval
8831 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8832 bias = build_int_cst (intQI_type_node, biasval);
8833 if (!final_mask)
8834 {
8835 mask_vectype = truth_type_for (vectype);
8836 final_mask = build_minus_one_cst (mask_vectype);
8837 }
8838 }
8839
8840 gcall *call;
8841 if (final_len && final_mask)
8842 {
8843 /* Emit:
8844 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8845 LEN, BIAS, VEC_ARRAY). */
8846 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8847 tree alias_ptr = build_int_cst (ref_type, align);
8848 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8849 dataref_ptr, alias_ptr,
8850 final_mask, final_len, bias,
8851 vec_array);
8852 }
8853 else if (final_mask)
8854 {
8855 /* Emit:
8856 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8857 VEC_ARRAY). */
8858 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8859 tree alias_ptr = build_int_cst (ref_type, align);
8860 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8861 dataref_ptr, alias_ptr,
8862 final_mask, vec_array);
8863 }
8864 else
8865 {
8866 /* Emit:
8867 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8868 data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type);
8869 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8870 gimple_call_set_lhs (gs: call, lhs: data_ref);
8871 }
8872 gimple_call_set_nothrow (s: call, nothrow_p: true);
8873 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
8874 new_stmt = call;
8875
8876 /* Record that VEC_ARRAY is now dead. */
8877 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
8878 if (j == 0)
8879 *vec_stmt = new_stmt;
8880 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
8881 }
8882
8883 if (costing_p)
8884 {
8885 if (n_adjacent_stores > 0)
8886 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
8887 alignment_support_scheme, misalignment,
8888 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8889 if (dump_enabled_p ())
8890 dump_printf_loc (MSG_NOTE, vect_location,
8891 "vect_model_store_cost: inside_cost = %d, "
8892 "prologue_cost = %d .\n",
8893 inside_cost, prologue_cost);
8894 }
8895
8896 return true;
8897 }
8898
8899 if (memory_access_type == VMAT_GATHER_SCATTER)
8900 {
8901 gcc_assert (!grouped_store);
8902 auto_vec<tree> vec_offsets;
8903 unsigned int inside_cost = 0, prologue_cost = 0;
8904 for (j = 0; j < ncopies; j++)
8905 {
8906 gimple *new_stmt;
8907 if (j == 0)
8908 {
8909 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8910 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec,
8911 stmt_info, misalign: 0, where: vect_prologue);
8912 else if (!costing_p)
8913 {
8914 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8915 DR_CHAIN is of size 1. */
8916 gcc_assert (group_size == 1);
8917 if (slp_node)
8918 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8919 else
8920 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: first_stmt_info,
8921 ncopies, op, vec_oprnds: gvec_oprnds[0]);
8922 if (mask)
8923 {
8924 if (slp_node)
8925 vect_get_slp_defs (mask_node, &vec_masks);
8926 else
8927 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
8928 ncopies,
8929 op: mask, vec_oprnds: &vec_masks,
8930 vectype: mask_vectype);
8931 }
8932
8933 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8934 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8935 slp_node, gs_info: &gs_info,
8936 dataref_ptr: &dataref_ptr, vec_offset: &vec_offsets);
8937 else
8938 dataref_ptr
8939 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8940 aggr_type, NULL, offset,
8941 &dummy, gsi, &ptr_incr, false,
8942 bump);
8943 }
8944 }
8945 else if (!costing_p)
8946 {
8947 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8948 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8949 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8950 gsi, stmt_info, bump);
8951 }
8952
8953 new_stmt = NULL;
8954 for (i = 0; i < vec_num; ++i)
8955 {
8956 if (!costing_p)
8957 {
8958 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
8959 if (mask)
8960 vec_mask = vec_masks[vec_num * j + i];
8961 /* We should have caught mismatched types earlier. */
8962 gcc_assert (useless_type_conversion_p (vectype,
8963 TREE_TYPE (vec_oprnd)));
8964 }
8965 unsigned HOST_WIDE_INT align;
8966 tree final_mask = NULL_TREE;
8967 tree final_len = NULL_TREE;
8968 tree bias = NULL_TREE;
8969 if (!costing_p)
8970 {
8971 if (loop_masks)
8972 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8973 loop_masks, ncopies,
8974 vectype, j);
8975 if (vec_mask)
8976 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
8977 loop_mask: final_mask, vec_mask, gsi);
8978 }
8979
8980 if (gs_info.ifn != IFN_LAST)
8981 {
8982 if (costing_p)
8983 {
8984 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
8985 inside_cost
8986 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
8987 stmt_info, misalign: 0, where: vect_body);
8988 continue;
8989 }
8990
8991 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8992 vec_offset = vec_offsets[vec_num * j + i];
8993 tree scale = size_int (gs_info.scale);
8994
8995 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
8996 {
8997 if (loop_lens)
8998 final_len = vect_get_loop_len (loop_vinfo, gsi,
8999 loop_lens, ncopies,
9000 vectype, j, 1);
9001 else
9002 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9003 signed char biasval
9004 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9005 bias = build_int_cst (intQI_type_node, biasval);
9006 if (!final_mask)
9007 {
9008 mask_vectype = truth_type_for (vectype);
9009 final_mask = build_minus_one_cst (mask_vectype);
9010 }
9011 }
9012
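 /* Emit one of the following, depending on the mask/length operands
 computed above:
 MASK_LEN_SCATTER_STORE (PTR, OFFSET, SCALE, DATA, MASK, LEN, BIAS)
 MASK_SCATTER_STORE (PTR, OFFSET, SCALE, DATA, MASK)
 SCATTER_STORE (PTR, OFFSET, SCALE, DATA). */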
9013 gcall *call;
9014 if (final_len && final_mask)
9015 call = gimple_build_call_internal
9016 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9017 vec_offset, scale, vec_oprnd, final_mask,
9018 final_len, bias);
9019 else if (final_mask)
9020 call = gimple_build_call_internal
9021 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9022 vec_offset, scale, vec_oprnd, final_mask);
9023 else
9024 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9025 dataref_ptr, vec_offset,
9026 scale, vec_oprnd);
9027 gimple_call_set_nothrow (s: call, nothrow_p: true);
9028 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9029 new_stmt = call;
9030 }
9031 else if (gs_info.decl)
9032 {
9033 /* The builtin decls path for scatter is legacy, x86 only. */
9034 gcc_assert (nunits.is_constant ()
9035 && (!final_mask
9036 || SCALAR_INT_MODE_P
9037 (TYPE_MODE (TREE_TYPE (final_mask)))));
9038 if (costing_p)
9039 {
9040 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
9041 inside_cost
9042 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
9043 stmt_info, misalign: 0, where: vect_body);
9044 continue;
9045 }
9046 poly_uint64 offset_nunits
9047 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype);
9048 if (known_eq (nunits, offset_nunits))
9049 {
9050 new_stmt = vect_build_one_scatter_store_call
9051 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9052 ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i],
9053 oprnd: vec_oprnd, mask: final_mask);
9054 vect_finish_stmt_generation (vinfo, stmt_info,
9055 vec_stmt: new_stmt, gsi);
9056 }
9057 else if (known_eq (nunits, offset_nunits * 2))
9058 {
9059 /* We have an offset vector with half the number of
9060 lanes, but the builtins will store full vectype
9061 data from the lower lanes. */
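 /* E.g. (illustrative): V8SF data with V4DI offsets. The first
 builtin call stores lanes 0-3 using the first offset vector;
 the data (and mask) are then permuted/unpacked so that lanes
 4-7 occupy the low half for the second call with the second
 offset vector. */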
9062 new_stmt = vect_build_one_scatter_store_call
9063 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9064 ptr: dataref_ptr,
9065 offset: vec_offsets[2 * vec_num * j + 2 * i],
9066 oprnd: vec_oprnd, mask: final_mask);
9067 vect_finish_stmt_generation (vinfo, stmt_info,
9068 vec_stmt: new_stmt, gsi);
9069 int count = nunits.to_constant ();
9070 vec_perm_builder sel (count, count, 1);
9071 sel.quick_grow (len: count);
9072 for (int i = 0; i < count; ++i)
9073 sel[i] = i | (count / 2);
9074 vec_perm_indices indices (sel, 2, count);
9075 tree perm_mask
9076 = vect_gen_perm_mask_checked (vectype, indices);
9077 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9078 vec_oprnd, vec_oprnd,
9079 perm_mask);
9080 vec_oprnd = make_ssa_name (var: vectype);
9081 gimple_set_lhs (new_stmt, vec_oprnd);
9082 vect_finish_stmt_generation (vinfo, stmt_info,
9083 vec_stmt: new_stmt, gsi);
9084 if (final_mask)
9085 {
9086 new_stmt = gimple_build_assign (NULL_TREE,
9087 VEC_UNPACK_HI_EXPR,
9088 final_mask);
9089 final_mask = make_ssa_name
9090 (var: truth_type_for (gs_info.offset_vectype));
9091 gimple_set_lhs (new_stmt, final_mask);
9092 vect_finish_stmt_generation (vinfo, stmt_info,
9093 vec_stmt: new_stmt, gsi);
9094 }
9095 new_stmt = vect_build_one_scatter_store_call
9096 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9097 ptr: dataref_ptr,
9098 offset: vec_offsets[2 * vec_num * j + 2 * i + 1],
9099 oprnd: vec_oprnd, mask: final_mask);
9100 vect_finish_stmt_generation (vinfo, stmt_info,
9101 vec_stmt: new_stmt, gsi);
9102 }
9103 else if (known_eq (nunits * 2, offset_nunits))
9104 {
9105 /* We have an offset vector with double the number of
9106 lanes. Select the low/high part accordingly. */
9107 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9108 if ((vec_num * j + i) & 1)
9109 {
9110 int count = offset_nunits.to_constant ();
9111 vec_perm_builder sel (count, count, 1);
9112 sel.quick_grow (len: count);
9113 for (int i = 0; i < count; ++i)
9114 sel[i] = i | (count / 2);
9115 vec_perm_indices indices (sel, 2, count);
9116 tree perm_mask = vect_gen_perm_mask_checked
9117 (TREE_TYPE (vec_offset), indices);
9118 new_stmt = gimple_build_assign (NULL_TREE,
9119 VEC_PERM_EXPR,
9120 vec_offset,
9121 vec_offset,
9122 perm_mask);
9123 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9124 gimple_set_lhs (new_stmt, vec_offset);
9125 vect_finish_stmt_generation (vinfo, stmt_info,
9126 vec_stmt: new_stmt, gsi);
9127 }
9128 new_stmt = vect_build_one_scatter_store_call
9129 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9130 ptr: dataref_ptr, offset: vec_offset,
9131 oprnd: vec_oprnd, mask: final_mask);
9132 vect_finish_stmt_generation (vinfo, stmt_info,
9133 vec_stmt: new_stmt, gsi);
9134 }
9135 else
9136 gcc_unreachable ();
9137 }
9138 else
9139 {
9140 /* Emulated scatter. */
9141 gcc_assert (!final_mask);
9142 if (costing_p)
9143 {
9144 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
9145 /* For an emulated scatter, N offset vector element extracts
9146 (we assume the scalar scaling and ptr + offset add is
9147 consumed by the store). */
9148 inside_cost
9149 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar,
9150 stmt_info, misalign: 0, where: vect_body);
9151 /* N scalar stores plus extracting the elements. */
9152 inside_cost
9153 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar,
9154 stmt_info, misalign: 0, where: vect_body);
9155 inside_cost
9156 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
9157 stmt_info, misalign: 0, where: vect_body);
9158 continue;
9159 }
9160
9161 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9162 unsigned HOST_WIDE_INT const_offset_nunits
9163 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype).to_constant ();
9164 vec<constructor_elt, va_gc> *ctor_elts;
9165 vec_alloc (v&: ctor_elts, nelems: const_nunits);
9166 gimple_seq stmts = NULL;
9167 tree elt_type = TREE_TYPE (vectype);
9168 unsigned HOST_WIDE_INT elt_size
9169 = tree_to_uhwi (TYPE_SIZE (elt_type));
9170 /* We support offset vectors with more elements
9171 than the data vector for now. */
9172 unsigned HOST_WIDE_INT factor
9173 = const_offset_nunits / const_nunits;
9174 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9175 unsigned elt_offset = (j % factor) * const_nunits;
9176 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9177 tree scale = size_int (gs_info.scale);
9178 align = get_object_alignment (DR_REF (first_dr_info->dr));
9179 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
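 /* Illustrative sketch of the per-lane sequence generated below
 (names are for exposition only):
 idx_k = BIT_FIELD_REF <vec_offset, ...>; lane K of the offsets
 ptr_k = dataref_ptr + (sizetype) idx_k * scale;
 elt_k = BIT_FIELD_REF <vec_oprnd, ...>; lane K of the data
 MEM[(ltype *) ptr_k] = elt_k; */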
9180 for (unsigned k = 0; k < const_nunits; ++k)
9181 {
9182 /* Compute the offsetted pointer. */
9183 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9184 bitsize_int (k + elt_offset));
9185 tree idx
9186 = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type,
9187 ops: vec_offset, TYPE_SIZE (idx_type), ops: boff);
9188 idx = gimple_convert (seq: &stmts, sizetype, op: idx);
9189 idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype,
9190 ops: idx, ops: scale);
9191 tree ptr
9192 = gimple_build (seq: &stmts, code: PLUS_EXPR,
9193 TREE_TYPE (dataref_ptr),
9194 ops: dataref_ptr, ops: idx);
9195 ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr);
9196 /* Extract the element to be stored. */
9197 tree elt
9198 = gimple_build (seq: &stmts, code: BIT_FIELD_REF,
9199 TREE_TYPE (vectype),
9200 ops: vec_oprnd, TYPE_SIZE (elt_type),
9201 bitsize_int (k * elt_size));
9202 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9203 stmts = NULL;
9204 tree ref
9205 = build2 (MEM_REF, ltype, ptr,
9206 build_int_cst (ref_type, 0));
9207 new_stmt = gimple_build_assign (ref, elt);
9208 vect_finish_stmt_generation (vinfo, stmt_info,
9209 vec_stmt: new_stmt, gsi);
9210 }
9211 if (slp)
9212 slp_node->push_vec_def (def: new_stmt);
9213 }
9214 }
9215 if (!slp && !costing_p)
9216 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
9217 }
9218
9219 if (!slp && !costing_p)
9220 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9221
9222 if (costing_p && dump_enabled_p ())
9223 dump_printf_loc (MSG_NOTE, vect_location,
9224 "vect_model_store_cost: inside_cost = %d, "
9225 "prologue_cost = %d .\n",
9226 inside_cost, prologue_cost);
9227
9228 return true;
9229 }
9230
9231 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9232 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9233 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9234 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9235
9236 unsigned inside_cost = 0, prologue_cost = 0;
9237 /* For costing some adjacent vector stores, we'd like to cost them
9238 once with their total number instead of costing each one by one. */
9239 unsigned int n_adjacent_stores = 0;
9240 auto_vec<tree> result_chain (group_size);
9241 auto_vec<tree, 1> vec_oprnds;
9242 for (j = 0; j < ncopies; j++)
9243 {
9244 gimple *new_stmt;
9245 if (j == 0)
9246 {
9247 if (slp && !costing_p)
9248 {
9249 /* Get vectorized arguments for SLP_NODE. */
9250 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: 1, op0: op,
9251 vec_oprnds0: &vec_oprnds, op1: mask, vec_oprnds1: &vec_masks);
9252 vec_oprnd = vec_oprnds[0];
9253 if (mask)
9254 vec_mask = vec_masks[0];
9255 }
9256 else
9257 {
9258 /* For interleaved stores we collect vectorized defs for all the
9259 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9260 input to vect_permute_store_chain().
9261
9262 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9263 is of size 1. */
9264 stmt_vec_info next_stmt_info = first_stmt_info;
9265 for (i = 0; i < group_size; i++)
9266 {
9267 /* Since gaps are not supported for interleaved stores,
9268 DR_GROUP_SIZE is the exact number of stmts in the chain.
9269 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
9270 is no interleaving, DR_GROUP_SIZE is 1, and only one
9271 iteration of the loop will be executed. */
9272 op = vect_get_store_rhs (stmt_info: next_stmt_info);
9273 if (costing_p)
9274 update_prologue_cost (&prologue_cost, op);
9275 else
9276 {
9277 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info,
9278 ncopies, op,
9279 vec_oprnds: gvec_oprnds[i]);
9280 vec_oprnd = (*gvec_oprnds[i])[0];
9281 dr_chain.quick_push (obj: vec_oprnd);
9282 }
9283 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9284 }
9285 if (mask && !costing_p)
9286 {
9287 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
9288 op: mask, vec_oprnds: &vec_masks,
9289 vectype: mask_vectype);
9290 vec_mask = vec_masks[0];
9291 }
9292 }
9293
9294 /* We should have caught mismatched types earlier. */
9295 gcc_assert (costing_p
9296 || useless_type_conversion_p (vectype,
9297 TREE_TYPE (vec_oprnd)));
9298 bool simd_lane_access_p
9299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9300 if (!costing_p
9301 && simd_lane_access_p
9302 && !loop_masks
9303 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9304 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9305 && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info))
9306 && integer_zerop (DR_INIT (first_dr_info->dr))
9307 && alias_sets_conflict_p (get_alias_set (aggr_type),
9308 get_alias_set (TREE_TYPE (ref_type))))
9309 {
9310 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9311 dataref_offset = build_int_cst (ref_type, 0);
9312 }
9313 else if (!costing_p)
9314 dataref_ptr
9315 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9316 simd_lane_access_p ? loop : NULL,
9317 offset, &dummy, gsi, &ptr_incr,
9318 simd_lane_access_p, bump);
9319 }
9320 else if (!costing_p)
9321 {
9322 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9323 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9324 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9325 of size 1. */
9326 for (i = 0; i < group_size; i++)
9327 {
9328 vec_oprnd = (*gvec_oprnds[i])[j];
9329 dr_chain[i] = vec_oprnd;
9330 }
9331 if (mask)
9332 vec_mask = vec_masks[j];
9333 if (dataref_offset)
9334 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9335 else
9336 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9337 stmt_info, bump);
9338 }
9339
9340 new_stmt = NULL;
9341 if (grouped_store)
9342 {
9343 /* Permute. */
9344 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9345 if (costing_p)
9346 {
9347 int group_size = DR_GROUP_SIZE (first_stmt_info);
9348 int nstmts = ceil_log2 (x: group_size) * group_size;
9349 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm,
9350 stmt_info, misalign: 0, where: vect_body);
9351 if (dump_enabled_p ())
9352 dump_printf_loc (MSG_NOTE, vect_location,
9353 "vect_model_store_cost: "
9354 "strided group_size = %d .\n",
9355 group_size);
9356 }
9357 else
9358 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9359 gsi, &result_chain);
9360 }
9361
9362 stmt_vec_info next_stmt_info = first_stmt_info;
9363 for (i = 0; i < vec_num; i++)
9364 {
9365 if (!costing_p)
9366 {
9367 if (slp)
9368 vec_oprnd = vec_oprnds[i];
9369 else if (grouped_store)
9370 /* For grouped stores vectorized defs are interleaved in
9371 vect_permute_store_chain(). */
9372 vec_oprnd = result_chain[i];
9373 }
9374
9375 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9376 {
9377 if (costing_p)
9378 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm,
9379 stmt_info, misalign: 0, where: vect_body);
9380 else
9381 {
9382 tree perm_mask = perm_mask_for_reverse (vectype);
9383 tree perm_dest = vect_create_destination_var (
9384 vect_get_store_rhs (stmt_info), vectype);
9385 tree new_temp = make_ssa_name (var: perm_dest);
9386
9387 /* Generate the permute statement. */
9388 gimple *perm_stmt
9389 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9390 vec_oprnd, perm_mask);
9391 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt,
9392 gsi);
9393
9394 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9395 vec_oprnd = new_temp;
9396 }
9397 }
9398
9399 if (costing_p)
9400 {
9401 n_adjacent_stores++;
9402
9403 if (!slp)
9404 {
9405 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9406 if (!next_stmt_info)
9407 break;
9408 }
9409
9410 continue;
9411 }
9412
9413 tree final_mask = NULL_TREE;
9414 tree final_len = NULL_TREE;
9415 tree bias = NULL_TREE;
9416 if (loop_masks)
9417 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9418 vec_num * ncopies, vectype,
9419 vec_num * j + i);
9420 if (slp && vec_mask)
9421 vec_mask = vec_masks[i];
9422 if (vec_mask)
9423 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
9424 vec_mask, gsi);
9425
9426 if (i > 0)
9427 /* Bump the vector pointer. */
9428 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9429 stmt_info, bump);
9430
9431 unsigned misalign;
9432 unsigned HOST_WIDE_INT align;
9433 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9434 if (alignment_support_scheme == dr_aligned)
9435 misalign = 0;
9436 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9437 {
9438 align = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info));
9439 misalign = 0;
9440 }
9441 else
9442 misalign = misalignment;
9443 if (dataref_offset == NULL_TREE
9444 && TREE_CODE (dataref_ptr) == SSA_NAME)
9445 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9446 misalign);
9447 align = least_bit_hwi (x: misalign | align);
9448
9449 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9450 machine_mode vmode = TYPE_MODE (vectype);
9451 machine_mode new_vmode = vmode;
9452 internal_fn partial_ifn = IFN_LAST;
9453 if (loop_lens)
9454 {
9455 opt_machine_mode new_ovmode
9456 = get_len_load_store_mode (vmode, false, &partial_ifn);
9457 new_vmode = new_ovmode.require ();
9458 unsigned factor
9459 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
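 /* If the target only provides the length-controlled store for a
 byte vector mode (e.g., illustratively, a V4SI store wrapped as
 a V16QI LEN_STORE), the 'len' operand counts bytes rather than
 elements, hence the element-size factor above. */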
9460 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9461 vec_num * ncopies, vectype,
9462 vec_num * j + i, factor);
9463 }
9464 else if (final_mask)
9465 {
9466 if (!can_vec_mask_load_store_p (
9467 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9468 &partial_ifn))
9469 gcc_unreachable ();
9470 }
9471
9472 if (partial_ifn == IFN_MASK_LEN_STORE)
9473 {
9474 if (!final_len)
9475 {
9476 /* Pass VF value to 'len' argument of
9477 MASK_LEN_STORE if LOOP_LENS is invalid. */
9478 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9479 }
9480 if (!final_mask)
9481 {
9482 /* Pass all ones value to 'mask' argument of
9483 MASK_LEN_STORE if final_mask is invalid. */
9484 mask_vectype = truth_type_for (vectype);
9485 final_mask = build_minus_one_cst (mask_vectype);
9486 }
9487 }
9488 if (final_len)
9489 {
9490 signed char biasval
9491 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9492
9493 bias = build_int_cst (intQI_type_node, biasval);
9494 }
9495
9496 /* Arguments are ready. Create the new vector stmt. */
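 /* Depending on FINAL_LEN, PARTIAL_IFN and FINAL_MASK this emits one of:
 MASK_LEN_STORE (DATAREF_PTR, PTR, MASK, LEN, BIAS, DATA)
 LEN_STORE (DATAREF_PTR, PTR, LEN, BIAS, DATA)
 MASK_STORE (DATAREF_PTR, PTR, MASK, DATA)
 or a plain vector MEM_REF assignment. */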
9497 if (final_len)
9498 {
9499 gcall *call;
9500 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9501 /* Need conversion if it's wrapped with VnQI. */
9502 if (vmode != new_vmode)
9503 {
9504 tree new_vtype
9505 = build_vector_type_for_mode (unsigned_intQI_type_node,
9506 new_vmode);
9507 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9508 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9509 gassign *new_stmt
9510 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9511 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
9512 vec_oprnd = var;
9513 }
9514
9515 if (partial_ifn == IFN_MASK_LEN_STORE)
9516 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9517 dataref_ptr, ptr, final_mask,
9518 final_len, bias, vec_oprnd);
9519 else
9520 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9521 dataref_ptr, ptr, final_len,
9522 bias, vec_oprnd);
9523 gimple_call_set_nothrow (s: call, nothrow_p: true);
9524 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9525 new_stmt = call;
9526 }
9527 else if (final_mask)
9528 {
9529 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9530 gcall *call
9531 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9532 ptr, final_mask, vec_oprnd);
9533 gimple_call_set_nothrow (s: call, nothrow_p: true);
9534 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9535 new_stmt = call;
9536 }
9537 else
9538 {
9539 data_ref
9540 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9541 dataref_offset ? dataref_offset
9542 : build_int_cst (ref_type, 0));
9543 if (alignment_support_scheme == dr_aligned)
9544 ;
9545 else
9546 TREE_TYPE (data_ref)
9547 = build_aligned_type (TREE_TYPE (data_ref),
9548 align * BITS_PER_UNIT);
9549 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9550 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9551 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
9552 }
9553
9554 if (slp)
9555 continue;
9556
9557 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9558 if (!next_stmt_info)
9559 break;
9560 }
9561 if (!slp && !costing_p)
9562 {
9563 if (j == 0)
9564 *vec_stmt = new_stmt;
9565 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
9566 }
9567 }
9568
9569 if (costing_p)
9570 {
9571 if (n_adjacent_stores > 0)
9572 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
9573 alignment_support_scheme, misalignment,
9574 inside_cost: &inside_cost, body_cost_vec: cost_vec);
9575
9576 /* When vectorizing a store into the function result, assign
9577 a penalty if the function returns in a multi-register location.
9578 In this case we assume we'll end up having to spill the
9579 vector result and do piecewise loads, as a conservative estimate. */
9580 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9581 if (base
9582 && (TREE_CODE (base) == RESULT_DECL
9583 || (DECL_P (base) && cfun_returns (decl: base)))
9584 && !aggregate_value_p (base, cfun->decl))
9585 {
9586 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9587 /* ??? Handle PARALLEL in some way. */
9588 if (REG_P (reg))
9589 {
9590 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9591 /* Assume that a single reg-reg move is possible and cheap;
9592 do not account for the vector-to-GP-register move cost. */
9593 if (nregs > 1)
9594 {
9595 /* Spill. */
9596 prologue_cost
9597 += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_store,
9598 stmt_info, misalign: 0, where: vect_epilogue);
9599 /* Loads. */
9600 prologue_cost
9601 += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies * nregs, kind: scalar_load,
9602 stmt_info, misalign: 0, where: vect_epilogue);
9603 }
9604 }
9605 }
9606 if (dump_enabled_p ())
9607 dump_printf_loc (MSG_NOTE, vect_location,
9608 "vect_model_store_cost: inside_cost = %d, "
9609 "prologue_cost = %d .\n",
9610 inside_cost, prologue_cost);
9611 }
9612
9613 return true;
9614}
9615
9616/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9617 VECTOR_CST mask. No checks are made that the target platform supports the
9618 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9619 vect_gen_perm_mask_checked. */
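/* For example (illustrative): for a four-lane VECTYPE and SEL = {0, 4, 1, 5}
 the result is the VECTOR_CST {0, 4, 1, 5} with four ssizetype elements,
 which interleaves the low halves of two input vectors when used as the
 selector of a VEC_PERM_EXPR. */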
9620
9621tree
9622vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9623{
9624 tree mask_type;
9625
9626 poly_uint64 nunits = sel.length ();
9627 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9628
9629 mask_type = build_vector_type (ssizetype, nunits);
9630 return vec_perm_indices_to_tree (mask_type, sel);
9631}
9632
9633/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9634 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9635
9636tree
9637vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9638{
9639 machine_mode vmode = TYPE_MODE (vectype);
9640 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9641 return vect_gen_perm_mask_any (vectype, sel);
9642}
9643
9644/* Given vector variables X and Y that were generated for the scalar
9645 STMT_INFO, generate instructions to permute the vector elements of X and Y
9646 using the permutation mask MASK_VEC, insert them at *GSI and return the
9647 permuted vector variable. */
9648
9649static tree
9650permute_vec_elements (vec_info *vinfo,
9651 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9652 gimple_stmt_iterator *gsi)
9653{
9654 tree vectype = TREE_TYPE (x);
9655 tree perm_dest, data_ref;
9656 gimple *perm_stmt;
9657
9658 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9659 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9660 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9661 else
9662 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9663 data_ref = make_ssa_name (var: perm_dest);
9664
9665 /* Generate the permute statement. */
9666 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9667 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, gsi);
9668
9669 return data_ref;
9670}
9671
9672/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9673 inserting them on the loop's preheader edge. Returns true if we
9674 were successful in doing so (and thus STMT_INFO can then be moved),
9675 otherwise returns false. HOIST_P indicates whether we want to hoist the
9676 definitions of all SSA uses; it is false when we are only costing. */
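/* For example (illustrative): if STMT_INFO is the load 'x = *q' and
 'q = &a[off]' is defined inside LOOP while OFF itself is defined outside
 of it, Q's definition can be moved to the preheader, after which the
 load itself becomes hoistable. */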
9677
9678static bool
9679hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9680{
9681 ssa_op_iter i;
9682 tree op;
9683 bool any = false;
9684
9685 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9686 {
9687 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9688 if (!gimple_nop_p (g: def_stmt)
9689 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt)))
9690 {
9691 /* Make sure we don't need to recurse. While we could do
9692 so in simple cases, when there are more complex use webs
9693 we don't have an easy way to preserve stmt order to fulfil
9694 dependencies within them. */
9695 tree op2;
9696 ssa_op_iter i2;
9697 if (gimple_code (g: def_stmt) == GIMPLE_PHI)
9698 return false;
9699 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9700 {
9701 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9702 if (!gimple_nop_p (g: def_stmt2)
9703 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt2)))
9704 return false;
9705 }
9706 any = true;
9707 }
9708 }
9709
9710 if (!any)
9711 return true;
9712
9713 if (!hoist_p)
9714 return true;
9715
9716 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9717 {
9718 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9719 if (!gimple_nop_p (g: def_stmt)
9720 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt)))
9721 {
9722 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9723 gsi_remove (&gsi, false);
9724 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9725 }
9726 }
9727
9728 return true;
9729}
9730
9731/* vectorizable_load.
9732
9733 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9734 that can be vectorized.
9735 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9736 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9737 Return true if STMT_INFO is vectorizable in this way. */
9738
9739static bool
9740vectorizable_load (vec_info *vinfo,
9741 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9742 gimple **vec_stmt, slp_tree slp_node,
9743 stmt_vector_for_cost *cost_vec)
9744{
9745 tree scalar_dest;
9746 tree vec_dest = NULL;
9747 tree data_ref = NULL;
9748 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
9749 class loop *loop = NULL;
9750 class loop *containing_loop = gimple_bb (g: stmt_info->stmt)->loop_father;
9751 bool nested_in_vect_loop = false;
9752 tree elem_type;
9753 /* Avoid false positive uninitialized warning, see PR110652. */
9754 tree new_temp = NULL_TREE;
9755 machine_mode mode;
9756 tree dummy;
9757 tree dataref_ptr = NULL_TREE;
9758 tree dataref_offset = NULL_TREE;
9759 gimple *ptr_incr = NULL;
9760 int ncopies;
9761 int i, j;
9762 unsigned int group_size;
9763 poly_uint64 group_gap_adj;
9764 tree msq = NULL_TREE, lsq;
9765 tree realignment_token = NULL_TREE;
9766 gphi *phi = NULL;
9767 vec<tree> dr_chain = vNULL;
9768 bool grouped_load = false;
9769 stmt_vec_info first_stmt_info;
9770 stmt_vec_info first_stmt_info_for_drptr = NULL;
9771 bool compute_in_loop = false;
9772 class loop *at_loop;
9773 int vec_num;
9774 bool slp = (slp_node != NULL);
9775 bool slp_perm = false;
9776 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
9777 poly_uint64 vf;
9778 tree aggr_type;
9779 gather_scatter_info gs_info;
9780 tree ref_type;
9781 enum vect_def_type mask_dt = vect_unknown_def_type;
9782
9783 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9784 return false;
9785
9786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9787 && ! vec_stmt)
9788 return false;
9789
9790 if (!STMT_VINFO_DATA_REF (stmt_info))
9791 return false;
9792
9793 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9794 int mask_index = -1;
9795 slp_tree slp_op = NULL;
9796 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
9797 {
9798 scalar_dest = gimple_assign_lhs (gs: assign);
9799 if (TREE_CODE (scalar_dest) != SSA_NAME)
9800 return false;
9801
9802 tree_code code = gimple_assign_rhs_code (gs: assign);
9803 if (code != ARRAY_REF
9804 && code != BIT_FIELD_REF
9805 && code != INDIRECT_REF
9806 && code != COMPONENT_REF
9807 && code != IMAGPART_EXPR
9808 && code != REALPART_EXPR
9809 && code != MEM_REF
9810 && TREE_CODE_CLASS (code) != tcc_declaration)
9811 return false;
9812 }
9813 else
9814 {
9815 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
9816 if (!call || !gimple_call_internal_p (gs: call))
9817 return false;
9818
9819 internal_fn ifn = gimple_call_internal_fn (gs: call);
9820 if (!internal_load_fn_p (ifn))
9821 return false;
9822
9823 scalar_dest = gimple_call_lhs (gs: call);
9824 if (!scalar_dest)
9825 return false;
9826
9827 mask_index = internal_fn_mask_index (ifn);
9828 if (mask_index >= 0 && slp_node)
9829 mask_index = vect_slp_child_index_for_operand
9830 (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9831 if (mask_index >= 0
9832 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9833 mask: &mask, mask_node: &slp_op, mask_dt_out: &mask_dt, mask_vectype_out: &mask_vectype))
9834 return false;
9835 }
9836
9837 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9838 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
9839
9840 if (loop_vinfo)
9841 {
9842 loop = LOOP_VINFO_LOOP (loop_vinfo);
9843 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9844 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9845 }
9846 else
9847 vf = 1;
9848
9849 /* Multiple types in SLP are handled by creating the appropriate number of
9850 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9851 case of SLP. */
9852 if (slp)
9853 ncopies = 1;
9854 else
9855 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9856
9857 gcc_assert (ncopies >= 1);
9858
9859 /* FORNOW. This restriction should be relaxed. */
9860 if (nested_in_vect_loop && ncopies > 1)
9861 {
9862 if (dump_enabled_p ())
9863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9864 "multiple types in nested loop.\n");
9865 return false;
9866 }
9867
9868 /* Invalidate assumptions made by dependence analysis when vectorization
9869 on the unrolled body effectively re-orders stmts. */
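 /* E.g. if the minimum negative dependence distance is 2, unrolling to
 an effective factor of 4 would re-order accesses that are only known
 to be independent within a window of two scalar iterations. */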
9870 if (ncopies > 1
9871 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9872 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9873 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9874 {
9875 if (dump_enabled_p ())
9876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9877 "cannot perform implicit CSE when unrolling "
9878 "with negative dependence distance\n");
9879 return false;
9880 }
9881
9882 elem_type = TREE_TYPE (vectype);
9883 mode = TYPE_MODE (vectype);
9884
9885 /* FORNOW. In some cases we can vectorize even if the data type is not
9886 supported (e.g. data copies). */
9887 if (optab_handler (op: mov_optab, mode) == CODE_FOR_nothing)
9888 {
9889 if (dump_enabled_p ())
9890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9891 "Aligned load, but unsupported type.\n");
9892 return false;
9893 }
9894
9895 /* Check if the load is a part of an interleaving chain. */
9896 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9897 {
9898 grouped_load = true;
9899 /* FORNOW */
9900 gcc_assert (!nested_in_vect_loop);
9901 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9902
9903 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9904 group_size = DR_GROUP_SIZE (first_stmt_info);
9905
9906 /* Refuse non-SLP vectorization of SLP-only groups. */
9907 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9908 {
9909 if (dump_enabled_p ())
9910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9911 "cannot vectorize load in non-SLP mode.\n");
9912 return false;
9913 }
9914
9915 /* Invalidate assumptions made by dependence analysis when vectorization
9916 on the unrolled body effectively re-orders stmts. */
9917 if (!PURE_SLP_STMT (stmt_info)
9918 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9919 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9920 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9921 {
9922 if (dump_enabled_p ())
9923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9924 "cannot perform implicit CSE when performing "
9925 "group loads with negative dependence distance\n");
9926 return false;
9927 }
9928 }
9929 else
9930 group_size = 1;
9931
9932 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9933 {
9934 slp_perm = true;
9935
9936 if (!loop_vinfo)
9937 {
9938 /* In BB vectorization we must not use a loaded vector that
9939 accesses elements in excess of DR_GROUP_SIZE. */
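 /* E.g. with DR_GROUP_SIZE == 6 and four-element vectors only
 elements [0, 3] are covered by an in-bounds vector, so a
 permutation index of 4 or above would need a load extending
 into the gap at the end of the group. */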
9940 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9941 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9942 unsigned HOST_WIDE_INT nunits;
9943 unsigned j, k, maxk = 0;
9944 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9945 if (k > maxk)
9946 maxk = k;
9947 tree vectype = SLP_TREE_VECTYPE (slp_node);
9948 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits)
9949 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9950 {
9951 if (dump_enabled_p ())
9952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9953 "BB vectorization with gaps at the end of "
9954 "a load is not supported\n");
9955 return false;
9956 }
9957 }
9958
9959 auto_vec<tree> tem;
9960 unsigned n_perms;
9961 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9962 true, &n_perms))
9963 {
9964 if (dump_enabled_p ())
9965 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9966 vect_location,
9967 "unsupported load permutation\n");
9968 return false;
9969 }
9970 }
9971
9972 vect_memory_access_type memory_access_type;
9973 enum dr_alignment_support alignment_support_scheme;
9974 int misalignment;
9975 poly_int64 poffset;
9976 internal_fn lanes_ifn;
9977 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type: VLS_LOAD,
9978 ncopies, memory_access_type: &memory_access_type, poffset: &poffset,
9979 alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info,
9980 lanes_ifn: &lanes_ifn))
9981 return false;
9982
9983 if (mask)
9984 {
9985 if (memory_access_type == VMAT_CONTIGUOUS)
9986 {
9987 machine_mode vec_mode = TYPE_MODE (vectype);
9988 if (!VECTOR_MODE_P (vec_mode)
9989 || !can_vec_mask_load_store_p (vec_mode,
9990 TYPE_MODE (mask_vectype), true))
9991 return false;
9992 }
9993 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9994 && memory_access_type != VMAT_GATHER_SCATTER)
9995 {
9996 if (dump_enabled_p ())
9997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9998 "unsupported access type for masked load.\n");
9999 return false;
10000 }
10001 else if (memory_access_type == VMAT_GATHER_SCATTER
10002 && gs_info.ifn == IFN_LAST
10003 && !gs_info.decl)
10004 {
10005 if (dump_enabled_p ())
10006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10007 "unsupported masked emulated gather.\n");
10008 return false;
10009 }
10010 }
10011
10012 bool costing_p = !vec_stmt;
10013
10014 if (costing_p) /* transformation not required. */
10015 {
10016 if (slp_node
10017 && mask
10018 && !vect_maybe_update_slp_op_vectype (slp_op,
10019 mask_vectype))
10020 {
10021 if (dump_enabled_p ())
10022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10023 "incompatible vector types for invariants\n");
10024 return false;
10025 }
10026
10027 if (!slp)
10028 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10029
10030 if (loop_vinfo
10031 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10032 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10033 vls_type: VLS_LOAD, group_size,
10034 memory_access_type, gs_info: &gs_info,
10035 scalar_mask: mask);
10036
10037 if (dump_enabled_p ()
10038 && memory_access_type != VMAT_ELEMENTWISE
10039 && memory_access_type != VMAT_GATHER_SCATTER
10040 && alignment_support_scheme != dr_aligned)
10041 dump_printf_loc (MSG_NOTE, vect_location,
10042 "Vectorizing an unaligned access.\n");
10043
10044 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10045 vinfo->any_known_not_updated_vssa = true;
10046
10047 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10048 }
10049
10050 if (!slp)
10051 gcc_assert (memory_access_type
10052 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10053
10054 if (dump_enabled_p () && !costing_p)
10055 dump_printf_loc (MSG_NOTE, vect_location,
10056 "transform load. ncopies = %d\n", ncopies);
10057
10058 /* Transform. */
10059
10060 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10061 ensure_base_align (dr_info);
10062
10063 if (memory_access_type == VMAT_INVARIANT)
10064 {
10065 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10066 /* If we have versioned for aliasing or the loop doesn't
10067 have any data dependencies that would preclude this,
10068 then we are sure this is a loop invariant load and
10069 thus we can insert it on the preheader edge.
10070 TODO: hoist_defs_of_uses should ideally be computed
10071 once at analysis time, remembered and reused at
10072 transform time. */
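 /* For example, for

 for (i = 0; i < n; ++i)
 a[i] = *p;

 a hoistable load emits the scalar read of *p once on the preheader
 edge and broadcasts its value to a vector, instead of re-loading it
 in the vectorized loop body. */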
10073 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10074 && !nested_in_vect_loop
10075 && hoist_defs_of_uses (stmt_info, loop, hoist_p: !costing_p));
10076 if (costing_p)
10077 {
10078 enum vect_cost_model_location cost_loc
10079 = hoist_p ? vect_prologue : vect_body;
10080 unsigned int cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load,
10081 stmt_info, misalign: 0, where: cost_loc);
10082 cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info, misalign: 0,
10083 where: cost_loc);
10084 unsigned int prologue_cost = hoist_p ? cost : 0;
10085 unsigned int inside_cost = hoist_p ? 0 : cost;
10086 if (dump_enabled_p ())
10087 dump_printf_loc (MSG_NOTE, vect_location,
10088 "vect_model_load_cost: inside_cost = %d, "
10089 "prologue_cost = %d .\n",
10090 inside_cost, prologue_cost);
10091 return true;
10092 }
10093 if (hoist_p)
10094 {
10095 gassign *stmt = as_a <gassign *> (p: stmt_info->stmt);
10096 if (dump_enabled_p ())
10097 dump_printf_loc (MSG_NOTE, vect_location,
10098 "hoisting out of the vectorized loop: %G",
10099 (gimple *) stmt);
10100 scalar_dest = copy_ssa_name (var: scalar_dest);
10101 tree rhs = unshare_expr (gimple_assign_rhs1 (gs: stmt));
10102 edge pe = loop_preheader_edge (loop);
10103 gphi *vphi = get_virtual_phi (loop->header);
10104 tree vuse;
10105 if (vphi)
10106 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10107 else
10108 vuse = gimple_vuse (g: gsi_stmt (i: *gsi));
10109 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10110 gimple_set_vuse (g: new_stmt, vuse);
10111 gsi_insert_on_edge_immediate (pe, new_stmt);
10112 }
10113 /* These copies are all equivalent. */
10114 if (hoist_p)
10115 new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest,
10116 type: vectype, NULL);
10117 else
10118 {
10119 gimple_stmt_iterator gsi2 = *gsi;
10120 gsi_next (i: &gsi2);
10121 new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest,
10122 type: vectype, gsi: &gsi2);
10123 }
10124 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10125 if (slp)
10126 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10127 slp_node->push_vec_def (def: new_stmt);
10128 else
10129 {
10130 for (j = 0; j < ncopies; ++j)
10131 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
10132 *vec_stmt = new_stmt;
10133 }
10134 return true;
10135 }
10136
10137 if (memory_access_type == VMAT_ELEMENTWISE
10138 || memory_access_type == VMAT_STRIDED_SLP)
10139 {
10140 gimple_stmt_iterator incr_gsi;
10141 bool insert_after;
10142 tree offvar;
10143 tree ivstep;
10144 tree running_off;
10145 vec<constructor_elt, va_gc> *v = NULL;
10146 tree stride_base, stride_step, alias_off;
10147 /* Checked by get_load_store_type. */
10148 unsigned int const_nunits = nunits.to_constant ();
10149 unsigned HOST_WIDE_INT cst_offset = 0;
10150 tree dr_offset;
10151 unsigned int inside_cost = 0;
10152
10153 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10154 gcc_assert (!nested_in_vect_loop);
10155
10156 if (grouped_load)
10157 {
10158 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10159 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10160 }
10161 else
10162 {
10163 first_stmt_info = stmt_info;
10164 first_dr_info = dr_info;
10165 }
10166
10167 if (slp && grouped_load)
10168 {
10169 group_size = DR_GROUP_SIZE (first_stmt_info);
10170 ref_type = get_group_alias_ptr_type (first_stmt_info);
10171 }
10172 else
10173 {
10174 if (grouped_load)
10175 cst_offset
10176 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10177 * vect_get_place_in_interleaving_chain (stmt_info,
10178 first_stmt_info));
10179 group_size = 1;
10180 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10181 }
10182
10183 if (!costing_p)
10184 {
10185 dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info);
10186 stride_base = fold_build_pointer_plus (
10187 DR_BASE_ADDRESS (first_dr_info->dr),
10188 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10189 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10190 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10191
10192 /* For a load with a loop-invariant stride (other than a power of
10193 two, i.e. not a grouped access) like so:
10194
10195 for (i = 0; i < n; i += stride)
10196 ... = array[i];
10197
10198 we generate a new induction variable and new accesses to
10199 form a new vector (or vectors, depending on ncopies):
10200
10201 for (j = 0; ; j += VF*stride)
10202 tmp1 = array[j];
10203 tmp2 = array[j + stride];
10204 ...
10205 vectemp = {tmp1, tmp2, ...}
10206 */
10207
10208 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10209 build_int_cst (TREE_TYPE (stride_step), vf));
10210
10211 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10212
10213 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10214 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10215 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10216 loop, &incr_gsi, insert_after,
10217 &offvar, NULL);
10218
10219 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10220 }
10221
10222 running_off = offvar;
10223 alias_off = build_int_cst (ref_type, 0);
10224 int nloads = const_nunits;
10225 int lnel = 1;
10226 tree ltype = TREE_TYPE (vectype);
10227 tree lvectype = vectype;
10228 auto_vec<tree> dr_chain;
10229 if (memory_access_type == VMAT_STRIDED_SLP)
10230 {
10231 if (group_size < const_nunits)
10232 {
10233 /* First check if vec_init optab supports construction from vector
10234 elts directly. Otherwise avoid emitting a constructor of
10235 vector elements by performing the loads using an integer type
10236 of the same size, constructing a vector of those and then
10237 re-interpreting it as the original vector type. This avoids a
10238 huge runtime penalty due to the general inability to perform
10239 store forwarding from smaller stores to a larger load. */
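 /* For instance, with a V16QI vectype and a group size of 4 the
 loads can be done as four 32-bit integer loads, combined into
 a V4SI vector and then VIEW_CONVERTed back to V16QI. */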
10240 tree ptype;
10241 tree vtype
10242 = vector_vector_composition_type (vtype: vectype,
10243 nelts: const_nunits / group_size,
10244 ptype: &ptype);
10245 if (vtype != NULL_TREE)
10246 {
10247 nloads = const_nunits / group_size;
10248 lnel = group_size;
10249 lvectype = vtype;
10250 ltype = ptype;
10251 }
10252 }
10253 else
10254 {
10255 nloads = 1;
10256 lnel = const_nunits;
10257 ltype = vectype;
10258 }
10259 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10260 }
10261 /* Load the whole vector(1) scalar_type if the vectype has just one element. */
10262 else if (nloads == 1)
10263 ltype = vectype;
10264
10265 if (slp)
10266 {
10267 /* For SLP permutation support we need to load the whole group,
10268 not only the number of vector stmts the permutation result
10269 fits in. */
10270 if (slp_perm)
10271 {
10272 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10273 variable VF. */
10274 unsigned int const_vf = vf.to_constant ();
10275 ncopies = CEIL (group_size * const_vf, const_nunits);
10276 dr_chain.create (nelems: ncopies);
10277 }
10278 else
10279 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10280 }
10281 unsigned int group_el = 0;
10282 unsigned HOST_WIDE_INT
10283 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10284 unsigned int n_groups = 0;
10285 /* For costing some adjacent vector loads, we'd like to cost them
10286 once with their total number instead of costing each one by one. */
10287 unsigned int n_adjacent_loads = 0;
10288 for (j = 0; j < ncopies; j++)
10289 {
10290 if (nloads > 1 && !costing_p)
10291 vec_alloc (v, nelems: nloads);
10292 gimple *new_stmt = NULL;
10293 for (i = 0; i < nloads; i++)
10294 {
10295 if (costing_p)
10296 {
10297 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10298 avoid ICE, see PR110776. */
10299 if (VECTOR_TYPE_P (ltype)
10300 && memory_access_type != VMAT_ELEMENTWISE)
10301 n_adjacent_loads++;
10302 else
10303 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load,
10304 stmt_info, misalign: 0, where: vect_body);
10305 continue;
10306 }
10307 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10308 group_el * elsz + cst_offset);
10309 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10310 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10311 new_stmt = gimple_build_assign (make_ssa_name (var: ltype), data_ref);
10312 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
10313 if (nloads > 1)
10314 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10315 gimple_assign_lhs (new_stmt));
10316
10317 group_el += lnel;
10318 if (! slp
10319 || group_el == group_size)
10320 {
10321 n_groups++;
10322 /* When doing SLP make sure not to load elements from
10323 the next vector iteration; those will not be accessed,
10324 so just use the last element again. See PR107451. */
10325 if (!slp || known_lt (n_groups, vf))
10326 {
10327 tree newoff = copy_ssa_name (var: running_off);
10328 gimple *incr
10329 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10330 running_off, stride_step);
10331 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
10332 running_off = newoff;
10333 }
10334 group_el = 0;
10335 }
10336 }
10337
10338 if (nloads > 1)
10339 {
10340 if (costing_p)
10341 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_construct,
10342 stmt_info, misalign: 0, where: vect_body);
10343 else
10344 {
10345 tree vec_inv = build_constructor (lvectype, v);
10346 new_temp = vect_init_vector (vinfo, stmt_info, val: vec_inv,
10347 type: lvectype, gsi);
10348 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10349 if (lvectype != vectype)
10350 {
10351 new_stmt
10352 = gimple_build_assign (make_ssa_name (var: vectype),
10353 VIEW_CONVERT_EXPR,
10354 build1 (VIEW_CONVERT_EXPR,
10355 vectype, new_temp));
10356 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt,
10357 gsi);
10358 }
10359 }
10360 }
10361
10362 if (!costing_p)
10363 {
10364 if (slp)
10365 {
10366 if (slp_perm)
10367 dr_chain.quick_push (obj: gimple_assign_lhs (gs: new_stmt));
10368 else
10369 slp_node->push_vec_def (def: new_stmt);
10370 }
10371 else
10372 {
10373 if (j == 0)
10374 *vec_stmt = new_stmt;
10375 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
10376 }
10377 }
10378 }
10379 if (slp_perm)
10380 {
10381 unsigned n_perms;
10382 if (costing_p)
10383 {
10384 unsigned n_loads;
10385 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10386 true, &n_perms, &n_loads);
10387 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: n_perms, kind: vec_perm,
10388 stmt_info: first_stmt_info, misalign: 0, where: vect_body);
10389 }
10390 else
10391 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10392 false, &n_perms);
10393 }
10394
10395 if (costing_p)
10396 {
10397 if (n_adjacent_loads > 0)
10398 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
10399 alignment_support_scheme, misalignment, add_realign_cost: false,
10400 inside_cost: &inside_cost, prologue_cost: nullptr, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
10401 record_prologue_costs: true);
10402 if (dump_enabled_p ())
10403 dump_printf_loc (MSG_NOTE, vect_location,
10404 "vect_model_load_cost: inside_cost = %u, "
10405 "prologue_cost = 0 .\n",
10406 inside_cost);
10407 }
10408
10409 return true;
10410 }
10411
10412 if (memory_access_type == VMAT_GATHER_SCATTER
10413 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10414 grouped_load = false;
10415
10416 if (grouped_load
10417 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10418 {
10419 if (grouped_load)
10420 {
10421 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10422 group_size = DR_GROUP_SIZE (first_stmt_info);
10423 }
10424 else
10425 {
10426 first_stmt_info = stmt_info;
10427 group_size = 1;
10428 }
10429 /* For SLP vectorization we directly vectorize a subchain
10430 without permutation. */
10431 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10432 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10433 /* For BB vectorization always use the first stmt to base
10434 the data ref pointer on. */
10435 if (bb_vinfo)
10436 first_stmt_info_for_drptr
10437 = vect_find_first_scalar_stmt_in_slp (slp_node);
10438
10439 /* Check if the chain of loads is already vectorized. */
10440 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10441 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10442 ??? But we can only do so if there is exactly one
10443 as we have no way to get at the rest. Leave the CSE
10444 opportunity alone.
10445 ??? With the group load eventually participating
10446 in multiple different permutations (having multiple
10447 slp nodes which refer to the same group) the CSE
10448 is even wrong code. See PR56270. */
10449 && !slp)
10450 {
10451 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10452 return true;
10453 }
10454 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10455 group_gap_adj = 0;
10456
10457 /* VEC_NUM is the number of vect stmts to be created for this group. */
10458 if (slp)
10459 {
10460 grouped_load = false;
10461 /* If an SLP permutation is from N elements to N elements,
10462 and if one vector holds a whole number of N, we can load
10463 the inputs to the permutation in the same way as an
10464 unpermuted sequence. In other cases we need to load the
10465 whole group, not only the number of vector stmts the
10466 permutation result fits in. */
10467 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10468 if (slp_perm
10469 && (group_size != scalar_lanes
10470 || !multiple_p (a: nunits, b: group_size)))
10471 {
10472 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10473 variable VF; see vect_transform_slp_perm_load. */
10474 unsigned int const_vf = vf.to_constant ();
10475 unsigned int const_nunits = nunits.to_constant ();
10476 vec_num = CEIL (group_size * const_vf, const_nunits);
10477 group_gap_adj = vf * group_size - nunits * vec_num;
10478 }
10479 else
10480 {
10481 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10482 group_gap_adj
10483 = group_size - scalar_lanes;
10484 }
10485 }
10486 else
10487 vec_num = group_size;
10488
10489 ref_type = get_group_alias_ptr_type (first_stmt_info);
10490 }
10491 else
10492 {
10493 first_stmt_info = stmt_info;
10494 first_dr_info = dr_info;
10495 group_size = vec_num = 1;
10496 group_gap_adj = 0;
10497 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10498 if (slp)
10499 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10500 }
10501
10502 gcc_assert (alignment_support_scheme);
10503 vec_loop_masks *loop_masks
10504 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10505 ? &LOOP_VINFO_MASKS (loop_vinfo)
10506 : NULL);
10507 vec_loop_lens *loop_lens
10508 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10509 ? &LOOP_VINFO_LENS (loop_vinfo)
10510 : NULL);
10511
10512 /* We shouldn't use the length-based approach if the loop is fully masked. */
10513 gcc_assert (!loop_lens || !loop_masks);
10514
10515 /* Targets with load-lane instructions must not require explicit
10516 realignment. vect_supportable_dr_alignment always returns either
10517 dr_aligned or dr_unaligned_supported for masked operations. */
10518 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10519 && !mask
10520 && !loop_masks)
10521 || alignment_support_scheme == dr_aligned
10522 || alignment_support_scheme == dr_unaligned_supported);
10523
10524 /* In case the vectorization factor (VF) is bigger than the number
10525 of elements that we can fit in a vectype (nunits), we have to generate
10526 more than one vector stmt, i.e. we need to "unroll" the
10527 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10528 from one copy of the vector stmt to the next, in the field
10529 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10530 stages to find the correct vector defs to be used when vectorizing
10531 stmts that use the defs of the current stmt. The example below
10532 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10533 need to create 4 vectorized stmts):
10534
10535 before vectorization:
10536 RELATED_STMT VEC_STMT
10537 S1: x = memref - -
10538 S2: z = x + 1 - -
10539
10540 step 1: vectorize stmt S1:
10541 We first create the vector stmt VS1_0, and, as usual, record a
10542 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10543 Next, we create the vector stmt VS1_1, and record a pointer to
10544 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10545 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10546 stmts and pointers:
10547 RELATED_STMT VEC_STMT
10548 VS1_0: vx0 = memref0 VS1_1 -
10549 VS1_1: vx1 = memref1 VS1_2 -
10550 VS1_2: vx2 = memref2 VS1_3 -
10551 VS1_3: vx3 = memref3 - -
10552 S1: x = load - VS1_0
10553 S2: z = x + 1 - -
10554 */
10555
10556 /* In case of interleaving (non-unit grouped access):
10557
10558 S1: x2 = &base + 2
10559 S2: x0 = &base
10560 S3: x1 = &base + 1
10561 S4: x3 = &base + 3
10562
10563 Vectorized loads are created in the order of memory accesses
10564 starting from the access of the first stmt of the chain:
10565
10566 VS1: vx0 = &base
10567 VS2: vx1 = &base + vec_size*1
10568 VS3: vx3 = &base + vec_size*2
10569 VS4: vx4 = &base + vec_size*3
10570
10571 Then permutation statements are generated:
10572
10573 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10574 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10575 ...
10576
10577 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10578 (the order of the data-refs in the output of vect_permute_load_chain
10579 corresponds to the order of scalar stmts in the interleaving chain - see
10580 the documentation of vect_permute_load_chain()).
10581 The generation of permutation stmts and recording them in
10582 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10583
10584 In case of both multiple types and interleaving, the vector loads and
10585 permutation stmts above are created for every copy. The result vector
10586 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10587 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10588
10589 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10590 on a target that supports unaligned accesses (dr_unaligned_supported)
10591 we generate the following code:
10592 p = initial_addr;
10593 indx = 0;
10594 loop {
10595 p = p + indx * vectype_size;
10596 vec_dest = *(p);
10597 indx = indx + 1;
10598 }
10599
10600 Otherwise, the data reference is potentially unaligned on a target that
10601 does not support unaligned accesses (dr_explicit_realign_optimized) -
10602 then generate the following code, in which the data in each iteration is
10603 obtained by two vector loads, one from the previous iteration, and one
10604 from the current iteration:
10605 p1 = initial_addr;
10606 msq_init = *(floor(p1))
10607 p2 = initial_addr + VS - 1;
10608 realignment_token = call target_builtin;
10609 indx = 0;
10610 loop {
10611 p2 = p2 + indx * vectype_size
10612 lsq = *(floor(p2))
10613 vec_dest = realign_load (msq, lsq, realignment_token)
10614 indx = indx + 1;
10615 msq = lsq;
10616 } */
10617
10618 /* If the misalignment remains the same throughout the execution of the
10619 loop, we can create the init_addr and permutation mask at the loop
10620 preheader. Otherwise, it needs to be created inside the loop.
10621 This can only occur when vectorizing memory accesses in the inner-loop
10622 nested within an outer-loop that is being vectorized. */
10623
10624 if (nested_in_vect_loop
10625 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10626 b: GET_MODE_SIZE (TYPE_MODE (vectype))))
10627 {
10628 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10629 compute_in_loop = true;
10630 }
10631
10632 bool diff_first_stmt_info
10633 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10634
10635 tree offset = NULL_TREE;
10636 if ((alignment_support_scheme == dr_explicit_realign_optimized
10637 || alignment_support_scheme == dr_explicit_realign)
10638 && !compute_in_loop)
10639 {
10640 /* If we have a different first_stmt_info, we can't set up realignment
10641 here, since we can't guarantee that the first_stmt_info DR has been
10642 initialized yet; instead use the first_stmt_info_for_drptr DR by
10643 bumping the distance from the first_stmt_info DR as done below. */
10644 if (!costing_p)
10645 {
10646 if (!diff_first_stmt_info)
10647 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10648 &realignment_token,
10649 alignment_support_scheme, NULL_TREE,
10650 &at_loop);
10651 if (alignment_support_scheme == dr_explicit_realign_optimized)
10652 {
10653 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10654 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10655 size_one_node);
10656 gcc_assert (!first_stmt_info_for_drptr);
10657 }
10658 }
10659 }
10660 else
10661 at_loop = loop;
10662
10663 if (!known_eq (poffset, 0))
10664 offset = (offset
10665 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10666 : size_int (poffset));
10667
10668 tree bump;
10669 tree vec_offset = NULL_TREE;
10670 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10671 {
10672 aggr_type = NULL_TREE;
10673 bump = NULL_TREE;
10674 }
10675 else if (memory_access_type == VMAT_GATHER_SCATTER)
10676 {
10677 aggr_type = elem_type;
10678 if (!costing_p)
10679 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, gs_info: &gs_info,
10680 dataref_bump: &bump, vec_offset: &vec_offset, loop_lens);
10681 }
10682 else
10683 {
10684 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10685 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10686 else
10687 aggr_type = vectype;
10688 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10689 memory_access_type, loop_lens);
10690 }
10691
10692 auto_vec<tree> vec_offsets;
10693 auto_vec<tree> vec_masks;
10694 if (mask && !costing_p)
10695 {
10696 if (slp_node)
10697 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10698 &vec_masks);
10699 else
10700 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, op: mask,
10701 vec_oprnds: &vec_masks, vectype: mask_vectype);
10702 }
10703
10704 tree vec_mask = NULL_TREE;
10705 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10706 {
10707 gcc_assert (alignment_support_scheme == dr_aligned
10708 || alignment_support_scheme == dr_unaligned_supported);
10709 gcc_assert (grouped_load && !slp);
10710
10711 unsigned int inside_cost = 0, prologue_cost = 0;
10712 /* For costing some adjacent vector loads, we'd like to cost them
10713 once with their total number instead of costing each one by one. */
10714 unsigned int n_adjacent_loads = 0;
10715 for (j = 0; j < ncopies; j++)
10716 {
10717 if (costing_p)
10718 {
10719 /* An IFN_LOAD_LANES will load all its vector results,
10720 regardless of which ones we actually need. Account
10721 for the cost of unused results. */
10722 if (first_stmt_info == stmt_info)
10723 {
10724 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10725 stmt_vec_info next_stmt_info = first_stmt_info;
10726 do
10727 {
10728 gaps -= 1;
10729 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10730 }
10731 while (next_stmt_info);
10732 if (gaps)
10733 {
10734 if (dump_enabled_p ())
10735 dump_printf_loc (MSG_NOTE, vect_location,
10736 "vect_model_load_cost: %d "
10737 "unused vectors.\n",
10738 gaps);
10739 vect_get_load_cost (vinfo, stmt_info, ncopies: gaps,
10740 alignment_support_scheme,
10741 misalignment, add_realign_cost: false, inside_cost: &inside_cost,
10742 prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
10743 record_prologue_costs: true);
10744 }
10745 }
10746 n_adjacent_loads++;
10747 continue;
10748 }
10749
10750 /* 1. Create the vector or array pointer update chain. */
10751 if (j == 0)
10752 dataref_ptr
10753 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10754 at_loop, offset, &dummy, gsi,
10755 &ptr_incr, false, bump);
10756 else
10757 {
10758 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10759 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10760 stmt_info, bump);
10761 }
10762 if (mask)
10763 vec_mask = vec_masks[j];
10764
10765 tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num);
10766
10767 tree final_mask = NULL_TREE;
10768 tree final_len = NULL_TREE;
10769 tree bias = NULL_TREE;
10770 if (loop_masks)
10771 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10772 ncopies, vectype, j);
10773 if (vec_mask)
10774 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
10775 vec_mask, gsi);
10776
10777 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10778 {
10779 if (loop_lens)
10780 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10781 ncopies, vectype, j, 1);
10782 else
10783 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10784 signed char biasval
10785 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
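 /* The partial load/store bias is a target-defined constant,
 either 0 or -1, and the effective length is LEN plus this
 bias; -1 suits targets whose length-controlled loads take
 the index of the last active element rather than a count. */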
10786 bias = build_int_cst (intQI_type_node, biasval);
10787 if (!final_mask)
10788 {
10789 mask_vectype = truth_type_for (vectype);
10790 final_mask = build_minus_one_cst (mask_vectype);
10791 }
10792 }
10793
10794 gcall *call;
10795 if (final_len && final_mask)
10796 {
10797 /* Emit:
10798 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10799 VEC_MASK, LEN, BIAS). */
10800 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10801 tree alias_ptr = build_int_cst (ref_type, align);
10802 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10803 dataref_ptr, alias_ptr,
10804 final_mask, final_len, bias);
10805 }
10806 else if (final_mask)
10807 {
10808 /* Emit:
10809 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10810 VEC_MASK). */
10811 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10812 tree alias_ptr = build_int_cst (ref_type, align);
10813 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10814 dataref_ptr, alias_ptr,
10815 final_mask);
10816 }
10817 else
10818 {
10819 /* Emit:
10820 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10821 data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type);
10822 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10823 }
10824 gimple_call_set_lhs (gs: call, lhs: vec_array);
10825 gimple_call_set_nothrow (s: call, nothrow_p: true);
10826 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
10827
10828 dr_chain.create (nelems: vec_num);
10829 /* Extract each vector into an SSA_NAME. */
10830 for (i = 0; i < vec_num; i++)
10831 {
10832 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10833 array: vec_array, n: i);
10834 dr_chain.quick_push (obj: new_temp);
10835 }
10836
10837 /* Record the mapping between SSA_NAMEs and statements. */
10838 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10839
10840 /* Record that VEC_ARRAY is now dead. */
10841 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
10842
10843 dr_chain.release ();
10844
10845 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10846 }
10847
10848 if (costing_p)
10849 {
10850 if (n_adjacent_loads > 0)
10851 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
10852 alignment_support_scheme, misalignment, add_realign_cost: false,
10853 inside_cost: &inside_cost, prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec,
10854 body_cost_vec: cost_vec, record_prologue_costs: true);
10855 if (dump_enabled_p ())
10856 dump_printf_loc (MSG_NOTE, vect_location,
10857 "vect_model_load_cost: inside_cost = %u, "
10858 "prologue_cost = %u .\n",
10859 inside_cost, prologue_cost);
10860 }
10861
10862 return true;
10863 }
10864
10865 if (memory_access_type == VMAT_GATHER_SCATTER)
10866 {
10867 gcc_assert (alignment_support_scheme == dr_aligned
10868 || alignment_support_scheme == dr_unaligned_supported);
10869 gcc_assert (!grouped_load && !slp_perm);
10870
10871 unsigned int inside_cost = 0, prologue_cost = 0;
10872 for (j = 0; j < ncopies; j++)
10873 {
10874 /* 1. Create the vector or array pointer update chain. */
10875 if (j == 0 && !costing_p)
10876 {
10877 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10878 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10879 slp_node, gs_info: &gs_info, dataref_ptr: &dataref_ptr,
10880 vec_offset: &vec_offsets);
10881 else
10882 dataref_ptr
10883 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10884 at_loop, offset, &dummy, gsi,
10885 &ptr_incr, false, bump);
10886 }
10887 else if (!costing_p)
10888 {
10889 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10890 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10891 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10892 gsi, stmt_info, bump);
10893 }
10894
10895 gimple *new_stmt = NULL;
10896 for (i = 0; i < vec_num; i++)
10897 {
10898 tree final_mask = NULL_TREE;
10899 tree final_len = NULL_TREE;
10900 tree bias = NULL_TREE;
10901 if (!costing_p)
10902 {
10903 if (mask)
10904 vec_mask = vec_masks[vec_num * j + i];
10905 if (loop_masks)
10906 final_mask
10907 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10908 vec_num * ncopies, vectype,
10909 vec_num * j + i);
10910 if (vec_mask)
10911 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
10912 loop_mask: final_mask, vec_mask, gsi);
10913
10914 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10915 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10916 gsi, stmt_info, bump);
10917 }
10918
10919 /* 2. Create the vector-load in the loop. */
10920 unsigned HOST_WIDE_INT align;
10921 if (gs_info.ifn != IFN_LAST)
10922 {
10923 if (costing_p)
10924 {
10925 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
10926 inside_cost
10927 = record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_load,
10928 stmt_info, misalign: 0, where: vect_body);
10929 continue;
10930 }
10931 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10932 vec_offset = vec_offsets[vec_num * j + i];
10933 tree zero = build_zero_cst (vectype);
10934 tree scale = size_int (gs_info.scale);
10935
10936 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10937 {
10938 if (loop_lens)
10939 final_len
10940 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10941 vec_num * ncopies, vectype,
10942 vec_num * j + i, 1);
10943 else
10944 final_len
10945 = build_int_cst (sizetype,
10946 TYPE_VECTOR_SUBPARTS (node: vectype));
10947 signed char biasval
10948 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10949 bias = build_int_cst (intQI_type_node, biasval);
10950 if (!final_mask)
10951 {
10952 mask_vectype = truth_type_for (vectype);
10953 final_mask = build_minus_one_cst (mask_vectype);
10954 }
10955 }
10956
10957 gcall *call;
10958 if (final_len && final_mask)
10959 call
10960 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10961 dataref_ptr, vec_offset,
10962 scale, zero, final_mask,
10963 final_len, bias);
10964 else if (final_mask)
10965 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10966 dataref_ptr, vec_offset,
10967 scale, zero, final_mask);
10968 else
10969 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10970 dataref_ptr, vec_offset,
10971 scale, zero);
10972 gimple_call_set_nothrow (s: call, nothrow_p: true);
10973 new_stmt = call;
10974 data_ref = NULL_TREE;
10975 }
10976 else if (gs_info.decl)
10977 {
10978 /* The builtin decls path for gather is legacy, x86 only. */
10979 gcc_assert (!final_len && nunits.is_constant ());
10980 if (costing_p)
10981 {
10982 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
10983 inside_cost
10984 = record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_load,
10985 stmt_info, misalign: 0, where: vect_body);
10986 continue;
10987 }
10988 poly_uint64 offset_nunits
10989 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype);
10990 if (known_eq (nunits, offset_nunits))
10991 {
10992 new_stmt = vect_build_one_gather_load_call
10993 (vinfo, stmt_info, gsi, gs_info: &gs_info,
10994 ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i],
10995 mask: final_mask);
10996 data_ref = NULL_TREE;
10997 }
10998 else if (known_eq (nunits, offset_nunits * 2))
10999 {
11000 /* We have an offset vector with half the number of
11001 lanes, but the builtins will produce full vectype
11002 data with just the lower lanes filled. */
11003 new_stmt = vect_build_one_gather_load_call
11004 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11005 ptr: dataref_ptr, offset: vec_offsets[2 * vec_num * j + 2 * i],
11006 mask: final_mask);
11007 tree low = make_ssa_name (var: vectype);
11008 gimple_set_lhs (new_stmt, low);
11009 vect_finish_stmt_generation (vinfo, stmt_info,
11010 vec_stmt: new_stmt, gsi);
11011
11012 /* Now move the upper half of FINAL_MASK into its lower half. */
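 /* E.g. for eight lanes the selector is
 { 4, 5, 6, 7, 4, 5, 6, 7 }, so the upper mask lanes
 end up in the lower half of the result. */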
11013 if (final_mask
11014 && !SCALAR_INT_MODE_P
11015 (TYPE_MODE (TREE_TYPE (final_mask))))
11016 {
11017 int count = nunits.to_constant ();
11018 vec_perm_builder sel (count, count, 1);
11019 sel.quick_grow (len: count);
11020 for (int i = 0; i < count; ++i)
11021 sel[i] = i | (count / 2);
11022 vec_perm_indices indices (sel, 2, count);
11023 tree perm_mask = vect_gen_perm_mask_checked
11024 (TREE_TYPE (final_mask), sel: indices);
11025 new_stmt = gimple_build_assign (NULL_TREE,
11026 VEC_PERM_EXPR,
11027 final_mask,
11028 final_mask,
11029 perm_mask);
11030 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11031 gimple_set_lhs (new_stmt, final_mask);
11032 vect_finish_stmt_generation (vinfo, stmt_info,
11033 vec_stmt: new_stmt, gsi);
11034 }
11035 else if (final_mask)
11036 {
11037 new_stmt = gimple_build_assign (NULL_TREE,
11038 VEC_UNPACK_HI_EXPR,
11039 final_mask);
11040 final_mask = make_ssa_name
11041 (var: truth_type_for (gs_info.offset_vectype));
11042 gimple_set_lhs (new_stmt, final_mask);
11043 vect_finish_stmt_generation (vinfo, stmt_info,
11044 vec_stmt: new_stmt, gsi);
11045 }
11046
11047 new_stmt = vect_build_one_gather_load_call
11048 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11049 ptr: dataref_ptr,
11050 offset: vec_offsets[2 * vec_num * j + 2 * i + 1],
11051 mask: final_mask);
11052 tree high = make_ssa_name (var: vectype);
11053 gimple_set_lhs (new_stmt, high);
11054 vect_finish_stmt_generation (vinfo, stmt_info,
11055 vec_stmt: new_stmt, gsi);
11056
11057 /* Compose LOW and HIGH into the full result vector. */
11058 int count = nunits.to_constant ();
11059 vec_perm_builder sel (count, count, 1);
11060 sel.quick_grow (len: count);
11061 for (int i = 0; i < count; ++i)
11062 sel[i] = i < count / 2 ? i : i + count / 2;
11063 vec_perm_indices indices (sel, 2, count);
11064 tree perm_mask
11065 = vect_gen_perm_mask_checked (vectype, sel: indices);
11066 new_stmt = gimple_build_assign (NULL_TREE,
11067 VEC_PERM_EXPR,
11068 low, high, perm_mask);
11069 data_ref = NULL_TREE;
11070 }
11071 else if (known_eq (nunits * 2, offset_nunits))
11072 {
11073 /* We have an offset vector with double the number of
11074 lanes. Select the low/high part accordingly. */
11075 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11076 if ((vec_num * j + i) & 1)
11077 {
11078 int count = offset_nunits.to_constant ();
11079 vec_perm_builder sel (count, count, 1);
11080 sel.quick_grow (len: count);
11081 for (int i = 0; i < count; ++i)
11082 sel[i] = i | (count / 2);
11083 vec_perm_indices indices (sel, 2, count);
11084 tree perm_mask = vect_gen_perm_mask_checked
11085 (TREE_TYPE (vec_offset), sel: indices);
11086 new_stmt = gimple_build_assign (NULL_TREE,
11087 VEC_PERM_EXPR,
11088 vec_offset,
11089 vec_offset,
11090 perm_mask);
11091 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11092 gimple_set_lhs (new_stmt, vec_offset);
11093 vect_finish_stmt_generation (vinfo, stmt_info,
11094 vec_stmt: new_stmt, gsi);
11095 }
11096 new_stmt = vect_build_one_gather_load_call
11097 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11098 ptr: dataref_ptr, offset: vec_offset, mask: final_mask);
11099 data_ref = NULL_TREE;
11100 }
11101 else
11102 gcc_unreachable ();
11103 }
11104 else
11105 {
11106 /* Emulated gather-scatter. */
11107 gcc_assert (!final_mask);
11108 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11109 if (costing_p)
11110 {
11111 /* For emulated gathers, N offset vector element extracts (the
11112 scalar scaling and offset add are assumed to be consumed by the load). */
11113 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: const_nunits,
11114 kind: vec_to_scalar, stmt_info,
11115 misalign: 0, where: vect_body);
11116 /* N scalar loads plus gathering them into a
11117 vector. */
11118 inside_cost
11119 = record_stmt_cost (body_cost_vec: cost_vec, count: const_nunits, kind: scalar_load,
11120 stmt_info, misalign: 0, where: vect_body);
11121 inside_cost
11122 = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_construct,
11123 stmt_info, misalign: 0, where: vect_body);
11124 continue;
11125 }
11126 unsigned HOST_WIDE_INT const_offset_nunits
11127 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype)
11128 .to_constant ();
11129 vec<constructor_elt, va_gc> *ctor_elts;
11130 vec_alloc (v&: ctor_elts, nelems: const_nunits);
11131 gimple_seq stmts = NULL;
11132 /* We support offset vectors with more elements
11133 than the data vector for now. */
11134 unsigned HOST_WIDE_INT factor
11135 = const_offset_nunits / const_nunits;
11136 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11137 unsigned elt_offset = (j % factor) * const_nunits;
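 /* E.g. for V4SI data and a V8SI offset vector FACTOR is 2;
 with one vector per copy, copies j == 0 and j == 1 both
 use vec_offsets[0], reading offset elements 0..3 and
 4..7 respectively. */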
11138 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11139 tree scale = size_int (gs_info.scale);
11140 align = get_object_alignment (DR_REF (first_dr_info->dr));
11141 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11142 for (unsigned k = 0; k < const_nunits; ++k)
11143 {
11144 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11145 bitsize_int (k + elt_offset));
11146 tree idx
11147 = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type,
11148 ops: vec_offset, TYPE_SIZE (idx_type), ops: boff);
11149 idx = gimple_convert (seq: &stmts, sizetype, op: idx);
11150 idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype, ops: idx,
11151 ops: scale);
11152 tree ptr = gimple_build (seq: &stmts, code: PLUS_EXPR,
11153 TREE_TYPE (dataref_ptr),
11154 ops: dataref_ptr, ops: idx);
11155 ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr);
11156 tree elt = make_ssa_name (TREE_TYPE (vectype));
11157 tree ref = build2 (MEM_REF, ltype, ptr,
11158 build_int_cst (ref_type, 0));
11159 new_stmt = gimple_build_assign (elt, ref);
11160 gimple_set_vuse (g: new_stmt, vuse: gimple_vuse (g: gsi_stmt (i: *gsi)));
11161 gimple_seq_add_stmt (&stmts, new_stmt);
11162 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11163 }
11164 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11165 new_stmt = gimple_build_assign (
11166 NULL_TREE, build_constructor (vectype, ctor_elts));
11167 data_ref = NULL_TREE;
11168 }
11169
11170 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11171 /* DATA_REF is null if we've already built the statement. */
11172 if (data_ref)
11173 {
11174 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11175 new_stmt = gimple_build_assign (vec_dest, data_ref);
11176 }
11177 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11178 gimple_set_lhs (new_stmt, new_temp);
11179 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11180
11181 /* Store vector loads in the corresponding SLP_NODE. */
11182 if (slp)
11183 slp_node->push_vec_def (def: new_stmt);
11184 }
11185
11186 if (!slp && !costing_p)
11187 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
11188 }
11189
11190 if (!slp && !costing_p)
11191 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11192
11193 if (costing_p && dump_enabled_p ())
11194 dump_printf_loc (MSG_NOTE, vect_location,
11195 "vect_model_load_cost: inside_cost = %u, "
11196 "prologue_cost = %u .\n",
11197 inside_cost, prologue_cost);
11198 return true;
11199 }
11200
11201 poly_uint64 group_elt = 0;
11202 unsigned int inside_cost = 0, prologue_cost = 0;
11203 /* For costing some adjacent vector loads, we'd like to cost them
11204 once with their total number instead of costing each one by one. */
11205 unsigned int n_adjacent_loads = 0;
11206 for (j = 0; j < ncopies; j++)
11207 {
11208 /* 1. Create the vector or array pointer update chain. */
11209 if (j == 0 && !costing_p)
11210 {
11211 bool simd_lane_access_p
11212 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11213 if (simd_lane_access_p
11214 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11215 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11216 && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info))
11217 && integer_zerop (DR_INIT (first_dr_info->dr))
11218 && alias_sets_conflict_p (get_alias_set (aggr_type),
11219 get_alias_set (TREE_TYPE (ref_type)))
11220 && (alignment_support_scheme == dr_aligned
11221 || alignment_support_scheme == dr_unaligned_supported))
11222 {
11223 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11224 dataref_offset = build_int_cst (ref_type, 0);
11225 }
11226 else if (diff_first_stmt_info)
11227 {
11228 dataref_ptr
11229 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11230 aggr_type, at_loop, offset, &dummy,
11231 gsi, &ptr_incr, simd_lane_access_p,
11232 bump);
11233 /* Adjust the pointer by the difference to first_stmt. */
11234 data_reference_p ptrdr
11235 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11236 tree diff
11237 = fold_convert (sizetype,
11238 size_binop (MINUS_EXPR,
11239 DR_INIT (first_dr_info->dr),
11240 DR_INIT (ptrdr)));
11241 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11242 stmt_info, diff);
11243 if (alignment_support_scheme == dr_explicit_realign)
11244 {
11245 msq = vect_setup_realignment (vinfo,
11246 first_stmt_info_for_drptr, gsi,
11247 &realignment_token,
11248 alignment_support_scheme,
11249 dataref_ptr, &at_loop);
11250 gcc_assert (!compute_in_loop);
11251 }
11252 }
11253 else
11254 dataref_ptr
11255 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11256 at_loop,
11257 offset, &dummy, gsi, &ptr_incr,
11258 simd_lane_access_p, bump);
11259 }
11260 else if (!costing_p)
11261 {
11262 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11263 if (dataref_offset)
11264 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11265 bump);
11266 else
11267 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11268 stmt_info, bump);
11269 }
11270
11271 if (grouped_load || slp_perm)
11272 dr_chain.create (nelems: vec_num);
11273
11274 gimple *new_stmt = NULL;
11275 for (i = 0; i < vec_num; i++)
11276 {
11277 tree final_mask = NULL_TREE;
11278 tree final_len = NULL_TREE;
11279 tree bias = NULL_TREE;
11280 if (!costing_p)
11281 {
11282 if (mask)
11283 vec_mask = vec_masks[vec_num * j + i];
11284 if (loop_masks)
11285 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11286 vec_num * ncopies, vectype,
11287 vec_num * j + i);
11288 if (vec_mask)
11289 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
11290 loop_mask: final_mask, vec_mask, gsi);
11291
11292 if (i > 0)
11293 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11294 gsi, stmt_info, bump);
11295 }
11296
11297 /* 2. Create the vector-load in the loop. */
11298 switch (alignment_support_scheme)
11299 {
11300 case dr_aligned:
11301 case dr_unaligned_supported:
11302 {
11303 if (costing_p)
11304 break;
11305
11306 unsigned int misalign;
11307 unsigned HOST_WIDE_INT align;
11308 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11309 if (alignment_support_scheme == dr_aligned)
11310 misalign = 0;
11311 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11312 {
11313 align
11314 = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info));
11315 misalign = 0;
11316 }
11317 else
11318 misalign = misalignment;
11319 if (dataref_offset == NULL_TREE
11320 && TREE_CODE (dataref_ptr) == SSA_NAME)
11321 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11322 misalign);
11323 align = least_bit_hwi (x: misalign | align);
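 /* E.g. a known misalignment of 4 against a 16-byte target
 alignment gives an effective alignment of 4, while a
 misalignment of 0 keeps the full 16 bytes. */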
11324
11325 /* Compute IFN when LOOP_LENS or final_mask valid. */
11326 machine_mode vmode = TYPE_MODE (vectype);
11327 machine_mode new_vmode = vmode;
11328 internal_fn partial_ifn = IFN_LAST;
11329 if (loop_lens)
11330 {
11331 opt_machine_mode new_ovmode
11332 = get_len_load_store_mode (vmode, true, &partial_ifn);
11333 new_vmode = new_ovmode.require ();
11334 unsigned factor
11335 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
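 /* When the length-based load is only provided in a byte
 mode (VnQI), the length operand counts bytes, so the
 requested length is scaled by the element size
 (e.g. by 4 for V4SI). */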
11336 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11337 vec_num * ncopies, vectype,
11338 vec_num * j + i, factor);
11339 }
11340 else if (final_mask)
11341 {
11342 if (!can_vec_mask_load_store_p (
11343 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11344 &partial_ifn))
11345 gcc_unreachable ();
11346 }
11347
11348 if (partial_ifn == IFN_MASK_LEN_LOAD)
11349 {
11350 if (!final_len)
11351 {
11352 /* Pass VF value to 'len' argument of
11353 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11354 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11355 }
11356 if (!final_mask)
11357 {
11358 /* Pass all ones value to 'mask' argument of
11359 MASK_LEN_LOAD if final_mask is invalid. */
11360 mask_vectype = truth_type_for (vectype);
11361 final_mask = build_minus_one_cst (mask_vectype);
11362 }
11363 }
11364 if (final_len)
11365 {
11366 signed char biasval
11367 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11368
11369 bias = build_int_cst (intQI_type_node, biasval);
11370 }
11371
11372 if (final_len)
11373 {
11374 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11375 gcall *call;
11376 if (partial_ifn == IFN_MASK_LEN_LOAD)
11377 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11378 dataref_ptr, ptr,
11379 final_mask, final_len,
11380 bias);
11381 else
11382 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11383 dataref_ptr, ptr,
11384 final_len, bias);
11385 gimple_call_set_nothrow (s: call, nothrow_p: true);
11386 new_stmt = call;
11387 data_ref = NULL_TREE;
11388
11389 /* Need conversion if it's wrapped with VnQI. */
11390 if (vmode != new_vmode)
11391 {
11392 tree new_vtype = build_vector_type_for_mode (
11393 unsigned_intQI_type_node, new_vmode);
11394 tree var
11395 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11396 gimple_set_lhs (call, var);
11397 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call,
11398 gsi);
11399 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11400 new_stmt = gimple_build_assign (vec_dest,
11401 VIEW_CONVERT_EXPR, op);
11402 }
11403 }
11404 else if (final_mask)
11405 {
11406 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11407 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11408 dataref_ptr, ptr,
11409 final_mask);
11410 gimple_call_set_nothrow (s: call, nothrow_p: true);
11411 new_stmt = call;
11412 data_ref = NULL_TREE;
11413 }
11414 else
11415 {
11416 tree ltype = vectype;
11417 tree new_vtype = NULL_TREE;
11418 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11419 unsigned int vect_align
11420 = vect_known_alignment_in_bytes (dr_info: first_dr_info, vectype);
11421 unsigned int scalar_dr_size
11422 = vect_get_scalar_dr_size (dr_info: first_dr_info);
11423 /* If there's no peeling for gaps but we have a gap
11424 with slp loads then load the lower half of the
11425 vector only. See get_group_load_store_type for
11426 when we apply this optimization. */
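 /* For example, a group of size 4 with a trailing gap of 2
 and a four-element vectype may load just the two-element
 lower half here; the constructor below fills the
 remaining half with zeros. */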
11427 if (slp
11428 && loop_vinfo
11429 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11430 && known_eq (nunits, (group_size - gap) * 2)
11431 && known_eq (nunits, group_size)
11432 && gap >= (vect_align / scalar_dr_size))
11433 {
11434 tree half_vtype;
11435 new_vtype
11436 = vector_vector_composition_type (vtype: vectype, nelts: 2,
11437 ptype: &half_vtype);
11438 if (new_vtype != NULL_TREE)
11439 ltype = half_vtype;
11440 }
11441 tree offset
11442 = (dataref_offset ? dataref_offset
11443 : build_int_cst (ref_type, 0));
11444 if (ltype != vectype
11445 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11446 {
11447 unsigned HOST_WIDE_INT gap_offset
11448 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11449 tree gapcst = build_int_cst (ref_type, gap_offset);
11450 offset = size_binop (PLUS_EXPR, offset, gapcst);
11451 }
11452 data_ref
11453 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11454 if (alignment_support_scheme == dr_aligned)
11455 ;
11456 else
11457 TREE_TYPE (data_ref)
11458 = build_aligned_type (TREE_TYPE (data_ref),
11459 align * BITS_PER_UNIT);
11460 if (ltype != vectype)
11461 {
11462 vect_copy_ref_info (data_ref,
11463 DR_REF (first_dr_info->dr));
11464 tree tem = make_ssa_name (var: ltype);
11465 new_stmt = gimple_build_assign (tem, data_ref);
11466 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt,
11467 gsi);
11468 data_ref = NULL;
11469 vec<constructor_elt, va_gc> *v;
11470 vec_alloc (v, nelems: 2);
11471 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11472 {
11473 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11474 build_zero_cst (ltype));
11475 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11476 }
11477 else
11478 {
11479 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11480 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11481 build_zero_cst (ltype));
11482 }
11483 gcc_assert (new_vtype != NULL_TREE);
11484 if (new_vtype == vectype)
11485 new_stmt = gimple_build_assign (
11486 vec_dest, build_constructor (vectype, v));
11487 else
11488 {
11489 tree new_vname = make_ssa_name (var: new_vtype);
11490 new_stmt = gimple_build_assign (
11491 new_vname, build_constructor (new_vtype, v));
11492 vect_finish_stmt_generation (vinfo, stmt_info,
11493 vec_stmt: new_stmt, gsi);
11494 new_stmt = gimple_build_assign (
11495 vec_dest,
11496 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11497 }
11498 }
11499 }
11500 break;
11501 }
11502 case dr_explicit_realign:
11503 {
11504 if (costing_p)
11505 break;
11506 tree ptr, bump;
11507
11508 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11509
11510 if (compute_in_loop)
11511 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11512 &realignment_token,
11513 dr_explicit_realign,
11514 dataref_ptr, NULL);
11515
11516 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11517 ptr = copy_ssa_name (var: dataref_ptr);
11518 else
11519 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11520 /* For explicit realign the target alignment should be
11521 known at compile time. */
11522 unsigned HOST_WIDE_INT align
11523 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11524 new_stmt = gimple_build_assign (
11525 ptr, BIT_AND_EXPR, dataref_ptr,
11526 build_int_cst (TREE_TYPE (dataref_ptr),
11527 -(HOST_WIDE_INT) align));
11528 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11529 data_ref
11530 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11531 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11532 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11533 new_stmt = gimple_build_assign (vec_dest, data_ref);
11534 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11535 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
11536 gimple_move_vops (new_stmt, stmt_info->stmt);
11537 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11538 msq = new_temp;
11539
11540 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11541 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11542 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11543 bump);
11544 new_stmt = gimple_build_assign (
11545 NULL_TREE, BIT_AND_EXPR, ptr,
11546 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11547 if (TREE_CODE (ptr) == SSA_NAME)
11548 ptr = copy_ssa_name (var: ptr, stmt: new_stmt);
11549 else
11550 ptr = make_ssa_name (TREE_TYPE (ptr), stmt: new_stmt);
11551 gimple_assign_set_lhs (gs: new_stmt, lhs: ptr);
11552 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11553 data_ref
11554 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11555 break;
11556 }
11557 case dr_explicit_realign_optimized:
11558 {
11559 if (costing_p)
11560 break;
11561 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11562 new_temp = copy_ssa_name (var: dataref_ptr);
11563 else
11564 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11565 /* We should only be doing this if we know the target
11566 alignment at compile time. */
11567 unsigned HOST_WIDE_INT align
11568 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11569 new_stmt = gimple_build_assign (
11570 new_temp, BIT_AND_EXPR, dataref_ptr,
11571 build_int_cst (TREE_TYPE (dataref_ptr),
11572 -(HOST_WIDE_INT) align));
11573 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11574 data_ref = build2 (MEM_REF, vectype, new_temp,
11575 build_int_cst (ref_type, 0));
11576 break;
11577 }
11578 default:
11579 gcc_unreachable ();
11580 }
11581
11582 /* One common place to cost the vector load generated above for the
11583 different alignment support schemes. */
11584 if (costing_p)
11585 {
11586 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only
11587 need to take care of the first stmt, whose stmt_info is
11588 first_stmt_info; iterating vec_num times on it covers the
11589 cost for the remaining members, consistent with the
11590 transform phase. The prologue cost for realignment only
11591 needs to be counted once for the whole group. */
11592 bool first_stmt_info_p = first_stmt_info == stmt_info;
11593 bool add_realign_cost = first_stmt_info_p && i == 0;
11594 if (memory_access_type == VMAT_CONTIGUOUS
11595 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11596 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11597 && (!grouped_load || first_stmt_info_p)))
11598 {
11599 /* Leave realign cases alone to keep them simple. */
11600 if (alignment_support_scheme == dr_explicit_realign_optimized
11601 || alignment_support_scheme == dr_explicit_realign)
11602 vect_get_load_cost (vinfo, stmt_info, ncopies: 1,
11603 alignment_support_scheme, misalignment,
11604 add_realign_cost, inside_cost: &inside_cost,
11605 prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
11606 record_prologue_costs: true);
11607 else
11608 n_adjacent_loads++;
11609 }
11610 }
11611 else
11612 {
11613 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11614 /* DATA_REF is null if we've already built the statement. */
11615 if (data_ref)
11616 {
11617 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11618 new_stmt = gimple_build_assign (vec_dest, data_ref);
11619 }
11620 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11621 gimple_set_lhs (new_stmt, new_temp);
11622 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11623 }
11624
11625 /* 3. Handle explicit realignment if necessary/supported.
11626 Create in loop:
11627 vec_dest = realign_load (msq, lsq, realignment_token) */
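/* As an illustrative sketch (schematic, made-up names): for an
   unaligned address ADDR the generated code loads the two aligned
   vectors that span the access and merges them:

     msq = *(vectype *) (ADDR & -align);
     lsq = *(vectype *) ((ADDR + vecsize - 1) & -align);
     vec_dest = REALIGN_LOAD <msq, lsq, realignment_token>;

   For dr_explicit_realign_optimized only lsq is loaded per iteration;
   msq is carried over from the previous iteration through a PHI.  */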
11628 if (!costing_p
11629 && (alignment_support_scheme == dr_explicit_realign_optimized
11630 || alignment_support_scheme == dr_explicit_realign))
11631 {
11632 lsq = gimple_assign_lhs (gs: new_stmt);
11633 if (!realignment_token)
11634 realignment_token = dataref_ptr;
11635 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11636 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11637 lsq, realignment_token);
11638 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11639 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
11640 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11641
11642 if (alignment_support_scheme == dr_explicit_realign_optimized)
11643 {
11644 gcc_assert (phi);
11645 if (i == vec_num - 1 && j == ncopies - 1)
11646 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11647 UNKNOWN_LOCATION);
11648 msq = lsq;
11649 }
11650 }
11651
11652 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11653 {
11654 if (costing_p)
11655 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm,
11656 stmt_info, misalign: 0, where: vect_body);
11657 else
11658 {
11659 tree perm_mask = perm_mask_for_reverse (vectype);
11660 new_temp = permute_vec_elements (vinfo, x: new_temp, y: new_temp,
11661 mask_vec: perm_mask, stmt_info, gsi);
11662 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11663 }
11664 }
11665
11666 /* Collect vector loads and later create their permutation in
11667 vect_transform_grouped_load (). */
11668 if (!costing_p && (grouped_load || slp_perm))
11669 dr_chain.quick_push (obj: new_temp);
11670
11671 /* Store vector loads in the corresponding SLP_NODE. */
11672 if (!costing_p && slp && !slp_perm)
11673 slp_node->push_vec_def (def: new_stmt);
11674
11675 /* With SLP permutation we load the gaps as well; without it
11676 we need to skip the gaps after we manage to fully load
11677 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11678 group_elt += nunits;
11679 if (!costing_p
11680 && maybe_ne (a: group_gap_adj, b: 0U)
11681 && !slp_perm
11682 && known_eq (group_elt, group_size - group_gap_adj))
11683 {
11684 poly_wide_int bump_val
11685 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11686 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11687 == -1)
11688 bump_val = -bump_val;
11689 tree bump = wide_int_to_tree (sizetype, cst: bump_val);
11690 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11691 stmt_info, bump);
11692 group_elt = 0;
11693 }
11694 }
11695 /* Bump the vector pointer to account for a gap or for excess
11696 elements loaded for a permuted SLP load. */
11697 if (!costing_p
11698 && maybe_ne (a: group_gap_adj, b: 0U)
11699 && slp_perm)
11700 {
11701 poly_wide_int bump_val
11702 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11703 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11704 bump_val = -bump_val;
11705 tree bump = wide_int_to_tree (sizetype, cst: bump_val);
11706 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11707 stmt_info, bump);
11708 }
11709
11710 if (slp && !slp_perm)
11711 continue;
11712
11713 if (slp_perm)
11714 {
11715 unsigned n_perms;
11716 /* For SLP we know we've seen all possible uses of dr_chain so
11717 direct vect_transform_slp_perm_load to DCE the unused parts.
11718 ??? This is a hack to prevent compile-time issues as seen
11719 in PR101120 and friends. */
11720 if (costing_p)
11721 {
11722 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11723 true, &n_perms, nullptr);
11724 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: n_perms, kind: vec_perm,
11725 stmt_info, misalign: 0, where: vect_body);
11726 }
11727 else
11728 {
11729 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11730 gsi, vf, false, &n_perms,
11731 nullptr, true);
11732 gcc_assert (ok);
11733 }
11734 }
11735 else
11736 {
11737 if (grouped_load)
11738 {
11739 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11740 /* We assume that the cost of a single load-lanes instruction
11741 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11742 If a grouped access is instead being provided by a
11743 load-and-permute operation, include the cost of the
11744 permutes. */
11745 if (costing_p && first_stmt_info == stmt_info)
11746 {
11747 /* Uses even and odd extract operations or shuffle
11748 operations for each needed permute. */
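/* For example (illustrative arithmetic only): a group_size of 4
   gives ceil_log2 (4) * 4 == 8 permute statements below.  */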
11749 int group_size = DR_GROUP_SIZE (first_stmt_info);
11750 int nstmts = ceil_log2 (x: group_size) * group_size;
11751 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm,
11752 stmt_info, misalign: 0, where: vect_body);
11753
11754 if (dump_enabled_p ())
11755 dump_printf_loc (MSG_NOTE, vect_location,
11756 "vect_model_load_cost:"
11757 "strided group_size = %d .\n",
11758 group_size);
11759 }
11760 else if (!costing_p)
11761 {
11762 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11763 group_size, gsi);
11764 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11765 }
11766 }
11767 else if (!costing_p)
11768 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
11769 }
11770 dr_chain.release ();
11771 }
11772 if (!slp && !costing_p)
11773 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11774
11775 if (costing_p)
11776 {
11777 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11778 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11779 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11780 if (n_adjacent_loads > 0)
11781 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
11782 alignment_support_scheme, misalignment, add_realign_cost: false,
11783 inside_cost: &inside_cost, prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
11784 record_prologue_costs: true);
11785 if (dump_enabled_p ())
11786 dump_printf_loc (MSG_NOTE, vect_location,
11787 "vect_model_load_cost: inside_cost = %u, "
11788 "prologue_cost = %u .\n",
11789 inside_cost, prologue_cost);
11790 }
11791
11792 return true;
11793}
11794
11795/* Function vect_is_simple_cond.
11796
11797 Input:
11798 LOOP - the loop that is being vectorized.
11799 COND - Condition that is checked for simple use.
11800
11801 Output:
11802 *COMP_VECTYPE - the vector type for the comparison.
11803 *DTS - The def types for the arguments of the comparison
11804
11805 Returns whether a COND can be vectorized. Checks whether
11806 condition operands are supportable using vect_is_simple_use. */
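/* As an illustrative example (hypothetical GIMPLE): for a statement
   like  x_5 = a_1 < b_2 ? c_3 : d_4  the COND passed in is the
   comparison a_1 < b_2 and *COMP_VECTYPE is derived from the vector
   type of a_1/b_2, while for a mask COND (a boolean SSA name) the
   boolean vector type of that name is used directly.  */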
11807
11808static bool
11809vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11810 slp_tree slp_node, tree *comp_vectype,
11811 enum vect_def_type *dts, tree vectype)
11812{
11813 tree lhs, rhs;
11814 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11815 slp_tree slp_op;
11816
11817 /* Mask case. */
11818 if (TREE_CODE (cond) == SSA_NAME
11819 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11820 {
11821 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11822 &slp_op, &dts[0], comp_vectype)
11823 || !*comp_vectype
11824 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11825 return false;
11826 return true;
11827 }
11828
11829 if (!COMPARISON_CLASS_P (cond))
11830 return false;
11831
11832 lhs = TREE_OPERAND (cond, 0);
11833 rhs = TREE_OPERAND (cond, 1);
11834
11835 if (TREE_CODE (lhs) == SSA_NAME)
11836 {
11837 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11838 &lhs, &slp_op, &dts[0], &vectype1))
11839 return false;
11840 }
11841 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11842 || TREE_CODE (lhs) == FIXED_CST)
11843 dts[0] = vect_constant_def;
11844 else
11845 return false;
11846
11847 if (TREE_CODE (rhs) == SSA_NAME)
11848 {
11849 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11850 &rhs, &slp_op, &dts[1], &vectype2))
11851 return false;
11852 }
11853 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11854 || TREE_CODE (rhs) == FIXED_CST)
11855 dts[1] = vect_constant_def;
11856 else
11857 return false;
11858
11859 if (vectype1 && vectype2
11860 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
11861 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
11862 return false;
11863
11864 *comp_vectype = vectype1 ? vectype1 : vectype2;
11865 /* Invariant comparison. */
11866 if (! *comp_vectype)
11867 {
11868 tree scalar_type = TREE_TYPE (lhs);
11869 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11870 *comp_vectype = truth_type_for (vectype);
11871 else
11872 {
11873 /* If we can widen the comparison to match vectype do so. */
11874 if (INTEGRAL_TYPE_P (scalar_type)
11875 && !slp_node
11876 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11877 TYPE_SIZE (TREE_TYPE (vectype))))
11878 scalar_type = build_nonstandard_integer_type
11879 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11880 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11881 slp_node);
11882 }
11883 }
11884
11885 return true;
11886}
11887
11888/* vectorizable_condition.
11889
11890 Check if STMT_INFO is a conditional modify expression that can be vectorized.
11891 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11892 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11893 at GSI.
11894
11895 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11896
11897 Return true if STMT_INFO is vectorizable in this way. */
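/* As an illustrative sketch of the transform (hypothetical GIMPLE, not
   taken from a real dump): the scalar statement

     iftmp_5 = a_1 < b_2 ? c_3 : d_4;

   is replaced by a vector comparison feeding a VEC_COND_EXPR:

     vect_cmp_7 = vect_a < vect_b;
     vect_iftmp_8 = VEC_COND_EXPR <vect_cmp_7, vect_c, vect_d>;  */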
11898
11899static bool
11900vectorizable_condition (vec_info *vinfo,
11901 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11902 gimple **vec_stmt,
11903 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11904{
11905 tree scalar_dest = NULL_TREE;
11906 tree vec_dest = NULL_TREE;
11907 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11908 tree then_clause, else_clause;
11909 tree comp_vectype = NULL_TREE;
11910 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11911 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11912 tree vec_compare;
11913 tree new_temp;
11914 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
11915 enum vect_def_type dts[4]
11916 = {vect_unknown_def_type, vect_unknown_def_type,
11917 vect_unknown_def_type, vect_unknown_def_type};
11918 int ndts = 4;
11919 int ncopies;
11920 int vec_num;
11921 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11922 int i;
11923 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
11924 vec<tree> vec_oprnds0 = vNULL;
11925 vec<tree> vec_oprnds1 = vNULL;
11926 vec<tree> vec_oprnds2 = vNULL;
11927 vec<tree> vec_oprnds3 = vNULL;
11928 tree vec_cmp_type;
11929 bool masked = false;
11930
11931 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11932 return false;
11933
11934 /* Is vectorizable conditional operation? */
11935 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
11936 if (!stmt)
11937 return false;
11938
11939 code = gimple_assign_rhs_code (gs: stmt);
11940 if (code != COND_EXPR)
11941 return false;
11942
11943 stmt_vec_info reduc_info = NULL;
11944 int reduc_index = -1;
11945 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11946 bool for_reduction
11947 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11948 if (for_reduction)
11949 {
11950 if (slp_node)
11951 return false;
11952 reduc_info = info_for_reduction (vinfo, stmt_info);
11953 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11954 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11955 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11956 || reduc_index != -1);
11957 }
11958 else
11959 {
11960 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11961 return false;
11962 }
11963
11964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11965 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11966
11967 if (slp_node)
11968 {
11969 ncopies = 1;
11970 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11971 }
11972 else
11973 {
11974 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11975 vec_num = 1;
11976 }
11977
11978 gcc_assert (ncopies >= 1);
11979 if (for_reduction && ncopies > 1)
11980 return false; /* FORNOW */
11981
11982 cond_expr = gimple_assign_rhs1 (gs: stmt);
11983
11984 if (!vect_is_simple_cond (cond: cond_expr, vinfo, stmt_info, slp_node,
11985 comp_vectype: &comp_vectype, dts: &dts[0], vectype)
11986 || !comp_vectype)
11987 return false;
11988
11989 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11990 slp_tree then_slp_node, else_slp_node;
11991 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11992 &then_clause, &then_slp_node, &dts[2], &vectype1))
11993 return false;
11994 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11995 &else_clause, &else_slp_node, &dts[3], &vectype2))
11996 return false;
11997
11998 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11999 return false;
12000
12001 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12002 return false;
12003
12004 masked = !COMPARISON_CLASS_P (cond_expr);
12005 vec_cmp_type = truth_type_for (comp_vectype);
12006
12007 if (vec_cmp_type == NULL_TREE)
12008 return false;
12009
12010 cond_code = TREE_CODE (cond_expr);
12011 if (!masked)
12012 {
12013 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12014 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12015 }
12016
12017 /* For conditional reductions, the "then" value needs to be the candidate
12018 value calculated by this iteration while the "else" value needs to be
12019 the result carried over from previous iterations. If the COND_EXPR
12020 is the other way around, we need to swap it. */
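/* For instance (illustrative only): with reduc_index == 1 a reduction
   like  r_4 = _c ? r_3 : x_2  has the carried value r_3 in the "then"
   position, so the condition is inverted (or the compare result
   negated) and the clauses swapped, giving  r_4 = !_c ? x_2 : r_3.  */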
12021 bool must_invert_cmp_result = false;
12022 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12023 {
12024 if (masked)
12025 must_invert_cmp_result = true;
12026 else
12027 {
12028 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12029 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12030 if (new_code == ERROR_MARK)
12031 must_invert_cmp_result = true;
12032 else
12033 {
12034 cond_code = new_code;
12035 /* Make sure we don't accidentally use the old condition. */
12036 cond_expr = NULL_TREE;
12037 }
12038 }
12039 std::swap (a&: then_clause, b&: else_clause);
12040 }
12041
12042 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12043 {
12044 /* Boolean values may have another representation in vectors
12045 and therefore we prefer bit operations over comparison for
12046 them (which also works for scalar masks). We store opcodes
12047 to use in bitop1 and bitop2. Statement is vectorized as
12048 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12049 depending on bitop1 and bitop2 arity. */
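/* For example (boolean/mask semantics): a > b becomes a & ~b, a >= b
   becomes a | ~b, and a == b conceptually becomes ~(a ^ b) (which the
   transform below may instead realize by computing a ^ b and swapping
   the then/else clauses); LT/LE reuse GT/GE with swapped operands.  */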
12050 switch (cond_code)
12051 {
12052 case GT_EXPR:
12053 bitop1 = BIT_NOT_EXPR;
12054 bitop2 = BIT_AND_EXPR;
12055 break;
12056 case GE_EXPR:
12057 bitop1 = BIT_NOT_EXPR;
12058 bitop2 = BIT_IOR_EXPR;
12059 break;
12060 case LT_EXPR:
12061 bitop1 = BIT_NOT_EXPR;
12062 bitop2 = BIT_AND_EXPR;
12063 std::swap (a&: cond_expr0, b&: cond_expr1);
12064 break;
12065 case LE_EXPR:
12066 bitop1 = BIT_NOT_EXPR;
12067 bitop2 = BIT_IOR_EXPR;
12068 std::swap (a&: cond_expr0, b&: cond_expr1);
12069 break;
12070 case NE_EXPR:
12071 bitop1 = BIT_XOR_EXPR;
12072 break;
12073 case EQ_EXPR:
12074 bitop1 = BIT_XOR_EXPR;
12075 bitop2 = BIT_NOT_EXPR;
12076 break;
12077 default:
12078 return false;
12079 }
12080 cond_code = SSA_NAME;
12081 }
12082
12083 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12084 && reduction_type == EXTRACT_LAST_REDUCTION
12085 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12086 {
12087 if (dump_enabled_p ())
12088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12089 "reduction comparison operation not supported.\n");
12090 return false;
12091 }
12092
12093 if (!vec_stmt)
12094 {
12095 if (bitop1 != NOP_EXPR)
12096 {
12097 machine_mode mode = TYPE_MODE (comp_vectype);
12098 optab optab;
12099
12100 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12101 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12102 return false;
12103
12104 if (bitop2 != NOP_EXPR)
12105 {
12106 optab = optab_for_tree_code (bitop2, comp_vectype,
12107 optab_default);
12108 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12109 return false;
12110 }
12111 }
12112
12113 vect_cost_for_stmt kind = vector_stmt;
12114 if (reduction_type == EXTRACT_LAST_REDUCTION)
12115 /* Count one reduction-like operation per vector. */
12116 kind = vec_to_scalar;
12117 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12118 && (masked
12119 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12120 cond_code)
12121 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12122 ERROR_MARK))))
12123 return false;
12124
12125 if (slp_node
12126 && (!vect_maybe_update_slp_op_vectype
12127 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12128 || (op_adjust == 1
12129 && !vect_maybe_update_slp_op_vectype
12130 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12131 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12132 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12133 {
12134 if (dump_enabled_p ())
12135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12136 "incompatible vector types for invariants\n");
12137 return false;
12138 }
12139
12140 if (loop_vinfo && for_reduction
12141 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12142 {
12143 if (reduction_type == EXTRACT_LAST_REDUCTION)
12144 {
12145 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12146 vectype, OPTIMIZE_FOR_SPEED))
12147 vect_record_loop_len (loop_vinfo,
12148 &LOOP_VINFO_LENS (loop_vinfo),
12149 ncopies * vec_num, vectype, 1);
12150 else
12151 vect_record_loop_mask (loop_vinfo,
12152 &LOOP_VINFO_MASKS (loop_vinfo),
12153 ncopies * vec_num, vectype, NULL);
12154 }
12155 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12156 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12157 {
12158 if (dump_enabled_p ())
12159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12160 "conditional reduction prevents the use"
12161 " of partial vectors.\n");
12162 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12163 }
12164 }
12165
12166 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12167 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt: dts, ndts, node: slp_node,
12168 cost_vec, kind);
12169 return true;
12170 }
12171
12172 /* Transform. */
12173
12174 /* Handle def. */
12175 scalar_dest = gimple_assign_lhs (gs: stmt);
12176 if (reduction_type != EXTRACT_LAST_REDUCTION)
12177 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12178
12179 bool swap_cond_operands = false;
12180
12181 /* See whether another part of the vectorized code applies a loop
12182 mask to the condition, or to its inverse. */
12183
12184 vec_loop_masks *masks = NULL;
12185 vec_loop_lens *lens = NULL;
12186 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12187 {
12188 if (reduction_type == EXTRACT_LAST_REDUCTION)
12189 lens = &LOOP_VINFO_LENS (loop_vinfo);
12190 }
12191 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12192 {
12193 if (reduction_type == EXTRACT_LAST_REDUCTION)
12194 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12195 else
12196 {
12197 scalar_cond_masked_key cond (cond_expr, ncopies);
12198 if (loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12199 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12200 else
12201 {
12202 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12203 tree_code orig_code = cond.code;
12204 cond.code = invert_tree_comparison (cond.code, honor_nans);
12205 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12206 {
12207 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12208 cond_code = cond.code;
12209 swap_cond_operands = true;
12210 }
12211 else
12212 {
12213 /* Try the inverse of the current mask. We check if the
12214 inverse mask is live and if so we generate a negate of
12215 the current mask such that we still honor NaNs. */
12216 cond.inverted_p = true;
12217 cond.code = orig_code;
12218 if (loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12219 {
12220 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12221 cond_code = cond.code;
12222 swap_cond_operands = true;
12223 must_invert_cmp_result = true;
12224 }
12225 }
12226 }
12227 }
12228 }
12229
12230 /* Handle cond expr. */
12231 if (masked)
12232 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12233 op0: cond_expr, vec_oprnds0: &vec_oprnds0, vectype0: comp_vectype,
12234 op1: then_clause, vec_oprnds1: &vec_oprnds2, vectype1: vectype,
12235 op2: reduction_type != EXTRACT_LAST_REDUCTION
12236 ? else_clause : NULL, vec_oprnds2: &vec_oprnds3, vectype2: vectype);
12237 else
12238 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12239 op0: cond_expr0, vec_oprnds0: &vec_oprnds0, vectype0: comp_vectype,
12240 op1: cond_expr1, vec_oprnds1: &vec_oprnds1, vectype1: comp_vectype,
12241 op2: then_clause, vec_oprnds2: &vec_oprnds2, vectype2: vectype,
12242 op3: reduction_type != EXTRACT_LAST_REDUCTION
12243 ? else_clause : NULL, vec_oprnds3: &vec_oprnds3, vectype3: vectype);
12244
12245 /* Arguments are ready. Create the new vector stmt. */
12246 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12247 {
12248 vec_then_clause = vec_oprnds2[i];
12249 if (reduction_type != EXTRACT_LAST_REDUCTION)
12250 vec_else_clause = vec_oprnds3[i];
12251
12252 if (swap_cond_operands)
12253 std::swap (a&: vec_then_clause, b&: vec_else_clause);
12254
12255 if (masked)
12256 vec_compare = vec_cond_lhs;
12257 else
12258 {
12259 vec_cond_rhs = vec_oprnds1[i];
12260 if (bitop1 == NOP_EXPR)
12261 {
12262 gimple_seq stmts = NULL;
12263 vec_compare = gimple_build (seq: &stmts, code: cond_code, type: vec_cmp_type,
12264 ops: vec_cond_lhs, ops: vec_cond_rhs);
12265 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12266 }
12267 else
12268 {
12269 new_temp = make_ssa_name (var: vec_cmp_type);
12270 gassign *new_stmt;
12271 if (bitop1 == BIT_NOT_EXPR)
12272 new_stmt = gimple_build_assign (new_temp, bitop1,
12273 vec_cond_rhs);
12274 else
12275 new_stmt
12276 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12277 vec_cond_rhs);
12278 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12279 if (bitop2 == NOP_EXPR)
12280 vec_compare = new_temp;
12281 else if (bitop2 == BIT_NOT_EXPR
12282 && reduction_type != EXTRACT_LAST_REDUCTION)
12283 {
12284 /* Instead of doing ~x ? y : z do x ? z : y. */
12285 vec_compare = new_temp;
12286 std::swap (a&: vec_then_clause, b&: vec_else_clause);
12287 }
12288 else
12289 {
12290 vec_compare = make_ssa_name (var: vec_cmp_type);
12291 if (bitop2 == BIT_NOT_EXPR)
12292 new_stmt
12293 = gimple_build_assign (vec_compare, bitop2, new_temp);
12294 else
12295 new_stmt
12296 = gimple_build_assign (vec_compare, bitop2,
12297 vec_cond_lhs, new_temp);
12298 vect_finish_stmt_generation (vinfo, stmt_info,
12299 vec_stmt: new_stmt, gsi);
12300 }
12301 }
12302 }
12303
12304 /* If we decided to apply a loop mask to the result of the vector
12305 comparison, AND the comparison with the mask now. Later passes
12306 should then be able to reuse the AND results between multiple
12307 vector statements.
12308
12309 For example:
12310 for (int i = 0; i < 100; ++i)
12311 x[i] = y[i] ? z[i] : 10;
12312
12313 results in following optimized GIMPLE:
12314
12315 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12316 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12317 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12318 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12319 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12320 vect_iftmp.11_47, { 10, ... }>;
12321
12322 instead of using masked and unmasked forms of
12323 vec != { 0, ... } (masked in the MASK_LOAD,
12324 unmasked in the VEC_COND_EXPR). */
12325
12326 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12327 in cases where that's necessary. */
12328
12329 tree len = NULL_TREE, bias = NULL_TREE;
12330 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12331 {
12332 if (!is_gimple_val (vec_compare))
12333 {
12334 tree vec_compare_name = make_ssa_name (var: vec_cmp_type);
12335 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12336 vec_compare);
12337 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12338 vec_compare = vec_compare_name;
12339 }
12340
12341 if (must_invert_cmp_result)
12342 {
12343 tree vec_compare_name = make_ssa_name (var: vec_cmp_type);
12344 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12345 BIT_NOT_EXPR,
12346 vec_compare);
12347 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12348 vec_compare = vec_compare_name;
12349 }
12350
12351 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12352 vectype, OPTIMIZE_FOR_SPEED))
12353 {
12354 if (lens)
12355 {
12356 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12357 vec_num * ncopies, vectype, i, 1);
12358 signed char biasval
12359 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12360 bias = build_int_cst (intQI_type_node, biasval);
12361 }
12362 else
12363 {
12364 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12365 bias = build_int_cst (intQI_type_node, 0);
12366 }
12367 }
12368 if (masks)
12369 {
12370 tree loop_mask
12371 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12372 vectype, i);
12373 tree tmp2 = make_ssa_name (var: vec_cmp_type);
12374 gassign *g
12375 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12376 loop_mask);
12377 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
12378 vec_compare = tmp2;
12379 }
12380 }
12381
12382 gimple *new_stmt;
12383 if (reduction_type == EXTRACT_LAST_REDUCTION)
12384 {
12385 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12386 tree lhs = gimple_get_lhs (old_stmt);
12387 if (len)
12388 new_stmt = gimple_build_call_internal
12389 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12390 vec_then_clause, len, bias);
12391 else
12392 new_stmt = gimple_build_call_internal
12393 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12394 vec_then_clause);
12395 gimple_call_set_lhs (gs: new_stmt, lhs);
12396 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12397 if (old_stmt == gsi_stmt (i: *gsi))
12398 vect_finish_replace_stmt (vinfo, stmt_info, vec_stmt: new_stmt);
12399 else
12400 {
12401 /* In this case we're moving the definition to later in the
12402 block. That doesn't matter because the only uses of the
12403 lhs are in phi statements. */
12404 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12405 gsi_remove (&old_gsi, true);
12406 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12407 }
12408 }
12409 else
12410 {
12411 new_temp = make_ssa_name (var: vec_dest);
12412 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12413 vec_then_clause, vec_else_clause);
12414 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12415 }
12416 if (slp_node)
12417 slp_node->push_vec_def (def: new_stmt);
12418 else
12419 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
12420 }
12421
12422 if (!slp_node)
12423 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12424
12425 vec_oprnds0.release ();
12426 vec_oprnds1.release ();
12427 vec_oprnds2.release ();
12428 vec_oprnds3.release ();
12429
12430 return true;
12431}
12432
12433/* Helper of vectorizable_comparison.
12434
12435 Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12436 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12437 comparison, put it in VEC_STMT, and insert it at GSI.
12438
12439 Return true if STMT_INFO is vectorizable in this way. */
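/* As an illustrative sketch (hypothetical GIMPLE): the scalar mask
   definition  _m = a_1 < b_2  is vectorized into a single vector
   comparison producing a boolean vector,

     vect_m_9 = vect_a < vect_b;

   or, when the operands are themselves boolean vectors, into the bit
   operations described below.  */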
12440
12441static bool
12442vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12443 stmt_vec_info stmt_info, tree_code code,
12444 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12445 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12446{
12447 tree lhs, rhs1, rhs2;
12448 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12449 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12450 tree new_temp;
12451 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
12452 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12453 int ndts = 2;
12454 poly_uint64 nunits;
12455 int ncopies;
12456 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12457 int i;
12458 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12459 vec<tree> vec_oprnds0 = vNULL;
12460 vec<tree> vec_oprnds1 = vNULL;
12461 tree mask_type;
12462 tree mask;
12463
12464 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12465 return false;
12466
12467 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12468 return false;
12469
12470 mask_type = vectype;
12471 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
12472
12473 if (slp_node)
12474 ncopies = 1;
12475 else
12476 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12477
12478 gcc_assert (ncopies >= 1);
12479
12480 if (TREE_CODE_CLASS (code) != tcc_comparison)
12481 return false;
12482
12483 slp_tree slp_rhs1, slp_rhs2;
12484 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12485 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12486 return false;
12487
12488 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12489 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12490 return false;
12491
12492 if (vectype1 && vectype2
12493 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
12494 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
12495 return false;
12496
12497 vectype = vectype1 ? vectype1 : vectype2;
12498
12499 /* Invariant comparison. */
12500 if (!vectype)
12501 {
12502 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12503 vectype = mask_type;
12504 else
12505 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12506 slp_node);
12507 if (!vectype || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits))
12508 return false;
12509 }
12510 else if (maybe_ne (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype)))
12511 return false;
12512
12513 /* Can't compare mask and non-mask types. */
12514 if (vectype1 && vectype2
12515 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12516 return false;
12517
12518 /* Boolean values may have another representation in vectors
12519 and therefore we prefer bit operations over comparison for
12520 them (which also works for scalar masks). We store opcodes
12521 to use in bitop1 and bitop2. Statement is vectorized as
12522 BITOP2 (rhs1 BITOP1 rhs2) or
12523 rhs1 BITOP2 (BITOP1 rhs2)
12524 depending on bitop1 and bitop2 arity. */
12525 bool swap_p = false;
12526 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12527 {
12528 if (code == GT_EXPR)
12529 {
12530 bitop1 = BIT_NOT_EXPR;
12531 bitop2 = BIT_AND_EXPR;
12532 }
12533 else if (code == GE_EXPR)
12534 {
12535 bitop1 = BIT_NOT_EXPR;
12536 bitop2 = BIT_IOR_EXPR;
12537 }
12538 else if (code == LT_EXPR)
12539 {
12540 bitop1 = BIT_NOT_EXPR;
12541 bitop2 = BIT_AND_EXPR;
12542 swap_p = true;
12543 }
12544 else if (code == LE_EXPR)
12545 {
12546 bitop1 = BIT_NOT_EXPR;
12547 bitop2 = BIT_IOR_EXPR;
12548 swap_p = true;
12549 }
12550 else
12551 {
12552 bitop1 = BIT_XOR_EXPR;
12553 if (code == EQ_EXPR)
12554 bitop2 = BIT_NOT_EXPR;
12555 }
12556 }
12557
12558 if (!vec_stmt)
12559 {
12560 if (bitop1 == NOP_EXPR)
12561 {
12562 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12563 return false;
12564 }
12565 else
12566 {
12567 machine_mode mode = TYPE_MODE (vectype);
12568 optab optab;
12569
12570 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12571 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12572 return false;
12573
12574 if (bitop2 != NOP_EXPR)
12575 {
12576 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12577 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12578 return false;
12579 }
12580 }
12581
12582 /* Put types on constant and invariant SLP children. */
12583 if (slp_node
12584 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12585 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12586 {
12587 if (dump_enabled_p ())
12588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12589 "incompatible vector types for invariants\n");
12590 return false;
12591 }
12592
12593 vect_model_simple_cost (vinfo, stmt_info,
12594 ncopies: ncopies * (1 + (bitop2 != NOP_EXPR)),
12595 dt: dts, ndts, node: slp_node, cost_vec);
12596 return true;
12597 }
12598
12599 /* Transform. */
12600
12601 /* Handle def. */
12602 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12603 mask = vect_create_destination_var (lhs, mask_type);
12604
12605 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12606 op0: rhs1, vec_oprnds0: &vec_oprnds0, vectype0: vectype,
12607 op1: rhs2, vec_oprnds1: &vec_oprnds1, vectype1: vectype);
12608 if (swap_p)
12609 std::swap (a&: vec_oprnds0, b&: vec_oprnds1);
12610
12611 /* Arguments are ready. Create the new vector stmt. */
12612 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12613 {
12614 gimple *new_stmt;
12615 vec_rhs2 = vec_oprnds1[i];
12616
12617 new_temp = make_ssa_name (var: mask);
12618 if (bitop1 == NOP_EXPR)
12619 {
12620 new_stmt = gimple_build_assign (new_temp, code,
12621 vec_rhs1, vec_rhs2);
12622 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12623 }
12624 else
12625 {
12626 if (bitop1 == BIT_NOT_EXPR)
12627 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12628 else
12629 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12630 vec_rhs2);
12631 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12632 if (bitop2 != NOP_EXPR)
12633 {
12634 tree res = make_ssa_name (var: mask);
12635 if (bitop2 == BIT_NOT_EXPR)
12636 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12637 else
12638 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12639 new_temp);
12640 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12641 }
12642 }
12643 if (slp_node)
12644 slp_node->push_vec_def (def: new_stmt);
12645 else
12646 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
12647 }
12648
12649 if (!slp_node)
12650 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12651
12652 vec_oprnds0.release ();
12653 vec_oprnds1.release ();
12654
12655 return true;
12656}
12657
12658/* vectorizable_comparison.
12659
12660 Check if STMT_INFO is a comparison expression that can be vectorized.
12661 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12662 comparison, put it in VEC_STMT, and insert it at GSI.
12663
12664 Return true if STMT_INFO is vectorizable in this way. */
12665
12666static bool
12667vectorizable_comparison (vec_info *vinfo,
12668 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12669 gimple **vec_stmt,
12670 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12671{
12672 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12673
12674 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12675 return false;
12676
12677 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12678 return false;
12679
12680 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
12681 if (!stmt)
12682 return false;
12683
12684 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
12685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12686 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12687 vec_stmt, slp_node, cost_vec))
12688 return false;
12689
12690 if (!vec_stmt)
12691 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12692
12693 return true;
12694}
12695
12696/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12697 can handle all live statements in the node. Otherwise return true
12698 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12699 VEC_STMT_P is as for vectorizable_live_operation. */
12700
12701static bool
12702can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12703 slp_tree slp_node, slp_instance slp_node_instance,
12704 bool vec_stmt_p,
12705 stmt_vector_for_cost *cost_vec)
12706{
12707 if (slp_node)
12708 {
12709 stmt_vec_info slp_stmt_info;
12710 unsigned int i;
12711 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12712 {
12713 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12714 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12715 slp_node_instance, i,
12716 vec_stmt_p, cost_vec))
12717 return false;
12718 }
12719 }
12720 else if (STMT_VINFO_LIVE_P (stmt_info)
12721 && !vectorizable_live_operation (vinfo, stmt_info,
12722 slp_node, slp_node_instance, -1,
12723 vec_stmt_p, cost_vec))
12724 return false;
12725
12726 return true;
12727}
12728
12729/* Make sure the statement is vectorizable. */
12730
12731opt_result
12732vect_analyze_stmt (vec_info *vinfo,
12733 stmt_vec_info stmt_info, bool *need_to_vectorize,
12734 slp_tree node, slp_instance node_instance,
12735 stmt_vector_for_cost *cost_vec)
12736{
12737 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12738 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12739 bool ok;
12740 gimple_seq pattern_def_seq;
12741
12742 if (dump_enabled_p ())
12743 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12744 stmt_info->stmt);
12745
12746 if (gimple_has_volatile_ops (stmt: stmt_info->stmt))
12747 return opt_result::failure_at (loc: stmt_info->stmt,
12748 fmt: "not vectorized:"
12749 " stmt has volatile operands: %G\n",
12750 stmt_info->stmt);
12751
12752 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12753 && node == NULL
12754 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12755 {
12756 gimple_stmt_iterator si;
12757
12758 for (si = gsi_start (seq&: pattern_def_seq); !gsi_end_p (i: si); gsi_next (i: &si))
12759 {
12760 stmt_vec_info pattern_def_stmt_info
12761 = vinfo->lookup_stmt (gsi_stmt (i: si));
12762 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12763 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12764 {
12765 /* Analyze def stmt of STMT if it's a pattern stmt. */
12766 if (dump_enabled_p ())
12767 dump_printf_loc (MSG_NOTE, vect_location,
12768 "==> examining pattern def statement: %G",
12769 pattern_def_stmt_info->stmt);
12770
12771 opt_result res
12772 = vect_analyze_stmt (vinfo, stmt_info: pattern_def_stmt_info,
12773 need_to_vectorize, node, node_instance,
12774 cost_vec);
12775 if (!res)
12776 return res;
12777 }
12778 }
12779 }
12780
12781 /* Skip stmts that do not need to be vectorized. In loops this is expected
12782 to include:
12783 - the COND_EXPR which is the loop exit condition
12784 - any LABEL_EXPRs in the loop
12785 - computations that are used only for array indexing or loop control.
12786 In basic blocks we only analyze statements that are a part of some SLP
12787 instance, therefore, all the statements are relevant.
12788
12789 A pattern statement needs to be analyzed instead of the original statement
12790 if the original statement is not relevant. Otherwise, we analyze both
12791 statements. In basic blocks we are called from some SLP instance
12792 traversal; there we don't analyze pattern stmts separately, since the
12793 pattern stmts will already be part of an SLP instance. */
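/* For instance (illustrative, hypothetical pattern): when the pattern
   matcher recognizes a widening multiply such as
   w_3 = (long) a_1 * (long) b_2, it creates a pattern statement using
   WIDEN_MULT_EXPR; if the original statement is irrelevant only the
   pattern statement is analyzed here, otherwise both are.  */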
12794
12795 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12796 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12797 && !STMT_VINFO_LIVE_P (stmt_info))
12798 {
12799 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12800 && pattern_stmt_info
12801 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12802 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12803 {
12804 /* Analyze PATTERN_STMT instead of the original stmt. */
12805 stmt_info = pattern_stmt_info;
12806 if (dump_enabled_p ())
12807 dump_printf_loc (MSG_NOTE, vect_location,
12808 "==> examining pattern statement: %G",
12809 stmt_info->stmt);
12810 }
12811 else
12812 {
12813 if (dump_enabled_p ())
12814 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12815
12816 return opt_result::success ();
12817 }
12818 }
12819 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12820 && node == NULL
12821 && pattern_stmt_info
12822 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12823 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12824 {
12825 /* Analyze PATTERN_STMT too. */
12826 if (dump_enabled_p ())
12827 dump_printf_loc (MSG_NOTE, vect_location,
12828 "==> examining pattern statement: %G",
12829 pattern_stmt_info->stmt);
12830
12831 opt_result res
12832 = vect_analyze_stmt (vinfo, stmt_info: pattern_stmt_info, need_to_vectorize, node,
12833 node_instance, cost_vec);
12834 if (!res)
12835 return res;
12836 }
12837
12838 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12839 {
12840 case vect_internal_def:
12841 break;
12842
12843 case vect_reduction_def:
12844 case vect_nested_cycle:
12845 gcc_assert (!bb_vinfo
12846 && (relevance == vect_used_in_outer
12847 || relevance == vect_used_in_outer_by_reduction
12848 || relevance == vect_used_by_reduction
12849 || relevance == vect_unused_in_scope
12850 || relevance == vect_used_only_live));
12851 break;
12852
12853 case vect_induction_def:
12854 case vect_first_order_recurrence:
12855 gcc_assert (!bb_vinfo);
12856 break;
12857
12858 case vect_constant_def:
12859 case vect_external_def:
12860 case vect_unknown_def_type:
12861 default:
12862 gcc_unreachable ();
12863 }
12864
12865 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12866 if (node)
12867 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12868
12869 if (STMT_VINFO_RELEVANT_P (stmt_info))
12870 {
12871 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
12872 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12873 || (call && gimple_call_lhs (call) == NULL_TREE));
12874 *need_to_vectorize = true;
12875 }
12876
12877 if (PURE_SLP_STMT (stmt_info) && !node)
12878 {
12879 if (dump_enabled_p ())
12880 dump_printf_loc (MSG_NOTE, vect_location,
12881 "handled only by SLP analysis\n");
12882 return opt_result::success ();
12883 }
12884
12885 ok = true;
12886 if (!bb_vinfo
12887 && (STMT_VINFO_RELEVANT_P (stmt_info)
12888 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12889 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12890 -mveclibabi= takes precedence over library functions with
12891 the simd attribute. */
12892 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
12893 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, slp_node: node,
12894 cost_vec)
12895 || vectorizable_conversion (vinfo, stmt_info,
12896 NULL, NULL, slp_node: node, cost_vec)
12897 || vectorizable_operation (vinfo, stmt_info,
12898 NULL, NULL, slp_node: node, cost_vec)
12899 || vectorizable_assignment (vinfo, stmt_info,
12900 NULL, NULL, slp_node: node, cost_vec)
12901 || vectorizable_load (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
12902 || vectorizable_store (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
12903 || vectorizable_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info,
12904 node, node_instance, cost_vec)
12905 || vectorizable_induction (as_a <loop_vec_info> (p: vinfo), stmt_info,
12906 NULL, node, cost_vec)
12907 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
12908 || vectorizable_condition (vinfo, stmt_info,
12909 NULL, NULL, slp_node: node, cost_vec)
12910 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node,
12911 cost_vec)
12912 || vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo),
12913 stmt_info, NULL, node)
12914 || vectorizable_recurr (as_a <loop_vec_info> (p: vinfo),
12915 stmt_info, NULL, node, cost_vec));
12916 else
12917 {
12918 if (bb_vinfo)
12919 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
12920 || vectorizable_simd_clone_call (vinfo, stmt_info,
12921 NULL, NULL, slp_node: node, cost_vec)
12922 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, slp_node: node,
12923 cost_vec)
12924 || vectorizable_shift (vinfo, stmt_info,
12925 NULL, NULL, slp_node: node, cost_vec)
12926 || vectorizable_operation (vinfo, stmt_info,
12927 NULL, NULL, slp_node: node, cost_vec)
12928 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, slp_node: node,
12929 cost_vec)
12930 || vectorizable_load (vinfo, stmt_info,
12931 NULL, NULL, slp_node: node, cost_vec)
12932 || vectorizable_store (vinfo, stmt_info,
12933 NULL, NULL, slp_node: node, cost_vec)
12934 || vectorizable_condition (vinfo, stmt_info,
12935 NULL, NULL, slp_node: node, cost_vec)
12936 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node,
12937 cost_vec)
12938 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12939 }
12940
12941 if (node)
12942 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12943
12944 if (!ok)
12945 return opt_result::failure_at (loc: stmt_info->stmt,
12946 fmt: "not vectorized:"
12947 " relevant stmt not supported: %G",
12948 stmt_info->stmt);
12949
12950 /* Stmts that are (also) "live" (i.e. used outside the loop)
12951 need extra handling, except for vectorizable reductions. */
12952 if (!bb_vinfo
12953 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12954 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12955 && !can_vectorize_live_stmts (vinfo: as_a <loop_vec_info> (p: vinfo),
12956 stmt_info, slp_node: node, slp_node_instance: node_instance,
12957 vec_stmt_p: false, cost_vec))
12958 return opt_result::failure_at (loc: stmt_info->stmt,
12959 fmt: "not vectorized:"
12960 " live stmt not supported: %G",
12961 stmt_info->stmt);
12962
12963 return opt_result::success ();
12964}
12965
12966
12967/* Function vect_transform_stmt.
12968
12969 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12970
12971bool
12972vect_transform_stmt (vec_info *vinfo,
12973 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12974 slp_tree slp_node, slp_instance slp_node_instance)
12975{
12976 bool is_store = false;
12977 gimple *vec_stmt = NULL;
12978 bool done;
12979
12980 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12981
12982 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12983 if (slp_node)
12984 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12985
12986 switch (STMT_VINFO_TYPE (stmt_info))
12987 {
12988 case type_demotion_vec_info_type:
12989 case type_promotion_vec_info_type:
12990 case type_conversion_vec_info_type:
12991 done = vectorizable_conversion (vinfo, stmt_info,
12992 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
12993 gcc_assert (done);
12994 break;
12995
12996 case induc_vec_info_type:
12997 done = vectorizable_induction (as_a <loop_vec_info> (p: vinfo),
12998 stmt_info, &vec_stmt, slp_node,
12999 NULL);
13000 gcc_assert (done);
13001 break;
13002
13003 case shift_vec_info_type:
13004 done = vectorizable_shift (vinfo, stmt_info,
13005 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13006 gcc_assert (done);
13007 break;
13008
13009 case op_vec_info_type:
13010 done = vectorizable_operation (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node,
13011 NULL);
13012 gcc_assert (done);
13013 break;
13014
13015 case assignment_vec_info_type:
13016 done = vectorizable_assignment (vinfo, stmt_info,
13017 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13018 gcc_assert (done);
13019 break;
13020
13021 case load_vec_info_type:
13022 done = vectorizable_load (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node,
13023 NULL);
13024 gcc_assert (done);
13025 break;
13026
13027 case store_vec_info_type:
13028 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13029 && !slp_node
13030 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13031 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13032 /* In case of interleaving, the whole chain is vectorized when the
13033 last store in the chain is reached. Store stmts before the last
13034 one are skipped, and their vec_stmt_info shouldn't be freed
13035 meanwhile. */
13036 ;
13037 else
13038 {
13039 done = vectorizable_store (vinfo, stmt_info,
13040 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13041 gcc_assert (done);
13042 is_store = true;
13043 }
13044 break;
13045
13046 case condition_vec_info_type:
13047 done = vectorizable_condition (vinfo, stmt_info,
13048 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13049 gcc_assert (done);
13050 break;
13051
13052 case comparison_vec_info_type:
13053 done = vectorizable_comparison (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt,
13054 slp_node, NULL);
13055 gcc_assert (done);
13056 break;
13057
13058 case call_vec_info_type:
13059 done = vectorizable_call (vinfo, stmt_info,
13060 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13061 break;
13062
13063 case call_simd_clone_vec_info_type:
13064 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt,
13065 slp_node, NULL);
13066 break;
13067
13068 case reduc_vec_info_type:
13069 done = vect_transform_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info,
13070 gsi, &vec_stmt, slp_node);
13071 gcc_assert (done);
13072 break;
13073
13074 case cycle_phi_info_type:
13075 done = vect_transform_cycle_phi (as_a <loop_vec_info> (p: vinfo), stmt_info,
13076 &vec_stmt, slp_node, slp_node_instance);
13077 gcc_assert (done);
13078 break;
13079
13080 case lc_phi_info_type:
13081 done = vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo),
13082 stmt_info, &vec_stmt, slp_node);
13083 gcc_assert (done);
13084 break;
13085
13086 case recurr_info_type:
13087 done = vectorizable_recurr (as_a <loop_vec_info> (p: vinfo),
13088 stmt_info, &vec_stmt, slp_node, NULL);
13089 gcc_assert (done);
13090 break;
13091
13092 case phi_info_type:
13093 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13094 gcc_assert (done);
13095 break;
13096
13097 default:
13098 if (!STMT_VINFO_LIVE_P (stmt_info))
13099 {
13100 if (dump_enabled_p ())
13101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13102 "stmt not supported.\n");
13103 gcc_unreachable ();
13104 }
13105 done = true;
13106 }
13107
13108 if (!slp_node && vec_stmt)
13109 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13110
13111 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13112 {
13113 /* Handle stmts whose DEF is used outside the loop-nest that is
13114 being vectorized. */
13115 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13116 slp_node_instance, vec_stmt_p: true, NULL);
13117 gcc_assert (done);
13118 }
13119
13120 if (slp_node)
13121 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13122
13123 return is_store;
13124}
13125
13126
13127/* Remove a group of stores (for SLP or interleaving), free their
13128 stmt_vec_info. */
13129
13130void
13131vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13132{
13133 stmt_vec_info next_stmt_info = first_stmt_info;
13134
13135 while (next_stmt_info)
13136 {
13137 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13138 next_stmt_info = vect_orig_stmt (stmt_info: next_stmt_info);
13139 /* Free the attached stmt_vec_info and remove the stmt. */
13140 vinfo->remove_stmt (next_stmt_info);
13141 next_stmt_info = tmp;
13142 }
13143}
13144
13145/* If NUNITS is nonzero, return a vector type that contains NUNITS
13146 elements of type SCALAR_TYPE, or null if the target doesn't support
13147 such a type.
13148
13149 If NUNITS is zero, return a vector type that contains elements of
13150 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13151
13152 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13153 for this vectorization region and want to "autodetect" the best choice.
13154 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13155 and we want the new type to be interoperable with it. PREVAILING_MODE
13156 in this case can be a scalar integer mode or a vector mode; when it
13157 is a vector mode, the function acts like a tree-level version of
13158 related_vector_mode. */
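
/* As a rough illustration only (results are target-dependent; the mappings
   below assume a hypothetical x86_64-like target whose preferred SIMD mode
   for SImode is the 128-bit V4SImode, writing C type names for the scalar
   tree types):

     get_related_vectype_for_scalar_type (VOIDmode, int, 0)
       -> vector(4) int      (autodetected from the preferred SIMD mode)
     get_related_vectype_for_scalar_type (V4SImode, short, 0)
       -> vector(8) short    (related_vector_mode picks V8HImode)
     get_related_vectype_for_scalar_type (V4SImode, double, 2)
       -> vector(2) double   (an explicit two-element request)  */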
13159
13160tree
13161get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13162 tree scalar_type, poly_uint64 nunits)
13163{
13164 tree orig_scalar_type = scalar_type;
13165 scalar_mode inner_mode;
13166 machine_mode simd_mode;
13167 tree vectype;
13168
13169 if ((!INTEGRAL_TYPE_P (scalar_type)
13170 && !POINTER_TYPE_P (scalar_type)
13171 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13172 || (!is_int_mode (TYPE_MODE (scalar_type), int_mode: &inner_mode)
13173 && !is_float_mode (TYPE_MODE (scalar_type), float_mode: &inner_mode)))
13174 return NULL_TREE;
13175
13176 unsigned int nbytes = GET_MODE_SIZE (mode: inner_mode);
13177
13178 /* Interoperability between modes requires one to be a constant multiple
13179 of the other, so that the number of vectors required for each operation
13180 is a compile-time constant. */
13181 if (prevailing_mode != VOIDmode
13182 && !constant_multiple_p (a: nunits * nbytes,
13183 b: GET_MODE_SIZE (mode: prevailing_mode))
13184 && !constant_multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode),
13185 b: nunits * nbytes))
13186 return NULL_TREE;
13187
13188 /* For vector types of elements whose mode precision doesn't
13189 match their type's precision we use an element type of mode
13190 precision. The vectorization routines will have to make sure
13191 they support the proper result truncation/extension.
13192 We also make sure to build vector types with INTEGER_TYPE
13193 component type only. */
13194 if (INTEGRAL_TYPE_P (scalar_type)
13195 && (GET_MODE_BITSIZE (mode: inner_mode) != TYPE_PRECISION (scalar_type)
13196 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13197 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: inner_mode),
13198 TYPE_UNSIGNED (scalar_type));
13199
13200 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13201 When the component mode passes the above test simply use a type
13202 corresponding to that mode. The theory is that any use that
13203 would cause problems with this will disable vectorization anyway. */
13204 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13205 && !INTEGRAL_TYPE_P (scalar_type))
13206 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13207
13208 /* We can't build a vector type of elements with alignment bigger than
13209 their size. */
13210 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13211 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13212 TYPE_UNSIGNED (scalar_type));
13213
13214 /* If we fell back to using the mode, fail if there was
13215 no scalar type for it. */
13216 if (scalar_type == NULL_TREE)
13217 return NULL_TREE;
13218
13219 /* If no prevailing mode was supplied, use the mode the target prefers.
13220 Otherwise lookup a vector mode based on the prevailing mode. */
13221 if (prevailing_mode == VOIDmode)
13222 {
13223 gcc_assert (known_eq (nunits, 0U));
13224 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13225 if (SCALAR_INT_MODE_P (simd_mode))
13226 {
13227 /* Traditional behavior is not to take the integer mode
13228 literally, but simply to use it as a way of determining
13229 the vector size. It is up to mode_for_vector to decide
13230 what the TYPE_MODE should be.
13231
13232 Note that nunits == 1 is allowed in order to support single
13233 element vector types. */
13234 if (!multiple_p (a: GET_MODE_SIZE (mode: simd_mode), b: nbytes, multiple: &nunits)
13235 || !mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode))
13236 return NULL_TREE;
13237 }
13238 }
13239 else if (SCALAR_INT_MODE_P (prevailing_mode)
13240 || !related_vector_mode (prevailing_mode,
13241 inner_mode, nunits).exists (mode: &simd_mode))
13242 {
13243 /* Fall back to using mode_for_vector, mostly in the hope of being
13244 able to use an integer mode. */
13245 if (known_eq (nunits, 0U)
13246 && !multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode), b: nbytes, multiple: &nunits))
13247 return NULL_TREE;
13248
13249 if (!mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode))
13250 return NULL_TREE;
13251 }
13252
13253 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13254
13255 /* In cases where the mode was chosen by mode_for_vector, check that
13256 the target actually supports the chosen mode, or that it at least
13257 allows the vector mode to be replaced by a like-sized integer. */
13258 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13259 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13260 return NULL_TREE;
13261
13262 /* Re-attach the address-space qualifier if we canonicalized the scalar
13263 type. */
13264 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13265 return build_qualified_type
13266 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13267
13268 return vectype;
13269}
13270
13271/* Function get_vectype_for_scalar_type.
13272
13273 Returns the vector type corresponding to SCALAR_TYPE as supported
13274 by the target. If GROUP_SIZE is nonzero and we're performing BB
13275 vectorization, make sure that the number of elements in the vector
13276 is no bigger than GROUP_SIZE. */
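
/* For example (a sketch only; actual modes depend on the target): during
   BB vectorization of a store group of three ints on a target whose
   natural choice would be vector(4) int, the natural type has more lanes
   than GROUP_SIZE, so the code below retries with 1 << floor_log2 (3) == 2
   lanes and returns vector(2) int if the target supports it, halving
   again otherwise.  */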
13277
13278tree
13279get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13280 unsigned int group_size)
13281{
13282 /* For BB vectorization, we should always have a group size once we've
13283 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13284 are tentative requests during things like early data reference
13285 analysis and pattern recognition. */
13286 if (is_a <bb_vec_info> (p: vinfo))
13287 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13288 else
13289 group_size = 0;
13290
13291 tree vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode,
13292 scalar_type);
13293 if (vectype && vinfo->vector_mode == VOIDmode)
13294 vinfo->vector_mode = TYPE_MODE (vectype);
13295
13296 /* Register the natural choice of vector type, before the group size
13297 has been applied. */
13298 if (vectype)
13299 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13300
13301 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13302 try again with an explicit number of elements. */
13303 if (vectype
13304 && group_size
13305 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13306 {
13307 /* Start with the biggest number of units that fits within
13308 GROUP_SIZE and halve it until we find a valid vector type.
13309 Usually either the first attempt will succeed or all will
13310 fail (in the latter case because GROUP_SIZE is too small
13311 for the target), but it's possible that a target could have
13312 a hole between supported vector types.
13313
13314 If GROUP_SIZE is not a power of 2, this has the effect of
13315 trying the largest power of 2 that fits within the group,
13316 even though the group is not a multiple of that vector size.
13317 The BB vectorizer will then try to carve up the group into
13318 smaller pieces. */
13319 unsigned int nunits = 1 << floor_log2 (x: group_size);
13320 do
13321 {
13322 vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode,
13323 scalar_type, nunits);
13324 nunits /= 2;
13325 }
13326 while (nunits > 1 && !vectype);
13327 }
13328
13329 return vectype;
13330}
13331
13332/* Return the vector type corresponding to SCALAR_TYPE as supported
13333 by the target. NODE, if nonnull, is the SLP tree node that will
13334 use the returned vector type. */
13335
13336tree
13337get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13338{
13339 unsigned int group_size = 0;
13340 if (node)
13341 group_size = SLP_TREE_LANES (node);
13342 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13343}
13344
13345/* Function get_mask_type_for_scalar_type.
13346
13347 Returns the mask type corresponding to a result of comparison
13348 of vectors of specified SCALAR_TYPE as supported by target.
13349 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13350 make sure that the number of elements in the vector is no bigger
13351 than GROUP_SIZE. */
13352
13353tree
13354get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13355 unsigned int group_size)
13356{
13357 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13358
13359 if (!vectype)
13360 return NULL;
13361
13362 return truth_type_for (vectype);
13363}
13364
13365/* Function get_mask_type_for_scalar_type.
13366
13367 Returns the mask type corresponding to a result of comparison
13368 of vectors of specified SCALAR_TYPE as supported by target.
13369 NODE, if nonnull, is the SLP tree node that will use the returned
13370 vector type. */
13371
13372tree
13373get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13374 slp_tree node)
13375{
13376 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13377
13378 if (!vectype)
13379 return NULL;
13380
13381 return truth_type_for (vectype);
13382}
13383
13384/* Function get_same_sized_vectype
13385
13386 Returns a vector type corresponding to SCALAR_TYPE of size
13387 VECTOR_TYPE if supported by the target. */
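
/* For instance, if VECTOR_TYPE is a 16-byte vector(8) short and
   SCALAR_TYPE is a 4-byte int, NUNITS becomes 16 / 4 == 4 and the result
   is the 16-byte vector(4) int (assuming the target supports it); scalar
   booleans instead short-circuit to truth_type_for.  */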
13388
13389tree
13390get_same_sized_vectype (tree scalar_type, tree vector_type)
13391{
13392 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13393 return truth_type_for (vector_type);
13394
13395 poly_uint64 nunits;
13396 if (!multiple_p (a: GET_MODE_SIZE (TYPE_MODE (vector_type)),
13397 b: GET_MODE_SIZE (TYPE_MODE (scalar_type)), multiple: &nunits))
13398 return NULL_TREE;
13399
13400 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13401 scalar_type, nunits);
13402}
13403
13404/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13405 would not change the chosen vector modes. */
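
/* Illustration (assuming an x86-like target with both 128-bit and 256-bit
   vectors): if the region has so far used only V4SImode, then VECTOR_MODE
   == V16QImode chooses the same modes, because
   related_vector_mode (V16QImode, SImode, 0) is again V4SImode, whereas
   VECTOR_MODE == V32QImode does not, since it would map SImode to
   V8SImode instead.  */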
13406
13407bool
13408vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13409{
13410 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13411 i != vinfo->used_vector_modes.end (); ++i)
13412 if (!VECTOR_MODE_P (*i)
13413 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13414 return false;
13415 return true;
13416}
13417
13418/* Function vect_is_simple_use.
13419
13420 Input:
13421 VINFO - the vect info of the loop or basic block that is being vectorized.
13422 OPERAND - operand in the loop or bb.
13423 Output:
13424 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13425 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13426 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13427 the definition could be anywhere in the function
13428 DT - the type of definition
13429
13430 Returns whether a stmt with OPERAND can be vectorized.
13431 For loops, supportable operands are constants, loop invariants, and operands
13432 that are defined by the current iteration of the loop. Unsupportable
13433 operands are those that are defined by a previous iteration of the loop (as
13434 is the case in reduction/induction computations).
13435 For basic blocks, supportable operands are constants and bb invariants.
13436 For now, operands defined outside the basic block are not supported. */
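
/* For instance, in a vectorized loop containing

     _1 = b[i_20];
     _2 = _1 * x_9(D);
     _3 = _2 + 1;

   the use of _1 in the multiplication is a vect_internal_def (defined by a
   statement inside the region), the default definition x_9(D) is a
   vect_external_def, and the literal 1 is a vect_constant_def.  (The SSA
   names here are made up for illustration.)  */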
13437
13438bool
13439vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13440 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13441{
13442 if (def_stmt_info_out)
13443 *def_stmt_info_out = NULL;
13444 if (def_stmt_out)
13445 *def_stmt_out = NULL;
13446 *dt = vect_unknown_def_type;
13447
13448 if (dump_enabled_p ())
13449 {
13450 dump_printf_loc (MSG_NOTE, vect_location,
13451 "vect_is_simple_use: operand ");
13452 if (TREE_CODE (operand) == SSA_NAME
13453 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13454 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13455 else
13456 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13457 }
13458
13459 if (CONSTANT_CLASS_P (operand))
13460 *dt = vect_constant_def;
13461 else if (is_gimple_min_invariant (operand))
13462 *dt = vect_external_def;
13463 else if (TREE_CODE (operand) != SSA_NAME)
13464 *dt = vect_unknown_def_type;
13465 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13466 *dt = vect_external_def;
13467 else
13468 {
13469 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13470 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13471 if (!stmt_vinfo)
13472 *dt = vect_external_def;
13473 else
13474 {
13475 stmt_vinfo = vect_stmt_to_vectorize (stmt_info: stmt_vinfo);
13476 def_stmt = stmt_vinfo->stmt;
13477 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13478 if (def_stmt_info_out)
13479 *def_stmt_info_out = stmt_vinfo;
13480 }
13481 if (def_stmt_out)
13482 *def_stmt_out = def_stmt;
13483 }
13484
13485 if (dump_enabled_p ())
13486 {
13487 dump_printf (MSG_NOTE, ", type of def: ");
13488 switch (*dt)
13489 {
13490 case vect_uninitialized_def:
13491 dump_printf (MSG_NOTE, "uninitialized\n");
13492 break;
13493 case vect_constant_def:
13494 dump_printf (MSG_NOTE, "constant\n");
13495 break;
13496 case vect_external_def:
13497 dump_printf (MSG_NOTE, "external\n");
13498 break;
13499 case vect_internal_def:
13500 dump_printf (MSG_NOTE, "internal\n");
13501 break;
13502 case vect_induction_def:
13503 dump_printf (MSG_NOTE, "induction\n");
13504 break;
13505 case vect_reduction_def:
13506 dump_printf (MSG_NOTE, "reduction\n");
13507 break;
13508 case vect_double_reduction_def:
13509 dump_printf (MSG_NOTE, "double reduction\n");
13510 break;
13511 case vect_nested_cycle:
13512 dump_printf (MSG_NOTE, "nested cycle\n");
13513 break;
13514 case vect_first_order_recurrence:
13515 dump_printf (MSG_NOTE, "first order recurrence\n");
13516 break;
13517 case vect_unknown_def_type:
13518 dump_printf (MSG_NOTE, "unknown\n");
13519 break;
13520 }
13521 }
13522
13523 if (*dt == vect_unknown_def_type)
13524 {
13525 if (dump_enabled_p ())
13526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13527 "Unsupported pattern.\n");
13528 return false;
13529 }
13530
13531 return true;
13532}
13533
13534/* Function vect_is_simple_use.
13535
13536 Same as vect_is_simple_use but also determines the vector operand
13537 type of OPERAND and stores it to *VECTYPE. If the definition of
13538 OPERAND is vect_uninitialized_def, vect_constant_def or
13539 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
13540 is responsible for computing the best-suited vector type for the
13541 scalar operand. */
13542
13543bool
13544vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13545 tree *vectype, stmt_vec_info *def_stmt_info_out,
13546 gimple **def_stmt_out)
13547{
13548 stmt_vec_info def_stmt_info;
13549 gimple *def_stmt;
13550 if (!vect_is_simple_use (operand, vinfo, dt, def_stmt_info_out: &def_stmt_info, def_stmt_out: &def_stmt))
13551 return false;
13552
13553 if (def_stmt_out)
13554 *def_stmt_out = def_stmt;
13555 if (def_stmt_info_out)
13556 *def_stmt_info_out = def_stmt_info;
13557
13558 /* Now get a vector type if the def is internal, otherwise supply
13559 NULL_TREE and leave it up to the caller to figure out a proper
13560 type for the use stmt. */
13561 if (*dt == vect_internal_def
13562 || *dt == vect_induction_def
13563 || *dt == vect_reduction_def
13564 || *dt == vect_double_reduction_def
13565 || *dt == vect_nested_cycle
13566 || *dt == vect_first_order_recurrence)
13567 {
13568 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13569 gcc_assert (*vectype != NULL_TREE);
13570 if (dump_enabled_p ())
13571 dump_printf_loc (MSG_NOTE, vect_location,
13572 "vect_is_simple_use: vectype %T\n", *vectype);
13573 }
13574 else if (*dt == vect_uninitialized_def
13575 || *dt == vect_constant_def
13576 || *dt == vect_external_def)
13577 *vectype = NULL_TREE;
13578 else
13579 gcc_unreachable ();
13580
13581 return true;
13582}
13583
13584/* Function vect_is_simple_use.
13585
13586 Same as vect_is_simple_use but obtains the operand at position
13587 OPERAND from either STMT or SLP_NODE, filling in *OP
13588 and *SLP_DEF (when SLP_NODE is not NULL).
13589
13590bool
13591vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13592 unsigned operand, tree *op, slp_tree *slp_def,
13593 enum vect_def_type *dt,
13594 tree *vectype, stmt_vec_info *def_stmt_info_out)
13595{
13596 if (slp_node)
13597 {
13598 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13599 *slp_def = child;
13600 *vectype = SLP_TREE_VECTYPE (child);
13601 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13602 {
13603 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13604 return vect_is_simple_use (operand: *op, vinfo, dt, def_stmt_info_out);
13605 }
13606 else
13607 {
13608 if (def_stmt_info_out)
13609 *def_stmt_info_out = NULL;
13610 *op = SLP_TREE_SCALAR_OPS (child)[0];
13611 *dt = SLP_TREE_DEF_TYPE (child);
13612 return true;
13613 }
13614 }
13615 else
13616 {
13617 *slp_def = NULL;
13618 if (gassign *ass = dyn_cast <gassign *> (p: stmt->stmt))
13619 {
13620 if (gimple_assign_rhs_code (gs: ass) == COND_EXPR
13621 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13622 {
13623 if (operand < 2)
13624 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13625 else
13626 *op = gimple_op (gs: ass, i: operand);
13627 }
13628 else if (gimple_assign_rhs_code (gs: ass) == VIEW_CONVERT_EXPR)
13629 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13630 else
13631 *op = gimple_op (gs: ass, i: operand + 1);
13632 }
13633 else if (gcall *call = dyn_cast <gcall *> (p: stmt->stmt))
13634 *op = gimple_call_arg (gs: call, index: operand);
13635 else
13636 gcc_unreachable ();
13637 return vect_is_simple_use (operand: *op, vinfo, dt, vectype, def_stmt_info_out);
13638 }
13639}
13640
13641/* If OP is not NULL and is external or constant, update its vector
13642 type with VECTYPE. Returns true if successful or false if not,
13643 for example when conflicting vector types are present. */
13644
13645bool
13646vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13647{
13648 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13649 return true;
13650 if (SLP_TREE_VECTYPE (op))
13651 return types_compatible_p (SLP_TREE_VECTYPE (op), type2: vectype);
13652 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13653 should be handled by patterns. Allow vect_constant_def for now. */
13654 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13655 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13656 return false;
13657 SLP_TREE_VECTYPE (op) = vectype;
13658 return true;
13659}
13660
13661/* Function supportable_widening_operation
13662
13663 Check whether an operation represented by the code CODE is a
13664 widening operation that is supported by the target platform in
13665 vector form (i.e., when operating on arguments of type VECTYPE_IN
13666 producing a result of type VECTYPE_OUT).
13667
13668 Widening operations we currently support are NOP (CONVERT), FLOAT,
13669 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13670 are supported by the target platform either directly (via vector
13671 tree-codes), or via target builtins.
13672
13673 Output:
13674 - CODE1 and CODE2 are codes of vector operations to be used when
13675 vectorizing the operation, if available.
13676 - MULTI_STEP_CVT determines the number of required intermediate steps in
13677 case of multi-step conversion (like char->short->int - in that case
13678 MULTI_STEP_CVT will be 1).
13679 - INTERM_TYPES contains the intermediate type required to perform the
13680 widening operation (short in the above example). */
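
/* For the char->short->int example above, on a 128-bit-vector target the
   expected outcome (a sketch, not a guarantee) is:

     VECTYPE_IN  = vector(16) char, VECTYPE_OUT = vector(4) int
     *CODE1/*CODE2   = VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR
     *MULTI_STEP_CVT = 1
     *INTERM_TYPES   = { vector(8) short }

   i.e. one intermediate unpack to short is needed before reaching int.  */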
13681
13682bool
13683supportable_widening_operation (vec_info *vinfo,
13684 code_helper code,
13685 stmt_vec_info stmt_info,
13686 tree vectype_out, tree vectype_in,
13687 code_helper *code1,
13688 code_helper *code2,
13689 int *multi_step_cvt,
13690 vec<tree> *interm_types)
13691{
13692 loop_vec_info loop_info = dyn_cast <loop_vec_info> (p: vinfo);
13693 class loop *vect_loop = NULL;
13694 machine_mode vec_mode;
13695 enum insn_code icode1, icode2;
13696 optab optab1 = unknown_optab, optab2 = unknown_optab;
13697 tree vectype = vectype_in;
13698 tree wide_vectype = vectype_out;
13699 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13700 int i;
13701 tree prev_type, intermediate_type;
13702 machine_mode intermediate_mode, prev_mode;
13703 optab optab3, optab4;
13704
13705 *multi_step_cvt = 0;
13706 if (loop_info)
13707 vect_loop = LOOP_VINFO_LOOP (loop_info);
13708
13709 switch (code.safe_as_tree_code ())
13710 {
13711 case MAX_TREE_CODES:
13712 /* Don't set c1 and c2 if code is not a tree_code. */
13713 break;
13714
13715 case WIDEN_MULT_EXPR:
13716 /* The result of a vectorized widening operation usually requires
13717 two vectors (because the widened results do not fit into one vector).
13718 The generated vector results would normally be expected to be
13719 generated in the same order as in the original scalar computation,
13720 i.e. if 8 results are generated in each vector iteration, they are
13721 to be organized as follows:
13722 vect1: [res1,res2,res3,res4],
13723 vect2: [res5,res6,res7,res8].
13724
13725 However, in the special case that the result of the widening
13726 operation is used in a reduction computation only, the order doesn't
13727 matter (because when vectorizing a reduction we change the order of
13728 the computation). Some targets can take advantage of this and
13729 generate more efficient code. For example, targets like Altivec,
13730 that support widen_mult using a sequence of {mult_even,mult_odd}
13731 generate the following vectors:
13732 vect1: [res1,res3,res5,res7],
13733 vect2: [res2,res4,res6,res8].
13734
13735 When vectorizing outer-loops, we execute the inner-loop sequentially
13736 (each vectorized inner-loop iteration contributes to VF outer-loop
13737 iterations in parallel). We therefore don't allow changing the
13738 order of the computation in the inner-loop during outer-loop
13739 vectorization. */
13740 /* TODO: Another case in which order doesn't *really* matter is when we
13741 widen and then contract again, e.g. (short)((int)x * y >> 8).
13742 Normally, pack_trunc performs an even/odd permute, whereas the
13743 repack from an even/odd expansion would be an interleave, which
13744 would be significantly simpler for e.g. AVX2. */
13745 /* In any case, in order to avoid duplicating the code below, recurse
13746 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13747 are properly set up for the caller. If we fail, we'll continue with
13748 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13749 if (vect_loop
13750 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13751 && !nested_in_vect_loop_p (loop: vect_loop, stmt_info)
13752 && supportable_widening_operation (vinfo, code: VEC_WIDEN_MULT_EVEN_EXPR,
13753 stmt_info, vectype_out,
13754 vectype_in, code1,
13755 code2, multi_step_cvt,
13756 interm_types))
13757 {
13758 /* Elements in a vector with vect_used_by_reduction property cannot
13759 be reordered if the use chain with this property does not have the
13760 same operation. One such example is s += a * b, where elements
13761 in a and b cannot be reordered. Here we check if the vector defined
13762 by STMT is only directly used in the reduction statement. */
13763 tree lhs = gimple_assign_lhs (gs: stmt_info->stmt);
13764 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13765 if (use_stmt_info
13766 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13767 return true;
13768 }
13769 c1 = VEC_WIDEN_MULT_LO_EXPR;
13770 c2 = VEC_WIDEN_MULT_HI_EXPR;
13771 break;
13772
13773 case DOT_PROD_EXPR:
13774 c1 = DOT_PROD_EXPR;
13775 c2 = DOT_PROD_EXPR;
13776 break;
13777
13778 case SAD_EXPR:
13779 c1 = SAD_EXPR;
13780 c2 = SAD_EXPR;
13781 break;
13782
13783 case VEC_WIDEN_MULT_EVEN_EXPR:
13784 /* Support the recursion induced just above. */
13785 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13786 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13787 break;
13788
13789 case WIDEN_LSHIFT_EXPR:
13790 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13791 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13792 break;
13793
13794 CASE_CONVERT:
13795 c1 = VEC_UNPACK_LO_EXPR;
13796 c2 = VEC_UNPACK_HI_EXPR;
13797 break;
13798
13799 case FLOAT_EXPR:
13800 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13801 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13802 break;
13803
13804 case FIX_TRUNC_EXPR:
13805 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13806 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13807 break;
13808
13809 default:
13810 gcc_unreachable ();
13811 }
13812
13813 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13814 std::swap (a&: c1, b&: c2);
13815
13816 if (code == FIX_TRUNC_EXPR)
13817 {
13818 /* The signedness is determined from output operand. */
13819 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13820 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13821 }
13822 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13823 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13824 && VECTOR_BOOLEAN_TYPE_P (vectype)
13825 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13826 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13827 {
13828 /* If the input and result modes are the same, a different optab
13829 is needed where we pass in the number of units in vectype. */
13830 optab1 = vec_unpacks_sbool_lo_optab;
13831 optab2 = vec_unpacks_sbool_hi_optab;
13832 }
13833
13834 vec_mode = TYPE_MODE (vectype);
13835 if (widening_fn_p (code))
13836 {
13837 /* If this is an internal fn then we must check whether the target
13838 supports either a low-high split or an even-odd split. */
13839 internal_fn ifn = as_internal_fn (code: (combined_fn) code);
13840
13841 internal_fn lo, hi, even, odd;
13842 lookup_hilo_internal_fn (ifn, &lo, &hi);
13843 *code1 = as_combined_fn (fn: lo);
13844 *code2 = as_combined_fn (fn: hi);
13845 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13846 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13847
13848 /* If we don't support low-high, then check for even-odd. */
13849 if (!optab1
13850 || (icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing
13851 || !optab2
13852 || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing)
13853 {
13854 lookup_evenodd_internal_fn (ifn, &even, &odd);
13855 *code1 = as_combined_fn (fn: even);
13856 *code2 = as_combined_fn (fn: odd);
13857 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13858 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13859 }
13860 }
13861 else if (code.is_tree_code ())
13862 {
13863 if (code == FIX_TRUNC_EXPR)
13864 {
13865 /* The signedness is determined from output operand. */
13866 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13867 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13868 }
13869 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13870 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13871 && VECTOR_BOOLEAN_TYPE_P (vectype)
13872 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13873 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13874 {
13875 /* If the input and result modes are the same, a different optab
13876 is needed where we pass in the number of units in vectype. */
13877 optab1 = vec_unpacks_sbool_lo_optab;
13878 optab2 = vec_unpacks_sbool_hi_optab;
13879 }
13880 else
13881 {
13882 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13883 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13884 }
13885 *code1 = c1;
13886 *code2 = c2;
13887 }
13888
13889 if (!optab1 || !optab2)
13890 return false;
13891
13892 if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing
13893 || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing)
13894 return false;
13895
13896
13897 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13898 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13899 {
13900 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13901 return true;
13902 /* For scalar masks we may have different boolean
13903 vector types having the same QImode. Thus we
13904 add an additional check on the number of elements. */
13905 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13906 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13907 return true;
13908 }
13909
13910 /* Check if it's a multi-step conversion that can be done using intermediate
13911 types. */
13912
13913 prev_type = vectype;
13914 prev_mode = vec_mode;
13915
13916 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13917 return false;
13918
13919 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13920 intermediate steps in the promotion sequence. We try
13921 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13922 not. */
13923 interm_types->create (MAX_INTERM_CVT_STEPS);
13924 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13925 {
13926 intermediate_mode = insn_data[icode1].operand[0].mode;
13927 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13928 intermediate_type
13929 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13930 else if (VECTOR_MODE_P (intermediate_mode))
13931 {
13932 tree intermediate_element_type
13933 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13934 TYPE_UNSIGNED (prev_type));
13935 intermediate_type
13936 = build_vector_type_for_mode (intermediate_element_type,
13937 intermediate_mode);
13938 }
13939 else
13940 intermediate_type
13941 = lang_hooks.types.type_for_mode (intermediate_mode,
13942 TYPE_UNSIGNED (prev_type));
13943
13944 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13945 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13946 && intermediate_mode == prev_mode
13947 && SCALAR_INT_MODE_P (prev_mode))
13948 {
13949 /* If the input and result modes are the same, a different optab
13950 is needed where we pass in the number of units in vectype. */
13951 optab3 = vec_unpacks_sbool_lo_optab;
13952 optab4 = vec_unpacks_sbool_hi_optab;
13953 }
13954 else
13955 {
13956 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13957 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13958 }
13959
13960 if (!optab3 || !optab4
13961 || (icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing
13962 || insn_data[icode1].operand[0].mode != intermediate_mode
13963 || (icode2 = optab_handler (op: optab2, mode: prev_mode)) == CODE_FOR_nothing
13964 || insn_data[icode2].operand[0].mode != intermediate_mode
13965 || ((icode1 = optab_handler (op: optab3, mode: intermediate_mode))
13966 == CODE_FOR_nothing)
13967 || ((icode2 = optab_handler (op: optab4, mode: intermediate_mode))
13968 == CODE_FOR_nothing))
13969 break;
13970
13971 interm_types->quick_push (obj: intermediate_type);
13972 (*multi_step_cvt)++;
13973
13974 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13975 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13976 {
13977 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13978 return true;
13979 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13980 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13981 return true;
13982 }
13983
13984 prev_type = intermediate_type;
13985 prev_mode = intermediate_mode;
13986 }
13987
13988 interm_types->release ();
13989 return false;
13990}
13991
13992
13993/* Function supportable_narrowing_operation
13994
13995 Check whether an operation represented by the code CODE is a
13996 narrowing operation that is supported by the target platform in
13997 vector form (i.e., when operating on arguments of type VECTYPE_IN
13998 and producing a result of type VECTYPE_OUT).
13999
14000 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14001 and FLOAT. This function checks if these operations are supported by
14002 the target platform directly via vector tree-codes.
14003
14004 Output:
14005 - CODE1 is the code of a vector operation to be used when
14006 vectorizing the operation, if available.
14007 - MULTI_STEP_CVT determines the number of required intermediate steps in
14008 case of multi-step conversion (like int->short->char - in that case
14009 MULTI_STEP_CVT will be 1).
14010 - INTERM_TYPES contains the intermediate type required to perform the
14011 narrowing operation (short in the above example). */
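
/* For the int->short->char example above, on a 128-bit-vector target the
   expected outcome (a sketch, not a guarantee) is:

     VECTYPE_IN  = vector(4) int, VECTYPE_OUT = vector(16) char
     *CODE1          = VEC_PACK_TRUNC_EXPR
     *MULTI_STEP_CVT = 1
     *INTERM_TYPES   = { vector(8) short }  */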
14012
14013bool
14014supportable_narrowing_operation (code_helper code,
14015 tree vectype_out, tree vectype_in,
14016 code_helper *code1, int *multi_step_cvt,
14017 vec<tree> *interm_types)
14018{
14019 machine_mode vec_mode;
14020 enum insn_code icode1;
14021 optab optab1, interm_optab;
14022 tree vectype = vectype_in;
14023 tree narrow_vectype = vectype_out;
14024 enum tree_code c1;
14025 tree intermediate_type, prev_type;
14026 machine_mode intermediate_mode, prev_mode;
14027 int i;
14028 unsigned HOST_WIDE_INT n_elts;
14029 bool uns;
14030
14031 if (!code.is_tree_code ())
14032 return false;
14033
14034 *multi_step_cvt = 0;
14035 switch ((tree_code) code)
14036 {
14037 CASE_CONVERT:
14038 c1 = VEC_PACK_TRUNC_EXPR;
14039 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14040 && VECTOR_BOOLEAN_TYPE_P (vectype)
14041 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14042 && TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &n_elts)
14043 && n_elts < BITS_PER_UNIT)
14044 optab1 = vec_pack_sbool_trunc_optab;
14045 else
14046 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14047 break;
14048
14049 case FIX_TRUNC_EXPR:
14050 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14051 /* The signedness is determined from output operand. */
14052 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14053 break;
14054
14055 case FLOAT_EXPR:
14056 c1 = VEC_PACK_FLOAT_EXPR;
14057 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14058 break;
14059
14060 default:
14061 gcc_unreachable ();
14062 }
14063
14064 if (!optab1)
14065 return false;
14066
14067 vec_mode = TYPE_MODE (vectype);
14068 if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing)
14069 return false;
14070
14071 *code1 = c1;
14072
14073 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14074 {
14075 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14076 return true;
14077 /* For scalar masks we may have different boolean
14078 vector types having the same QImode. Thus we
14079 add an additional check on the number of elements. */
14080 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14081 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14082 return true;
14083 }
14084
14085 if (code == FLOAT_EXPR)
14086 return false;
14087
14088 /* Check if it's a multi-step conversion that can be done using intermediate
14089 types. */
14090 prev_mode = vec_mode;
14091 prev_type = vectype;
14092 if (code == FIX_TRUNC_EXPR)
14093 uns = TYPE_UNSIGNED (vectype_out);
14094 else
14095 uns = TYPE_UNSIGNED (vectype);
14096
14097 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14098 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14099 costly than signed. */
14100 if (code == FIX_TRUNC_EXPR && uns)
14101 {
14102 enum insn_code icode2;
14103
14104 intermediate_type
14105 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14106 interm_optab
14107 = optab_for_tree_code (c1, intermediate_type, optab_default);
14108 if (interm_optab != unknown_optab
14109 && (icode2 = optab_handler (op: optab1, mode: vec_mode)) != CODE_FOR_nothing
14110 && insn_data[icode1].operand[0].mode
14111 == insn_data[icode2].operand[0].mode)
14112 {
14113 uns = false;
14114 optab1 = interm_optab;
14115 icode1 = icode2;
14116 }
14117 }
14118
14119 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14120 intermediate steps in the narrowing sequence. We try
14121 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14122 interm_types->create (MAX_INTERM_CVT_STEPS);
14123 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14124 {
14125 intermediate_mode = insn_data[icode1].operand[0].mode;
14126 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14127 intermediate_type
14128 = vect_double_mask_nunits (prev_type, intermediate_mode);
14129 else
14130 intermediate_type
14131 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14132 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14133 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14134 && SCALAR_INT_MODE_P (prev_mode)
14135 && TYPE_VECTOR_SUBPARTS (node: intermediate_type).is_constant (const_value: &n_elts)
14136 && n_elts < BITS_PER_UNIT)
14137 interm_optab = vec_pack_sbool_trunc_optab;
14138 else
14139 interm_optab
14140 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14141 optab_default);
14142 if (!interm_optab
14143 || ((icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing)
14144 || insn_data[icode1].operand[0].mode != intermediate_mode
14145 || ((icode1 = optab_handler (op: interm_optab, mode: intermediate_mode))
14146 == CODE_FOR_nothing))
14147 break;
14148
14149 interm_types->quick_push (obj: intermediate_type);
14150 (*multi_step_cvt)++;
14151
14152 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14153 {
14154 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14155 return true;
14156 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14157 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14158 return true;
14159 }
14160
14161 prev_mode = intermediate_mode;
14162 prev_type = intermediate_type;
14163 optab1 = interm_optab;
14164 }
14165
14166 interm_types->release ();
14167 return false;
14168}
14169
14170/* Generate and return a vector mask of MASK_TYPE such that
14171 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14172 Add the statements to SEQ. */
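
/* For example, with a four-element MASK_TYPE, START_INDEX 6 and
   END_INDEX 9, the generated IFN_WHILE_ULT call produces the mask
   { 1, 1, 1, 0 }: lanes 0..2 satisfy 6 + J < 9, lane 3 does not.  */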
14173
14174tree
14175vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14176 tree end_index, const char *name)
14177{
14178 tree cmp_type = TREE_TYPE (start_index);
14179 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14180 cmp_type, mask_type,
14181 OPTIMIZE_FOR_SPEED));
14182 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14183 start_index, end_index,
14184 build_zero_cst (mask_type));
14185 tree tmp;
14186 if (name)
14187 tmp = make_temp_ssa_name (type: mask_type, NULL, name);
14188 else
14189 tmp = make_ssa_name (var: mask_type);
14190 gimple_call_set_lhs (gs: call, lhs: tmp);
14191 gimple_seq_add_stmt (seq, call);
14192 return tmp;
14193}
14194
14195/* Generate a vector mask of type MASK_TYPE for which index I is false iff
14196 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14197
14198tree
14199vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14200 tree end_index)
14201{
14202 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14203 return gimple_build (seq, code: BIT_NOT_EXPR, type: mask_type, ops: tmp);
14204}
14205
14206/* Try to compute the vector types required to vectorize STMT_INFO,
14207 returning true on success and false if vectorization isn't possible.
14208 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14209 make sure that the number of elements in the vectors is no bigger
14210 than GROUP_SIZE.
14211
14212 On success:
14213
14214 - Set *STMT_VECTYPE_OUT to:
14215 - NULL_TREE if the statement doesn't need to be vectorized;
14216 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14217
14218 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14219 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14220 statement does not help to determine the overall number of units. */
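
/* For instance (assuming 128-bit vectors), for a widening statement such
   as  int_x = (int) short_y  the statement vectype is vector(4) int,
   while the smallest scalar type involved is short, so *NUNITS_VECTYPE_OUT
   becomes vector(8) short and it is the short elements that drive the
   number of units.  */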
14221
14222opt_result
14223vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14224 tree *stmt_vectype_out,
14225 tree *nunits_vectype_out,
14226 unsigned int group_size)
14227{
14228 gimple *stmt = stmt_info->stmt;
14229
14230 /* For BB vectorization, we should always have a group size once we've
14231 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14232 are tentative requests during things like early data reference
14233 analysis and pattern recognition. */
14234 if (is_a <bb_vec_info> (p: vinfo))
14235 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14236 else
14237 group_size = 0;
14238
14239 *stmt_vectype_out = NULL_TREE;
14240 *nunits_vectype_out = NULL_TREE;
14241
14242 if (gimple_get_lhs (stmt) == NULL_TREE
14243 /* MASK_STORE has no lhs, but is ok. */
14244 && !gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE))
14245 {
14246 if (is_a <gcall *> (p: stmt))
14247 {
14248 /* Ignore calls with no lhs. These must be calls to
14249 #pragma omp simd functions, and what vectorization factor
14250 they really need can't be determined until
14251 vectorizable_simd_clone_call. */
14252 if (dump_enabled_p ())
14253 dump_printf_loc (MSG_NOTE, vect_location,
14254 "defer to SIMD clone analysis.\n");
14255 return opt_result::success ();
14256 }
14257
14258 return opt_result::failure_at (loc: stmt,
14259 fmt: "not vectorized: irregular stmt.%G", stmt);
14260 }
14261
14262 tree vectype;
14263 tree scalar_type = NULL_TREE;
14264 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14265 {
14266 vectype = STMT_VINFO_VECTYPE (stmt_info);
14267 if (dump_enabled_p ())
14268 dump_printf_loc (MSG_NOTE, vect_location,
14269 "precomputed vectype: %T\n", vectype);
14270 }
14271 else if (vect_use_mask_type_p (stmt_info))
14272 {
14273 unsigned int precision = stmt_info->mask_precision;
14274 scalar_type = build_nonstandard_integer_type (precision, 1);
14275 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14276 if (!vectype)
14277 return opt_result::failure_at (loc: stmt, fmt: "not vectorized: unsupported"
14278 " data-type %T\n", scalar_type);
14279 if (dump_enabled_p ())
14280 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14281 }
14282 else
14283 {
14284 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14285 scalar_type = TREE_TYPE (DR_REF (dr));
14286 else if (gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE))
14287 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14288 else
14289 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14290
14291 if (dump_enabled_p ())
14292 {
14293 if (group_size)
14294 dump_printf_loc (MSG_NOTE, vect_location,
14295 "get vectype for scalar type (group size %d):"
14296 " %T\n", group_size, scalar_type);
14297 else
14298 dump_printf_loc (MSG_NOTE, vect_location,
14299 "get vectype for scalar type: %T\n", scalar_type);
14300 }
14301 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14302 if (!vectype)
14303 return opt_result::failure_at (loc: stmt,
14304 fmt: "not vectorized:"
14305 " unsupported data-type %T\n",
14306 scalar_type);
14307
14308 if (dump_enabled_p ())
14309 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14310 }
14311
14312 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14313 return opt_result::failure_at (loc: stmt,
14314 fmt: "not vectorized: vector stmt in loop:%G",
14315 stmt);
14316
14317 *stmt_vectype_out = vectype;
14318
14319 /* Don't try to compute scalar types if the stmt produces a boolean
14320 vector; use the existing vector type instead. */
14321 tree nunits_vectype = vectype;
14322 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14323 {
14324 /* The number of units is set according to the smallest scalar
14325 type (or the largest vector size, but we only support one
14326 vector size per vectorization). */
14327 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14328 TREE_TYPE (vectype));
14329 if (scalar_type != TREE_TYPE (vectype))
14330 {
14331 if (dump_enabled_p ())
14332 dump_printf_loc (MSG_NOTE, vect_location,
14333 "get vectype for smallest scalar type: %T\n",
14334 scalar_type);
14335 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14336 group_size);
14337 if (!nunits_vectype)
14338 return opt_result::failure_at
14339 (loc: stmt, fmt: "not vectorized: unsupported data-type %T\n",
14340 scalar_type);
14341 if (dump_enabled_p ())
14342 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14343 nunits_vectype);
14344 }
14345 }
14346
14347 if (!multiple_p (a: TYPE_VECTOR_SUBPARTS (node: nunits_vectype),
14348 b: TYPE_VECTOR_SUBPARTS (node: *stmt_vectype_out)))
14349 return opt_result::failure_at (loc: stmt,
14350 fmt: "Not vectorized: Incompatible number "
14351 "of vector subparts between %T and %T\n",
14352 nunits_vectype, *stmt_vectype_out);
14353
14354 if (dump_enabled_p ())
14355 {
14356 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14357 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (node: nunits_vectype));
14358 dump_printf (MSG_NOTE, "\n");
14359 }
14360
14361 *nunits_vectype_out = nunits_vectype;
14362 return opt_result::success ();
14363}
14364
14365/* Generate and return a statement sequence that sets the vector length LEN:
14366
14367 min_of_start_and_end = min (START_INDEX, END_INDEX);
14368 left_len = END_INDEX - min_of_start_and_end;
14369 rhs = min (left_len, LEN_LIMIT);
14370 LEN = rhs;
14371
14372 Note: the cost of the code generated by this function is modeled
14373 by vect_estimate_min_profitable_iters, so changes here may need
14374 corresponding changes there. */
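
/* Worked example: with LEN_LIMIT 16 and END_INDEX 37, the iteration with
   START_INDEX 16 computes min = 16, left_len = 21, LEN = min (21, 16) = 16
   (a full vector), while the final iteration with START_INDEX 32 computes
   min = 32, left_len = 5, LEN = min (5, 16) = 5.  */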
14375
14376gimple_seq
14377vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14378{
14379 gimple_seq stmts = NULL;
14380 tree len_type = TREE_TYPE (len);
14381 gcc_assert (TREE_TYPE (start_index) == len_type);
14382
14383 tree min = gimple_build (seq: &stmts, code: MIN_EXPR, type: len_type, ops: start_index, ops: end_index);
14384 tree left_len = gimple_build (seq: &stmts, code: MINUS_EXPR, type: len_type, ops: end_index, ops: min);
14385 tree rhs = gimple_build (seq: &stmts, code: MIN_EXPR, type: len_type, ops: left_len, ops: len_limit);
14386 gimple* stmt = gimple_build_assign (len, rhs);
14387 gimple_seq_add_stmt (&stmts, stmt);
14388
14389 return stmts;
14390}
14391
14392