1/* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "gimple-iterator.h"
29#include "gimple-fold.h"
30#include "ssa.h"
31#include "expmed.h"
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
35#include "fold-const.h"
36#include "stor-layout.h"
37#include "tree-eh.h"
38#include "gimplify.h"
39#include "gimple-iterator.h"
40#include "gimple-fold.h"
41#include "gimplify-me.h"
42#include "cfgloop.h"
43#include "tree-vectorizer.h"
44#include "dumpfile.h"
45#include "builtins.h"
46#include "internal-fn.h"
47#include "case-cfn-macros.h"
48#include "fold-const-call.h"
49#include "attribs.h"
50#include "cgraph.h"
51#include "omp-simd-clone.h"
52#include "predict.h"
53#include "tree-vector-builder.h"
54#include "vec-perm-indices.h"
55#include "gimple-range.h"
56
57
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work; the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
61
62
63/* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
65
66bool
67vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
68{
  value_range vr;
  tree vr_min, vr_max;
  get_range_query (cfun)->range_of_expr (vr, var);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  *min_value = wi::to_wide (vr_min);
  *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
81 {
82 if (dump_enabled_p ())
83 {
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
90 }
91 return true;
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
99 }
100 return false;
101 }
102}
103
104/* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
106
107static void
108vect_pattern_detected (const char *name, gimple *stmt)
109{
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
112}
113
114/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
117
118static stmt_vec_info
119vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
121{
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
126
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
133 {
134 gcc_assert (!vectype
135 || (VECTOR_BOOLEAN_TYPE_P (vectype)
136 == vect_use_mask_type_p (orig_stmt_info)));
137 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
138 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
139 }
140 return pattern_stmt_info;
141}
142
143/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
144 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
145 have one already. */
146
147static void
148vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
149 stmt_vec_info orig_stmt_info, tree vectype)
150{
151 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
152 STMT_VINFO_RELATED_STMT (orig_stmt_info)
153 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
154}
155
156/* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
157 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
158 be different from the vector type of the final pattern statement.
159 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
160 from which it was derived. */
161
162static inline void
163append_pattern_def_seq (vec_info *vinfo,
164 stmt_vec_info stmt_info, gimple *new_stmt,
165 tree vectype = NULL_TREE,
166 tree scalar_type_for_mask = NULL_TREE)
167{
168 gcc_assert (!scalar_type_for_mask
169 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
170 if (vectype)
171 {
172 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
173 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
174 if (scalar_type_for_mask)
175 new_stmt_info->mask_precision
176 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
177 }
178 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
179 new_stmt);
180}
181
182/* The caller wants to perform new operations on vect_external variable
183 VAR, so that the result of the operations would also be vect_external.
184 Return the edge on which the operations can be performed, if one exists.
185 Return null if the operations should instead be treated as part of
186 the pattern that needs them. */
187
188static edge
189vect_get_external_def_edge (vec_info *vinfo, tree var)
190{
191 edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
193 {
194 e = loop_preheader_edge (loop_vinfo->loop);
195 if (!SSA_NAME_IS_DEFAULT_DEF (var))
196 {
197 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
198 if (bb == NULL
199 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
200 e = NULL;
201 }
202 }
203 return e;
204}
205
206/* Return true if the target supports a vector version of CODE,
207 where CODE is known to map to a direct optab with the given SUBTYPE.
208 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
209 specifies the type of the scalar result.
210
211 If CODE allows the inputs and outputs to have different type
212 (such as for WIDEN_SUM_EXPR), it is the input mode rather
213 than the output mode that determines the appropriate target pattern.
214 Operand 0 of the target pattern then specifies the mode that the output
215 must have.
216
217 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
218 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
219 is nonnull. */
220
221static bool
222vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
223 tree itype, tree *vecotype_out,
224 tree *vecitype_out = NULL,
225 enum optab_subtype subtype = optab_default)
226{
227 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
228 if (!vecitype)
229 return false;
230
231 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
232 if (!vecotype)
233 return false;
234
235 optab optab = optab_for_tree_code (code, vecitype, subtype);
236 if (!optab)
237 return false;
238
  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
240 if (icode == CODE_FOR_nothing
241 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
242 return false;
243
244 *vecotype_out = vecotype;
245 if (vecitype_out)
246 *vecitype_out = vecitype;
247 return true;
248}
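
/* Illustrative note (added, not part of the original sources): a pattern
   recognizer typically calls the helper above with the scalar types it has
   just discovered.  For example, a dot-product recognizer might ask

     vect_supportable_direct_optab_p (vinfo, int_type, DOT_PROD_EXPR,
				      signed_char_type, &vecotype, &vecitype);

   which succeeds only if the target provides a pattern whose input mode is
   the vector mode of "signed char" and whose operand 0 mode matches the
   vector mode of "int".  */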
249
250/* Round bit precision PRECISION up to a full element. */
251
252static unsigned int
253vect_element_precision (unsigned int precision)
254{
  precision = 1 << ceil_log2 (precision);
256 return MAX (precision, BITS_PER_UNIT);
257}
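
/* Worked example (added for illustration, not in the original sources):
   vect_element_precision (3) rounds up to 8, vect_element_precision (9)
   to 16 and vect_element_precision (17) to 32, i.e. the next power of two
   that is at least BITS_PER_UNIT.  */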
258
259/* If OP is defined by a statement that's being considered for vectorization,
260 return information about that statement, otherwise return NULL. */
261
262static stmt_vec_info
263vect_get_internal_def (vec_info *vinfo, tree op)
264{
265 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
266 if (def_stmt_info
267 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
268 return def_stmt_info;
269 return NULL;
270}
271
272/* Check whether NAME, an ssa-name used in STMT_VINFO,
273 is a result of a type promotion, such that:
274 DEF_STMT: NAME = NOP (name0)
275 If CHECK_SIGN is TRUE, check that either both types are signed or both are
276 unsigned. */
277
278static bool
279type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
280 tree *orig_type, gimple **def_stmt, bool *promotion)
281{
282 tree type = TREE_TYPE (name);
283 tree oprnd0;
284 enum vect_def_type dt;
285
286 stmt_vec_info def_stmt_info;
287 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
288 return false;
289
290 if (dt != vect_internal_def
291 && dt != vect_external_def && dt != vect_constant_def)
292 return false;
293
294 if (!*def_stmt)
295 return false;
296
  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);
304
305 *orig_type = TREE_TYPE (oprnd0);
306 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
307 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
308 return false;
309
310 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
311 *promotion = true;
312 else
313 *promotion = false;
314
315 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
316 return false;
317
318 return true;
319}
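
/* Example (for illustration only; not from the original sources): given

     short s = ...;
     int i = (int) s;

   in a vectorizable loop, a call with NAME = i sets *ORIG_TYPE to "short",
   *DEF_STMT to the conversion statement and *PROMOTION to true, because
   the precision of "int" is at least twice that of "short" on common
   targets.  */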
320
321/* Holds information about an input operand after some sign changes
322 and type promotions have been peeled away. */
323class vect_unpromoted_value {
324public:
325 vect_unpromoted_value ();
326
327 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
328
329 /* The value obtained after peeling away zero or more casts. */
330 tree op;
331
332 /* The type of OP. */
333 tree type;
334
335 /* The definition type of OP. */
336 vect_def_type dt;
337
338 /* If OP is the result of peeling at least one cast, and if the cast
339 of OP itself is a vectorizable statement, CASTER identifies that
340 statement, otherwise it is null. */
341 stmt_vec_info caster;
342};
343
344inline vect_unpromoted_value::vect_unpromoted_value ()
345 : op (NULL_TREE),
346 type (NULL_TREE),
347 dt (vect_uninitialized_def),
348 caster (NULL)
349{
350}
351
352/* Set the operand to OP_IN, its definition type to DT_IN, and the
353 statement that casts it to CASTER_IN. */
354
355inline void
356vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
357 stmt_vec_info caster_in)
358{
359 op = op_in;
360 type = TREE_TYPE (op);
361 dt = dt_in;
362 caster = caster_in;
363}
364
365/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
366 to reach some vectorizable inner operand OP', continuing as long as it
367 is possible to convert OP' back to OP using a possible sign change
368 followed by a possible promotion P. Return this OP', or null if OP is
369 not a vectorizable SSA name. If there is a promotion P, describe its
370 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
371 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
372 have more than one user.
373
374 A successful return means that it is possible to go from OP' to OP
375 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
376 whereas the cast from UNPROM to OP might be a promotion, a sign
377 change, or a nop.
378
379 E.g. say we have:
380
381 signed short *ptr = ...;
382 signed short C = *ptr;
383 unsigned short B = (unsigned short) C; // sign change
384 signed int A = (signed int) B; // unsigned promotion
385 ...possible other uses of A...
386 unsigned int OP = (unsigned int) A; // sign change
387
388 In this case it's possible to go directly from C to OP using:
389
390 OP = (unsigned int) (unsigned short) C;
391 +------------+ +--------------+
392 promotion sign change
393
394 so OP' would be C. The input to the promotion is B, so UNPROM
395 would describe B. */
396
397static tree
398vect_look_through_possible_promotion (vec_info *vinfo, tree op,
399 vect_unpromoted_value *unprom,
400 bool *single_use_p = NULL)
401{
402 tree op_type = TREE_TYPE (op);
403 if (!INTEGRAL_TYPE_P (op_type))
404 return NULL_TREE;
405
406 tree res = NULL_TREE;
407 unsigned int orig_precision = TYPE_PRECISION (op_type);
408 unsigned int min_precision = orig_precision;
409 stmt_vec_info caster = NULL;
410 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
411 {
412 /* See whether OP is simple enough to vectorize. */
413 stmt_vec_info def_stmt_info;
414 gimple *def_stmt;
415 vect_def_type dt;
416 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
417 break;
418
419 /* If OP is the input of a demotion, skip over it to see whether
420 OP is itself the result of a promotion. If so, the combined
421 effect of the promotion and the demotion might fit the required
422 pattern, otherwise neither operation fits.
423
424 This copes with cases such as the result of an arithmetic
425 operation being truncated before being stored, and where that
426 arithmetic operation has been recognized as an over-widened one. */
427 if (TYPE_PRECISION (op_type) <= min_precision)
428 {
429 /* Use OP as the UNPROM described above if we haven't yet
430 found a promotion, or if using the new input preserves the
431 sign of the previous promotion. */
432 if (!res
433 || TYPE_PRECISION (unprom->type) == orig_precision
434 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
435 {
	      unprom->set_op (op, dt, caster);
437 min_precision = TYPE_PRECISION (op_type);
438 }
439 /* Stop if we've already seen a promotion and if this
440 conversion does more than change the sign. */
441 else if (TYPE_PRECISION (op_type)
442 != TYPE_PRECISION (unprom->type))
443 break;
444
445 /* The sequence now extends to OP. */
446 res = op;
447 }
448
449 /* See whether OP is defined by a cast. Record it as CASTER if
450 the cast is potentially vectorizable. */
451 if (!def_stmt)
452 break;
453 caster = def_stmt_info;
454
455 /* Ignore pattern statements, since we don't link uses for them. */
456 if (caster
457 && single_use_p
458 && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
468 op_type = TREE_TYPE (op);
469 }
470 return res;
471}
472
473/* OP is an integer operand to an operation that returns TYPE, and we
474 want to treat the operation as a widening one. So far we can treat
475 it as widening from *COMMON_TYPE.
476
477 Return true if OP is suitable for such a widening operation,
478 either widening from *COMMON_TYPE or from some supertype of it.
479 Update *COMMON_TYPE to the supertype in the latter case.
480
481 SHIFT_P is true if OP is a shift amount. */
482
483static bool
484vect_joust_widened_integer (tree type, bool shift_p, tree op,
485 tree *common_type)
486{
487 /* Calculate the minimum precision required by OP, without changing
488 the sign of either operand. */
489 unsigned int precision;
490 if (shift_p)
491 {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
493 return false;
494 precision = TREE_INT_CST_LOW (op);
495 }
496 else
497 {
      precision = wi::min_precision (wi::to_widest (op),
499 TYPE_SIGN (*common_type));
500 if (precision * 2 > TYPE_PRECISION (type))
501 return false;
502 }
503
504 /* If OP requires a wider type, switch to that type. The checks
505 above ensure that this is still narrower than the result. */
506 precision = vect_element_precision (precision);
507 if (TYPE_PRECISION (*common_type) < precision)
508 *common_type = build_nonstandard_integer_type
509 (precision, TYPE_UNSIGNED (*common_type));
510 return true;
511}
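
/* Worked example (illustrative only, not from the original sources): with
   TYPE = int (32 bits) and *COMMON_TYPE = unsigned char, a constant
   operand OP = 300 needs 9 bits as an unsigned value; 9 * 2 <= 32, so the
   operand is acceptable and *COMMON_TYPE is widened to a 16-bit unsigned
   type.  A shift amount OP = 20 would be rejected because it exceeds
   32 / 2.  */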
512
513/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
514 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
515
516static bool
517vect_joust_widened_type (tree type, tree new_type, tree *common_type)
518{
  if (types_compatible_p (*common_type, new_type))
520 return true;
521
522 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
523 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
524 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
525 return true;
526
527 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
528 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
529 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
530 {
531 *common_type = new_type;
532 return true;
533 }
534
535 /* We have mismatched signs, with the signed type being
536 no wider than the unsigned type. In this case we need
537 a wider signed type. */
538 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
539 TYPE_PRECISION (new_type));
540 precision *= 2;
541
542 if (precision * 2 > TYPE_PRECISION (type))
543 return false;
544
545 *common_type = build_nonstandard_integer_type (precision, false);
546 return true;
547}
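
/* Worked example (illustrative only, not from the original sources): with
   TYPE = int, *COMMON_TYPE = unsigned char and NEW_TYPE = signed char,
   neither type can hold all values of the other, so the function falls
   through to the mixed-sign case and requires a signed 16-bit
   *COMMON_TYPE; 16 * 2 <= 32 still fits in TYPE, so it returns true.  */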
548
549/* Check whether STMT_INFO can be viewed as a tree of integer operations
550 in which each node either performs CODE or WIDENED_CODE, and where
551 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
552 specifies the maximum number of leaf operands. SHIFT_P says whether
553 CODE and WIDENED_CODE are some sort of shift.
554
555 If STMT_INFO is such a tree, return the number of leaf operands
556 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
557 to a type that (a) is narrower than the result of STMT_INFO and
558 (b) can hold all leaf operand values.
559
560 If SUBTYPE then allow that the signs of the operands
561 may differ in signs but not in precision. SUBTYPE is updated to reflect
562 this.
563
564 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
565 exists. */
566
567static unsigned int
568vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
569 code_helper widened_code, bool shift_p,
570 unsigned int max_nops,
571 vect_unpromoted_value *unprom, tree *common_type,
572 enum optab_subtype *subtype = NULL)
573{
574 /* Check for an integer operation with the right code. */
  gimple *stmt = stmt_info->stmt;
  if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
    return 0;

  code_helper rhs_code;
  if (is_gimple_assign (stmt))
    rhs_code = gimple_assign_rhs_code (stmt);
  else if (is_gimple_call (stmt))
    rhs_code = gimple_call_combined_fn (stmt);
584 else
585 return 0;
586
587 if (rhs_code != code
588 && rhs_code != widened_code)
589 return 0;
590
591 tree lhs = gimple_get_lhs (stmt);
592 tree type = TREE_TYPE (lhs);
593 if (!INTEGRAL_TYPE_P (type))
594 return 0;
595
596 /* Assume that both operands will be leaf operands. */
597 max_nops -= 2;
598
599 /* Check the operands. */
600 unsigned int next_op = 0;
601 for (unsigned int i = 0; i < 2; ++i)
602 {
603 vect_unpromoted_value *this_unprom = &unprom[next_op];
604 unsigned int nops = 1;
605 tree op = gimple_arg (gs: stmt, i);
606 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
607 {
608 /* We already have a common type from earlier operands.
609 Update it to account for OP. */
610 this_unprom->set_op (op_in: op, dt_in: vect_constant_def);
611 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
612 return 0;
613 }
614 else
615 {
616 /* Only allow shifts by constants. */
617 if (shift_p && i == 1)
618 return 0;
619
620 if (rhs_code != code)
621 {
622 /* If rhs_code is widened_code, don't look through further
623 possible promotions, there is a promotion already embedded
624 in the WIDEN_*_EXPR. */
625 if (TREE_CODE (op) != SSA_NAME
626 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
627 return 0;
628
629 stmt_vec_info def_stmt_info;
630 gimple *def_stmt;
631 vect_def_type dt;
632 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
633 &def_stmt))
634 return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
639 return 0;
640
641 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
642 {
643 /* The operand isn't widened. If STMT_INFO has the code
644 for an unwidened operation, recursively check whether
645 this operand is a node of the tree. */
646 if (rhs_code != code
647 || max_nops == 0
648 || this_unprom->dt != vect_internal_def)
649 return 0;
650
651 /* Give back the leaf slot allocated above now that we're
652 not treating this as a leaf operand. */
653 max_nops += 1;
654
655 /* Recursively process the definition of the operand. */
656 stmt_vec_info def_stmt_info
657 = vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
662 if (nops == 0)
663 return 0;
664
665 max_nops -= nops;
666 }
667 else
668 {
669 /* Make sure that the operand is narrower than the result. */
670 if (TYPE_PRECISION (this_unprom->type) * 2
671 > TYPE_PRECISION (type))
672 return 0;
673
674 /* Update COMMON_TYPE for the new operand. */
675 if (i == 0)
676 *common_type = this_unprom->type;
677 else if (!vect_joust_widened_type (type, new_type: this_unprom->type,
678 common_type))
679 {
680 if (subtype)
681 {
682 /* See if we can sign extend the smaller type. */
683 if (TYPE_PRECISION (this_unprom->type)
684 > TYPE_PRECISION (*common_type))
685 *common_type = this_unprom->type;
686 *subtype = optab_vector_mixed_sign;
687 }
688 else
689 return 0;
690 }
691 }
692 }
693 next_op += nops;
694 }
695 return next_op;
696}
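
/* Example (illustrative only, not from the original sources): for

     unsigned char a, b, c;
     int res = (int) a + (int) b + (int) c;

   a call on the outermost PLUS_EXPR with CODE = PLUS_EXPR and
   MAX_NOPS = 3 returns 3, describes a, b and c in UNPROM[0..2] and sets
   *COMMON_TYPE to unsigned char, because the inner addition is itself a
   node of the tree rather than a leaf.  */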
697
698/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
699 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
700
701static tree
702vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
703{
  return make_temp_ssa_name (type, stmt, "patt");
705}
706
707/* STMT2_INFO describes a type conversion that could be split into STMT1
708 followed by a version of STMT2_INFO that takes NEW_RHS as its first
709 input. Try to do this using pattern statements, returning true on
710 success. */
711
712static bool
713vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
714 gimple *stmt1, tree vectype)
715{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
722
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "Splitting pattern statement: %G", stmt2_info->stmt);
726
727 /* Since STMT2_INFO is a pattern statement, we can change it
728 in-situ without worrying about changing the code for the
729 containing block. */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
731
732 if (dump_enabled_p ())
733 {
734 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
735 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
736 stmt2_info->stmt);
737 }
738
739 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
740 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
741 /* STMT2_INFO is the actual pattern statement. Add STMT1
742 to the end of the definition sequence. */
743 gimple_seq_add_stmt_without_update (def_seq, stmt1);
744 else
745 {
746 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
747 before it. */
748 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
749 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
750 }
751 return true;
752 }
753 else
754 {
755 /* STMT2_INFO doesn't yet have a pattern. Try to create a
756 two-statement pattern now. */
757 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
758 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
759 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
760 if (!lhs_vectype)
761 return false;
762
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_NOTE, vect_location,
765 "Splitting statement: %G", stmt2_info->stmt);
766
767 /* Add STMT1 as a singleton pattern definition sequence. */
768 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
776
777 if (dump_enabled_p ())
778 {
779 dump_printf_loc (MSG_NOTE, vect_location,
780 "into pattern statements: %G", stmt1);
781 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
782 (gimple *) new_stmt2);
783 }
784
785 return true;
786 }
787}
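
/* Example (illustrative only; the temporary name is hypothetical): if
   STMT2_INFO is the conversion

     a = (unsigned int) c;          // c has type unsigned char

   and a caller wants the intermediate 16-bit value, it can build

     patt_b = (unsigned short) c;

   as STMT1 and pass NEW_RHS = patt_b, after which the conversion becomes

     a = (unsigned int) patt_b;

   This is how vect_convert_input below taps into a mid-way point of an
   existing cast.  */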
788
789/* Look for the following pattern
790 X = x[i]
791 Y = y[i]
792 DIFF = X - Y
793 DAD = ABS_EXPR<DIFF>
794
795 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
796 HALF_TYPE and UNPROM will be set should the statement be found to
797 be a widened operation.
798 DIFF_STMT will be set to the MINUS_EXPR
799 statement that precedes the ABS_STMT unless vect_widened_op_tree
800 succeeds.
801 */
802static bool
803vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
804 tree *half_type,
805 vect_unpromoted_value unprom[2],
806 gassign **diff_stmt)
807{
808 if (!abs_stmt)
809 return false;
810
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
  if (code != ABS_EXPR && code != ABSU_EXPR)
    return false;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!abs_oprnd)
    return false;
  tree abs_type = TREE_TYPE (abs_oprnd);
821 if (!ANY_INTEGRAL_TYPE_P (abs_type)
822 || TYPE_OVERFLOW_WRAPS (abs_type)
823 || TYPE_UNSIGNED (abs_type))
824 return false;
825
826 /* Peel off conversions from the ABS input. This can involve sign
827 changes (e.g. from an unsigned subtraction to a signed ABS input)
828 or signed promotion, but it can't include unsigned promotion.
829 (Note that ABS of an unsigned promotion should have been folded
830 away before now anyway.) */
831 vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
834 if (!abs_oprnd)
835 return false;
836 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
837 && TYPE_UNSIGNED (unprom_diff.type))
838 return false;
839
840 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
841 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, op: abs_oprnd);
842 if (!diff_stmt_vinfo)
843 return false;
844
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
			    false, 2, unprom, half_type))
850 return true;
851
852 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
  if (diff_stmt && diff
      && gimple_assign_rhs_code (diff) == MINUS_EXPR
      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
857 {
858 *diff_stmt = diff;
859 *half_type = NULL_TREE;
860 return true;
861 }
862
863 return false;
864}
865
866/* Convert UNPROM to TYPE and return the result, adding new statements
867 to STMT_INFO's pattern definition statements if no better way is
868 available. VECTYPE is the vector form of TYPE.
869
870 If SUBTYPE then convert the type based on the subtype. */
871
872static tree
873vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
874 vect_unpromoted_value *unprom, tree vectype,
875 enum optab_subtype subtype = optab_default)
876{
877 /* Update the type if the signs differ. */
878 if (subtype == optab_vector_mixed_sign)
879 {
880 gcc_assert (!TYPE_UNSIGNED (type));
881 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
882 {
883 type = unsigned_type_for (type);
884 vectype = unsigned_type_for (vectype);
885 }
886 }
887
888 /* Check for a no-op conversion. */
889 if (types_compatible_p (type1: type, TREE_TYPE (unprom->op)))
890 return unprom->op;
891
892 /* Allow the caller to create constant vect_unpromoted_values. */
893 if (TREE_CODE (unprom->op) == INTEGER_CST)
894 return wide_int_to_tree (type, cst: wi::to_widest (t: unprom->op));
895
896 tree input = unprom->op;
897 if (unprom->caster)
898 {
899 tree lhs = gimple_get_lhs (unprom->caster->stmt);
900 tree lhs_type = TREE_TYPE (lhs);
901
902 /* If the result of the existing cast is the right width, use it
903 instead of the source of the cast. */
904 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
905 input = lhs;
906 /* If the precision we want is between the source and result
907 precisions of the existing cast, try splitting the cast into
908 two and tapping into a mid-way point. */
909 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
910 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
911 {
912 /* In order to preserve the semantics of the original cast,
913 give the mid-way point the same signedness as the input value.
914
915 It would be possible to use a signed type here instead if
916 TYPE is signed and UNPROM->TYPE is unsigned, but that would
917 make the sign of the midtype sensitive to the order in
918 which we process the statements, since the signedness of
919 TYPE is the signedness required by just one of possibly
920 many users. Also, unsigned promotions are usually as cheap
921 as or cheaper than signed ones, so it's better to keep an
922 unsigned promotion. */
923 tree midtype = build_nonstandard_integer_type
924 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
925 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
926 if (vec_midtype)
927 {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);
935 }
936 }
937
938 /* See if we can reuse an existing result. */
939 if (types_compatible_p (type1: type, TREE_TYPE (input)))
940 return input;
941 }
942
943 /* We need a new conversion statement. */
944 tree new_op = vect_recog_temp_ssa_var (type, NULL);
945 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
946
947 /* If OP is an external value, see if we can insert the new statement
948 on an incoming edge. */
949 if (input == unprom->op && unprom->dt == vect_external_def)
950 if (edge e = vect_get_external_def_edge (vinfo, var: input))
951 {
952 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
953 gcc_assert (!new_bb);
954 return new_op;
955 }
956
957 /* As a (common) last resort, add the statement to the pattern itself. */
958 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
959 return new_op;
960}
961
962/* Invoke vect_convert_input for N elements of UNPROM and store the
963 result in the corresponding elements of RESULT.
964
965 If SUBTYPE then convert the type based on the subtype. */
966
967static void
968vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
969 tree *result, tree type, vect_unpromoted_value *unprom,
970 tree vectype, enum optab_subtype subtype = optab_default)
971{
972 for (unsigned int i = 0; i < n; ++i)
973 {
974 unsigned int j;
975 for (j = 0; j < i; ++j)
976 if (unprom[j].op == unprom[i].op)
977 break;
978
979 if (j < i)
980 result[i] = result[j];
981 else
      result[i] = vect_convert_input (vinfo, stmt_info,
				      type, &unprom[i], vectype, subtype);
984 }
985}
986
987/* The caller has created a (possibly empty) sequence of pattern definition
988 statements followed by a single statement PATTERN_STMT. Cast the result
989 of this final statement to TYPE. If a new statement is needed, add
990 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
991 and return the new statement, otherwise return PATTERN_STMT as-is.
992 VECITYPE is the vector form of PATTERN_STMT's result type. */
993
994static gimple *
995vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
996 gimple *pattern_stmt, tree vecitype)
997{
998 tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1003 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1004 }
1005 return pattern_stmt;
1006}
1007
1008/* Return true if STMT_VINFO describes a reduction for which reassociation
1009 is allowed. If STMT_INFO is part of a group, assume that it's part of
1010 a reduction chain and optimistically assume that all statements
1011 except the last allow reassociation.
1012 Also require it to have code CODE and to be a reduction
1013 in the outermost loop. When returning true, store the operands in
1014 *OP0_OUT and *OP1_OUT. */
1015
1016static bool
1017vect_reassociating_reduction_p (vec_info *vinfo,
1018 stmt_vec_info stmt_info, tree_code code,
1019 tree *op0_out, tree *op1_out)
1020{
1021 loop_vec_info loop_info = dyn_cast <loop_vec_info> (p: vinfo);
1022 if (!loop_info)
1023 return false;
1024
1025 gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
1026 if (!assign || gimple_assign_rhs_code (gs: assign) != code)
1027 return false;
1028
1029 /* We don't allow changing the order of the computation in the inner-loop
1030 when doing outer-loop vectorization. */
1031 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1032 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1033 return false;
1034
1035 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1036 {
1037 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1038 code))
1039 return false;
1040 }
1041 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1042 return false;
1043
  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
1048 return true;
1049}
1050
1051/* match.pd function to match
1052 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1053 with conditions:
1054 1) @1, @2, c, d, a, b are all integral type.
1055 2) There's single_use for both @1 and @2.
1056 3) a, c have same precision.
1057 4) c and @1 have different precision.
1058 5) c, d are the same type or they can differ in sign when convert is
1059 truncation.
1060
1061 record a and c and d and @3. */
1062
1063extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1064
1065/* Function vect_recog_cond_expr_convert
1066
1067 Try to find the following pattern:
1068
1069 TYPE_AB A,B;
1070 TYPE_CD C,D;
1071 TYPE_E E;
1072 TYPE_E op_true = (TYPE_E) A;
1073 TYPE_E op_false = (TYPE_E) B;
1074
1075 E = C cmp D ? op_true : op_false;
1076
1077 where
1078 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1079 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1080 single_use of op_true and op_false.
1081 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1082
1083 Input:
1084
1085 * STMT_VINFO: The stmt from which the pattern search begins.
1086 here it starts with E = c cmp D ? op_true : op_false;
1087
1088 Output:
1089
1090 TYPE1 E' = C cmp D ? A : B;
1091 TYPE3 E = (TYPE3) E';
1092
   There may be an extra nop_convert for A or B to handle different
   signedness.
1094
1095 * TYPE_OUT: The vector type of the output of this pattern.
1096
1097 * Return value: A new stmt that will be used to replace the sequence of
1098 stmts that constitute the pattern. In this case it will be:
1099 E = (TYPE3)E';
1100 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1101
1102static gimple *
1103vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1104 stmt_vec_info stmt_vinfo, tree *type_out)
1105{
1106 gassign *last_stmt = dyn_cast <gassign *> (p: stmt_vinfo->stmt);
1107 tree lhs, match[4], temp, type, new_lhs, op2;
1108 gimple *cond_stmt;
1109 gimple *pattern_stmt;
1110
1111 if (!last_stmt)
1112 return NULL;
1113
1114 lhs = gimple_assign_lhs (gs: last_stmt);
1115
  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1117 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1118 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1119 return NULL;
1120
1121 vect_pattern_detected (name: "vect_recog_cond_expr_convert_pattern", stmt: last_stmt);
1122
1123 op2 = match[2];
1124 type = TREE_TYPE (match[1]);
1125 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1126 {
1127 op2 = vect_recog_temp_ssa_var (type, NULL);
1128 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
1131 }
1132
1133 temp = vect_recog_temp_ssa_var (type, NULL);
1134 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1135 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
1138 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1139 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1140 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1141
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "created pattern stmt: %G", pattern_stmt);
1145 return pattern_stmt;
1146}
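
/* Example (illustrative only, not from the original sources): on an LP64
   target the pattern above transforms

     int A, B, C, D;
     long E = C < D ? (long) A : (long) B;

   into

     E' = C < D ? A : B;
     E  = (long) E';

   so the comparison and the selection are done at the narrower element
   width and only the final result is widened.  */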
1147
1148/* Function vect_recog_dot_prod_pattern
1149
1150 Try to find the following pattern:
1151
     type1a x_t;
1153 type1b y_t;
1154 TYPE1 prod;
1155 TYPE2 sum = init;
1156 loop:
1157 sum_0 = phi <init, sum_1>
1158 S1 x_t = ...
1159 S2 y_t = ...
1160 S3 x_T = (TYPE1) x_t;
1161 S4 y_T = (TYPE1) y_t;
1162 S5 prod = x_T * y_T;
1163 [S6 prod = (TYPE2) prod; #optional]
1164 S7 sum_1 = prod + sum_0;
1165
   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
   but 'type1a' and 'type1b' themselves can differ in sign.
1169
1170 Input:
1171
1172 * STMT_VINFO: The stmt from which the pattern search begins. In the
1173 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1174 will be detected.
1175
1176 Output:
1177
1178 * TYPE_OUT: The type of the output of this pattern.
1179
1180 * Return value: A new stmt that will be used to replace the sequence of
1181 stmts that constitute the pattern. In this case it will be:
1182 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1183
1184 Note: The dot-prod idiom is a widening reduction pattern that is
1185 vectorized without preserving all the intermediate results. It
1186 produces only N/2 (widened) results (by summing up pairs of
1187 intermediate results) rather than all N results. Therefore, we
1188 cannot allow this pattern when we want to get all the results and in
1189 the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
1191
1192static gimple *
1193vect_recog_dot_prod_pattern (vec_info *vinfo,
1194 stmt_vec_info stmt_vinfo, tree *type_out)
1195{
1196 tree oprnd0, oprnd1;
1197 gimple *last_stmt = stmt_vinfo->stmt;
1198 tree type, half_type;
1199 gimple *pattern_stmt;
1200 tree var;
1201
1202 /* Look for the following pattern
1203 DX = (TYPE1) X;
1204 DY = (TYPE1) Y;
1205 DPROD = DX * DY;
1206 DDPROD = (TYPE2) DPROD;
1207 sum_1 = DDPROD + sum_0;
1208 In which
1209 - DX is double the size of X
1210 - DY is double the size of Y
1211 - DX, DY, DPROD all have the same type but the sign
1212 between X, Y and DPROD can differ.
1213 - sum is the same size of DPROD or bigger
1214 - sum has been recognized as a reduction variable.
1215
1216 This is equivalent to:
1217 DPROD = X w* Y; #widen mult
1218 sum_1 = DPROD w+ sum_0; #widen summation
1219 or
1220 DPROD = X w* Y; #widen mult
1221 sum_1 = DPROD + sum_0; #summation
1222 */
1223
1224 /* Starting from LAST_STMT, follow the defs of its uses in search
1225 of the above pattern. */
1226
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
1229 return NULL;
1230
1231 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1232
1233 vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1235
1236 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1237 we know that oprnd1 is the reduction variable (defined by a loop-header
1238 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1239 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1240 if (!oprnd0)
1241 return NULL;
1242
  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1244 if (!mult_vinfo)
1245 return NULL;
1246
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
1249 vect_unpromoted_value unprom0[2];
1250 enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
1253 return NULL;
1254
1255 /* If there are two widening operations, make sure they agree on the sign
1256 of the extension. The result of an optab_vector_mixed_sign operation
1257 is signed; otherwise, the result has the same sign as the operands. */
1258 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1259 && (subtype == optab_vector_mixed_sign
1260 ? TYPE_UNSIGNED (unprom_mult.type)
1261 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1262 return NULL;
1263
1264 vect_pattern_detected (name: "vect_recog_dot_prod_pattern", stmt: last_stmt);
1265
1266 /* If the inputs have mixed signs, canonicalize on using the signed
1267 input type for analysis. This also helps when emulating mixed-sign
1268 operations using signed operations. */
1269 if (subtype == optab_vector_mixed_sign)
1270 half_type = signed_type_for (half_type);
1271
1272 tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    {
      /* We can emulate a mixed-sign dot-product using a sequence of
	 signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
	  || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
					       DOT_PROD_EXPR, half_type,
					       type_out, &half_vectype,
					       optab_vector))
1283 return NULL;
1284
1285 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1286 *type_out);
1287 }
1288
1289 /* Get the inputs in the appropriate types. */
1290 tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);
1293
1294 var = vect_recog_temp_ssa_var (type, NULL);
1295 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1296 mult_oprnd[0], mult_oprnd[1], oprnd1);
1297
1298 return pattern_stmt;
1299}
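
/* Concrete example (added for illustration, not part of the original
   sources): the dot-product recognizer above matches loops such as

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   provided the target implements DOT_PROD_EXPR for the corresponding
   vector modes, in which case the multiply-and-accumulate becomes a
   single DOT_PROD_EXPR <x_t, y_t, sum_0> per vector iteration.  */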
1300
1301
1302/* Function vect_recog_sad_pattern
1303
1304 Try to find the following Sum of Absolute Difference (SAD) pattern:
1305
1306 type x_t, y_t;
1307 signed TYPE1 diff, abs_diff;
1308 TYPE2 sum = init;
1309 loop:
1310 sum_0 = phi <init, sum_1>
1311 S1 x_t = ...
1312 S2 y_t = ...
1313 S3 x_T = (TYPE1) x_t;
1314 S4 y_T = (TYPE1) y_t;
1315 S5 diff = x_T - y_T;
1316 S6 abs_diff = ABS_EXPR <diff>;
1317 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1318 S8 sum_1 = abs_diff + sum_0;
1319
1320 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1321 same size of 'TYPE1' or bigger. This is a special case of a reduction
1322 computation.
1323
1324 Input:
1325
1326 * STMT_VINFO: The stmt from which the pattern search begins. In the
1327 example, when this function is called with S8, the pattern
1328 {S3,S4,S5,S6,S7,S8} will be detected.
1329
1330 Output:
1331
1332 * TYPE_OUT: The type of the output of this pattern.
1333
1334 * Return value: A new stmt that will be used to replace the sequence of
1335 stmts that constitute the pattern. In this case it will be:
1336 SAD_EXPR <x_t, y_t, sum_0>
1337 */
1338
1339static gimple *
1340vect_recog_sad_pattern (vec_info *vinfo,
1341 stmt_vec_info stmt_vinfo, tree *type_out)
1342{
1343 gimple *last_stmt = stmt_vinfo->stmt;
1344 tree half_type;
1345
1346 /* Look for the following pattern
1347 DX = (TYPE1) X;
1348 DY = (TYPE1) Y;
1349 DDIFF = DX - DY;
1350 DAD = ABS_EXPR <DDIFF>;
     [DAD = (TYPE2) DAD;   #optional]
1352 sum_1 = DAD + sum_0;
1353 In which
1354 - DX is at least double the size of X
1355 - DY is at least double the size of Y
1356 - DX, DY, DDIFF, DAD all have the same type
1357 - sum is the same size of DAD or bigger
1358 - sum has been recognized as a reduction variable.
1359
1360 This is equivalent to:
1361 DDIFF = X w- Y; #widen sub
1362 DAD = ABS_EXPR <DDIFF>;
1363 sum_1 = DAD w+ sum_0; #widen summation
1364 or
1365 DDIFF = X w- Y; #widen sub
1366 DAD = ABS_EXPR <DDIFF>;
1367 sum_1 = DAD + sum_0; #summation
1368 */
1369
1370 /* Starting from LAST_STMT, follow the defs of its uses in search
1371 of the above pattern. */
1372
1373 tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
1376 return NULL;
1377
1378 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1379
1380 /* Any non-truncating sequence of conversions is OK here, since
1381 with a successful match, the result of the ABS(U) is known to fit
1382 within the nonnegative range of the result type. (It cannot be the
1383 negative of the minimum signed value due to the range of the widening
1384 MINUS_EXPR.) */
1385 vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);
1388
1389 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1390 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1391 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1392 Then check that plus_oprnd0 is defined by an abs_expr. */
1393
1394 if (!plus_oprnd0)
1395 return NULL;
1396
  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1398 if (!abs_stmt_vinfo)
1399 return NULL;
1400
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1404 vect_unpromoted_value unprom[2];
1405
1406 if (!abs_stmt)
1407 {
      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
      if (!abd_stmt
	  || !gimple_call_internal_p (abd_stmt)
	  || gimple_call_num_args (abd_stmt) != 2)
	return NULL;

      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);

      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1418 {
	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
						     &unprom[0])
	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
							&unprom[1]))
	    return NULL;
	}
      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1426 {
1427 unprom[0].op = abd_oprnd0;
1428 unprom[0].type = TREE_TYPE (abd_oprnd0);
1429 unprom[1].op = abd_oprnd1;
1430 unprom[1].type = TREE_TYPE (abd_oprnd1);
1431 }
1432 else
1433 return NULL;
1434
1435 half_type = unprom[0].type;
1436 }
  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
					    unprom, NULL))
1439 return NULL;
1440
1441 vect_pattern_detected (name: "vect_recog_sad_pattern", stmt: last_stmt);
1442
1443 tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
1446 return NULL;
1447
1448 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1449 tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1454 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1455 sad_oprnd[1], plus_oprnd1);
1456
1457 return pattern_stmt;
1458}
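
/* Concrete example (added for illustration, not part of the original
   sources): the SAD recognizer above matches reductions such as

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);

   which targets with a SAD_EXPR pattern (e.g. x86 psadbw) can compute
   directly on the narrow unsigned char inputs.  */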
1459
1460/* Function vect_recog_abd_pattern
1461
1462 Try to find the following ABsolute Difference (ABD) or
1463 widening ABD (WIDEN_ABD) pattern:
1464
1465 TYPE1 x;
1466 TYPE2 y;
1467 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1468 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1469 TYPE3 diff = x_cast - y_cast;
1470 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1471 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1472
1473 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1474 twice as wide as TYPE3.
1475
1476 Input:
1477
1478 * STMT_VINFO: The stmt from which the pattern search begins
1479
1480 Output:
1481
1482 * TYPE_OUT: The type of the output of this pattern
1483
1484 * Return value: A new stmt that will be used to replace the sequence of
1485 stmts that constitute the pattern, principally:
1486 out = IFN_ABD (x, y)
1487 out = IFN_WIDEN_ABD (x, y)
1488 */
1489
1490static gimple *
1491vect_recog_abd_pattern (vec_info *vinfo,
1492 stmt_vec_info stmt_vinfo, tree *type_out)
1493{
1494 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1495 if (!last_stmt)
1496 return NULL;
1497
1498 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1499
1500 vect_unpromoted_value unprom[2];
1501 gassign *diff_stmt;
1502 tree half_type;
  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
				       unprom, &diff_stmt))
1505 return NULL;
1506
1507 tree abd_in_type, abd_out_type;
1508
1509 if (half_type)
1510 {
1511 abd_in_type = half_type;
1512 abd_out_type = abd_in_type;
1513 }
1514 else
1515 {
      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1518 abd_in_type = signed_type_for (out_type);
1519 abd_out_type = abd_in_type;
1520 }
1521
1522 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1523 if (!vectype_in)
1524 return NULL;
1525
1526 internal_fn ifn = IFN_ABD;
1527 tree vectype_out = vectype_in;
1528
1529 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1530 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1531 {
1532 tree mid_type
1533 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1534 TYPE_UNSIGNED (abd_in_type));
1535 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1536
1537 code_helper dummy_code;
1538 int dummy_int;
1539 auto_vec<tree> dummy_vec;
1540 if (mid_vectype
1541 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1542 stmt_vinfo, mid_vectype,
1543 vectype_in,
1544 &dummy_code, &dummy_code,
1545 &dummy_int, &dummy_vec))
1546 {
1547 ifn = IFN_VEC_WIDEN_ABD;
1548 abd_out_type = mid_type;
1549 vectype_out = mid_vectype;
1550 }
1551 }
1552
1553 if (ifn == IFN_ABD
1554 && !direct_internal_fn_supported_p (ifn, vectype_in,
1555 OPTIMIZE_FOR_SPEED))
1556 return NULL;
1557
1558 vect_pattern_detected (name: "vect_recog_abd_pattern", stmt: last_stmt);
1559
1560 tree abd_oprnds[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
		       abd_in_type, unprom, vectype_in);
1563
1564 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1565
  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
						abd_oprnds[0], abd_oprnds[1]);
  gimple_call_set_lhs (abd_stmt, abd_result);
  gimple_set_location (abd_stmt, gimple_location (last_stmt));
1571
1572 gimple *stmt = abd_stmt;
1573 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1574 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1575 && !TYPE_UNSIGNED (abd_out_type))
1576 {
1577 tree unsign = unsigned_type_for (abd_out_type);
1578 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
				  unsign_vectype);
    }

  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1584}
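
/* Concrete example (added for illustration, not part of the original
   sources): without a reduction, the same absolute-difference computation

     unsigned char x[N], y[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = abs (x[i] - y[i]);

   can be recognized as IFN_ABD (x[i], y[i]) when the target provides the
   corresponding optab, and as IFN_VEC_WIDEN_ABD when the result is stored
   in a type at least twice as wide as the inputs.  */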
1585
1586/* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 so that it can be treated as though it had the form:
1588
1589 A_TYPE a;
1590 B_TYPE b;
1591 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1596
1597 Try to replace the pattern with:
1598
1599 A_TYPE a;
1600 B_TYPE b;
1601 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1605
1606 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1607
1608 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 name of the pattern being matched, for dump purposes. */
1610
1611static gimple *
1612vect_recog_widen_op_pattern (vec_info *vinfo,
1613 stmt_vec_info last_stmt_info, tree *type_out,
1614 tree_code orig_code, code_helper wide_code,
1615 bool shift_p, const char *name)
1616{
1617 gimple *last_stmt = last_stmt_info->stmt;
1618
1619 vect_unpromoted_value unprom[2];
1620 tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
1623
1624 return NULL;
1625
1626 /* Pattern detected. */
1627 vect_pattern_detected (name, stmt: last_stmt);
1628
1629 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1630 tree itype = type;
1631 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1632 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1633 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1634 TYPE_UNSIGNED (half_type));
1635
1636 /* Check target support */
1637 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1638 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1639 tree ctype = itype;
1640 tree vecctype = vecitype;
1641 if (orig_code == MINUS_EXPR
1642 && TYPE_UNSIGNED (itype)
1643 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1644 {
1645 /* Subtraction is special, even if half_type is unsigned and no matter
1646 whether type is signed or unsigned, if type is wider than itype,
1647 we need to sign-extend from the widening operation result to the
1648 result type.
1649 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 itype unsigned short and type either int or unsigned int.
1651 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 (unsigned short) 0xffff, but for type int we want the result -1
1653 and for type unsigned int 0xffffffff rather than 0xffff. */
1654 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1655 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1656 }
1657
1658 code_helper dummy_code;
1659 int dummy_int;
1660 auto_vec<tree> dummy_vec;
1661 if (!vectype
1662 || !vecitype
1663 || !vecctype
1664 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1665 vecitype, vectype,
1666 &dummy_code, &dummy_code,
1667 &dummy_int, &dummy_vec))
1668 return NULL;
1669
1670 *type_out = get_vectype_for_scalar_type (vinfo, type);
1671 if (!*type_out)
1672 return NULL;
1673
1674 tree oprnd[2];
1675 vect_convert_inputs (vinfo, stmt_info: last_stmt_info,
1676 n: 2, result: oprnd, type: half_type, unprom, vectype);
1677
1678 tree var = vect_recog_temp_ssa_var (type: itype, NULL);
1679 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1680
1681 if (vecctype != vecitype)
1682 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type: ctype,
1683 pattern_stmt, vecitype);
1684
1685 return vect_convert_output (vinfo, stmt_info: last_stmt_info,
1686 type, pattern_stmt, vecitype: vecctype);
1687}
1688
1689/* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1691
1692static gimple *
1693vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1694 tree *type_out)
1695{
1696 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1697 orig_code: MULT_EXPR, wide_code: WIDEN_MULT_EXPR, shift_p: false,
1698 name: "vect_recog_widen_mult_pattern");
1699}
1700
1701/* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1703
1704static gimple *
1705vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 tree *type_out)
1707{
1708 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 orig_code: PLUS_EXPR, wide_code: IFN_VEC_WIDEN_PLUS,
1710 shift_p: false, name: "vect_recog_widen_plus_pattern");
1711}
1712
1713/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1715static gimple *
1716vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1717 tree *type_out)
1718{
1719 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1720 orig_code: MINUS_EXPR, wide_code: IFN_VEC_WIDEN_MINUS,
1721 shift_p: false, name: "vect_recog_widen_minus_pattern");
1722}
1723
1724/* Try to detect abd on widened inputs, converting IFN_ABD
1725 to IFN_VEC_WIDEN_ABD. */
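/* For example, assuming unsigned char inputs and an unsigned short result,
   the scalar shape being matched is roughly:

     unsigned char a, b;
     unsigned char d = .ABD (a, b);
     unsigned short res = (unsigned short) d;

   and the widening conversion is folded into a single IFN_VEC_WIDEN_ABD
   call on the original operands.  */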
1726static gimple *
1727vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1728 tree *type_out)
1729{
1730 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1731 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1732 return NULL;
1733
1734 tree last_rhs = gimple_assign_rhs1 (gs: last_stmt);
1735
1736 tree in_type = TREE_TYPE (last_rhs);
1737 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1738 if (!INTEGRAL_TYPE_P (in_type)
1739 || !INTEGRAL_TYPE_P (out_type)
1740 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1741 || !TYPE_UNSIGNED (in_type))
1742 return NULL;
1743
1744 vect_unpromoted_value unprom;
1745 tree op = vect_look_through_possible_promotion (vinfo, op: last_rhs, unprom: &unprom);
1746 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1747 return NULL;
1748
1749 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1750 if (!abd_pattern_vinfo)
1751 return NULL;
1752
1753 abd_pattern_vinfo = vect_stmt_to_vectorize (stmt_info: abd_pattern_vinfo);
1754 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1755 if (!abd_stmt
1756 || !gimple_call_internal_p (gs: abd_stmt)
1757 || gimple_call_internal_fn (gs: abd_stmt) != IFN_ABD)
1758 return NULL;
1759
1760 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1761 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1762
1763 code_helper dummy_code;
1764 int dummy_int;
1765 auto_vec<tree> dummy_vec;
1766 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1767 vectype_out, vectype_in,
1768 &dummy_code, &dummy_code,
1769 &dummy_int, &dummy_vec))
1770 return NULL;
1771
1772 vect_pattern_detected (name: "vect_recog_widen_abd_pattern", stmt: last_stmt);
1773
1774 *type_out = vectype_out;
1775
1776 tree abd_oprnd0 = gimple_call_arg (gs: abd_stmt, index: 0);
1777 tree abd_oprnd1 = gimple_call_arg (gs: abd_stmt, index: 1);
1778 tree widen_abd_result = vect_recog_temp_ssa_var (type: out_type, NULL);
1779 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1780 abd_oprnd0, abd_oprnd1);
1781 gimple_call_set_lhs (gs: widen_abd_stmt, lhs: widen_abd_result);
1782 gimple_set_location (g: widen_abd_stmt, location: gimple_location (g: last_stmt));
1783 return widen_abd_stmt;
1784}
1785
1786/* Function vect_recog_ctz_ffs_pattern
1787
1788 Try to find the following pattern:
1789
1790 TYPE1 A;
1791 TYPE1 B;
1792
1793 B = __builtin_ctz{,l,ll} (A);
1794
1795 or
1796
1797 B = __builtin_ffs{,l,ll} (A);
1798
1799 Input:
1800
1801 * STMT_VINFO: The stmt from which the pattern search begins.
1802 here it starts with B = __builtin_* (A);
1803
1804 Output:
1805
1806 * TYPE_OUT: The vector type of the output of this pattern.
1807
1808 * Return value: A new stmt that will be used to replace the sequence of
1809 stmts that constitute the pattern, using clz or popcount builtins. */
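/* As a concrete check of the identities used below, take an 8-bit X == 12
   (0b00001100, so .CTZ (X) == 2 and .FFS (X) == 3):

     (X - 1) & ~X == 0b00000011
     .POPCOUNT ((X - 1) & ~X) == 2
     PREC - .CLZ ((X - 1) & ~X) == 8 - 6 == 2

   both of which equal .CTZ (X); .FFS is handled with the analogous
   adjustments.  */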
1810
1811static gimple *
1812vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1813 tree *type_out)
1814{
1815 gimple *call_stmt = stmt_vinfo->stmt;
1816 gimple *pattern_stmt;
1817 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1818 tree new_var;
1819 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1820 bool defined_at_zero = true, defined_at_zero_new = false;
1821 int val = 0, val_new = 0;
1822 int prec;
1823 int sub = 0, add = 0;
1824 location_t loc;
1825
1826 if (!is_gimple_call (gs: call_stmt))
1827 return NULL;
1828
1829 if (gimple_call_num_args (gs: call_stmt) != 1)
1830 return NULL;
1831
1832 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
1833 rhs_type = TREE_TYPE (rhs_oprnd);
1834 lhs_oprnd = gimple_call_lhs (gs: call_stmt);
1835 if (!lhs_oprnd)
1836 return NULL;
1837 lhs_type = TREE_TYPE (lhs_oprnd);
1838 if (!INTEGRAL_TYPE_P (lhs_type)
1839 || !INTEGRAL_TYPE_P (rhs_type)
1840 || !type_has_mode_precision_p (t: rhs_type)
1841 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1842 return NULL;
1843
1844 switch (gimple_call_combined_fn (call_stmt))
1845 {
1846 CASE_CFN_CTZ:
1847 ifn = IFN_CTZ;
1848 if (!gimple_call_internal_p (gs: call_stmt)
1849 || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1850 val) != 2)
1851 defined_at_zero = false;
1852 break;
1853 CASE_CFN_FFS:
1854 ifn = IFN_FFS;
1855 break;
1856 default:
1857 return NULL;
1858 }
1859
1860 prec = TYPE_PRECISION (rhs_type);
1861 loc = gimple_location (g: call_stmt);
1862
1863 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1864 if (!vec_type)
1865 return NULL;
1866
1867 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1868 if (!vec_rhs_type)
1869 return NULL;
1870
1871 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1872 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1873 popcount<vector_mode>2. */
1874 if (!vec_type
1875 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1876 OPTIMIZE_FOR_SPEED))
1877 return NULL;
1878
1879 if (ifn == IFN_FFS
1880 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1881 OPTIMIZE_FOR_SPEED))
1882 {
1883 ifnnew = IFN_CTZ;
1884 defined_at_zero_new
1885 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1886 val_new) == 2;
1887 }
1888 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1889 OPTIMIZE_FOR_SPEED))
1890 {
1891 ifnnew = IFN_CLZ;
1892 defined_at_zero_new
1893 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1894 val_new) == 2;
1895 }
1896 if ((ifnnew == IFN_LAST
1897 || (defined_at_zero && !defined_at_zero_new))
1898 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1899 OPTIMIZE_FOR_SPEED))
1900 {
1901 ifnnew = IFN_POPCOUNT;
1902 defined_at_zero_new = true;
1903 val_new = prec;
1904 }
1905 if (ifnnew == IFN_LAST)
1906 return NULL;
1907
1908 vect_pattern_detected (name: "vec_recog_ctz_ffs_pattern", stmt: call_stmt);
1909
1910 if ((ifnnew == IFN_CLZ
1911 && defined_at_zero
1912 && defined_at_zero_new
1913 && val == prec
1914 && val_new == prec)
1915 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1916 {
1917 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1918 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1919 if (ifnnew == IFN_CLZ)
1920 sub = prec;
1921 val_new = prec;
1922
1923 if (!TYPE_UNSIGNED (rhs_type))
1924 {
1925 rhs_type = unsigned_type_for (rhs_type);
1926 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1927 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1928 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1929 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt,
1930 vectype: vec_rhs_type);
1931 rhs_oprnd = new_var;
1932 }
1933
1934 tree m1 = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1935 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1936 build_int_cst (rhs_type, -1));
1937 gimple_set_location (g: pattern_stmt, location: loc);
1938 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1939
1940 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1941 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1942 gimple_set_location (g: pattern_stmt, location: loc);
1943 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1944 rhs_oprnd = new_var;
1945
1946 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1947 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1948 m1, rhs_oprnd);
1949 gimple_set_location (g: pattern_stmt, location: loc);
1950 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1951 rhs_oprnd = new_var;
1952 }
1953 else if (ifnnew == IFN_CLZ)
1954 {
1955 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1956 .FFS (X) = PREC - .CLZ (X & -X). */
1957 sub = prec - (ifn == IFN_CTZ);
1958 val_new = sub - val_new;
1959
1960 tree neg = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1961 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1962 gimple_set_location (g: pattern_stmt, location: loc);
1963 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1964
1965 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1966 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1967 rhs_oprnd, neg);
1968 gimple_set_location (g: pattern_stmt, location: loc);
1969 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1970 rhs_oprnd = new_var;
1971 }
1972 else if (ifnnew == IFN_POPCOUNT)
1973 {
1974 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1975 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1976 sub = prec + (ifn == IFN_FFS);
1977 val_new = sub;
1978
1979 tree neg = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1980 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1981 gimple_set_location (g: pattern_stmt, location: loc);
1982 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1983
1984 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1985 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1986 rhs_oprnd, neg);
1987 gimple_set_location (g: pattern_stmt, location: loc);
1988 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1989 rhs_oprnd = new_var;
1990 }
1991 else if (ifnnew == IFN_CTZ)
1992 {
1993 /* .FFS (X) = .CTZ (X) + 1. */
1994 add = 1;
1995 val_new++;
1996 }
1997
1998 /* Create B = .IFNNEW (A). */
1999 new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2000 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2001 gimple_call_set_lhs (gs: pattern_stmt, lhs: new_var);
2002 gimple_set_location (g: pattern_stmt, location: loc);
2003 *type_out = vec_type;
2004
2005 if (sub)
2006 {
2007 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2008 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2009 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2010 build_int_cst (lhs_type, sub),
2011 new_var);
2012 gimple_set_location (g: pattern_stmt, location: loc);
2013 new_var = ret_var;
2014 }
2015 else if (add)
2016 {
2017 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2018 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2019 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2020 build_int_cst (lhs_type, add));
2021 gimple_set_location (g: pattern_stmt, location: loc);
2022 new_var = ret_var;
2023 }
2024
2025 if (defined_at_zero
2026 && (!defined_at_zero_new || val != val_new))
2027 {
2028 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2029 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2030 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
2031 rhs_type = TREE_TYPE (rhs_oprnd);
2032 tree cmp = build2_loc (loc, code: NE_EXPR, boolean_type_node,
2033 arg0: rhs_oprnd, arg1: build_zero_cst (rhs_type));
2034 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2035 new_var,
2036 build_int_cst (lhs_type, val));
2037 }
2038
2039 if (dump_enabled_p ())
2040 dump_printf_loc (MSG_NOTE, vect_location,
2041 "created pattern stmt: %G", pattern_stmt);
2042
2043 return pattern_stmt;
2044}
2045
2046/* Function vect_recog_popcount_clz_ctz_ffs_pattern
2047
2048 Try to find the following pattern:
2049
2050 UTYPE1 A;
2051 TYPE1 B;
2052 UTYPE2 temp_in;
2053 TYPE3 temp_out;
2054 temp_in = (UTYPE2)A;
2055
2056 temp_out = __builtin_popcount{,l,ll} (temp_in);
2057 B = (TYPE1) temp_out;
2058
2059 UTYPE2 may or may not be equal to TYPE3;
2060 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
2061 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
2062
2063 Input:
2064
2065 * STMT_VINFO: The stmt from which the pattern search begins.
2066 here it starts with B = (TYPE1) temp_out;
2067
2068 Output:
2069
2070 * TYPE_OUT: The vector type of the output of this pattern.
2071
2072 * Return value: A new stmt that will be used to replace the sequence of
2073 stmts that constitute the pattern. In this case it will be:
2074 B = .POPCOUNT (A);
2075
2076 Similarly for clz, ctz and ffs.
2077*/
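/* For example, a typical instance on narrow element types is

     short f (unsigned short x) { return __builtin_popcount (x); }

   where x is zero-extended to unsigned int for the call and the int result
   is truncated back to short; since the zero-extension adds no set bits,
   the whole sequence can be replaced by .POPCOUNT on the 16-bit value.  */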
2078
2079static gimple *
2080vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2081 stmt_vec_info stmt_vinfo,
2082 tree *type_out)
2083{
2084 gassign *last_stmt = dyn_cast <gassign *> (p: stmt_vinfo->stmt);
2085 gimple *call_stmt, *pattern_stmt;
2086 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2087 internal_fn ifn = IFN_LAST;
2088 int addend = 0;
2089
2090 /* Find B = (TYPE1) temp_out. */
2091 if (!last_stmt)
2092 return NULL;
2093 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
2094 if (!CONVERT_EXPR_CODE_P (code))
2095 return NULL;
2096
2097 lhs_oprnd = gimple_assign_lhs (gs: last_stmt);
2098 lhs_type = TREE_TYPE (lhs_oprnd);
2099 if (!INTEGRAL_TYPE_P (lhs_type))
2100 return NULL;
2101
2102 rhs_oprnd = gimple_assign_rhs1 (gs: last_stmt);
2103 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2104 || !has_single_use (var: rhs_oprnd))
2105 return NULL;
2106 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2107
2108 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2109 if (!is_gimple_call (gs: call_stmt))
2110 return NULL;
2111 switch (gimple_call_combined_fn (call_stmt))
2112 {
2113 int val;
2114 CASE_CFN_POPCOUNT:
2115 ifn = IFN_POPCOUNT;
2116 break;
2117 CASE_CFN_CLZ:
2118 ifn = IFN_CLZ;
2119 /* Punt if call result is unsigned and defined value at zero
2120 is negative, as the negative value doesn't extend correctly. */
2121 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2122 && gimple_call_internal_p (gs: call_stmt)
2123 && CLZ_DEFINED_VALUE_AT_ZERO
2124 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2125 && val < 0)
2126 return NULL;
2127 break;
2128 CASE_CFN_CTZ:
2129 ifn = IFN_CTZ;
2130 /* Punt if call result is unsigned and defined value at zero
2131 is negative, as the negative value doesn't extend correctly. */
2132 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2133 && gimple_call_internal_p (gs: call_stmt)
2134 && CTZ_DEFINED_VALUE_AT_ZERO
2135 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2136 && val < 0)
2137 return NULL;
2138 break;
2139 CASE_CFN_FFS:
2140 ifn = IFN_FFS;
2141 break;
2142 default:
2143 return NULL;
2144 }
2145
2146 if (gimple_call_num_args (gs: call_stmt) != 1)
2147 return NULL;
2148
2149 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
2150 vect_unpromoted_value unprom_diff;
2151 rhs_origin
2152 = vect_look_through_possible_promotion (vinfo, op: rhs_oprnd, unprom: &unprom_diff);
2153
2154 if (!rhs_origin)
2155 return NULL;
2156
2157 /* Input and output of .POPCOUNT should be same-precision integer. */
2158 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2159 return NULL;
2160
2161 /* Also, A should be unsigned or have the same precision as temp_in;
2162 otherwise different builtins/internal functions have different behaviors. */
2163 if (TYPE_PRECISION (unprom_diff.type)
2164 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2165 switch (ifn)
2166 {
2167 case IFN_POPCOUNT:
2168 /* For popcount require zero extension, which doesn't add any
2169 further bits to the count. */
2170 if (!TYPE_UNSIGNED (unprom_diff.type))
2171 return NULL;
2172 break;
2173 case IFN_CLZ:
2174 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2175 if it is undefined at zero or if it matches also for the
2176 defined value there. */
2177 if (!TYPE_UNSIGNED (unprom_diff.type))
2178 return NULL;
2179 if (!type_has_mode_precision_p (t: lhs_type)
2180 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2181 return NULL;
2182 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2183 - TYPE_PRECISION (lhs_type));
2184 if (gimple_call_internal_p (gs: call_stmt))
2185 {
2186 int val1, val2;
2187 int d1
2188 = CLZ_DEFINED_VALUE_AT_ZERO
2189 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2190 int d2
2191 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2192 val2);
2193 if (d1 != 2)
2194 break;
2195 if (d2 != 2 || val1 != val2 + addend)
2196 return NULL;
2197 }
2198 break;
2199 case IFN_CTZ:
2200 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2201 if it is undefined at zero or if it matches also for the
2202 defined value there. */
2203 if (gimple_call_internal_p (gs: call_stmt))
2204 {
2205 int val1, val2;
2206 int d1
2207 = CTZ_DEFINED_VALUE_AT_ZERO
2208 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2209 int d2
2210 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2211 val2);
2212 if (d1 != 2)
2213 break;
2214 if (d2 != 2 || val1 != val2)
2215 return NULL;
2216 }
2217 break;
2218 case IFN_FFS:
2219 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2220 break;
2221 default:
2222 gcc_unreachable ();
2223 }
2224
2225 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2226 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2227 if (!vec_type)
2228 return NULL;
2229
2230 bool supported
2231 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2232 if (!supported)
2233 switch (ifn)
2234 {
2235 case IFN_POPCOUNT:
2236 case IFN_CLZ:
2237 return NULL;
2238 case IFN_FFS:
2239 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2240 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2241 OPTIMIZE_FOR_SPEED))
2242 break;
2243 /* FALLTHRU */
2244 case IFN_CTZ:
2245 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2246 clz or popcount. */
2247 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2248 OPTIMIZE_FOR_SPEED))
2249 break;
2250 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2251 OPTIMIZE_FOR_SPEED))
2252 break;
2253 return NULL;
2254 default:
2255 gcc_unreachable ();
2256 }
2257
2258 vect_pattern_detected (name: "vec_recog_popcount_clz_ctz_ffs_pattern",
2259 stmt: call_stmt);
2260
2261 /* Create B = .POPCOUNT (A). */
2262 new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2263 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2264 gimple_call_set_lhs (gs: pattern_stmt, lhs: new_var);
2265 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
2266 *type_out = vec_type;
2267
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE, vect_location,
2270 "created pattern stmt: %G", pattern_stmt);
2271
2272 if (addend)
2273 {
2274 gcc_assert (supported);
2275 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2276 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2277 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2278 build_int_cst (lhs_type, addend));
2279 }
2280 else if (!supported)
2281 {
2282 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2283 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2284 pattern_stmt
2285 = vect_recog_ctz_ffs_pattern (vinfo, stmt_vinfo: new_stmt_info, type_out);
2286 if (pattern_stmt == NULL)
2287 return NULL;
2288 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2289 {
2290 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2291 gimple_seq_add_seq_without_update (pseq, seq);
2292 }
2293 }
2294 return pattern_stmt;
2295}
2296
2297/* Function vect_recog_pow_pattern
2298
2299 Try to find the following pattern:
2300
2301 x = POW (y, N);
2302
2303 with POW being one of pow, powf, powi, powif and N being
2304 either 2 or 0.5.
2305
2306 Input:
2307
2308 * STMT_VINFO: The stmt from which the pattern search begins.
2309
2310 Output:
2311
2312 * TYPE_OUT: The type of the output of this pattern.
2313
2314 * Return value: A new stmt that will be used to replace the sequence of
2315 stmts that constitute the pattern. In this case it will be:
2316 x = x * x
2317 or
2318 x = sqrt (x)
2319*/
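/* For example, a minimal sketch of the two cases handled below:

     double f (double y) { return pow (y, 2.0); }   // becomes y * y
     double g (double y) { return pow (y, 0.5); }   // becomes .SQRT (y)

   subject to the vector MULT_EXPR / IFN_SQRT support checks below.  */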
2320
2321static gimple *
2322vect_recog_pow_pattern (vec_info *vinfo,
2323 stmt_vec_info stmt_vinfo, tree *type_out)
2324{
2325 gimple *last_stmt = stmt_vinfo->stmt;
2326 tree base, exp;
2327 gimple *stmt;
2328 tree var;
2329
2330 if (!is_gimple_call (gs: last_stmt) || gimple_call_lhs (gs: last_stmt) == NULL)
2331 return NULL;
2332
2333 switch (gimple_call_combined_fn (last_stmt))
2334 {
2335 CASE_CFN_POW:
2336 CASE_CFN_POWI:
2337 break;
2338
2339 default:
2340 return NULL;
2341 }
2342
2343 base = gimple_call_arg (gs: last_stmt, index: 0);
2344 exp = gimple_call_arg (gs: last_stmt, index: 1);
2345 if (TREE_CODE (exp) != REAL_CST
2346 && TREE_CODE (exp) != INTEGER_CST)
2347 {
2348 if (flag_unsafe_math_optimizations
2349 && TREE_CODE (base) == REAL_CST
2350 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2351 {
2352 combined_fn log_cfn;
2353 built_in_function exp_bfn;
2354 switch (DECL_FUNCTION_CODE (decl: gimple_call_fndecl (gs: last_stmt)))
2355 {
2356 case BUILT_IN_POW:
2357 log_cfn = CFN_BUILT_IN_LOG;
2358 exp_bfn = BUILT_IN_EXP;
2359 break;
2360 case BUILT_IN_POWF:
2361 log_cfn = CFN_BUILT_IN_LOGF;
2362 exp_bfn = BUILT_IN_EXPF;
2363 break;
2364 case BUILT_IN_POWL:
2365 log_cfn = CFN_BUILT_IN_LOGL;
2366 exp_bfn = BUILT_IN_EXPL;
2367 break;
2368 default:
2369 return NULL;
2370 }
2371 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2372 tree exp_decl = builtin_decl_implicit (fncode: exp_bfn);
2373 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2374 does that, but if C is a power of 2, we want to use
2375 exp2 (log2 (C) * x) in the non-vectorized version, but for
2376 vectorization we don't have vectorized exp2. */
2377 if (logc
2378 && TREE_CODE (logc) == REAL_CST
2379 && exp_decl
2380 && lookup_attribute (attr_name: "omp declare simd",
2381 DECL_ATTRIBUTES (exp_decl)))
2382 {
2383 cgraph_node *node = cgraph_node::get_create (exp_decl);
2384 if (node->simd_clones == NULL)
2385 {
2386 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2387 || node->definition)
2388 return NULL;
2389 expand_simd_clones (node);
2390 if (node->simd_clones == NULL)
2391 return NULL;
2392 }
2393 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2394 if (!*type_out)
2395 return NULL;
2396 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2397 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2398 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: g);
2399 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2400 g = gimple_build_call (exp_decl, 1, def);
2401 gimple_call_set_lhs (gs: g, lhs: res);
2402 return g;
2403 }
2404 }
2405
2406 return NULL;
2407 }
2408
2409 /* We now have a pow or powi builtin function call with a constant
2410 exponent. */
2411
2412 /* Catch squaring. */
2413 if ((tree_fits_shwi_p (exp)
2414 && tree_to_shwi (exp) == 2)
2415 || (TREE_CODE (exp) == REAL_CST
2416 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2417 {
2418 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), code: MULT_EXPR,
2419 TREE_TYPE (base), vecotype_out: type_out))
2420 return NULL;
2421
2422 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2423 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2424 return stmt;
2425 }
2426
2427 /* Catch square root. */
2428 if (TREE_CODE (exp) == REAL_CST
2429 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2430 {
2431 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2432 if (*type_out
2433 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2434 OPTIMIZE_FOR_SPEED))
2435 {
2436 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2437 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2438 gimple_call_set_lhs (gs: stmt, lhs: var);
2439 gimple_call_set_nothrow (s: stmt, nothrow_p: true);
2440 return stmt;
2441 }
2442 }
2443
2444 return NULL;
2445}
2446
2447
2448/* Function vect_recog_widen_sum_pattern
2449
2450 Try to find the following pattern:
2451
2452 type x_t;
2453 TYPE x_T, sum = init;
2454 loop:
2455 sum_0 = phi <init, sum_1>
2456 S1 x_t = *p;
2457 S2 x_T = (TYPE) x_t;
2458 S3 sum_1 = x_T + sum_0;
2459
2460 where type 'TYPE' is at least double the size of type 'type', i.e., we're
2461 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2462 a special case of a reduction computation.
2463
2464 Input:
2465
2466 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2467 when this function is called with S3, the pattern {S2,S3} will be detected.
2468
2469 Output:
2470
2471 * TYPE_OUT: The type of the output of this pattern.
2472
2473 * Return value: A new stmt that will be used to replace the sequence of
2474 stmts that constitute the pattern. In this case it will be:
2475 WIDEN_SUM <x_t, sum_0>
2476
2477 Note: The widening-sum idiom is a widening reduction pattern that is
2478 vectorized without preserving all the intermediate results. It
2479 produces only N/2 (widened) results (by summing up pairs of
2480 intermediate results) rather than all N results. Therefore, we
2481 cannot allow this pattern when we want to get all the results and in
2482 the correct order (as is the case when this computation is in an
2483 inner-loop nested in an outer-loop that is being vectorized). */
2484
2485static gimple *
2486vect_recog_widen_sum_pattern (vec_info *vinfo,
2487 stmt_vec_info stmt_vinfo, tree *type_out)
2488{
2489 gimple *last_stmt = stmt_vinfo->stmt;
2490 tree oprnd0, oprnd1;
2491 tree type;
2492 gimple *pattern_stmt;
2493 tree var;
2494
2495 /* Look for the following pattern
2496 DX = (TYPE) X;
2497 sum_1 = DX + sum_0;
2498 In which DX is at least double the size of X, and sum_1 has been
2499 recognized as a reduction variable.
2500 */
2501
2502 /* Starting from LAST_STMT, follow the defs of its uses in search
2503 of the above pattern. */
2504
2505 if (!vect_reassociating_reduction_p (vinfo, stmt_info: stmt_vinfo, code: PLUS_EXPR,
2506 op0_out: &oprnd0, op1_out: &oprnd1)
2507 || TREE_CODE (oprnd0) != SSA_NAME
2508 || !vinfo->lookup_def (oprnd0))
2509 return NULL;
2510
2511 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2512
2513 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2514 we know that oprnd1 is the reduction variable (defined by a loop-header
2515 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2516 Left to check that oprnd0 is defined by a cast from type 'type' to type
2517 'TYPE'. */
2518
2519 vect_unpromoted_value unprom0;
2520 if (!vect_look_through_possible_promotion (vinfo, op: oprnd0, unprom: &unprom0)
2521 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2522 return NULL;
2523
2524 vect_pattern_detected (name: "vect_recog_widen_sum_pattern", stmt: last_stmt);
2525
2526 if (!vect_supportable_direct_optab_p (vinfo, otype: type, code: WIDEN_SUM_EXPR,
2527 itype: unprom0.type, vecotype_out: type_out))
2528 return NULL;
2529
2530 var = vect_recog_temp_ssa_var (type, NULL);
2531 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2532
2533 return pattern_stmt;
2534}
2535
2536/* Function vect_recog_bitfield_ref_pattern
2537
2538 Try to find the following pattern:
2539
2540 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2541 result = (type_out) bf_value;
2542
2543 or
2544
2545 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2546
2547 where type_out is a non-bitfield type, that is to say, its precision matches
2548 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2549
2550 Input:
2551
2552 * STMT_VINFO: The stmt from which the pattern search begins.
2553 here it starts with:
2554 result = (type_out) bf_value;
2555
2556 or
2557
2558 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2559
2560 Output:
2561
2562 * TYPE_OUT: The vector type of the output of this pattern.
2563
2564 * Return value: A new stmt that will be used to replace the sequence of
2565 stmts that constitute the pattern. If the precision of type_out is bigger
2566 than the precision type of _1 we perform the widening before the shifting,
2567 since the new precision will be large enough to shift the value and moving
2568 widening operations up the statement chain enables the generation of
2569 widening loads. If we are widening and the operation after the pattern is
2570 an addition then we mask first and shift later, to enable the generation of
2571 shifting adds. In the case of narrowing we will always mask first, shift
2572 last and then perform a narrowing operation. This will enable the
2573 generation of narrowing shifts.
2574
2575 Widening with mask first, shift later:
2576 container = (type_out) container;
2577 masked = container & (((1 << bitsize) - 1) << bitpos);
2578 result = masked >> bitpos;
2579
2580 Widening with shift first, mask last:
2581 container = (type_out) container;
2582 shifted = container >> bitpos;
2583 result = shifted & ((1 << bitsize) - 1);
2584
2585 Narrowing:
2586 masked = container & (((1 << bitsize) - 1) << bitpos);
2587 result = masked >> bitpos;
2588 result = (type_out) result;
2589
2590 If the bitfield is signed and it's wider than type_out, we need to
2591 keep the result sign-extended:
2592 container = (type) container;
2593 masked = container << (prec - bitsize - bitpos);
2594 result = (type_out) (masked >> (prec - bitsize));
2595
2596 Here type is the signed variant of the wider of type_out and the type
2597 of container.
2598
2599 The shift is emitted only when bitpos != 0.
2600
2601 When the original bitfield access was inside a gcond, a new gcond is also
2602 generated with the new `result` as the operand to the comparison.
2603
2604*/
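/* For example, extracting an unsigned 6-bit field at bit offset 3 from a
   32-bit container follows the shift-first form above, roughly:

     unsigned int shifted = container >> 3;
     unsigned int result = shifted & 0x3f;    // (1 << 6) - 1

   with both operations done element-wise on the vectorized container.  */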
2605
2606static gimple *
2607vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2608 tree *type_out)
2609{
2610 gimple *bf_stmt = NULL;
2611 tree lhs = NULL_TREE;
2612 tree ret_type = NULL_TREE;
2613 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2614 if (gcond *cond_stmt = dyn_cast <gcond *> (p: stmt))
2615 {
2616 tree op = gimple_cond_lhs (gs: cond_stmt);
2617 if (TREE_CODE (op) != SSA_NAME)
2618 return NULL;
2619 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2620 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2621 return NULL;
2622 }
2623 else if (is_gimple_assign (gs: stmt)
2624 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2625 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2626 {
2627 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2628 bf_stmt = dyn_cast <gassign *> (p: second_stmt);
2629 lhs = gimple_assign_lhs (gs: stmt);
2630 ret_type = TREE_TYPE (lhs);
2631 }
2632
2633 if (!bf_stmt
2634 || gimple_assign_rhs_code (gs: bf_stmt) != BIT_FIELD_REF)
2635 return NULL;
2636
2637 tree bf_ref = gimple_assign_rhs1 (gs: bf_stmt);
2638 tree container = TREE_OPERAND (bf_ref, 0);
2639 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2640
2641 if (!bit_field_offset (t: bf_ref).is_constant ()
2642 || !bit_field_size (t: bf_ref).is_constant ()
2643 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2644 return NULL;
2645
2646 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2647 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2648 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2649 return NULL;
2650
2651 gimple *use_stmt, *pattern_stmt;
2652 use_operand_p use_p;
2653 bool shift_first = true;
2654 tree container_type = TREE_TYPE (container);
2655 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2656
2657 /* Calculate shift_n before the adjustments for widening loads; otherwise
2658 the container may change and we would have to account for the offset
2659 change that widening loads introduce on big-endian targets. The shift_n
2660 calculated here can stay independent of widening. */
2661 unsigned HOST_WIDE_INT shift_n = bit_field_offset (t: bf_ref).to_constant ();
2662 unsigned HOST_WIDE_INT mask_width = bit_field_size (t: bf_ref).to_constant ();
2663 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2664 if (BYTES_BIG_ENDIAN)
2665 shift_n = prec - shift_n - mask_width;
2666
2667 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2668 TYPE_PRECISION (ret_type) > mask_width);
2669 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2670 TYPE_PRECISION (ret_type));
2671
2672 /* We move the conversion earlier if the loaded type is smaller than the
2673 return type to enable the use of widening loads. And if we need a
2674 sign extension, we need to convert the loaded value early to a signed
2675 type as well. */
2676 if (ref_sext || load_widen)
2677 {
2678 tree type = load_widen ? ret_type : container_type;
2679 if (ref_sext)
2680 type = gimple_signed_type (type);
2681 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2682 NOP_EXPR, container);
2683 container = gimple_get_lhs (pattern_stmt);
2684 container_type = TREE_TYPE (container);
2685 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2686 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2687 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2688 }
2689 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2690 /* If we are doing the conversion last then also delay the shift as we may
2691 be able to combine the shift and conversion in certain cases. */
2692 shift_first = false;
2693
2694 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2695 PLUS_EXPR then do the shift last as some targets can combine the shift and
2696 add into a single instruction. */
2697 if (lhs && single_imm_use (var: lhs, use_p: &use_p, stmt: &use_stmt))
2698 {
2699 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
2700 && gimple_assign_rhs_code (gs: use_stmt) == PLUS_EXPR)
2701 shift_first = false;
2702 }
2703
2704 /* If we don't have to shift we only generate the mask, so just fix the
2705 code-path to shift_first. */
2706 if (shift_n == 0)
2707 shift_first = true;
2708
2709 tree result;
2710 if (shift_first && !ref_sext)
2711 {
2712 tree shifted = container;
2713 if (shift_n)
2714 {
2715 pattern_stmt
2716 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2717 RSHIFT_EXPR, container,
2718 build_int_cst (sizetype, shift_n));
2719 shifted = gimple_assign_lhs (gs: pattern_stmt);
2720 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2721 }
2722
2723 tree mask = wide_int_to_tree (type: container_type,
2724 cst: wi::mask (width: mask_width, negate_p: false, precision: prec));
2725
2726 pattern_stmt
2727 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2728 BIT_AND_EXPR, shifted, mask);
2729 result = gimple_assign_lhs (gs: pattern_stmt);
2730 }
2731 else
2732 {
2733 tree temp = vect_recog_temp_ssa_var (type: container_type);
2734 if (!ref_sext)
2735 {
2736 tree mask = wide_int_to_tree (type: container_type,
2737 cst: wi::shifted_mask (start: shift_n,
2738 width: mask_width,
2739 negate_p: false, precision: prec));
2740 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2741 container, mask);
2742 }
2743 else
2744 {
2745 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2746 shift_n += shl;
2747 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2748 container,
2749 build_int_cst (sizetype,
2750 shl));
2751 }
2752
2753 tree masked = gimple_assign_lhs (gs: pattern_stmt);
2754 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2755 pattern_stmt
2756 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2757 RSHIFT_EXPR, masked,
2758 build_int_cst (sizetype, shift_n));
2759 result = gimple_assign_lhs (gs: pattern_stmt);
2760 }
2761
2762 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2763 {
2764 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2765 pattern_stmt
2766 = gimple_build_assign (vect_recog_temp_ssa_var (type: ret_type),
2767 NOP_EXPR, result);
2768 }
2769
2770 if (!lhs)
2771 {
2772 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2773 gcond *cond_stmt = dyn_cast <gcond *> (p: stmt_info->stmt);
2774 tree cond_cst = gimple_cond_rhs (gs: cond_stmt);
2775 pattern_stmt
2776 = gimple_build_cond (gimple_cond_code (gs: cond_stmt),
2777 gimple_get_lhs (pattern_stmt),
2778 fold_convert (ret_type, cond_cst),
2779 gimple_cond_true_label (gs: cond_stmt),
2780 gimple_cond_false_label (gs: cond_stmt));
2781 }
2782
2783 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2784 vect_pattern_detected (name: "bitfield_ref pattern", stmt: stmt_info->stmt);
2785
2786 return pattern_stmt;
2787}
2788
2789/* Function vect_recog_bit_insert_pattern
2790
2791 Try to find the following pattern:
2792
2793 written = BIT_INSERT_EXPR (container, value, bitpos);
2794
2795 Input:
2796
2797 * STMT_VINFO: The stmt we want to replace.
2798
2799 Output:
2800
2801 * TYPE_OUT: The vector type of the output of this pattern.
2802
2803 * Return value: A new stmt that will be used to replace the sequence of
2804 stmts that constitute the pattern. In this case it will be:
2805 value = (container_type) value; // Make sure value has the container's type
2806 shifted = value << bitpos; // Shift value into place
2807 masked = shifted & (mask << bitpos); // Mask off any bits of the value that
2808 // fall outside the bitfield
2809 cleared = container & ~(mask << bitpos); // Clear the bits in the container
2810 // that are about to be
2811 // overwritten
2812 written = cleared | masked; // Write bits.
2813
2814
2815 where mask = ((1 << TYPE_PRECISION (value)) - 1) is a mask covering the bits
2816 of the bitfield value that is being written.
2817 The shift is emitted only when bitpos != 0.
2818
2819*/
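/* For example, inserting an 8-bit value V at bit offset 4 of a 32-bit
   container C follows the sequence above, roughly:

     unsigned int shifted = (unsigned int) V << 4;
     unsigned int masked  = shifted & (0xffu << 4);
     unsigned int cleared = C & ~(0xffu << 4);
     unsigned int written = cleared | masked;

   so the BIT_INSERT_EXPR becomes plain shift/and/or statements that the
   vectorizer already knows how to handle.  */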
2820
2821static gimple *
2822vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2823 tree *type_out)
2824{
2825 gassign *bf_stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
2826 if (!bf_stmt || gimple_assign_rhs_code (gs: bf_stmt) != BIT_INSERT_EXPR)
2827 return NULL;
2828
2829 tree container = gimple_assign_rhs1 (gs: bf_stmt);
2830 tree value = gimple_assign_rhs2 (gs: bf_stmt);
2831 tree shift = gimple_assign_rhs3 (gs: bf_stmt);
2832
2833 tree bf_type = TREE_TYPE (value);
2834 tree container_type = TREE_TYPE (container);
2835
2836 if (!INTEGRAL_TYPE_P (container_type)
2837 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2838 return NULL;
2839
2840 gimple *pattern_stmt;
2841
2842 vect_unpromoted_value unprom;
2843 unprom.set_op (op_in: value, dt_in: vect_internal_def);
2844 value = vect_convert_input (vinfo, stmt_info, type: container_type, unprom: &unprom,
2845 vectype: get_vectype_for_scalar_type (vinfo,
2846 container_type));
2847
2848 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2849 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2850 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2851 if (BYTES_BIG_ENDIAN)
2852 {
2853 shift_n = prec - shift_n - mask_width;
2854 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2855 }
2856
2857 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2858 {
2859 pattern_stmt =
2860 gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2861 NOP_EXPR, value);
2862 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2863 value = gimple_get_lhs (pattern_stmt);
2864 }
2865
2866 /* Shift VALUE into place. */
2867 tree shifted = value;
2868 if (shift_n)
2869 {
2870 gimple_seq stmts = NULL;
2871 shifted
2872 = gimple_build (seq: &stmts, code: LSHIFT_EXPR, type: container_type, ops: value, ops: shift);
2873 if (!gimple_seq_empty_p (s: stmts))
2874 append_pattern_def_seq (vinfo, stmt_info,
2875 new_stmt: gimple_seq_first_stmt (s: stmts));
2876 }
2877
2878 tree mask_t
2879 = wide_int_to_tree (type: container_type,
2880 cst: wi::shifted_mask (start: shift_n, width: mask_width, negate_p: false, precision: prec));
2881
2882 /* Clear bits we don't want to write back from SHIFTED. */
2883 gimple_seq stmts = NULL;
2884 tree masked = gimple_build (seq: &stmts, code: BIT_AND_EXPR, type: container_type, ops: shifted,
2885 ops: mask_t);
2886 if (!gimple_seq_empty_p (s: stmts))
2887 {
2888 pattern_stmt = gimple_seq_first_stmt (s: stmts);
2889 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2890 }
2891
2892 /* Mask off the bits in the container that we are to write to. */
2893 mask_t = wide_int_to_tree (type: container_type,
2894 cst: wi::shifted_mask (start: shift_n, width: mask_width, negate_p: true, precision: prec));
2895 tree cleared = vect_recog_temp_ssa_var (type: container_type);
2896 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2897 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2898
2899 /* Write MASKED into CLEARED. */
2900 pattern_stmt
2901 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2902 BIT_IOR_EXPR, cleared, masked);
2903
2904 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2905 vect_pattern_detected (name: "bit_insert pattern", stmt: stmt_info->stmt);
2906
2907 return pattern_stmt;
2908}
2909
2910
2911/* Recognize cases in which an operation is performed in one type WTYPE
2912 but could be done more efficiently in a narrower type NTYPE. For example,
2913 if we have:
2914
2915 ATYPE a; // narrower than NTYPE
2916 BTYPE b; // narrower than NTYPE
2917 WTYPE aw = (WTYPE) a;
2918 WTYPE bw = (WTYPE) b;
2919 WTYPE res = aw + bw; // only uses of aw and bw
2920
2921 then it would be more efficient to do:
2922
2923 NTYPE an = (NTYPE) a;
2924 NTYPE bn = (NTYPE) b;
2925 NTYPE resn = an + bn;
2926 WTYPE res = (WTYPE) resn;
2927
2928 Other situations include things like:
2929
2930 ATYPE a; // NTYPE or narrower
2931 WTYPE aw = (WTYPE) a;
2932 WTYPE res = aw + b;
2933
2934 when only "(NTYPE) res" is significant. In that case it's more efficient
2935 to truncate "b" and do the operation on NTYPE instead:
2936
2937 NTYPE an = (NTYPE) a;
2938 NTYPE bn = (NTYPE) b; // truncation
2939 NTYPE resn = an + bn;
2940 WTYPE res = (WTYPE) resn;
2941
2942 All users of "res" should then use "resn" instead, making the final
2943 statement dead (not marked as relevant). The final statement is still
2944 needed to maintain the type correctness of the IR.
2945
2946 vect_determine_precisions has already determined the minimum
2947 precision of the operation and the minimum precision required
2948 by users of the result. */
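/* For example, assuming unsigned char inputs added in int arithmetic, the
   sum always fits in 9 bits, so a minimal sketch of the rewrite is:

     unsigned char a, b;
     int res = (int) a + (int) b;        // original, WTYPE == int

     unsigned short an = a, bn = b;      // NTYPE == unsigned short
     unsigned short resn = an + bn;
     int res2 = (int) resn;              // same value as res

   letting the addition operate on twice as many elements per vector.  */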
2949
2950static gimple *
2951vect_recog_over_widening_pattern (vec_info *vinfo,
2952 stmt_vec_info last_stmt_info, tree *type_out)
2953{
2954 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
2955 if (!last_stmt)
2956 return NULL;
2957
2958 /* See whether we have found that this operation can be done on a
2959 narrower type without changing its semantics. */
2960 unsigned int new_precision = last_stmt_info->operation_precision;
2961 if (!new_precision)
2962 return NULL;
2963
2964 tree lhs = gimple_assign_lhs (gs: last_stmt);
2965 tree type = TREE_TYPE (lhs);
2966 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
2967
2968 /* Punt for reductions where we don't handle the type conversions. */
2969 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2970 return NULL;
2971
2972 /* Keep the first operand of a COND_EXPR as-is: only the other two
2973 operands are interesting. */
2974 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2975
2976 /* Check the operands. */
2977 unsigned int nops = gimple_num_ops (gs: last_stmt) - first_op;
2978 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2979 unprom.quick_grow_cleared (len: nops);
2980 unsigned int min_precision = 0;
2981 bool single_use_p = false;
2982 for (unsigned int i = 0; i < nops; ++i)
2983 {
2984 tree op = gimple_op (gs: last_stmt, i: first_op + i);
2985 if (TREE_CODE (op) == INTEGER_CST)
2986 unprom[i].set_op (op_in: op, dt_in: vect_constant_def);
2987 else if (TREE_CODE (op) == SSA_NAME)
2988 {
2989 bool op_single_use_p = true;
2990 if (!vect_look_through_possible_promotion (vinfo, op, unprom: &unprom[i],
2991 single_use_p: &op_single_use_p))
2992 return NULL;
2993 /* If:
2994
2995 (1) N bits of the result are needed;
2996 (2) all inputs are widened from M<N bits; and
2997 (3) one operand OP is a single-use SSA name
2998
2999 we can shift the M->N widening from OP to the output
3000 without changing the number or type of extensions involved.
3001 This then reduces the number of copies of STMT_INFO.
3002
3003 If instead of (3) more than one operand is a single-use SSA name,
3004 shifting the extension to the output is even more of a win.
3005
3006 If instead:
3007
3008 (1) N bits of the result are needed;
3009 (2) one operand OP2 is widened from M2<N bits;
3010 (3) another operand OP1 is widened from M1<M2 bits; and
3011 (4) both OP1 and OP2 are single-use
3012
3013 the choice is between:
3014
3015 (a) truncating OP2 to M1, doing the operation on M1,
3016 and then widening the result to N
3017
3018 (b) widening OP1 to M2, doing the operation on M2, and then
3019 widening the result to N
3020
3021 Both shift the M2->N widening of the inputs to the output.
3022 (a) additionally shifts the M1->M2 widening to the output;
3023 it requires fewer copies of STMT_INFO but requires an extra
3024 M2->M1 truncation.
3025
3026 Which is better will depend on the complexity and cost of
3027 STMT_INFO, which is hard to predict at this stage. However,
3028 a clear tie-breaker in favor of (b) is the fact that the
3029 truncation in (a) increases the length of the operation chain.
3030
3031 If instead of (4) only one of OP1 or OP2 is single-use,
3032 (b) is still a win over doing the operation in N bits:
3033 it still shifts the M2->N widening on the single-use operand
3034 to the output and reduces the number of STMT_INFO copies.
3035
3036 If neither operand is single-use then operating on fewer than
3037 N bits might lead to more extensions overall. Whether it does
3038 or not depends on global information about the vectorization
3039 region, and whether that's a good trade-off would again
3040 depend on the complexity and cost of the statements involved,
3041 as well as things like register pressure that are not normally
3042 modelled at this stage. We therefore ignore these cases
3043 and just optimize the clear single-use wins above.
3044
3045 Thus we take the maximum precision of the unpromoted operands
3046 and record whether any operand is single-use. */
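	    /* As a concrete instance of the (a)/(b) choice above, take
	       N == 32, M2 == 16 and M1 == 8: (a) truncates the 16-bit
	       operand to 8 bits, does the operation in 8 bits and widens
	       the result to 32 bits, while (b) widens the 8-bit operand to
	       16 bits, does the operation in 16 bits and widens the result
	       to 32 bits; both push the 16->32 extension to the output,
	       but (a) adds a 16->8 truncation to the chain.  */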
3047 if (unprom[i].dt == vect_internal_def)
3048 {
3049 min_precision = MAX (min_precision,
3050 TYPE_PRECISION (unprom[i].type));
3051 single_use_p |= op_single_use_p;
3052 }
3053 }
3054 else
3055 return NULL;
3056 }
3057
3058 /* Although the operation could be done in operation_precision, we have
3059 to balance that against introducing extra truncations or extensions.
3060 Calculate the minimum precision that can be handled efficiently.
3061
3062 The loop above determined that the operation could be handled
3063 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3064 extension from the inputs to the output without introducing more
3065 instructions, and would reduce the number of instructions required
3066 for STMT_INFO itself.
3067
3068 vect_determine_precisions has also determined that the result only
3069 needs min_output_precision bits. Truncating by a factor of N times
3070 requires a tree of N - 1 instructions, so if TYPE is N times wider
3071 than min_output_precision, doing the operation in TYPE and truncating
3072 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3073 In contrast:
3074
3075 - truncating the input to a unary operation and doing the operation
3076 in the new type requires at most N - 1 + 1 = N instructions per
3077 output vector
3078
3079 - doing the same for a binary operation requires at most
3080 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3081
3082 Both unary and binary operations require fewer instructions than
3083 this if the operands were extended from a suitable truncated form.
3084 Thus there is usually nothing to lose by doing operations in
3085 min_output_precision bits, but there can be something to gain. */
3086 if (!single_use_p)
3087 min_precision = last_stmt_info->min_output_precision;
3088 else
3089 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3090
3091 /* Apply the minimum efficient precision we just calculated. */
3092 if (new_precision < min_precision)
3093 new_precision = min_precision;
3094 new_precision = vect_element_precision (precision: new_precision);
3095 if (new_precision >= TYPE_PRECISION (type))
3096 return NULL;
3097
3098 vect_pattern_detected (name: "vect_recog_over_widening_pattern", stmt: last_stmt);
3099
3100 *type_out = get_vectype_for_scalar_type (vinfo, type);
3101 if (!*type_out)
3102 return NULL;
3103
3104 /* We've found a viable pattern. Get the new type of the operation. */
3105 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3106 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3107
3108 /* If we're truncating an operation, we need to make sure that we
3109 don't introduce new undefined overflow. The codes tested here are
3110 a subset of those accepted by vect_truncatable_operation_p. */
3111 tree op_type = new_type;
3112 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3113 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3114 op_type = build_nonstandard_integer_type (new_precision, true);
3115
3116 /* We specifically don't check here whether the target supports the
3117 new operation, since it might be something that a later pattern
3118 wants to rewrite anyway. If targets have a minimum element size
3119 for some optabs, we should pattern-match smaller ops to larger ops
3120 where beneficial. */
3121 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3122 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3123 if (!new_vectype || !op_vectype)
3124 return NULL;
3125
3126 if (dump_enabled_p ())
3127 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3128 type, new_type);
3129
3130 /* Calculate the rhs operands for an operation on OP_TYPE. */
3131 tree ops[3] = {};
3132 for (unsigned int i = 1; i < first_op; ++i)
3133 ops[i - 1] = gimple_op (gs: last_stmt, i);
3134 /* For right shifts limit the shift operand. */
3135 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: nops, result: &ops[first_op - 1],
3136 type: op_type, unprom: &unprom[0], vectype: op_vectype);
3137
3138 /* Limit shift operands. */
3139 if (code == RSHIFT_EXPR)
3140 {
3141 wide_int min_value, max_value;
3142 if (TREE_CODE (ops[1]) == INTEGER_CST)
3143 ops[1] = wide_int_to_tree (type: op_type,
3144 cst: wi::umin (x: wi::to_wide (t: ops[1]),
3145 y: new_precision - 1));
3146 else if (!vect_get_range_info (var: ops[1], min_value: &min_value, max_value: &max_value)
3147 || wi::ge_p (x: max_value, y: new_precision, TYPE_SIGN (op_type)))
3148 {
3149 /* ??? Note the following is bad for SLP, as that only supports
3150 same-argument widened shifts and it un-CSEs same arguments. */
3151 tree new_var = vect_recog_temp_ssa_var (type: op_type, NULL);
3152 gimple *pattern_stmt
3153 = gimple_build_assign (new_var, MIN_EXPR, ops[1],
3154 build_int_cst (op_type, new_precision - 1));
3155 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
3156 if (ops[1] == unprom[1].op && unprom[1].dt == vect_external_def)
3157 {
3158 if (edge e = vect_get_external_def_edge (vinfo, var: ops[1]))
3159 {
3160 basic_block new_bb
3161 = gsi_insert_on_edge_immediate (e, pattern_stmt);
3162 gcc_assert (!new_bb);
3163 }
3164 else
3165 return NULL;
3166 }
3167 else
3168 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: pattern_stmt,
3169 vectype: op_vectype);
3170 ops[1] = new_var;
3171 }
3172 }
3173
3174 /* Use the operation to produce a result of type OP_TYPE. */
3175 tree new_var = vect_recog_temp_ssa_var (type: op_type, NULL);
3176 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3177 ops[0], ops[1], ops[2]);
3178 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
3179
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE, vect_location,
3182 "created pattern stmt: %G", pattern_stmt);
3183
3184 /* Convert back to the original signedness, if OP_TYPE is different
3185 from NEW_TYPE. */
3186 if (op_type != new_type)
3187 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type: new_type,
3188 pattern_stmt, vecitype: op_vectype);
3189
3190 /* Promote the result to the original type. */
3191 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type,
3192 pattern_stmt, vecitype: new_vectype);
3193
3194 return pattern_stmt;
3195}
3196
3197/* Recognize the following patterns:
3198
3199 ATYPE a; // narrower than TYPE
3200 BTYPE b; // narrower than TYPE
3201
3202 1) Multiply high with scaling
3203 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3204 Here, c is bitsize (TYPE) / 2 - 1.
3205
3206 2) ... or also with rounding
3207 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3208 Here, d is bitsize (TYPE) / 2 - 2.
3209
3210 3) Normal multiply high
3211 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3212 Here, e is bitsize (TYPE) / 2.
3213
3214 where only the bottom half of res is used. */
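/* For example, with 16-bit inputs widened to TYPE == int (so d == 14),
   case 2 above corresponds roughly to the fixed-point rounding idiom

     short mulhrs (short a, short b)
     {
       return (short) (((((int) a * (int) b) >> 14) + 1) >> 1);
     }

   which can be implemented as a single multiply-high-with-rounding
   (IFN_MULHRS) on the narrow type.  */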
3215
3216static gimple *
3217vect_recog_mulhs_pattern (vec_info *vinfo,
3218 stmt_vec_info last_stmt_info, tree *type_out)
3219{
3220 /* Check for a right shift. */
3221 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3222 if (!last_stmt
3223 || gimple_assign_rhs_code (gs: last_stmt) != RSHIFT_EXPR)
3224 return NULL;
3225
3226 /* Check that the shift result is wider than the users of the
3227 result need (i.e. that narrowing would be a natural choice). */
3228 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3229 unsigned int target_precision
3230 = vect_element_precision (precision: last_stmt_info->min_output_precision);
3231 if (!INTEGRAL_TYPE_P (lhs_type)
3232 || target_precision >= TYPE_PRECISION (lhs_type))
3233 return NULL;
3234
3235 /* Look through any change in sign on the outer shift input. */
3236 vect_unpromoted_value unprom_rshift_input;
3237 tree rshift_input = vect_look_through_possible_promotion
3238 (vinfo, op: gimple_assign_rhs1 (gs: last_stmt), unprom: &unprom_rshift_input);
3239 if (!rshift_input
3240 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3241 != TYPE_PRECISION (lhs_type))
3242 return NULL;
3243
3244 /* Get the definition of the shift input. */
3245 stmt_vec_info rshift_input_stmt_info
3246 = vect_get_internal_def (vinfo, op: rshift_input);
3247 if (!rshift_input_stmt_info)
3248 return NULL;
3249 gassign *rshift_input_stmt
3250 = dyn_cast <gassign *> (p: rshift_input_stmt_info->stmt);
3251 if (!rshift_input_stmt)
3252 return NULL;
3253
3254 stmt_vec_info mulh_stmt_info;
3255 tree scale_term;
3256 bool rounding_p = false;
3257
3258 /* Check for the presence of the rounding term. */
3259 if (gimple_assign_rhs_code (gs: rshift_input_stmt) == PLUS_EXPR)
3260 {
3261 /* Check that the outer shift was by 1. */
3262 if (!integer_onep (gimple_assign_rhs2 (gs: last_stmt)))
3263 return NULL;
3264
3265 /* Check that the second operand of the PLUS_EXPR is 1. */
3266 if (!integer_onep (gimple_assign_rhs2 (gs: rshift_input_stmt)))
3267 return NULL;
3268
3269 /* Look through any change in sign on the addition input. */
3270 vect_unpromoted_value unprom_plus_input;
3271 tree plus_input = vect_look_through_possible_promotion
3272 (vinfo, op: gimple_assign_rhs1 (gs: rshift_input_stmt), unprom: &unprom_plus_input);
3273 if (!plus_input
3274 || TYPE_PRECISION (TREE_TYPE (plus_input))
3275 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3276 return NULL;
3277
3278 /* Get the definition of the multiply-high-scale part. */
3279 stmt_vec_info plus_input_stmt_info
3280 = vect_get_internal_def (vinfo, op: plus_input);
3281 if (!plus_input_stmt_info)
3282 return NULL;
3283 gassign *plus_input_stmt
3284 = dyn_cast <gassign *> (p: plus_input_stmt_info->stmt);
3285 if (!plus_input_stmt
3286 || gimple_assign_rhs_code (gs: plus_input_stmt) != RSHIFT_EXPR)
3287 return NULL;
3288
3289 /* Look through any change in sign on the scaling input. */
3290 vect_unpromoted_value unprom_scale_input;
3291 tree scale_input = vect_look_through_possible_promotion
3292 (vinfo, op: gimple_assign_rhs1 (gs: plus_input_stmt), unprom: &unprom_scale_input);
3293 if (!scale_input
3294 || TYPE_PRECISION (TREE_TYPE (scale_input))
3295 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3296 return NULL;
3297
3298 /* Get the definition of the multiply-high part. */
3299 mulh_stmt_info = vect_get_internal_def (vinfo, op: scale_input);
3300 if (!mulh_stmt_info)
3301 return NULL;
3302
3303 /* Get the scaling term. */
3304 scale_term = gimple_assign_rhs2 (gs: plus_input_stmt);
3305 rounding_p = true;
3306 }
3307 else
3308 {
3309 mulh_stmt_info = rshift_input_stmt_info;
3310 scale_term = gimple_assign_rhs2 (gs: last_stmt);
3311 }
3312
3313 /* Check that the scaling factor is constant. */
3314 if (TREE_CODE (scale_term) != INTEGER_CST)
3315 return NULL;
3316
3317 /* Check whether the scaling input term can be seen as two widened
3318 inputs multiplied together. */
3319 vect_unpromoted_value unprom_mult[2];
3320 tree new_type;
3321 unsigned int nops
3322 = vect_widened_op_tree (vinfo, stmt_info: mulh_stmt_info, code: MULT_EXPR, widened_code: WIDEN_MULT_EXPR,
3323 shift_p: false, max_nops: 2, unprom: unprom_mult, common_type: &new_type);
3324 if (nops != 2)
3325 return NULL;
3326
3327 /* Adjust output precision. */
3328 if (TYPE_PRECISION (new_type) < target_precision)
3329 new_type = build_nonstandard_integer_type
3330 (target_precision, TYPE_UNSIGNED (new_type));
3331
3332 unsigned mult_precision = TYPE_PRECISION (new_type);
3333 internal_fn ifn;
3334  /* Check that the scaling factor is the expected one.  Note that the
3335     comparison uses MULT_PRECISION, the precision we actually use for
3336     the internal function, rather than TARGET_PRECISION.  */
3337 if (rounding_p)
3338 {
3339 /* Check pattern 2). */
3340 if (wi::to_widest (t: scale_term) + mult_precision + 2
3341 != TYPE_PRECISION (lhs_type))
3342 return NULL;
3343
3344 ifn = IFN_MULHRS;
3345 }
3346 else
3347 {
3348 /* Check for pattern 1). */
3349 if (wi::to_widest (t: scale_term) + mult_precision + 1
3350 == TYPE_PRECISION (lhs_type))
3351 ifn = IFN_MULHS;
3352 /* Check for pattern 3). */
3353 else if (wi::to_widest (t: scale_term) + mult_precision
3354 == TYPE_PRECISION (lhs_type))
3355 ifn = IFN_MULH;
3356 else
3357 return NULL;
3358 }
3359
3360 vect_pattern_detected (name: "vect_recog_mulhs_pattern", stmt: last_stmt);
3361
3362 /* Check for target support. */
3363 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3364 if (!new_vectype
3365 || !direct_internal_fn_supported_p
3366 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3367 return NULL;
3368
3369 /* The IR requires a valid vector type for the cast result, even though
3370 it's likely to be discarded. */
3371 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3372 if (!*type_out)
3373 return NULL;
3374
3375  /* Generate the IFN_MULHS / IFN_MULHRS / IFN_MULH call.  */
3376 tree new_var = vect_recog_temp_ssa_var (type: new_type, NULL);
3377 tree new_ops[2];
3378 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: 2, result: new_ops, type: new_type,
3379 unprom: unprom_mult, vectype: new_vectype);
3380 gcall *mulhrs_stmt
3381 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3382 gimple_call_set_lhs (gs: mulhrs_stmt, lhs: new_var);
3383 gimple_set_location (g: mulhrs_stmt, location: gimple_location (g: last_stmt));
3384
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3388
3389 return vect_convert_output (vinfo, stmt_info: last_stmt_info, type: lhs_type,
3390 pattern_stmt: mulhrs_stmt, vecitype: new_vectype);
3391}
3392
3393/* Recognize the patterns:
3394
3395 ATYPE a; // narrower than TYPE
3396 BTYPE b; // narrower than TYPE
3397 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3398 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3399
3400 where only the bottom half of avg is used. Try to transform them into:
3401
3402 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3403 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3404
3405 followed by:
3406
3407 TYPE avg = (TYPE) avg';
3408
3409 where NTYPE is no wider than half of TYPE. Since only the bottom half
3410 of avg is used, all or part of the cast of avg' should become redundant.
3411
3412 If there is no target support available, generate code to distribute rshift
3413 over plus and add a carry. */
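/* For example (illustrative only), with unsigned char a, b promoted to int:

     ((int) a + (int) b) >> 1       becomes  .AVG_FLOOR (a, b)
     ((int) a + (int) b + 1) >> 1   becomes  .AVG_CEIL (a, b)

   and the scalar equivalent of the fallback expansion is

     floor:  (a >> 1) + (b >> 1) + ((a & b) & 1)
     ceil:   (a >> 1) + (b >> 1) + ((a | b) & 1)

   which never exceeds the range of the narrow type.  */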
3414
3415static gimple *
3416vect_recog_average_pattern (vec_info *vinfo,
3417 stmt_vec_info last_stmt_info, tree *type_out)
3418{
3419 /* Check for a shift right by one bit. */
3420 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3421 if (!last_stmt
3422 || gimple_assign_rhs_code (gs: last_stmt) != RSHIFT_EXPR
3423 || !integer_onep (gimple_assign_rhs2 (gs: last_stmt)))
3424 return NULL;
3425
3426 /* Check that the shift result is wider than the users of the
3427 result need (i.e. that narrowing would be a natural choice). */
3428 tree lhs = gimple_assign_lhs (gs: last_stmt);
3429 tree type = TREE_TYPE (lhs);
3430 unsigned int target_precision
3431 = vect_element_precision (precision: last_stmt_info->min_output_precision);
3432 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3433 return NULL;
3434
3435 /* Look through any change in sign on the shift input. */
3436 tree rshift_rhs = gimple_assign_rhs1 (gs: last_stmt);
3437 vect_unpromoted_value unprom_plus;
3438 rshift_rhs = vect_look_through_possible_promotion (vinfo, op: rshift_rhs,
3439 unprom: &unprom_plus);
3440 if (!rshift_rhs
3441 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3442 return NULL;
3443
3444 /* Get the definition of the shift input. */
3445 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, op: rshift_rhs);
3446 if (!plus_stmt_info)
3447 return NULL;
3448
3449 /* Check whether the shift input can be seen as a tree of additions on
3450 2 or 3 widened inputs.
3451
3452 Note that the pattern should be a win even if the result of one or
3453 more additions is reused elsewhere: if the pattern matches, we'd be
3454 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3455 internal_fn ifn = IFN_AVG_FLOOR;
3456 vect_unpromoted_value unprom[3];
3457 tree new_type;
3458 unsigned int nops = vect_widened_op_tree (vinfo, stmt_info: plus_stmt_info, code: PLUS_EXPR,
3459 widened_code: IFN_VEC_WIDEN_PLUS, shift_p: false, max_nops: 3,
3460 unprom, common_type: &new_type);
3461 if (nops == 0)
3462 return NULL;
3463 if (nops == 3)
3464 {
3465 /* Check that one operand is 1. */
3466 unsigned int i;
3467 for (i = 0; i < 3; ++i)
3468 if (integer_onep (unprom[i].op))
3469 break;
3470 if (i == 3)
3471 return NULL;
3472 /* Throw away the 1 operand and keep the other two. */
3473 if (i < 2)
3474 unprom[i] = unprom[2];
3475 ifn = IFN_AVG_CEIL;
3476 }
3477
3478 vect_pattern_detected (name: "vect_recog_average_pattern", stmt: last_stmt);
3479
3480 /* We know that:
3481
3482 (a) the operation can be viewed as:
3483
3484 TYPE widened0 = (TYPE) UNPROM[0];
3485 TYPE widened1 = (TYPE) UNPROM[1];
3486 TYPE tmp1 = widened0 + widened1 {+ 1};
3487 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3488
3489 (b) the first two statements are equivalent to:
3490
3491 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3492 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3493
3494 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3495 where sensible;
3496
3497 (d) all the operations can be performed correctly at twice the width of
3498 NEW_TYPE, due to the nature of the average operation; and
3499
3500 (e) users of the result of the right shift need only TARGET_PRECISION
3501 bits, where TARGET_PRECISION is no more than half of TYPE's
3502 precision.
3503
3504 Under these circumstances, the only situation in which NEW_TYPE
3505 could be narrower than TARGET_PRECISION is if widened0, widened1
3506 and an addition result are all used more than once. Thus we can
3507 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3508 as "free", whereas widening the result of the average instruction
3509 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3510 therefore better not to go narrower than TARGET_PRECISION. */
3511 if (TYPE_PRECISION (new_type) < target_precision)
3512 new_type = build_nonstandard_integer_type (target_precision,
3513 TYPE_UNSIGNED (new_type));
3514
3515 /* Check for target support. */
3516 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3517 if (!new_vectype)
3518 return NULL;
3519
3520 bool fallback_p = false;
3521
3522 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3523 ;
3524 else if (TYPE_UNSIGNED (new_type)
3525 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3526 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3527 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3528 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3529 fallback_p = true;
3530 else
3531 return NULL;
3532
3533 /* The IR requires a valid vector type for the cast result, even though
3534 it's likely to be discarded. */
3535 *type_out = get_vectype_for_scalar_type (vinfo, type);
3536 if (!*type_out)
3537 return NULL;
3538
3539 tree new_var = vect_recog_temp_ssa_var (type: new_type, NULL);
3540 tree new_ops[2];
3541 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: 2, result: new_ops, type: new_type,
3542 unprom, vectype: new_vectype);
3543
3544 if (fallback_p)
3545 {
3546 /* As a fallback, generate code for following sequence:
3547
3548 shifted_op0 = new_ops[0] >> 1;
3549 shifted_op1 = new_ops[1] >> 1;
3550 sum_of_shifted = shifted_op0 + shifted_op1;
3551 unmasked_carry = new_ops[0] and/or new_ops[1];
3552 carry = unmasked_carry & 1;
3553 new_var = sum_of_shifted + carry;
3554 */
3555
3556 tree one_cst = build_one_cst (new_type);
3557 gassign *g;
3558
3559 tree shifted_op0 = vect_recog_temp_ssa_var (type: new_type, NULL);
3560 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3561 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3562
3563 tree shifted_op1 = vect_recog_temp_ssa_var (type: new_type, NULL);
3564 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3565 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3566
3567 tree sum_of_shifted = vect_recog_temp_ssa_var (type: new_type, NULL);
3568 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3569 shifted_op0, shifted_op1);
3570 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3571
3572 tree unmasked_carry = vect_recog_temp_ssa_var (type: new_type, NULL);
3573 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3574 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3575 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3576
3577 tree carry = vect_recog_temp_ssa_var (type: new_type, NULL);
3578 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3579 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3580
3581 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3582 return vect_convert_output (vinfo, stmt_info: last_stmt_info, type, pattern_stmt: g, vecitype: new_vectype);
3583 }
3584
3585 /* Generate the IFN_AVG* call. */
3586 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3587 new_ops[1]);
3588 gimple_call_set_lhs (gs: average_stmt, lhs: new_var);
3589 gimple_set_location (g: average_stmt, location: gimple_location (g: last_stmt));
3590
3591 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_NOTE, vect_location,
3593 "created pattern stmt: %G", (gimple *) average_stmt);
3594
3595 return vect_convert_output (vinfo, stmt_info: last_stmt_info,
3596 type, pattern_stmt: average_stmt, vecitype: new_vectype);
3597}
3598
3599/* Recognize cases in which the input to a cast is wider than its
3600 output, and the input is fed by a widening operation. Fold this
3601 by removing the unnecessary intermediate widening. E.g.:
3602
3603 unsigned char a;
3604 unsigned int b = (unsigned int) a;
3605 unsigned short c = (unsigned short) b;
3606
3607 -->
3608
3609 unsigned short c = (unsigned short) a;
3610
3611 Although this is rare in input IR, it is an expected side-effect
3612 of the over-widening pattern above.
3613
3614 This is beneficial also for integer-to-float conversions, if the
3615 widened integer has more bits than the float, and if the unwidened
3616 input doesn't. */
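/* A hypothetical integer-to-float instance of the same idea:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     float c = (float) b;

   -->

     float c = (float) a;

   Here the unsigned int value has more bits than float can represent
   exactly, but the unsigned char value does not, so the intermediate
   widening adds nothing.  */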
3617
3618static gimple *
3619vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3620 stmt_vec_info last_stmt_info, tree *type_out)
3621{
3622 /* Check for a cast, including an integer-to-float conversion. */
3623 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3624 if (!last_stmt)
3625 return NULL;
3626 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
3627 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3628 return NULL;
3629
3630  /* Make sure that the lhs is a scalar with a natural bitsize.  */
3631 tree lhs = gimple_assign_lhs (gs: last_stmt);
3632 if (!lhs)
3633 return NULL;
3634 tree lhs_type = TREE_TYPE (lhs);
3635 scalar_mode lhs_mode;
3636 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3637 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), result: &lhs_mode))
3638 return NULL;
3639
3640 /* Check for a narrowing operation (from a vector point of view). */
3641 tree rhs = gimple_assign_rhs1 (gs: last_stmt);
3642 tree rhs_type = TREE_TYPE (rhs);
3643 if (!INTEGRAL_TYPE_P (rhs_type)
3644 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3645 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (mode: lhs_mode))
3646 return NULL;
3647
3648 /* Try to find an unpromoted input. */
3649 vect_unpromoted_value unprom;
3650 if (!vect_look_through_possible_promotion (vinfo, op: rhs, unprom: &unprom)
3651 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3652 return NULL;
3653
3654 /* If the bits above RHS_TYPE matter, make sure that they're the
3655 same when extending from UNPROM as they are when extending from RHS. */
3656 if (!INTEGRAL_TYPE_P (lhs_type)
3657 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3658 return NULL;
3659
3660 /* We can get the same result by casting UNPROM directly, to avoid
3661 the unnecessary widening and narrowing. */
3662 vect_pattern_detected (name: "vect_recog_cast_forwprop_pattern", stmt: last_stmt);
3663
3664 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3665 if (!*type_out)
3666 return NULL;
3667
3668 tree new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
3669 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3670 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
3671
3672 return pattern_stmt;
3673}
3674
3675/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3676 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
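/* E.g. (illustrative only):

     short a_t;
     int res_T = (int) a_t << CONST;

   becomes a single WIDEN_LSHIFT_EXPR that shifts the narrow input and
   produces the wide result directly.  */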
3677
3678static gimple *
3679vect_recog_widen_shift_pattern (vec_info *vinfo,
3680 stmt_vec_info last_stmt_info, tree *type_out)
3681{
3682 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3683 orig_code: LSHIFT_EXPR, wide_code: WIDEN_LSHIFT_EXPR, shift_p: true,
3684 name: "vect_recog_widen_shift_pattern");
3685}
3686
3687/* Detect a rotate pattern that wouldn't otherwise be vectorized:
3688
3689 type a_t, b_t, c_t;
3690
3691 S0 a_t = b_t r<< c_t;
3692
3693 Input/Output:
3694
3695 * STMT_VINFO: The stmt from which the pattern search begins,
3696 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3697 with a sequence:
3698
3699 S1 d_t = -c_t;
3700 S2 e_t = d_t & (B - 1);
3701 S3 f_t = b_t << c_t;
3702 S4 g_t = b_t >> e_t;
3703 S0 a_t = f_t | g_t;
3704
3705 where B is element bitsize of type.
3706
3707 Output:
3708
3709 * TYPE_OUT: The type of the output of this pattern.
3710
3711 * Return value: A new stmt that will be used to replace the rotate
3712 S0 stmt. */
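/* For a 32-bit unsigned b_t this expansion corresponds to the scalar
   rotate-left idiom (illustrative only, assuming 0 <= c_t < 32):

     a_t = (b_t << c_t) | (b_t >> (-c_t & 31));

   Masking the negated count with B - 1 keeps the right-shift amount in
   range even for c_t == 0, where the naive "b_t >> (32 - c_t)" form
   would shift by the full width.  */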
3713
3714static gimple *
3715vect_recog_rotate_pattern (vec_info *vinfo,
3716 stmt_vec_info stmt_vinfo, tree *type_out)
3717{
3718 gimple *last_stmt = stmt_vinfo->stmt;
3719 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3720 gimple *pattern_stmt, *def_stmt;
3721 enum tree_code rhs_code;
3722 enum vect_def_type dt;
3723 optab optab1, optab2;
3724 edge ext_def = NULL;
3725 bool bswap16_p = false;
3726
3727 if (is_gimple_assign (gs: last_stmt))
3728 {
3729 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
3730 switch (rhs_code)
3731 {
3732 case LROTATE_EXPR:
3733 case RROTATE_EXPR:
3734 break;
3735 default:
3736 return NULL;
3737 }
3738
3739 lhs = gimple_assign_lhs (gs: last_stmt);
3740 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
3741 type = TREE_TYPE (oprnd0);
3742 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
3743 }
3744 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3745 {
3746 /* __builtin_bswap16 (x) is another form of x r>> 8.
3747 The vectorizer has bswap support, but only if the argument isn't
3748 promoted. */
3749 lhs = gimple_call_lhs (gs: last_stmt);
3750 oprnd0 = gimple_call_arg (gs: last_stmt, index: 0);
3751 type = TREE_TYPE (oprnd0);
3752 if (!lhs
3753 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3754 || TYPE_PRECISION (type) <= 16
3755 || TREE_CODE (oprnd0) != SSA_NAME
3756 || BITS_PER_UNIT != 8)
3757 return NULL;
3758
3759 stmt_vec_info def_stmt_info;
3760 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3761 return NULL;
3762
3763 if (dt != vect_internal_def)
3764 return NULL;
3765
3766 if (gimple_assign_cast_p (s: def_stmt))
3767 {
3768 def = gimple_assign_rhs1 (gs: def_stmt);
3769 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3770 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3771 oprnd0 = def;
3772 }
3773
3774 type = TREE_TYPE (lhs);
3775 vectype = get_vectype_for_scalar_type (vinfo, type);
3776 if (vectype == NULL_TREE)
3777 return NULL;
3778
3779 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3780 {
3781 /* The encoding uses one stepped pattern for each byte in the
3782 16-bit word. */
3783 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (node: char_vectype), 2, 3);
3784 for (unsigned i = 0; i < 3; ++i)
3785 for (unsigned j = 0; j < 2; ++j)
3786 elts.quick_push (obj: (i + 1) * 2 - j - 1);
3787
3788 vec_perm_indices indices (elts, 1,
3789 TYPE_VECTOR_SUBPARTS (node: char_vectype));
3790 machine_mode vmode = TYPE_MODE (char_vectype);
3791 if (can_vec_perm_const_p (vmode, vmode, indices))
3792 {
3793 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3794 undo the argument promotion. */
3795 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3796 {
3797 def = vect_recog_temp_ssa_var (type, NULL);
3798 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3799 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
3800 oprnd0 = def;
3801 }
3802
3803 /* Pattern detected. */
3804 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3805
3806 *type_out = vectype;
3807
3808 /* Pattern supported. Create a stmt to be used to replace the
3809 pattern, with the unpromoted argument. */
3810 var = vect_recog_temp_ssa_var (type, NULL);
3811 pattern_stmt = gimple_build_call (gimple_call_fndecl (gs: last_stmt),
3812 1, oprnd0);
3813 gimple_call_set_lhs (gs: pattern_stmt, lhs: var);
3814 gimple_call_set_fntype (call_stmt: as_a <gcall *> (p: pattern_stmt),
3815 fntype: gimple_call_fntype (gs: last_stmt));
3816 return pattern_stmt;
3817 }
3818 }
3819
3820 oprnd1 = build_int_cst (integer_type_node, 8);
3821 rhs_code = LROTATE_EXPR;
3822 bswap16_p = true;
3823 }
3824 else
3825 return NULL;
3826
3827 if (TREE_CODE (oprnd0) != SSA_NAME
3828 || !INTEGRAL_TYPE_P (type)
3829 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3830 return NULL;
3831
3832 stmt_vec_info def_stmt_info;
3833 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3834 return NULL;
3835
3836 if (dt != vect_internal_def
3837 && dt != vect_constant_def
3838 && dt != vect_external_def)
3839 return NULL;
3840
3841 vectype = get_vectype_for_scalar_type (vinfo, type);
3842 if (vectype == NULL_TREE)
3843 return NULL;
3844
3845 /* If vector/vector or vector/scalar rotate is supported by the target,
3846 don't do anything here. */
3847 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3848 if (optab1
3849 && optab_handler (op: optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3850 {
3851 use_rotate:
3852 if (bswap16_p)
3853 {
3854 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3855 {
3856 def = vect_recog_temp_ssa_var (type, NULL);
3857 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3858 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
3859 oprnd0 = def;
3860 }
3861
3862 /* Pattern detected. */
3863 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3864
3865 *type_out = vectype;
3866
3867 /* Pattern supported. Create a stmt to be used to replace the
3868 pattern. */
3869 var = vect_recog_temp_ssa_var (type, NULL);
3870 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3871 oprnd1);
3872 return pattern_stmt;
3873 }
3874 return NULL;
3875 }
3876
3877 if (is_a <bb_vec_info> (p: vinfo) || dt != vect_internal_def)
3878 {
3879 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3880 if (optab2
3881 && optab_handler (op: optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3882 goto use_rotate;
3883 }
3884
3885 tree utype = unsigned_type_for (type);
3886 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3887 if (!uvectype)
3888 return NULL;
3889
3890 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3891 don't do anything here either. */
3892 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3893 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3894 if (!optab1
3895 || optab_handler (op: optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3896 || !optab2
3897 || optab_handler (op: optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3898 {
3899 if (! is_a <bb_vec_info> (p: vinfo) && dt == vect_internal_def)
3900 return NULL;
3901 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3902 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3903 if (!optab1
3904 || optab_handler (op: optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3905 || !optab2
3906 || optab_handler (op: optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3907 return NULL;
3908 }
3909
3910 *type_out = vectype;
3911
3912 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3913 {
3914 def = vect_recog_temp_ssa_var (type: utype, NULL);
3915 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3916 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3917 oprnd0 = def;
3918 }
3919
3920 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3921 ext_def = vect_get_external_def_edge (vinfo, var: oprnd1);
3922
3923 def = NULL_TREE;
3924 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3925 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3926 def = oprnd1;
3927 else if (def_stmt && gimple_assign_cast_p (s: def_stmt))
3928 {
3929 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
3930 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3931 && TYPE_PRECISION (TREE_TYPE (rhs1))
3932 == TYPE_PRECISION (type))
3933 def = rhs1;
3934 }
3935
3936 if (def == NULL_TREE)
3937 {
3938 def = vect_recog_temp_ssa_var (type: utype, NULL);
3939 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3940 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3941 }
3942 stype = TREE_TYPE (def);
3943
3944 if (TREE_CODE (def) == INTEGER_CST)
3945 {
3946 if (!tree_fits_uhwi_p (def)
3947 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3948 || integer_zerop (def))
3949 return NULL;
3950 def2 = build_int_cst (stype,
3951 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3952 }
3953 else
3954 {
3955 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3956
3957 if (vecstype == NULL_TREE)
3958 return NULL;
3959 def2 = vect_recog_temp_ssa_var (type: stype, NULL);
3960 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3961 if (ext_def)
3962 {
3963 basic_block new_bb
3964 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3965 gcc_assert (!new_bb);
3966 }
3967 else
3968 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
3969
3970 def2 = vect_recog_temp_ssa_var (type: stype, NULL);
3971 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3972 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3973 gimple_assign_lhs (gs: def_stmt), mask);
3974 if (ext_def)
3975 {
3976 basic_block new_bb
3977 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3978 gcc_assert (!new_bb);
3979 }
3980 else
3981 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
3982 }
3983
3984 var1 = vect_recog_temp_ssa_var (type: utype, NULL);
3985 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3986 ? LSHIFT_EXPR : RSHIFT_EXPR,
3987 oprnd0, def);
3988 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3989
3990 var2 = vect_recog_temp_ssa_var (type: utype, NULL);
3991 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3992 ? RSHIFT_EXPR : LSHIFT_EXPR,
3993 oprnd0, def2);
3994 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3995
3996 /* Pattern detected. */
3997 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3998
3999 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4000 var = vect_recog_temp_ssa_var (type: utype, NULL);
4001 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4002
4003 if (!useless_type_conversion_p (type, utype))
4004 {
4005 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: uvectype);
4006 tree result = vect_recog_temp_ssa_var (type, NULL);
4007 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4008 }
4009 return pattern_stmt;
4010}
4011
4012/* Detect a vector-by-vector shift pattern that wouldn't otherwise be
4013   vectorized:
4014
4015 type a_t;
4016 TYPE b_T, res_T;
4017
4018 S1 a_t = ;
4019 S2 b_T = ;
4020 S3 res_T = b_T op a_t;
4021
4022 where type 'TYPE' is a type with different size than 'type',
4023 and op is <<, >> or rotate.
4024
4025 Also detect cases:
4026
4027 type a_t;
4028 TYPE b_T, c_T, res_T;
4029
4030 S0 c_T = ;
4031 S1 a_t = (type) c_T;
4032 S2 b_T = ;
4033 S3 res_T = b_T op a_t;
4034
4035 Input/Output:
4036
4037 * STMT_VINFO: The stmt from which the pattern search begins,
4038 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4039 with a shift/rotate which has same type on both operands, in the
4040 second case just b_T op c_T, in the first case with added cast
4041 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4042
4043 Output:
4044
4045 * TYPE_OUT: The type of the output of this pattern.
4046
4047 * Return value: A new stmt that will be used to replace the shift/rotate
4048 S3 stmt. */
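/* For example (illustrative only):

     unsigned char a_t;
     unsigned int b_T, res_T;

     res_T = b_T << a_t;

   is rewritten so that both shift operands have the vector element type:

     unsigned int a_T = (unsigned int) a_t;
     res_T = b_T << a_T;

   allowing a vector shift-by-vector instruction to be used.  */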
4049
4050static gimple *
4051vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4052 stmt_vec_info stmt_vinfo,
4053 tree *type_out)
4054{
4055 gimple *last_stmt = stmt_vinfo->stmt;
4056 tree oprnd0, oprnd1, lhs, var;
4057 gimple *pattern_stmt;
4058 enum tree_code rhs_code;
4059
4060 if (!is_gimple_assign (gs: last_stmt))
4061 return NULL;
4062
4063 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
4064 switch (rhs_code)
4065 {
4066 case LSHIFT_EXPR:
4067 case RSHIFT_EXPR:
4068 case LROTATE_EXPR:
4069 case RROTATE_EXPR:
4070 break;
4071 default:
4072 return NULL;
4073 }
4074
4075 lhs = gimple_assign_lhs (gs: last_stmt);
4076 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4077 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4078 if (TREE_CODE (oprnd0) != SSA_NAME
4079 || TREE_CODE (oprnd1) != SSA_NAME
4080 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4081 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4082 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4083 || TYPE_PRECISION (TREE_TYPE (lhs))
4084 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4085 return NULL;
4086
4087 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, op: oprnd1);
4088 if (!def_vinfo)
4089 return NULL;
4090
4091 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4092 if (*type_out == NULL_TREE)
4093 return NULL;
4094
4095 tree def = NULL_TREE;
4096 gassign *def_stmt = dyn_cast <gassign *> (p: def_vinfo->stmt);
4097 if (def_stmt && gimple_assign_cast_p (s: def_stmt))
4098 {
4099 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
4100 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4101 && TYPE_PRECISION (TREE_TYPE (rhs1))
4102 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4103 {
4104 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4105 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4106 def = rhs1;
4107 else
4108 {
4109 tree mask
4110 = build_low_bits_mask (TREE_TYPE (rhs1),
4111 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4112 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4113 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4114 tree vecstype = get_vectype_for_scalar_type (vinfo,
4115 TREE_TYPE (rhs1));
4116 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
4117 }
4118 }
4119 }
4120
4121 if (def == NULL_TREE)
4122 {
4123 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4124 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4125 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4126 }
4127
4128 /* Pattern detected. */
4129 vect_pattern_detected (name: "vect_recog_vector_vector_shift_pattern", stmt: last_stmt);
4130
4131 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4132 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4133 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4134
4135 return pattern_stmt;
4136}
4137
4138/* Return true iff the target has a vector optab implementing the operation
4139 CODE on type VECTYPE. */
4140
4141static bool
4142target_has_vecop_for_code (tree_code code, tree vectype)
4143{
4144 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4145 return voptab
4146 && optab_handler (op: voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4147}
4148
4149/* Verify that the target has optabs of VECTYPE to perform all the steps
4150 needed by the multiplication-by-immediate synthesis algorithm described by
4151 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4152 present. Return true iff the target supports all the steps. */
4153
4154static bool
4155target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4156 tree vectype, bool synth_shift_p)
4157{
4158 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4159 return false;
4160
4161 bool supports_vminus = target_has_vecop_for_code (code: MINUS_EXPR, vectype);
4162 bool supports_vplus = target_has_vecop_for_code (code: PLUS_EXPR, vectype);
4163
4164 if (var == negate_variant
4165 && !target_has_vecop_for_code (code: NEGATE_EXPR, vectype))
4166 return false;
4167
4168 /* If we must synthesize shifts with additions make sure that vector
4169 addition is available. */
4170 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4171 return false;
4172
4173 for (int i = 1; i < alg->ops; i++)
4174 {
4175 switch (alg->op[i])
4176 {
4177 case alg_shift:
4178 break;
4179 case alg_add_t_m2:
4180 case alg_add_t2_m:
4181 case alg_add_factor:
4182 if (!supports_vplus)
4183 return false;
4184 break;
4185 case alg_sub_t_m2:
4186 case alg_sub_t2_m:
4187 case alg_sub_factor:
4188 if (!supports_vminus)
4189 return false;
4190 break;
4191 case alg_unknown:
4192 case alg_m:
4193 case alg_zero:
4194 case alg_impossible:
4195 return false;
4196 default:
4197 gcc_unreachable ();
4198 }
4199 }
4200
4201 return true;
4202}
4203
4204/* Synthesize a left shift of OP by AMNT bits using a series of additions,
4205   putting the final result in DEST.  Append all statements but the last
4206   to STMT_INFO's pattern definition sequence; return the last statement.  */
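/* For instance (illustrative only), a left shift by 3 becomes the chain

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. AMNT self-additions, each one doubling the previous result.  */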
4207
4208static gimple *
4209synth_lshift_by_additions (vec_info *vinfo,
4210 tree dest, tree op, HOST_WIDE_INT amnt,
4211 stmt_vec_info stmt_info)
4212{
4213 HOST_WIDE_INT i;
4214 tree itype = TREE_TYPE (op);
4215 tree prev_res = op;
4216 gcc_assert (amnt >= 0);
4217 for (i = 0; i < amnt; i++)
4218 {
4219 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (type: itype, NULL)
4220 : dest;
4221 gimple *stmt
4222 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4223 prev_res = tmp_var;
4224 if (i < amnt - 1)
4225 append_pattern_def_seq (vinfo, stmt_info, new_stmt: stmt);
4226 else
4227 return stmt;
4228 }
4229 gcc_unreachable ();
4230 return NULL;
4231}
4232
4233/* Helper for vect_synth_mult_by_constant. Apply a binary operation
4234 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4235 the process if necessary. Append the resulting assignment statements
4236 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4237 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4238 left shifts using additions. */
4239
4240static tree
4241apply_binop_and_append_stmt (vec_info *vinfo,
4242 tree_code code, tree op1, tree op2,
4243 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4244{
4245 if (integer_zerop (op2)
4246 && (code == LSHIFT_EXPR
4247 || code == PLUS_EXPR))
4248 {
4249 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4250 return op1;
4251 }
4252
4253 gimple *stmt;
4254 tree itype = TREE_TYPE (op1);
4255 tree tmp_var = vect_recog_temp_ssa_var (type: itype, NULL);
4256
4257 if (code == LSHIFT_EXPR
4258 && synth_shift_p)
4259 {
4260 stmt = synth_lshift_by_additions (vinfo, dest: tmp_var, op: op1,
4261 TREE_INT_CST_LOW (op2), stmt_info: stmt_vinfo);
4262 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4263 return tmp_var;
4264 }
4265
4266 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4267 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4268 return tmp_var;
4269}
4270
4271/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4272 and simple arithmetic operations to be vectorized. Record the statements
4273 produced in STMT_VINFO and return the last statement in the sequence or
4274 NULL if it's not possible to synthesize such a multiplication.
4275 This function mirrors the behavior of expand_mult_const in expmed.cc but
4276 works on tree-ssa form. */
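/* For example (illustrative only), a multiplication by 10 can be
   synthesized as

     t1 = op << 2;         op * 4
     t2 = t1 + op;         op * 5
     result = t2 << 1;     op * 10

   using only shifts and additions, which is the kind of sequence
   choose_mult_variant can produce.  */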
4277
4278static gimple *
4279vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4280 stmt_vec_info stmt_vinfo)
4281{
4282 tree itype = TREE_TYPE (op);
4283 machine_mode mode = TYPE_MODE (itype);
4284 struct algorithm alg;
4285 mult_variant variant;
4286 if (!tree_fits_shwi_p (val))
4287 return NULL;
4288
4289 /* Multiplication synthesis by shifts, adds and subs can introduce
4290 signed overflow where the original operation didn't. Perform the
4291 operations on an unsigned type and cast back to avoid this.
4292 In the future we may want to relax this for synthesis algorithms
4293 that we can prove do not cause unexpected overflow. */
4294 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4295
4296 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4297 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4298 if (!vectype)
4299 return NULL;
4300
4301 /* Targets that don't support vector shifts but support vector additions
4302 can synthesize shifts that way. */
4303 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4304
4305 HOST_WIDE_INT hwval = tree_to_shwi (val);
4306 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4307 The vectorizer's benefit analysis will decide whether it's beneficial
4308 to do this. */
4309 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4310 ? TYPE_MODE (vectype) : mode,
4311 hwval, &alg, &variant, MAX_COST);
4312 if (!possible)
4313 return NULL;
4314
4315 if (!target_supports_mult_synth_alg (alg: &alg, var: variant, vectype, synth_shift_p))
4316 return NULL;
4317
4318 tree accumulator;
4319
4320 /* Clear out the sequence of statements so we can populate it below. */
4321 gimple *stmt = NULL;
4322
4323 if (cast_to_unsigned_p)
4324 {
4325 tree tmp_op = vect_recog_temp_ssa_var (type: multtype, NULL);
4326 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4327 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4328 op = tmp_op;
4329 }
4330
4331 if (alg.op[0] == alg_zero)
4332 accumulator = build_int_cst (multtype, 0);
4333 else
4334 accumulator = op;
4335
4336 bool needs_fixup = (variant == negate_variant)
4337 || (variant == add_variant);
4338
4339 for (int i = 1; i < alg.ops; i++)
4340 {
4341 tree shft_log = build_int_cst (multtype, alg.log[i]);
4342 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4343 tree tmp_var = NULL_TREE;
4344
4345 switch (alg.op[i])
4346 {
4347 case alg_shift:
4348 if (synth_shift_p)
4349 stmt
4350 = synth_lshift_by_additions (vinfo, dest: accum_tmp, op: accumulator,
4351 amnt: alg.log[i], stmt_info: stmt_vinfo);
4352 else
4353 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4354 shft_log);
4355 break;
4356 case alg_add_t_m2:
4357 tmp_var
4358 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: op, op2: shft_log,
4359 stmt_vinfo, synth_shift_p);
4360 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4361 tmp_var);
4362 break;
4363 case alg_sub_t_m2:
4364 tmp_var = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: op,
4365 op2: shft_log, stmt_vinfo,
4366 synth_shift_p);
4367 /* In some algorithms the first step involves zeroing the
4368 accumulator. If subtracting from such an accumulator
4369 just emit the negation directly. */
4370 if (integer_zerop (accumulator))
4371 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4372 else
4373 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4374 tmp_var);
4375 break;
4376 case alg_add_t2_m:
4377 tmp_var
4378 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4379 op2: shft_log, stmt_vinfo, synth_shift_p);
4380 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4381 break;
4382 case alg_sub_t2_m:
4383 tmp_var
4384 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4385 op2: shft_log, stmt_vinfo, synth_shift_p);
4386 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4387 break;
4388 case alg_add_factor:
4389 tmp_var
4390 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4391 op2: shft_log, stmt_vinfo, synth_shift_p);
4392 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4393 tmp_var);
4394 break;
4395 case alg_sub_factor:
4396 tmp_var
4397 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4398 op2: shft_log, stmt_vinfo, synth_shift_p);
4399 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4400 accumulator);
4401 break;
4402 default:
4403 gcc_unreachable ();
4404 }
4405 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4406 but rather return it directly. */
4407
4408 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4409 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4410 accumulator = accum_tmp;
4411 }
4412 if (variant == negate_variant)
4413 {
4414 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4415 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4416 accumulator = accum_tmp;
4417 if (cast_to_unsigned_p)
4418 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4419 }
4420 else if (variant == add_variant)
4421 {
4422 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4423 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4424 accumulator = accum_tmp;
4425 if (cast_to_unsigned_p)
4426 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4427 }
4428  /* Convert back to the original type if needed.  */
4429 if (cast_to_unsigned_p)
4430 {
4431 tree accum_tmp = vect_recog_temp_ssa_var (type: itype, NULL);
4432 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4433 }
4434
4435 return stmt;
4436}
4437
4438/* Detect multiplication by a constant and convert it into a sequence of
4439   shifts, additions, subtractions and negations.  We reuse the
4440   choose_mult_variant algorithm from expmed.cc.
4441
4442 Input/Output:
4443
4444 STMT_VINFO: The stmt from which the pattern search begins,
4445 i.e. the mult stmt.
4446
4447 Output:
4448
4449 * TYPE_OUT: The type of the output of this pattern.
4450
4451 * Return value: A new stmt that will be used to replace
4452 the multiplication. */
4453
4454static gimple *
4455vect_recog_mult_pattern (vec_info *vinfo,
4456 stmt_vec_info stmt_vinfo, tree *type_out)
4457{
4458 gimple *last_stmt = stmt_vinfo->stmt;
4459 tree oprnd0, oprnd1, vectype, itype;
4460 gimple *pattern_stmt;
4461
4462 if (!is_gimple_assign (gs: last_stmt))
4463 return NULL;
4464
4465 if (gimple_assign_rhs_code (gs: last_stmt) != MULT_EXPR)
4466 return NULL;
4467
4468 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4469 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4470 itype = TREE_TYPE (oprnd0);
4471
4472 if (TREE_CODE (oprnd0) != SSA_NAME
4473 || TREE_CODE (oprnd1) != INTEGER_CST
4474 || !INTEGRAL_TYPE_P (itype)
4475 || !type_has_mode_precision_p (t: itype))
4476 return NULL;
4477
4478 vectype = get_vectype_for_scalar_type (vinfo, itype);
4479 if (vectype == NULL_TREE)
4480 return NULL;
4481
4482 /* If the target can handle vectorized multiplication natively,
4483 don't attempt to optimize this. */
4484 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4485 if (mul_optab != unknown_optab)
4486 {
4487 machine_mode vec_mode = TYPE_MODE (vectype);
4488 int icode = (int) optab_handler (op: mul_optab, mode: vec_mode);
4489 if (icode != CODE_FOR_nothing)
4490 return NULL;
4491 }
4492
4493 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4494 op: oprnd0, val: oprnd1, stmt_vinfo);
4495 if (!pattern_stmt)
4496 return NULL;
4497
4498 /* Pattern detected. */
4499 vect_pattern_detected (name: "vect_recog_mult_pattern", stmt: last_stmt);
4500
4501 *type_out = vectype;
4502
4503 return pattern_stmt;
4504}
4505
4506/* Detect a signed division by a constant that wouldn't otherwise be
4507   vectorized:
4508
4509 type a_t, b_t;
4510
4511 S1 a_t = b_t / N;
4512
4513 where type 'type' is an integral type and N is a constant.
4514
4515 Similarly handle modulo by a constant:
4516
4517 S4 a_t = b_t % N;
4518
4519 Input/Output:
4520
4521 * STMT_VINFO: The stmt from which the pattern search begins,
4522    i.e. the division stmt.  If N is a power-of-two constant and the
4523    type is signed, S1 is replaced by:
4524 S3 y_t = b_t < 0 ? N - 1 : 0;
4525 S2 x_t = b_t + y_t;
4526 S1' a_t = x_t >> log2 (N);
4527
4528    If N is a power-of-two constant and the type is signed, S4 is
4529    replaced by (where *_T temporaries have unsigned type):
4530 S9 y_T = b_t < 0 ? -1U : 0U;
4531 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4532 S7 z_t = (type) z_T;
4533 S6 w_t = b_t + z_t;
4534 S5 x_t = w_t & (N - 1);
4535 S4' a_t = x_t - z_t;
4536
4537 Output:
4538
4539 * TYPE_OUT: The type of the output of this pattern.
4540
4541 * Return value: A new stmt that will be used to replace the division
4542 S1 or modulo S4 stmt. */
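/* As a worked example (illustrative only), for N = 4 and b_t = -7 the
   division sequence above computes

     y_t = (-7 < 0) ? 3 : 0   =  3
     x_t = -7 + 3             = -4
     a_t = -4 >> 2            = -1

   which matches C's truncating division (-7 / 4 == -1), whereas a plain
   arithmetic shift of -7 would give -2.  */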
4543
4544static gimple *
4545vect_recog_divmod_pattern (vec_info *vinfo,
4546 stmt_vec_info stmt_vinfo, tree *type_out)
4547{
4548 gimple *last_stmt = stmt_vinfo->stmt;
4549 tree oprnd0, oprnd1, vectype, itype, cond;
4550 gimple *pattern_stmt, *def_stmt;
4551 enum tree_code rhs_code;
4552 optab optab;
4553 tree q, cst;
4554 int dummy_int, prec;
4555
4556 if (!is_gimple_assign (gs: last_stmt))
4557 return NULL;
4558
4559 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
4560 switch (rhs_code)
4561 {
4562 case TRUNC_DIV_EXPR:
4563 case EXACT_DIV_EXPR:
4564 case TRUNC_MOD_EXPR:
4565 break;
4566 default:
4567 return NULL;
4568 }
4569
4570 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4571 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4572 itype = TREE_TYPE (oprnd0);
4573 if (TREE_CODE (oprnd0) != SSA_NAME
4574 || TREE_CODE (oprnd1) != INTEGER_CST
4575 || TREE_CODE (itype) != INTEGER_TYPE
4576 || !type_has_mode_precision_p (t: itype))
4577 return NULL;
4578
4579 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4580 vectype = get_vectype_for_scalar_type (vinfo, itype);
4581 if (vectype == NULL_TREE)
4582 return NULL;
4583
4584 if (optimize_bb_for_size_p (gimple_bb (g: last_stmt)))
4585 {
4586 /* If the target can handle vectorized division or modulo natively,
4587 don't attempt to optimize this, since native division is likely
4588 to give smaller code. */
4589 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4590 if (optab != unknown_optab)
4591 {
4592 machine_mode vec_mode = TYPE_MODE (vectype);
4593 int icode = (int) optab_handler (op: optab, mode: vec_mode);
4594 if (icode != CODE_FOR_nothing)
4595 return NULL;
4596 }
4597 }
4598
4599 prec = TYPE_PRECISION (itype);
4600 if (integer_pow2p (oprnd1))
4601 {
4602 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4603 return NULL;
4604
4605 /* Pattern detected. */
4606 vect_pattern_detected (name: "vect_recog_divmod_pattern", stmt: last_stmt);
4607
4608 *type_out = vectype;
4609
4610 /* Check if the target supports this internal function. */
4611 internal_fn ifn = IFN_DIV_POW2;
4612 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4613 {
4614 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4615
4616 tree var_div = vect_recog_temp_ssa_var (type: itype, NULL);
4617 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4618 gimple_call_set_lhs (gs: div_stmt, lhs: var_div);
4619
4620 if (rhs_code == TRUNC_MOD_EXPR)
4621 {
4622 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: div_stmt);
4623 def_stmt
4624 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4625 LSHIFT_EXPR, var_div, shift);
4626 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4627 pattern_stmt
4628 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4629 MINUS_EXPR, oprnd0,
4630 gimple_assign_lhs (gs: def_stmt));
4631 }
4632 else
4633 pattern_stmt = div_stmt;
4634 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
4635
4636 return pattern_stmt;
4637 }
4638
4639 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4640 build_int_cst (itype, 0));
4641 if (rhs_code == TRUNC_DIV_EXPR
4642 || rhs_code == EXACT_DIV_EXPR)
4643 {
4644 tree var = vect_recog_temp_ssa_var (type: itype, NULL);
4645 tree shift;
4646 def_stmt
4647 = gimple_build_assign (var, COND_EXPR, cond,
4648 fold_build2 (MINUS_EXPR, itype, oprnd1,
4649 build_int_cst (itype, 1)),
4650 build_int_cst (itype, 0));
4651 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4652 var = vect_recog_temp_ssa_var (type: itype, NULL);
4653 def_stmt
4654 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4655 gimple_assign_lhs (gs: def_stmt));
4656 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4657
4658 shift = build_int_cst (itype, tree_log2 (oprnd1));
4659 pattern_stmt
4660 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4661 RSHIFT_EXPR, var, shift);
4662 }
4663 else
4664 {
4665 tree signmask;
4666 if (compare_tree_int (oprnd1, 2) == 0)
4667 {
4668 signmask = vect_recog_temp_ssa_var (type: itype, NULL);
4669 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4670 build_int_cst (itype, 1),
4671 build_int_cst (itype, 0));
4672 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4673 }
4674 else
4675 {
4676 tree utype
4677 = build_nonstandard_integer_type (prec, 1);
4678 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4679 tree shift
4680 = build_int_cst (utype, GET_MODE_BITSIZE (mode: itype_mode)
4681 - tree_log2 (oprnd1));
4682 tree var = vect_recog_temp_ssa_var (type: utype, NULL);
4683
4684 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4685 build_int_cst (utype, -1),
4686 build_int_cst (utype, 0));
4687 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecutype);
4688 var = vect_recog_temp_ssa_var (type: utype, NULL);
4689 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4690 gimple_assign_lhs (gs: def_stmt),
4691 shift);
4692 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecutype);
4693 signmask = vect_recog_temp_ssa_var (type: itype, NULL);
4694 def_stmt
4695 = gimple_build_assign (signmask, NOP_EXPR, var);
4696 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4697 }
4698 def_stmt
4699 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4700 PLUS_EXPR, oprnd0, signmask);
4701 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4702 def_stmt
4703 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4704 BIT_AND_EXPR, gimple_assign_lhs (gs: def_stmt),
4705 fold_build2 (MINUS_EXPR, itype, oprnd1,
4706 build_int_cst (itype, 1)));
4707 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4708
4709 pattern_stmt
4710 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4711 MINUS_EXPR, gimple_assign_lhs (gs: def_stmt),
4712 signmask);
4713 }
4714
4715 return pattern_stmt;
4716 }
4717
4718 if ((cst = uniform_integer_cst_p (oprnd1))
4719 && TYPE_UNSIGNED (itype)
4720 && rhs_code == TRUNC_DIV_EXPR
4721 && vectype
4722 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4723 {
4724 /* We can use the relationship:
4725
4726 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4727
4728 to optimize cases where N+1 is a power of 2, and where // (N+1)
4729 is therefore a shift right. When operating in modes that are
4730 multiples of a byte in size, there are two cases:
4731
4732 (1) N(N+3) is not representable, in which case the question
4733 becomes whether the replacement expression overflows.
4734 It is enough to test that x+N+2 does not overflow,
4735 i.e. that x < MAX-(N+1).
4736
4737 (2) N(N+3) is representable, in which case it is the (only)
4738 bound that we need to check.
4739
4740 ??? For now we just handle the case where // (N+1) is a shift
4741 right by half the precision, since some architectures can
4742 optimize the associated addition and shift combinations
4743 into single instructions. */
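      /* As an illustration (not part of the transform itself), for a
	 16-bit unsigned x and N = 255 the replacement below computes

	   t1 = x + 257         x + N + 2
	   t2 = t1 >> 8         divide by N + 1
	   t3 = t2 + x
	   q  = t3 >> 8         divide by N + 1

	 which equals x / 255 for every x for which x + 257 does not wrap,
	 i.e. whenever the overflow check below succeeds.  */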
4744
4745 auto wcst = wi::to_wide (t: cst);
4746 int pow = wi::exact_log2 (wcst + 1);
4747 if (pow == prec / 2)
4748 {
4749 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4750
4751 gimple_ranger ranger;
4752 int_range_max r;
4753
4754 /* Check that no overflow will occur. If we don't have range
4755 information we can't perform the optimization. */
4756
4757 if (ranger.range_of_expr (r, name: oprnd0, stmt) && !r.undefined_p ())
4758 {
4759 wide_int max = r.upper_bound ();
4760 wide_int one = wi::shwi (val: 1, precision: prec);
4761 wide_int adder = wi::add (x: one, y: wi::lshift (x: one, y: pow));
4762 wi::overflow_type ovf;
4763 wi::add (x: max, y: adder, sgn: UNSIGNED, overflow: &ovf);
4764 if (ovf == wi::OVF_NONE)
4765 {
4766 *type_out = vectype;
4767 tree tadder = wide_int_to_tree (type: itype, cst: adder);
4768 tree rshift = wide_int_to_tree (type: itype, cst: pow);
4769
4770 tree new_lhs1 = vect_recog_temp_ssa_var (type: itype, NULL);
4771 gassign *patt1
4772 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4773 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4774
4775 tree new_lhs2 = vect_recog_temp_ssa_var (type: itype, NULL);
4776 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4777 rshift);
4778 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4779
4780 tree new_lhs3 = vect_recog_temp_ssa_var (type: itype, NULL);
4781 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4782 oprnd0);
4783 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4784
4785 tree new_lhs4 = vect_recog_temp_ssa_var (type: itype, NULL);
4786 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4787 new_lhs3, rshift);
4788
4789 return pattern_stmt;
4790 }
4791 }
4792 }
4793 }
4794
4795 if (prec > HOST_BITS_PER_WIDE_INT
4796 || integer_zerop (oprnd1))
4797 return NULL;
4798
4799 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4800 return NULL;
4801
4802 if (TYPE_UNSIGNED (itype))
4803 {
4804 unsigned HOST_WIDE_INT mh, ml;
4805 int pre_shift, post_shift;
4806 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4807 & GET_MODE_MASK (itype_mode));
4808 tree t1, t2, t3, t4;
4809
4810 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4811 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4812 return NULL;
4813
4814 /* Find a suitable multiplier and right shift count
4815 instead of multiplying with D. */
4816 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4817
4818 /* If the suggested multiplier is more than SIZE bits, we can do better
4819 for even divisors, using an initial right shift. */
4820 if (mh != 0 && (d & 1) == 0)
4821 {
4822 pre_shift = ctz_or_zero (x: d);
4823 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4824 &ml, &post_shift, &dummy_int);
4825 gcc_assert (!mh);
4826 }
4827 else
4828 pre_shift = 0;
4829
4830 if (mh != 0)
4831 {
4832 if (post_shift - 1 >= prec)
4833 return NULL;
4834
4835 /* t1 = oprnd0 h* ml;
4836 t2 = oprnd0 - t1;
4837 t3 = t2 >> 1;
4838 t4 = t1 + t3;
4839 q = t4 >> (post_shift - 1); */
4840 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4841 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4842 build_int_cst (itype, ml));
4843 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4844
4845 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4846 def_stmt
4847 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4848 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4849
4850 t3 = vect_recog_temp_ssa_var (type: itype, NULL);
4851 def_stmt
4852 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4853 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4854
4855 t4 = vect_recog_temp_ssa_var (type: itype, NULL);
4856 def_stmt
4857 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4858
4859 if (post_shift != 1)
4860 {
4861 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4862
4863 q = vect_recog_temp_ssa_var (type: itype, NULL);
4864 pattern_stmt
4865 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4866 build_int_cst (itype, post_shift - 1));
4867 }
4868 else
4869 {
4870 q = t4;
4871 pattern_stmt = def_stmt;
4872 }
4873 }
4874 else
4875 {
4876 if (pre_shift >= prec || post_shift >= prec)
4877 return NULL;
4878
4879 /* t1 = oprnd0 >> pre_shift;
4880 t2 = t1 h* ml;
4881 q = t2 >> post_shift; */
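	  /* As one concrete illustration: for a 32-bit unsigned division
	     by 3, D is odd so PRE_SHIFT is 0, and a suitable multiplier is
	     ML = 0xAAAAAAAB (3 * ML == (1 << 33) + 1) with POST_SHIFT = 1,
	     giving q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */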
4882 if (pre_shift)
4883 {
4884 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4885 def_stmt
4886 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4887 build_int_cst (NULL, pre_shift));
4888 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4889 }
4890 else
4891 t1 = oprnd0;
4892
4893 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4894 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4895 build_int_cst (itype, ml));
4896
4897 if (post_shift)
4898 {
4899 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4900
4901 q = vect_recog_temp_ssa_var (type: itype, NULL);
4902 def_stmt
4903 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4904 build_int_cst (itype, post_shift));
4905 }
4906 else
4907 q = t2;
4908
4909 pattern_stmt = def_stmt;
4910 }
4911 }
4912 else
4913 {
4914 unsigned HOST_WIDE_INT ml;
4915 int post_shift;
4916 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4917 unsigned HOST_WIDE_INT abs_d;
4918 bool add = false;
4919 tree t1, t2, t3, t4;
4920
4921 /* Give up for -1. */
4922 if (d == -1)
4923 return NULL;
4924
4925 /* Since d might be INT_MIN, we have to cast to
4926 unsigned HOST_WIDE_INT before negating to avoid
4927 undefined signed overflow. */
4928 abs_d = (d >= 0
4929 ? (unsigned HOST_WIDE_INT) d
4930 : - (unsigned HOST_WIDE_INT) d);
4931
4932 /* n rem d = n rem -d */
4933 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4934 {
4935 d = abs_d;
4936 oprnd1 = build_int_cst (itype, abs_d);
4937 }
4938 if (HOST_BITS_PER_WIDE_INT >= prec
4939 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4940 /* This case is not handled correctly below. */
4941 return NULL;
4942
4943 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4944 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4945 {
4946 add = true;
4947 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4948 }
4949 if (post_shift >= prec)
4950 return NULL;
4951
4952 /* t1 = oprnd0 h* ml; */
4953 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4954 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4955 build_int_cst (itype, ml));
4956
4957 if (add)
4958 {
4959 /* t2 = t1 + oprnd0; */
4960 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4961 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4962 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4963 }
4964 else
4965 t2 = t1;
4966
4967 if (post_shift)
4968 {
4969 /* t3 = t2 >> post_shift; */
4970 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4971 t3 = vect_recog_temp_ssa_var (type: itype, NULL);
4972 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4973 build_int_cst (itype, post_shift));
4974 }
4975 else
4976 t3 = t2;
4977
4978 int msb = 1;
4979 value_range r;
4980 get_range_query (cfun)->range_of_expr (r, expr: oprnd0);
4981 if (!r.varying_p () && !r.undefined_p ())
4982 {
4983 if (!wi::neg_p (x: r.lower_bound (), TYPE_SIGN (itype)))
4984 msb = 0;
4985 else if (wi::neg_p (x: r.upper_bound (), TYPE_SIGN (itype)))
4986 msb = -1;
4987 }
4988
4989 if (msb == 0 && d >= 0)
4990 {
4991 /* q = t3; */
4992 q = t3;
4993 pattern_stmt = def_stmt;
4994 }
4995 else
4996 {
4997 /* t4 = oprnd0 >> (prec - 1);
4998 or if we know from VRP that oprnd0 >= 0
4999 t4 = 0;
5000 or if we know from VRP that oprnd0 < 0
5001 t4 = -1; */
5002 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
5003 t4 = vect_recog_temp_ssa_var (type: itype, NULL);
5004 if (msb != 1)
5005 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5006 build_int_cst (itype, msb));
5007 else
5008 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5009 build_int_cst (itype, prec - 1));
5010 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
5011
5012 /* q = t3 - t4; or q = t4 - t3; */
5013 q = vect_recog_temp_ssa_var (type: itype, NULL);
5014 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5015 d < 0 ? t3 : t4);
5016 }
5017 }
5018
5019 if (rhs_code == TRUNC_MOD_EXPR)
5020 {
5021 tree r, t1;
5022
5023 /* We divided. Now finish by:
5024 t1 = q * oprnd1;
5025 r = oprnd0 - t1; */
5026 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt);
5027
5028 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
5029 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5030 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
5031
5032 r = vect_recog_temp_ssa_var (type: itype, NULL);
5033 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5034 }
5035
5036 /* Pattern detected. */
5037 vect_pattern_detected (name: "vect_recog_divmod_pattern", stmt: last_stmt);
5038
5039 *type_out = vectype;
5040 return pattern_stmt;
5041}
5042
5043/* Function vect_recog_mixed_size_cond_pattern
5044
5045 Try to find the following pattern:
5046
5047 type x_t, y_t;
5048 TYPE a_T, b_T, c_T;
5049 loop:
5050 S1 a_T = x_t CMP y_t ? b_T : c_T;
5051
5052 where type 'TYPE' is an integral type which has different size
5053 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5054 than 'type', the constants need to fit into an integer type
5055 with the same width as 'type') or results of conversion from 'type'.
5056
5057 Input:
5058
5059 * STMT_VINFO: The stmt from which the pattern search begins.
5060
5061 Output:
5062
5063 * TYPE_OUT: The type of the output of this pattern.
5064
5065 * Return value: A new stmt that will be used to replace the pattern.
5066 Additionally a def_stmt is added.
5067
5068 a_it = x_t CMP y_t ? b_it : c_it;
5069 a_T = (TYPE) a_it; */
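/* For example (an illustrative source fragment, with 'type' = short and
   'TYPE' = int):

     short x_t, y_t;
     int a_T;
     ...
     a_T = x_t < y_t ? 5 : -5;

   is rewritten so that the selection happens on shorts and only the
   result is widened:

     a_it = x_t < y_t ? 5 : -5;
     a_T = (int) a_it;  */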
5070
5071static gimple *
5072vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5073 stmt_vec_info stmt_vinfo, tree *type_out)
5074{
5075 gimple *last_stmt = stmt_vinfo->stmt;
5076 tree cond_expr, then_clause, else_clause;
5077 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5078 gimple *pattern_stmt, *def_stmt;
5079 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5080 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5081 bool promotion;
5082 tree comp_scalar_type;
5083
5084 if (!is_gimple_assign (gs: last_stmt)
5085 || gimple_assign_rhs_code (gs: last_stmt) != COND_EXPR
5086 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5087 return NULL;
5088
5089 cond_expr = gimple_assign_rhs1 (gs: last_stmt);
5090 then_clause = gimple_assign_rhs2 (gs: last_stmt);
5091 else_clause = gimple_assign_rhs3 (gs: last_stmt);
5092
5093 if (!COMPARISON_CLASS_P (cond_expr))
5094 return NULL;
5095
5096 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5097 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5098 if (comp_vectype == NULL_TREE)
5099 return NULL;
5100
5101 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5102 if (types_compatible_p (type1: type, type2: comp_scalar_type)
5103 || ((TREE_CODE (then_clause) != INTEGER_CST
5104 || TREE_CODE (else_clause) != INTEGER_CST)
5105 && !INTEGRAL_TYPE_P (comp_scalar_type))
5106 || !INTEGRAL_TYPE_P (type))
5107 return NULL;
5108
5109 if ((TREE_CODE (then_clause) != INTEGER_CST
5110 && !type_conversion_p (vinfo, name: then_clause, check_sign: false,
5111 orig_type: &orig_type0, def_stmt: &def_stmt0, promotion: &promotion))
5112 || (TREE_CODE (else_clause) != INTEGER_CST
5113 && !type_conversion_p (vinfo, name: else_clause, check_sign: false,
5114 orig_type: &orig_type1, def_stmt: &def_stmt1, promotion: &promotion)))
5115 return NULL;
5116
5117 if (orig_type0 && orig_type1
5118 && !types_compatible_p (type1: orig_type0, type2: orig_type1))
5119 return NULL;
5120
5121 if (orig_type0)
5122 {
5123 if (!types_compatible_p (type1: orig_type0, type2: comp_scalar_type))
5124 return NULL;
5125 then_clause = gimple_assign_rhs1 (gs: def_stmt0);
5126 itype = orig_type0;
5127 }
5128
5129 if (orig_type1)
5130 {
5131 if (!types_compatible_p (type1: orig_type1, type2: comp_scalar_type))
5132 return NULL;
5133 else_clause = gimple_assign_rhs1 (gs: def_stmt1);
5134 itype = orig_type1;
5135 }
5136
5137
5138 HOST_WIDE_INT cmp_mode_size
5139 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5140
5141 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5142 if (GET_MODE_BITSIZE (mode: type_mode) == cmp_mode_size)
5143 return NULL;
5144
5145 vectype = get_vectype_for_scalar_type (vinfo, type);
5146 if (vectype == NULL_TREE)
5147 return NULL;
5148
5149 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5150 return NULL;
5151
5152 if (itype == NULL_TREE)
5153 itype = build_nonstandard_integer_type (cmp_mode_size,
5154 TYPE_UNSIGNED (type));
5155
5156 if (itype == NULL_TREE
5157 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5158 return NULL;
5159
5160 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5161 if (vecitype == NULL_TREE)
5162 return NULL;
5163
5164 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5165 return NULL;
5166
5167 if (GET_MODE_BITSIZE (mode: type_mode) > cmp_mode_size)
5168 {
5169 if ((TREE_CODE (then_clause) == INTEGER_CST
5170 && !int_fits_type_p (then_clause, itype))
5171 || (TREE_CODE (else_clause) == INTEGER_CST
5172 && !int_fits_type_p (else_clause, itype)))
5173 return NULL;
5174 }
5175
5176 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5177 COND_EXPR, unshare_expr (cond_expr),
5178 fold_convert (itype, then_clause),
5179 fold_convert (itype, else_clause));
5180 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5181 NOP_EXPR, gimple_assign_lhs (gs: def_stmt));
5182
5183 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecitype);
5184 *type_out = vectype;
5185
5186 vect_pattern_detected (name: "vect_recog_mixed_size_cond_pattern", stmt: last_stmt);
5187
5188 return pattern_stmt;
5189}
5190
5191
5192/* Helper function of vect_recog_bool_pattern. Called recursively, return
5193 true if bool VAR can and should be rewritten into the integer COND_EXPR
5194 form used by vect_recog_bool_pattern. Assume it shouldn't be in case it
5195 is the result of a comparison which can be directly vectorized into a
5196 vector comparison. Fills in STMTS with all stmts visited during the walk. */
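/* For instance (illustrative), for a chain such as

     bool b1 = x < y;
     bool b2 = p != q;
     bool b  = b1 & b2;
     res = b ? v : w;

   the walk visits the definitions of b, b1 and b2 and returns true only if
   every statement in the chain can later be rewritten by adjust_bool_stmts
   into the integer COND_EXPR form, while a comparison that the target can
   already vectorize directly as a vector comparison makes it return
   false.  */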
5197
5198static bool
5199check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5200{
5201 tree rhs1;
5202 enum tree_code rhs_code;
5203
5204 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, op: var);
5205 if (!def_stmt_info)
5206 return false;
5207
5208 gassign *def_stmt = dyn_cast <gassign *> (p: def_stmt_info->stmt);
5209 if (!def_stmt)
5210 return false;
5211
5212 if (stmts.contains (k: def_stmt))
5213 return true;
5214
5215 rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5216 rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5217 switch (rhs_code)
5218 {
5219 case SSA_NAME:
5220 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5221 return false;
5222 break;
5223
5224 CASE_CONVERT:
5225 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5226 return false;
5227 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5228 return false;
5229 break;
5230
5231 case BIT_NOT_EXPR:
5232 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5233 return false;
5234 break;
5235
5236 case BIT_AND_EXPR:
5237 case BIT_IOR_EXPR:
5238 case BIT_XOR_EXPR:
5239 if (! check_bool_pattern (var: rhs1, vinfo, stmts)
5240 || ! check_bool_pattern (var: gimple_assign_rhs2 (gs: def_stmt), vinfo, stmts))
5241 return false;
5242 break;
5243
5244 default:
5245 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5246 {
5247 tree vecitype, comp_vectype;
5248
5249 /* If the comparison can throw, then is_gimple_condexpr will be
5250 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5251 if (stmt_could_throw_p (cfun, def_stmt))
5252 return false;
5253
5254 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5255 if (comp_vectype == NULL_TREE)
5256 return false;
5257
5258 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5259 TREE_TYPE (rhs1));
5260 if (mask_type
5261 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5262 return false;
5263
5264 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5265 {
5266 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5267 tree itype
5268 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5269 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5270 if (vecitype == NULL_TREE)
5271 return false;
5272 }
5273 else
5274 vecitype = comp_vectype;
5275 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5276 return false;
5277 }
5278 else
5279 return false;
5280 break;
5281 }
5282
5283 bool res = stmts.add (k: def_stmt);
5284 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5285 gcc_assert (!res);
5286
5287 return true;
5288}
5289
5290
5291/* Helper function of adjust_bool_pattern. Add a cast of VAR (defined by a
5292 previous stmt) to TYPE, appending the cast statement to STMT_INFO's
5293 pattern sequence. */
5294
5295static tree
5296adjust_bool_pattern_cast (vec_info *vinfo,
5297 tree type, tree var, stmt_vec_info stmt_info)
5298{
5299 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5300 NOP_EXPR, var);
5301 append_pattern_def_seq (vinfo, stmt_info, new_stmt: cast_stmt,
5302 vectype: get_vectype_for_scalar_type (vinfo, type));
5303 return gimple_assign_lhs (gs: cast_stmt);
5304}
5305
5306/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5307 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5308 type, OUT_TYPE is the desired final integer type of the whole pattern.
5309 STMT_INFO is the info of the pattern root and is where pattern stmts should
5310 be associated with. DEFS is a map of pattern defs. */
5311
5312static void
5313adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5314 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5315{
5316 gimple *stmt = SSA_NAME_DEF_STMT (var);
5317 enum tree_code rhs_code, def_rhs_code;
5318 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5319 location_t loc;
5320 gimple *pattern_stmt, *def_stmt;
5321 tree trueval = NULL_TREE;
5322
5323 rhs1 = gimple_assign_rhs1 (gs: stmt);
5324 rhs2 = gimple_assign_rhs2 (gs: stmt);
5325 rhs_code = gimple_assign_rhs_code (gs: stmt);
5326 loc = gimple_location (g: stmt);
5327 switch (rhs_code)
5328 {
5329 case SSA_NAME:
5330 CASE_CONVERT:
5331 irhs1 = *defs.get (k: rhs1);
5332 itype = TREE_TYPE (irhs1);
5333 pattern_stmt
5334 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5335 SSA_NAME, irhs1);
5336 break;
5337
5338 case BIT_NOT_EXPR:
5339 irhs1 = *defs.get (k: rhs1);
5340 itype = TREE_TYPE (irhs1);
5341 pattern_stmt
5342 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5343 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5344 break;
5345
5346 case BIT_AND_EXPR:
5347 /* Try to optimize x = y & (a < b ? 1 : 0); into
5348 x = (a < b ? y : 0);
5349
5350 E.g. for:
5351 bool a_b, b_b, c_b;
5352 TYPE d_T;
5353
5354 S1 a_b = x1 CMP1 y1;
5355 S2 b_b = x2 CMP2 y2;
5356 S3 c_b = a_b & b_b;
5357 S4 d_T = (TYPE) c_b;
5358
5359 we would normally emit:
5360
5361 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5362 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5363 S3' c_T = a_T & b_T;
5364 S4' d_T = c_T;
5365
5366 but we can save one stmt by using the
5367 result of one of the COND_EXPRs in the other COND_EXPR and leave
5368 BIT_AND_EXPR stmt out:
5369
5370 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5371 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5372 S4' d_T = c_T;
5373
5374 At least when VEC_COND_EXPR is implemented using masks
5375 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
5376 computes the comparison mask and ANDs it, in one case with an
5377 all-ones vector, in the other case with a vector register.
5378 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5379 often more expensive. */
5380 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5381 def_rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5382 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5383 {
5384 irhs1 = *defs.get (k: rhs1);
5385 tree def_rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5386 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5387 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5388 {
5389 rhs_code = def_rhs_code;
5390 rhs1 = def_rhs1;
5391 rhs2 = gimple_assign_rhs2 (gs: def_stmt);
5392 trueval = irhs1;
5393 goto do_compare;
5394 }
5395 else
5396 irhs2 = *defs.get (k: rhs2);
5397 goto and_ior_xor;
5398 }
5399 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5400 def_rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5401 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5402 {
5403 irhs2 = *defs.get (k: rhs2);
5404 tree def_rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5405 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5406 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5407 {
5408 rhs_code = def_rhs_code;
5409 rhs1 = def_rhs1;
5410 rhs2 = gimple_assign_rhs2 (gs: def_stmt);
5411 trueval = irhs2;
5412 goto do_compare;
5413 }
5414 else
5415 irhs1 = *defs.get (k: rhs1);
5416 goto and_ior_xor;
5417 }
5418 /* FALLTHRU */
5419 case BIT_IOR_EXPR:
5420 case BIT_XOR_EXPR:
5421 irhs1 = *defs.get (k: rhs1);
5422 irhs2 = *defs.get (k: rhs2);
5423 and_ior_xor:
5424 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5425 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5426 {
5427 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5428 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5429 int out_prec = TYPE_PRECISION (out_type);
5430 if (absu_hwi (x: out_prec - prec1) < absu_hwi (x: out_prec - prec2))
5431 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), var: irhs2,
5432 stmt_info);
5433 else if (absu_hwi (x: out_prec - prec1) > absu_hwi (x: out_prec - prec2))
5434 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), var: irhs1,
5435 stmt_info);
5436 else
5437 {
5438 irhs1 = adjust_bool_pattern_cast (vinfo,
5439 type: out_type, var: irhs1, stmt_info);
5440 irhs2 = adjust_bool_pattern_cast (vinfo,
5441 type: out_type, var: irhs2, stmt_info);
5442 }
5443 }
5444 itype = TREE_TYPE (irhs1);
5445 pattern_stmt
5446 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5447 rhs_code, irhs1, irhs2);
5448 break;
5449
5450 default:
5451 do_compare:
5452 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5453 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5454 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5455 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5456 b: GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5457 {
5458 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5459 itype
5460 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5461 }
5462 else
5463 itype = TREE_TYPE (rhs1);
5464 cond_expr = build2_loc (loc, code: rhs_code, type: itype, arg0: rhs1, arg1: rhs2);
5465 if (trueval == NULL_TREE)
5466 trueval = build_int_cst (itype, 1);
5467 else
5468 gcc_checking_assert (useless_type_conversion_p (itype,
5469 TREE_TYPE (trueval)));
5470 pattern_stmt
5471 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5472 COND_EXPR, cond_expr, trueval,
5473 build_int_cst (itype, 0));
5474 break;
5475 }
5476
5477 gimple_set_location (g: pattern_stmt, location: loc);
5478 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt,
5479 vectype: get_vectype_for_scalar_type (vinfo, itype));
5480 defs.put (k: var, v: gimple_assign_lhs (gs: pattern_stmt));
5481}
5482
5483/* Comparison function used to qsort a vector of gimple stmts by UID. */
5484
5485static int
5486sort_after_uid (const void *p1, const void *p2)
5487{
5488 const gimple *stmt1 = *(const gimple * const *)p1;
5489 const gimple *stmt2 = *(const gimple * const *)p2;
5490 return gimple_uid (g: stmt1) - gimple_uid (g: stmt2);
5491}
5492
5493/* Create pattern stmts for all stmts participating in the bool pattern
5494 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5495 OUT_TYPE. Return the def of the pattern root. */
5496
5497static tree
5498adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5499 tree out_type, stmt_vec_info stmt_info)
5500{
5501 /* Gather original stmts in the bool pattern in their order of appearance
5502 in the IL. */
5503 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5504 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5505 i != bool_stmt_set.end (); ++i)
5506 bool_stmts.quick_push (obj: *i);
5507 bool_stmts.qsort (sort_after_uid);
5508
5509 /* Now process them in that order, producing pattern stmts. */
5510 hash_map <tree, tree> defs;
5511 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5512 adjust_bool_pattern (vinfo, var: gimple_assign_lhs (gs: bool_stmts[i]),
5513 out_type, stmt_info, defs);
5514
5515 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5516 gimple *pattern_stmt
5517 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5518 return gimple_assign_lhs (gs: pattern_stmt);
5519}
5520
5521/* Return the proper type for converting bool VAR into
5522 an integer value or NULL_TREE if no such type exists.
5523 The type is chosen so that the converted value has the
5524 same number of elements as VAR's vector type. */
5525
5526static tree
5527integer_type_for_mask (tree var, vec_info *vinfo)
5528{
5529 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5530 return NULL_TREE;
5531
5532 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, op: var);
5533 if (!def_stmt_info || !vect_use_mask_type_p (stmt_info: def_stmt_info))
5534 return NULL_TREE;
5535
5536 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5537}
5538
5539/* Function vect_recog_bool_pattern
5540
5541 Try to find pattern like following:
5542
5543 bool a_b, b_b, c_b, d_b, e_b;
5544 TYPE f_T;
5545 loop:
5546 S1 a_b = x1 CMP1 y1;
5547 S2 b_b = x2 CMP2 y2;
5548 S3 c_b = a_b & b_b;
5549 S4 d_b = x3 CMP3 y3;
5550 S5 e_b = c_b | d_b;
5551 S6 f_T = (TYPE) e_b;
5552
5553 where type 'TYPE' is an integral type. Or a similar pattern
5554 ending in
5555
5556 S6 f_Y = e_b ? r_Y : s_Y;
5557
5558 as results from if-conversion of a complex condition.
5559
5560 Input:
5561
5562 * STMT_VINFO: The stmt at the end from which the pattern
5563 search begins, i.e. cast of a bool to
5564 an integer type.
5565
5566 Output:
5567
5568 * TYPE_OUT: The type of the output of this pattern.
5569
5570 * Return value: A new stmt that will be used to replace the pattern.
5571
5572 Assuming size of TYPE is the same as size of all comparisons
5573 (otherwise some casts would be added where needed), for the above
5574 sequence we create the following related pattern stmts:
5575 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5576 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5577 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5578 S5' e_T = c_T | d_T;
5579 S6' f_T = e_T;
5580
5581 Instead of the above S3' we could emit:
5582 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5583 S3' c_T = a_T & b_T;
5584 but the above is more efficient. */
5585
5586static gimple *
5587vect_recog_bool_pattern (vec_info *vinfo,
5588 stmt_vec_info stmt_vinfo, tree *type_out)
5589{
5590 gimple *last_stmt = stmt_vinfo->stmt;
5591 enum tree_code rhs_code;
5592 tree var, lhs, rhs, vectype;
5593 gimple *pattern_stmt;
5594
5595 if (!is_gimple_assign (gs: last_stmt))
5596 return NULL;
5597
5598 var = gimple_assign_rhs1 (gs: last_stmt);
5599 lhs = gimple_assign_lhs (gs: last_stmt);
5600 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
5601
5602 if (rhs_code == VIEW_CONVERT_EXPR)
5603 var = TREE_OPERAND (var, 0);
5604
5605 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5606 return NULL;
5607
5608 hash_set<gimple *> bool_stmts;
5609
5610 if (CONVERT_EXPR_CODE_P (rhs_code)
5611 || rhs_code == VIEW_CONVERT_EXPR)
5612 {
5613 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5614 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5615 return NULL;
5616 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5617
5618 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5619 {
5620 rhs = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts,
5621 TREE_TYPE (lhs), stmt_info: stmt_vinfo);
5622 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5623 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5624 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5625 else
5626 pattern_stmt
5627 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5628 }
5629 else
5630 {
5631 tree type = integer_type_for_mask (var, vinfo);
5632 tree cst0, cst1, tmp;
5633
5634 if (!type)
5635 return NULL;
5636
5637 /* We may use the condition directly with the narrowed type,
5638 avoiding multiple cond exprs and the result packing that
5639 would follow, and instead perform a single cond with a packed
5640 mask. In the case of widening it is better to do the cond
5641 first and then extract the results. */
5642 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5643 type = TREE_TYPE (lhs);
5644
5645 cst0 = build_int_cst (type, 0);
5646 cst1 = build_int_cst (type, 1);
5647 tmp = vect_recog_temp_ssa_var (type, NULL);
5648 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5649
5650 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5651 {
5652 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5653 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo,
5654 new_stmt: pattern_stmt, vectype: new_vectype);
5655
5656 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5657 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5658 }
5659 }
5660
5661 *type_out = vectype;
5662 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5663
5664 return pattern_stmt;
5665 }
5666 else if (rhs_code == COND_EXPR
5667 && TREE_CODE (var) == SSA_NAME)
5668 {
5669 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5670 if (vectype == NULL_TREE)
5671 return NULL;
5672
5673 /* Build a scalar type for the boolean result that when
5674 vectorized matches the vector type of the result in
5675 size and number of elements. */
5676 unsigned prec
5677 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5678 TYPE_VECTOR_SUBPARTS (vectype));
5679
5680 tree type
5681 = build_nonstandard_integer_type (prec,
5682 TYPE_UNSIGNED (TREE_TYPE (var)));
5683 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5684 return NULL;
5685
5686 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5687 var = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts, out_type: type, stmt_info: stmt_vinfo);
5688 else if (integer_type_for_mask (var, vinfo))
5689 return NULL;
5690
5691 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5692 pattern_stmt
5693 = gimple_build_assign (lhs, COND_EXPR,
5694 build2 (NE_EXPR, boolean_type_node,
5695 var, build_int_cst (TREE_TYPE (var), 0)),
5696 gimple_assign_rhs2 (gs: last_stmt),
5697 gimple_assign_rhs3 (gs: last_stmt));
5698 *type_out = vectype;
5699 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5700
5701 return pattern_stmt;
5702 }
5703 else if (rhs_code == SSA_NAME
5704 && STMT_VINFO_DATA_REF (stmt_vinfo))
5705 {
5706 stmt_vec_info pattern_stmt_info;
5707 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5708 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5709 return NULL;
5710
5711 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5712 rhs = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts,
5713 TREE_TYPE (vectype), stmt_info: stmt_vinfo);
5714 else
5715 {
5716 tree type = integer_type_for_mask (var, vinfo);
5717 tree cst0, cst1, new_vectype;
5718
5719 if (!type)
5720 return NULL;
5721
5722 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5723 type = TREE_TYPE (vectype);
5724
5725 cst0 = build_int_cst (type, 0);
5726 cst1 = build_int_cst (type, 1);
5727 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5728
5729 rhs = vect_recog_temp_ssa_var (type, NULL);
5730 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5731 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: new_vectype);
5732 }
5733
5734 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5735 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5736 {
5737 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5738 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5739 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: cast_stmt);
5740 rhs = rhs2;
5741 }
5742 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5743 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5744 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5745 *type_out = vectype;
5746 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5747
5748 return pattern_stmt;
5749 }
5750 else
5751 return NULL;
5752}
5753
5754
5755/* A helper for vect_recog_mask_conversion_pattern. Build
5756 conversion of MASK to a type suitable for masking VECTYPE.
5757 The built statement gets the required vectype and is appended
5758 to STMT_VINFO's pattern sequence.
5759
5760 Return converted mask. */
5761
5762static tree
5763build_mask_conversion (vec_info *vinfo,
5764 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5765{
5766 gimple *stmt;
5767 tree masktype, tmp;
5768
5769 masktype = truth_type_for (vectype);
5770 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5771 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5772 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo,
5773 new_stmt: stmt, vectype: masktype, TREE_TYPE (vectype));
5774
5775 return tmp;
5776}
5777
5778
5779/* Function vect_recog_mask_conversion_pattern
5780
5781 Try to find statements which require boolean type
5782 conversion. Additional conversion statements are
5783 added to handle such cases. For example:
5784
5785 bool m_1, m_2, m_3;
5786 int i_4, i_5;
5787 double d_6, d_7;
5788 char c_1, c_2, c_3;
5789
5790 S1 m_1 = i_4 > i_5;
5791 S2 m_2 = d_6 < d_7;
5792 S3 m_3 = m_1 & m_2;
5793 S4 c_1 = m_3 ? c_2 : c_3;
5794
5795 Will be transformed into:
5796
5797 S1 m_1 = i_4 > i_5;
5798 S2 m_2 = d_6 < d_7;
5799 S3'' m_2' = (_Bool[bitsize=32])m_2
5800 S3' m_3' = m_1 & m_2';
5801 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5802 S4' c_1' = m_3'' ? c_2 : c_3; */
5803
5804static gimple *
5805vect_recog_mask_conversion_pattern (vec_info *vinfo,
5806 stmt_vec_info stmt_vinfo, tree *type_out)
5807{
5808 gimple *last_stmt = stmt_vinfo->stmt;
5809 enum tree_code rhs_code;
5810 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5811 tree vectype1, vectype2;
5812 stmt_vec_info pattern_stmt_info;
5813 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5814 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5815
5816 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
5817 if (is_gimple_call (gs: last_stmt)
5818 && gimple_call_internal_p (gs: last_stmt))
5819 {
5820 gcall *pattern_stmt;
5821
5822 internal_fn ifn = gimple_call_internal_fn (gs: last_stmt);
5823 int mask_argno = internal_fn_mask_index (ifn);
5824 if (mask_argno < 0)
5825 return NULL;
5826
5827 bool store_p = internal_store_fn_p (ifn);
5828 if (store_p)
5829 {
5830 int rhs_index = internal_fn_stored_value_index (ifn);
5831 tree rhs = gimple_call_arg (gs: last_stmt, index: rhs_index);
5832 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5833 }
5834 else
5835 {
5836 lhs = gimple_call_lhs (gs: last_stmt);
5837 if (!lhs)
5838 return NULL;
5839 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5840 }
5841
5842 tree mask_arg = gimple_call_arg (gs: last_stmt, index: mask_argno);
5843 tree mask_arg_type = integer_type_for_mask (var: mask_arg, vinfo);
5844 if (!mask_arg_type)
5845 return NULL;
5846 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5847
5848 if (!vectype1 || !vectype2
5849 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5850 TYPE_VECTOR_SUBPARTS (vectype2)))
5851 return NULL;
5852
5853 tmp = build_mask_conversion (vinfo, mask: mask_arg, vectype: vectype1, stmt_vinfo);
5854
5855 auto_vec<tree, 8> args;
5856 unsigned int nargs = gimple_call_num_args (gs: last_stmt);
5857 args.safe_grow (len: nargs, exact: true);
5858 for (unsigned int i = 0; i < nargs; ++i)
5859 args[i] = ((int) i == mask_argno
5860 ? tmp
5861 : gimple_call_arg (gs: last_stmt, index: i));
5862 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5863
5864 if (!store_p)
5865 {
5866 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5867 gimple_call_set_lhs (gs: pattern_stmt, lhs);
5868 }
5869 gimple_call_set_nothrow (s: pattern_stmt, nothrow_p: true);
5870
5871 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5872 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5873 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5874
5875 *type_out = vectype1;
5876 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
5877
5878 return pattern_stmt;
5879 }
5880
5881 if (!is_gimple_assign (gs: last_stmt))
5882 return NULL;
5883
5884 gimple *pattern_stmt;
5885 lhs = gimple_assign_lhs (gs: last_stmt);
5886 rhs1 = gimple_assign_rhs1 (gs: last_stmt);
5887 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
5888
5889 /* Check for cond expression requiring mask conversion. */
5890 if (rhs_code == COND_EXPR)
5891 {
5892 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5893
5894 if (TREE_CODE (rhs1) == SSA_NAME)
5895 {
5896 rhs1_type = integer_type_for_mask (var: rhs1, vinfo);
5897 if (!rhs1_type)
5898 return NULL;
5899 }
5900 else if (COMPARISON_CLASS_P (rhs1))
5901 {
5902 /* Check whether we're comparing scalar booleans and (if so)
5903 whether a better mask type exists than the mask associated
5904 with boolean-sized elements. This avoids unnecessary packs
5905 and unpacks if the booleans are set from comparisons of
5906 wider types. E.g. in:
5907
5908 int x1, x2, x3, x4, y1, y2;
5909 ...
5910 bool b1 = (x1 == x2);
5911 bool b2 = (x3 == x4);
5912 ... = b1 == b2 ? y1 : y2;
5913
5914 it is better for b1 and b2 to use the mask type associated
5915 with int elements rather than bool (byte) elements. */
5916 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5917 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5918 if (!rhs1_op0 || !rhs1_op1)
5919 return NULL;
5920 rhs1_op0_type = integer_type_for_mask (var: rhs1_op0, vinfo);
5921 rhs1_op1_type = integer_type_for_mask (var: rhs1_op1, vinfo);
5922
5923 if (!rhs1_op0_type)
5924 rhs1_type = TREE_TYPE (rhs1_op0);
5925 else if (!rhs1_op1_type)
5926 rhs1_type = TREE_TYPE (rhs1_op1);
5927 else if (TYPE_PRECISION (rhs1_op0_type)
5928 != TYPE_PRECISION (rhs1_op1_type))
5929 {
5930 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5931 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5932 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5933 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5934 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5935 {
5936 if (abs (x: tmp0) > abs (x: tmp1))
5937 rhs1_type = rhs1_op1_type;
5938 else
5939 rhs1_type = rhs1_op0_type;
5940 }
5941 else
5942 rhs1_type = build_nonstandard_integer_type
5943 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5944 }
5945 else
5946 rhs1_type = rhs1_op0_type;
5947 }
5948 else
5949 return NULL;
5950
5951 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5952
5953 if (!vectype1 || !vectype2)
5954 return NULL;
5955
5956 /* Continue if a conversion is needed. Also continue if we have
5957 a comparison whose vector type would normally be different from
5958 VECTYPE2 when considered in isolation. In that case we'll
5959 replace the comparison with an SSA name (so that we can record
5960 its vector type) and behave as though the comparison was an SSA
5961 name from the outset. */
5962 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5963 TYPE_VECTOR_SUBPARTS (vectype2))
5964 && !rhs1_op0_type
5965 && !rhs1_op1_type)
5966 return NULL;
5967
5968 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5969 in place, we can handle it in vectorizable_condition. This avoids
5970 unnecessary promotion stmts and increased vectorization factor. */
5971 if (COMPARISON_CLASS_P (rhs1)
5972 && INTEGRAL_TYPE_P (rhs1_type)
5973 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5974 TYPE_VECTOR_SUBPARTS (vectype2)))
5975 {
5976 enum vect_def_type dt;
5977 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5978 && dt == vect_external_def
5979 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5980 && (dt == vect_external_def
5981 || dt == vect_constant_def))
5982 {
5983 tree wide_scalar_type = build_nonstandard_integer_type
5984 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5985 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5986 wide_scalar_type);
5987 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5988 return NULL;
5989 }
5990 }
5991
5992 /* If rhs1 is a comparison we need to move it into a
5993 separate statement. */
5994 if (TREE_CODE (rhs1) != SSA_NAME)
5995 {
5996 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5997 if (rhs1_op0_type
5998 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5999 rhs1_op0 = build_mask_conversion (vinfo, mask: rhs1_op0,
6000 vectype: vectype2, stmt_vinfo);
6001 if (rhs1_op1_type
6002 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6003 rhs1_op1 = build_mask_conversion (vinfo, mask: rhs1_op1,
6004 vectype: vectype2, stmt_vinfo);
6005 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6006 rhs1_op0, rhs1_op1);
6007 rhs1 = tmp;
6008 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vectype2,
6009 scalar_type_for_mask: rhs1_type);
6010 }
6011
6012 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
6013 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
6014 tmp = build_mask_conversion (vinfo, mask: rhs1, vectype: vectype1, stmt_vinfo);
6015 else
6016 tmp = rhs1;
6017
6018 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6019 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6020 gimple_assign_rhs2 (gs: last_stmt),
6021 gimple_assign_rhs3 (gs: last_stmt));
6022
6023 *type_out = vectype1;
6024 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
6025
6026 return pattern_stmt;
6027 }
6028
6029 /* Now check for binary boolean operations requiring conversion for
6030 one of the operands. */
6031 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6032 return NULL;
6033
6034 if (rhs_code != BIT_IOR_EXPR
6035 && rhs_code != BIT_XOR_EXPR
6036 && rhs_code != BIT_AND_EXPR
6037 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6038 return NULL;
6039
6040 rhs2 = gimple_assign_rhs2 (gs: last_stmt);
6041
6042 rhs1_type = integer_type_for_mask (var: rhs1, vinfo);
6043 rhs2_type = integer_type_for_mask (var: rhs2, vinfo);
6044
6045 if (!rhs1_type || !rhs2_type
6046 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6047 return NULL;
6048
6049 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6050 {
6051 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6052 if (!vectype1)
6053 return NULL;
6054 rhs2 = build_mask_conversion (vinfo, mask: rhs2, vectype: vectype1, stmt_vinfo);
6055 }
6056 else
6057 {
6058 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6059 if (!vectype1)
6060 return NULL;
6061 rhs1 = build_mask_conversion (vinfo, mask: rhs1, vectype: vectype1, stmt_vinfo);
6062 }
6063
6064 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6065 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6066
6067 *type_out = vectype1;
6068 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
6069
6070 return pattern_stmt;
6071}
6072
6073/* STMT_INFO is a load or store. If the load or store is conditional, return
6074 the boolean condition under which it occurs, otherwise return null. */
6075
6076static tree
6077vect_get_load_store_mask (stmt_vec_info stmt_info)
6078{
6079 if (gassign *def_assign = dyn_cast <gassign *> (p: stmt_info->stmt))
6080 {
6081 gcc_assert (gimple_assign_single_p (def_assign));
6082 return NULL_TREE;
6083 }
6084
6085 if (gcall *def_call = dyn_cast <gcall *> (p: stmt_info->stmt))
6086 {
6087 internal_fn ifn = gimple_call_internal_fn (gs: def_call);
6088 int mask_index = internal_fn_mask_index (ifn);
6089 return gimple_call_arg (gs: def_call, index: mask_index);
6090 }
6091
6092 gcc_unreachable ();
6093}
6094
6095/* Return MASK if MASK is suitable for masking an operation on vectors
6096 of type VECTYPE, otherwise convert it into such a form and return
6097 the result. Associate any conversion statements with STMT_INFO's
6098 pattern. */
6099
6100static tree
6101vect_convert_mask_for_vectype (tree mask, tree vectype,
6102 stmt_vec_info stmt_info, vec_info *vinfo)
6103{
6104 tree mask_type = integer_type_for_mask (var: mask, vinfo);
6105 if (mask_type)
6106 {
6107 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6108 if (mask_vectype
6109 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype),
6110 b: TYPE_VECTOR_SUBPARTS (node: mask_vectype)))
6111 mask = build_mask_conversion (vinfo, mask, vectype, stmt_vinfo: stmt_info);
6112 }
6113 return mask;
6114}
6115
6116/* Return the equivalent of:
6117
6118 fold_convert (TYPE, VALUE)
6119
6120 with the expectation that the operation will be vectorized.
6121 If new statements are needed, add them as pattern statements
6122 to STMT_INFO. */
6123
6124static tree
6125vect_add_conversion_to_pattern (vec_info *vinfo,
6126 tree type, tree value, stmt_vec_info stmt_info)
6127{
6128 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6129 return value;
6130
6131 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6132 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6133 append_pattern_def_seq (vinfo, stmt_info, new_stmt: conversion,
6134 vectype: get_vectype_for_scalar_type (vinfo, type));
6135 return new_value;
6136}
6137
6138/* Try to convert STMT_INFO into a call to a gather load or scatter store
6139 internal function. Return the final statement on success and set
6140 *TYPE_OUT to the vector type being loaded or stored.
6141
6142 This function only handles gathers and scatters that were recognized
6143 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
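/* For example (purely illustrative), an if-converted gather load such as

     x = p[idx[i]];

   guarded by a loop mask would be replaced by a pattern call along the
   lines of

     patt_x = .MASK_GATHER_LOAD (p, off, scale, 0, mask);

   where OFF stands for the offset converted to the width the target
   expects and the dotted name is the dump form of the internal
   function.  */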
6144
6145static gimple *
6146vect_recog_gather_scatter_pattern (vec_info *vinfo,
6147 stmt_vec_info stmt_info, tree *type_out)
6148{
6149 /* Currently we only support this for loop vectorization. */
6150 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6151 if (!loop_vinfo)
6152 return NULL;
6153
6154 /* Make sure that we're looking at a gather load or scatter store. */
6155 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6156 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6157 return NULL;
6158
6159 /* Get the boolean that controls whether the load or store happens.
6160 This is null if the operation is unconditional. */
6161 tree mask = vect_get_load_store_mask (stmt_info);
6162
6163 /* Make sure that the target supports an appropriate internal
6164 function for the gather/scatter operation. */
6165 gather_scatter_info gs_info;
6166 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6167 || gs_info.ifn == IFN_LAST)
6168 return NULL;
6169
6170 /* Convert the mask to the right form. */
6171 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6172 gs_info.element_type);
6173 if (mask)
6174 mask = vect_convert_mask_for_vectype (mask, vectype: gs_vectype, stmt_info,
6175 vinfo: loop_vinfo);
6176 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6177 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6178 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6179 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6180 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6181
6182 /* Get the invariant base and non-invariant offset, converting the
6183 latter to the same width as the vector elements. */
6184 tree base = gs_info.base;
6185 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6186 tree offset = vect_add_conversion_to_pattern (vinfo, type: offset_type,
6187 value: gs_info.offset, stmt_info);
6188
6189 /* Build the new pattern statement. */
6190 tree scale = size_int (gs_info.scale);
6191 gcall *pattern_stmt;
6192 if (DR_IS_READ (dr))
6193 {
6194 tree zero = build_zero_cst (gs_info.element_type);
6195 if (mask != NULL)
6196 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6197 offset, scale, zero, mask);
6198 else
6199 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6200 offset, scale, zero);
6201 tree load_lhs = vect_recog_temp_ssa_var (type: gs_info.element_type, NULL);
6202 gimple_call_set_lhs (gs: pattern_stmt, lhs: load_lhs);
6203 }
6204 else
6205 {
6206 tree rhs = vect_get_store_rhs (stmt_info);
6207 if (mask != NULL)
6208 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6209 base, offset, scale, rhs,
6210 mask);
6211 else
6212 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6213 base, offset, scale, rhs);
6214 }
6215 gimple_call_set_nothrow (s: pattern_stmt, nothrow_p: true);
6216
6217 /* Copy across relevant vectorization info and associate DR with the
6218 new pattern statement instead of the original statement. */
6219 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6220 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6221
6222 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6223 *type_out = vectype;
6224 vect_pattern_detected (name: "gather/scatter pattern", stmt: stmt_info->stmt);
6225
6226 return pattern_stmt;
6227}
6228
6229/* Return true if TYPE is a non-boolean integer type. These are the types
6230 that we want to consider for narrowing. */
6231
6232static bool
6233vect_narrowable_type_p (tree type)
6234{
6235 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6236}
6237
6238/* Return true if the operation given by CODE can be truncated to N bits
6239 when only N bits of the output are needed. This is only true if bit N+1
6240 of the inputs has no effect on the low N bits of the result. */
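/* For example, PLUS_EXPR is truncatable because the low 8 bits of a 32-bit
   addition depend only on the low 8 bits of its operands:

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   whereas RSHIFT_EXPR is not, since it moves higher input bits into the
   low part of the result.  */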
6241
6242static bool
6243vect_truncatable_operation_p (tree_code code)
6244{
6245 switch (code)
6246 {
6247 case PLUS_EXPR:
6248 case MINUS_EXPR:
6249 case MULT_EXPR:
6250 case BIT_AND_EXPR:
6251 case BIT_IOR_EXPR:
6252 case BIT_XOR_EXPR:
6253 case COND_EXPR:
6254 return true;
6255
6256 default:
6257 return false;
6258 }
6259}
6260
6261/* Record that STMT_INFO could be changed from operating on TYPE to
6262 operating on a type with the precision and sign given by PRECISION
6263 and SIGN respectively. PRECISION is an arbitrary bit precision;
6264 it might not be a whole number of bytes. */
6265
6266static void
6267vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6268 unsigned int precision, signop sign)
6269{
6270 /* Round the precision up to a whole number of bytes. */
6271 precision = vect_element_precision (precision);
6272 if (precision < TYPE_PRECISION (type)
6273 && (!stmt_info->operation_precision
6274 || stmt_info->operation_precision > precision))
6275 {
6276 stmt_info->operation_precision = precision;
6277 stmt_info->operation_sign = sign;
6278 }
6279}
6280
6281/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6282 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6283 is an arbitrary bit precision; it might not be a whole number of bytes. */
6284
6285static void
6286vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6287 unsigned int min_input_precision)
6288{
6289 /* This operation in isolation only requires the inputs to have
6290 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6291 that MIN_INPUT_PRECISION is a natural precision for the chain
6292 as a whole. E.g. consider something like:
6293
6294 unsigned short *x, *y;
6295 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6296
6297 The right shift can be done on unsigned chars, and only requires the
6298 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6299 approach would mean turning a natural chain of single-vector unsigned
6300 short operations into one that truncates "*x" and then extends
6301 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6302 operation and one vector for each unsigned char operation.
6303 This would be a significant pessimization.
6304
6305 Instead only propagate the maximum of this precision and the precision
6306 required by the users of the result. This means that we don't pessimize
6307 the case above but continue to optimize things like:
6308
6309 unsigned char *y;
6310 unsigned short *x;
6311 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6312
6313 Here we would truncate two vectors of *x to a single vector of
6314 unsigned chars and use single-vector unsigned char operations for
6315 everything else, rather than doing two unsigned short copies of
6316 "(*x & 0xf0) >> 4" and then truncating the result. */
6317 min_input_precision = MAX (min_input_precision,
6318 stmt_info->min_output_precision);
6319
6320 if (min_input_precision < TYPE_PRECISION (type)
6321 && (!stmt_info->min_input_precision
6322 || stmt_info->min_input_precision > min_input_precision))
6323 stmt_info->min_input_precision = min_input_precision;
6324}
6325
6326/* Subroutine of vect_determine_min_output_precision. Return true if
6327 we can calculate a reduced number of output bits for STMT_INFO,
6328 whose result is LHS. */
6329
6330static bool
6331vect_determine_min_output_precision_1 (vec_info *vinfo,
6332 stmt_vec_info stmt_info, tree lhs)
6333{
6334 /* Take the maximum precision required by users of the result. */
6335 unsigned int precision = 0;
6336 imm_use_iterator iter;
6337 use_operand_p use;
6338 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6339 {
6340 gimple *use_stmt = USE_STMT (use);
6341 if (is_gimple_debug (gs: use_stmt))
6342 continue;
6343 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6344 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6345 return false;
6346 /* The input precision recorded for COND_EXPRs applies only to the
6347 "then" and "else" values. */
6348 gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
6349 if (assign
6350 && gimple_assign_rhs_code (gs: assign) == COND_EXPR
6351 && use->use != gimple_assign_rhs2_ptr (gs: assign)
6352 && use->use != gimple_assign_rhs3_ptr (gs: assign))
6353 return false;
6354 precision = MAX (precision, use_stmt_info->min_input_precision);
6355 }
6356
6357 if (dump_enabled_p ())
6358 dump_printf_loc (MSG_NOTE, vect_location,
6359 "only the low %d bits of %T are significant\n",
6360 precision, lhs);
6361 stmt_info->min_output_precision = precision;
6362 return true;
6363}
6364
6365/* Calculate min_output_precision for STMT_INFO. */
6366
6367static void
6368vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6369{
6370 /* We're only interested in statements with a narrowable result. */
6371 tree lhs = gimple_get_lhs (stmt_info->stmt);
6372 if (!lhs
6373 || TREE_CODE (lhs) != SSA_NAME
6374 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6375 return;
6376
6377 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6378 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6379}
6380
6381/* Use range information to decide whether STMT (described by STMT_INFO)
6382 could be done in a narrower type. This is effectively a forward
6383 propagation, since it uses context-independent information that applies
6384 to all users of an SSA name. */
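/* For example (illustrative), if range information shows that the result of

     int y = x & 0xff;

   lies in [0, 255], then only 8 bits of precision are needed and, since
   BIT_AND_EXPR is a truncatable operation, the AND can be performed on
   unsigned chars, with the input truncated and the result extended where
   required.  */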
6385
6386static void
6387vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6388{
6389 tree lhs = gimple_assign_lhs (gs: stmt);
6390 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6391 return;
6392
6393 tree type = TREE_TYPE (lhs);
6394 if (!vect_narrowable_type_p (type))
6395 return;
6396
6397 /* First see whether we have any useful range information for the result. */
6398 unsigned int precision = TYPE_PRECISION (type);
6399 signop sign = TYPE_SIGN (type);
6400 wide_int min_value, max_value;
6401 if (!vect_get_range_info (var: lhs, min_value: &min_value, max_value: &max_value))
6402 return;
6403
6404 tree_code code = gimple_assign_rhs_code (gs: stmt);
6405 unsigned int nops = gimple_num_ops (gs: stmt);
6406
6407 if (!vect_truncatable_operation_p (code))
6408 /* Check that all relevant input operands are compatible, and update
6409 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6410 for (unsigned int i = 1; i < nops; ++i)
6411 {
6412 tree op = gimple_op (gs: stmt, i);
6413 if (TREE_CODE (op) == INTEGER_CST)
6414 {
6415 /* Don't require the integer to have TYPE (which it might
6416 not for things like shift amounts, etc.), but do require it
6417 to fit the type. */
6418 if (!int_fits_type_p (op, type))
6419 return;
6420
6421 min_value = wi::min (x: min_value, y: wi::to_wide (t: op, prec: precision), sgn: sign);
6422 max_value = wi::max (x: max_value, y: wi::to_wide (t: op, prec: precision), sgn: sign);
6423 }
6424 else if (TREE_CODE (op) == SSA_NAME)
6425 {
6426 /* Ignore codes that don't take uniform arguments. */
6427 if (!types_compatible_p (TREE_TYPE (op), type2: type))
6428 return;
6429
6430 wide_int op_min_value, op_max_value;
6431 if (!vect_get_range_info (var: op, min_value: &op_min_value, max_value: &op_max_value))
6432 return;
6433
6434 min_value = wi::min (x: min_value, y: op_min_value, sgn: sign);
6435 max_value = wi::max (x: max_value, y: op_max_value, sgn: sign);
6436 }
6437 else
6438 return;
6439 }
6440
6441 /* Try to switch signed types for unsigned types if we can.
6442 This is better for two reasons. First, unsigned ops tend
6443 to be cheaper than signed ops. Second, it means that we can
6444 handle things like:
6445
6446 signed char c;
6447 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6448
6449 as:
6450
6451 signed char c;
6452 unsigned short res_1 = (unsigned short) c & 0xff00;
6453 int res = (int) res_1;
6454
6455 where the intermediate result res_1 has unsigned rather than
6456 signed type. */
6457 if (sign == SIGNED && !wi::neg_p (x: min_value))
6458 sign = UNSIGNED;
6459
6460 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6461 unsigned int precision1 = wi::min_precision (x: min_value, sgn: sign);
6462 unsigned int precision2 = wi::min_precision (x: max_value, sgn: sign);
6463 unsigned int value_precision = MAX (precision1, precision2);
6464 if (value_precision >= precision)
6465 return;
6466
6467 if (dump_enabled_p ())
6468 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6469 " without loss of precision: %G",
6470 sign == SIGNED ? "signed" : "unsigned",
6471 value_precision, (gimple *) stmt);
6472
6473 vect_set_operation_type (stmt_info, type, precision: value_precision, sign);
6474 vect_set_min_input_precision (stmt_info, type, min_input_precision: value_precision);
6475}
6476
6477/* Use information about the users of STMT's result to decide whether
6478 STMT (described by STMT_INFO) could be done in a narrower type.
6479 This is effectively a backward propagation. */
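/* For example (illustrative), assuming the only use of the shift below is
   the conversion to unsigned char:

     unsigned short x;
     unsigned char y = (unsigned char) (x << 4);

   only the low 8 bits of the shift result matter, so the shift itself can
   be done in 8 bits and only the low 8 - 4 = 4 bits of X are needed,
   allowing the whole computation to be carried out on unsigned chars.  */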
6480
6481static void
6482vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6483{
6484 tree_code code = gimple_assign_rhs_code (gs: stmt);
6485 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6486 tree type = TREE_TYPE (gimple_op (stmt, opno));
6487 if (!vect_narrowable_type_p (type))
6488 return;
6489
6490 unsigned int precision = TYPE_PRECISION (type);
6491 unsigned int operation_precision, min_input_precision;
6492 switch (code)
6493 {
6494 CASE_CONVERT:
6495 /* Only the bits that contribute to the output matter. Don't change
6496 the precision of the operation itself. */
6497 operation_precision = precision;
6498 min_input_precision = stmt_info->min_output_precision;
6499 break;
6500
6501 case LSHIFT_EXPR:
6502 case RSHIFT_EXPR:
6503 {
6504 tree shift = gimple_assign_rhs2 (gs: stmt);
6505 if (TREE_CODE (shift) != INTEGER_CST
6506 || !wi::ltu_p (x: wi::to_widest (t: shift), y: precision))
6507 return;
6508 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6509 if (code == LSHIFT_EXPR)
6510 {
6511 /* Avoid creating an undefined shift.
6512
6513 ??? We could instead use min_output_precision as-is and
6514 optimize out-of-range shifts to zero. However, only
6515 degenerate testcases shift away all their useful input data,
6516 and it isn't natural to drop input operations in the middle
6517 of vectorization. This sort of thing should really be
6518 handled before vectorization. */
6519 operation_precision = MAX (stmt_info->min_output_precision,
6520 const_shift + 1);
6521 /* We need CONST_SHIFT fewer bits of the input. */
6522 min_input_precision = (MAX (operation_precision, const_shift)
6523 - const_shift);
6524 }
6525 else
6526 {
6527 /* We need CONST_SHIFT extra bits to do the operation. */
6528 operation_precision = (stmt_info->min_output_precision
6529 + const_shift);
6530 min_input_precision = operation_precision;
6531 }
6532 break;
6533 }
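    /* Illustrative numbers (hypothetical): with min_output_precision == 8,
       "x << 3" gets operation_precision == MAX (8, 3 + 1) == 8 and
       min_input_precision == 8 - 3 == 5, while "x >> 3" gets
       operation_precision == min_input_precision == 8 + 3 == 11.  */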

    default:
      if (vect_truncatable_operation_p (code))
        {
          /* Input bit N has no effect on output bits N-1 and lower.  */
          operation_precision = stmt_info->min_output_precision;
          min_input_precision = operation_precision;
          break;
        }
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                         " without affecting users: %G",
                         TYPE_UNSIGNED (type) ? "unsigned" : "signed",
                         operation_precision, (gimple *) stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
                               TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
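
/* For instance (hypothetical statements, not from the original source):
   if the only user of "tmp_1 = x_2 + y_3" is "res_4 = (unsigned char) tmp_1",
   then min_output_precision of the addition is 8; PLUS_EXPR is truncatable,
   so both operation_precision and min_input_precision become 8 and the
   addition itself can be performed on 8-bit elements.  */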

/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      switch (rhs_code)
        {
        CASE_CONVERT:
        case SSA_NAME:
        case BIT_NOT_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
        case BIT_AND_EXPR:
          return true;

        default:
          return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
        }
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    return true;
  return false;
}
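
/* Illustrative examples (hypothetical GIMPLE, not from the original
   source) of statements that possible_vector_mask_operation_p accepts:

     cmp_1 = a_2 < b_3;         // comparison defining a boolean
     mask_4 = cmp_1 & cmp_5;    // bitwise AND of two booleans
     copy_6 = cmp_1;            // plain SSA copy of a boolean  */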

/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

     - the user of a wants it to have a mask type for 16-bit elements (M16)
     - b also uses M16
     - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

     - 1 M16->M8 pack for b
     - 1 M8 AND for a
     - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

     - 2 M8->M16 unpacks for c
     - 2 M16 ANDs for a

     The number of operations is the same, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
        {
          tree rhs = gimple_op (assign, i);
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
            continue;

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }

      /* If the statement compares two values that shouldn't use vector masks,
         try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
          scalar_mode mode;
          tree vectype, mask_type;
          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
            precision = GET_MODE_BITSIZE (mode);
        }
    }
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
        {
          tree rhs = gimple_phi_arg_def (phi, i);

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
        }
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using normal nonmask vectors for %G",
                         stmt_info->stmt);
      else
        dump_printf_loc (MSG_NOTE, vect_location,
                         "using boolean precision %d for %G",
                         precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

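  /* Note: mask precisions are computed in a forward walk because
     vect_determine_mask_precision looks at the mask precision already
     chosen for each operand's definition, whereas operation and input
     precisions are computed in a backward walk because
     vect_determine_stmt_precisions depends on what the users of each
     result require.  */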
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (auto gsi = gsi_start_phis (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info)
                vect_determine_mask_precision (vinfo, stmt_info);
            }
          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_mask_precision
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
        }
      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[nbbs - i - 1];
          for (gimple_stmt_iterator si = gsi_last_bb (bb);
               !gsi_end_p (si); gsi_prev (&si))
            if (!is_gimple_debug (gsi_stmt (si)))
              vect_determine_stmt_precisions
                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
          for (auto gsi = gsi_start_phis (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info)
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        {
          basic_block bb = bb_vinfo->bbs[i];
          for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_mask_precision (vinfo, stmt_info);
            }
        }
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
        {
          for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_prev (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
          for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
                vect_determine_stmt_precisions (vinfo, stmt_info);
            }
        }
    }
}

typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
  { vect_recog_bit_insert_pattern, "bit_insert" },
  { vect_recog_abd_pattern, "abd" },
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
  { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
  { vect_recog_widen_abd_pattern, "widen_abd" },
  /* These must come after the double widening ones.  */
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);

/* Mark statements that are involved in a pattern.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
                         stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
         sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
         old and new statements, so that the old one has a valid but
         unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
                  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
         !gsi_end_p (si); gsi_next (&si))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "extra pattern stmt: %G", gsi_stmt (si));
        stmt_vec_info pattern_stmt_info
          = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
                                    orig_stmt_info, pattern_vectype);
        /* Stmts in the def sequence are not vectorizable cycle or
           induction defs, instead they should all be vect_internal_def
           feeding the main pattern stmt which retains this def type.  */
        STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
                              orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
                                               orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
                           pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
        gcc_unreachable ();
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
         we may have inserted all into a containing pattern def sequence
         so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
        {
          si = gsi_start (def_seq);
          s = gsi_stmt (si);
          gsi_next (&si);
        }
      else
        {
          si = gsi_none ();
          s = pattern_stmt;
        }
      do
        {
          bool found = false;
          if (gimple_extract_op (s, &op))
            for (unsigned i = 0; i < op.num_ops; ++i)
              if (op.ops[i] == lookfor)
                {
                  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
                  lookfor = gimple_get_lhs (s);
                  found = true;
                  break;
                }
          if (s == pattern_stmt)
            {
              if (!found && dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "failed to update reduction index.\n");
              break;
            }
          if (gsi_end_p (si))
            s = pattern_stmt;
          else
            {
              s = gsi_stmt (si);
              if (s == pattern_stmt)
                /* Found the end inside a bigger pattern def seq.  */
                si = gsi_none ();
              else
                gsi_next (&si);
            }
        } while (1);
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
                      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
           !gsi_end_p (gsi); gsi_next (&gsi))
        vect_pattern_recog_1 (vinfo, recog_func,
                              vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "%s pattern recognized: %G",
                     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
                             elem_ptr, *elem_ptr == stmt_info);
    }
}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              if (is_gimple_debug (gsi_stmt (si)))
                continue;
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
                                      stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
        for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
             !gsi_end_p (gsi); gsi_next (&gsi))
          {
            stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
            if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
              continue;

            /* Scan over all generic vect_recog_xxx_pattern functions.  */
            for (j = 0; j < NUM_PATTERNS; j++)
              vect_pattern_recog_1 (vinfo,
                                    &vect_vect_recog_func_ptrs[j], stmt_info);
          }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}

/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or internal_fn
   contained in CH, respectively.  */
gimple *
vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
{
  gcc_assert (op0 != NULL_TREE);
  if (ch.is_tree_code ())
    return gimple_build_assign (lhs, (tree_code) ch, op0, op1);

  gcc_assert (ch.is_internal_fn ());
  gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
                                             op1 == NULL_TREE ? 1 : 2,
                                             op0, op1);
  gimple_call_set_lhs (stmt, lhs);
  return stmt;
}
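
/* Usage sketch (illustrative only; the SSA names are hypothetical):

     gimple *g1 = vect_gimple_build (lhs_1, PLUS_EXPR, a_2, b_3);
       // builds the assignment "lhs_1 = a_2 + b_3"
     gimple *g2 = vect_gimple_build (lhs_4, CFN_SQRT, a_5, NULL_TREE);
       // builds the single-operand internal-function call
       // "lhs_4 = .SQRT (a_5)", assuming CFN_SQRT maps to an
       // internal function here.  */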