/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "insn-config.h"
#include "tree-ssa-dce.h"
#include "gimple-fold.h"
#include "gimple-match.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs-libfuncs.h"


/* Build a ternary operation and gimplify it.  Emit code before GSI.
   Return the gimple_val holding the result.  */

static tree
gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
		 tree type, tree a, tree b, tree c)
{
  location_t loc = gimple_location (gsi_stmt (*gsi));
  return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a, b, c);
}

/* Build a binary operation and gimplify it.  Emit code before GSI.
   Return the gimple_val holding the result.  */

static tree
gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
		 tree type, tree a, tree b)
{
  location_t loc = gimple_location (gsi_stmt (*gsi));
  return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a, b);
}

/* Build a unary operation and gimplify it.  Emit code before GSI.
   Return the gimple_val holding the result.  */

static tree
gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
		 tree a)
{
  location_t loc = gimple_location (gsi_stmt (*gsi));
  return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a);
}


static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);

/* Return the number of elements in a vector type TYPE that we have
   already decided needs to be expanded piecewise.  We don't support
   this kind of expansion for variable-length vectors, since we should
   always check for target support before introducing uses of those.  */
static unsigned int
nunits_for_known_piecewise_op (const_tree type)
{
  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}

/* Return true if TYPE1 has more elements than TYPE2, where either
   type may be a vector or a scalar.  */

static inline bool
subparts_gt (tree type1, tree type2)
{
  poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
  poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
  return known_gt (n1, n2);
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  vector_last_nunits = nunits;
  vector_last_type = build_vector_type (vector_inner_type, nunits);
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);

/* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
   and return it.  */

tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  /* We're using the resimplify API and maybe_push_res_to_seq to
     simplify the BIT_FIELD_REF but restrict the simplification to
     a single stmt while at the same time following SSA edges for
     simplification with already emitted CTORs.  */
  gimple_match_op opr;
  opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
  opr.resimplify (NULL, follow_all_ssa_edges);
  gimple_seq stmts = NULL;
  tree res = maybe_push_res_to_seq (&opr, &stmts);
  if (!res)
    {
      /* This can happen if SSA names that occur in abnormal PHIs are
	 used.  Build the BIT_FIELD_REF manually otherwise.  */
      t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
      res = make_ssa_name (type);
      gimple *g = gimple_build_assign (res, t);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      return res;
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
  return res;
}

163static tree
164do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
165 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
166 enum tree_code code, tree type ATTRIBUTE_UNUSED)
167{
168 a = tree_vec_extract (gsi, type: inner_type, t: a, bitsize, bitpos);
169 return gimplify_build1 (gsi, code, type: inner_type, a);
170}
171
172static tree
173do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
174 tree bitpos, tree bitsize, enum tree_code code,
175 tree type ATTRIBUTE_UNUSED)
176{
177 if (VECTOR_TYPE_P (TREE_TYPE (a)))
178 a = tree_vec_extract (gsi, type: inner_type, t: a, bitsize, bitpos);
179 if (VECTOR_TYPE_P (TREE_TYPE (b)))
180 b = tree_vec_extract (gsi, type: inner_type, t: b, bitsize, bitpos);
181 return gimplify_build2 (gsi, code, type: inner_type, a, b);
182}
183
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of a signed integer type with size
   equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if four vector elements or more
   fit into a word.  */
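/* For example, with four QImode elements packed into a 32-bit word and
   one lane holding a = 0x85 and b = 0x93: the masked halves add up as
   0x05 + 0x13 = 0x18, and since each masked lane is at most 0x7f a lane
   sum never exceeds 0xfe and so never carries into the neighbouring lane;
   the sign fixup is (0x85 ^ 0x93) & 0x80 = 0x00, giving the lane result
   0x18, which matches (0x85 + 0x93) mod 0x100.  The final XOR restores
   each lane's top bit, which equals a's top bit ^ b's top bit ^ the carry
   into bit 7 that the masked addition has already produced in place.  */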
219static tree
220do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
221 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
222 enum tree_code code, tree type ATTRIBUTE_UNUSED)
223{
224 unsigned int width = vector_element_bits (TREE_TYPE (a));
225 tree inner_type = TREE_TYPE (TREE_TYPE (a));
226 unsigned HOST_WIDE_INT max;
227 tree low_bits, high_bits, a_low, b_low, result_low, signs;
228
229 max = GET_MODE_MASK (TYPE_MODE (inner_type));
230 low_bits = build_replicated_int_cst (word_type, width, max >> 1);
231 high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
232
233 a = tree_vec_extract (gsi, type: word_type, t: a, bitsize, bitpos);
234 b = tree_vec_extract (gsi, type: word_type, t: b, bitsize, bitpos);
235
236 signs = gimplify_build2 (gsi, code: BIT_XOR_EXPR, type: word_type, a, b);
237 b_low = gimplify_build2 (gsi, code: BIT_AND_EXPR, type: word_type, a: b, b: low_bits);
238 if (code == PLUS_EXPR)
239 a_low = gimplify_build2 (gsi, code: BIT_AND_EXPR, type: word_type, a, b: low_bits);
240 else
241 {
242 a_low = gimplify_build2 (gsi, code: BIT_IOR_EXPR, type: word_type, a, b: high_bits);
243 signs = gimplify_build1 (gsi, code: BIT_NOT_EXPR, type: word_type, a: signs);
244 }
245
246 signs = gimplify_build2 (gsi, code: BIT_AND_EXPR, type: word_type, a: signs, b: high_bits);
247 result_low = gimplify_build2 (gsi, code, type: word_type, a: a_low, b: b_low);
248 return gimplify_build2 (gsi, code: BIT_XOR_EXPR, type: word_type, a: result_low, b: signs);
249}
250
251static tree
252do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
253 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
254 tree bitsize ATTRIBUTE_UNUSED,
255 enum tree_code code ATTRIBUTE_UNUSED,
256 tree type ATTRIBUTE_UNUSED)
257{
258 unsigned int width = vector_element_bits (TREE_TYPE (b));
259 tree inner_type = TREE_TYPE (TREE_TYPE (b));
260 HOST_WIDE_INT max;
261 tree low_bits, high_bits, b_low, result_low, signs;
262
263 max = GET_MODE_MASK (TYPE_MODE (inner_type));
264 low_bits = build_replicated_int_cst (word_type, width, max >> 1);
265 high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
266
267 b = tree_vec_extract (gsi, type: word_type, t: b, bitsize, bitpos);
268
269 b_low = gimplify_build2 (gsi, code: BIT_AND_EXPR, type: word_type, a: b, b: low_bits);
270 signs = gimplify_build1 (gsi, code: BIT_NOT_EXPR, type: word_type, a: b);
271 signs = gimplify_build2 (gsi, code: BIT_AND_EXPR, type: word_type, a: signs, b: high_bits);
272 result_low = gimplify_build2 (gsi, code: MINUS_EXPR, type: word_type, a: high_bits, b: b_low);
273 return gimplify_build2 (gsi, code: BIT_XOR_EXPR, type: word_type, a: result_low, b: signs);
274}
275
276/* Expand a vector operation to scalars, by using many operations
277 whose type is the vector type's inner type. */
278static tree
279expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
280 tree type, tree inner_type,
281 tree a, tree b, enum tree_code code,
282 bool parallel_p, tree ret_type = NULL_TREE)
283{
284 vec<constructor_elt, va_gc> *v;
285 tree part_width = TYPE_SIZE (inner_type);
286 tree index = bitsize_int (0);
287 int nunits = nunits_for_known_piecewise_op (type);
288 int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
289 int i;
290 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
291
  if (nunits == 1
      || warning_suppressed_p (gsi_stmt (*gsi),
			       OPT_Wvector_operation_performance))
    /* Do not diagnose decomposing single-element vectors or
       decomposing vectorizer-produced operations.  */
    ;
298 else if (ret_type || !parallel_p)
299 warning_at (loc, OPT_Wvector_operation_performance,
300 "vector operation will be expanded piecewise");
301 else
302 warning_at (loc, OPT_Wvector_operation_performance,
303 "vector operation will be expanded in parallel");
304
305 if (!ret_type)
306 ret_type = type;
307 vec_alloc (v, nelems: (nunits + delta - 1) / delta);
308 bool constant_p = true;
309 for (i = 0; i < nunits;
310 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
311 {
312 tree result = f (gsi, inner_type, a, b, index, part_width, code,
313 ret_type);
314 if (!CONSTANT_CLASS_P (result))
315 constant_p = false;
316 constructor_elt ce = {NULL_TREE, .value: result};
317 v->quick_push (obj: ce);
318 }
319
320 if (constant_p)
321 return build_vector_from_ctor (ret_type, v);
322 else
323 return build_constructor (ret_type, v);
324}
325
326/* Expand a vector operation to scalars with the freedom to use
327 a scalar integer type, or to use a different size for the items
328 in the vector type. */
329static tree
330expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
331 tree a, tree b, enum tree_code code)
332{
333 tree result, compute_type;
334 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
335 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
336
  /* We have three strategies.  If the element mode is already word_mode,
     just do the operation one element (i.e. one word) at a time.  Else, if
     the vector is wider than one word, do it a word at a time; finally, if
     the vector is smaller than one word, do it as a single scalar.  */
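  /* For example, on a target with a 32-bit word_mode, a bitwise AND on
     V8QI operands is two words wide and takes the word-at-a-time path,
     while the same operation on V2QI is narrower than a word and falls
     through to the single scalar operation below.  */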
341 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
342 return expand_vector_piecewise (gsi, f,
343 type, TREE_TYPE (type),
344 a, b, code, parallel_p: true);
345 else if (n_words > 1)
346 {
347 tree word_type = build_word_mode_vector_type (nunits: n_words);
348 result = expand_vector_piecewise (gsi, f,
349 type: word_type, TREE_TYPE (word_type),
350 a, b, code, parallel_p: true);
351 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
352 GSI_SAME_STMT);
353 }
354 else
355 {
356 /* Use a single scalar operation with a mode no wider than word_mode. */
357 if (!warning_suppressed_p (gsi_stmt (i: *gsi),
358 OPT_Wvector_operation_performance))
359 warning_at (loc, OPT_Wvector_operation_performance,
360 "vector operation will be expanded with a "
361 "single scalar operation");
362 scalar_int_mode mode
363 = int_mode_for_size (size: tree_to_uhwi (TYPE_SIZE (type)), limit: 0).require ();
364 compute_type = lang_hooks.types.type_for_mode (mode, 1);
365 result = f (gsi, compute_type, a, b, bitsize_zero_node,
366 TYPE_SIZE (compute_type), code, type);
367 }
368
369 return result;
370}
371
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are used only if they
   can process at least four elements at once, that is, only if the vector
   holds at least four elements and a word can hold four of them.  */
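/* For instance, with a 64-bit word_mode, a V8QI addition satisfies both
   conditions (eight elements per vector and eight per word) and uses the
   word-parallel bit twiddling above, whereas a V2SI addition has only two
   elements and is expanded piecewise.  */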
377static tree
378expand_vector_addition (gimple_stmt_iterator *gsi,
379 elem_op_func f, elem_op_func f_parallel,
380 tree type, tree a, tree b, enum tree_code code)
381{
382 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
383
384 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
385 && parts_per_word >= 4
386 && nunits_for_known_piecewise_op (type) >= 4)
387 return expand_vector_parallel (gsi, f: f_parallel,
388 type, a, b, code);
389 else
390 return expand_vector_piecewise (gsi, f,
391 type, TREE_TYPE (type),
392 a, b, code, parallel_p: false);
393}
394
395static bool
396expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
397
/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the expression
     VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
402static tree
403expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
404 tree op1, enum tree_code code,
405 bitmap dce_ssa_names)
406{
407 tree lhs = gimple_assign_lhs (gs: gsi_stmt (i: *gsi));
408 use_operand_p use_p;
409 imm_use_iterator iterator;
410 bool vec_cond_expr_only = true;
411
412 /* As seen in PR95830, we should not expand comparisons that are only
413 feeding a VEC_COND_EXPR statement. */
414 auto_vec<gimple *> uses;
415 FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
416 {
417 gimple *use = USE_STMT (use_p);
418 if (is_gimple_debug (gs: use))
419 continue;
420 if (is_gimple_assign (gs: use)
421 && gimple_assign_rhs_code (gs: use) == VEC_COND_EXPR
422 && gimple_assign_rhs1 (gs: use) == lhs
423 && gimple_assign_rhs2 (gs: use) != lhs
424 && gimple_assign_rhs3 (gs: use) != lhs)
425 uses.safe_push (obj: use);
426 else
427 vec_cond_expr_only = false;
428 }
429
430 if (vec_cond_expr_only)
431 for (gimple *use : uses)
432 {
433 gimple_stmt_iterator it = gsi_for_stmt (use);
434 if (!expand_vector_condition (gsi: &it, dce_ssa_names))
435 {
436 vec_cond_expr_only = false;
437 break;
438 }
439 }
440
441 if (!uses.is_empty () && vec_cond_expr_only)
442 return NULL_TREE;
443
444 tree t;
445 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
446 {
447 if (VECTOR_BOOLEAN_TYPE_P (type)
448 && SCALAR_INT_MODE_P (TYPE_MODE (type))
449 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
450 TYPE_VECTOR_SUBPARTS (type)
451 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
452 (TREE_TYPE (type)))))
453 {
454 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
455 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
456 tree index = bitsize_int (0);
457 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
458 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
459 tree ret_type = build_nonstandard_integer_type (prec, 1);
460 tree ret_inner_type = boolean_type_node;
461 int i;
462 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
463 t = build_zero_cst (ret_type);
464
465 if (TYPE_PRECISION (ret_inner_type) != 1)
466 ret_inner_type = build_nonstandard_integer_type (1, 1);
467 if (!warning_suppressed_p (gsi_stmt (i: *gsi),
468 OPT_Wvector_operation_performance))
469 warning_at (loc, OPT_Wvector_operation_performance,
470 "vector operation will be expanded piecewise");
471 for (i = 0; i < nunits;
472 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
473 {
474 tree a = tree_vec_extract (gsi, type: inner_type, t: op0, bitsize: part_width,
475 bitpos: index);
476 tree b = tree_vec_extract (gsi, type: inner_type, t: op1, bitsize: part_width,
477 bitpos: index);
478 tree result = gimplify_build2 (gsi, code, type: ret_inner_type, a, b);
479 t = gimplify_build3 (gsi, code: BIT_INSERT_EXPR, type: ret_type, a: t, b: result,
480 bitsize_int (i));
481 }
482 t = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR, type, a: t);
483 }
484 else
485 t = expand_vector_piecewise (gsi, f: do_compare, type,
486 TREE_TYPE (TREE_TYPE (op0)), a: op0, b: op1,
487 code, parallel_p: false);
488 }
489 else
490 t = NULL_TREE;
491
492 return t;
493}
494
/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in
   the type of OP0, with the shift counts given by the SHIFTCNTS array,
   and return the temporary holding the result if successful, otherwise
   return NULL_TREE.  */
498static tree
499add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
500{
501 optab op;
502 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
503 bool scalar_shift = true;
504
505 for (i = 1; i < nunits; i++)
506 {
507 if (shiftcnts[i] != shiftcnts[0])
508 scalar_shift = false;
509 }
510
511 if (scalar_shift && shiftcnts[0] == 0)
512 return op0;
513
514 if (scalar_shift)
515 {
516 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
517 if (op != unknown_optab
518 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
519 return gimplify_build2 (gsi, code: RSHIFT_EXPR, type, a: op0,
520 b: build_int_cst (NULL_TREE, shiftcnts[0]));
521 }
522
523 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
524 if (op != unknown_optab
525 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
526 {
527 tree_vector_builder vec (type, nunits, 1);
528 for (i = 0; i < nunits; i++)
529 vec.quick_push (obj: build_int_cst (TREE_TYPE (type), shiftcnts[i]));
530 return gimplify_build2 (gsi, code: RSHIFT_EXPR, type, a: op0, b: vec.build ());
531 }
532
533 return NULL_TREE;
534}
535
536/* Try to expand integer vector division by constant using
537 widening multiply, shifts and additions. */
538static tree
539expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
540 tree op1, enum tree_code code)
541{
542 bool use_pow2 = true;
543 bool has_vector_shift = true;
544 bool use_abs_op1 = false;
545 int mode = -1, this_mode;
546 int pre_shift = -1, post_shift;
547 unsigned int nunits = nunits_for_known_piecewise_op (type);
548 int *shifts = XALLOCAVEC (int, nunits * 4);
549 int *pre_shifts = shifts + nunits;
550 int *post_shifts = pre_shifts + nunits;
551 int *shift_temps = post_shifts + nunits;
552 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
553 int prec = TYPE_PRECISION (TREE_TYPE (type));
554 int dummy_int;
555 unsigned int i;
556 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
557 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
558 tree cur_op, mulcst, tem;
559 optab op;
560
561 if (prec > HOST_BITS_PER_WIDE_INT)
562 return NULL_TREE;
563
564 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
565 if (op == unknown_optab
566 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
567 has_vector_shift = false;
568
  /* Analysis phase.  Determine whether all op1 elements are powers of two,
     in which case the division can be expanded using shifts (or, for the
     remainder, using masking).  Additionally compute the multiplicative
     constants and pre- and post-shifts in case the division is to be
     expanded using widening or high-part multiplication plus shifts.  */
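  /* As a concrete example, for an unsigned V16QI division by the constant
     3 (prec == 8), choose_multiplier ends up with the multiplier 0xab and
     a post-shift of 1 (the 8-bit analogue of the familiar 0xaaaaaaab
     sequence), so each element becomes
	q = (x h* 0xab) >> 1,
     i.e. the high 8 bits of the 16-bit product, shifted right once.  */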
574 for (i = 0; i < nunits; i++)
575 {
576 tree cst = VECTOR_CST_ELT (op1, i);
577 unsigned HOST_WIDE_INT ml;
578
579 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
580 return NULL_TREE;
581 pre_shifts[i] = 0;
582 post_shifts[i] = 0;
583 mulc[i] = 0;
584 if (use_pow2
585 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
586 use_pow2 = false;
587 if (use_pow2)
588 {
589 shifts[i] = tree_log2 (cst);
590 if (shifts[i] != shifts[0]
591 && code == TRUNC_DIV_EXPR
592 && !has_vector_shift)
593 use_pow2 = false;
594 }
595 if (mode == -2)
596 continue;
597 if (sign_p == UNSIGNED)
598 {
599 unsigned HOST_WIDE_INT mh;
600 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
601
602 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
603 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
604 return NULL_TREE;
605
606 if (d <= 1)
607 {
608 mode = -2;
609 continue;
610 }
611
612 /* Find a suitable multiplier and right shift count
613 instead of multiplying with D. */
614 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
615
616 /* If the suggested multiplier is more than SIZE bits, we can
617 do better for even divisors, using an initial right shift. */
618 if ((mh != 0 && (d & 1) == 0)
619 || (!has_vector_shift && pre_shift != -1))
620 {
621 if (has_vector_shift)
622 pre_shift = ctz_or_zero (x: d);
623 else if (pre_shift == -1)
624 {
625 unsigned int j;
626 for (j = 0; j < nunits; j++)
627 {
628 tree cst2 = VECTOR_CST_ELT (op1, j);
629 unsigned HOST_WIDE_INT d2;
630 int this_pre_shift;
631
632 if (!tree_fits_uhwi_p (cst2))
633 return NULL_TREE;
634 d2 = tree_to_uhwi (cst2) & mask;
635 if (d2 == 0)
636 return NULL_TREE;
637 this_pre_shift = floor_log2 (x: d2 & -d2);
638 if (pre_shift == -1 || this_pre_shift < pre_shift)
639 pre_shift = this_pre_shift;
640 }
641 if (i != 0 && pre_shift != 0)
642 {
643 /* Restart. */
644 i = -1U;
645 mode = -1;
646 continue;
647 }
648 }
649 if (pre_shift != 0)
650 {
651 if ((d >> pre_shift) <= 1)
652 {
653 mode = -2;
654 continue;
655 }
656 mh = choose_multiplier (d >> pre_shift, prec,
657 prec - pre_shift,
658 &ml, &post_shift, &dummy_int);
659 gcc_assert (!mh);
660 pre_shifts[i] = pre_shift;
661 }
662 }
663 if (!mh)
664 this_mode = 0;
665 else
666 this_mode = 1;
667 }
668 else
669 {
670 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
671 unsigned HOST_WIDE_INT abs_d;
672
673 if (d == -1)
674 return NULL_TREE;
675
676 /* Since d might be INT_MIN, we have to cast to
677 unsigned HOST_WIDE_INT before negating to avoid
678 undefined signed overflow. */
679 abs_d = (d >= 0
680 ? (unsigned HOST_WIDE_INT) d
681 : - (unsigned HOST_WIDE_INT) d);
682
683 /* n rem d = n rem -d */
684 if (code == TRUNC_MOD_EXPR && d < 0)
685 {
686 d = abs_d;
687 use_abs_op1 = true;
688 }
689 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
690 {
691 /* This case is not handled correctly below. */
692 mode = -2;
693 continue;
694 }
695 if (abs_d <= 1)
696 {
697 mode = -2;
698 continue;
699 }
700
701 choose_multiplier (abs_d, prec, prec - 1, &ml,
702 &post_shift, &dummy_int);
703 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
704 {
705 this_mode = 4 + (d < 0);
706 ml |= HOST_WIDE_INT_M1U << (prec - 1);
707 }
708 else
709 this_mode = 2 + (d < 0);
710 }
711 mulc[i] = ml;
712 post_shifts[i] = post_shift;
713 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
714 || post_shift >= prec
715 || pre_shifts[i] >= prec)
716 this_mode = -2;
717
718 if (i == 0)
719 mode = this_mode;
720 else if (mode != this_mode)
721 mode = -2;
722 }
723
724 if (use_pow2)
725 {
726 tree addend = NULL_TREE;
727 if (sign_p == SIGNED)
728 {
729 tree uns_type;
730
731 /* Both division and remainder sequences need
732 op0 < 0 ? mask : 0 computed. It can be either computed as
733 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
734 if none of the shifts is 0, or as the conditional. */
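	  /* For example, a signed division by 4 needs the addend
	     op0 < 0 ? 3 : 0 so that (op0 + 3) >> 2 rounds towards zero
	     for negative values; with prec == 32 and shifts[i] == 2 that
	     addend is exactly ((unsigned) (op0 >> 31)) >> 30.  */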
735 for (i = 0; i < nunits; i++)
736 if (shifts[i] == 0)
737 break;
738 uns_type
739 = build_vector_type (build_nonstandard_integer_type (prec, 1),
740 nunits);
741 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
742 {
743 for (i = 0; i < nunits; i++)
744 shift_temps[i] = prec - 1;
745 cur_op = add_rshift (gsi, type, op0, shiftcnts: shift_temps);
746 if (cur_op != NULL_TREE)
747 {
748 cur_op = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR,
749 type: uns_type, a: cur_op);
750 for (i = 0; i < nunits; i++)
751 shift_temps[i] = prec - shifts[i];
752 cur_op = add_rshift (gsi, type: uns_type, op0: cur_op, shiftcnts: shift_temps);
753 if (cur_op != NULL_TREE)
754 addend = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR,
755 type, a: cur_op);
756 }
757 }
758 if (addend == NULL_TREE
759 && expand_vec_cond_expr_p (type, type, LT_EXPR))
760 {
761 tree zero, cst, mask_type, mask;
762 gimple *stmt, *cond;
763
764 mask_type = truth_type_for (type);
765 zero = build_zero_cst (type);
766 mask = make_ssa_name (var: mask_type);
767 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
768 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
769 tree_vector_builder vec (type, nunits, 1);
770 for (i = 0; i < nunits; i++)
771 vec.quick_push (obj: build_int_cst (TREE_TYPE (type),
772 (HOST_WIDE_INT_1U
773 << shifts[i]) - 1));
774 cst = vec.build ();
775 addend = make_ssa_name (var: type);
776 stmt
777 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
778 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
779 }
780 }
781 if (code == TRUNC_DIV_EXPR)
782 {
783 if (sign_p == UNSIGNED)
784 {
785 /* q = op0 >> shift; */
786 cur_op = add_rshift (gsi, type, op0, shiftcnts: shifts);
787 if (cur_op != NULL_TREE)
788 return cur_op;
789 }
790 else if (addend != NULL_TREE)
791 {
792 /* t1 = op0 + addend;
793 q = t1 >> shift; */
794 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
795 if (op != unknown_optab
796 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
797 {
798 cur_op = gimplify_build2 (gsi, code: PLUS_EXPR, type, a: op0, b: addend);
799 cur_op = add_rshift (gsi, type, op0: cur_op, shiftcnts: shifts);
800 if (cur_op != NULL_TREE)
801 return cur_op;
802 }
803 }
804 }
805 else
806 {
807 tree mask;
808 tree_vector_builder vec (type, nunits, 1);
809 for (i = 0; i < nunits; i++)
810 vec.quick_push (obj: build_int_cst (TREE_TYPE (type),
811 (HOST_WIDE_INT_1U
812 << shifts[i]) - 1));
813 mask = vec.build ();
814 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
815 if (op != unknown_optab
816 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
817 {
818 if (sign_p == UNSIGNED)
819 /* r = op0 & mask; */
820 return gimplify_build2 (gsi, code: BIT_AND_EXPR, type, a: op0, b: mask);
821 else if (addend != NULL_TREE)
822 {
823 /* t1 = op0 + addend;
824 t2 = t1 & mask;
825 r = t2 - addend; */
826 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
827 if (op != unknown_optab
828 && optab_handler (op, TYPE_MODE (type))
829 != CODE_FOR_nothing)
830 {
831 cur_op = gimplify_build2 (gsi, code: PLUS_EXPR, type, a: op0,
832 b: addend);
833 cur_op = gimplify_build2 (gsi, code: BIT_AND_EXPR, type,
834 a: cur_op, b: mask);
835 op = optab_for_tree_code (MINUS_EXPR, type,
836 optab_default);
837 if (op != unknown_optab
838 && optab_handler (op, TYPE_MODE (type))
839 != CODE_FOR_nothing)
840 return gimplify_build2 (gsi, code: MINUS_EXPR, type,
841 a: cur_op, b: addend);
842 }
843 }
844 }
845 }
846 }
847
848 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
849 return NULL_TREE;
850
851 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
852 return NULL_TREE;
853
854 cur_op = op0;
855
856 switch (mode)
857 {
858 case 0:
859 gcc_assert (sign_p == UNSIGNED);
860 /* t1 = oprnd0 >> pre_shift;
861 t2 = t1 h* ml;
862 q = t2 >> post_shift; */
863 cur_op = add_rshift (gsi, type, op0: cur_op, shiftcnts: pre_shifts);
864 if (cur_op == NULL_TREE)
865 return NULL_TREE;
866 break;
867 case 1:
868 gcc_assert (sign_p == UNSIGNED);
869 for (i = 0; i < nunits; i++)
870 {
871 shift_temps[i] = 1;
872 post_shifts[i]--;
873 }
874 break;
875 case 2:
876 case 3:
877 case 4:
878 case 5:
879 gcc_assert (sign_p == SIGNED);
880 for (i = 0; i < nunits; i++)
881 shift_temps[i] = prec - 1;
882 break;
883 default:
884 return NULL_TREE;
885 }
886
887 tree_vector_builder vec (type, nunits, 1);
888 for (i = 0; i < nunits; i++)
889 vec.quick_push (obj: build_int_cst (TREE_TYPE (type), mulc[i]));
890 mulcst = vec.build ();
891
892 cur_op = gimplify_build2 (gsi, code: MULT_HIGHPART_EXPR, type, a: cur_op, b: mulcst);
893
894 switch (mode)
895 {
896 case 0:
897 /* t1 = oprnd0 >> pre_shift;
898 t2 = t1 h* ml;
899 q = t2 >> post_shift; */
900 cur_op = add_rshift (gsi, type, op0: cur_op, shiftcnts: post_shifts);
901 break;
902 case 1:
903 /* t1 = oprnd0 h* ml;
904 t2 = oprnd0 - t1;
905 t3 = t2 >> 1;
906 t4 = t1 + t3;
907 q = t4 >> (post_shift - 1); */
908 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
909 if (op == unknown_optab
910 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
911 return NULL_TREE;
912 tem = gimplify_build2 (gsi, code: MINUS_EXPR, type, a: op0, b: cur_op);
913 tem = add_rshift (gsi, type, op0: tem, shiftcnts: shift_temps);
914 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
915 if (op == unknown_optab
916 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
917 return NULL_TREE;
918 tem = gimplify_build2 (gsi, code: PLUS_EXPR, type, a: cur_op, b: tem);
919 cur_op = add_rshift (gsi, type, op0: tem, shiftcnts: post_shifts);
920 if (cur_op == NULL_TREE)
921 return NULL_TREE;
922 break;
923 case 2:
924 case 3:
925 case 4:
926 case 5:
927 /* t1 = oprnd0 h* ml;
928 t2 = t1; [ iff (mode & 2) != 0 ]
929 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
930 t3 = t2 >> post_shift;
931 t4 = oprnd0 >> (prec - 1);
932 q = t3 - t4; [ iff (mode & 1) == 0 ]
933 q = t4 - t3; [ iff (mode & 1) != 0 ] */
934 if ((mode & 2) == 0)
935 {
936 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
937 if (op == unknown_optab
938 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
939 return NULL_TREE;
940 cur_op = gimplify_build2 (gsi, code: PLUS_EXPR, type, a: cur_op, b: op0);
941 }
942 cur_op = add_rshift (gsi, type, op0: cur_op, shiftcnts: post_shifts);
943 if (cur_op == NULL_TREE)
944 return NULL_TREE;
945 tem = add_rshift (gsi, type, op0, shiftcnts: shift_temps);
946 if (tem == NULL_TREE)
947 return NULL_TREE;
948 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
949 if (op == unknown_optab
950 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
951 return NULL_TREE;
952 if ((mode & 1) == 0)
953 cur_op = gimplify_build2 (gsi, code: MINUS_EXPR, type, a: cur_op, b: tem);
954 else
955 cur_op = gimplify_build2 (gsi, code: MINUS_EXPR, type, a: tem, b: cur_op);
956 break;
957 default:
958 gcc_unreachable ();
959 }
960
961 if (code == TRUNC_DIV_EXPR)
962 return cur_op;
963
964 /* We divided. Now finish by:
965 t1 = q * oprnd1;
966 r = oprnd0 - t1; */
967 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
968 if (op == unknown_optab
969 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
970 return NULL_TREE;
971 if (use_abs_op1)
972 {
973 tree_vector_builder elts;
974 if (!elts.new_unary_operation (shape: type, vec: op1, allow_stepped_p: false))
975 return NULL_TREE;
976 unsigned int count = elts.encoded_nelts ();
977 for (unsigned int i = 0; i < count; ++i)
978 {
979 tree elem1 = VECTOR_CST_ELT (op1, i);
980
981 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
982 if (elt == NULL_TREE)
983 return NULL_TREE;
984 elts.quick_push (obj: elt);
985 }
986 op1 = elts.build ();
987 }
988 tem = gimplify_build2 (gsi, code: MULT_EXPR, type, a: cur_op, b: op1);
989 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
990 if (op == unknown_optab
991 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
992 return NULL_TREE;
993 return gimplify_build2 (gsi, code: MINUS_EXPR, type, a: op0, b: tem);
994}
995
996/* Expand a vector condition to scalars, by using many conditions
997 on the vector's elements. */
998
999static bool
1000expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
1001{
1002 gassign *stmt = as_a <gassign *> (p: gsi_stmt (i: *gsi));
1003 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1004 tree a = gimple_assign_rhs1 (gs: stmt);
1005 tree a1 = a;
1006 tree a2 = NULL_TREE;
1007 bool a_is_comparison = false;
1008 bool a_is_scalar_bitmask = false;
1009 tree b = gimple_assign_rhs2 (gs: stmt);
1010 tree c = gimple_assign_rhs3 (gs: stmt);
1011 vec<constructor_elt, va_gc> *v;
1012 tree constr;
1013 tree inner_type = TREE_TYPE (type);
1014 tree width = vector_element_bits_tree (type);
1015 tree cond_type = TREE_TYPE (TREE_TYPE (a));
1016 tree comp_inner_type = cond_type;
1017 tree index = bitsize_int (0);
1018 tree comp_width = width;
1019 tree comp_index = index;
1020 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
1021 tree_code code = TREE_CODE (a);
1022 gassign *assign = NULL;
1023
1024 if (code == SSA_NAME)
1025 {
1026 assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
1027 if (assign != NULL
1028 && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
1029 {
1030 a_is_comparison = true;
1031 a1 = gimple_assign_rhs1 (gs: assign);
1032 a2 = gimple_assign_rhs2 (gs: assign);
1033 code = gimple_assign_rhs_code (gs: assign);
1034 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
1035 comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1036 }
1037 }
1038
1039 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
1040 || (integer_all_onesp (b) && integer_zerop (c)
1041 && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
1042 {
1043 gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1044 return true;
1045 }
1046
1047 /* If a has vector boolean type and is a comparison, above
1048 expand_vec_cond_expr_p might fail, even if both the comparison and
1049 VEC_COND_EXPR could be supported individually. See PR109176. */
1050 if (a_is_comparison
1051 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1052 && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
1053 && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
1054 return true;
1055
1056 /* Handle vector boolean types with bitmasks. If there is a comparison
1057 and we can expand the comparison into the vector boolean bitmask,
1058 or otherwise if it is compatible with type, we can transform
1059 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1060 into
1061 tmp_6 = x_2 < y_3;
1062 tmp_7 = tmp_6 & vbfld_4;
1063 tmp_8 = ~tmp_6;
1064 tmp_9 = tmp_8 & vbfld_5;
1065 vbfld_1 = tmp_7 | tmp_9;
1066 Similarly for vbfld_10 instead of x_2 < y_3. */
1067 if (VECTOR_BOOLEAN_TYPE_P (type)
1068 && SCALAR_INT_MODE_P (TYPE_MODE (type))
1069 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1070 TYPE_VECTOR_SUBPARTS (type)
1071 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1072 && (a_is_comparison
1073 ? useless_type_conversion_p (type, TREE_TYPE (a))
1074 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1075 {
1076 if (a_is_comparison)
1077 a = gimplify_build2 (gsi, code, type, a: a1, b: a2);
1078 a1 = gimplify_build2 (gsi, code: BIT_AND_EXPR, type, a, b);
1079 a2 = gimplify_build1 (gsi, code: BIT_NOT_EXPR, type, a);
1080 a2 = gimplify_build2 (gsi, code: BIT_AND_EXPR, type, a: a2, b: c);
1081 a = gimplify_build2 (gsi, code: BIT_IOR_EXPR, type, a: a1, b: a2);
1082 gimple_assign_set_rhs_from_tree (gsi, a);
1083 update_stmt (s: gsi_stmt (i: *gsi));
1084 return true;
1085 }
1086
  /* TODO: try to find a smaller vector type.  */
1088
1089 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1090 warning_at (loc, OPT_Wvector_operation_performance,
1091 "vector condition will be expanded piecewise");
1092
1093 if (!a_is_comparison
1094 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1095 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1096 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1097 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1098 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1099 (TREE_TYPE (TREE_TYPE (a))))))
1100 {
1101 a_is_scalar_bitmask = true;
1102 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1103 tree atype = build_nonstandard_integer_type (prec, 1);
1104 a = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR, type: atype, a);
1105 }
1106 else if (!a_is_comparison
1107 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
1108 comp_width = vector_element_bits_tree (TREE_TYPE (a));
1109
1110 int nunits = nunits_for_known_piecewise_op (type);
1111 vec_alloc (v, nelems: nunits);
1112 bool constant_p = true;
1113 for (int i = 0; i < nunits; i++)
1114 {
1115 tree aa, result;
1116 tree bb = tree_vec_extract (gsi, type: inner_type, t: b, bitsize: width, bitpos: index);
1117 tree cc = tree_vec_extract (gsi, type: inner_type, t: c, bitsize: width, bitpos: index);
1118 if (a_is_comparison)
1119 {
1120 tree aa1 = tree_vec_extract (gsi, type: comp_inner_type, t: a1,
1121 bitsize: comp_width, bitpos: comp_index);
1122 tree aa2 = tree_vec_extract (gsi, type: comp_inner_type, t: a2,
1123 bitsize: comp_width, bitpos: comp_index);
1124 aa = gimplify_build2 (gsi, code, boolean_type_node, a: aa1, b: aa2);
1125 }
1126 else if (a_is_scalar_bitmask)
1127 {
1128 wide_int w = wi::set_bit_in_zero (bit: i, TYPE_PRECISION (TREE_TYPE (a)));
1129 result = gimplify_build2 (gsi, code: BIT_AND_EXPR, TREE_TYPE (a),
1130 a, b: wide_int_to_tree (TREE_TYPE (a), cst: w));
1131 aa = gimplify_build2 (gsi, code: NE_EXPR, boolean_type_node, a: result,
1132 b: build_zero_cst (TREE_TYPE (a)));
1133 }
1134 else
1135 {
1136 result = tree_vec_extract (gsi, type: cond_type, t: a, bitsize: comp_width, bitpos: comp_index);
1137 aa = gimplify_build2 (gsi, code: NE_EXPR, boolean_type_node, a: result,
1138 b: build_zero_cst (cond_type));
1139 }
1140 result = gimplify_build3 (gsi, code: COND_EXPR, type: inner_type, a: aa, b: bb, c: cc);
1141 if (!CONSTANT_CLASS_P (result))
1142 constant_p = false;
1143 constructor_elt ce = {NULL_TREE, .value: result};
1144 v->quick_push (obj: ce);
1145 index = int_const_binop (PLUS_EXPR, index, width);
1146 if (width == comp_width)
1147 comp_index = index;
1148 else
1149 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1150 }
1151
1152 if (constant_p)
1153 constr = build_vector_from_ctor (type, v);
1154 else
1155 constr = build_constructor (type, v);
1156 gimple_assign_set_rhs_from_tree (gsi, constr);
1157 update_stmt (s: gsi_stmt (i: *gsi));
1158
1159 if (a_is_comparison)
1160 bitmap_set_bit (dce_ssa_names,
1161 SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1162
1163 return false;
1164}
1165
1166static tree
1167expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1168 gassign *assign, enum tree_code code,
1169 bitmap dce_ssa_names)
1170{
1171 machine_mode compute_mode = TYPE_MODE (compute_type);
1172
1173 /* If the compute mode is not a vector mode (hence we are not decomposing
1174 a BLKmode vector to smaller, hardware-supported vectors), we may want
1175 to expand the operations in parallel. */
1176 if (!VECTOR_MODE_P (compute_mode))
1177 switch (code)
1178 {
1179 case PLUS_EXPR:
1180 case MINUS_EXPR:
1181 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1182 return expand_vector_addition (gsi, f: do_binop, f_parallel: do_plus_minus, type,
1183 a: gimple_assign_rhs1 (gs: assign),
1184 b: gimple_assign_rhs2 (gs: assign), code);
1185 break;
1186
1187 case NEGATE_EXPR:
1188 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1189 return expand_vector_addition (gsi, f: do_unop, f_parallel: do_negate, type,
1190 a: gimple_assign_rhs1 (gs: assign),
1191 NULL_TREE, code);
1192 break;
1193
1194 case BIT_AND_EXPR:
1195 case BIT_IOR_EXPR:
1196 case BIT_XOR_EXPR:
1197 return expand_vector_parallel (gsi, f: do_binop, type,
1198 a: gimple_assign_rhs1 (gs: assign),
1199 b: gimple_assign_rhs2 (gs: assign), code);
1200
1201 case BIT_NOT_EXPR:
1202 return expand_vector_parallel (gsi, f: do_unop, type,
1203 a: gimple_assign_rhs1 (gs: assign),
1204 NULL_TREE, code);
1205 case EQ_EXPR:
1206 case NE_EXPR:
1207 case GT_EXPR:
1208 case LT_EXPR:
1209 case GE_EXPR:
1210 case LE_EXPR:
1211 case UNEQ_EXPR:
1212 case UNGT_EXPR:
1213 case UNLT_EXPR:
1214 case UNGE_EXPR:
1215 case UNLE_EXPR:
1216 case LTGT_EXPR:
1217 case ORDERED_EXPR:
1218 case UNORDERED_EXPR:
1219 {
1220 tree rhs1 = gimple_assign_rhs1 (gs: assign);
1221 tree rhs2 = gimple_assign_rhs2 (gs: assign);
1222
1223 return expand_vector_comparison (gsi, type, op0: rhs1, op1: rhs2, code,
1224 dce_ssa_names);
1225 }
1226
1227 case TRUNC_DIV_EXPR:
1228 case TRUNC_MOD_EXPR:
1229 {
1230 tree rhs1 = gimple_assign_rhs1 (gs: assign);
1231 tree rhs2 = gimple_assign_rhs2 (gs: assign);
1232 tree ret;
1233
1234 if (!optimize
1235 || !VECTOR_INTEGER_TYPE_P (type)
1236 || TREE_CODE (rhs2) != VECTOR_CST
1237 || !VECTOR_MODE_P (TYPE_MODE (type)))
1238 break;
1239
1240 ret = expand_vector_divmod (gsi, type, op0: rhs1, op1: rhs2, code);
1241 if (ret != NULL_TREE)
1242 return ret;
1243 break;
1244 }
1245
1246 default:
1247 break;
1248 }
1249
1250 if (TREE_CODE_CLASS (code) == tcc_unary)
1251 return expand_vector_piecewise (gsi, f: do_unop, type, inner_type: compute_type,
1252 a: gimple_assign_rhs1 (gs: assign),
1253 NULL_TREE, code, parallel_p: false);
1254 else
1255 return expand_vector_piecewise (gsi, f: do_binop, type, inner_type: compute_type,
1256 a: gimple_assign_rhs1 (gs: assign),
1257 b: gimple_assign_rhs2 (gs: assign), code, parallel_p: false);
1258}
1259
/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because a vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

1268static void
1269optimize_vector_constructor (gimple_stmt_iterator *gsi)
1270{
1271 gassign *stmt = as_a <gassign *> (p: gsi_stmt (i: *gsi));
1272 tree lhs = gimple_assign_lhs (gs: stmt);
1273 tree rhs = gimple_assign_rhs1 (gs: stmt);
1274 tree type = TREE_TYPE (rhs);
1275 unsigned int i, j;
1276 unsigned HOST_WIDE_INT nelts;
1277 bool all_same = true;
1278 constructor_elt *elt;
1279 gimple *g;
1280 tree base = NULL_TREE;
1281 optab op;
1282
1283 if (!TYPE_VECTOR_SUBPARTS (node: type).is_constant (const_value: &nelts)
1284 || nelts <= 2
1285 || CONSTRUCTOR_NELTS (rhs) != nelts)
1286 return;
1287 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1288 if (op == unknown_optab
1289 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1290 return;
1291 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1292 if (TREE_CODE (elt->value) != SSA_NAME
1293 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1294 return;
1295 else
1296 {
1297 tree this_base = elt->value;
1298 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1299 all_same = false;
1300 for (j = 0; j < nelts + 1; j++)
1301 {
1302 g = SSA_NAME_DEF_STMT (this_base);
1303 if (is_gimple_assign (gs: g)
1304 && gimple_assign_rhs_code (gs: g) == PLUS_EXPR
1305 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1306 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1307 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1308 this_base = gimple_assign_rhs1 (gs: g);
1309 else
1310 break;
1311 }
1312 if (i == 0)
1313 base = this_base;
1314 else if (this_base != base)
1315 return;
1316 }
1317 if (all_same)
1318 return;
1319 tree_vector_builder cst (type, nelts, 1);
1320 for (i = 0; i < nelts; i++)
1321 {
1322 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1323 tree elt = build_zero_cst (TREE_TYPE (base));
1324 while (this_base != base)
1325 {
1326 g = SSA_NAME_DEF_STMT (this_base);
1327 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1328 elt, gimple_assign_rhs2 (g));
1329 if (elt == NULL_TREE
1330 || TREE_CODE (elt) != INTEGER_CST
1331 || TREE_OVERFLOW (elt))
1332 return;
1333 this_base = gimple_assign_rhs1 (gs: g);
1334 }
1335 cst.quick_push (obj: elt);
1336 }
1337 for (i = 0; i < nelts; i++)
1338 CONSTRUCTOR_ELT (rhs, i)->value = base;
1339 g = gimple_build_assign (make_ssa_name (var: type), rhs);
1340 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1341 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (gs: g),
1342 cst.build ());
1343 gsi_replace (gsi, g, false);
1344}
1345
/* Return a type for the widest vector mode that has the same element type
   as ORIGINAL_VECTOR_TYPE, has at most as many elements as
   ORIGINAL_VECTOR_TYPE, and is supported by the target for an operation
   with optab OP; return NULL_TREE if none is found.  */
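/* For example, if ORIGINAL_VECTOR_TYPE is a 512-bit V16SI vector but the
   target only supports OP on 128-bit V4SI vectors, this returns the V4SI
   type and the caller splits the operation into four V4SI pieces.  */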
1350
1351static tree
1352type_for_widest_vector_mode (tree original_vector_type, optab op)
1353{
1354 gcc_assert (VECTOR_TYPE_P (original_vector_type));
1355 tree type = TREE_TYPE (original_vector_type);
1356 machine_mode inner_mode = TYPE_MODE (type);
1357 machine_mode best_mode = VOIDmode, mode;
1358 poly_int64 best_nunits = 0;
1359
1360 if (SCALAR_FLOAT_MODE_P (inner_mode))
1361 mode = MIN_MODE_VECTOR_FLOAT;
1362 else if (SCALAR_FRACT_MODE_P (inner_mode))
1363 mode = MIN_MODE_VECTOR_FRACT;
1364 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1365 mode = MIN_MODE_VECTOR_UFRACT;
1366 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1367 mode = MIN_MODE_VECTOR_ACCUM;
1368 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1369 mode = MIN_MODE_VECTOR_UACCUM;
1370 else if (inner_mode == BImode)
1371 mode = MIN_MODE_VECTOR_BOOL;
1372 else
1373 mode = MIN_MODE_VECTOR_INT;
1374
1375 FOR_EACH_MODE_FROM (mode, mode)
1376 if (GET_MODE_INNER (mode) == inner_mode
1377 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1378 && optab_handler (op, mode) != CODE_FOR_nothing
1379 && known_le (GET_MODE_NUNITS (mode),
1380 TYPE_VECTOR_SUBPARTS (original_vector_type)))
1381 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1382
1383 if (best_mode == VOIDmode)
1384 return NULL_TREE;
1385 else
1386 return build_vector_type_for_mode (type, best_mode);
1387}
1388
1389
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case PTMPVEC is NULL, a new temporary variable
   will be created.  */
1400static tree
1401vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1402{
1403 tree vect_type, vect_elt_type;
1404 gimple *asgn;
1405 tree tmpvec;
1406 tree arraytype;
1407 bool need_asgn = true;
1408 unsigned int elements;
1409
1410 vect_type = TREE_TYPE (vect);
1411 vect_elt_type = TREE_TYPE (vect_type);
1412 elements = nunits_for_known_piecewise_op (type: vect_type);
1413
1414 if (TREE_CODE (idx) == INTEGER_CST)
1415 {
1416 unsigned HOST_WIDE_INT index;
1417
1418 /* Given that we're about to compute a binary modulus,
1419 we don't care about the high bits of the value. */
1420 index = TREE_INT_CST_LOW (idx);
1421 if (!tree_fits_uhwi_p (idx) || index >= elements)
1422 {
1423 index &= elements - 1;
1424 idx = build_int_cst (TREE_TYPE (idx), index);
1425 }
1426
1427 /* When lowering a vector statement sequence do some easy
1428 simplification by looking through intermediate vector results. */
1429 if (TREE_CODE (vect) == SSA_NAME)
1430 {
1431 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1432 if (is_gimple_assign (gs: def_stmt)
1433 && (gimple_assign_rhs_code (gs: def_stmt) == VECTOR_CST
1434 || gimple_assign_rhs_code (gs: def_stmt) == CONSTRUCTOR))
1435 vect = gimple_assign_rhs1 (gs: def_stmt);
1436 }
1437
1438 if (TREE_CODE (vect) == VECTOR_CST)
1439 return VECTOR_CST_ELT (vect, index);
1440 else if (TREE_CODE (vect) == CONSTRUCTOR
1441 && (CONSTRUCTOR_NELTS (vect) == 0
1442 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1443 != VECTOR_TYPE))
1444 {
1445 if (index < CONSTRUCTOR_NELTS (vect))
1446 return CONSTRUCTOR_ELT (vect, index)->value;
1447 return build_zero_cst (vect_elt_type);
1448 }
1449 else
1450 {
1451 tree size = vector_element_bits_tree (vect_type);
1452 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1453 size);
1454 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1455 }
1456 }
1457
1458 if (!ptmpvec)
1459 tmpvec = create_tmp_var (vect_type, "vectmp");
1460 else if (!*ptmpvec)
1461 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1462 else
1463 {
1464 tmpvec = *ptmpvec;
1465 need_asgn = false;
1466 }
1467
1468 if (need_asgn)
1469 {
1470 TREE_ADDRESSABLE (tmpvec) = 1;
1471 asgn = gimple_build_assign (tmpvec, vect);
1472 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1473 }
1474
1475 arraytype = build_array_type_nelts (vect_elt_type, elements);
1476 return build4 (ARRAY_REF, vect_elt_type,
1477 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1478 idx, NULL_TREE, NULL_TREE);
1479}
1480
/* Check if the VEC_PERM_EXPR in the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask>, the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */
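/* For example, with four-element vectors,
   VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}> indexes into the
   concatenation of the two operands and yields {a, f, c, h}.  */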
1493
1494static void
1495lower_vec_perm (gimple_stmt_iterator *gsi)
1496{
1497 gassign *stmt = as_a <gassign *> (p: gsi_stmt (i: *gsi));
1498 tree mask = gimple_assign_rhs3 (gs: stmt);
1499 tree vec0 = gimple_assign_rhs1 (gs: stmt);
1500 tree vec1 = gimple_assign_rhs2 (gs: stmt);
1501 tree vect_type = TREE_TYPE (vec0);
1502 tree mask_type = TREE_TYPE (mask);
1503 tree vect_elt_type = TREE_TYPE (vect_type);
1504 tree mask_elt_type = TREE_TYPE (mask_type);
1505 unsigned HOST_WIDE_INT elements;
1506 vec<constructor_elt, va_gc> *v;
1507 tree constr, t, si, i_val;
1508 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1509 bool two_operand_p = !operand_equal_p (vec0, vec1, flags: 0);
1510 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
1511 unsigned i;
1512
1513 if (!TYPE_VECTOR_SUBPARTS (node: vect_type).is_constant (const_value: &elements))
1514 return;
1515
1516 if (TREE_CODE (mask) == SSA_NAME)
1517 {
1518 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1519 if (is_gimple_assign (gs: def_stmt)
1520 && gimple_assign_rhs_code (gs: def_stmt) == VECTOR_CST)
1521 mask = gimple_assign_rhs1 (gs: def_stmt);
1522 }
1523
1524 vec_perm_builder sel_int;
1525
1526 if (TREE_CODE (mask) == VECTOR_CST
1527 && tree_to_vec_perm_builder (&sel_int, mask))
1528 {
1529 vec_perm_indices indices (sel_int, 2, elements);
1530 machine_mode vmode = TYPE_MODE (vect_type);
1531 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
1532 machine_mode lhs_mode = TYPE_MODE (lhs_type);
1533 if (can_vec_perm_const_p (lhs_mode, vmode, indices))
1534 {
1535 gimple_assign_set_rhs3 (gs: stmt, rhs: mask);
1536 update_stmt (s: stmt);
1537 return;
1538 }
1539 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1540 vector as VEC1 and a right element shift MASK. */
1541 if (optab_handler (op: vec_shr_optab, TYPE_MODE (vect_type))
1542 != CODE_FOR_nothing
1543 && TREE_CODE (vec1) == VECTOR_CST
1544 && initializer_zerop (vec1)
1545 && maybe_ne (a: indices[0], b: 0)
1546 && known_lt (poly_uint64 (indices[0]), elements))
1547 {
1548 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1549 if (!ok_p)
1550 {
1551 for (i = 1; i < elements; ++i)
1552 {
1553 poly_uint64 actual = indices[i];
1554 poly_uint64 expected = i + indices[0];
1555 /* Indices into the second vector are all equivalent. */
1556 if (maybe_lt (a: actual, b: elements)
1557 ? maybe_ne (a: actual, b: expected)
1558 : maybe_lt (a: expected, b: elements))
1559 break;
1560 }
1561 ok_p = i == elements;
1562 }
1563 if (ok_p)
1564 {
1565 gimple_assign_set_rhs3 (gs: stmt, rhs: mask);
1566 update_stmt (s: stmt);
1567 return;
1568 }
1569 }
1570 /* And similarly vec_shl pattern. */
1571 if (optab_handler (op: vec_shl_optab, TYPE_MODE (vect_type))
1572 != CODE_FOR_nothing
1573 && TREE_CODE (vec0) == VECTOR_CST
1574 && initializer_zerop (vec0))
1575 {
1576 unsigned int first = 0;
1577 for (i = 0; i < elements; ++i)
1578 if (known_eq (poly_uint64 (indices[i]), elements))
1579 {
1580 if (i == 0 || first)
1581 break;
1582 first = i;
1583 }
1584 else if (first
1585 ? maybe_ne (a: poly_uint64 (indices[i]),
1586 b: elements + i - first)
1587 : maybe_ge (poly_uint64 (indices[i]), elements))
1588 break;
1589 if (first && i == elements)
1590 {
1591 gimple_assign_set_rhs3 (gs: stmt, rhs: mask);
1592 update_stmt (s: stmt);
1593 return;
1594 }
1595 }
1596 }
1597 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1598 return;
1599
1600 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1601 warning_at (loc, OPT_Wvector_operation_performance,
1602 "vector shuffling operation will be expanded piecewise");
1603
1604 vec_alloc (v, nelems: elements);
1605 bool constant_p = true;
1606 for (i = 0; i < elements; i++)
1607 {
1608 si = size_int (i);
1609 i_val = vector_element (gsi, vect: mask, idx: si, ptmpvec: &masktmp);
1610
1611 if (TREE_CODE (i_val) == INTEGER_CST)
1612 {
1613 unsigned HOST_WIDE_INT index;
1614
1615 index = TREE_INT_CST_LOW (i_val);
1616 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1617 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1618
1619 if (two_operand_p && (index & elements) != 0)
1620 t = vector_element (gsi, vect: vec1, idx: i_val, ptmpvec: &vec1tmp);
1621 else
1622 t = vector_element (gsi, vect: vec0, idx: i_val, ptmpvec: &vec0tmp);
1623
1624 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1625 true, GSI_SAME_STMT);
1626 }
1627 else
1628 {
1629 tree cond = NULL_TREE, v0_val;
1630
1631 if (two_operand_p)
1632 {
1633 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1634 build_int_cst (mask_elt_type, elements));
1635 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1636 true, GSI_SAME_STMT);
1637 }
1638
1639 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1640 build_int_cst (mask_elt_type, elements - 1));
1641 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1642 true, GSI_SAME_STMT);
1643
1644 v0_val = vector_element (gsi, vect: vec0, idx: i_val, ptmpvec: &vec0tmp);
1645 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1646 true, GSI_SAME_STMT);
1647
1648 if (two_operand_p)
1649 {
1650 tree v1_val;
1651
1652 v1_val = vector_element (gsi, vect: vec1, idx: i_val, ptmpvec: &vec1tmp);
1653 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1654 true, GSI_SAME_STMT);
1655
1656 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1657 cond, build_zero_cst (mask_elt_type));
1658 cond = fold_build3 (COND_EXPR, vect_elt_type,
1659 cond, v0_val, v1_val);
1660 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1661 true, GSI_SAME_STMT);
1662 }
1663 else
1664 t = v0_val;
1665 }
1666
1667 if (!CONSTANT_CLASS_P (t))
1668 constant_p = false;
1669 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1670 }
1671
1672 if (constant_p)
1673 constr = build_vector_from_ctor (vect_type, v);
1674 else
1675 constr = build_constructor (vect_type, v);
1676 gimple_assign_set_rhs_from_tree (gsi, constr);
1677 update_stmt (s: gsi_stmt (i: *gsi));
1678}
1679
1680/* If OP is a uniform vector return the element it is a splat from. */
1681
1682static tree
1683ssa_uniform_vector_p (tree op)
1684{
1685 if (TREE_CODE (op) == VECTOR_CST
1686 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1687 || TREE_CODE (op) == CONSTRUCTOR)
1688 return uniform_vector_p (op);
1689 if (TREE_CODE (op) == SSA_NAME)
1690 {
1691 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1692 if (gimple_assign_single_p (gs: def_stmt))
1693 return uniform_vector_p (gimple_assign_rhs1 (gs: def_stmt));
1694 }
1695 return NULL_TREE;
1696}
1697
1698/* Return type in which CODE operation with optab OP can be
1699 computed. */
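 /* For example, if TYPE is a 512-bit vector but the target only provides
    OP for 128-bit vectors, the 128-bit vector type is returned so the
    caller can expand the operation piecewise; if no vector mode supports
    the operation at all, the scalar element type of TYPE is returned.  */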
1700
1701static tree
1702get_compute_type (enum tree_code code, optab op, tree type)
1703{
1704 /* For very wide vectors, try using a smaller vector mode. */
1705 tree compute_type = type;
1706 if (op
1707 && (!VECTOR_MODE_P (TYPE_MODE (type))
1708 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1709 {
1710 tree vector_compute_type
1711 = type_for_widest_vector_mode (original_vector_type: type, op);
1712 if (vector_compute_type != NULL_TREE
1713 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vector_compute_type), b: 1U)
1714 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1715 != CODE_FOR_nothing))
1716 compute_type = vector_compute_type;
1717 }
1718
1719 /* If we are breaking a BLKmode vector into smaller pieces,
1720 type_for_widest_vector_mode has already looked into the optab,
1721 so skip these checks. */
1722 if (compute_type == type)
1723 {
1724 machine_mode compute_mode = TYPE_MODE (compute_type);
1725 if (VECTOR_MODE_P (compute_mode))
1726 {
1727 if (op
1728 && (optab_handler (op, mode: compute_mode) != CODE_FOR_nothing
1729 || optab_libfunc (op, compute_mode)))
1730 return compute_type;
1731 if (code == MULT_HIGHPART_EXPR
1732 && can_mult_highpart_p (compute_mode,
1733 TYPE_UNSIGNED (compute_type)))
1734 return compute_type;
1735 }
1736 /* There is no operation in hardware, so fall back to scalars. */
1737 compute_type = TREE_TYPE (type);
1738 }
1739
1740 return compute_type;
1741}
1742
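/* Callback used by expand_vector_scalar_condition: extract the piece at
   BITPOS/BITSIZE from A and B (when they are vectors) and build a scalar
   COND_EXPR that reuses the condition of the statement at GSI.  */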
1743static tree
1744do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1745 tree bitpos, tree bitsize, enum tree_code code,
1746 tree type ATTRIBUTE_UNUSED)
1747{
1748 if (VECTOR_TYPE_P (TREE_TYPE (a)))
1749 a = tree_vec_extract (gsi, type: inner_type, t: a, bitsize, bitpos);
1750 if (VECTOR_TYPE_P (TREE_TYPE (b)))
1751 b = tree_vec_extract (gsi, type: inner_type, t: b, bitsize, bitpos);
1752 tree cond = gimple_assign_rhs1 (gs: gsi_stmt (i: *gsi));
1753 return gimplify_build3 (gsi, code, type: inner_type, a: unshare_expr (cond), b: a, c: b);
1754}
1755
1756/* Expand a vector COND_EXPR that has a scalar condition to scalars, piecewise. */
1757static void
1758expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1759{
1760 gassign *stmt = as_a <gassign *> (p: gsi_stmt (i: *gsi));
1761 tree lhs = gimple_assign_lhs (gs: stmt);
1762 tree type = TREE_TYPE (lhs);
1763 tree compute_type = get_compute_type (code: COND_EXPR, op: mov_optab, type);
1764 machine_mode compute_mode = TYPE_MODE (compute_type);
1765 gcc_assert (compute_mode != BLKmode);
1766 tree rhs2 = gimple_assign_rhs2 (gs: stmt);
1767 tree rhs3 = gimple_assign_rhs3 (gs: stmt);
1768 tree new_rhs;
1769
1770 /* If the compute mode is not a vector mode (hence we are not decomposing
1771 a BLKmode vector to smaller, hardware-supported vectors), we may want
1772 to expand the operations in parallel. */
1773 if (!VECTOR_MODE_P (compute_mode))
1774 new_rhs = expand_vector_parallel (gsi, f: do_cond, type, a: rhs2, b: rhs3,
1775 code: COND_EXPR);
1776 else
1777 new_rhs = expand_vector_piecewise (gsi, f: do_cond, type, inner_type: compute_type,
1778 a: rhs2, b: rhs3, code: COND_EXPR, parallel_p: false);
1779 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1780 new_rhs = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1781 a: new_rhs);
1782
1783 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1784 way to do it is change expand_vector_operation and its callees to
1785 return a tree_code, RHS1 and RHS2 instead of a tree. */
1786 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1787 update_stmt (s: gsi_stmt (i: *gsi));
1788}
1789
1790/* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1791 lowering. If INNER_TYPE is not a vector type, this is a scalar
1792 fallback. */
1793
1794static tree
1795do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1796 tree decl, tree bitpos, tree bitsize,
1797 enum tree_code code, tree type)
1798{
1799 a = tree_vec_extract (gsi, type: inner_type, t: a, bitsize, bitpos);
1800 if (!VECTOR_TYPE_P (inner_type))
1801 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1802 if (code == CALL_EXPR)
1803 {
1804 gimple *g = gimple_build_call (decl, 1, a);
1805 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1806 gimple_call_set_lhs (gs: g, lhs);
1807 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1808 return lhs;
1809 }
1810 else
1811 {
1812 tree outer_type = build_vector_type (TREE_TYPE (type),
1813 TYPE_VECTOR_SUBPARTS (node: inner_type));
1814 return gimplify_build1 (gsi, code, type: outer_type, a);
1815 }
1816}
1817
1818/* Similarly, but for narrowing conversion. */
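/* Each INNER_TYPE-sized piece of A is split into its two half-width
   ITYPE parts, which are then combined with the vector-pack operation
   CODE into one OUTER_TYPE result.  */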
1819
1820static tree
1821do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1822 tree, tree bitpos, tree, enum tree_code code,
1823 tree type)
1824{
1825 tree itype = build_vector_type (TREE_TYPE (inner_type),
1826 exact_div (a: TYPE_VECTOR_SUBPARTS (node: inner_type),
1827 b: 2));
1828 tree b = tree_vec_extract (gsi, type: itype, t: a, TYPE_SIZE (itype), bitpos);
1829 tree c = tree_vec_extract (gsi, type: itype, t: a, TYPE_SIZE (itype),
1830 bitpos: int_const_binop (PLUS_EXPR, bitpos,
1831 TYPE_SIZE (itype)));
1832 tree outer_type = build_vector_type (TREE_TYPE (type),
1833 TYPE_VECTOR_SUBPARTS (node: inner_type));
1834 return gimplify_build2 (gsi, code, type: outer_type, a: b, b: c);
1835}
1836
1837/* Expand VEC_CONVERT ifn call. */
1838
1839static void
1840expand_vector_conversion (gimple_stmt_iterator *gsi)
1841{
1842 gimple *stmt = gsi_stmt (i: *gsi);
1843 gimple *g;
1844 tree lhs = gimple_call_lhs (gs: stmt);
1845 if (lhs == NULL_TREE)
1846 {
1847 g = gimple_build_nop ();
1848 gsi_replace (gsi, g, false);
1849 return;
1850 }
1851 tree arg = gimple_call_arg (gs: stmt, index: 0);
1852 tree ret_type = TREE_TYPE (lhs);
1853 tree arg_type = TREE_TYPE (arg);
1854 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1855 enum tree_code code = NOP_EXPR;
1856 enum tree_code code1 = ERROR_MARK;
1857 enum { NARROW, NONE, WIDEN } modifier = NONE;
1858 optab optab1 = unknown_optab;
1859
1860 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1861 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1862 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1863 code = FIX_TRUNC_EXPR;
1864 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1865 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1866 code = FLOAT_EXPR;
1867 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1868 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1869 if (ret_elt_bits < arg_elt_bits)
1870 modifier = NARROW;
1871 else if (ret_elt_bits > arg_elt_bits)
1872 modifier = WIDEN;
1873
1874 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1875 {
1876 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1877 {
1878 g = gimple_build_assign (lhs, code1, arg);
1879 gsi_replace (gsi, g, false);
1880 return;
1881 }
1882 /* Can't use get_compute_type here, as supportable_convert_operation
1883 doesn't necessarily use an optab and needs two arguments. */
1884 tree vec_compute_type
1885 = type_for_widest_vector_mode (original_vector_type: arg_type, op: mov_optab);
1886 if (vec_compute_type
1887 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1888 {
1889 unsigned HOST_WIDE_INT nelts
1890 = constant_lower_bound (a: TYPE_VECTOR_SUBPARTS (node: vec_compute_type));
1891 while (nelts > 1)
1892 {
1893 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1894 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1895 if (supportable_convert_operation (code, ret1_type, arg1_type,
1896 &code1))
1897 {
1898 new_rhs = expand_vector_piecewise (gsi, f: do_vec_conversion,
1899 type: ret_type, inner_type: arg1_type, a: arg,
1900 NULL_TREE, code: code1, parallel_p: false);
1901 g = gimple_build_assign (lhs, new_rhs);
1902 gsi_replace (gsi, g, false);
1903 return;
1904 }
1905 nelts = nelts / 2;
1906 }
1907 }
1908 }
1909 else if (modifier == NARROW)
1910 {
1911 switch (code)
1912 {
1913 CASE_CONVERT:
1914 code1 = VEC_PACK_TRUNC_EXPR;
1915 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1916 break;
1917 case FIX_TRUNC_EXPR:
1918 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1919 /* The signedness is determined from the output operand. */
1920 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1921 break;
1922 case FLOAT_EXPR:
1923 code1 = VEC_PACK_FLOAT_EXPR;
1924 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1925 break;
1926 default:
1927 gcc_unreachable ();
1928 }
1929
1930 if (optab1)
1931 compute_type = get_compute_type (code: code1, op: optab1, type: arg_type);
1932 enum insn_code icode1;
1933 if (VECTOR_TYPE_P (compute_type)
1934 && ((icode1 = optab_handler (op: optab1, TYPE_MODE (compute_type)))
1935 != CODE_FOR_nothing)
1936 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1937 {
1938 tree cretd_type
1939 = build_vector_type (TREE_TYPE (ret_type),
1940 TYPE_VECTOR_SUBPARTS (node: compute_type) * 2);
1941 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1942 {
1943 if (compute_type == arg_type)
1944 {
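 /* The whole argument already has a supported vector mode: pack it
    together with a zero vector of the same type and extract the
    RET_TYPE-sized part that holds the converted elements.  */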
1945 new_rhs = gimplify_build2 (gsi, code: code1, type: cretd_type,
1946 a: arg, b: build_zero_cst (arg_type));
1947 new_rhs = tree_vec_extract (gsi, type: ret_type, t: new_rhs,
1948 TYPE_SIZE (ret_type),
1949 bitsize_int (0));
1950 g = gimple_build_assign (lhs, new_rhs);
1951 gsi_replace (gsi, g, false);
1952 return;
1953 }
1954 tree dcompute_type
1955 = build_vector_type (TREE_TYPE (compute_type),
1956 TYPE_VECTOR_SUBPARTS (node: compute_type) * 2);
1957 if (TYPE_MAIN_VARIANT (dcompute_type)
1958 == TYPE_MAIN_VARIANT (arg_type))
1959 new_rhs = do_vec_narrow_conversion (gsi, inner_type: dcompute_type, a: arg,
1960 NULL_TREE, bitsize_int (0),
1961 NULL_TREE, code: code1,
1962 type: ret_type);
1963 else
1964 new_rhs = expand_vector_piecewise (gsi,
1965 f: do_vec_narrow_conversion,
1966 type: arg_type, inner_type: dcompute_type,
1967 a: arg, NULL_TREE, code: code1,
1968 parallel_p: false, ret_type);
1969 g = gimple_build_assign (lhs, new_rhs);
1970 gsi_replace (gsi, g, false);
1971 return;
1972 }
1973 }
1974 }
1975 else if (modifier == WIDEN)
1976 {
1977 enum tree_code code2 = ERROR_MARK;
1978 optab optab2 = unknown_optab;
1979 switch (code)
1980 {
1981 CASE_CONVERT:
1982 code1 = VEC_UNPACK_LO_EXPR;
1983 code2 = VEC_UNPACK_HI_EXPR;
1984 break;
1985 case FIX_TRUNC_EXPR:
1986 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1987 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1988 break;
1989 case FLOAT_EXPR:
1990 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1991 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1992 break;
1993 default:
1994 gcc_unreachable ();
1995 }
1996 if (BYTES_BIG_ENDIAN)
1997 std::swap (a&: code1, b&: code2);
1998
1999 if (code == FIX_TRUNC_EXPR)
2000 {
2001 /* The signedness is determined from the output operand. */
2002 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
2003 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
2004 }
2005 else
2006 {
2007 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
2008 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
2009 }
2010
2011 if (optab1 && optab2)
2012 compute_type = get_compute_type (code: code1, op: optab1, type: arg_type);
2013
2014 enum insn_code icode1, icode2;
2015 if (VECTOR_TYPE_P (compute_type)
2016 && ((icode1 = optab_handler (op: optab1, TYPE_MODE (compute_type)))
2017 != CODE_FOR_nothing)
2018 && ((icode2 = optab_handler (op: optab2, TYPE_MODE (compute_type)))
2019 != CODE_FOR_nothing)
2020 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
2021 && (insn_data[icode1].operand[0].mode
2022 == insn_data[icode2].operand[0].mode))
2023 {
2024 poly_uint64 nunits
2025 = exact_div (a: TYPE_VECTOR_SUBPARTS (node: compute_type), b: 2);
2026 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
2027 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
2028 {
2029 vec<constructor_elt, va_gc> *v;
2030 tree part_width = TYPE_SIZE (compute_type);
2031 tree index = bitsize_int (0);
2032 int nunits = nunits_for_known_piecewise_op (type: arg_type);
2033 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
2034 int i;
2035 location_t loc = gimple_location (g: gsi_stmt (i: *gsi));
2036
2037 if (compute_type != arg_type)
2038 {
2039 if (!warning_suppressed_p (gsi_stmt (i: *gsi),
2040 OPT_Wvector_operation_performance))
2041 warning_at (loc, OPT_Wvector_operation_performance,
2042 "vector operation will be expanded piecewise");
2043 }
2044 else
2045 {
2046 nunits = 1;
2047 delta = 1;
2048 }
2049
2050 vec_alloc (v, nelems: (nunits + delta - 1) / delta * 2);
2051 bool constant_p = true;
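 /* Unpack each COMPUTE_TYPE-sized piece of ARG into its low and high
    converted halves and collect the results in a vector constructor.  */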
2052 for (i = 0; i < nunits;
2053 i += delta, index = int_const_binop (PLUS_EXPR, index,
2054 part_width))
2055 {
2056 tree a = arg;
2057 if (compute_type != arg_type)
2058 a = tree_vec_extract (gsi, type: compute_type, t: a, bitsize: part_width,
2059 bitpos: index);
2060 tree result = gimplify_build1 (gsi, code: code1, type: cretd_type, a);
2061 constructor_elt ce = { NULL_TREE, .value: result };
2062 if (!CONSTANT_CLASS_P (ce.value))
2063 constant_p = false;
2064 v->quick_push (obj: ce);
2065 ce.value = gimplify_build1 (gsi, code: code2, type: cretd_type, a);
2066 if (!CONSTANT_CLASS_P (ce.value))
2067 constant_p = false;
2068 v->quick_push (obj: ce);
2069 }
2070
2071 if (constant_p)
2072 new_rhs = build_vector_from_ctor (ret_type, v);
2073 else
2074 new_rhs = build_constructor (ret_type, v);
2075 g = gimple_build_assign (lhs, new_rhs);
2076 gsi_replace (gsi, g, false);
2077 return;
2078 }
2079 }
2080 }
2081
2082 new_rhs = expand_vector_piecewise (gsi, f: do_vec_conversion, type: arg_type,
2083 TREE_TYPE (arg_type), a: arg,
2084 NULL_TREE, code, parallel_p: false, ret_type);
2085 g = gimple_build_assign (lhs, new_rhs);
2086 gsi_replace (gsi, g, false);
2087}
2088
2089/* Process one statement. If we identify a vector operation, expand it. */
2090
2091static void
2092expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2093 bitmap dce_ssa_names)
2094{
2095 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2096 enum tree_code code;
2097 optab op = unknown_optab;
2098 enum gimple_rhs_class rhs_class;
2099 tree new_rhs;
2100
2101 /* Only consider code == GIMPLE_ASSIGN. */
2102 gassign *stmt = dyn_cast <gassign *> (p: gsi_stmt (i: *gsi));
2103 if (!stmt)
2104 {
2105 if (gimple_call_internal_p (gs: gsi_stmt (i: *gsi), fn: IFN_VEC_CONVERT))
2106 expand_vector_conversion (gsi);
2107 return;
2108 }
2109
2110 code = gimple_assign_rhs_code (gs: stmt);
2111 rhs_class = get_gimple_rhs_class (code);
2112 lhs = gimple_assign_lhs (gs: stmt);
2113
2114 if (code == VEC_PERM_EXPR)
2115 {
2116 lower_vec_perm (gsi);
2117 return;
2118 }
2119
2120 if (code == VEC_COND_EXPR)
2121 {
2122 expand_vector_condition (gsi, dce_ssa_names);
2123 return;
2124 }
2125
2126 if (code == COND_EXPR
2127 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2128 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2129 {
2130 expand_vector_scalar_condition (gsi);
2131 return;
2132 }
2133
2134 if (code == CONSTRUCTOR
2135 && TREE_CODE (lhs) == SSA_NAME
2136 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2137 && !gimple_clobber_p (s: stmt)
2138 && optimize)
2139 {
2140 optimize_vector_constructor (gsi);
2141 return;
2142 }
2143
2144 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2145 return;
2146
2147 rhs1 = gimple_assign_rhs1 (gs: stmt);
2148 if (rhs_class == GIMPLE_BINARY_RHS)
2149 rhs2 = gimple_assign_rhs2 (gs: stmt);
2150
2151 type = TREE_TYPE (lhs);
2152 if (!VECTOR_TYPE_P (type)
2153 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2154 return;
2155
2156 /* A scalar operation pretending to be a vector one. */
2157 if (VECTOR_BOOLEAN_TYPE_P (type)
2158 && !VECTOR_MODE_P (TYPE_MODE (type))
2159 && TYPE_MODE (type) != BLKmode
2160 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2161 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2162 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2163 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2164 return;
2165
2166 /* If the vector operation operates on vectors whose elements are all the
2167 same, implement it with a scalar operation followed by a splat, provided
2168 the target supports the scalar operation. */
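 /* For example, { x, x, x, x } + { y, y, y, y } becomes a scalar
    addition x + y followed by a splat of the result back into a
    vector of type TYPE.  */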
2169 tree srhs1, srhs2 = NULL_TREE;
2170 if ((srhs1 = ssa_uniform_vector_p (op: rhs1)) != NULL_TREE
2171 && (rhs2 == NULL_TREE
2172 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2173 && (srhs2 = rhs2))
2174 || (srhs2 = ssa_uniform_vector_p (op: rhs2)) != NULL_TREE)
2175 /* As we query direct optabs restrict to non-convert operations. */
2176 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2177 {
2178 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2179 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2180 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2181 {
2182 tree stype = TREE_TYPE (TREE_TYPE (lhs));
2183 tree slhs = (rhs2 != NULL_TREE)
2184 ? gimplify_build2 (gsi, code, type: stype, a: srhs1, b: srhs2)
2185 : gimplify_build1 (gsi, code, type: stype, a: srhs1);
2186 gimple_assign_set_rhs_from_tree (gsi,
2187 build_vector_from_val (type, slhs));
2188 update_stmt (s: stmt);
2189 return;
2190 }
2191 }
2192
2193 if (CONVERT_EXPR_CODE_P (code)
2194 || code == FLOAT_EXPR
2195 || code == FIX_TRUNC_EXPR
2196 || code == VIEW_CONVERT_EXPR)
2197 return;
2198
2199 /* The signedness is determined from the input argument. */
2200 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2201 || code == VEC_UNPACK_FLOAT_LO_EXPR
2202 || code == VEC_PACK_FLOAT_EXPR)
2203 {
2204 /* We do not know how to scalarize those. */
2205 return;
2206 }
2207
2208 /* For widening/narrowing vector operations, the relevant type is that of
2209 the arguments, not of the widened result. The VEC_UNPACK_FLOAT_*_EXPR
2210 codes are handled the same way above. */
2211 if (code == WIDEN_SUM_EXPR
2212 || code == VEC_WIDEN_MULT_HI_EXPR
2213 || code == VEC_WIDEN_MULT_LO_EXPR
2214 || code == VEC_WIDEN_MULT_EVEN_EXPR
2215 || code == VEC_WIDEN_MULT_ODD_EXPR
2216 || code == VEC_UNPACK_HI_EXPR
2217 || code == VEC_UNPACK_LO_EXPR
2218 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2219 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2220 || code == VEC_PACK_TRUNC_EXPR
2221 || code == VEC_PACK_SAT_EXPR
2222 || code == VEC_PACK_FIX_TRUNC_EXPR
2223 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2224 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2225 {
2226 /* We do not know how to scalarize those. */
2227 return;
2228 }
2229
2230 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2231 scalar. */
2232 if (code == LSHIFT_EXPR
2233 || code == RSHIFT_EXPR
2234 || code == LROTATE_EXPR
2235 || code == RROTATE_EXPR)
2236 {
2237 optab opv;
2238
2239 /* Check whether we have vector <op> {x,x,x,x} where x
2240 could be a scalar variable or a constant. Transform
2241 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2242 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2243 {
2244 tree first;
2245
2246 if ((first = ssa_uniform_vector_p (op: rhs2)) != NULL_TREE)
2247 {
2248 gimple_assign_set_rhs2 (gs: stmt, rhs: first);
2249 update_stmt (s: stmt);
2250 rhs2 = first;
2251 }
2252 }
2253
2254 opv = optab_for_tree_code (code, type, optab_vector);
2255 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2256 op = opv;
2257 else
2258 {
2259 op = optab_for_tree_code (code, type, optab_scalar);
2260
2261 compute_type = get_compute_type (code, op, type);
2262 if (compute_type == type)
2263 return;
2264 /* The rtl expander will expand vector/scalar as vector/vector
2265 if necessary. Pick one with wider vector type. */
2266 tree compute_vtype = get_compute_type (code, op: opv, type);
2267 if (subparts_gt (type1: compute_vtype, type2: compute_type))
2268 {
2269 compute_type = compute_vtype;
2270 op = opv;
2271 }
2272 }
2273
2274 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2275 {
2276 if (compute_type == NULL_TREE)
2277 compute_type = get_compute_type (code, op, type);
2278 if (compute_type == type)
2279 return;
2280 /* Before splitting vector rotates into scalar rotates,
2281 see whether we can use vector shifts and BIT_IOR_EXPR
2282 instead. For vector-by-vector rotates we'd also
2283 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2284 for now, since fold doesn't seem to create such rotates anyway. */
2285 if (compute_type == TREE_TYPE (type)
2286 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2287 {
2288 optab oplv = vashl_optab, opl = ashl_optab;
2289 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2290 tree compute_lvtype = get_compute_type (code: LSHIFT_EXPR, op: oplv, type);
2291 tree compute_rvtype = get_compute_type (code: RSHIFT_EXPR, op: oprv, type);
2292 tree compute_otype = get_compute_type (code: BIT_IOR_EXPR, op: opo, type);
2293 tree compute_ltype = get_compute_type (code: LSHIFT_EXPR, op: opl, type);
2294 tree compute_rtype = get_compute_type (code: RSHIFT_EXPR, op: opr, type);
2295 /* The rtl expander will expand vector/scalar as vector/vector
2296 if necessary. Pick one with wider vector type. */
2297 if (subparts_gt (type1: compute_lvtype, type2: compute_ltype))
2298 {
2299 compute_ltype = compute_lvtype;
2300 opl = oplv;
2301 }
2302 if (subparts_gt (type1: compute_rvtype, type2: compute_rtype))
2303 {
2304 compute_rtype = compute_rvtype;
2305 opr = oprv;
2306 }
2307 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2308 BIT_IOR_EXPR. */
2309 compute_type = compute_ltype;
2310 if (subparts_gt (type1: compute_type, type2: compute_rtype))
2311 compute_type = compute_rtype;
2312 if (subparts_gt (type1: compute_type, type2: compute_otype))
2313 compute_type = compute_otype;
2314 /* Verify all 3 operations can be performed in that type. */
2315 if (compute_type != TREE_TYPE (type))
2316 {
2317 if (optab_handler (op: opl, TYPE_MODE (compute_type))
2318 == CODE_FOR_nothing
2319 || optab_handler (op: opr, TYPE_MODE (compute_type))
2320 == CODE_FOR_nothing
2321 || optab_handler (op: opo, TYPE_MODE (compute_type))
2322 == CODE_FOR_nothing)
2323 compute_type = TREE_TYPE (type);
2324 }
2325 }
2326 }
2327 }
2328 else
2329 op = optab_for_tree_code (code, type, optab_default);
2330
2331 /* Optabs will try converting a negation into a subtraction, so
2332 look for it as well. TODO: negation of floating-point vectors
2333 might be turned into an exclusive OR toggling the sign bit. */
2334 if (op == unknown_optab
2335 && code == NEGATE_EXPR
2336 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2337 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2338
2339 if (compute_type == NULL_TREE)
2340 compute_type = get_compute_type (code, op, type);
2341 if (compute_type == type)
2342 return;
2343
2344 new_rhs = expand_vector_operation (gsi, type, compute_type, assign: stmt, code,
2345 dce_ssa_names);
2346
2347 /* Leave expression untouched for later expansion. */
2348 if (new_rhs == NULL_TREE)
2349 return;
2350
2351 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2352 new_rhs = gimplify_build1 (gsi, code: VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2353 a: new_rhs);
2354
2355 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2356 way to do it is change expand_vector_operation and its callees to
2357 return a tree_code, RHS1 and RHS2 instead of a tree. */
2358 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2359 update_stmt (s: gsi_stmt (i: *gsi));
2360}
2361
2362/* Use this to lower vector operations introduced by the vectorizer,
2363 if they may need the bit-twiddling tricks implemented in this file. */
2364
2365static unsigned int
2366expand_vector_operations (void)
2367{
2368 gimple_stmt_iterator gsi;
2369 basic_block bb;
2370 bool cfg_changed = false;
2371
2372 auto_bitmap dce_ssa_names;
2373
2374 FOR_EACH_BB_FN (bb, cfun)
2375 {
2376 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2377 {
2378 expand_vector_operations_1 (gsi: &gsi, dce_ssa_names);
2379 /* ??? If we do not clean up EH then we will ICE during
2380 verification. But in reality we have created wrong code,
2381 as we did not properly transfer EH info and edges to
2382 the piecewise computations. */
2383 if (maybe_clean_eh_stmt (gsi_stmt (i: gsi))
2384 && gimple_purge_dead_eh_edges (bb))
2385 cfg_changed = true;
2386 /* If a .LOOP_DIST_ALIAS call prevailed, the loops were elided
2387 before vectorization got a chance to get at them. Simply
2388 fold as if loop distribution had not been performed. */
2389 if (gimple_call_internal_p (gs: gsi_stmt (i: gsi), fn: IFN_LOOP_DIST_ALIAS))
2390 {
2391 fold_loop_internal_call (gsi_stmt (i: gsi), boolean_false_node);
2392 cfg_changed = true;
2393 }
2394 }
2395 }
2396
2397 simple_dce_from_worklist (dce_ssa_names);
2398
2399 return cfg_changed ? TODO_cleanup_cfg : 0;
2400}
2401
2402namespace {
2403
2404const pass_data pass_data_lower_vector =
2405{
2406 .type: GIMPLE_PASS, /* type */
2407 .name: "veclower", /* name */
2408 .optinfo_flags: OPTGROUP_VEC, /* optinfo_flags */
2409 .tv_id: TV_NONE, /* tv_id */
2410 PROP_cfg, /* properties_required */
2411 PROP_gimple_lvec, /* properties_provided */
2412 .properties_destroyed: 0, /* properties_destroyed */
2413 .todo_flags_start: 0, /* todo_flags_start */
2414 TODO_update_ssa, /* todo_flags_finish */
2415};
2416
2417class pass_lower_vector : public gimple_opt_pass
2418{
2419public:
2420 pass_lower_vector (gcc::context *ctxt)
2421 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2422 {}
2423
2424 /* opt_pass methods: */
2425 bool gate (function *fun) final override
2426 {
2427 return !(fun->curr_properties & PROP_gimple_lvec);
2428 }
2429
2430 unsigned int execute (function *) final override
2431 {
2432 return expand_vector_operations ();
2433 }
2434
2435}; // class pass_lower_vector
2436
2437} // anon namespace
2438
2439gimple_opt_pass *
2440make_pass_lower_vector (gcc::context *ctxt)
2441{
2442 return new pass_lower_vector (ctxt);
2443}
2444
2445namespace {
2446
2447const pass_data pass_data_lower_vector_ssa =
2448{
2449 .type: GIMPLE_PASS, /* type */
2450 .name: "veclower2", /* name */
2451 .optinfo_flags: OPTGROUP_VEC, /* optinfo_flags */
2452 .tv_id: TV_NONE, /* tv_id */
2453 PROP_cfg, /* properties_required */
2454 PROP_gimple_lvec, /* properties_provided */
2455 .properties_destroyed: 0, /* properties_destroyed */
2456 .todo_flags_start: 0, /* todo_flags_start */
2457 .todo_flags_finish: ( TODO_update_ssa
2458 | TODO_cleanup_cfg ), /* todo_flags_finish */
2459};
2460
2461class pass_lower_vector_ssa : public gimple_opt_pass
2462{
2463public:
2464 pass_lower_vector_ssa (gcc::context *ctxt)
2465 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2466 {}
2467
2468 /* opt_pass methods: */
2469 opt_pass * clone () final override
2470 {
2471 return new pass_lower_vector_ssa (m_ctxt);
2472 }
2473 unsigned int execute (function *) final override
2474 {
2475 return expand_vector_operations ();
2476 }
2477
2478}; // class pass_lower_vector_ssa
2479
2480} // anon namespace
2481
2482gimple_opt_pass *
2483make_pass_lower_vector_ssa (gcc::context *ctxt)
2484{
2485 return new pass_lower_vector_ssa (ctxt);
2486}
2487
2488#include "gt-tree-vect-generic.h"
2489
