1/* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2025 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20/* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, with some experiments it was found out that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walk it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
86
87#include "config.h"
88#include "system.h"
89#include "coretypes.h"
90#include "backend.h"
91#include "target.h"
92#include "rtl.h"
93#include "tree.h"
94#include "gimple.h"
95#include "predict.h"
96#include "alloc-pool.h"
97#include "tree-pass.h"
98#include "ssa.h"
99#include "optabs-tree.h"
100#include "gimple-pretty-print.h"
101#include "alias.h"
102#include "fold-const.h"
103#include "gimple-iterator.h"
104#include "gimple-fold.h"
105#include "gimplify.h"
106#include "gimplify-me.h"
107#include "stor-layout.h"
108#include "tree-cfg.h"
109#include "tree-dfa.h"
110#include "tree-ssa.h"
111#include "builtins.h"
112#include "internal-fn.h"
113#include "case-cfn-macros.h"
114#include "optabs-libfuncs.h"
115#include "tree-eh.h"
116#include "targhooks.h"
117#include "domwalk.h"
118#include "tree-ssa-math-opts.h"
119#include "dbgcnt.h"
120#include "cfghooks.h"
121
122/* This structure represents one basic block that either computes a
123 division, or is a common dominator for basic block that compute a
124 division. */
125struct occurrence {
126 /* The basic block represented by this structure. */
127 basic_block bb = basic_block();
128
129 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
130 inserted in BB. */
131 tree recip_def = tree();
132
133 /* If non-NULL, the SSA_NAME holding the definition for a squared
134 reciprocal inserted in BB. */
135 tree square_recip_def = tree();
136
137 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
138 was inserted in BB. */
139 gimple *recip_def_stmt = nullptr;
140
141 /* Pointer to a list of "struct occurrence"s for blocks dominated
142 by BB. */
143 struct occurrence *children = nullptr;
144
145 /* Pointer to the next "struct occurrence"s in the list of blocks
146 sharing a common dominator. */
147 struct occurrence *next = nullptr;
148
149 /* The number of divisions that are in BB before compute_merit. The
150 number of divisions that are in BB or post-dominate it after
151 compute_merit. */
152 int num_divisions = 0;
153
154 /* True if the basic block has a division, false if it is a common
155 dominator for basic blocks that do. If it is false and trapping
156 math is active, BB is not a candidate for inserting a reciprocal. */
157 bool bb_has_division = false;
158
159 /* Construct a struct occurrence for basic block BB, and whose
160 children list is headed by CHILDREN. */
161 occurrence (basic_block bb, struct occurrence *children)
162 : bb (bb), children (children)
163 {
164 bb->aux = this;
165 }
166
167 /* Destroy a struct occurrence and remove it from its basic block. */
168 ~occurrence ()
169 {
170 bb->aux = nullptr;
171 }
172
173 /* Allocate memory for a struct occurrence from OCC_POOL. */
174 static void* operator new (size_t);
175
176 /* Return memory for a struct occurrence to OCC_POOL. */
177 static void operator delete (void*, size_t);
178};
179
180static struct
181{
182 /* Number of 1.0/X ops inserted. */
183 int rdivs_inserted;
184
185 /* Number of 1.0/FUNC ops inserted. */
186 int rfuncs_inserted;
187} reciprocal_stats;
188
189static struct
190{
191 /* Number of cexpi calls inserted. */
192 int inserted;
193
194 /* Number of conversions removed. */
195 int conv_removed;
196
197} sincos_stats;
198
199static struct
200{
201 /* Number of widening multiplication ops inserted. */
202 int widen_mults_inserted;
203
204 /* Number of integer multiply-and-accumulate ops inserted. */
205 int maccs_inserted;
206
207 /* Number of fp fused multiply-add ops inserted. */
208 int fmas_inserted;
209
210 /* Number of divmod calls inserted. */
211 int divmod_calls_inserted;
212
213 /* Number of highpart multiplication ops inserted. */
214 int highpart_mults_inserted;
215} widen_mul_stats;
216
217/* The instance of "struct occurrence" representing the highest
218 interesting block in the dominator tree. */
219static struct occurrence *occ_head;
220
221/* Allocation pool for getting instances of "struct occurrence". */
222static object_allocator<occurrence> *occ_pool;
223
224void* occurrence::operator new (size_t n)
225{
226 gcc_assert (n == sizeof(occurrence));
227 return occ_pool->allocate_raw ();
228}
229
230void occurrence::operator delete (void *occ, size_t n)
231{
232 gcc_assert (n == sizeof(occurrence));
233 occ_pool->remove_raw (object: occ);
234}
235
236/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
237 list of "struct occurrence"s, one per basic block, having IDOM as
238 their common dominator.
239
240 We try to insert NEW_OCC as deep as possible in the tree, and we also
241 insert any other block that is a common dominator for BB and one
242 block already in the tree. */
243
244static void
245insert_bb (struct occurrence *new_occ, basic_block idom,
246 struct occurrence **p_head)
247{
248 struct occurrence *occ, **p_occ;
249
250 for (p_occ = p_head; (occ = *p_occ) != NULL; )
251 {
252 basic_block bb = new_occ->bb, occ_bb = occ->bb;
253 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
254 if (dom == bb)
255 {
256 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
257 from its list. */
258 *p_occ = occ->next;
259 occ->next = new_occ->children;
260 new_occ->children = occ;
261
262 /* Try the next block (it may as well be dominated by BB). */
263 }
264
265 else if (dom == occ_bb)
266 {
267 /* OCC_BB dominates BB. Tail recurse to look deeper. */
268 insert_bb (new_occ, idom: dom, p_head: &occ->children);
269 return;
270 }
271
272 else if (dom != idom)
273 {
274 gcc_assert (!dom->aux);
275
276 /* There is a dominator between IDOM and BB, add it and make
277 two children out of NEW_OCC and OCC. First, remove OCC from
278 its list. */
279 *p_occ = occ->next;
280 new_occ->next = occ;
281 occ->next = NULL;
282
283 /* None of the previous blocks has DOM as a dominator: if we tail
284 recursed, we would reexamine them uselessly. Just switch BB with
285 DOM, and go on looking for blocks dominated by DOM. */
286 new_occ = new occurrence (dom, new_occ);
287 }
288
289 else
290 {
291 /* Nothing special, go on with the next element. */
292 p_occ = &occ->next;
293 }
294 }
295
296 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
297 new_occ->next = *p_head;
298 *p_head = new_occ;
299}
300
301/* Register that we found a division in BB.
302 IMPORTANCE is a measure of how much weighting to give
303 that division. Use IMPORTANCE = 2 to register a single
304 division. If the division is going to be found multiple
305 times use 1 (as it is with squares). */
306
307static inline void
308register_division_in (basic_block bb, int importance)
309{
310 struct occurrence *occ;
311
312 occ = (struct occurrence *) bb->aux;
313 if (!occ)
314 {
315 occ = new occurrence (bb, NULL);
316 insert_bb (new_occ: occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), p_head: &occ_head);
317 }
318
319 occ->bb_has_division = true;
320 occ->num_divisions += importance;
321}
322
323
324/* Compute the number of divisions that postdominate each block in OCC and
325 its children. */
326
327static void
328compute_merit (struct occurrence *occ)
329{
330 struct occurrence *occ_child;
331 basic_block dom = occ->bb;
332
333 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
334 {
335 basic_block bb;
336 if (occ_child->children)
337 compute_merit (occ: occ_child);
338
339 if (flag_exceptions)
340 bb = single_noncomplex_succ (bb: dom);
341 else
342 bb = dom;
343
344 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
345 occ->num_divisions += occ_child->num_divisions;
346 }
347}
348
349
350/* Return whether USE_STMT is a floating-point division by DEF. */
351static inline bool
352is_division_by (gimple *use_stmt, tree def)
353{
354 return is_gimple_assign (gs: use_stmt)
355 && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR
356 && gimple_assign_rhs2 (gs: use_stmt) == def
357 /* Do not recognize x / x as valid division, as we are getting
358 confused later by replacing all immediate uses x in such
359 a stmt. */
360 && gimple_assign_rhs1 (gs: use_stmt) != def
361 && !stmt_can_throw_internal (cfun, use_stmt);
362}
363
364/* Return TRUE if USE_STMT is a multiplication of DEF by A. */
365static inline bool
366is_mult_by (gimple *use_stmt, tree def, tree a)
367{
368 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
369 && gimple_assign_rhs_code (gs: use_stmt) == MULT_EXPR)
370 {
371 tree op0 = gimple_assign_rhs1 (gs: use_stmt);
372 tree op1 = gimple_assign_rhs2 (gs: use_stmt);
373
374 return (op0 == def && op1 == a)
375 || (op0 == a && op1 == def);
376 }
377 return 0;
378}
379
380/* Return whether USE_STMT is DEF * DEF. */
381static inline bool
382is_square_of (gimple *use_stmt, tree def)
383{
384 return is_mult_by (use_stmt, def, a: def);
385}
386
387/* Return whether USE_STMT is a floating-point division by
388 DEF * DEF. */
389static inline bool
390is_division_by_square (gimple *use_stmt, tree def)
391{
392 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
393 && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR
394 && gimple_assign_rhs1 (gs: use_stmt) != gimple_assign_rhs2 (gs: use_stmt)
395 && !stmt_can_throw_internal (cfun, use_stmt))
396 {
397 tree denominator = gimple_assign_rhs2 (gs: use_stmt);
398 if (TREE_CODE (denominator) == SSA_NAME)
399 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
400 }
401 return 0;
402}
403
404/* Walk the subset of the dominator tree rooted at OCC, setting the
405 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
406 the given basic block. The field may be left NULL, of course,
407 if it is not possible or profitable to do the optimization.
408
409 DEF_BSI is an iterator pointing at the statement defining DEF.
410 If RECIP_DEF is set, a dominator already has a computation that can
411 be used.
412
413 If should_insert_square_recip is set, then this also inserts
414 the square of the reciprocal immediately after the definition
415 of the reciprocal. */
416
417static void
418insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
419 tree def, tree recip_def, tree square_recip_def,
420 int should_insert_square_recip, int threshold)
421{
422 tree type;
423 gassign *new_stmt, *new_square_stmt;
424 gimple_stmt_iterator gsi;
425 struct occurrence *occ_child;
426
427 if (!recip_def
428 && (occ->bb_has_division || !flag_trapping_math)
429 /* Divide by two as all divisions are counted twice in
430 the costing loop. */
431 && occ->num_divisions / 2 >= threshold)
432 {
433 /* Make a variable with the replacement and substitute it. */
434 type = TREE_TYPE (def);
435 recip_def = create_tmp_reg (type, "reciptmp");
436 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
437 build_one_cst (type), def);
438
439 if (should_insert_square_recip)
440 {
441 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
442 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
443 recip_def, recip_def);
444 }
445
446 if (occ->bb_has_division)
447 {
448 /* Case 1: insert before an existing division. */
449 gsi = gsi_after_labels (bb: occ->bb);
450 while (!gsi_end_p (i: gsi)
451 && (!is_division_by (use_stmt: gsi_stmt (i: gsi), def))
452 && (!is_division_by_square (use_stmt: gsi_stmt (i: gsi), def)))
453 gsi_next (i: &gsi);
454
455 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
456 if (should_insert_square_recip)
457 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
458 }
459 else if (def_gsi && occ->bb == gsi_bb (i: *def_gsi))
460 {
461 /* Case 2: insert right after the definition. Note that this will
462 never happen if the definition statement can throw, because in
463 that case the sole successor of the statement's basic block will
464 dominate all the uses as well. */
465 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
466 if (should_insert_square_recip)
467 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
468 }
469 else
470 {
471 /* Case 3: insert in a basic block not containing defs/uses. */
472 gsi = gsi_after_labels (bb: occ->bb);
473 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
474 if (should_insert_square_recip)
475 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 }
477
478 reciprocal_stats.rdivs_inserted++;
479
480 occ->recip_def_stmt = new_stmt;
481 }
482
483 occ->recip_def = recip_def;
484 occ->square_recip_def = square_recip_def;
485 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
486 insert_reciprocals (def_gsi, occ: occ_child, def, recip_def,
487 square_recip_def, should_insert_square_recip,
488 threshold);
489}
490
491/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
492 Take as argument the use for (x * x). */
493static inline void
494replace_reciprocal_squares (use_operand_p use_p)
495{
496 gimple *use_stmt = USE_STMT (use_p);
497 basic_block bb = gimple_bb (g: use_stmt);
498 struct occurrence *occ = (struct occurrence *) bb->aux;
499
500 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
501 && occ->recip_def)
502 {
503 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
504 gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR);
505 gimple_assign_set_rhs2 (gs: use_stmt, rhs: occ->square_recip_def);
506 SET_USE (use_p, occ->square_recip_def);
507 fold_stmt_inplace (&gsi);
508 update_stmt (s: use_stmt);
509 }
510}
511
512
513/* Replace the division at USE_P with a multiplication by the reciprocal, if
514 possible. */
515
516static inline void
517replace_reciprocal (use_operand_p use_p)
518{
519 gimple *use_stmt = USE_STMT (use_p);
520 basic_block bb = gimple_bb (g: use_stmt);
521 struct occurrence *occ = (struct occurrence *) bb->aux;
522
523 if (optimize_bb_for_speed_p (bb)
524 && occ->recip_def && use_stmt != occ->recip_def_stmt)
525 {
526 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
527 gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR);
528 SET_USE (use_p, occ->recip_def);
529 fold_stmt_inplace (&gsi);
530 update_stmt (s: use_stmt);
531 }
532}
533
534
535/* Free OCC and return one more "struct occurrence" to be freed. */
536
537static struct occurrence *
538free_bb (struct occurrence *occ)
539{
540 struct occurrence *child, *next;
541
542 /* First get the two pointers hanging off OCC. */
543 next = occ->next;
544 child = occ->children;
545 delete occ;
546
547 /* Now ensure that we don't recurse unless it is necessary. */
548 if (!child)
549 return next;
550 else
551 {
552 while (next)
553 next = free_bb (occ: next);
554
555 return child;
556 }
557}
558
559/* Transform sequences like
560 t = sqrt (a)
561 x = 1.0 / t;
562 r1 = x * x;
563 r2 = a * x;
564 into:
565 t = sqrt (a)
566 r1 = 1.0 / a;
567 r2 = t;
568 x = r1 * r2;
569 depending on the uses of x, r1, r2. This removes one multiplication and
570 allows the sqrt and division operations to execute in parallel.
571 DEF_GSI is the gsi of the initial division by sqrt that defines
572 DEF (x in the example above). */
573
574static void
575optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
576{
577 gimple *use_stmt;
578 imm_use_iterator use_iter;
579 gimple *stmt = gsi_stmt (i: *def_gsi);
580 tree x = def;
581 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (gs: stmt);
582 tree div_rhs1 = gimple_assign_rhs1 (gs: stmt);
583
584 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
585 || TREE_CODE (div_rhs1) != REAL_CST
586 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
587 return;
588
589 gcall *sqrt_stmt
590 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
591
592 if (!sqrt_stmt || !gimple_call_lhs (gs: sqrt_stmt))
593 return;
594
595 switch (gimple_call_combined_fn (sqrt_stmt))
596 {
597 CASE_CFN_SQRT:
598 CASE_CFN_SQRT_FN:
599 break;
600
601 default:
602 return;
603 }
604 tree a = gimple_call_arg (gs: sqrt_stmt, index: 0);
605
606 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
607
608 /* Statements that use x in x * x. */
609 auto_vec<gimple *> sqr_stmts;
610 /* Statements that use x in a * x. */
611 auto_vec<gimple *> mult_stmts;
612 bool has_other_use = false;
613 bool mult_on_main_path = false;
614
615 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
616 {
617 if (is_gimple_debug (gs: use_stmt))
618 continue;
619 if (is_square_of (use_stmt, def: x))
620 {
621 sqr_stmts.safe_push (obj: use_stmt);
622 if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt))
623 mult_on_main_path = true;
624 }
625 else if (is_mult_by (use_stmt, def: x, a))
626 {
627 mult_stmts.safe_push (obj: use_stmt);
628 if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt))
629 mult_on_main_path = true;
630 }
631 else
632 has_other_use = true;
633 }
634
635 /* In the x * x and a * x cases we just rewire stmt operands or
636 remove multiplications. In the has_other_use case we introduce
637 a multiplication so make sure we don't introduce a multiplication
638 on a path where there was none. */
639 if (has_other_use && !mult_on_main_path)
640 return;
641
642 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
643 return;
644
645 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
646 to be able to compose it from the sqr and mult cases. */
647 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
648 return;
649
650 if (dump_file)
651 {
652 fprintf (stream: dump_file, format: "Optimizing reciprocal sqrt multiplications of\n");
653 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
654 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
655 fprintf (stream: dump_file, format: "\n");
656 }
657
658 bool delete_div = !has_other_use;
659 tree sqr_ssa_name = NULL_TREE;
660 if (!sqr_stmts.is_empty ())
661 {
662 /* r1 = x * x. Transform the original
663 x = 1.0 / t
664 into
665 tmp1 = 1.0 / a
666 r1 = tmp1. */
667
668 sqr_ssa_name
669 = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "recip_sqrt_sqr");
670
671 if (dump_file)
672 {
673 fprintf (stream: dump_file, format: "Replacing original division\n");
674 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
675 fprintf (stream: dump_file, format: "with new division\n");
676 }
677 stmt
678 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (gs: stmt),
679 gimple_assign_rhs1 (gs: stmt), a);
680 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
681 gsi_remove (def_gsi, true);
682 *def_gsi = gsi_for_stmt (stmt);
683 fold_stmt_inplace (def_gsi);
684 update_stmt (s: stmt);
685
686 if (dump_file)
687 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
688
689 delete_div = false;
690 gimple *sqr_stmt;
691 unsigned int i;
692 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
693 {
694 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
695 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
696 update_stmt (s: sqr_stmt);
697 }
698 }
699 if (!mult_stmts.is_empty ())
700 {
701 /* r2 = a * x. Transform this into:
702 r2 = t (The original sqrt (a)). */
703 unsigned int i;
704 gimple *mult_stmt = NULL;
705 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
706 {
707 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
708
709 if (dump_file)
710 {
711 fprintf (stream: dump_file, format: "Replacing squaring multiplication\n");
712 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
713 fprintf (stream: dump_file, format: "with assignment\n");
714 }
715 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
716 fold_stmt_inplace (&gsi2);
717 update_stmt (s: mult_stmt);
718 if (dump_file)
719 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
720 }
721 }
722
723 if (has_other_use)
724 {
725 /* Using the two temporaries tmp1, tmp2 from above
726 the original x is now:
727 x = tmp1 * tmp2. */
728 gcc_assert (orig_sqrt_ssa_name);
729 gcc_assert (sqr_ssa_name);
730
731 gimple *new_stmt
732 = gimple_build_assign (x, MULT_EXPR,
733 orig_sqrt_ssa_name, sqr_ssa_name);
734 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
735 update_stmt (s: stmt);
736 }
737 else if (delete_div)
738 {
739 /* Remove the original division. */
740 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
741 gsi_remove (&gsi2, true);
742 release_defs (stmt);
743 }
744 else
745 release_ssa_name (name: x);
746}
747
748/* Look for floating-point divisions among DEF's uses, and try to
749 replace them by multiplications with the reciprocal. Add
750 as many statements computing the reciprocal as needed.
751
752 DEF must be a GIMPLE register of a floating-point type. */
753
754static void
755execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
756{
757 use_operand_p use_p, square_use_p;
758 imm_use_iterator use_iter, square_use_iter;
759 tree square_def;
760 struct occurrence *occ;
761 int count = 0;
762 int threshold;
763 int square_recip_count = 0;
764 int sqrt_recip_count = 0;
765
766 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
767 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
768
769 /* If DEF is a square (x * x), count the number of divisions by x.
770 If there are more divisions by x than by (DEF * DEF), prefer to optimize
771 the reciprocal of x instead of DEF. This improves cases like:
772 def = x * x
773 t0 = a / def
774 t1 = b / def
775 t2 = c / x
776 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
777 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
778
779 if (is_gimple_assign (gs: def_stmt)
780 && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR
781 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
782 && gimple_assign_rhs1 (gs: def_stmt) == gimple_assign_rhs2 (gs: def_stmt))
783 {
784 tree op0 = gimple_assign_rhs1 (gs: def_stmt);
785
786 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
787 {
788 gimple *use_stmt = USE_STMT (use_p);
789 if (is_division_by (use_stmt, def: op0))
790 sqrt_recip_count++;
791 }
792 }
793
794 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
795 {
796 gimple *use_stmt = USE_STMT (use_p);
797 if (is_division_by (use_stmt, def))
798 {
799 register_division_in (bb: gimple_bb (g: use_stmt), importance: 2);
800 count++;
801 }
802
803 if (is_square_of (use_stmt, def))
804 {
805 square_def = gimple_assign_lhs (gs: use_stmt);
806 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
807 {
808 gimple *square_use_stmt = USE_STMT (square_use_p);
809 if (is_division_by (use_stmt: square_use_stmt, def: square_def))
810 {
811 /* This is executed twice for each division by a square. */
812 register_division_in (bb: gimple_bb (g: square_use_stmt), importance: 1);
813 square_recip_count++;
814 }
815 }
816 }
817 }
818
819 /* Square reciprocals were counted twice above. */
820 square_recip_count /= 2;
821
822 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
823 if (sqrt_recip_count > square_recip_count)
824 goto out;
825
826 /* Do the expensive part only if we can hope to optimize something. */
827 if (count + square_recip_count >= threshold && count >= 1)
828 {
829 gimple *use_stmt;
830 for (occ = occ_head; occ; occ = occ->next)
831 {
832 compute_merit (occ);
833 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
834 should_insert_square_recip: square_recip_count, threshold);
835 }
836
837 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
838 {
839 if (is_division_by (use_stmt, def))
840 {
841 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
842 replace_reciprocal (use_p);
843 }
844 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
845 {
846 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
847 {
848 /* Find all uses of the square that are divisions and
849 * replace them by multiplications with the inverse. */
850 imm_use_iterator square_iterator;
851 gimple *powmult_use_stmt = USE_STMT (use_p);
852 tree powmult_def_name = gimple_assign_lhs (gs: powmult_use_stmt);
853
854 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
855 square_iterator, powmult_def_name)
856 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
857 {
858 gimple *powmult_use_stmt = USE_STMT (square_use_p);
859 if (is_division_by (use_stmt: powmult_use_stmt, def: powmult_def_name))
860 replace_reciprocal_squares (use_p: square_use_p);
861 }
862 }
863 }
864 }
865 }
866
867out:
868 for (occ = occ_head; occ; )
869 occ = free_bb (occ);
870
871 occ_head = NULL;
872}
873
874/* Return an internal function that implements the reciprocal of CALL,
875 or IFN_LAST if there is no such function that the target supports. */
876
877internal_fn
878internal_fn_reciprocal (gcall *call)
879{
880 internal_fn ifn;
881
882 switch (gimple_call_combined_fn (call))
883 {
884 CASE_CFN_SQRT:
885 CASE_CFN_SQRT_FN:
886 ifn = IFN_RSQRT;
887 break;
888
889 default:
890 return IFN_LAST;
891 }
892
893 tree_pair types = direct_internal_fn_types (ifn, call);
894 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
895 return IFN_LAST;
896
897 return ifn;
898}
899
900/* Go through all the floating-point SSA_NAMEs, and call
901 execute_cse_reciprocals_1 on each of them. */
902namespace {
903
904const pass_data pass_data_cse_reciprocals =
905{
906 .type: GIMPLE_PASS, /* type */
907 .name: "recip", /* name */
908 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
909 .tv_id: TV_TREE_RECIP, /* tv_id */
910 PROP_ssa, /* properties_required */
911 .properties_provided: 0, /* properties_provided */
912 .properties_destroyed: 0, /* properties_destroyed */
913 .todo_flags_start: 0, /* todo_flags_start */
914 TODO_update_ssa, /* todo_flags_finish */
915};
916
917class pass_cse_reciprocals : public gimple_opt_pass
918{
919public:
920 pass_cse_reciprocals (gcc::context *ctxt)
921 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 {}
923
924 /* opt_pass methods: */
925 bool gate (function *) final override
926 {
927 return optimize && flag_reciprocal_math;
928 }
929 unsigned int execute (function *) final override;
930
931}; // class pass_cse_reciprocals
932
933unsigned int
934pass_cse_reciprocals::execute (function *fun)
935{
936 basic_block bb;
937 tree arg;
938
939 occ_pool = new object_allocator<occurrence> ("dominators for recip");
940
941 memset (s: &reciprocal_stats, c: 0, n: sizeof (reciprocal_stats));
942 calculate_dominance_info (CDI_DOMINATORS);
943 calculate_dominance_info (CDI_POST_DOMINATORS);
944
945 if (flag_checking)
946 FOR_EACH_BB_FN (bb, fun)
947 gcc_assert (!bb->aux);
948
949 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
950 if (FLOAT_TYPE_P (TREE_TYPE (arg))
951 && is_gimple_reg (arg))
952 {
953 tree name = ssa_default_def (fun, arg);
954 if (name)
955 execute_cse_reciprocals_1 (NULL, def: name);
956 }
957
958 FOR_EACH_BB_FN (bb, fun)
959 {
960 tree def;
961
962 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
963 gsi_next (i: &gsi))
964 {
965 gphi *phi = gsi.phi ();
966 def = PHI_RESULT (phi);
967 if (! virtual_operand_p (op: def)
968 && FLOAT_TYPE_P (TREE_TYPE (def)))
969 execute_cse_reciprocals_1 (NULL, def);
970 }
971
972 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);
973 gsi_next (i: &gsi))
974 {
975 gimple *stmt = gsi_stmt (i: gsi);
976
977 if (gimple_has_lhs (stmt)
978 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
979 && FLOAT_TYPE_P (TREE_TYPE (def))
980 && TREE_CODE (def) == SSA_NAME)
981 {
982 execute_cse_reciprocals_1 (def_gsi: &gsi, def);
983 stmt = gsi_stmt (i: gsi);
984 if (flag_unsafe_math_optimizations
985 && is_gimple_assign (gs: stmt)
986 && gimple_assign_lhs (gs: stmt) == def
987 && !stmt_can_throw_internal (cfun, stmt)
988 && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR)
989 optimize_recip_sqrt (def_gsi: &gsi, def);
990 }
991 }
992
993 if (optimize_bb_for_size_p (bb))
994 continue;
995
996 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
997 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);
998 gsi_next (i: &gsi))
999 {
1000 gimple *stmt = gsi_stmt (i: gsi);
1001
1002 if (is_gimple_assign (gs: stmt)
1003 && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR)
1004 {
1005 tree arg1 = gimple_assign_rhs2 (gs: stmt);
1006 gimple *stmt1;
1007
1008 if (TREE_CODE (arg1) != SSA_NAME)
1009 continue;
1010
1011 stmt1 = SSA_NAME_DEF_STMT (arg1);
1012
1013 if (is_gimple_call (gs: stmt1)
1014 && gimple_call_lhs (gs: stmt1))
1015 {
1016 bool fail;
1017 imm_use_iterator ui;
1018 use_operand_p use_p;
1019 tree fndecl = NULL_TREE;
1020
1021 gcall *call = as_a <gcall *> (p: stmt1);
1022 internal_fn ifn = internal_fn_reciprocal (call);
1023 if (ifn == IFN_LAST)
1024 {
1025 fndecl = gimple_call_fndecl (gs: call);
1026 if (!fndecl
1027 || !fndecl_built_in_p (node: fndecl, klass: BUILT_IN_MD))
1028 continue;
1029 fndecl = targetm.builtin_reciprocal (fndecl);
1030 if (!fndecl)
1031 continue;
1032 }
1033
1034 /* Check that all uses of the SSA name are divisions,
1035 otherwise replacing the defining statement will do
1036 the wrong thing. */
1037 fail = false;
1038 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1039 {
1040 gimple *stmt2 = USE_STMT (use_p);
1041 if (is_gimple_debug (gs: stmt2))
1042 continue;
1043 if (!is_gimple_assign (gs: stmt2)
1044 || gimple_assign_rhs_code (gs: stmt2) != RDIV_EXPR
1045 || gimple_assign_rhs1 (gs: stmt2) == arg1
1046 || gimple_assign_rhs2 (gs: stmt2) != arg1)
1047 {
1048 fail = true;
1049 break;
1050 }
1051 }
1052 if (fail)
1053 continue;
1054
1055 gimple_replace_ssa_lhs (call, arg1);
1056 if (gimple_call_internal_p (gs: call) != (ifn != IFN_LAST))
1057 {
1058 auto_vec<tree, 4> args;
1059 for (unsigned int i = 0;
1060 i < gimple_call_num_args (gs: call); i++)
1061 args.safe_push (obj: gimple_call_arg (gs: call, index: i));
1062 gcall *stmt2;
1063 if (ifn == IFN_LAST)
1064 stmt2 = gimple_build_call_vec (fndecl, args);
1065 else
1066 stmt2 = gimple_build_call_internal_vec (ifn, args);
1067 gimple_call_set_lhs (gs: stmt2, lhs: arg1);
1068 gimple_move_vops (stmt2, call);
1069 gimple_call_set_nothrow (s: stmt2,
1070 nothrow_p: gimple_call_nothrow_p (s: call));
1071 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1072 gsi_replace (&gsi2, stmt2, true);
1073 }
1074 else
1075 {
1076 if (ifn == IFN_LAST)
1077 gimple_call_set_fndecl (gs: call, decl: fndecl);
1078 else
1079 gimple_call_set_internal_fn (call_stmt: call, fn: ifn);
1080 update_stmt (s: call);
1081 }
1082 reciprocal_stats.rfuncs_inserted++;
1083
1084 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1085 {
1086 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1087 gimple_assign_set_rhs_code (s: stmt, code: MULT_EXPR);
1088 fold_stmt_inplace (&gsi);
1089 update_stmt (s: stmt);
1090 }
1091 }
1092 }
1093 }
1094 }
1095
1096 statistics_counter_event (fun, "reciprocal divs inserted",
1097 reciprocal_stats.rdivs_inserted);
1098 statistics_counter_event (fun, "reciprocal functions inserted",
1099 reciprocal_stats.rfuncs_inserted);
1100
1101 free_dominance_info (CDI_DOMINATORS);
1102 free_dominance_info (CDI_POST_DOMINATORS);
1103 delete occ_pool;
1104 return 0;
1105}
1106
1107} // anon namespace
1108
1109gimple_opt_pass *
1110make_pass_cse_reciprocals (gcc::context *ctxt)
1111{
1112 return new pass_cse_reciprocals (ctxt);
1113}
1114
1115/* If NAME is the result of a type conversion, look for other
1116 equivalent dominating or dominated conversions, and replace all
1117 uses with the earliest dominating name, removing the redundant
1118 conversions. Return the prevailing name. */
1119
1120static tree
1121execute_cse_conv_1 (tree name, bool *cfg_changed)
1122{
1123 if (SSA_NAME_IS_DEFAULT_DEF (name)
1124 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1125 return name;
1126
1127 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1128
1129 if (!gimple_assign_cast_p (s: def_stmt))
1130 return name;
1131
1132 tree src = gimple_assign_rhs1 (gs: def_stmt);
1133
1134 if (TREE_CODE (src) != SSA_NAME)
1135 return name;
1136
1137 imm_use_iterator use_iter;
1138 gimple *use_stmt;
1139
1140 /* Find the earliest dominating def. */
1141 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1142 {
1143 if (use_stmt == def_stmt
1144 || !gimple_assign_cast_p (s: use_stmt))
1145 continue;
1146
1147 tree lhs = gimple_assign_lhs (gs: use_stmt);
1148
1149 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1150 || (gimple_assign_rhs1 (gs: use_stmt)
1151 != gimple_assign_rhs1 (gs: def_stmt))
1152 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1153 continue;
1154
1155 bool use_dominates;
1156 if (gimple_bb (g: def_stmt) == gimple_bb (g: use_stmt))
1157 {
1158 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1159 while (!gsi_end_p (i: gsi) && gsi_stmt (i: gsi) != def_stmt)
1160 gsi_next (i: &gsi);
1161 use_dominates = !gsi_end_p (i: gsi);
1162 }
1163 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt),
1164 gimple_bb (g: def_stmt)))
1165 use_dominates = false;
1166 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: def_stmt),
1167 gimple_bb (g: use_stmt)))
1168 use_dominates = true;
1169 else
1170 continue;
1171
1172 if (use_dominates)
1173 {
1174 std::swap (a&: name, b&: lhs);
1175 std::swap (a&: def_stmt, b&: use_stmt);
1176 }
1177 }
1178
1179 /* Now go through all uses of SRC again, replacing the equivalent
1180 dominated conversions. We may replace defs that were not
1181 dominated by the then-prevailing defs when we first visited
1182 them. */
1183 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1184 {
1185 if (use_stmt == def_stmt
1186 || !gimple_assign_cast_p (s: use_stmt))
1187 continue;
1188
1189 tree lhs = gimple_assign_lhs (gs: use_stmt);
1190
1191 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1192 || (gimple_assign_rhs1 (gs: use_stmt)
1193 != gimple_assign_rhs1 (gs: def_stmt))
1194 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1195 continue;
1196
1197 basic_block use_bb = gimple_bb (g: use_stmt);
1198 if (gimple_bb (g: def_stmt) == use_bb
1199 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (g: def_stmt)))
1200 {
1201 sincos_stats.conv_removed++;
1202
1203 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1204 replace_uses_by (lhs, name);
1205 if (gsi_remove (&gsi, true)
1206 && gimple_purge_dead_eh_edges (use_bb))
1207 *cfg_changed = true;
1208 release_defs (use_stmt);
1209 }
1210 }
1211
1212 return name;
1213}
1214
1215/* Records an occurrence at statement USE_STMT in the vector of trees
1216 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1217 is not yet initialized. Returns true if the occurrence was pushed on
1218 the vector. Adjusts *TOP_BB to be the basic block dominating all
1219 statements in the vector. */
1220
1221static bool
1222maybe_record_sincos (vec<gimple *> *stmts,
1223 basic_block *top_bb, gimple *use_stmt)
1224{
1225 basic_block use_bb = gimple_bb (g: use_stmt);
1226 if (*top_bb
1227 && (*top_bb == use_bb
1228 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1229 stmts->safe_push (obj: use_stmt);
1230 else if (!*top_bb
1231 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1232 {
1233 stmts->safe_push (obj: use_stmt);
1234 *top_bb = use_bb;
1235 }
1236 else
1237 return false;
1238
1239 return true;
1240}
1241
1242/* Look for sin, cos and cexpi calls with the same argument NAME and
1243 create a single call to cexpi CSEing the result in this case.
1244 We first walk over all immediate uses of the argument collecting
1245 statements that we can CSE in a vector and in a second pass replace
1246 the statement rhs with a REALPART or IMAGPART expression on the
1247 result of the cexpi call we insert before the use statement that
1248 dominates all other candidates. */
1249
1250static bool
1251execute_cse_sincos_1 (tree name)
1252{
1253 gimple_stmt_iterator gsi;
1254 imm_use_iterator use_iter;
1255 tree fndecl, res, type = NULL_TREE;
1256 gimple *def_stmt, *use_stmt, *stmt;
1257 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1258 auto_vec<gimple *> stmts;
1259 basic_block top_bb = NULL;
1260 int i;
1261 bool cfg_changed = false;
1262
1263 name = execute_cse_conv_1 (name, cfg_changed: &cfg_changed);
1264
1265 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1266 {
1267 if (gimple_code (g: use_stmt) != GIMPLE_CALL
1268 || !gimple_call_lhs (gs: use_stmt))
1269 continue;
1270
1271 switch (gimple_call_combined_fn (use_stmt))
1272 {
1273 CASE_CFN_COS:
1274 seen_cos |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1275 break;
1276
1277 CASE_CFN_SIN:
1278 seen_sin |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1279 break;
1280
1281 CASE_CFN_CEXPI:
1282 seen_cexpi |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1283 break;
1284
1285 default:;
1286 continue;
1287 }
1288
1289 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1290 if (!type)
1291 {
1292 type = t;
1293 t = TREE_TYPE (name);
1294 }
1295 /* This checks that NAME has the right type in the first round,
1296 and, in subsequent rounds, that the built_in type is the same
1297 type, or a compatible type. */
1298 if (type != t && !types_compatible_p (type1: type, type2: t))
1299 return false;
1300 }
1301 if (seen_cos + seen_sin + seen_cexpi <= 1)
1302 return false;
1303
1304 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1305 the name def statement. */
1306 fndecl = mathfn_built_in (type, fn: BUILT_IN_CEXPI);
1307 if (!fndecl)
1308 return false;
1309 stmt = gimple_build_call (fndecl, 1, name);
1310 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, name: "sincostmp");
1311 gimple_call_set_lhs (gs: stmt, lhs: res);
1312
1313 def_stmt = SSA_NAME_DEF_STMT (name);
1314 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1315 && gimple_code (g: def_stmt) != GIMPLE_PHI
1316 && gimple_bb (g: def_stmt) == top_bb)
1317 {
1318 gsi = gsi_for_stmt (def_stmt);
1319 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1320 }
1321 else
1322 {
1323 gsi = gsi_after_labels (bb: top_bb);
1324 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1325 }
1326 sincos_stats.inserted++;
1327
1328 /* And adjust the recorded old call sites. */
1329 for (i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i)
1330 {
1331 tree rhs = NULL;
1332
1333 switch (gimple_call_combined_fn (use_stmt))
1334 {
1335 CASE_CFN_COS:
1336 rhs = fold_build1 (REALPART_EXPR, type, res);
1337 break;
1338
1339 CASE_CFN_SIN:
1340 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1341 break;
1342
1343 CASE_CFN_CEXPI:
1344 rhs = res;
1345 break;
1346
1347 default:;
1348 gcc_unreachable ();
1349 }
1350
1351 /* Replace call with a copy. */
1352 stmt = gimple_build_assign (gimple_call_lhs (gs: use_stmt), rhs);
1353
1354 gsi = gsi_for_stmt (use_stmt);
1355 gsi_replace (&gsi, stmt, true);
1356 if (gimple_purge_dead_eh_edges (gimple_bb (g: stmt)))
1357 cfg_changed = true;
1358 }
1359
1360 return cfg_changed;
1361}
1362
1363/* To evaluate powi(x,n), the floating point value x raised to the
1364 constant integer exponent n, we use a hybrid algorithm that
1365 combines the "window method" with look-up tables. For an
1366 introduction to exponentiation algorithms and "addition chains",
1367 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1368 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1369 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1370 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1371
1372/* Provide a default value for POWI_MAX_MULTS, the maximum number of
1373 multiplications to inline before calling the system library's pow
1374 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1375 so this default never requires calling pow, powf or powl. */
1376
1377#ifndef POWI_MAX_MULTS
1378#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1379#endif
1380
1381/* The size of the "optimal power tree" lookup table. All
1382 exponents less than this value are simply looked up in the
1383 powi_table below. This threshold is also used to size the
1384 cache of pseudo registers that hold intermediate results. */
1385#define POWI_TABLE_SIZE 256
1386
1387/* The size, in bits of the window, used in the "window method"
1388 exponentiation algorithm. This is equivalent to a radix of
1389 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1390#define POWI_WINDOW_SIZE 3
1391
1392/* The following table is an efficient representation of an
1393 "optimal power tree". For each value, i, the corresponding
1394 value, j, in the table states than an optimal evaluation
1395 sequence for calculating pow(x,i) can be found by evaluating
1396 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1397 100 integers is given in Knuth's "Seminumerical algorithms". */
1398
1399static const unsigned char powi_table[POWI_TABLE_SIZE] =
1400 {
1401 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1402 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1403 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1404 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1405 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1406 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1407 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1408 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1409 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1410 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1411 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1412 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1413 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1414 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1415 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1416 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1417 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1418 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1419 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1420 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1421 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1422 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1423 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1424 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1425 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1426 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1427 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1428 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1429 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1430 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1431 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1432 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1433 };
1434
1435
1436/* Return the number of multiplications required to calculate
1437 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1438 subroutine of powi_cost. CACHE is an array indicating
1439 which exponents have already been calculated. */
1440
1441static int
1442powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1443{
1444 /* If we've already calculated this exponent, then this evaluation
1445 doesn't require any additional multiplications. */
1446 if (cache[n])
1447 return 0;
1448
1449 cache[n] = true;
1450 return powi_lookup_cost (n: n - powi_table[n], cache)
1451 + powi_lookup_cost (n: powi_table[n], cache) + 1;
1452}
1453
1454/* Return the number of multiplications required to calculate
1455 powi(x,n) for an arbitrary x, given the exponent N. This
1456 function needs to be kept in sync with powi_as_mults below. */
1457
1458static int
1459powi_cost (HOST_WIDE_INT n)
1460{
1461 bool cache[POWI_TABLE_SIZE];
1462 unsigned HOST_WIDE_INT digit;
1463 unsigned HOST_WIDE_INT val;
1464 int result;
1465
1466 if (n == 0)
1467 return 0;
1468
1469 /* Ignore the reciprocal when calculating the cost. */
1470 val = absu_hwi (x: n);
1471
1472 /* Initialize the exponent cache. */
1473 memset (s: cache, c: 0, POWI_TABLE_SIZE * sizeof (bool));
1474 cache[1] = true;
1475
1476 result = 0;
1477
1478 while (val >= POWI_TABLE_SIZE)
1479 {
1480 if (val & 1)
1481 {
1482 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1483 result += powi_lookup_cost (n: digit, cache)
1484 + POWI_WINDOW_SIZE + 1;
1485 val >>= POWI_WINDOW_SIZE;
1486 }
1487 else
1488 {
1489 val >>= 1;
1490 result++;
1491 }
1492 }
1493
1494 return result + powi_lookup_cost (n: val, cache);
1495}
1496
1497/* Recursive subroutine of powi_as_mults. This function takes the
1498 array, CACHE, of already calculated exponents and an exponent N and
1499 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1500
1501static tree
1502powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1503 unsigned HOST_WIDE_INT n, tree *cache)
1504{
1505 tree op0, op1, ssa_target;
1506 unsigned HOST_WIDE_INT digit;
1507 gassign *mult_stmt;
1508
1509 if (n < POWI_TABLE_SIZE && cache[n])
1510 return cache[n];
1511
1512 ssa_target = make_temp_ssa_name (type, NULL, name: "powmult");
1513
1514 if (n < POWI_TABLE_SIZE)
1515 {
1516 cache[n] = ssa_target;
1517 op0 = powi_as_mults_1 (gsi, loc, type, n: n - powi_table[n], cache);
1518 op1 = powi_as_mults_1 (gsi, loc, type, n: powi_table[n], cache);
1519 }
1520 else if (n & 1)
1521 {
1522 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1523 op0 = powi_as_mults_1 (gsi, loc, type, n: n - digit, cache);
1524 op1 = powi_as_mults_1 (gsi, loc, type, n: digit, cache);
1525 }
1526 else
1527 {
1528 op0 = powi_as_mults_1 (gsi, loc, type, n: n >> 1, cache);
1529 op1 = op0;
1530 }
1531
1532 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1533 gimple_set_location (g: mult_stmt, location: loc);
1534 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1535
1536 return ssa_target;
1537}
1538
1539/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1540 This function needs to be kept in sync with powi_cost above. */
1541
1542tree
1543powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1544 tree arg0, HOST_WIDE_INT n)
1545{
1546 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1547 gassign *div_stmt;
1548 tree target;
1549
1550 if (n == 0)
1551 return build_one_cst (type);
1552
1553 memset (s: cache, c: 0, n: sizeof (cache));
1554 cache[1] = arg0;
1555
1556 result = powi_as_mults_1 (gsi, loc, type, n: absu_hwi (x: n), cache);
1557 if (n >= 0)
1558 return result;
1559
1560 /* If the original exponent was negative, reciprocate the result. */
1561 target = make_temp_ssa_name (type, NULL, name: "powmult");
1562 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1563 build_real (type, dconst1), result);
1564 gimple_set_location (g: div_stmt, location: loc);
1565 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1566
1567 return target;
1568}
1569
1570/* ARG0 and N are the two arguments to a powi builtin in GSI with
1571 location info LOC. If the arguments are appropriate, create an
1572 equivalent sequence of statements prior to GSI using an optimal
1573 number of multiplications, and return an expession holding the
1574 result. */
1575
1576static tree
1577gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1578 tree arg0, HOST_WIDE_INT n)
1579{
1580 if ((n >= -1 && n <= 2)
1581 || (optimize_function_for_speed_p (cfun)
1582 && powi_cost (n) <= POWI_MAX_MULTS))
1583 return powi_as_mults (gsi, loc, arg0, n);
1584
1585 return NULL_TREE;
1586}
1587
1588/* Build a gimple call statement that calls FN with argument ARG.
1589 Set the lhs of the call statement to a fresh SSA name. Insert the
1590 statement prior to GSI's current position, and return the fresh
1591 SSA name. */
1592
1593static tree
1594build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1595 tree fn, tree arg)
1596{
1597 gcall *call_stmt;
1598 tree ssa_target;
1599
1600 call_stmt = gimple_build_call (fn, 1, arg);
1601 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, name: "powroot");
1602 gimple_set_lhs (call_stmt, ssa_target);
1603 gimple_set_location (g: call_stmt, location: loc);
1604 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1605
1606 return ssa_target;
1607}
1608
1609/* Build a gimple binary operation with the given CODE and arguments
1610 ARG0, ARG1, assigning the result to a new SSA name for variable
1611 TARGET. Insert the statement prior to GSI's current position, and
1612 return the fresh SSA name.*/
1613
1614static tree
1615build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1616 const char *name, enum tree_code code,
1617 tree arg0, tree arg1)
1618{
1619 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1620 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1621 gimple_set_location (g: stmt, location: loc);
1622 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1623 return result;
1624}
1625
1626/* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1627 prior to GSI's current position, and return the fresh SSA name. */
1628
1629static tree
1630build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1631 tree type, tree val)
1632{
1633 tree result = make_ssa_name (var: type);
1634 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1635 gimple_set_location (g: stmt, location: loc);
1636 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1637 return result;
1638}
1639
1640struct pow_synth_sqrt_info
1641{
1642 bool *factors;
1643 unsigned int deepest;
1644 unsigned int num_mults;
1645};
1646
1647/* Return true iff the real value C can be represented as a
1648 sum of powers of 0.5 up to N. That is:
1649 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1650 Record in INFO the various parameters of the synthesis algorithm such
1651 as the factors a[i], the maximum 0.5 power and the number of
1652 multiplications that will be required. */
1653
1654bool
1655representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1656 struct pow_synth_sqrt_info *info)
1657{
1658 REAL_VALUE_TYPE factor = dconsthalf;
1659 REAL_VALUE_TYPE remainder = c;
1660
1661 info->deepest = 0;
1662 info->num_mults = 0;
1663 memset (s: info->factors, c: 0, n: n * sizeof (bool));
1664
1665 for (unsigned i = 0; i < n; i++)
1666 {
1667 REAL_VALUE_TYPE res;
1668
1669 /* If something inexact happened bail out now. */
1670 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1671 return false;
1672
1673 /* We have hit zero. The number is representable as a sum
1674 of powers of 0.5. */
1675 if (real_equal (&res, &dconst0))
1676 {
1677 info->factors[i] = true;
1678 info->deepest = i + 1;
1679 return true;
1680 }
1681 else if (!REAL_VALUE_NEGATIVE (res))
1682 {
1683 remainder = res;
1684 info->factors[i] = true;
1685 info->num_mults++;
1686 }
1687 else
1688 info->factors[i] = false;
1689
1690 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1691 }
1692 return false;
1693}
1694
1695/* Return the tree corresponding to FN being applied
1696 to ARG N times at GSI and LOC.
1697 Look up previous results from CACHE if need be.
1698 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1699
1700static tree
1701get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1702 tree fn, location_t loc, tree *cache)
1703{
1704 tree res = cache[n];
1705 if (!res)
1706 {
1707 tree prev = get_fn_chain (arg, n: n - 1, gsi, fn, loc, cache);
1708 res = build_and_insert_call (gsi, loc, fn, arg: prev);
1709 cache[n] = res;
1710 }
1711
1712 return res;
1713}
1714
1715/* Print to STREAM the repeated application of function FNAME to ARG
1716 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1717 "foo (foo (x))". */
1718
1719static void
1720print_nested_fn (FILE* stream, const char *fname, const char* arg,
1721 unsigned int n)
1722{
1723 if (n == 0)
1724 fprintf (stream: stream, format: "%s", arg);
1725 else
1726 {
1727 fprintf (stream: stream, format: "%s (", fname);
1728 print_nested_fn (stream, fname, arg, n: n - 1);
1729 fprintf (stream: stream, format: ")");
1730 }
1731}
1732
1733/* Print to STREAM the fractional sequence of sqrt chains
1734 applied to ARG, described by INFO. Used for the dump file. */
1735
1736static void
1737dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1738 struct pow_synth_sqrt_info *info)
1739{
1740 for (unsigned int i = 0; i < info->deepest; i++)
1741 {
1742 bool is_set = info->factors[i];
1743 if (is_set)
1744 {
1745 print_nested_fn (stream, fname: "sqrt", arg, n: i + 1);
1746 if (i != info->deepest - 1)
1747 fprintf (stream: stream, format: " * ");
1748 }
1749 }
1750}
1751
1752/* Print to STREAM a representation of raising ARG to an integer
1753 power N. Used for the dump file. */
1754
1755static void
1756dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1757{
1758 if (n > 1)
1759 fprintf (stream: stream, format: "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1760 else if (n == 1)
1761 fprintf (stream: stream, format: "%s", arg);
1762}
1763
1764/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1765 square roots. Place at GSI and LOC. Limit the maximum depth
1766 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1767 result of the expanded sequence or NULL_TREE if the expansion failed.
1768
1769 This routine assumes that ARG1 is a real number with a fractional part
1770 (the integer exponent case will have been handled earlier in
1771 gimple_expand_builtin_pow).
1772
1773 For ARG1 > 0.0:
1774 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1775 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1776 FRAC_PART == ARG1 - WHOLE_PART:
1777 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1778 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1779 if it can be expressed as such, that is if FRAC_PART satisfies:
1780 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1781 where integer a[i] is either 0 or 1.
1782
1783 Example:
1784 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1785 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1786
1787 For ARG1 < 0.0 there are two approaches:
1788 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1789 is calculated as above.
1790
1791 Example:
1792 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1793 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1794
1795 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1796 FRAC_PART := ARG1 - WHOLE_PART
1797 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1798 Example:
1799 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1800 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1801
1802 For ARG1 < 0.0 we choose between (A) and (B) depending on
1803 how many multiplications we'd have to do.
1804 So, for the example in (B): POW (x, -5.875), if we were to
1805 follow algorithm (A) we would produce:
1806 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1807 which contains more multiplications than approach (B).
1808
1809 Hopefully, this approach will eliminate potentially expensive POW library
1810 calls when unsafe floating point math is enabled and allow the compiler to
1811 further optimise the multiplies, square roots and divides produced by this
1812 function. */
1813
1814static tree
1815expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1816 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1817{
1818 tree type = TREE_TYPE (arg0);
1819 machine_mode mode = TYPE_MODE (type);
1820 tree sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT);
1821 bool one_over = true;
1822
1823 if (!sqrtfn)
1824 return NULL_TREE;
1825
1826 if (TREE_CODE (arg1) != REAL_CST)
1827 return NULL_TREE;
1828
1829 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1830
1831 gcc_assert (max_depth > 0);
1832 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1833
1834 struct pow_synth_sqrt_info synth_info;
1835 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1836 synth_info.deepest = 0;
1837 synth_info.num_mults = 0;
1838
1839 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1840 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1841
1842 /* The whole and fractional parts of exp. */
1843 REAL_VALUE_TYPE whole_part;
1844 REAL_VALUE_TYPE frac_part;
1845
1846 real_floor (&whole_part, mode, &exp);
1847 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1848
1849
1850 REAL_VALUE_TYPE ceil_whole = dconst0;
1851 REAL_VALUE_TYPE ceil_fract = dconst0;
1852
1853 if (neg_exp)
1854 {
1855 real_ceil (&ceil_whole, mode, &exp);
1856 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1857 }
1858
1859 if (!representable_as_half_series_p (c: frac_part, n: max_depth, info: &synth_info))
1860 return NULL_TREE;
1861
1862 /* Check whether it's more profitable to not use 1.0 / ... */
1863 if (neg_exp)
1864 {
1865 struct pow_synth_sqrt_info alt_synth_info;
1866 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1867 alt_synth_info.deepest = 0;
1868 alt_synth_info.num_mults = 0;
1869
1870 if (representable_as_half_series_p (c: ceil_fract, n: max_depth,
1871 info: &alt_synth_info)
1872 && alt_synth_info.deepest <= synth_info.deepest
1873 && alt_synth_info.num_mults < synth_info.num_mults)
1874 {
1875 whole_part = ceil_whole;
1876 frac_part = ceil_fract;
1877 synth_info.deepest = alt_synth_info.deepest;
1878 synth_info.num_mults = alt_synth_info.num_mults;
1879 memcpy (dest: synth_info.factors, src: alt_synth_info.factors,
1880 n: (max_depth + 1) * sizeof (bool));
1881 one_over = false;
1882 }
1883 }
1884
1885 HOST_WIDE_INT n = real_to_integer (&whole_part);
1886 REAL_VALUE_TYPE cint;
1887 real_from_integer (&cint, VOIDmode, n, SIGNED);
1888
1889 if (!real_identical (&whole_part, &cint))
1890 return NULL_TREE;
1891
1892 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1893 return NULL_TREE;
1894
1895 memset (s: cache, c: 0, n: (max_depth + 1) * sizeof (tree));
1896
1897 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1898
1899 /* Calculate the integer part of the exponent. */
1900 if (n > 1)
1901 {
1902 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1903 if (!integer_res)
1904 return NULL_TREE;
1905 }
1906
1907 if (dump_file)
1908 {
1909 char string[64];
1910
1911 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1912 fprintf (stream: dump_file, format: "synthesizing pow (x, %s) as:\n", string);
1913
1914 if (neg_exp)
1915 {
1916 if (one_over)
1917 {
1918 fprintf (stream: dump_file, format: "1.0 / (");
1919 dump_integer_part (stream: dump_file, arg: "x", n);
1920 if (n > 0)
1921 fprintf (stream: dump_file, format: " * ");
1922 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1923 fprintf (stream: dump_file, format: ")");
1924 }
1925 else
1926 {
1927 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1928 fprintf (stream: dump_file, format: " / (");
1929 dump_integer_part (stream: dump_file, arg: "x", n);
1930 fprintf (stream: dump_file, format: ")");
1931 }
1932 }
1933 else
1934 {
1935 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1936 if (n > 0)
1937 fprintf (stream: dump_file, format: " * ");
1938 dump_integer_part (stream: dump_file, arg: "x", n);
1939 }
1940
1941 fprintf (stream: dump_file, format: "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1942 }
1943
1944
1945 tree fract_res = NULL_TREE;
1946 cache[0] = arg0;
1947
1948 /* Calculate the fractional part of the exponent. */
1949 for (unsigned i = 0; i < synth_info.deepest; i++)
1950 {
1951 if (synth_info.factors[i])
1952 {
1953 tree sqrt_chain = get_fn_chain (arg: arg0, n: i + 1, gsi, fn: sqrtfn, loc, cache);
1954
1955 if (!fract_res)
1956 fract_res = sqrt_chain;
1957
1958 else
1959 fract_res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
1960 arg0: fract_res, arg1: sqrt_chain);
1961 }
1962 }
1963
1964 tree res = NULL_TREE;
1965
1966 if (neg_exp)
1967 {
1968 if (one_over)
1969 {
1970 if (n > 0)
1971 res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
1972 arg0: fract_res, arg1: integer_res);
1973 else
1974 res = fract_res;
1975
1976 res = build_and_insert_binop (gsi, loc, name: "powrootrecip", code: RDIV_EXPR,
1977 arg0: build_real (type, dconst1), arg1: res);
1978 }
1979 else
1980 {
1981 res = build_and_insert_binop (gsi, loc, name: "powroot", code: RDIV_EXPR,
1982 arg0: fract_res, arg1: integer_res);
1983 }
1984 }
1985 else
1986 res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
1987 arg0: fract_res, arg1: integer_res);
1988 return res;
1989}
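
/* The two shapes described in the comment above can be spot-checked against
   libm (illustrative sketch only; x > 0 is assumed and std::pow stands in for
   the powi sequences this pass emits):

     #include <cmath>
     #include <cstdio>

     int
     main ()
     {
       double x = 1.7;
       double s1 = std::sqrt (x);          // x**0.5
       double s2 = std::sqrt (s1);         // x**0.25
       double s3 = std::sqrt (s2);         // x**0.125
       // Approach (A) for pow (x, -5.875): 1.0 / (powi (x, 5) * x**0.875).
       double a = 1.0 / (std::pow (x, 5) * s1 * s2 * s3);
       // Approach (B): pow (x, 0.125) / powi (x, 6), which needs fewer
       // multiplications and is therefore preferred for this exponent.
       double b = s3 / std::pow (x, 6);
       std::printf ("%g %g %g\n", a, b, std::pow (x, -5.875));
       return 0;
     }
*/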
1990
1991/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1992 with location info LOC. If possible, create an equivalent and
1993 less expensive sequence of statements prior to GSI, and return an
1994 expression holding the result. */
1995
1996static tree
1997gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1998 tree arg0, tree arg1)
1999{
2000 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2001 REAL_VALUE_TYPE c2, dconst3;
2002 HOST_WIDE_INT n;
2003 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2004 machine_mode mode;
2005 bool speed_p = optimize_bb_for_speed_p (gsi_bb (i: *gsi));
2006 bool hw_sqrt_exists, c_is_int, c2_is_int;
2007
2008 dconst1_4 = dconst1;
2009 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2010
2011 /* If the exponent isn't a constant, there's nothing of interest
2012 to be done. */
2013 if (TREE_CODE (arg1) != REAL_CST)
2014 return NULL_TREE;
2015
2016 /* Don't perform the operation if flag_signaling_nans is on
2017 and the operand is a signaling NaN. */
2018 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2019 && ((TREE_CODE (arg0) == REAL_CST
2020 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2021 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2022 return NULL_TREE;
2023
2024 /* If the exponent is equivalent to an integer, expand to an optimal
2025 multiplication sequence when profitable. */
2026 c = TREE_REAL_CST (arg1);
2027 n = real_to_integer (&c);
2028 real_from_integer (&cint, VOIDmode, n, SIGNED);
2029 c_is_int = real_identical (&c, &cint);
2030
2031 if (c_is_int
2032 && ((n >= -1 && n <= 2)
2033 || (flag_unsafe_math_optimizations
2034 && speed_p
2035 && powi_cost (n) <= POWI_MAX_MULTS)))
2036 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2037
2038 /* Attempt various optimizations using sqrt and cbrt. */
2039 type = TREE_TYPE (arg0);
2040 mode = TYPE_MODE (type);
2041 sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT);
2042
2043 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2044 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2045 sqrt(-0) = -0. */
2046 if (sqrtfn
2047 && real_equal (&c, &dconsthalf)
2048 && !HONOR_SIGNED_ZEROS (mode))
2049 return build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0);
2050
2051 hw_sqrt_exists = optab_handler (op: sqrt_optab, mode) != CODE_FOR_nothing;
2052
2053 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2054 optimizations since 1./3. is not exactly representable. If x
2055 is negative and finite, the correct value of pow(x,1./3.) is
2056 a NaN with the "invalid" exception raised, because the value
2057 of 1./3. actually has an even denominator. The correct value
2058 of cbrt(x) is a negative real value. */
2059 cbrtfn = mathfn_built_in (type, fn: BUILT_IN_CBRT);
2060 dconst1_3 = real_value_truncate (mode, dconst_third ());
2061
2062 if (flag_unsafe_math_optimizations
2063 && cbrtfn
2064 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2065 && real_equal (&c, &dconst1_3))
2066 return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0);
2067
2068 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2069 if we don't have a hardware sqrt insn. */
2070 dconst1_6 = dconst1_3;
2071 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2072
2073 if (flag_unsafe_math_optimizations
2074 && sqrtfn
2075 && cbrtfn
2076 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2077 && speed_p
2078 && hw_sqrt_exists
2079 && real_equal (&c, &dconst1_6))
2080 {
2081 /* sqrt(x) */
2082 sqrt_arg0 = build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0);
2083
2084 /* cbrt(sqrt(x)) */
2085 return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: sqrt_arg0);
2086 }
2087
2088
2089 /* Attempt to expand the POW as a product of square root chains.
2090 Expand the 0.25 case even when optimising for size. */
2091 if (flag_unsafe_math_optimizations
2092 && sqrtfn
2093 && hw_sqrt_exists
2094 && (speed_p || real_equal (&c, &dconst1_4))
2095 && !HONOR_SIGNED_ZEROS (mode))
2096 {
2097 unsigned int max_depth = speed_p
2098 ? param_max_pow_sqrt_depth
2099 : 2;
2100
2101 tree expand_with_sqrts
2102 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2103
2104 if (expand_with_sqrts)
2105 return expand_with_sqrts;
2106 }
2107
2108 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2109 n = real_to_integer (&c2);
2110 real_from_integer (&cint, VOIDmode, n, SIGNED);
2111 c2_is_int = real_identical (&c2, &cint);
2112
2113 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2114
2115 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2116 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2117
2118 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2119 different from pow(x, 1./3.) due to rounding and behavior with
2120 negative x, we need to constrain this transformation to unsafe
2121 math and positive x or finite math. */
2122 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2123 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2124 real_round (&c2, mode, &c2);
2125 n = real_to_integer (&c2);
2126 real_from_integer (&cint, VOIDmode, n, SIGNED);
2127 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2128 real_convert (&c2, mode, &c2);
2129
2130 if (flag_unsafe_math_optimizations
2131 && cbrtfn
2132 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2133 && real_identical (&c2, &c)
2134 && !c2_is_int
2135 && optimize_function_for_speed_p (cfun)
2136 && powi_cost (n: n / 3) <= POWI_MAX_MULTS)
2137 {
2138 tree powi_x_ndiv3 = NULL_TREE;
2139
2140 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2141 possible or profitable, give up. Skip the degenerate case when
2142 abs(n) < 3, where the result is always 1. */
2143 if (absu_hwi (x: n) >= 3)
2144 {
2145 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2146 n: abs_hwi (x: n / 3));
2147 if (!powi_x_ndiv3)
2148 return NULL_TREE;
2149 }
2150
2151 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2152 as that creates an unnecessary variable. Instead, just produce
2153 either cbrt(x) or cbrt(x) * cbrt(x). */
2154 cbrt_x = build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0);
2155
2156 if (absu_hwi (x: n) % 3 == 1)
2157 powi_cbrt_x = cbrt_x;
2158 else
2159 powi_cbrt_x = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
2160 arg0: cbrt_x, arg1: cbrt_x);
2161
2162 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2163 if (absu_hwi (x: n) < 3)
2164 result = powi_cbrt_x;
2165 else
2166 result = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
2167 arg0: powi_x_ndiv3, arg1: powi_cbrt_x);
2168
2169 /* If n is negative, reciprocate the result. */
2170 if (n < 0)
2171 result = build_and_insert_binop (gsi, loc, name: "powroot", code: RDIV_EXPR,
2172 arg0: build_real (type, dconst1), arg1: result);
2173
2174 return result;
2175 }
2176
2177 /* No optimizations succeeded. */
2178 return NULL_TREE;
2179}
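
/* The sqrt/cbrt identities used above can be spot-checked with ordinary libm
   calls (illustrative sketch only; assumes x > 0 and ignores the rounding and
   exception caveats discussed in the comments above):

     #include <cassert>
     #include <cmath>

     static void
     pow_identity_sketch (double x)
     {
       const double tol = 1e-12;
       // pow (x, 0.5) -> sqrt (x).
       assert (std::fabs (std::pow (x, 0.5) / std::sqrt (x) - 1.0) < tol);
       // pow (x, 1./6.) -> cbrt (sqrt (x)).
       assert (std::fabs (std::pow (x, 1.0 / 6.0)
                          / std::cbrt (std::sqrt (x)) - 1.0) < tol);
       // pow (x, 4./3.) -> x * cbrt (x): the 3c == n case with n == 4.
       assert (std::fabs (std::pow (x, 4.0 / 3.0)
                          / (x * std::cbrt (x)) - 1.0) < tol);
     }
*/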
2180
2181/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2182 on the SSA_NAME argument of each of them. */
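
/* At the source level the pattern of interest looks roughly like this
   (illustrative sketch; sincos is a GNU libm extension and is only relied on
   when the target's libc provides it or cexp, as checked below):

     #define _GNU_SOURCE   // for sincos
     #include <math.h>

     // What the user writes: two calls sharing the same argument.
     void
     before (double a, double *s, double *c)
     {
       *s = sin (a);
       *c = cos (a);
     }

     // Roughly what this pass arranges for: one combined evaluation.
     void
     after (double a, double *s, double *c)
     {
       sincos (a, s, c);
     }
*/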
2183
2184namespace {
2185
2186const pass_data pass_data_cse_sincos =
2187{
2188 .type: GIMPLE_PASS, /* type */
2189 .name: "sincos", /* name */
2190 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
2191 .tv_id: TV_TREE_SINCOS, /* tv_id */
2192 PROP_ssa, /* properties_required */
2193 .properties_provided: 0, /* properties_provided */
2194 .properties_destroyed: 0, /* properties_destroyed */
2195 .todo_flags_start: 0, /* todo_flags_start */
2196 TODO_update_ssa, /* todo_flags_finish */
2197};
2198
2199class pass_cse_sincos : public gimple_opt_pass
2200{
2201public:
2202 pass_cse_sincos (gcc::context *ctxt)
2203 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2204 {}
2205
2206 /* opt_pass methods: */
2207 bool gate (function *) final override
2208 {
2209 return optimize;
2210 }
2211
2212 unsigned int execute (function *) final override;
2213
2214}; // class pass_cse_sincos
2215
2216unsigned int
2217pass_cse_sincos::execute (function *fun)
2218{
2219 basic_block bb;
2220 bool cfg_changed = false;
2221
2222 calculate_dominance_info (CDI_DOMINATORS);
2223 memset (s: &sincos_stats, c: 0, n: sizeof (sincos_stats));
2224
2225 FOR_EACH_BB_FN (bb, fun)
2226 {
2227 gimple_stmt_iterator gsi;
2228
2229 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2230 {
2231 gimple *stmt = gsi_stmt (i: gsi);
2232
2233 if (is_gimple_call (gs: stmt)
2234 && gimple_call_lhs (gs: stmt))
2235 {
2236 tree arg;
2237 switch (gimple_call_combined_fn (stmt))
2238 {
2239 CASE_CFN_COS:
2240 CASE_CFN_SIN:
2241 CASE_CFN_CEXPI:
2242 arg = gimple_call_arg (gs: stmt, index: 0);
2243 /* Make sure we have either sincos or cexp. */
2244 if (!targetm.libc_has_function (function_c99_math_complex,
2245 TREE_TYPE (arg))
2246 && !targetm.libc_has_function (function_sincos,
2247 TREE_TYPE (arg)))
2248 break;
2249
2250 if (TREE_CODE (arg) == SSA_NAME)
2251 cfg_changed |= execute_cse_sincos_1 (name: arg);
2252 break;
2253 default:
2254 break;
2255 }
2256 }
2257 }
2258 }
2259
2260 statistics_counter_event (fun, "sincos statements inserted",
2261 sincos_stats.inserted);
2262 statistics_counter_event (fun, "conv statements removed",
2263 sincos_stats.conv_removed);
2264
2265 return cfg_changed ? TODO_cleanup_cfg : 0;
2266}
2267
2268} // anon namespace
2269
2270gimple_opt_pass *
2271make_pass_cse_sincos (gcc::context *ctxt)
2272{
2273 return new pass_cse_sincos (ctxt);
2274}
2275
2276/* Expand powi(x,n) into an optimal number of multiplies, when n is a
2277 constant. */
2278namespace {
2279
2280const pass_data pass_data_expand_pow =
2281{
2282 .type: GIMPLE_PASS, /* type */
2283 .name: "pow", /* name */
2284 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
2285 .tv_id: TV_TREE_POW, /* tv_id */
2286 PROP_ssa, /* properties_required */
2287 PROP_gimple_opt_math, /* properties_provided */
2288 .properties_destroyed: 0, /* properties_destroyed */
2289 .todo_flags_start: 0, /* todo_flags_start */
2290 TODO_update_ssa, /* todo_flags_finish */
2291};
2292
2293class pass_expand_pow : public gimple_opt_pass
2294{
2295public:
2296 pass_expand_pow (gcc::context *ctxt)
2297 : gimple_opt_pass (pass_data_expand_pow, ctxt)
2298 {}
2299
2300 /* opt_pass methods: */
2301 bool gate (function *) final override
2302 {
2303 return optimize;
2304 }
2305
2306 unsigned int execute (function *) final override;
2307
2308}; // class pass_expand_pow
2309
2310unsigned int
2311pass_expand_pow::execute (function *fun)
2312{
2313 basic_block bb;
2314 bool cfg_changed = false;
2315
2316 calculate_dominance_info (CDI_DOMINATORS);
2317
2318 FOR_EACH_BB_FN (bb, fun)
2319 {
2320 gimple_stmt_iterator gsi;
2321 bool cleanup_eh = false;
2322
2323 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2324 {
2325 gimple *stmt = gsi_stmt (i: gsi);
2326
2327 /* Only the last stmt in a bb could throw, no need to call
2328 gimple_purge_dead_eh_edges if we change something in the middle
2329 of a basic block. */
2330 cleanup_eh = false;
2331
2332 if (is_gimple_call (gs: stmt)
2333 && gimple_call_lhs (gs: stmt))
2334 {
2335 tree arg0, arg1, result;
2336 HOST_WIDE_INT n;
2337 location_t loc;
2338
2339 switch (gimple_call_combined_fn (stmt))
2340 {
2341 CASE_CFN_POW:
2342 arg0 = gimple_call_arg (gs: stmt, index: 0);
2343 arg1 = gimple_call_arg (gs: stmt, index: 1);
2344
2345 loc = gimple_location (g: stmt);
2346 result = gimple_expand_builtin_pow (gsi: &gsi, loc, arg0, arg1);
2347
2348 if (result)
2349 {
2350 tree lhs = gimple_get_lhs (stmt);
2351 gassign *new_stmt = gimple_build_assign (lhs, result);
2352 gimple_set_location (g: new_stmt, location: loc);
2353 unlink_stmt_vdef (stmt);
2354 gsi_replace (&gsi, new_stmt, true);
2355 cleanup_eh = true;
2356 if (gimple_vdef (g: stmt))
2357 release_ssa_name (name: gimple_vdef (g: stmt));
2358 }
2359 break;
2360
2361 CASE_CFN_POWI:
2362 arg0 = gimple_call_arg (gs: stmt, index: 0);
2363 arg1 = gimple_call_arg (gs: stmt, index: 1);
2364 loc = gimple_location (g: stmt);
2365
2366 if (real_minus_onep (arg0))
2367 {
2368 tree t0, t1, cond, one, minus_one;
2369 gassign *stmt;
2370
2371 t0 = TREE_TYPE (arg0);
2372 t1 = TREE_TYPE (arg1);
2373 one = build_real (t0, dconst1);
2374 minus_one = build_real (t0, dconstm1);
2375
2376 cond = make_temp_ssa_name (type: t1, NULL, name: "powi_cond");
2377 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2378 arg1, build_int_cst (t1, 1));
2379 gimple_set_location (g: stmt, location: loc);
2380 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2381
2382 result = make_temp_ssa_name (type: t0, NULL, name: "powi");
2383 stmt = gimple_build_assign (result, COND_EXPR, cond,
2384 minus_one, one);
2385 gimple_set_location (g: stmt, location: loc);
2386 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2387 }
2388 else
2389 {
2390 if (!tree_fits_shwi_p (arg1))
2391 break;
2392
2393 n = tree_to_shwi (arg1);
2394 result = gimple_expand_builtin_powi (gsi: &gsi, loc, arg0, n);
2395 }
2396
2397 if (result)
2398 {
2399 tree lhs = gimple_get_lhs (stmt);
2400 gassign *new_stmt = gimple_build_assign (lhs, result);
2401 gimple_set_location (g: new_stmt, location: loc);
2402 unlink_stmt_vdef (stmt);
2403 gsi_replace (&gsi, new_stmt, true);
2404 cleanup_eh = true;
2405 if (gimple_vdef (g: stmt))
2406 release_ssa_name (name: gimple_vdef (g: stmt));
2407 }
2408 break;
2409
2410 default:;
2411 }
2412 }
2413 }
2414 if (cleanup_eh)
2415 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2416 }
2417
2418 return cfg_changed ? TODO_cleanup_cfg : 0;
2419}
2420
2421} // anon namespace
2422
2423gimple_opt_pass *
2424make_pass_expand_pow (gcc::context *ctxt)
2425{
2426 return new pass_expand_pow (ctxt);
2427}
2428
2429/* Return true if stmt is a type conversion operation that can be stripped
2430 when used in a widening multiply operation. */
2431static bool
2432widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2433{
2434 enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt);
2435
2436 if (TREE_CODE (result_type) == INTEGER_TYPE)
2437 {
2438 tree op_type;
2439 tree inner_op_type;
2440
2441 if (!CONVERT_EXPR_CODE_P (rhs_code))
2442 return false;
2443
2444 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2445
2446 /* If the type of OP has the same precision as the result, then
2447 we can strip this conversion. The multiply operation will be
2448 selected to create the correct extension as a by-product. */
2449 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2450 return true;
2451
2452 /* We can also strip a conversion if it preserves the signed-ness of
2453 the operation and doesn't narrow the range. */
2454 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2455
2456 /* If the inner-most type is unsigned, then we can strip any
2457 intermediate widening operation. If it's signed, then the
2458 intermediate widening operation must also be signed. */
2459 if ((TYPE_UNSIGNED (inner_op_type)
2460 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2461 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2462 return true;
2463
2464 return false;
2465 }
2466
2467 return rhs_code == FIXED_CONVERT_EXPR;
2468}
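
/* For example (source-level sketch, not GIMPLE): in

     #include <cstdint>

     int32_t
     strip_conversion_sketch (uint16_t a, uint16_t b)
     {
       return (int32_t) a * (int32_t) b;
     }

   the (int32_t) conversions have the same precision as the result type, so
   they can be stripped and a 16x16->32 widening multiply selected directly;
   the required extension then falls out of the multiply itself.  */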
2469
2470/* Return true if RHS is a suitable operand for a widening multiplication,
2471 assuming a target type of TYPE.
2472 There are two cases:
2473
2474 - RHS makes some value at least twice as wide. Store that value
2475 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2476
2477 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2478 but leave *TYPE_OUT untouched. */
2479
2480static bool
2481is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2482 tree *new_rhs_out)
2483{
2484 gimple *stmt;
2485 tree type1, rhs1;
2486
2487 if (TREE_CODE (rhs) == SSA_NAME)
2488 {
2489 /* Use tree_nonzero_bits to see if this operand is zero-extended
2490 for unsigned widening multiplications or non-negative for
2491 signed widening multiplications. */
2492 if (TREE_CODE (type) == INTEGER_TYPE
2493 && (TYPE_PRECISION (type) & 1) == 0
2494 && int_mode_for_size (TYPE_PRECISION (type) / 2, limit: 1).exists ())
2495 {
2496 unsigned int prec = TYPE_PRECISION (type);
2497 unsigned int hprec = prec / 2;
2498 wide_int bits = wide_int::from (x: tree_nonzero_bits (rhs), precision: prec,
2499 TYPE_SIGN (TREE_TYPE (rhs)));
2500 if (TYPE_UNSIGNED (type)
2501 && wi::bit_and (x: bits, y: wi::mask (width: hprec, negate_p: true, precision: prec)) == 0)
2502 {
2503 *type_out = build_nonstandard_integer_type (hprec, true);
2504 /* X & MODE_MASK can be simplified to (T)X. */
2505 stmt = SSA_NAME_DEF_STMT (rhs);
2506 if (is_gimple_assign (gs: stmt)
2507 && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR
2508 && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
2509 && wide_int::from (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)),
2510 precision: prec, TYPE_SIGN (TREE_TYPE (rhs)))
2511 == wi::mask (width: hprec, negate_p: false, precision: prec))
2512 *new_rhs_out = gimple_assign_rhs1 (gs: stmt);
2513 else
2514 *new_rhs_out = rhs;
2515 return true;
2516 }
2517 else if (!TYPE_UNSIGNED (type)
2518 && wi::bit_and (x: bits, y: wi::mask (width: hprec - 1, negate_p: true, precision: prec)) == 0)
2519 {
2520 *type_out = build_nonstandard_integer_type (hprec, false);
2521 *new_rhs_out = rhs;
2522 return true;
2523 }
2524 }
2525
2526 stmt = SSA_NAME_DEF_STMT (rhs);
2527 if (is_gimple_assign (gs: stmt))
2528 {
2529
2530 if (widening_mult_conversion_strippable_p (result_type: type, stmt))
2531 {
2532 rhs1 = gimple_assign_rhs1 (gs: stmt);
2533
2534 if (TREE_CODE (rhs1) == INTEGER_CST)
2535 {
2536 *new_rhs_out = rhs1;
2537 *type_out = NULL;
2538 return true;
2539 }
2540 }
2541 else
2542 rhs1 = rhs;
2543 }
2544 else
2545 rhs1 = rhs;
2546
2547 type1 = TREE_TYPE (rhs1);
2548
2549 if (TREE_CODE (type1) != TREE_CODE (type)
2550 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2551 return false;
2552
2553 *new_rhs_out = rhs1;
2554 *type_out = type1;
2555 return true;
2556 }
2557
2558 if (TREE_CODE (rhs) == INTEGER_CST)
2559 {
2560 *new_rhs_out = rhs;
2561 *type_out = NULL;
2562 return true;
2563 }
2564
2565 return false;
2566}
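
/* The zero-extension test above amounts to checking that no bit in the upper
   half of the full precision can be set.  A fixed-width sketch (hypothetical
   helper, plain masks instead of wide_int):

     #include <cstdint>

     // True if a 32-bit unsigned operand whose known-nonzero bits are BITS
     // behaves like a zero-extended 16-bit value.
     static bool
     fits_half_width_sketch (uint32_t bits)
     {
       const uint32_t high_half_mask = 0xffff0000u;
       return (bits & high_half_mask) == 0;
     }
*/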
2567
2568/* Return true if STMT performs a widening multiplication, assuming the
2569 output type is TYPE. If so, store the unwidened types of the operands
2570 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2571 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2572 and *TYPE2_OUT would give the operands of the multiplication. */
2573
2574static bool
2575is_widening_mult_p (gimple *stmt,
2576 tree *type1_out, tree *rhs1_out,
2577 tree *type2_out, tree *rhs2_out)
2578{
2579 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2580
2581 if (TREE_CODE (type) == INTEGER_TYPE)
2582 {
2583 if (TYPE_OVERFLOW_TRAPS (type))
2584 return false;
2585 }
2586 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2587 return false;
2588
2589 if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs1 (gs: stmt), type_out: type1_out,
2590 new_rhs_out: rhs1_out))
2591 return false;
2592
2593 if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs2 (gs: stmt), type_out: type2_out,
2594 new_rhs_out: rhs2_out))
2595 return false;
2596
2597 if (*type1_out == NULL)
2598 {
2599 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2600 return false;
2601 *type1_out = *type2_out;
2602 }
2603
2604 if (*type2_out == NULL)
2605 {
2606 if (!int_fits_type_p (*rhs2_out, *type1_out))
2607 return false;
2608 *type2_out = *type1_out;
2609 }
2610
2611 /* Ensure that the larger of the two operands comes first. */
2612 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2613 {
2614 std::swap (a&: *type1_out, b&: *type2_out);
2615 std::swap (a&: *rhs1_out, b&: *rhs2_out);
2616 }
2617
2618 return true;
2619}
2620
2621/* Check to see if the CALL statement is an invocation of copysign
2622 with 1. being the first argument. */
2623static bool
2624is_copysign_call_with_1 (gimple *call)
2625{
2626 gcall *c = dyn_cast <gcall *> (p: call);
2627 if (! c)
2628 return false;
2629
2630 enum combined_fn code = gimple_call_combined_fn (c);
2631
2632 if (code == CFN_LAST)
2633 return false;
2634
2635 if (builtin_fn_p (code))
2636 {
2637 switch (as_builtin_fn (code))
2638 {
2639 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2640 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2641 return real_onep (gimple_call_arg (gs: c, index: 0));
2642 default:
2643 return false;
2644 }
2645 }
2646
2647 if (internal_fn_p (code))
2648 {
2649 switch (as_internal_fn (code))
2650 {
2651 case IFN_COPYSIGN:
2652 return real_onep (gimple_call_arg (gs: c, index: 0));
2653 default:
2654 return false;
2655 }
2656 }
2657
2658 return false;
2659}
2660
2661/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2662 This only happens when the xorsign optab is defined; if the
2663 pattern is not a xorsign pattern or if expansion fails, FALSE is
2664 returned, otherwise TRUE is returned. */
2665static bool
2666convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2667{
2668 tree treeop0, treeop1, lhs, type;
2669 location_t loc = gimple_location (g: stmt);
2670 lhs = gimple_assign_lhs (gs: stmt);
2671 treeop0 = gimple_assign_rhs1 (gs: stmt);
2672 treeop1 = gimple_assign_rhs2 (gs: stmt);
2673 type = TREE_TYPE (lhs);
2674 machine_mode mode = TYPE_MODE (type);
2675
2676 if (HONOR_SNANS (type))
2677 return false;
2678
2679 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2680 {
2681 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2682 if (!has_single_use (var: treeop0) || !is_copysign_call_with_1 (call: call0))
2683 {
2684 call0 = SSA_NAME_DEF_STMT (treeop1);
2685 if (!has_single_use (var: treeop1) || !is_copysign_call_with_1 (call: call0))
2686 return false;
2687
2688 treeop1 = treeop0;
2689 }
2690 if (optab_handler (op: xorsign_optab, mode) == CODE_FOR_nothing)
2691 return false;
2692
2693 gcall *c = as_a<gcall*> (p: call0);
2694 treeop0 = gimple_call_arg (gs: c, index: 1);
2695
2696 gcall *call_stmt
2697 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2698 gimple_set_lhs (call_stmt, lhs);
2699 gimple_set_location (g: call_stmt, location: loc);
2700 gsi_replace (gsi, call_stmt, true);
2701 return true;
2702 }
2703
2704 return false;
2705}
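
/* The identity being exploited, x * copysign (1.0, y) == xorsign (x, y), is
   just an XOR of y's sign bit into x.  A standalone illustration for IEEE
   doubles (sketch only; the HONOR_SNANS caveat checked above still applies):

     #include <cstdint>
     #include <cstring>

     static double
     xorsign_sketch (double x, double y)
     {
       uint64_t xb, yb;
       std::memcpy (&xb, &x, sizeof x);
       std::memcpy (&yb, &y, sizeof y);
       xb ^= yb & 0x8000000000000000ull;   // flip x's sign where y is negative
       std::memcpy (&x, &xb, sizeof x);
       return x;
     }
*/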
2706
2707/* Process a single gimple statement STMT, which has a MULT_EXPR as
2708 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2709 value is true iff we converted the statement. */
2710
2711static bool
2712convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2713{
2714 tree lhs, rhs1, rhs2, type, type1, type2;
2715 enum insn_code handler;
2716 scalar_int_mode to_mode, from_mode, actual_mode;
2717 optab op;
2718 int actual_precision;
2719 location_t loc = gimple_location (g: stmt);
2720 bool from_unsigned1, from_unsigned2;
2721
2722 lhs = gimple_assign_lhs (gs: stmt);
2723 type = TREE_TYPE (lhs);
2724 if (TREE_CODE (type) != INTEGER_TYPE)
2725 return false;
2726
2727 if (!is_widening_mult_p (stmt, type1_out: &type1, rhs1_out: &rhs1, type2_out: &type2, rhs2_out: &rhs2))
2728 return false;
2729
2730 /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
2731 avoid the transform. */
2732 if ((TREE_CODE (rhs1) == SSA_NAME
2733 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2734 || (TREE_CODE (rhs2) == SSA_NAME
2735 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2736 return false;
2737
2738 to_mode = SCALAR_INT_TYPE_MODE (type);
2739 from_mode = SCALAR_INT_TYPE_MODE (type1);
2740 if (to_mode == from_mode)
2741 return false;
2742
2743 from_unsigned1 = TYPE_UNSIGNED (type1);
2744 from_unsigned2 = TYPE_UNSIGNED (type2);
2745
2746 if (from_unsigned1 && from_unsigned2)
2747 op = umul_widen_optab;
2748 else if (!from_unsigned1 && !from_unsigned2)
2749 op = smul_widen_optab;
2750 else
2751 op = usmul_widen_optab;
2752
2753 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2754 found_mode: &actual_mode);
2755
2756 if (handler == CODE_FOR_nothing)
2757 {
2758 if (op != smul_widen_optab)
2759 {
2760 /* We can use a signed multiply with unsigned types as long as
2761 there is a wider mode to use, or it is the smaller of the two
2762 types that is unsigned. Note that type1 >= type2, always. */
2763 if ((TYPE_UNSIGNED (type1)
2764 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode))
2765 || (TYPE_UNSIGNED (type2)
2766 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode)))
2767 {
2768 if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode)
2769 || GET_MODE_SIZE (mode: to_mode) <= GET_MODE_SIZE (mode: from_mode))
2770 return false;
2771 }
2772
2773 op = smul_widen_optab;
2774 handler = find_widening_optab_handler_and_mode (op, to_mode,
2775 from_mode,
2776 found_mode: &actual_mode);
2777
2778 if (handler == CODE_FOR_nothing)
2779 return false;
2780
2781 from_unsigned1 = from_unsigned2 = false;
2782 }
2783 else
2784 {
2785 /* Expand can synthesize smul_widen_optab if the target
2786 supports umul_widen_optab. */
2787 op = umul_widen_optab;
2788 handler = find_widening_optab_handler_and_mode (op, to_mode,
2789 from_mode,
2790 found_mode: &actual_mode);
2791 if (handler == CODE_FOR_nothing)
2792 return false;
2793 }
2794 }
2795
2796 /* Ensure that the inputs to the handler are in the correct precision
2797 for the opcode. This will be the full mode size. */
2798 actual_precision = GET_MODE_PRECISION (mode: actual_mode);
2799 if (2 * actual_precision > TYPE_PRECISION (type))
2800 return false;
2801 if (actual_precision != TYPE_PRECISION (type1)
2802 || from_unsigned1 != TYPE_UNSIGNED (type1))
2803 {
2804 if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2805 {
2806 if (TREE_CODE (rhs1) == INTEGER_CST)
2807 rhs1 = fold_convert (type1, rhs1);
2808 else
2809 rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: rhs1);
2810 }
2811 type1 = build_nonstandard_integer_type (actual_precision,
2812 from_unsigned1);
2813 }
2814 if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2815 {
2816 if (TREE_CODE (rhs1) == INTEGER_CST)
2817 rhs1 = fold_convert (type1, rhs1);
2818 else
2819 rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: rhs1);
2820 }
2821 if (actual_precision != TYPE_PRECISION (type2)
2822 || from_unsigned2 != TYPE_UNSIGNED (type2))
2823 {
2824 if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2825 {
2826 if (TREE_CODE (rhs2) == INTEGER_CST)
2827 rhs2 = fold_convert (type2, rhs2);
2828 else
2829 rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: rhs2);
2830 }
2831 type2 = build_nonstandard_integer_type (actual_precision,
2832 from_unsigned2);
2833 }
2834 if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2835 {
2836 if (TREE_CODE (rhs2) == INTEGER_CST)
2837 rhs2 = fold_convert (type2, rhs2);
2838 else
2839 rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: rhs2);
2840 }
2841
2842 gimple_assign_set_rhs1 (gs: stmt, rhs: rhs1);
2843 gimple_assign_set_rhs2 (gs: stmt, rhs: rhs2);
2844 gimple_assign_set_rhs_code (s: stmt, code: WIDEN_MULT_EXPR);
2845 update_stmt (s: stmt);
2846 widen_mul_stats.widen_mults_inserted++;
2847 return true;
2848}
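
/* Source-level shape this targets (illustrative sketch): both multiply
   operands are extended from half the result's width, so the 64-bit
   MULT_EXPR can be rewritten into a single 32x32->64 WIDEN_MULT_EXPR when
   the target provides one:

     #include <cstdint>

     int64_t
     widen_mult_sketch (int32_t a, int32_t b)
     {
       return (int64_t) a * (int64_t) b;
     }
*/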
2849
2850/* Process a single gimple statement STMT, which is found at the
2851 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2852 rhs (given by CODE), and try to convert it into a
2853 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2854 is true iff we converted the statement. */
2855
2856static bool
2857convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2858 enum tree_code code)
2859{
2860 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2861 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2862 tree type, type1, type2, optype;
2863 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2864 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2865 optab this_optab;
2866 enum tree_code wmult_code;
2867 enum insn_code handler;
2868 scalar_mode to_mode, from_mode, actual_mode;
2869 location_t loc = gimple_location (g: stmt);
2870 int actual_precision;
2871 bool from_unsigned1, from_unsigned2;
2872
2873 lhs = gimple_assign_lhs (gs: stmt);
2874 type = TREE_TYPE (lhs);
2875 if ((TREE_CODE (type) != INTEGER_TYPE
2876 && TREE_CODE (type) != FIXED_POINT_TYPE)
2877 || !type_has_mode_precision_p (t: type))
2878 return false;
2879
2880 if (code == MINUS_EXPR)
2881 wmult_code = WIDEN_MULT_MINUS_EXPR;
2882 else
2883 wmult_code = WIDEN_MULT_PLUS_EXPR;
2884
2885 rhs1 = gimple_assign_rhs1 (gs: stmt);
2886 rhs2 = gimple_assign_rhs2 (gs: stmt);
2887
2888 if (TREE_CODE (rhs1) == SSA_NAME)
2889 {
2890 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2891 if (is_gimple_assign (gs: rhs1_stmt))
2892 rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt);
2893 }
2894
2895 if (TREE_CODE (rhs2) == SSA_NAME)
2896 {
2897 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2898 if (is_gimple_assign (gs: rhs2_stmt))
2899 rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt);
2900 }
2901
2902 /* Allow for one conversion statement between the multiply
2903 and addition/subtraction statement. If there is more than
2904 one conversion then we assume they would invalidate this
2905 transformation. If that's not the case then they should have
2906 been folded before now. */
2907 if (CONVERT_EXPR_CODE_P (rhs1_code))
2908 {
2909 conv1_stmt = rhs1_stmt;
2910 rhs1 = gimple_assign_rhs1 (gs: rhs1_stmt);
2911 if (TREE_CODE (rhs1) == SSA_NAME)
2912 {
2913 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2914 if (is_gimple_assign (gs: rhs1_stmt))
2915 rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt);
2916 }
2917 else
2918 return false;
2919 }
2920 if (CONVERT_EXPR_CODE_P (rhs2_code))
2921 {
2922 conv2_stmt = rhs2_stmt;
2923 rhs2 = gimple_assign_rhs1 (gs: rhs2_stmt);
2924 if (TREE_CODE (rhs2) == SSA_NAME)
2925 {
2926 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2927 if (is_gimple_assign (gs: rhs2_stmt))
2928 rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt);
2929 }
2930 else
2931 return false;
2932 }
2933
2934 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2935 is_widening_mult_p, but we still need the rhs returns.
2936
2937 It might also appear that it would be sufficient to use the existing
2938 operands of the widening multiply, but that would limit the choice of
2939 multiply-and-accumulate instructions.
2940
2941 If the widened-multiplication result has more than one use, it is
2942 probably wiser not to do the conversion. Also restrict this operation
2943 to a single basic block to avoid moving the multiply to a different block
2944 with a higher execution frequency. */
2945 if (code == PLUS_EXPR
2946 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2947 {
2948 if (!has_single_use (var: rhs1)
2949 || gimple_bb (g: rhs1_stmt) != gimple_bb (g: stmt)
2950 || !is_widening_mult_p (stmt: rhs1_stmt, type1_out: &type1, rhs1_out: &mult_rhs1,
2951 type2_out: &type2, rhs2_out: &mult_rhs2))
2952 return false;
2953 add_rhs = rhs2;
2954 conv_stmt = conv1_stmt;
2955 }
2956 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2957 {
2958 if (!has_single_use (var: rhs2)
2959 || gimple_bb (g: rhs2_stmt) != gimple_bb (g: stmt)
2960 || !is_widening_mult_p (stmt: rhs2_stmt, type1_out: &type1, rhs1_out: &mult_rhs1,
2961 type2_out: &type2, rhs2_out: &mult_rhs2))
2962 return false;
2963 add_rhs = rhs1;
2964 conv_stmt = conv2_stmt;
2965 }
2966 else
2967 return false;
2968
2969 to_mode = SCALAR_TYPE_MODE (type);
2970 from_mode = SCALAR_TYPE_MODE (type1);
2971 if (to_mode == from_mode)
2972 return false;
2973
2974 from_unsigned1 = TYPE_UNSIGNED (type1);
2975 from_unsigned2 = TYPE_UNSIGNED (type2);
2976 optype = type1;
2977
2978 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2979 if (from_unsigned1 != from_unsigned2)
2980 {
2981 if (!INTEGRAL_TYPE_P (type))
2982 return false;
2983 /* We can use a signed multiply with unsigned types as long as
2984 there is a wider mode to use, or it is the smaller of the two
2985 types that is unsigned. Note that type1 >= type2, always. */
2986 if ((from_unsigned1
2987 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode))
2988 || (from_unsigned2
2989 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode)))
2990 {
2991 if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode)
2992 || GET_MODE_SIZE (mode: from_mode) >= GET_MODE_SIZE (mode: to_mode))
2993 return false;
2994 }
2995
2996 from_unsigned1 = from_unsigned2 = false;
2997 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (mode: from_mode),
2998 false);
2999 }
3000
3001 /* If there was a conversion between the multiply and addition
3002 then we need to make sure it fits a multiply-and-accumulate.
3003 There should be a single mode change which does not change the
3004 value. */
3005 if (conv_stmt)
3006 {
3007 /* We use the original, unmodified data types for this. */
3008 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3009 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3010 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3011 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3012
3013 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3014 {
3015 /* Conversion is a truncate. */
3016 if (TYPE_PRECISION (to_type) < data_size)
3017 return false;
3018 }
3019 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3020 {
3021 /* Conversion is an extend. Check it's the right sort. */
3022 if (TYPE_UNSIGNED (from_type) != is_unsigned
3023 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3024 return false;
3025 }
3026 /* else convert is a no-op for our purposes. */
3027 }
3028
3029 /* Verify that the machine can perform a widening multiply
3030 accumulate in this mode/signedness combination, otherwise
3031 this transformation is likely to pessimize code. */
3032 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3033 handler = find_widening_optab_handler_and_mode (op: this_optab, to_mode,
3034 from_mode, found_mode: &actual_mode);
3035
3036 if (handler == CODE_FOR_nothing)
3037 return false;
3038
3039 /* Ensure that the inputs to the handler are in the correct precision
3040 for the opcode. This will be the full mode size. */
3041 actual_precision = GET_MODE_PRECISION (mode: actual_mode);
3042 if (actual_precision != TYPE_PRECISION (type1)
3043 || from_unsigned1 != TYPE_UNSIGNED (type1))
3044 {
3045 if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3046 {
3047 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3048 mult_rhs1 = fold_convert (type1, mult_rhs1);
3049 else
3050 mult_rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: mult_rhs1);
3051 }
3052 type1 = build_nonstandard_integer_type (actual_precision,
3053 from_unsigned1);
3054 }
3055 if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3056 {
3057 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3058 mult_rhs1 = fold_convert (type1, mult_rhs1);
3059 else
3060 mult_rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: mult_rhs1);
3061 }
3062 if (actual_precision != TYPE_PRECISION (type2)
3063 || from_unsigned2 != TYPE_UNSIGNED (type2))
3064 {
3065 if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3066 {
3067 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3068 mult_rhs2 = fold_convert (type2, mult_rhs2);
3069 else
3070 mult_rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: mult_rhs2);
3071 }
3072 type2 = build_nonstandard_integer_type (actual_precision,
3073 from_unsigned2);
3074 }
3075 if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3076 {
3077 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3078 mult_rhs2 = fold_convert (type2, mult_rhs2);
3079 else
3080 mult_rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: mult_rhs2);
3081 }
3082
3083 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3084 add_rhs = build_and_insert_cast (gsi, loc, type, val: add_rhs);
3085
3086 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3087 add_rhs);
3088 update_stmt (s: gsi_stmt (i: *gsi));
3089 widen_mul_stats.maccs_inserted++;
3090 return true;
3091}
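
/* Source-level shape this targets (illustrative sketch): a widening multiply
   whose single use feeds a PLUS_EXPR (or MINUS_EXPR) in the same block, which
   can become one WIDEN_MULT_PLUS_EXPR (a widening multiply-accumulate):

     #include <cstdint>

     int64_t
     widen_macc_sketch (const int32_t *a, const int32_t *b, int n)
     {
       int64_t acc = 0;
       for (int i = 0; i < n; i++)
         acc += (int64_t) a[i] * b[i];
       return acc;
     }
*/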
3092
3093/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3094 OP2 and which we know is used in statements that can be, together with the
3095 multiplication, converted to FMAs, perform the transformation. */
3096
3097static void
3098convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3099{
3100 tree type = TREE_TYPE (mul_result);
3101 gimple *use_stmt;
3102 imm_use_iterator imm_iter;
3103 gcall *fma_stmt;
3104
3105 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3106 {
3107 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3108 tree addop, mulop1 = op1, result = mul_result;
3109 bool negate_p = false;
3110 gimple_seq seq = NULL;
3111
3112 if (is_gimple_debug (gs: use_stmt))
3113 continue;
3114
3115 if (is_gimple_assign (gs: use_stmt)
3116 && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR)
3117 {
3118 result = gimple_assign_lhs (gs: use_stmt);
3119 use_operand_p use_p;
3120 gimple *neguse_stmt;
3121 single_imm_use (var: gimple_assign_lhs (gs: use_stmt), use_p: &use_p, stmt: &neguse_stmt);
3122 gsi_remove (&gsi, true);
3123 release_defs (use_stmt);
3124
3125 use_stmt = neguse_stmt;
3126 gsi = gsi_for_stmt (use_stmt);
3127 negate_p = true;
3128 }
3129
3130 tree cond, else_value, ops[3], len, bias;
3131 tree_code code;
3132 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3133 ops, &else_value,
3134 &len, &bias))
3135 gcc_unreachable ();
3136 addop = ops[0] == result ? ops[1] : ops[0];
3137
3138 if (code == MINUS_EXPR)
3139 {
3140 if (ops[0] == result)
3141 /* a * b - c -> a * b + (-c) */
3142 addop = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: addop);
3143 else
3144 /* a - b * c -> (-b) * c + a */
3145 negate_p = !negate_p;
3146 }
3147
3148 if (negate_p)
3149 mulop1 = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: mulop1);
3150
3151 if (seq)
3152 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3153
3154 if (len)
3155 fma_stmt
3156 = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3157 addop, else_value, len, bias);
3158 else if (cond)
3159 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3160 op2, addop, else_value);
3161 else
3162 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3163 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3164 gimple_call_set_nothrow (s: fma_stmt, nothrow_p: !stmt_can_throw_internal (cfun,
3165 use_stmt));
3166 gsi_replace (&gsi, fma_stmt, true);
3167 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3168 regardless of where the negation occurs. */
3169 gimple *orig_stmt = gsi_stmt (i: gsi);
3170 if (fold_stmt (&gsi, follow_all_ssa_edges))
3171 {
3172 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (i: gsi)))
3173 gcc_unreachable ();
3174 update_stmt (s: gsi_stmt (i: gsi));
3175 }
3176
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3178 {
3179 fprintf (stream: dump_file, format: "Generated FMA ");
3180 print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE);
3181 fprintf (stream: dump_file, format: "\n");
3182 }
3183
3184 /* If the FMA result is negated in a single use, fold the negation
3185 too. */
3186 orig_stmt = gsi_stmt (i: gsi);
3187 use_operand_p use_p;
3188 gimple *neg_stmt;
3189 if (is_gimple_call (gs: orig_stmt)
3190 && gimple_call_internal_p (gs: orig_stmt)
3191 && gimple_call_lhs (gs: orig_stmt)
3192 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3193 && single_imm_use (var: gimple_call_lhs (gs: orig_stmt), use_p: &use_p, stmt: &neg_stmt)
3194 && is_gimple_assign (gs: neg_stmt)
3195 && gimple_assign_rhs_code (gs: neg_stmt) == NEGATE_EXPR
3196 && !stmt_could_throw_p (cfun, neg_stmt))
3197 {
3198 gsi = gsi_for_stmt (neg_stmt);
3199 if (fold_stmt (&gsi, follow_all_ssa_edges))
3200 {
3201 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (i: gsi)))
3202 gcc_unreachable ();
3203 update_stmt (s: gsi_stmt (i: gsi));
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3205 {
3206 fprintf (stream: dump_file, format: "Folded FMA negation ");
3207 print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE);
3208 fprintf (stream: dump_file, format: "\n");
3209 }
3210 }
3211 }
3212
3213 widen_mul_stats.fmas_inserted++;
3214 }
3215}
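
/* The shapes produced here (and after the follow_all_ssa_edges folding below)
   map onto the usual fused forms; a libm-level sketch:

     #include <cmath>

     static void
     fma_shapes_sketch (double a, double b, double c, double out[4])
     {
       out[0] = std::fma (a, b, c);     //  a * b + c  (FMA)
       out[1] = std::fma (a, b, -c);    //  a * b - c  (FMS)
       out[2] = std::fma (-a, b, c);    // -a * b + c  (FNMA)
       out[3] = std::fma (-a, b, -c);   // -a * b - c  (FNMS)
     }
*/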
3216
3217 /* Data necessary to perform the actual transformation from a multiplication
3218 and an addition to an FMA once the decision to do so has been taken, and to
3219 then delete the multiplication statement from the function IL. */
3220
3221struct fma_transformation_info
3222{
3223 gimple *mul_stmt;
3224 tree mul_result;
3225 tree op1;
3226 tree op2;
3227};
3228
3229/* Structure containing the current state of FMA deferring, i.e. whether we are
3230 deferring, whether to continue deferring, and all data necessary to come
3231 back and perform all deferred transformations. */
3232
3233class fma_deferring_state
3234{
3235public:
3236 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3237 do any deferring. */
3238
3239 fma_deferring_state (bool perform_deferring)
3240 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3241 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3242
3243 /* List of FMA candidates for which the transformation has been determined
3244 possible but which, at this point in the BB analysis, we do not consider
3245 beneficial. */
3246 auto_vec<fma_transformation_info, 8> m_candidates;
3247
3248 /* Set of multiplication results that are part of already deferred FMA
3249 candidates. */
3250 hash_set<tree> m_mul_result_set;
3251
3252 /* The PHI that supposedly feeds back the result of one FMA to another over
3253 a loop boundary. */
3254 gphi *m_initial_phi;
3255
3256 /* Result of the last produced FMA candidate or NULL if there has not been
3257 one. */
3258 tree m_last_result;
3259
3260 /* If true, deferring might still be profitable. If false, transform all
3261 candidates and no longer defer. */
3262 bool m_deferring_p;
3263};
3264
3265/* Transform all deferred FMA candidates and mark STATE as no longer
3266 deferring. */
3267
3268static void
3269cancel_fma_deferring (fma_deferring_state *state)
3270{
3271 if (!state->m_deferring_p)
3272 return;
3273
3274 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3275 {
3276 if (dump_file && (dump_flags & TDF_DETAILS))
3277 fprintf (stream: dump_file, format: "Generating deferred FMA\n");
3278
3279 const fma_transformation_info &fti = state->m_candidates[i];
3280 convert_mult_to_fma_1 (mul_result: fti.mul_result, op1: fti.op1, op2: fti.op2);
3281
3282 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3283 gsi_remove (&gsi, true);
3284 release_defs (fti.mul_stmt);
3285 }
3286 state->m_deferring_p = false;
3287}
3288
3289/* If OP is an SSA name defined by a PHI node, return the PHI statement.
3290 Otherwise return NULL. */
3291
3292static gphi *
3293result_of_phi (tree op)
3294{
3295 if (TREE_CODE (op) != SSA_NAME)
3296 return NULL;
3297
3298 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3299}
3300
3301/* After processing statements of a BB and recording STATE, return true if the
3302 initial phi is fed by the last FMA candidate result or one such result from
3303 previously processed BBs marked in LAST_RESULT_SET. */
3304
3305static bool
3306last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3307 hash_set<tree> *last_result_set)
3308{
3309 ssa_op_iter iter;
3310 use_operand_p use;
3311 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3312 {
3313 tree t = USE_FROM_PTR (use);
3314 if (t == state->m_last_result
3315 || last_result_set->contains (k: t))
3316 return true;
3317 }
3318
3319 return false;
3320}
3321
3322/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3323 with uses in additions and subtractions to form fused multiply-add
3324 operations. Returns true if successful and MUL_STMT should be removed.
3325 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3326 on MUL_COND, otherwise it is unconditional.
3327
3328 If STATE indicates that we are deferring FMA transformation, that means
3329 that we do not produce FMAs for basic blocks which look like:
3330
3331 <bb 6>
3332 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3333 _65 = _14 * _16;
3334 accumulator_66 = _65 + accumulator_111;
3335
3336 or its unrolled version, i.e. with several FMA candidates that feed result
3337 of one into the addend of another. Instead, we add them to a list in STATE
3338 and if we later discover an FMA candidate that is not part of such a chain,
3339 we go back and perform all deferred past candidates. */
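
/* At the source level the shape that makes deferring worthwhile is a
   reduction whose accumulator is carried through the candidate FMAs, e.g.
   (illustrative sketch):

     double
     reduction_sketch (const double *a, const double *b, int n)
     {
       double acc = 0.0;
       for (int i = 0; i < n; i++)
         acc = a[i] * b[i] + acc;   // each FMA waits for the previous one
       return acc;
     }

   On targets where FMA latency exceeds that of an addition, keeping the
   multiply and add separate can shorten this dependence chain.  */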
3340
3341static bool
3342convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3343 fma_deferring_state *state, tree mul_cond = NULL_TREE,
3344 tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3345{
3346 tree mul_result = gimple_get_lhs (mul_stmt);
3347 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3348 if the statement was left just for the side-effects. */
3349 if (!mul_result)
3350 return false;
3351 tree type = TREE_TYPE (mul_result);
3352 gimple *use_stmt, *neguse_stmt;
3353 use_operand_p use_p;
3354 imm_use_iterator imm_iter;
3355
3356 if (FLOAT_TYPE_P (type)
3357 && flag_fp_contract_mode != FP_CONTRACT_FAST)
3358 return false;
3359
3360 /* We don't want to do bitfield reduction ops. */
3361 if (INTEGRAL_TYPE_P (type)
3362 && (!type_has_mode_precision_p (t: type) || TYPE_OVERFLOW_TRAPS (type)))
3363 return false;
3364
3365 /* If the target doesn't support it, don't generate it. We assume that
3366 if fma isn't available then fms, fnma or fnms are not either. */
3367 optimization_type opt_type = bb_optimization_type (gimple_bb (g: mul_stmt));
3368 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3369 return false;
3370
3371 /* If the multiplication has zero uses, it is kept around probably because
3372 of -fnon-call-exceptions. Don't optimize it away in that case;
3373 that is DCE's job. */
3374 if (has_zero_uses (var: mul_result))
3375 return false;
3376
3377 bool check_defer
3378 = (state->m_deferring_p
3379 && maybe_le (a: tree_to_poly_int64 (TYPE_SIZE (type)),
3380 param_avoid_fma_max_bits));
3381 bool defer = check_defer;
3382 bool seen_negate_p = false;
3383
3384 /* There is no numerical difference between fused and unfused integer FMAs,
3385 and the assumption below that FMA is as cheap as addition is unlikely
3386 to be true, especially if the multiplication occurs multiple times on
3387 the same chain. E.g., for something like:
3388
3389 (((a * b) + c) >> 1) + (a * b)
3390
3391 we do not want to duplicate the a * b into two additions, not least
3392 because the result is not a natural FMA chain. */
3393 if (ANY_INTEGRAL_TYPE_P (type)
3394 && !has_single_use (var: mul_result))
3395 return false;
3396
3397 if (!dbg_cnt (index: form_fma))
3398 return false;
3399
3400 /* Make sure that the multiplication statement becomes dead after
3401 the transformation, thus that all uses are transformed to FMAs.
3402 This means we assume that an FMA operation has the same cost
3403 as an addition. */
3404 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3405 {
3406 tree result = mul_result;
3407 bool negate_p = false;
3408
3409 use_stmt = USE_STMT (use_p);
3410
3411 if (is_gimple_debug (gs: use_stmt))
3412 continue;
3413
3414 /* For now restrict these operations to a single basic block. In theory
3415 we would want to support sinking the multiplication in
3416 m = a*b;
3417 if ()
3418 ma = m + c;
3419 else
3420 d = m;
3421 to form a fma in the then block and sink the multiplication to the
3422 else block. */
3423 if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt))
3424 return false;
3425
3426 /* A negate on the multiplication leads to FNMA. */
3427 if (is_gimple_assign (gs: use_stmt)
3428 && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR)
3429 {
3430 ssa_op_iter iter;
3431 use_operand_p usep;
3432
3433 /* If (due to earlier missed optimizations) we have two
3434 negates of the same value, treat them as equivalent
3435 to a single negate with multiple uses. */
3436 if (seen_negate_p)
3437 return false;
3438
3439 result = gimple_assign_lhs (gs: use_stmt);
3440
3441 /* Make sure the negate statement becomes dead with this
3442 single transformation. */
3443 if (!single_imm_use (var: gimple_assign_lhs (gs: use_stmt),
3444 use_p: &use_p, stmt: &neguse_stmt))
3445 return false;
3446
3447 /* Make sure the multiplication isn't also used on that stmt. */
3448 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3449 if (USE_FROM_PTR (usep) == mul_result)
3450 return false;
3451
3452 /* Re-validate. */
3453 use_stmt = neguse_stmt;
3454 if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt))
3455 return false;
3456
3457 negate_p = seen_negate_p = true;
3458 }
3459
3460 tree cond, else_value, ops[3], len, bias;
3461 tree_code code;
3462 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3463 &else_value, &len, &bias))
3464 return false;
3465
3466 switch (code)
3467 {
3468 case MINUS_EXPR:
3469 if (ops[1] == result)
3470 negate_p = !negate_p;
3471 break;
3472 case PLUS_EXPR:
3473 break;
3474 default:
3475 /* FMA can only be formed from PLUS and MINUS. */
3476 return false;
3477 }
3478
3479 if (len)
3480 {
3481 /* For COND_LEN_* operations, we may have a dummy mask which is
3482 the all-true mask. In that case mul_cond may differ from cond,
3483 but we still consider them equal. */
3484 if (mul_cond && cond != mul_cond
3485 && !(integer_truep (mul_cond) && integer_truep (cond)))
3486 return false;
3487
3488 if (else_value == result)
3489 return false;
3490
3491 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3492 opt_type))
3493 return false;
3494
3495 if (mul_len)
3496 {
3497 poly_int64 mul_value, value;
3498 if (poly_int_tree_p (t: mul_len, value: &mul_value)
3499 && poly_int_tree_p (t: len, value: &value)
3500 && maybe_ne (a: mul_value, b: value))
3501 return false;
3502 else if (mul_len != len)
3503 return false;
3504
3505 if (wi::to_widest (t: mul_bias) != wi::to_widest (t: bias))
3506 return false;
3507 }
3508 }
3509 else
3510 {
3511 if (mul_cond && cond != mul_cond)
3512 return false;
3513
3514 if (cond)
3515 {
3516 if (cond == result || else_value == result)
3517 return false;
3518 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3519 opt_type))
3520 return false;
3521 }
3522 }
3523
3524 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3525 we'll visit later, we might be able to get a more profitable
3526 match with fnma.
3527 OTOH, if we don't, a negate / fma pair likely has lower latency
3528 than a mult / subtract pair. */
3529 if (code == MINUS_EXPR
3530 && !negate_p
3531 && ops[0] == result
3532 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3533 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3534 && TREE_CODE (ops[1]) == SSA_NAME
3535 && has_single_use (var: ops[1]))
3536 {
3537 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3538 if (is_gimple_assign (gs: stmt2)
3539 && gimple_assign_rhs_code (gs: stmt2) == MULT_EXPR)
3540 return false;
3541 }
3542
3543 /* We can't handle a * b + a * b. */
3544 if (ops[0] == ops[1])
3545 return false;
3546 /* If deferring, make sure we are not looking at an instruction that
3547 wouldn't have existed if we were not. */
3548 if (state->m_deferring_p
3549 && (state->m_mul_result_set.contains (k: ops[0])
3550 || state->m_mul_result_set.contains (k: ops[1])))
3551 return false;
3552
3553 if (check_defer)
3554 {
3555 tree use_lhs = gimple_get_lhs (use_stmt);
3556 if (state->m_last_result)
3557 {
3558 if (ops[1] == state->m_last_result
3559 || ops[0] == state->m_last_result)
3560 defer = true;
3561 else
3562 defer = false;
3563 }
3564 else
3565 {
3566 gcc_checking_assert (!state->m_initial_phi);
3567 gphi *phi;
3568 if (ops[0] == result)
3569 phi = result_of_phi (op: ops[1]);
3570 else
3571 {
3572 gcc_assert (ops[1] == result);
3573 phi = result_of_phi (op: ops[0]);
3574 }
3575
3576 if (phi)
3577 {
3578 state->m_initial_phi = phi;
3579 defer = true;
3580 }
3581 else
3582 defer = false;
3583 }
3584
3585 state->m_last_result = use_lhs;
3586 check_defer = false;
3587 }
3588 else
3589 defer = false;
3590
3591 /* While it is possible to validate whether or not the exact form that
3592 we've recognized is available in the backend, the assumption is that
3593 if the deferring logic above did not trigger, the transformation is
3594 never a loss. For instance, suppose the target only has the plain FMA
3595 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3596 MUL+SUB for FMA+NEG, which is still two operations. Consider
3597 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3598 form the two NEGs are independent and could be run in parallel. */
3599 }
3600
3601 if (defer)
3602 {
3603 fma_transformation_info fti;
3604 fti.mul_stmt = mul_stmt;
3605 fti.mul_result = mul_result;
3606 fti.op1 = op1;
3607 fti.op2 = op2;
3608 state->m_candidates.safe_push (obj: fti);
3609 state->m_mul_result_set.add (k: mul_result);
3610
3611 if (dump_file && (dump_flags & TDF_DETAILS))
3612 {
3613 fprintf (stream: dump_file, format: "Deferred generating FMA for multiplication ");
3614 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3615 fprintf (stream: dump_file, format: "\n");
3616 }
3617
3618 return false;
3619 }
3620 else
3621 {
3622 if (state->m_deferring_p)
3623 cancel_fma_deferring (state);
3624 convert_mult_to_fma_1 (mul_result, op1, op2);
3625 return true;
3626 }
3627}
3628
3629
3630/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3631 a check for non-zero like:
3632 _1 = x_4(D) * y_5(D);
3633 *res_7(D) = _1;
3634 if (x_4(D) != 0)
3635 goto <bb 3>; [50.00%]
3636 else
3637 goto <bb 4>; [50.00%]
3638
3639 <bb 3> [local count: 536870913]:
3640 _2 = _1 / x_4(D);
3641 _9 = _2 != y_5(D);
3642 _10 = (int) _9;
3643
3644 <bb 4> [local count: 1073741824]:
3645 # iftmp.0_3 = PHI <_10(3), 0(2)>
3646 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3647 optimize the x_4(D) != 0 condition to 1. */
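/* As a hedged, source-level illustration (function and variable names are
   invented for the example and do not come from any testcase), C code along
   the following lines can give rise to the GIMPLE above:

     int
     mul_overflows (unsigned x, unsigned y, unsigned *res)
     {
       *res = x * y;
       return x != 0 && *res / x != y;
     }

   The x != 0 guard only protects the division used for the overflow test;
   once the multiplication is rewritten to use .MUL_OVERFLOW, that guard can
   be folded to a constant as described above. */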
3648
3649static void
3650maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3651 gimple *div_stmt, bool *cfg_changed)
3652{
3653 basic_block bb = gimple_bb (g: cond_stmt);
3654 if (gimple_bb (g: div_stmt) != bb || !single_pred_p (bb))
3655 return;
3656 edge pred_edge = single_pred_edge (bb);
3657 basic_block pred_bb = pred_edge->src;
3658 if (EDGE_COUNT (pred_bb->succs) != 2)
3659 return;
3660 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3661 edge other_succ_edge = NULL;
3662 if (gimple_code (g: cond_stmt) == GIMPLE_COND)
3663 {
3664 if (EDGE_COUNT (bb->succs) != 2)
3665 return;
3666 other_succ_edge = EDGE_SUCC (bb, 0);
3667 if (gimple_cond_code (gs: cond_stmt) == NE_EXPR)
3668 {
3669 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3670 other_succ_edge = EDGE_SUCC (bb, 1);
3671 }
3672 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3673 other_succ_edge = EDGE_SUCC (bb, 0);
3674 if (other_edge->dest != other_succ_edge->dest)
3675 return;
3676 }
3677 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3678 return;
3679 gcond *zero_cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred_bb));
3680 if (zero_cond == NULL
3681 || (gimple_cond_code (gs: zero_cond)
3682 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3683 || !integer_zerop (gimple_cond_rhs (gs: zero_cond)))
3684 return;
3685 tree zero_cond_lhs = gimple_cond_lhs (gs: zero_cond);
3686 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3687 return;
3688 if (gimple_assign_rhs2 (gs: div_stmt) != zero_cond_lhs)
3689 {
3690 /* Allow the divisor to be the result of a same-precision cast
3691 from zero_cond_lhs. */
3692 tree rhs2 = gimple_assign_rhs2 (gs: div_stmt);
3693 if (TREE_CODE (rhs2) != SSA_NAME)
3694 return;
3695 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3696 if (!gimple_assign_cast_p (s: g)
3697 || gimple_assign_rhs1 (gs: g) != gimple_cond_lhs (gs: zero_cond)
3698 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3699 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3700 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3701 return;
3702 }
3703 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3704 mul_stmts.quick_push (obj: div_stmt);
3705 if (is_gimple_debug (gs: gsi_stmt (i: gsi)))
3706 gsi_next_nondebug (i: &gsi);
3707 unsigned cast_count = 0;
3708 while (gsi_stmt (i: gsi) != cond_stmt)
3709 {
3710 /* If the original mul_stmt has a single use, allow it in the same bb;
3711 we are then looking just at __builtin_mul_overflow_p.
3712 Though, in that case the original mul_stmt will be replaced
3713 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3714 gimple *mul_stmt;
3715 unsigned int i;
3716 bool ok = false;
3717 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3718 {
3719 if (gsi_stmt (i: gsi) == mul_stmt)
3720 {
3721 ok = true;
3722 break;
3723 }
3724 }
3725 if (!ok && gimple_assign_cast_p (s: gsi_stmt (i: gsi)) && ++cast_count < 4)
3726 ok = true;
3727 if (!ok)
3728 return;
3729 gsi_next_nondebug (i: &gsi);
3730 }
3731 if (gimple_code (g: cond_stmt) == GIMPLE_COND)
3732 {
3733 basic_block succ_bb = other_edge->dest;
3734 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (i: gpi);
3735 gsi_next (i: &gpi))
3736 {
3737 gphi *phi = gpi.phi ();
3738 tree v1 = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx);
3739 tree v2 = gimple_phi_arg_def (gs: phi, index: other_succ_edge->dest_idx);
3740 if (!operand_equal_p (v1, v2, flags: 0))
3741 return;
3742 }
3743 }
3744 else
3745 {
3746 tree lhs = gimple_assign_lhs (gs: cond_stmt);
3747 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3748 return;
3749 gsi_next_nondebug (i: &gsi);
3750 if (!gsi_end_p (i: gsi))
3751 {
3752 if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR)
3753 return;
3754 gimple *cast_stmt = gsi_stmt (i: gsi);
3755 if (!gimple_assign_cast_p (s: cast_stmt))
3756 return;
3757 tree new_lhs = gimple_assign_lhs (gs: cast_stmt);
3758 gsi_next_nondebug (i: &gsi);
3759 if (!gsi_end_p (i: gsi)
3760 || !new_lhs
3761 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3762 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3763 return;
3764 lhs = new_lhs;
3765 }
3766 edge succ_edge = single_succ_edge (bb);
3767 basic_block succ_bb = succ_edge->dest;
3768 gsi = gsi_start_phis (succ_bb);
3769 if (gsi_end_p (i: gsi))
3770 return;
3771 gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi));
3772 gsi_next (i: &gsi);
3773 if (!gsi_end_p (i: gsi))
3774 return;
3775 if (gimple_phi_arg_def (gs: phi, index: succ_edge->dest_idx) != lhs)
3776 return;
3777 tree other_val = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx);
3778 if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR)
3779 {
3780 tree cond = gimple_assign_rhs1 (gs: cond_stmt);
3781 if (TREE_CODE (cond) == NE_EXPR)
3782 {
3783 if (!operand_equal_p (other_val,
3784 gimple_assign_rhs3 (gs: cond_stmt), flags: 0))
3785 return;
3786 }
3787 else if (!operand_equal_p (other_val,
3788 gimple_assign_rhs2 (gs: cond_stmt), flags: 0))
3789 return;
3790 }
3791 else if (gimple_assign_rhs_code (gs: cond_stmt) == NE_EXPR)
3792 {
3793 if (!integer_zerop (other_val))
3794 return;
3795 }
3796 else if (!integer_onep (other_val))
3797 return;
3798 }
3799 if (pred_edge->flags & EDGE_TRUE_VALUE)
3800 gimple_cond_make_true (gs: zero_cond);
3801 else
3802 gimple_cond_make_false (gs: zero_cond);
3803 update_stmt (s: zero_cond);
3804 *cfg_changed = true;
3805}
3806
3807/* Helper function for arith_overflow_check_p. Return true
3808 if VAL1 is equal to VAL2 cast to corresponding integral type
3809 with other signedness or vice versa. */
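/* For instance (an illustrative example, not taken from real IL): with

     unsigned int u_2 = ...;
     int i_3 = (int) u_2;

   both arith_cast_equal_p (u_2, i_3) and arith_cast_equal_p (i_3, u_2)
   return true, because one value is defined by a cast whose operand is
   the other. */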
3810
3811static bool
3812arith_cast_equal_p (tree val1, tree val2)
3813{
3814 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3815 return wi::eq_p (x: wi::to_wide (t: val1), y: wi::to_wide (t: val2));
3816 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3817 return false;
3818 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3819 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3820 return true;
3821 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3822 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3823 return true;
3824 return false;
3825}
3826
3827/* Helper function of match_arith_overflow. Return 1
3828 if USE_STMT is unsigned overflow check ovf != 0 for
3829 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3830 and 0 otherwise. */
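/* For example (illustrative only): for STMT r = a + b with unsigned r,
   a use such as

     if (r < a)

   is recognized as the overflow check and yields 1, while

     if (r >= a)

   is the inverted check and yields -1; uses that match neither form
   yield 0. */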
3831
3832static int
3833arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3834 tree maxval, tree *other)
3835{
3836 enum tree_code ccode = ERROR_MARK;
3837 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3838 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
3839 tree lhs = gimple_assign_lhs (gs: cast_stmt ? cast_stmt : stmt);
3840 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
3841 tree rhs2 = gimple_assign_rhs2 (gs: stmt);
3842 tree multop = NULL_TREE, divlhs = NULL_TREE;
3843 gimple *cur_use_stmt = use_stmt;
3844
3845 if (code == MULT_EXPR)
3846 {
3847 if (!is_gimple_assign (gs: use_stmt))
3848 return 0;
3849 if (gimple_assign_rhs_code (gs: use_stmt) != TRUNC_DIV_EXPR)
3850 return 0;
3851 if (gimple_assign_rhs1 (gs: use_stmt) != lhs)
3852 return 0;
3853 if (cast_stmt)
3854 {
3855 if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs1))
3856 multop = rhs2;
3857 else if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs2))
3858 multop = rhs1;
3859 else
3860 return 0;
3861 }
3862 else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1)
3863 multop = rhs2;
3864 else if (operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), rhs2, flags: 0))
3865 multop = rhs1;
3866 else
3867 return 0;
3868 if (stmt_ends_bb_p (use_stmt))
3869 return 0;
3870 divlhs = gimple_assign_lhs (gs: use_stmt);
3871 if (!divlhs)
3872 return 0;
3873 use_operand_p use;
3874 if (!single_imm_use (var: divlhs, use_p: &use, stmt: &cur_use_stmt))
3875 return 0;
3876 if (cast_stmt && gimple_assign_cast_p (s: cur_use_stmt))
3877 {
3878 tree cast_lhs = gimple_assign_lhs (gs: cur_use_stmt);
3879 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3880 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3881 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3882 == TYPE_PRECISION (TREE_TYPE (divlhs)))
3883 && single_imm_use (var: cast_lhs, use_p: &use, stmt: &cur_use_stmt))
3884 {
3885 cast_stmt = NULL;
3886 divlhs = cast_lhs;
3887 }
3888 else
3889 return 0;
3890 }
3891 }
3892 if (gimple_code (g: cur_use_stmt) == GIMPLE_COND)
3893 {
3894 ccode = gimple_cond_code (gs: cur_use_stmt);
3895 crhs1 = gimple_cond_lhs (gs: cur_use_stmt);
3896 crhs2 = gimple_cond_rhs (gs: cur_use_stmt);
3897 }
3898 else if (is_gimple_assign (gs: cur_use_stmt))
3899 {
3900 if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS)
3901 {
3902 ccode = gimple_assign_rhs_code (gs: cur_use_stmt);
3903 crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt);
3904 crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt);
3905 }
3906 else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR)
3907 {
3908 tree cond = gimple_assign_rhs1 (gs: cur_use_stmt);
3909 if (COMPARISON_CLASS_P (cond))
3910 {
3911 ccode = TREE_CODE (cond);
3912 crhs1 = TREE_OPERAND (cond, 0);
3913 crhs2 = TREE_OPERAND (cond, 1);
3914 }
3915 else
3916 return 0;
3917 }
3918 else
3919 return 0;
3920 }
3921 else
3922 return 0;
3923
3924 if (maxval
3925 && ccode == RSHIFT_EXPR
3926 && crhs1 == lhs
3927 && TREE_CODE (crhs2) == INTEGER_CST
3928 && wi::to_widest (t: crhs2) == TYPE_PRECISION (TREE_TYPE (maxval)))
3929 {
3930 tree shiftlhs = gimple_assign_lhs (gs: use_stmt);
3931 if (!shiftlhs)
3932 return 0;
3933 use_operand_p use;
3934 if (!single_imm_use (var: shiftlhs, use_p: &use, stmt: &cur_use_stmt))
3935 return 0;
3936 if (gimple_code (g: cur_use_stmt) == GIMPLE_COND)
3937 {
3938 ccode = gimple_cond_code (gs: cur_use_stmt);
3939 crhs1 = gimple_cond_lhs (gs: cur_use_stmt);
3940 crhs2 = gimple_cond_rhs (gs: cur_use_stmt);
3941 }
3942 else if (is_gimple_assign (gs: cur_use_stmt))
3943 {
3944 if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS)
3945 {
3946 ccode = gimple_assign_rhs_code (gs: cur_use_stmt);
3947 crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt);
3948 crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt);
3949 }
3950 else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR)
3951 {
3952 tree cond = gimple_assign_rhs1 (gs: cur_use_stmt);
3953 if (COMPARISON_CLASS_P (cond))
3954 {
3955 ccode = TREE_CODE (cond);
3956 crhs1 = TREE_OPERAND (cond, 0);
3957 crhs2 = TREE_OPERAND (cond, 1);
3958 }
3959 else
3960 return 0;
3961 }
3962 else
3963 {
3964 enum tree_code sc = gimple_assign_rhs_code (gs: cur_use_stmt);
3965 tree castlhs = gimple_assign_lhs (gs: cur_use_stmt);
3966 if (!CONVERT_EXPR_CODE_P (sc)
3967 || !castlhs
3968 || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs))
3969 || (TYPE_PRECISION (TREE_TYPE (castlhs))
3970 > TYPE_PRECISION (TREE_TYPE (maxval))))
3971 return 0;
3972 return 1;
3973 }
3974 }
3975 else
3976 return 0;
3977 if ((ccode != EQ_EXPR && ccode != NE_EXPR)
3978 || crhs1 != shiftlhs
3979 || !integer_zerop (crhs2))
3980 return 0;
3981 return 1;
3982 }
3983
3984 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3985 return 0;
3986
3987 switch (ccode)
3988 {
3989 case GT_EXPR:
3990 case LE_EXPR:
3991 if (maxval)
3992 {
3993 /* r = a + b; r > maxval or r <= maxval */
3994 if (crhs1 == lhs
3995 && TREE_CODE (crhs2) == INTEGER_CST
3996 && tree_int_cst_equal (crhs2, maxval))
3997 return ccode == GT_EXPR ? 1 : -1;
3998 break;
3999 }
4000 /* r = a - b; r > a or r <= a
4001 r = a + b; a > r or a <= r or b > r or b <= r. */
4002 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
4003 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
4004 && crhs2 == lhs))
4005 return ccode == GT_EXPR ? 1 : -1;
4006 /* r = ~a; b > r or b <= r. */
4007 if (code == BIT_NOT_EXPR && crhs2 == lhs)
4008 {
4009 if (other)
4010 *other = crhs1;
4011 return ccode == GT_EXPR ? 1 : -1;
4012 }
4013 break;
4014 case LT_EXPR:
4015 case GE_EXPR:
4016 if (maxval)
4017 break;
4018 /* r = a - b; a < r or a >= r
4019 r = a + b; r < a or r >= a or r < b or r >= b. */
4020 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
4021 || (code == PLUS_EXPR && crhs1 == lhs
4022 && (crhs2 == rhs1 || crhs2 == rhs2)))
4023 return ccode == LT_EXPR ? 1 : -1;
4024 /* r = ~a; r < b or r >= b. */
4025 if (code == BIT_NOT_EXPR && crhs1 == lhs)
4026 {
4027 if (other)
4028 *other = crhs2;
4029 return ccode == LT_EXPR ? 1 : -1;
4030 }
4031 break;
4032 case EQ_EXPR:
4033 case NE_EXPR:
4034 /* r = a * b; _1 = r / a; _1 == b
4035 r = a * b; _1 = r / b; _1 == a
4036 r = a * b; _1 = r / a; _1 != b
4037 r = a * b; _1 = r / b; _1 != a. */
4038 if (code == MULT_EXPR)
4039 {
4040 if (cast_stmt)
4041 {
4042 if ((crhs1 == divlhs && arith_cast_equal_p (val1: crhs2, val2: multop))
4043 || (crhs2 == divlhs && arith_cast_equal_p (val1: crhs1, val2: multop)))
4044 {
4045 use_stmt = cur_use_stmt;
4046 return ccode == NE_EXPR ? 1 : -1;
4047 }
4048 }
4049 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, flags: 0))
4050 || (crhs2 == divlhs && crhs1 == multop))
4051 {
4052 use_stmt = cur_use_stmt;
4053 return ccode == NE_EXPR ? 1 : -1;
4054 }
4055 }
4056 break;
4057 default:
4058 break;
4059 }
4060 return 0;
4061}
4062
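/* Matchers for the saturating-arithmetic patterns, generated by genmatch
   from match.pd; on success they store the matched operands into the
   second argument. */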
4063extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4064extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4065extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4066
4067extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4068extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4069extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4070
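/* Helper for the saturation matchers below.  If the internal function FN
   (e.g. IFN_SAT_ADD or IFN_SAT_SUB) is supported for the type of LHS,
   replace the statement at GSI with a call FN (OP_0, OP_1) whose result
   is LHS; otherwise leave the statement alone.  */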
4071static void
4072build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
4073 internal_fn fn, tree lhs,
4074 tree op_0, tree op_1)
4075{
4076 if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
4077 {
4078 gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4079 gimple_call_set_lhs (gs: call, lhs);
4080 gsi_replace (gsi, call, /* update_eh_info */ true);
4081 }
4082}
4083
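/* Like the above, but check support against the type of OP_0, insert the
   call before the statement at GSI instead of replacing it, and return
   true iff the call was emitted.  */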
4084static bool
4085build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
4086 internal_fn fn, tree lhs,
4087 tree op_0, tree op_1)
4088{
4089 if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
4090 return false;
4091
4092 gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4093 gimple_call_set_lhs (gs: call, lhs);
4094 gsi_insert_before (gsi, call, GSI_SAME_STMT);
4095
4096 return true;
4097}
4098
4099/*
4100 * Try to match saturation unsigned add with assign.
4101 * _7 = _4 + _6;
4102 * _8 = _4 > _7;
4103 * _9 = (long unsigned int) _8;
4104 * _10 = -_9;
4105 * _12 = _7 | _10;
4106 * =>
4107 * _12 = .SAT_ADD (_4, _6);
4108 *
4109 * Try to match IMM=-1 saturation signed add with assign.
4110 * <bb 2> [local count: 1073741824]:
4111 * x.0_1 = (unsigned char) x_5(D);
4112 * _3 = -x.0_1;
4113 * _10 = (signed char) _3;
4114 * _8 = x_5(D) & _10;
4115 * if (_8 < 0)
4116 * goto <bb 4>; [1.40%]
4117 * else
4118 * goto <bb 3>; [98.60%]
4119 * <bb 3> [local count: 434070867]:
4120 * _2 = x.0_1 + 255;
4121 * <bb 4> [local count: 1073741824]:
4122 * # _9 = PHI <_2(3), 128(2)>
4123 * _4 = (int8_t) _9;
4124 * =>
4125 * _4 = .SAT_ADD (x_5, -1); */
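/*
 * As a hedged source-level illustration (function and variable names
 * invented for the example), the unsigned pattern above typically comes
 * from the branchless saturating-add idiom:
 *
 *   uint32_t sat_addu (uint32_t x, uint32_t y)
 *   {
 *     uint32_t sum = x + y;
 *     return sum | -(uint32_t) (sum < x);
 *   }
 *
 * where ORing with the all-ones mask produced from the carry saturates
 * the result to UINT32_MAX on overflow. */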
4126
4127static void
4128match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt)
4129{
4130 tree ops[2];
4131 tree lhs = gimple_assign_lhs (gs: stmt);
4132
4133 if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4134 || gimple_signed_integer_sat_add (lhs, ops, NULL))
4135 build_saturation_binary_arith_call_and_replace (gsi, fn: IFN_SAT_ADD, lhs,
4136 op_0: ops[0], op_1: ops[1]);
4137}
4138
4139/*
4140 * Try to match saturation add with PHI.
4141 * For unsigned integer:
4142 * <bb 2> :
4143 * _1 = x_3(D) + y_4(D);
4144 * if (_1 >= x_3(D))
4145 * goto <bb 3>; [INV]
4146 * else
4147 * goto <bb 4>; [INV]
4148 *
4149 * <bb 3> :
4150 *
4151 * <bb 4> :
4152 * # _2 = PHI <255(2), _1(3)>
4153 * =>
4154 * <bb 4> [local count: 1073741824]:
4155 * _2 = .SAT_ADD (x_4(D), y_5(D));
4156 *
4157 * For signed integer:
4158 * x.0_1 = (long unsigned int) x_7(D);
4159 * y.1_2 = (long unsigned int) y_8(D);
4160 * _3 = x.0_1 + y.1_2;
4161 * sum_9 = (int64_t) _3;
4162 * _4 = x_7(D) ^ y_8(D);
4163 * _5 = x_7(D) ^ sum_9;
4164 * _15 = ~_4;
4165 * _16 = _5 & _15;
4166 * if (_16 < 0)
4167 * goto <bb 3>; [41.00%]
4168 * else
4169 * goto <bb 4>; [59.00%]
4170 * _11 = x_7(D) < 0;
4171 * _12 = (long int) _11;
4172 * _13 = -_12;
4173 * _14 = _13 ^ 9223372036854775807;
4174 * # _6 = PHI <_14(3), sum_9(2)>
4175 * =>
4176 * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4177
4178static bool
4179match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
4180{
4181 if (gimple_phi_num_args (gs: phi) != 2)
4182 return false;
4183
4184 tree ops[2];
4185 tree phi_result = gimple_phi_result (gs: phi);
4186
4187 if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
4188 && !gimple_signed_integer_sat_add (phi_result, ops, NULL))
4189 return false;
4190
4191 if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST)
4192 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4193
4194 return build_saturation_binary_arith_call_and_insert (gsi, fn: IFN_SAT_ADD,
4195 lhs: phi_result, op_0: ops[0],
4196 op_1: ops[1]);
4197}
4198
4199/*
4200 * Try to match saturation unsigned sub.
4201 * _1 = _4 >= _5;
4202 * _3 = _4 - _5;
4203 * _6 = _1 ? _3 : 0;
4204 * =>
4205 * _6 = .SAT_SUB (_4, _5); */
4206
4207static void
4208match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
4209{
4210 tree ops[2];
4211 tree lhs = gimple_assign_lhs (gs: stmt);
4212
4213 if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4214 build_saturation_binary_arith_call_and_replace (gsi, fn: IFN_SAT_SUB, lhs,
4215 op_0: ops[0], op_1: ops[1]);
4216}
4217
4218/*
4219 * Try to match saturation sub with PHI.
4220 * <bb 2> [local count: 1073741824]:
4221 * if (x_2(D) > y_3(D))
4222 * goto <bb 3>; [50.00%]
4223 * else
4224 * goto <bb 4>; [50.00%]
4225 *
4226 * <bb 3> [local count: 536870912]:
4227 * _4 = x_2(D) - y_3(D);
4228 *
4229 * <bb 4> [local count: 1073741824]:
4230 * # _1 = PHI <0(2), _4(3)>
4231 * =>
4232 * <bb 4> [local count: 1073741824]:
4233 * _1 = .SAT_SUB (x_2(D), y_3(D)); */
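/*
 * As a hedged source-level illustration (names invented for the example),
 * the PHI pattern above commonly comes from:
 *
 *   uint32_t sat_subu (uint32_t x, uint32_t y)
 *   {
 *     return x > y ? x - y : 0;
 *   }
 */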
4234static bool
4235match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
4236{
4237 if (gimple_phi_num_args (gs: phi) != 2)
4238 return false;
4239
4240 tree ops[2];
4241 tree phi_result = gimple_phi_result (gs: phi);
4242
4243 if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)
4244 && !gimple_signed_integer_sat_sub (phi_result, ops, NULL))
4245 return false;
4246
4247 return build_saturation_binary_arith_call_and_insert (gsi, fn: IFN_SAT_SUB,
4248 lhs: phi_result, op_0: ops[0],
4249 op_1: ops[1]);
4250}
4251
4252/*
4253 * Try to match unsigned saturation truncation.
4254 * uint16_t x_4(D);
4255 * uint8_t _6;
4256 * overflow_5 = x_4(D) > 255;
4257 * _1 = (unsigned char) x_4(D);
4258 * _2 = (unsigned char) overflow_5;
4259 * _3 = -_2;
4260 * _6 = _1 | _3;
4261 * =>
4262 * _6 = .SAT_TRUNC (x_4(D));
4263 * */
4264static void
4265match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
4266{
4267 tree ops[1];
4268 tree lhs = gimple_assign_lhs (gs: stmt);
4269 tree type = TREE_TYPE (lhs);
4270
4271 if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4272 && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4273 tree_pair (type, TREE_TYPE (ops[0])),
4274 OPTIMIZE_FOR_BOTH))
4275 {
4276 gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4277 gimple_call_set_lhs (gs: call, lhs);
4278 gsi_replace (gsi, call, /* update_eh_info */ true);
4279 }
4280}
4281
4282/*
4283 * Try to match saturation truncate.
4284 * Aka:
4285 * x.0_1 = (unsigned long) x_4(D);
4286 * _2 = x.0_1 + 2147483648;
4287 * if (_2 > 4294967295)
4288 * goto <bb 4>; [50.00%]
4289 * else
4290 * goto <bb 3>; [50.00%]
4291 * ;; succ: 4
4292 * ;; 3
4293 *
4294 * ;; basic block 3, loop depth 0
4295 * ;; pred: 2
4296 * trunc_5 = (int32_t) x_4(D);
4297 * goto <bb 5>; [100.00%]
4298 * ;; succ: 5
4299 *
4300 * ;; basic block 4, loop depth 0
4301 * ;; pred: 2
4302 * _7 = x_4(D) < 0;
4303 * _8 = (int) _7;
4304 * _9 = -_8;
4305 * _10 = _9 ^ 2147483647;
4306 * ;; succ: 5
4307 *
4308 * ;; basic block 5, loop depth 0
4309 * ;; pred: 3
4310 * ;; 4
4311 * # _3 = PHI <trunc_5(3), _10(4)>
4312 * =>
4313 * _6 = .SAT_TRUNC (x_4(D));
4314 */
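/*
 * As a hedged source-level illustration (names invented for the example),
 * the pattern above can come from a clamping conversion such as:
 *
 *   int32_t sat_trunc (int64_t x)
 *   {
 *     return x > INT32_MAX ? INT32_MAX : x < INT32_MIN ? INT32_MIN : x;
 *   }
 */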
4315
4316static bool
4317match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
4318{
4319 if (gimple_phi_num_args (gs: phi) != 2)
4320 return false;
4321
4322 tree ops[1];
4323 tree phi_result = gimple_phi_result (gs: phi);
4324 tree type = TREE_TYPE (phi_result);
4325
4326 if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
4327 && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
4328 return false;
4329
4330 if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
4331 tree_pair (type, TREE_TYPE (ops[0])),
4332 OPTIMIZE_FOR_BOTH))
4333 return false;
4334
4335 gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
4336 gimple_call_set_lhs (gs: call, lhs: phi_result);
4337 gsi_insert_before (gsi, call, GSI_SAME_STMT);
4338
4339 return true;
4340}
4341
4342/* Recognize for unsigned x
4343 x = y - z;
4344 if (x > y)
4345 where there are other uses of x and replace it with
4346 _7 = .SUB_OVERFLOW (y, z);
4347 x = REALPART_EXPR <_7>;
4348 _8 = IMAGPART_EXPR <_7>;
4349 if (_8)
4350 and similarly for addition.
4351
4352 Also recognize:
4353 yc = (type) y;
4354 zc = (type) z;
4355 x = yc + zc;
4356 if (x > max)
4357 where y and z have unsigned types with maximum max
4358 and there are other uses of x and all of those cast x
4359 back to that unsigned type and again replace it with
4360 _7 = .ADD_OVERFLOW (y, z);
4361 _9 = REALPART_EXPR <_7>;
4362 _8 = IMAGPART_EXPR <_7>;
4363 if (_8)
4364 and replace (utype) x with _9.
4365 Or with x >> popcount (max) instead of x > max.
4366
4367 Also recognize:
4368 x = ~z;
4369 if (y > x)
4370 and replace it with
4371 _7 = .ADD_OVERFLOW (y, z);
4372 _8 = IMAGPART_EXPR <_7>;
4373 if (_8)
4374
4375 And also recognize:
4376 z = x * y;
4377 if (x != 0)
4378 goto <bb 3>; [50.00%]
4379 else
4380 goto <bb 4>; [50.00%]
4381
4382 <bb 3> [local count: 536870913]:
4383 _2 = z / x;
4384 _9 = _2 != y;
4385 _10 = (int) _9;
4386
4387 <bb 4> [local count: 1073741824]:
4388 # iftmp.0_3 = PHI <_10(3), 0(2)>
4389 and replace it with
4390 _7 = .MUL_OVERFLOW (x, y);
4391 z = REALPART_EXPR <_7>;
4392 _8 = IMAGPART_EXPR <_7>;
4393 _9 = _8 != 0;
4394 iftmp.0_3 = (int) _9; */
4395
4396static bool
4397match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4398 enum tree_code code, bool *cfg_changed)
4399{
4400 tree lhs = gimple_assign_lhs (gs: stmt);
4401 tree type = TREE_TYPE (lhs);
4402 use_operand_p use_p;
4403 imm_use_iterator iter;
4404 bool use_seen = false;
4405 bool ovf_use_seen = false;
4406 gimple *use_stmt;
4407 gimple *add_stmt = NULL;
4408 bool add_first = false;
4409 gimple *cond_stmt = NULL;
4410 gimple *cast_stmt = NULL;
4411 tree cast_lhs = NULL_TREE;
4412
4413 gcc_checking_assert (code == PLUS_EXPR
4414 || code == MINUS_EXPR
4415 || code == MULT_EXPR
4416 || code == BIT_NOT_EXPR);
4417 if (!INTEGRAL_TYPE_P (type)
4418 || !TYPE_UNSIGNED (type)
4419 || has_zero_uses (var: lhs)
4420 || (code != PLUS_EXPR
4421 && code != MULT_EXPR
4422 && optab_handler (op: code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4423 TYPE_MODE (type)) == CODE_FOR_nothing))
4424 return false;
4425
4426 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
4427 tree rhs2 = gimple_assign_rhs2 (gs: stmt);
4428 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4429 {
4430 use_stmt = USE_STMT (use_p);
4431 if (is_gimple_debug (gs: use_stmt))
4432 continue;
4433
4434 tree other = NULL_TREE;
4435 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, other: &other))
4436 {
4437 if (code == BIT_NOT_EXPR)
4438 {
4439 gcc_assert (other);
4440 if (TREE_CODE (other) != SSA_NAME)
4441 return false;
4442 if (rhs2 == NULL)
4443 rhs2 = other;
4444 else
4445 return false;
4446 cond_stmt = use_stmt;
4447 }
4448 ovf_use_seen = true;
4449 }
4450 else
4451 {
4452 use_seen = true;
4453 if (code == MULT_EXPR
4454 && cast_stmt == NULL
4455 && gimple_assign_cast_p (s: use_stmt))
4456 {
4457 cast_lhs = gimple_assign_lhs (gs: use_stmt);
4458 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4459 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4460 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4461 == TYPE_PRECISION (TREE_TYPE (lhs))))
4462 cast_stmt = use_stmt;
4463 else
4464 cast_lhs = NULL_TREE;
4465 }
4466 }
4467 if (ovf_use_seen && use_seen)
4468 break;
4469 }
4470
4471 if (!ovf_use_seen
4472 && code == MULT_EXPR
4473 && cast_stmt)
4474 {
4475 if (TREE_CODE (rhs1) != SSA_NAME
4476 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4477 return false;
4478 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4479 {
4480 use_stmt = USE_STMT (use_p);
4481 if (is_gimple_debug (gs: use_stmt))
4482 continue;
4483
4484 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4485 NULL_TREE, NULL))
4486 ovf_use_seen = true;
4487 }
4488 }
4489 else
4490 {
4491 cast_stmt = NULL;
4492 cast_lhs = NULL_TREE;
4493 }
4494
4495 tree maxval = NULL_TREE;
4496 if (!ovf_use_seen
4497 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4498 || (code == PLUS_EXPR
4499 && optab_handler (op: uaddv4_optab,
4500 TYPE_MODE (type)) == CODE_FOR_nothing)
4501 || (code == MULT_EXPR
4502 && optab_handler (op: cast_stmt ? mulv4_optab : umulv4_optab,
4503 TYPE_MODE (type)) == CODE_FOR_nothing
4504 && (use_seen
4505 || cast_stmt
4506 || !can_mult_highpart_p (TYPE_MODE (type), true))))
4507 {
4508 if (code != PLUS_EXPR)
4509 return false;
4510 if (TREE_CODE (rhs1) != SSA_NAME
4511 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4512 return false;
4513 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4514 tree type1 = TREE_TYPE (rhs1);
4515 if (!INTEGRAL_TYPE_P (type1)
4516 || !TYPE_UNSIGNED (type1)
4517 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4518 || (TYPE_PRECISION (type1)
4519 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4520 return false;
4521 if (TREE_CODE (rhs2) == INTEGER_CST)
4522 {
4523 if (wi::ne_p (x: wi::rshift (x: wi::to_wide (t: rhs2),
4524 TYPE_PRECISION (type1),
4525 sgn: UNSIGNED), y: 0))
4526 return false;
4527 rhs2 = fold_convert (type1, rhs2);
4528 }
4529 else
4530 {
4531 if (TREE_CODE (rhs2) != SSA_NAME
4532 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4533 return false;
4534 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4535 tree type2 = TREE_TYPE (rhs2);
4536 if (!INTEGRAL_TYPE_P (type2)
4537 || !TYPE_UNSIGNED (type2)
4538 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4539 || (TYPE_PRECISION (type2)
4540 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4541 return false;
4542 }
4543 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4544 type = type1;
4545 else
4546 type = TREE_TYPE (rhs2);
4547
4548 if (TREE_CODE (type) != INTEGER_TYPE
4549 || optab_handler (op: uaddv4_optab,
4550 TYPE_MODE (type)) == CODE_FOR_nothing)
4551 return false;
4552
4553 maxval = wide_int_to_tree (type, cst: wi::max_value (TYPE_PRECISION (type),
4554 UNSIGNED));
4555 ovf_use_seen = false;
4556 use_seen = false;
4557 basic_block use_bb = NULL;
4558 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4559 {
4560 use_stmt = USE_STMT (use_p);
4561 if (is_gimple_debug (gs: use_stmt))
4562 continue;
4563
4564 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4565 {
4566 ovf_use_seen = true;
4567 use_bb = gimple_bb (g: use_stmt);
4568 }
4569 else
4570 {
4571 if (!gimple_assign_cast_p (s: use_stmt)
4572 || gimple_assign_rhs_code (gs: use_stmt) == VIEW_CONVERT_EXPR)
4573 return false;
4574 tree use_lhs = gimple_assign_lhs (gs: use_stmt);
4575 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4576 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4577 > TYPE_PRECISION (type)))
4578 return false;
4579 use_seen = true;
4580 }
4581 }
4582 if (!ovf_use_seen)
4583 return false;
4584 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4585 {
4586 if (!use_seen)
4587 return false;
4588 tree new_rhs1 = make_ssa_name (var: type);
4589 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4590 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4591 rhs1 = new_rhs1;
4592 }
4593 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4594 {
4595 if (!use_seen)
4596 return false;
4597 tree new_rhs2 = make_ssa_name (var: type);
4598 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4599 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4600 rhs2 = new_rhs2;
4601 }
4602 else if (!use_seen)
4603 {
4604 /* If there are no uses of the wider addition, check whether
4605 forwprop has created a narrower addition.
4606 Require it to be in the same bb as the overflow check. */
4607 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4608 {
4609 use_stmt = USE_STMT (use_p);
4610 if (is_gimple_debug (gs: use_stmt))
4611 continue;
4612
4613 if (use_stmt == stmt)
4614 continue;
4615
4616 if (!is_gimple_assign (gs: use_stmt)
4617 || gimple_bb (g: use_stmt) != use_bb
4618 || gimple_assign_rhs_code (gs: use_stmt) != PLUS_EXPR)
4619 continue;
4620
4621 if (gimple_assign_rhs1 (gs: use_stmt) == rhs1)
4622 {
4623 if (!operand_equal_p (gimple_assign_rhs2 (gs: use_stmt),
4624 rhs2, flags: 0))
4625 continue;
4626 }
4627 else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1)
4628 {
4629 if (gimple_assign_rhs1 (gs: use_stmt) != rhs2)
4630 continue;
4631 }
4632 else
4633 continue;
4634
4635 add_stmt = use_stmt;
4636 break;
4637 }
4638 if (add_stmt == NULL)
4639 return false;
4640
4641 /* If stmt and add_stmt are in the same bb, we need to find out
4642 which one is earlier. If they are in different bbs, we've
4643 checked add_stmt is in the same bb as one of the uses of the
4644 stmt lhs, so stmt needs to dominate add_stmt too. */
4645 if (gimple_bb (g: stmt) == gimple_bb (g: add_stmt))
4646 {
4647 gimple_stmt_iterator gsif = *gsi;
4648 gimple_stmt_iterator gsib = *gsi;
4649 int i;
4650 /* Search both forward and backward from stmt and have a small
4651 upper bound. */
4652 for (i = 0; i < 128; i++)
4653 {
4654 if (!gsi_end_p (i: gsib))
4655 {
4656 gsi_prev_nondebug (i: &gsib);
4657 if (gsi_stmt (i: gsib) == add_stmt)
4658 {
4659 add_first = true;
4660 break;
4661 }
4662 }
4663 else if (gsi_end_p (i: gsif))
4664 break;
4665 if (!gsi_end_p (i: gsif))
4666 {
4667 gsi_next_nondebug (i: &gsif);
4668 if (gsi_stmt (i: gsif) == add_stmt)
4669 break;
4670 }
4671 }
4672 if (i == 128)
4673 return false;
4674 if (add_first)
4675 *gsi = gsi_for_stmt (add_stmt);
4676 }
4677 }
4678 }
4679
4680 if (code == BIT_NOT_EXPR)
4681 *gsi = gsi_for_stmt (cond_stmt);
4682
4683 auto_vec<gimple *, 8> mul_stmts;
4684 if (code == MULT_EXPR && cast_stmt)
4685 {
4686 type = TREE_TYPE (cast_lhs);
4687 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4688 if (gimple_assign_cast_p (s: g)
4689 && useless_type_conversion_p (type,
4690 TREE_TYPE (gimple_assign_rhs1 (g)))
4691 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4692 rhs1 = gimple_assign_rhs1 (gs: g);
4693 else
4694 {
4695 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs1);
4696 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4697 rhs1 = gimple_assign_lhs (gs: g);
4698 mul_stmts.quick_push (obj: g);
4699 }
4700 if (TREE_CODE (rhs2) == INTEGER_CST)
4701 rhs2 = fold_convert (type, rhs2);
4702 else
4703 {
4704 g = SSA_NAME_DEF_STMT (rhs2);
4705 if (gimple_assign_cast_p (s: g)
4706 && useless_type_conversion_p (type,
4707 TREE_TYPE (gimple_assign_rhs1 (g)))
4708 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4709 rhs2 = gimple_assign_rhs1 (gs: g);
4710 else
4711 {
4712 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs2);
4713 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4714 rhs2 = gimple_assign_lhs (gs: g);
4715 mul_stmts.quick_push (obj: g);
4716 }
4717 }
4718 }
4719 tree ctype = build_complex_type (type);
4720 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4721 ? IFN_MUL_OVERFLOW
4722 : code != MINUS_EXPR
4723 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4724 2, rhs1, rhs2);
4725 tree ctmp = make_ssa_name (var: ctype);
4726 gimple_call_set_lhs (gs: g, lhs: ctmp);
4727 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4728 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (var: type) : lhs;
4729 gassign *g2;
4730 if (code != BIT_NOT_EXPR)
4731 {
4732 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4733 build1 (REALPART_EXPR, type, ctmp));
4734 if (maxval || cast_stmt)
4735 {
4736 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4737 if (add_first)
4738 *gsi = gsi_for_stmt (stmt);
4739 }
4740 else
4741 gsi_replace (gsi, g2, true);
4742 if (code == MULT_EXPR)
4743 {
4744 mul_stmts.quick_push (obj: g);
4745 mul_stmts.quick_push (obj: g2);
4746 if (cast_stmt)
4747 {
4748 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4749 gsi_replace (gsi, g2, true);
4750 mul_stmts.quick_push (obj: g2);
4751 }
4752 }
4753 }
4754 tree ovf = make_ssa_name (var: type);
4755 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4756 build1 (IMAGPART_EXPR, type, ctmp));
4757 if (code != BIT_NOT_EXPR)
4758 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4759 else
4760 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4761 if (code == MULT_EXPR)
4762 mul_stmts.quick_push (obj: g2);
4763
4764 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4765 {
4766 if (is_gimple_debug (gs: use_stmt))
4767 continue;
4768
4769 gimple *orig_use_stmt = use_stmt;
4770 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4771 maxval, NULL);
4772 if (ovf_use == 0)
4773 {
4774 gcc_assert (code != BIT_NOT_EXPR);
4775 if (maxval)
4776 {
4777 tree use_lhs = gimple_assign_lhs (gs: use_stmt);
4778 gimple_assign_set_rhs1 (gs: use_stmt, rhs: new_lhs);
4779 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4780 TREE_TYPE (new_lhs)))
4781 gimple_assign_set_rhs_code (s: use_stmt, code: SSA_NAME);
4782 update_stmt (s: use_stmt);
4783 }
4784 continue;
4785 }
4786 if (gimple_code (g: use_stmt) == GIMPLE_COND)
4787 {
4788 gcond *cond_stmt = as_a <gcond *> (p: use_stmt);
4789 gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf);
4790 gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0));
4791 gimple_cond_set_code (gs: cond_stmt, code: ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4792 }
4793 else
4794 {
4795 gcc_checking_assert (is_gimple_assign (use_stmt));
4796 if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS)
4797 {
4798 if (gimple_assign_rhs_code (gs: use_stmt) == RSHIFT_EXPR)
4799 {
4800 g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
4801 ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4802 ovf, build_int_cst (type, 0));
4803 gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
4804 gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
4805 gimple_assign_set_rhs_with_ops (gsi: &gsiu, code: NOP_EXPR,
4806 op1: gimple_assign_lhs (gs: g2));
4807 update_stmt (s: use_stmt);
4808 use_operand_p use;
4809 single_imm_use (var: gimple_assign_lhs (gs: use_stmt), use_p: &use,
4810 stmt: &use_stmt);
4811 if (gimple_code (g: use_stmt) == GIMPLE_COND)
4812 {
4813 gcond *cond_stmt = as_a <gcond *> (p: use_stmt);
4814 gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf);
4815 gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0));
4816 }
4817 else
4818 {
4819 gcc_checking_assert (is_gimple_assign (use_stmt));
4820 if (gimple_assign_rhs_class (gs: use_stmt)
4821 == GIMPLE_BINARY_RHS)
4822 {
4823 gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf);
4824 gimple_assign_set_rhs2 (gs: use_stmt,
4825 rhs: build_int_cst (type, 0));
4826 }
4827 else if (gimple_assign_cast_p (s: use_stmt))
4828 gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf);
4829 else
4830 {
4831 tree_code sc = gimple_assign_rhs_code (gs: use_stmt);
4832 gcc_checking_assert (sc == COND_EXPR);
4833 tree cond = gimple_assign_rhs1 (gs: use_stmt);
4834 cond = build2 (TREE_CODE (cond),
4835 boolean_type_node, ovf,
4836 build_int_cst (type, 0));
4837 gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond);
4838 }
4839 }
4840 update_stmt (s: use_stmt);
4841 gsi_remove (&gsiu, true);
4842 gsiu = gsi_for_stmt (g2);
4843 gsi_remove (&gsiu, true);
4844 continue;
4845 }
4846 else
4847 {
4848 gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf);
4849 gimple_assign_set_rhs2 (gs: use_stmt, rhs: build_int_cst (type, 0));
4850 gimple_assign_set_rhs_code (s: use_stmt,
4851 code: ovf_use == 1
4852 ? NE_EXPR : EQ_EXPR);
4853 }
4854 }
4855 else
4856 {
4857 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4858 == COND_EXPR);
4859 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4860 boolean_type_node, ovf,
4861 build_int_cst (type, 0));
4862 gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond);
4863 }
4864 }
4865 update_stmt (s: use_stmt);
4866 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4867 {
4868 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4869 maybe_optimize_guarding_check (mul_stmts, cond_stmt: use_stmt, div_stmt: orig_use_stmt,
4870 cfg_changed);
4871 use_operand_p use;
4872 gimple *cast_stmt;
4873 if (single_imm_use (var: gimple_assign_lhs (gs: orig_use_stmt), use_p: &use,
4874 stmt: &cast_stmt)
4875 && gimple_assign_cast_p (s: cast_stmt))
4876 {
4877 gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4878 gsi_remove (&gsi3, true);
4879 release_ssa_name (name: gimple_assign_lhs (gs: cast_stmt));
4880 }
4881 gsi_remove (&gsi2, true);
4882 release_ssa_name (name: gimple_assign_lhs (gs: orig_use_stmt));
4883 }
4884 }
4885 if (maxval)
4886 {
4887 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4888 gsi_remove (&gsi2, true);
4889 if (add_stmt)
4890 {
4891 gimple *g = gimple_build_assign (gimple_assign_lhs (gs: add_stmt),
4892 new_lhs);
4893 gsi2 = gsi_for_stmt (add_stmt);
4894 gsi_replace (&gsi2, g, true);
4895 }
4896 }
4897 else if (code == BIT_NOT_EXPR)
4898 {
4899 *gsi = gsi_for_stmt (stmt);
4900 gsi_remove (gsi, true);
4901 release_ssa_name (name: lhs);
4902 return true;
4903 }
4904 return false;
4905}
4906
4907/* Helper of match_uaddc_usubc. Look through an integral cast
4908 which should preserve the [0, 1] value range (unless the source has
4909 a 1-bit signed type), provided the cast has a single use. */
4910
4911static gimple *
4912uaddc_cast (gimple *g)
4913{
4914 if (!gimple_assign_cast_p (s: g))
4915 return g;
4916 tree op = gimple_assign_rhs1 (gs: g);
4917 if (TREE_CODE (op) == SSA_NAME
4918 && INTEGRAL_TYPE_P (TREE_TYPE (op))
4919 && (TYPE_PRECISION (TREE_TYPE (op)) > 1
4920 || TYPE_UNSIGNED (TREE_TYPE (op)))
4921 && has_single_use (var: gimple_assign_lhs (gs: g)))
4922 return SSA_NAME_DEF_STMT (op);
4923 return g;
4924}
4925
4926/* Helper of match_uaddc_usubc. Look through a NE_EXPR
4927 comparison with 0 which also preserves [0, 1] value range. */
4928
4929static gimple *
4930uaddc_ne0 (gimple *g)
4931{
4932 if (is_gimple_assign (gs: g)
4933 && gimple_assign_rhs_code (gs: g) == NE_EXPR
4934 && integer_zerop (gimple_assign_rhs2 (gs: g))
4935 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
4936 && has_single_use (var: gimple_assign_lhs (gs: g)))
4937 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
4938 return g;
4939}
4940
4941/* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4942 operand. */
4943
4944static bool
4945uaddc_is_cplxpart (gimple *g, tree_code part)
4946{
4947 return (is_gimple_assign (gs: g)
4948 && gimple_assign_rhs_code (gs: g) == part
4949 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
4950}
4951
4952/* Try to match e.g.
4953 _29 = .ADD_OVERFLOW (_3, _4);
4954 _30 = REALPART_EXPR <_29>;
4955 _31 = IMAGPART_EXPR <_29>;
4956 _32 = .ADD_OVERFLOW (_30, _38);
4957 _33 = REALPART_EXPR <_32>;
4958 _34 = IMAGPART_EXPR <_32>;
4959 _35 = _31 + _34;
4960 as
4961 _36 = .UADDC (_3, _4, _38);
4962 _33 = REALPART_EXPR <_36>;
4963 _35 = IMAGPART_EXPR <_36>;
4964 or
4965 _22 = .SUB_OVERFLOW (_6, _5);
4966 _23 = REALPART_EXPR <_22>;
4967 _24 = IMAGPART_EXPR <_22>;
4968 _25 = .SUB_OVERFLOW (_23, _37);
4969 _26 = REALPART_EXPR <_25>;
4970 _27 = IMAGPART_EXPR <_25>;
4971 _28 = _24 | _27;
4972 as
4973 _29 = .USUBC (_6, _5, _37);
4974 _26 = REALPART_EXPR <_29>;
4975 _28 = IMAGPART_EXPR <_29>;
4976 provided _38 or _37 above have [0, 1] range
4977 and _3, _4 and _30 or _6, _5 and _23 have unsigned
4978 integral types with the same precision. Whether + or | or ^ is
4979 used on the IMAGPART_EXPR results doesn't matter; with one of the
4980 added or subtracted operands in the [0, 1] range, at most one
4981 .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
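/* As a hedged source-level illustration (function and variable names are
   invented for the example), the first pattern above can arise from an
   add-with-carry step of a multi-limb addition written with the GCC
   overflow built-ins:

     unsigned long
     add_limb (unsigned long a, unsigned long b, _Bool carry_in,
               _Bool *carry_out)
     {
       unsigned long s;
       _Bool c1 = __builtin_add_overflow (a, b, &s);
       _Bool c2 = __builtin_add_overflow (s, carry_in, &s);
       *carry_out = c1 | c2;
       return s;
     }

   c1 and c2 correspond to the two IMAGPART_EXPRs above and c1 | c2 to the
   combined carry-out that the matcher folds into a single .UADDC.  */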
4982
4983static bool
4984match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
4985{
4986 tree rhs[4];
4987 rhs[0] = gimple_assign_rhs1 (gs: stmt);
4988 rhs[1] = gimple_assign_rhs2 (gs: stmt);
4989 rhs[2] = NULL_TREE;
4990 rhs[3] = NULL_TREE;
4991 tree type = TREE_TYPE (rhs[0]);
4992 if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
4993 return false;
4994
4995 auto_vec<gimple *, 2> temp_stmts;
4996 if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
4997 {
4998 /* If the overflow flag is ignored on the MSB limb, we can end up with
4999 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
5000 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
5001 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
5002 the limb below the MSB, but also create another .UADDC/.USUBC call
5003 for the last limb.
5004
5005 First look through assignments with the same rhs code as CODE,
5006 with the exception that subtraction of a constant is canonicalized
5007 into addition of its negation. rhs[0] will be the minuend for
5008 subtractions and one of the addends for addition; all other assigned
5009 rhs[i] operands will be subtrahends or other addends. */
5010 while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
5011 {
5012 gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
5013 if (has_single_use (var: rhs[0])
5014 && is_gimple_assign (gs: g)
5015 && (gimple_assign_rhs_code (gs: g) == code
5016 || (code == MINUS_EXPR
5017 && gimple_assign_rhs_code (gs: g) == PLUS_EXPR
5018 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
5019 {
5020 tree r2 = gimple_assign_rhs2 (gs: g);
5021 if (gimple_assign_rhs_code (gs: g) != code)
5022 {
5023 r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
5024 if (!r2)
5025 break;
5026 }
5027 rhs[0] = gimple_assign_rhs1 (gs: g);
5028 tree &r = rhs[2] ? rhs[3] : rhs[2];
5029 r = r2;
5030 temp_stmts.quick_push (obj: g);
5031 }
5032 else
5033 break;
5034 }
5035 for (int i = 1; i <= 2; ++i)
5036 while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
5037 {
5038 gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
5039 if (has_single_use (var: rhs[i])
5040 && is_gimple_assign (gs: g)
5041 && gimple_assign_rhs_code (gs: g) == PLUS_EXPR)
5042 {
5043 rhs[i] = gimple_assign_rhs1 (gs: g);
5044 if (rhs[2])
5045 rhs[3] = gimple_assign_rhs2 (gs: g);
5046 else
5047 rhs[2] = gimple_assign_rhs2 (gs: g);
5048 temp_stmts.quick_push (obj: g);
5049 }
5050 else
5051 break;
5052 }
5053 /* If there are just 3 addends or one minuend and two subtrahends,
5054 check for UADDC or USUBC being pattern recognized earlier.
5055 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
5056 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
5057 etc. */
5058 if (rhs[2] && !rhs[3])
5059 {
5060 for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
5061 if (TREE_CODE (rhs[i]) == SSA_NAME)
5062 {
5063 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5064 im = uaddc_ne0 (g: im);
5065 if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR))
5066 {
5067 /* We found one of the 3 addends or 2 subtrahends to be
5068 __imag__ of something, verify it is .UADDC/.USUBC. */
5069 tree rhs1 = gimple_assign_rhs1 (gs: im);
5070 gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
5071 tree ovf_lhs = NULL_TREE;
5072 tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
5073 if (gimple_call_internal_p (gs: ovf, fn: code == PLUS_EXPR
5074 ? IFN_ADD_OVERFLOW
5075 : IFN_SUB_OVERFLOW))
5076 {
5077 /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5078 This is for the case of 2 chained .UADDC/.USUBC,
5079 where the first one uses 0 carry-in and the second
5080 one ignores the carry-out.
5081 So, something like:
5082 _16 = .ADD_OVERFLOW (_1, _2);
5083 _17 = REALPART_EXPR <_16>;
5084 _18 = IMAGPART_EXPR <_16>;
5085 _15 = _3 + _4;
5086 _12 = _15 + _18;
5087 where the first 3 statements come from the lower
5088 limb addition and the last 2 from the higher limb
5089 which ignores carry-out. */
5090 ovf_lhs = gimple_call_lhs (gs: ovf);
5091 tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
5092 ovf_arg1 = gimple_call_arg (gs: ovf, index: 0);
5093 ovf_arg2 = gimple_call_arg (gs: ovf, index: 1);
5094 /* In that case we need to punt if the types don't
5095 match. */
5096 if (!types_compatible_p (type1: type, type2: ovf_lhs_type)
5097 || !types_compatible_p (type1: type, TREE_TYPE (ovf_arg1))
5098 || !types_compatible_p (type1: type,
5099 TREE_TYPE (ovf_arg2)))
5100 ovf_lhs = NULL_TREE;
5101 else
5102 {
5103 for (int i = (code == PLUS_EXPR ? 1 : 0);
5104 i >= 0; --i)
5105 {
5106 tree r = gimple_call_arg (gs: ovf, index: i);
5107 if (TREE_CODE (r) != SSA_NAME)
5108 continue;
5109 if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
5110 part: REALPART_EXPR))
5111 {
5112 /* Punt if one of the args which isn't
5113 subtracted is a __real__ part; matching
5114 here could then prevent a better match later.
5115 Consider:
5116 _3 = .ADD_OVERFLOW (_1, _2);
5117 _4 = REALPART_EXPR <_3>;
5118 _5 = IMAGPART_EXPR <_3>;
5119 _7 = .ADD_OVERFLOW (_4, _6);
5120 _8 = REALPART_EXPR <_7>;
5121 _9 = IMAGPART_EXPR <_7>;
5122 _12 = _10 + _11;
5123 _13 = _12 + _9;
5124 _14 = _13 + _5;
5125 We want to match this when called on
5126 the last stmt as a pair of .UADDC calls,
5127 but without this check we could turn
5128 that prematurely on _13 = _12 + _9;
5129 stmt into .UADDC with 0 carry-in just
5130 on the second .ADD_OVERFLOW call and
5131 another replacing the _12 and _13
5132 additions. */
5133 ovf_lhs = NULL_TREE;
5134 break;
5135 }
5136 }
5137 }
5138 if (ovf_lhs)
5139 {
5140 use_operand_p use_p;
5141 imm_use_iterator iter;
5142 tree re_lhs = NULL_TREE;
5143 FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
5144 {
5145 gimple *use_stmt = USE_STMT (use_p);
5146 if (is_gimple_debug (gs: use_stmt))
5147 continue;
5148 if (use_stmt == im)
5149 continue;
5150 if (!uaddc_is_cplxpart (g: use_stmt,
5151 part: REALPART_EXPR))
5152 {
5153 ovf_lhs = NULL_TREE;
5154 break;
5155 }
5156 re_lhs = gimple_assign_lhs (gs: use_stmt);
5157 }
5158 if (ovf_lhs && re_lhs)
5159 {
5160 FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
5161 {
5162 gimple *use_stmt = USE_STMT (use_p);
5163 if (is_gimple_debug (gs: use_stmt))
5164 continue;
5165 internal_fn ifn
5166 = gimple_call_internal_fn (gs: ovf);
5167 /* Punt if the __real__ of lhs is used
5168 in the same .*_OVERFLOW call.
5169 Consider:
5170 _3 = .ADD_OVERFLOW (_1, _2);
5171 _4 = REALPART_EXPR <_3>;
5172 _5 = IMAGPART_EXPR <_3>;
5173 _7 = .ADD_OVERFLOW (_4, _6);
5174 _8 = REALPART_EXPR <_7>;
5175 _9 = IMAGPART_EXPR <_7>;
5176 _12 = _10 + _11;
5177 _13 = _12 + _5;
5178 _14 = _13 + _9;
5179 We want to match this when called on
5180 the last stmt as a pair of .UADDC calls,
5181 but without this check we could turn
5182 that prematurely on _13 = _12 + _5;
5183 stmt into .UADDC with 0 carry-in just
5184 on the first .ADD_OVERFLOW call and
5185 another replacing the _12 and _13
5186 additions. */
5187 if (gimple_call_internal_p (gs: use_stmt, fn: ifn))
5188 {
5189 ovf_lhs = NULL_TREE;
5190 break;
5191 }
5192 }
5193 }
5194 }
5195 }
5196 if ((ovf_lhs
5197 || gimple_call_internal_p (gs: ovf,
5198 fn: code == PLUS_EXPR
5199 ? IFN_UADDC : IFN_USUBC))
5200 && (optab_handler (op: code == PLUS_EXPR
5201 ? uaddc5_optab : usubc5_optab,
5202 TYPE_MODE (type))
5203 != CODE_FOR_nothing))
5204 {
5205 /* And in that case build another .UADDC/.USUBC
5206 call for the most significant limb addition.
5207 The overflow bit is ignored here. */
5208 if (i != 2)
5209 std::swap (a&: rhs[i], b&: rhs[2]);
5210 gimple *g
5211 = gimple_build_call_internal (code == PLUS_EXPR
5212 ? IFN_UADDC
5213 : IFN_USUBC,
5214 3, rhs[0], rhs[1],
5215 rhs[2]);
5216 tree nlhs = make_ssa_name (var: build_complex_type (type));
5217 gimple_call_set_lhs (gs: g, lhs: nlhs);
5218 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5219 tree ilhs = gimple_assign_lhs (gs: stmt);
5220 g = gimple_build_assign (ilhs, REALPART_EXPR,
5221 build1 (REALPART_EXPR,
5222 TREE_TYPE (ilhs),
5223 nlhs));
5224 gsi_replace (gsi, g, true);
5225 /* And if it is initialized from the result of __imag__
5226 of the .{ADD,SUB}_OVERFLOW call, replace that
5227 call with a .U{ADD,SUB}C call with the same arguments,
5228 just with 0 added as the third argument. This isn't strictly
5229 necessary, as .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5230 produce the same result, but it may result in better
5231 generated code on some targets where the backend can
5232 better prepare for how the result will be used. */
5233 if (ovf_lhs)
5234 {
5235 tree zero = build_zero_cst (type);
5236 g = gimple_build_call_internal (code == PLUS_EXPR
5237 ? IFN_UADDC
5238 : IFN_USUBC,
5239 3, ovf_arg1,
5240 ovf_arg2, zero);
5241 gimple_call_set_lhs (gs: g, lhs: ovf_lhs);
5242 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
5243 gsi_replace (&gsi2, g, true);
5244 }
5245 return true;
5246 }
5247 }
5248 }
5249 return false;
5250 }
5251 if (code == MINUS_EXPR && !rhs[2])
5252 return false;
5253 if (code == MINUS_EXPR)
5254 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5255 So, for MINUS_EXPR swap the single added rhs operand (others are
5256 subtracted) to rhs[3]. */
5257 std::swap (a&: rhs[0], b&: rhs[3]);
5258 }
5259 /* Walk from both operands of STMT (for +/- even sometimes from
5260 all the 4 addends or 3 subtrahends), see through casts and != 0
5261 statements which would preserve [0, 1] range of values and
5262 check which is initialized from __imag__. */
5263 gimple *im1 = NULL, *im2 = NULL;
5264 for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
5265 if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
5266 {
5267 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
5268 im = uaddc_ne0 (g: im);
5269 if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR))
5270 {
5271 if (im1 == NULL)
5272 {
5273 im1 = im;
5274 if (i != 0)
5275 std::swap (a&: rhs[0], b&: rhs[i]);
5276 }
5277 else
5278 {
5279 im2 = im;
5280 if (i != 1)
5281 std::swap (a&: rhs[1], b&: rhs[i]);
5282 break;
5283 }
5284 }
5285 }
5286 /* If we don't find at least two, punt. */
5287 if (!im2)
5288 return false;
5289 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5290 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5291 uaddc5/usubc5 named pattern for the corresponding mode. */
5292 gimple *ovf1
5293 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
5294 gimple *ovf2
5295 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
5296 internal_fn ifn;
5297 if (!is_gimple_call (gs: ovf1)
5298 || !gimple_call_internal_p (gs: ovf1)
5299 || ((ifn = gimple_call_internal_fn (gs: ovf1)) != IFN_ADD_OVERFLOW
5300 && ifn != IFN_SUB_OVERFLOW)
5301 || !gimple_call_internal_p (gs: ovf2, fn: ifn)
5302 || optab_handler (op: ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
5303 TYPE_MODE (type)) == CODE_FOR_nothing
5304 || (rhs[2]
5305 && optab_handler (op: code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
5306 TYPE_MODE (type)) == CODE_FOR_nothing))
5307 return false;
5308 tree arg1, arg2, arg3 = NULL_TREE;
5309 gimple *re1 = NULL, *re2 = NULL;
5310 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5311 should be initialized from __real__ of the other of the two calls.
5312 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5313 second one. */
5314 for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
5315 for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
5316 {
5317 tree arg = gimple_call_arg (gs: ovf, index: i);
5318 if (TREE_CODE (arg) != SSA_NAME)
5319 continue;
5320 re1 = SSA_NAME_DEF_STMT (arg);
5321 if (uaddc_is_cplxpart (g: re1, part: REALPART_EXPR)
5322 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
5323 == (ovf == ovf1 ? ovf2 : ovf1)))
5324 {
5325 if (ovf == ovf1)
5326 {
5327 /* Make sure ovf2 is the .*_OVERFLOW call with argument
5328 initialized from __real__ of ovf1. */
5329 std::swap (a&: rhs[0], b&: rhs[1]);
5330 std::swap (a&: im1, b&: im2);
5331 std::swap (a&: ovf1, b&: ovf2);
5332 }
5333 arg3 = gimple_call_arg (gs: ovf, index: 1 - i);
5334 i = -1;
5335 break;
5336 }
5337 }
5338 if (!arg3)
5339 return false;
5340 arg1 = gimple_call_arg (gs: ovf1, index: 0);
5341 arg2 = gimple_call_arg (gs: ovf1, index: 1);
5342 if (!types_compatible_p (type1: type, TREE_TYPE (arg1)))
5343 return false;
5344 int kind[2] = { 0, 0 };
5345 tree arg_im[2] = { NULL_TREE, NULL_TREE };
5346 /* At least one of arg2 and arg3 should have type compatible
5347 with arg1/rhs[0], and the other one should have value in [0, 1]
5348 range. If both are in [0, 1] range and type compatible with
5349 arg1/rhs[0], look through casts and != 0 comparisons to try
5350 harder to determine which one is initialized to __imag__ of a
5351 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call result. */
5352 for (int i = 0; i < 2; ++i)
5353 {
5354 tree arg = i == 0 ? arg2 : arg3;
5355 if (types_compatible_p (type1: type, TREE_TYPE (arg)))
5356 kind[i] = 1;
5357 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
5358 || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
5359 && !TYPE_UNSIGNED (TREE_TYPE (arg))))
5360 continue;
5361 if (tree_zero_one_valued_p (arg))
5362 kind[i] |= 2;
5363 if (TREE_CODE (arg) == SSA_NAME)
5364 {
5365 gimple *g = SSA_NAME_DEF_STMT (arg);
5366 if (gimple_assign_cast_p (s: g))
5367 {
5368 tree op = gimple_assign_rhs1 (gs: g);
5369 if (TREE_CODE (op) == SSA_NAME
5370 && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5371 g = SSA_NAME_DEF_STMT (op);
5372 }
5373 g = uaddc_ne0 (g);
5374 if (!uaddc_is_cplxpart (g, part: IMAGPART_EXPR))
5375 continue;
5376 arg_im[i] = gimple_assign_lhs (gs: g);
5377 g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5378 if (!is_gimple_call (gs: g) || !gimple_call_internal_p (gs: g))
5379 continue;
5380 switch (gimple_call_internal_fn (gs: g))
5381 {
5382 case IFN_ADD_OVERFLOW:
5383 case IFN_SUB_OVERFLOW:
5384 case IFN_UADDC:
5385 case IFN_USUBC:
5386 break;
5387 default:
5388 continue;
5389 }
5390 kind[i] |= 4;
5391 }
5392 }
5393 /* Make arg2 the one with compatible type and arg3 the one
5394 with [0, 1] range. If both are true for both operands,
5395 prefer as arg3 the result of __imag__ of some ifn. */
5396 if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5397 {
5398 std::swap (a&: arg2, b&: arg3);
5399 std::swap (a&: kind[0], b&: kind[1]);
5400 std::swap (a&: arg_im[0], b&: arg_im[1]);
5401 }
5402 if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5403 return false;
5404 if (!has_single_use (var: gimple_assign_lhs (gs: im1))
5405 || !has_single_use (var: gimple_assign_lhs (gs: im2))
5406 || !has_single_use (var: gimple_assign_lhs (gs: re1))
5407 || num_imm_uses (var: gimple_call_lhs (gs: ovf1)) != 2)
5408 return false;
5409 /* Check that ovf2's result is used in __real__ and set re2
5410 to that statement. */
5411 use_operand_p use_p;
5412 imm_use_iterator iter;
5413 tree lhs = gimple_call_lhs (gs: ovf2);
5414 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5415 {
5416 gimple *use_stmt = USE_STMT (use_p);
5417 if (is_gimple_debug (gs: use_stmt))
5418 continue;
5419 if (use_stmt == im2)
5420 continue;
5421 if (re2)
5422 return false;
5423 if (!uaddc_is_cplxpart (g: use_stmt, part: REALPART_EXPR))
5424 return false;
5425 re2 = use_stmt;
5426 }
5427 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5428 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5429 gimple *g;
5430 if ((kind[1] & 4) != 0 && types_compatible_p (type1: type, TREE_TYPE (arg_im[1])))
5431 arg3 = arg_im[1];
5432 if ((kind[1] & 1) == 0)
5433 {
5434 if (TREE_CODE (arg3) == INTEGER_CST)
5435 arg3 = fold_convert (type, arg3);
5436 else
5437 {
5438 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, arg3);
5439 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5440 arg3 = gimple_assign_lhs (gs: g);
5441 }
5442 }
5443 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5444 ? IFN_UADDC : IFN_USUBC,
5445 3, arg1, arg2, arg3);
5446 tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5447 gimple_call_set_lhs (gs: g, lhs: nlhs);
5448 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5449 /* In the case where stmt is | or ^ of two overflow flags
5450 or addition of those, replace stmt with __imag__ of the above
5451 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5452 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5453 tree ilhs = rhs[2] ? make_ssa_name (var: type) : gimple_assign_lhs (gs: stmt);
5454 g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5455 build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5456 if (rhs[2])
5457 {
5458 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5459 /* Remove some further statements which can't be kept in the IL because
5460 they can use SSA_NAMEs whose setter is going to be removed too. */
5461 for (gimple *g2 : temp_stmts)
5462 {
5463 gsi2 = gsi_for_stmt (g2);
5464 gsi_remove (&gsi2, true);
5465 release_defs (g2);
5466 }
5467 }
5468 else
5469 gsi_replace (gsi, g, true);
5470 /* Remove some statements which can't be kept in the IL because they
5471 use SSA_NAME whose setter is going to be removed too. */
5472 tree rhs1 = rhs[1];
5473 for (int i = 0; i < 2; i++)
5474 if (rhs1 == gimple_assign_lhs (gs: im2))
5475 break;
5476 else
5477 {
5478 g = SSA_NAME_DEF_STMT (rhs1);
5479 rhs1 = gimple_assign_rhs1 (gs: g);
5480 gsi2 = gsi_for_stmt (g);
5481 gsi_remove (&gsi2, true);
5482 release_defs (g);
5483 }
5484 gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5485 gsi2 = gsi_for_stmt (im2);
5486 gsi_remove (&gsi2, true);
5487 release_defs (im2);
5488 /* Replace the re2 statement with __real__ of the newly added
5489 .UADDC/.USUBC call. */
5490 if (re2)
5491 {
5492 gsi2 = gsi_for_stmt (re2);
5493 tree rlhs = gimple_assign_lhs (gs: re2);
5494 g = gimple_build_assign (rlhs, REALPART_EXPR,
5495 build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5496 gsi_replace (&gsi2, g, true);
5497 }
5498 if (rhs[2])
5499 {
5500 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5501 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5502 replace stmt with __real__ of another .UADDC/.USUBC call which
5503 handles the most significant limb. Overflow flag from this is
5504 ignored. */
5505 g = gimple_build_call_internal (code == PLUS_EXPR
5506 ? IFN_UADDC : IFN_USUBC,
5507 3, rhs[3], rhs[2], ilhs);
5508 nlhs = make_ssa_name (TREE_TYPE (lhs));
5509 gimple_call_set_lhs (gs: g, lhs: nlhs);
5510 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5511 ilhs = gimple_assign_lhs (gs: stmt);
5512 g = gimple_build_assign (ilhs, REALPART_EXPR,
5513 build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5514 gsi_replace (gsi, g, true);
5515 }
5516 if (TREE_CODE (arg3) == SSA_NAME)
5517 {
5518 /* When pattern recognizing the second least significant limb
5519 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5520 check if the [0, 1] range argument (i.e. carry in) isn't the
5521 result of another .{ADD,SUB}_OVERFLOW call (one handling the
5522 least significant limb). Again look through casts and != 0. */
5523 gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5524 for (int i = 0; i < 2; ++i)
5525 {
5526 gimple *im4 = uaddc_cast (g: im3);
5527 if (im4 == im3)
5528 break;
5529 else
5530 im3 = im4;
5531 }
5532 im3 = uaddc_ne0 (g: im3);
5533 if (uaddc_is_cplxpart (g: im3, part: IMAGPART_EXPR))
5534 {
5535 gimple *ovf3
5536 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5537 if (gimple_call_internal_p (gs: ovf3, fn: ifn))
5538 {
5539 lhs = gimple_call_lhs (gs: ovf3);
5540 arg1 = gimple_call_arg (gs: ovf3, index: 0);
5541 arg2 = gimple_call_arg (gs: ovf3, index: 1);
5542 if (types_compatible_p (type1: type, TREE_TYPE (TREE_TYPE (lhs)))
5543 && types_compatible_p (type1: type, TREE_TYPE (arg1))
5544 && types_compatible_p (type1: type, TREE_TYPE (arg2)))
5545 {
5546 /* And if it is initialized from result of __imag__
5547 of .{ADD,SUB}_OVERFLOW call, replace that
5548 call with .U{ADD,SUB}C call with the same arguments,
5549 just 0 added as third argument. This isn't strictly
5550 necessary; .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5551 produce the same result, but it may result in better
5552 generated code on some targets where the backend can
5553 better prepare for how the result will be used. */
5554 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5555 ? IFN_UADDC : IFN_USUBC,
5556 3, arg1, arg2,
5557 build_zero_cst (type));
5558 gimple_call_set_lhs (gs: g, lhs);
5559 gsi2 = gsi_for_stmt (ovf3);
5560 gsi_replace (&gsi2, g, true);
5561 }
5562 }
5563 }
5564 }
5565 return true;
5566}
5567
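/* A minimal sketch of the kind of source the above matcher is aimed at
   (hypothetical user code, not part of this file), here a two-limb
   unsigned addition:

     bool
     add2 (unsigned long r[2], const unsigned long a[2],
           const unsigned long b[2])
     {
       unsigned long t;
       bool c1 = __builtin_add_overflow (a[0], b[0], &r[0]);
       bool c2 = __builtin_add_overflow (a[1], b[1], &t);
       bool c3 = __builtin_add_overflow (t, (unsigned long) c1, &r[1]);
       return c2 | c3;
     }

   The c2 | c3 statement is what match_uaddc_usubc is invoked on; the
   pair of .ADD_OVERFLOW calls for the second limb plus the ORed carry
   bits is replaced by a single .UADDC (a[1], b[1], c1) call when the
   target provides the uaddc<mode>5 named pattern, so the carry can be
   kept in the carry flag from limb to limb.  */
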
5568/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
5569 (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 if .POPCOUNT
5570 isn't a direct optab. Also handle .POPCOUNT (x) <= 1 and
5571 .POPCOUNT (x) > 1, which become (x & (x - 1)) == 0 and != 0. */
5572
5573static void
5574match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
5575{
5576 tree clhs, crhs;
5577 enum tree_code code;
5578 bool was_le = false;
5579 if (gimple_code (g: stmt) == GIMPLE_COND)
5580 {
5581 clhs = gimple_cond_lhs (gs: stmt);
5582 crhs = gimple_cond_rhs (gs: stmt);
5583 code = gimple_cond_code (gs: stmt);
5584 }
5585 else
5586 {
5587 clhs = gimple_assign_rhs1 (gs: stmt);
5588 crhs = gimple_assign_rhs2 (gs: stmt);
5589 code = gimple_assign_rhs_code (gs: stmt);
5590 }
5591 if (code != LE_EXPR && code != GT_EXPR
5592 && code != EQ_EXPR && code != NE_EXPR)
5593 return;
5594 if (code == LE_EXPR || code == GT_EXPR)
5595 was_le = true;
5596 if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
5597 return;
5598 gimple *call = SSA_NAME_DEF_STMT (clhs);
5599 combined_fn cfn = gimple_call_combined_fn (call);
5600 switch (cfn)
5601 {
5602 CASE_CFN_POPCOUNT:
5603 break;
5604 default:
5605 return;
5606 }
5607 if (!has_single_use (var: clhs))
5608 return;
5609 tree arg = gimple_call_arg (gs: call, index: 0);
5610 tree type = TREE_TYPE (arg);
5611 if (!INTEGRAL_TYPE_P (type))
5612 return;
5613 bool nonzero_arg = tree_expr_nonzero_p (arg);
5614 if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
5615 {
5616 /* Tell expand_POPCOUNT the popcount result is only used in equality
5617 comparison with one, so that it can decide based on rtx costs. */
5618 gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
5619 was_le ? integer_minus_one_node
5620 : (nonzero_arg ? integer_zero_node
5621 : integer_one_node));
5622 gimple_call_set_lhs (gs: g, lhs: gimple_call_lhs (gs: call));
5623 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5624 gsi_replace (&gsi2, g, true);
5625 return;
5626 }
5627 tree argm1 = make_ssa_name (var: type);
5628 gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
5629 build_int_cst (type, -1));
5630 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5631 g = gimple_build_assign (make_ssa_name (var: type),
5632 (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
5633 arg, argm1);
5634 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5635 tree_code cmpcode;
5636 if (was_le)
5637 {
5638 argm1 = build_zero_cst (type);
5639 cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
5640 }
5641 else if (nonzero_arg)
5642 {
5643 argm1 = build_zero_cst (type);
5644 cmpcode = code;
5645 }
5646 else
5647 cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
5648 if (gcond *cond = dyn_cast <gcond *> (p: stmt))
5649 {
5650 gimple_cond_set_lhs (gs: cond, lhs: gimple_assign_lhs (gs: g));
5651 gimple_cond_set_rhs (gs: cond, rhs: argm1);
5652 gimple_cond_set_code (gs: cond, code: cmpcode);
5653 }
5654 else
5655 {
5656 gimple_assign_set_rhs1 (gs: stmt, rhs: gimple_assign_lhs (gs: g));
5657 gimple_assign_set_rhs2 (gs: stmt, rhs: argm1);
5658 gimple_assign_set_rhs_code (s: stmt, code: cmpcode);
5659 }
5660 update_stmt (s: stmt);
5661 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5662 gsi_remove (&gsi2, true);
5663 release_defs (call);
5664}
5665
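/* An illustration of the transformation above (hypothetical user code,
   not part of this file):

     int
     has_single_bit (unsigned int x)
     {
       return __builtin_popcount (x) == 1;
     }

   On a target without a direct popcount pattern this becomes the
   equivalent of

     return (x ^ (x - 1)) > x - 1;

   For x == 0 the subtraction wraps to the maximum value and the
   comparison is false; for a power of two, x ^ (x - 1) sets the single
   bit and every bit below it, exceeding x - 1; with two or more bits
   set, x ^ (x - 1) only covers the bits from the lowest set bit down
   and is therefore not greater than x - 1.  */
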
5666/* Return true if target has support for divmod. */
5667
5668static bool
5669target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5670{
5671 /* If target supports hardware divmod insn, use it for divmod. */
5672 if (optab_handler (op: divmod_optab, mode) != CODE_FOR_nothing)
5673 return true;
5674
5675 /* Check if libfunc for divmod is available. */
5676 rtx libfunc = optab_libfunc (divmod_optab, mode);
5677 if (libfunc != NULL_RTX)
5678 {
5679 /* If optab_handler exists for div_optab, perhaps in a wider mode,
5680 we don't want to use the libfunc even if it exists for given mode. */
5681 machine_mode div_mode;
5682 FOR_EACH_MODE_FROM (div_mode, mode)
5683 if (optab_handler (op: div_optab, mode: div_mode) != CODE_FOR_nothing)
5684 return false;
5685
5686 return targetm.expand_divmod_libfunc != NULL;
5687 }
5688
5689 return false;
5690}
5691
5692/* Check if stmt is candidate for divmod transform. */
5693
5694static bool
5695divmod_candidate_p (gassign *stmt)
5696{
5697 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5698 machine_mode mode = TYPE_MODE (type);
5699 optab divmod_optab, div_optab;
5700
5701 if (TYPE_UNSIGNED (type))
5702 {
5703 divmod_optab = udivmod_optab;
5704 div_optab = udiv_optab;
5705 }
5706 else
5707 {
5708 divmod_optab = sdivmod_optab;
5709 div_optab = sdiv_optab;
5710 }
5711
5712 tree op1 = gimple_assign_rhs1 (gs: stmt);
5713 tree op2 = gimple_assign_rhs2 (gs: stmt);
5714
5715 /* Disable the transform if either is a constant, since division-by-constant
5716 may have specialized expansion. */
5717 if (CONSTANT_CLASS_P (op1))
5718 return false;
5719
5720 if (CONSTANT_CLASS_P (op2))
5721 {
5722 if (integer_pow2p (op2))
5723 return false;
5724
5725 if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5726 && element_precision (type) <= BITS_PER_WORD)
5727 return false;
5728
5729 /* If the divisor is not power of 2 and the precision wider than
5730 HWI, expand_divmod punts on that, so in that case it is better
5731 to use divmod optab or libfunc. Similarly if choose_multiplier
5732 might need pre/post shifts of BITS_PER_WORD or more. */
5733 }
5734
5735 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5736 expand using the [su]divv optabs. */
5737 if (TYPE_OVERFLOW_TRAPS (type))
5738 return false;
5739
5740 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5741 return false;
5742
5743 return true;
5744}
5745
5746/* This function looks for:
5747 t1 = a TRUNC_DIV_EXPR b;
5748 t2 = a TRUNC_MOD_EXPR b;
5749 and transforms it to the following sequence:
5750 complex_tmp = DIVMOD (a, b);
5751 t1 = REALPART_EXPR(complex_tmp);
5752 t2 = IMAGPART_EXPR(complex_tmp);
5753 For conditions enabling the transform see divmod_candidate_p().
5754
5755 The pass has three parts:
5756 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5757 other trunc_div_expr and trunc_mod_expr stmts.
5758 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5759 to stmts vector.
5760 3) Insert DIVMOD call just before top_stmt and update entries in
5761 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
5762 IMAGPART_EXPR for mod). */
5763
5764static bool
5765convert_to_divmod (gassign *stmt)
5766{
5767 if (stmt_can_throw_internal (cfun, stmt)
5768 || !divmod_candidate_p (stmt))
5769 return false;
5770
5771 tree op1 = gimple_assign_rhs1 (gs: stmt);
5772 tree op2 = gimple_assign_rhs2 (gs: stmt);
5773
5774 imm_use_iterator use_iter;
5775 gimple *use_stmt;
5776 auto_vec<gimple *> stmts;
5777
5778 gimple *top_stmt = stmt;
5779 basic_block top_bb = gimple_bb (g: stmt);
5780
5781 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
5782 at least stmt and possibly other trunc_div/trunc_mod stmts
5783 having the same operands as stmt. */
5784
5785 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5786 {
5787 if (is_gimple_assign (gs: use_stmt)
5788 && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR
5789 || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR)
5790 && operand_equal_p (op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0)
5791 && operand_equal_p (op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0))
5792 {
5793 if (stmt_can_throw_internal (cfun, use_stmt))
5794 continue;
5795
5796 basic_block bb = gimple_bb (g: use_stmt);
5797
5798 if (bb == top_bb)
5799 {
5800 if (gimple_uid (g: use_stmt) < gimple_uid (g: top_stmt))
5801 top_stmt = use_stmt;
5802 }
5803 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
5804 {
5805 top_bb = bb;
5806 top_stmt = use_stmt;
5807 }
5808 }
5809 }
5810
5811 tree top_op1 = gimple_assign_rhs1 (gs: top_stmt);
5812 tree top_op2 = gimple_assign_rhs2 (gs: top_stmt);
5813
5814 stmts.safe_push (obj: top_stmt);
5815 bool div_seen = (gimple_assign_rhs_code (gs: top_stmt) == TRUNC_DIV_EXPR);
5816
5817 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
5818 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5819 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5820 2nd loop ends up adding at least one trunc_mod_expr stmt. */
5821
5822 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5823 {
5824 if (is_gimple_assign (gs: use_stmt)
5825 && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR
5826 || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR)
5827 && operand_equal_p (top_op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0)
5828 && operand_equal_p (top_op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0))
5829 {
5830 if (use_stmt == top_stmt
5831 || stmt_can_throw_internal (cfun, use_stmt)
5832 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt), top_bb))
5833 continue;
5834
5835 stmts.safe_push (obj: use_stmt);
5836 if (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR)
5837 div_seen = true;
5838 }
5839 }
5840
5841 if (!div_seen)
5842 return false;
5843
5844 /* Part 3: Create libcall to internal fn DIVMOD:
5845 divmod_tmp = DIVMOD (op1, op2). */
5846
5847 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5848 tree res = make_temp_ssa_name (type: build_complex_type (TREE_TYPE (op1)),
5849 stmt: call_stmt, name: "divmod_tmp");
5850 gimple_call_set_lhs (gs: call_stmt, lhs: res);
5851 /* We rejected throwing statements above. */
5852 gimple_call_set_nothrow (s: call_stmt, nothrow_p: true);
5853
5854 /* Insert the call before top_stmt. */
5855 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5856 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5857
5858 widen_mul_stats.divmod_calls_inserted++;
5859
5860 /* Update all statements in stmts vector:
5861 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5862 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5863
5864 for (unsigned i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i)
5865 {
5866 tree new_rhs;
5867
5868 switch (gimple_assign_rhs_code (gs: use_stmt))
5869 {
5870 case TRUNC_DIV_EXPR:
5871 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5872 break;
5873
5874 case TRUNC_MOD_EXPR:
5875 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5876 break;
5877
5878 default:
5879 gcc_unreachable ();
5880 }
5881
5882 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
5883 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
5884 update_stmt (s: use_stmt);
5885 }
5886
5887 return true;
5888}
5889
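/* A short sketch of the effect of convert_to_divmod (hypothetical user
   code, not part of this file):

     void
     qr (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }

   When divmod_candidate_p and target_supports_divmod_p agree, the two
   statements are rewritten to the equivalent of

     divmod_tmp = .DIVMOD (a, b);
     *q = REALPART_EXPR <divmod_tmp>;
     *r = IMAGPART_EXPR <divmod_tmp>;

   so a single division instruction or a single divmod libcall computes
   both the quotient and the remainder.  */
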
5890/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5891 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5892 value is true iff we converted the statement. */
5893
5894static bool
5895convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
5896{
5897 tree lhs = gimple_assign_lhs (gs: stmt);
5898 tree stype = TREE_TYPE (lhs);
5899 tree sarg0 = gimple_assign_rhs1 (gs: stmt);
5900 tree sarg1 = gimple_assign_rhs2 (gs: stmt);
5901
5902 if (TREE_CODE (stype) != INTEGER_TYPE
5903 || TREE_CODE (sarg1) != INTEGER_CST
5904 || TREE_CODE (sarg0) != SSA_NAME
5905 || !tree_fits_uhwi_p (sarg1)
5906 || !has_single_use (var: sarg0))
5907 return false;
5908
5909 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
5910 if (!def)
5911 return false;
5912
5913 enum tree_code mcode = gimple_assign_rhs_code (gs: def);
5914 if (mcode == NOP_EXPR)
5915 {
5916 tree tmp = gimple_assign_rhs1 (gs: def);
5917 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (var: tmp))
5918 return false;
5919 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
5920 if (!def)
5921 return false;
5922 mcode = gimple_assign_rhs_code (gs: def);
5923 }
5924
5925 if (mcode != WIDEN_MULT_EXPR
5926 || gimple_bb (g: def) != gimple_bb (g: stmt))
5927 return false;
5928 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
5929 if (TREE_CODE (mtype) != INTEGER_TYPE
5930 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
5931 return false;
5932
5933 tree mop1 = gimple_assign_rhs1 (gs: def);
5934 tree mop2 = gimple_assign_rhs2 (gs: def);
5935 tree optype = TREE_TYPE (mop1);
5936 bool unsignedp = TYPE_UNSIGNED (optype);
5937 unsigned int prec = TYPE_PRECISION (optype);
5938
5939 if (unsignedp != TYPE_UNSIGNED (mtype)
5940 || TYPE_PRECISION (mtype) != 2 * prec)
5941 return false;
5942
5943 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
5944 if (bits < prec || bits >= 2 * prec)
5945 return false;
5946
5947 /* For the time being, require operands to have the same sign. */
5948 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
5949 return false;
5950
5951 machine_mode mode = TYPE_MODE (optype);
5952 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
5953 if (optab_handler (op: tab, mode) == CODE_FOR_nothing)
5954 return false;
5955
5956 location_t loc = gimple_location (g: stmt);
5957 tree highpart1 = build_and_insert_binop (gsi, loc, name: "highparttmp",
5958 code: MULT_HIGHPART_EXPR, arg0: mop1, arg1: mop2);
5959 tree highpart2 = highpart1;
5960 tree ntype = optype;
5961
5962 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
5963 {
5964 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
5965 : signed_type_for (optype);
5966 highpart2 = build_and_insert_cast (gsi, loc, type: ntype, val: highpart1);
5967 }
5968 if (bits > prec)
5969 highpart2 = build_and_insert_binop (gsi, loc, name: "highparttmp",
5970 code: RSHIFT_EXPR, arg0: highpart2,
5971 arg1: build_int_cst (ntype, bits - prec));
5972
5973 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
5974 gsi_replace (gsi, new_stmt, true);
5975
5976 widen_mul_stats.highpart_mults_inserted++;
5977 return true;
5978}
5979
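/* A sketch of the shape convert_mult_to_highpart looks for (hypothetical
   user code, assuming 32-bit int and 64-bit long long):

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   The WIDEN_MULT_EXPR followed by a right shift of at least the source
   precision is turned into a MULT_HIGHPART_EXPR on the narrow mode when
   the umul_highpart (or smul_highpart) optab is available; any shift
   amount beyond the precision is applied to the high part afterwards.  */
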
5980/* If target has spaceship<MODE>3 expander, pattern recognize
5981 <bb 2> [local count: 1073741824]:
5982 if (a_2(D) == b_3(D))
5983 goto <bb 6>; [34.00%]
5984 else
5985 goto <bb 3>; [66.00%]
5986
5987 <bb 3> [local count: 708669601]:
5988 if (a_2(D) < b_3(D))
5989 goto <bb 6>; [1.04%]
5990 else
5991 goto <bb 4>; [98.96%]
5992
5993 <bb 4> [local count: 701299439]:
5994 if (a_2(D) > b_3(D))
5995 goto <bb 5>; [48.89%]
5996 else
5997 goto <bb 6>; [51.11%]
5998
5999 <bb 5> [local count: 342865295]:
6000
6001 <bb 6> [local count: 1073741824]:
6002 and turn it into:
6003 <bb 2> [local count: 1073741824]:
6004 _1 = .SPACESHIP (a_2(D), b_3(D), 0);
6005 if (_1 == 0)
6006 goto <bb 6>; [34.00%]
6007 else
6008 goto <bb 3>; [66.00%]
6009
6010 <bb 3> [local count: 708669601]:
6011 if (_1 == -1)
6012 goto <bb 6>; [1.04%]
6013 else
6014 goto <bb 4>; [98.96%]
6015
6016 <bb 4> [local count: 701299439]:
6017 if (_1 == 1)
6018 goto <bb 5>; [48.89%]
6019 else
6020 goto <bb 6>; [51.11%]
6021
6022 <bb 5> [local count: 342865295]:
6023
6024 <bb 6> [local count: 1073741824]:
6025 so that the backend can emit optimal comparison and
6026 conditional jump sequence. If the
6027 <bb 6> [local count: 1073741824]:
6028 above has a single PHI like:
6029 # _27 = PHI<0(2), -1(3), 2(4), 1(5)>
6030 then replace it with effectively
6031 _1 = .SPACESHIP (a_2(D), b_3(D), 1);
6032 _27 = _1; */
6033
6034static void
6035optimize_spaceship (gcond *stmt)
6036{
6037 enum tree_code code = gimple_cond_code (gs: stmt);
6038 if (code != EQ_EXPR && code != NE_EXPR)
6039 return;
6040 tree arg1 = gimple_cond_lhs (gs: stmt);
6041 tree arg2 = gimple_cond_rhs (gs: stmt);
6042 if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
6043 && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
6044 || optab_handler (op: spaceship_optab,
6045 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
6046 || operand_equal_p (arg1, arg2, flags: 0))
6047 return;
6048
6049 basic_block bb0 = gimple_bb (g: stmt), bb1, bb2 = NULL;
6050 edge em1 = NULL, e1 = NULL, e2 = NULL;
6051 bb1 = EDGE_SUCC (bb0, 1)->dest;
6052 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
6053 bb1 = EDGE_SUCC (bb0, 0)->dest;
6054
6055 gcond *g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb1));
6056 if (g == NULL
6057 || !single_pred_p (bb: bb1)
6058 || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
6059 ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0)
6060 : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0)
6061 || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0)))
6062 || !cond_only_block_p (bb1))
6063 return;
6064
6065 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
6066 ? LT_EXPR : GT_EXPR);
6067 switch (gimple_cond_code (gs: g))
6068 {
6069 case LT_EXPR:
6070 case LE_EXPR:
6071 break;
6072 case GT_EXPR:
6073 case GE_EXPR:
6074 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
6075 break;
6076 default:
6077 return;
6078 }
6079
6080 for (int i = 0; i < 2; ++i)
6081 {
6082 /* With NaNs, </<=/>/>= are false, so we need to look for the
6083 third comparison on the false edge from whatever non-equality
6084 comparison the second comparison is. */
6085 if (HONOR_NANS (TREE_TYPE (arg1))
6086 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
6087 continue;
6088
6089 bb2 = EDGE_SUCC (bb1, i)->dest;
6090 g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb2));
6091 if (g == NULL
6092 || !single_pred_p (bb: bb2)
6093 || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
6094 ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0)
6095 : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0)
6096 || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0)))
6097 || !cond_only_block_p (bb2)
6098 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
6099 continue;
6100
6101 enum tree_code ccode2
6102 = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) ? LT_EXPR : GT_EXPR);
6103 switch (gimple_cond_code (gs: g))
6104 {
6105 case LT_EXPR:
6106 case LE_EXPR:
6107 break;
6108 case GT_EXPR:
6109 case GE_EXPR:
6110 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
6111 break;
6112 default:
6113 continue;
6114 }
6115 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
6116 continue;
6117
6118 if ((ccode == LT_EXPR)
6119 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
6120 {
6121 em1 = EDGE_SUCC (bb1, 1 - i);
6122 e1 = EDGE_SUCC (bb2, 0);
6123 e2 = EDGE_SUCC (bb2, 1);
6124 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
6125 std::swap (a&: e1, b&: e2);
6126 }
6127 else
6128 {
6129 e1 = EDGE_SUCC (bb1, 1 - i);
6130 em1 = EDGE_SUCC (bb2, 0);
6131 e2 = EDGE_SUCC (bb2, 1);
6132 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
6133 std::swap (a&: em1, b&: e2);
6134 }
6135 break;
6136 }
6137
6138 if (em1 == NULL)
6139 {
6140 if ((ccode == LT_EXPR)
6141 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
6142 {
6143 em1 = EDGE_SUCC (bb1, 1);
6144 e1 = EDGE_SUCC (bb1, 0);
6145 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6146 }
6147 else
6148 {
6149 em1 = EDGE_SUCC (bb1, 0);
6150 e1 = EDGE_SUCC (bb1, 1);
6151 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
6152 }
6153 }
6154
6155 /* Check if there is a single bb to which all failed conditions
6156 jump (perhaps through an empty block) and whether it contains
6157 a single integral PHI which merges just the values -1, 0, 1, X
6158 (or -1, 0, 1 when NaNs can't happen). In that case pass a nonzero
6159 value rather than 0 as the last .SPACESHIP argument to tell the
6160 backend it might consider different code generation and just cast
6161 the result of .SPACESHIP to the PHI result. X above is some value
6162 other than -1, 0, 1; it is 2 for libstdc++ and -127 for libc++. */
6163 tree arg3 = integer_zero_node;
6164 edge e = EDGE_SUCC (bb0, 0);
6165 if (e->dest == bb1)
6166 e = EDGE_SUCC (bb0, 1);
6167 basic_block bbp = e->dest;
6168 gphi *phi = NULL;
6169 for (gphi_iterator psi = gsi_start_phis (bbp);
6170 !gsi_end_p (i: psi); gsi_next (i: &psi))
6171 {
6172 gphi *gp = psi.phi ();
6173 tree res = gimple_phi_result (gs: gp);
6174
6175 if (phi != NULL
6176 || virtual_operand_p (op: res)
6177 || !INTEGRAL_TYPE_P (TREE_TYPE (res))
6178 || TYPE_PRECISION (TREE_TYPE (res)) < 2)
6179 {
6180 phi = NULL;
6181 break;
6182 }
6183 phi = gp;
6184 }
6185 if (phi
6186 && integer_zerop (gimple_phi_arg_def_from_edge (gs: phi, e))
6187 && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
6188 {
6189 HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1;
6190 for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
6191 {
6192 edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
6193 if (e3->dest != bbp)
6194 {
6195 if (!empty_block_p (e3->dest)
6196 || !single_succ_p (bb: e3->dest)
6197 || single_succ (bb: e3->dest) != bbp)
6198 {
6199 phi = NULL;
6200 break;
6201 }
6202 e3 = single_succ_edge (bb: e3->dest);
6203 }
6204 tree a = gimple_phi_arg_def_from_edge (gs: phi, e: e3);
6205 if (TREE_CODE (a) != INTEGER_CST
6206 || (i == 0 && !integer_onep (a))
6207 || (i == 1 && !integer_all_onesp (a)))
6208 {
6209 phi = NULL;
6210 break;
6211 }
6212 if (i == 2)
6213 {
6214 tree minv = TYPE_MIN_VALUE (signed_char_type_node);
6215 tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
6216 widest_int w = widest_int::from (x: wi::to_wide (t: a), sgn: SIGNED);
6217 if ((w >= -1 && w <= 1)
6218 || w < wi::to_widest (t: minv)
6219 || w > wi::to_widest (t: maxv))
6220 {
6221 phi = NULL;
6222 break;
6223 }
6224 argval = w.to_shwi ();
6225 }
6226 }
6227 if (phi)
6228 arg3 = build_int_cst (integer_type_node,
6229 TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
6230 }
6231
6232 /* For integral <=> comparisons only use .SPACESHIP if it is turned
6233 into an integer (-1, 0, 1). */
6234 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)
6235 return;
6236
6237 gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
6238 tree lhs = make_ssa_name (integer_type_node);
6239 gimple_call_set_lhs (gs: gc, lhs);
6240 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6241 gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
6242
6243 wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
6244 wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
6245 if (HONOR_NANS (TREE_TYPE (arg1)))
6246 {
6247 if (arg3 == integer_zero_node)
6248 wmax = wi::two (TYPE_PRECISION (integer_type_node));
6249 else if (tree_int_cst_sgn (arg3) < 0)
6250 wmin = wi::to_wide (t: arg3);
6251 else
6252 wmax = wi::to_wide (t: arg3);
6253 }
6254 int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
6255 set_range_info (lhs, vr);
6256
6257 if (arg3 != integer_zero_node)
6258 {
6259 tree type = TREE_TYPE (gimple_phi_result (phi));
6260 if (!useless_type_conversion_p (type, integer_type_node))
6261 {
6262 tree tem = make_ssa_name (var: type);
6263 gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
6264 gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
6265 lhs = tem;
6266 }
6267 SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
6268 gimple_cond_set_lhs (gs: stmt, boolean_false_node);
6269 gimple_cond_set_rhs (gs: stmt, boolean_false_node);
6270 gimple_cond_set_code (gs: stmt, code: (e->flags & EDGE_TRUE_VALUE)
6271 ? EQ_EXPR : NE_EXPR);
6272 update_stmt (s: stmt);
6273 return;
6274 }
6275
6276 gimple_cond_set_lhs (gs: stmt, lhs);
6277 gimple_cond_set_rhs (gs: stmt, integer_zero_node);
6278 update_stmt (s: stmt);
6279
6280 gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb1));
6281 gimple_cond_set_lhs (gs: cond, lhs);
6282 if (em1->src == bb1 && e2 != em1)
6283 {
6284 gimple_cond_set_rhs (gs: cond, integer_minus_one_node);
6285 gimple_cond_set_code (gs: cond, code: (em1->flags & EDGE_TRUE_VALUE)
6286 ? EQ_EXPR : NE_EXPR);
6287 }
6288 else
6289 {
6290 gcc_assert (e1->src == bb1 && e2 != e1);
6291 gimple_cond_set_rhs (gs: cond, integer_one_node);
6292 gimple_cond_set_code (gs: cond, code: (e1->flags & EDGE_TRUE_VALUE)
6293 ? EQ_EXPR : NE_EXPR);
6294 }
6295 update_stmt (s: cond);
6296
6297 if (e2 != e1 && e2 != em1)
6298 {
6299 cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb2));
6300 gimple_cond_set_lhs (gs: cond, lhs);
6301 if (em1->src == bb2)
6302 gimple_cond_set_rhs (gs: cond, integer_minus_one_node);
6303 else
6304 {
6305 gcc_assert (e1->src == bb2);
6306 gimple_cond_set_rhs (gs: cond, integer_one_node);
6307 }
6308 gimple_cond_set_code (gs: cond,
6309 code: (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
6310 update_stmt (s: cond);
6311 }
6312}
6313
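/* For reference, the C++ source that typically produces the CFG shown
   above is simply (hypothetical user code, C++20):

     #include <compare>

     auto
     cmp (double a, double b)
     {
       return a <=> b;
     }

   After the inlined operator<=> is lowered to the ==/</> conditional
   jumps, optimize_spaceship collapses the chain into one .SPACESHIP
   call, letting the backend emit a single comparison followed by
   conditional branches or conditional moves, and, when the merging PHI
   is recognized, cast the .SPACESHIP result directly to the PHI
   result.  */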
6314
6315/* Find integer multiplications where the operands are extended from
6316 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
6317 or MULT_HIGHPART_EXPR where appropriate. */
6318
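/* A small example of the widening-multiplication case (hypothetical
   user code, assuming 32-bit int and 64-bit long long):

     long long
     wmul (int a, int b)
     {
       return (long long) a * b;
     }

   The MULT_EXPR whose operands are sign-extended from int is replaced
   by a WIDEN_MULT_EXPR so that a single widening multiply instruction
   can be used when the target provides one.  */
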
6319namespace {
6320
6321const pass_data pass_data_optimize_widening_mul =
6322{
6323 .type: GIMPLE_PASS, /* type */
6324 .name: "widening_mul", /* name */
6325 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
6326 .tv_id: TV_TREE_WIDEN_MUL, /* tv_id */
6327 PROP_ssa, /* properties_required */
6328 .properties_provided: 0, /* properties_provided */
6329 .properties_destroyed: 0, /* properties_destroyed */
6330 .todo_flags_start: 0, /* todo_flags_start */
6331 TODO_update_ssa, /* todo_flags_finish */
6332};
6333
6334class pass_optimize_widening_mul : public gimple_opt_pass
6335{
6336public:
6337 pass_optimize_widening_mul (gcc::context *ctxt)
6338 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
6339 {}
6340
6341 /* opt_pass methods: */
6342 bool gate (function *) final override
6343 {
6344 return flag_expensive_optimizations && optimize;
6345 }
6346
6347 unsigned int execute (function *) final override;
6348
6349}; // class pass_optimize_widening_mul
6350
6351/* Walker class to perform the transformation in reverse dominance order. */
6352
6353class math_opts_dom_walker : public dom_walker
6354{
6355public:
6356 /* Constructor. CFG_CHANGED_P is a pointer to a boolean flag that will be
6357 set if walking modifies the CFG. */
6358
6359 math_opts_dom_walker (bool *cfg_changed_p)
6360 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
6361 m_cfg_changed_p (cfg_changed_p) {}
6362
6363 /* The actual actions performed in the walk. */
6364
6365 void after_dom_children (basic_block) final override;
6366
6367 /* Set of results of chains of multiply and add statement combinations that
6368 were not transformed into FMAs because of active deferring. */
6369 hash_set<tree> m_last_result_set;
6370
6371 /* Pointer to a flag of the user that needs to be set if CFG has been
6372 modified. */
6373 bool *m_cfg_changed_p;
6374};
6375
6376void
6377math_opts_dom_walker::after_dom_children (basic_block bb)
6378{
6379 gimple_stmt_iterator gsi;
6380
6381 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
6382
6383 for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (i: psi);
6384 psi = psi_next)
6385 {
6386 psi_next = psi;
6387 gsi_next (i: &psi_next);
6388
6389 gimple_stmt_iterator gsi = gsi_after_labels (bb);
6390 gphi *phi = psi.phi ();
6391
6392 if (match_saturation_add (gsi: &gsi, phi)
6393 || match_saturation_sub (gsi: &gsi, phi)
6394 || match_saturation_trunc (gsi: &gsi, phi))
6395 remove_phi_node (&psi, /* release_lhs_p */ false);
6396 }
6397
6398 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);)
6399 {
6400 gimple *stmt = gsi_stmt (i: gsi);
6401 enum tree_code code;
6402
6403 if (is_gimple_assign (gs: stmt))
6404 {
6405 code = gimple_assign_rhs_code (gs: stmt);
6406 switch (code)
6407 {
6408 case MULT_EXPR:
6409 if (!convert_mult_to_widen (stmt, gsi: &gsi)
6410 && !convert_expand_mult_copysign (stmt, gsi: &gsi)
6411 && convert_mult_to_fma (mul_stmt: stmt,
6412 op1: gimple_assign_rhs1 (gs: stmt),
6413 op2: gimple_assign_rhs2 (gs: stmt),
6414 state: &fma_state))
6415 {
6416 gsi_remove (&gsi, true);
6417 release_defs (stmt);
6418 continue;
6419 }
6420 match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p);
6421 match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6422 break;
6423
6424 case PLUS_EXPR:
6425 match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6426 match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6427 /* fall-through */
6428 case MINUS_EXPR:
6429 if (!convert_plusminus_to_widen (gsi: &gsi, stmt, code))
6430 {
6431 match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p);
6432 if (gsi_stmt (i: gsi) == stmt)
6433 match_uaddc_usubc (gsi: &gsi, stmt, code);
6434 }
6435 break;
6436
6437 case BIT_NOT_EXPR:
6438 if (match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p))
6439 continue;
6440 break;
6441
6442 case TRUNC_MOD_EXPR:
6443 convert_to_divmod (stmt: as_a<gassign *> (p: stmt));
6444 break;
6445
6446 case RSHIFT_EXPR:
6447 convert_mult_to_highpart (stmt: as_a<gassign *> (p: stmt), gsi: &gsi);
6448 break;
6449
6450 case BIT_IOR_EXPR:
6451 match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6452 match_unsigned_saturation_trunc (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6453 /* fall-through */
6454 case BIT_XOR_EXPR:
6455 match_uaddc_usubc (gsi: &gsi, stmt, code);
6456 break;
6457
6458 case EQ_EXPR:
6459 case NE_EXPR:
6460 case LE_EXPR:
6461 case GT_EXPR:
6462 match_single_bit_test (gsi: &gsi, stmt);
6463 break;
6464
6465 case COND_EXPR:
6466 case BIT_AND_EXPR:
6467 match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6468 break;
6469
6470 case NOP_EXPR:
6471 match_unsigned_saturation_trunc (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6472 match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt));
6473 break;
6474
6475 default:;
6476 }
6477 }
6478 else if (is_gimple_call (gs: stmt))
6479 {
6480 switch (gimple_call_combined_fn (stmt))
6481 {
6482 CASE_CFN_POW:
6483 if (gimple_call_lhs (gs: stmt)
6484 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
6485 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
6486 &dconst2)
6487 && convert_mult_to_fma (mul_stmt: stmt,
6488 op1: gimple_call_arg (gs: stmt, index: 0),
6489 op2: gimple_call_arg (gs: stmt, index: 0),
6490 state: &fma_state))
6491 {
6492 unlink_stmt_vdef (stmt);
6493 if (gsi_remove (&gsi, true)
6494 && gimple_purge_dead_eh_edges (bb))
6495 *m_cfg_changed_p = true;
6496 release_defs (stmt);
6497 continue;
6498 }
6499 break;
6500
6501 case CFN_COND_MUL:
6502 if (convert_mult_to_fma (mul_stmt: stmt,
6503 op1: gimple_call_arg (gs: stmt, index: 1),
6504 op2: gimple_call_arg (gs: stmt, index: 2),
6505 state: &fma_state,
6506 mul_cond: gimple_call_arg (gs: stmt, index: 0)))
6507
6508 {
6509 gsi_remove (&gsi, true);
6510 release_defs (stmt);
6511 continue;
6512 }
6513 break;
6514
6515 case CFN_COND_LEN_MUL:
6516 if (convert_mult_to_fma (mul_stmt: stmt,
6517 op1: gimple_call_arg (gs: stmt, index: 1),
6518 op2: gimple_call_arg (gs: stmt, index: 2),
6519 state: &fma_state,
6520 mul_cond: gimple_call_arg (gs: stmt, index: 0),
6521 mul_len: gimple_call_arg (gs: stmt, index: 4),
6522 mul_bias: gimple_call_arg (gs: stmt, index: 5)))
6523
6524 {
6525 gsi_remove (&gsi, true);
6526 release_defs (stmt);
6527 continue;
6528 }
6529 break;
6530
6531 case CFN_LAST:
6532 cancel_fma_deferring (state: &fma_state);
6533 break;
6534
6535 default:
6536 break;
6537 }
6538 }
6539 else if (gimple_code (g: stmt) == GIMPLE_COND)
6540 {
6541 match_single_bit_test (gsi: &gsi, stmt);
6542 optimize_spaceship (stmt: as_a <gcond *> (p: stmt));
6543 }
6544 gsi_next (i: &gsi);
6545 }
6546 if (fma_state.m_deferring_p
6547 && fma_state.m_initial_phi)
6548 {
6549 gcc_checking_assert (fma_state.m_last_result);
6550 if (!last_fma_candidate_feeds_initial_phi (state: &fma_state,
6551 last_result_set: &m_last_result_set))
6552 cancel_fma_deferring (state: &fma_state);
6553 else
6554 m_last_result_set.add (k: fma_state.m_last_result);
6555 }
6556}
6557
6558
6559unsigned int
6560pass_optimize_widening_mul::execute (function *fun)
6561{
6562 bool cfg_changed = false;
6563
6564 memset (s: &widen_mul_stats, c: 0, n: sizeof (widen_mul_stats));
6565 calculate_dominance_info (CDI_DOMINATORS);
6566 renumber_gimple_stmt_uids (cfun);
6567
6568 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6569
6570 statistics_counter_event (fun, "widening multiplications inserted",
6571 widen_mul_stats.widen_mults_inserted);
6572 statistics_counter_event (fun, "widening maccs inserted",
6573 widen_mul_stats.maccs_inserted);
6574 statistics_counter_event (fun, "fused multiply-adds inserted",
6575 widen_mul_stats.fmas_inserted);
6576 statistics_counter_event (fun, "divmod calls inserted",
6577 widen_mul_stats.divmod_calls_inserted);
6578 statistics_counter_event (fun, "highpart multiplications inserted",
6579 widen_mul_stats.highpart_mults_inserted);
6580
6581 return cfg_changed ? TODO_cleanup_cfg : 0;
6582}
6583
6584} // anon namespace
6585
6586gimple_opt_pass *
6587make_pass_optimize_widening_mul (gcc::context *ctxt)
6588{
6589 return new pass_optimize_widening_mul (ctxt);
6590}
6591
