/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop-invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.  We make
   this a parameter (in practice the value comes from the
   targetm.min_divisions_for_recip_mul hook used below), and it shall
   be called N in the remainder of this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, we can instead insert
   divisions elsewhere, in blocks that are common dominators of those
   that have the division.)

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
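
/* As a minimal GIMPLE-level sketch of the replacement performed by this
   mini-pass (SSA names and the "reciptmp" numbering are illustrative,
   not literal dump output), two divisions by the same SSA name

	a_5 = b_3 / x_1;
	c_6 = d_4 / x_1;

   become, once the division count reaches the target's threshold N,

	reciptmp_7 = 1.0e+0 / x_1;
	a_5 = b_3 * reciptmp_7;
	c_6 = d_4 * reciptmp_7;

   with the reciprocal computation placed as described above.  */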

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "tree-eh.h"
#include "targhooks.h"
#include "domwalk.h"
#include "tree-ssa-math-opts.h"
#include "dbgcnt.h"

/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, whose
     children list is headed by CHILDREN.  */
  occurrence (basic_block bb, struct occurrence *children)
    : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = nullptr;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
178 | |
179 | static struct |
180 | { |
181 | /* Number of 1.0/X ops inserted. */ |
182 | int rdivs_inserted; |
183 | |
184 | /* Number of 1.0/FUNC ops inserted. */ |
185 | int rfuncs_inserted; |
186 | } reciprocal_stats; |
187 | |
188 | static struct |
189 | { |
190 | /* Number of cexpi calls inserted. */ |
191 | int inserted; |
192 | |
193 | /* Number of conversions removed. */ |
194 | int conv_removed; |
195 | |
196 | } sincos_stats; |
197 | |
198 | static struct |
199 | { |
200 | /* Number of widening multiplication ops inserted. */ |
201 | int widen_mults_inserted; |
202 | |
203 | /* Number of integer multiply-and-accumulate ops inserted. */ |
204 | int maccs_inserted; |
205 | |
206 | /* Number of fp fused multiply-add ops inserted. */ |
207 | int fmas_inserted; |
208 | |
209 | /* Number of divmod calls inserted. */ |
210 | int divmod_calls_inserted; |
211 | |
212 | /* Number of highpart multiplication ops inserted. */ |
213 | int highpart_mults_inserted; |
214 | } widen_mul_stats; |
215 | |
216 | /* The instance of "struct occurrence" representing the highest |
217 | interesting block in the dominator tree. */ |
218 | static struct occurrence *occ_head; |
219 | |
220 | /* Allocation pool for getting instances of "struct occurrence". */ |
221 | static object_allocator<occurrence> *occ_pool; |
222 | |
223 | void* occurrence::operator new (size_t n) |
224 | { |
225 | gcc_assert (n == sizeof(occurrence)); |
226 | return occ_pool->allocate_raw (); |
227 | } |
228 | |
229 | void occurrence::operator delete (void *occ, size_t n) |
230 | { |
231 | gcc_assert (n == sizeof(occurrence)); |
232 | occ_pool->remove_raw (object: occ); |
233 | } |
234 | |
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = new occurrence (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
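
/* For illustration (block numbers hypothetical): if divisions were
   registered in bb5 and in bb7, and their nearest common dominator is
   bb3, the walk above synthesizes an occurrence for bb3 as well, so
   the subset of the dominator tree becomes

       occ_head -> occ(bb3)
                     children: occ(bb7) -> occ(bb5)

   even though bb3 itself contains no division (its bb_has_division
   stays false, which matters when trapping math is active).  */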

/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */

static inline void
register_division_in (basic_block bb, int importance)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = new occurrence (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions += importance;
}


/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}


/* Return whether USE_STMT is a floating-point division by DEF.  */
static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def
	 && !stmt_can_throw_internal (cfun, use_stmt);
}

/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
static inline bool
is_mult_by (gimple *use_stmt, tree def, tree a)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
    {
      tree op0 = gimple_assign_rhs1 (use_stmt);
      tree op1 = gimple_assign_rhs2 (use_stmt);

      return (op0 == def && op1 == a)
	     || (op0 == a && op1 == def);
    }
  return false;
}

/* Return whether USE_STMT is DEF * DEF.  */
static inline bool
is_square_of (gimple *use_stmt, tree def)
{
  return is_mult_by (use_stmt, def, def);
}

/* Return whether USE_STMT is a floating-point division by
   DEF * DEF.  */
static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
      && !stmt_can_throw_internal (cfun, use_stmt))
    {
      tree denominator = gimple_assign_rhs2 (use_stmt);
      if (TREE_CODE (denominator) == SSA_NAME)
	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
    }
  return false;
}
402 | |
403 | /* Walk the subset of the dominator tree rooted at OCC, setting the |
404 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in |
405 | the given basic block. The field may be left NULL, of course, |
406 | if it is not possible or profitable to do the optimization. |
407 | |
408 | DEF_BSI is an iterator pointing at the statement defining DEF. |
409 | If RECIP_DEF is set, a dominator already has a computation that can |
410 | be used. |
411 | |
412 | If should_insert_square_recip is set, then this also inserts |
413 | the square of the reciprocal immediately after the definition |
414 | of the reciprocal. */ |
415 | |
416 | static void |
417 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
418 | tree def, tree recip_def, tree square_recip_def, |
419 | int should_insert_square_recip, int threshold) |
420 | { |
421 | tree type; |
422 | gassign *new_stmt, *new_square_stmt; |
423 | gimple_stmt_iterator gsi; |
424 | struct occurrence *occ_child; |
425 | |
426 | if (!recip_def |
427 | && (occ->bb_has_division || !flag_trapping_math) |
428 | /* Divide by two as all divisions are counted twice in |
429 | the costing loop. */ |
430 | && occ->num_divisions / 2 >= threshold) |
431 | { |
432 | /* Make a variable with the replacement and substitute it. */ |
433 | type = TREE_TYPE (def); |
434 | recip_def = create_tmp_reg (type, "reciptmp" ); |
435 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
436 | build_one_cst (type), def); |
437 | |
438 | if (should_insert_square_recip) |
439 | { |
440 | square_recip_def = create_tmp_reg (type, "powmult_reciptmp" ); |
441 | new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, |
442 | recip_def, recip_def); |
443 | } |
444 | |
445 | if (occ->bb_has_division) |
446 | { |
447 | /* Case 1: insert before an existing division. */ |
448 | gsi = gsi_after_labels (bb: occ->bb); |
449 | while (!gsi_end_p (i: gsi) |
450 | && (!is_division_by (use_stmt: gsi_stmt (i: gsi), def)) |
451 | && (!is_division_by_square (use_stmt: gsi_stmt (i: gsi), def))) |
452 | gsi_next (i: &gsi); |
453 | |
454 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
455 | if (should_insert_square_recip) |
456 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); |
457 | } |
458 | else if (def_gsi && occ->bb == gsi_bb (i: *def_gsi)) |
459 | { |
460 | /* Case 2: insert right after the definition. Note that this will |
461 | never happen if the definition statement can throw, because in |
462 | that case the sole successor of the statement's basic block will |
463 | dominate all the uses as well. */ |
464 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
465 | if (should_insert_square_recip) |
466 | gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT); |
467 | } |
468 | else |
469 | { |
470 | /* Case 3: insert in a basic block not containing defs/uses. */ |
471 | gsi = gsi_after_labels (bb: occ->bb); |
472 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
473 | if (should_insert_square_recip) |
474 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); |
475 | } |
476 | |
477 | reciprocal_stats.rdivs_inserted++; |
478 | |
479 | occ->recip_def_stmt = new_stmt; |
480 | } |
481 | |
482 | occ->recip_def = recip_def; |
483 | occ->square_recip_def = square_recip_def; |
484 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
485 | insert_reciprocals (def_gsi, occ: occ_child, def, recip_def, |
486 | square_recip_def, should_insert_square_recip, |
487 | threshold); |
488 | } |

/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  delete occ;

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = t;
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */

static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  bool mult_on_main_path = false;

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      if (is_gimple_debug (use_stmt))
	continue;
      if (is_square_of (use_stmt, x))
	{
	  sqr_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else if (is_mult_by (use_stmt, x, a))
	{
	  mult_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else
	has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
	 x = 1.0 / t
	 into
	 tmp1 = 1.0 / a
	 r1 = tmp1.  */

      sqr_ssa_name
	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
	{
	  fprintf (dump_file, "Replacing original division\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
	  fprintf (dump_file, "with new division\n");
	}
      stmt
	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
			       gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      delete_div = false;
      gimple *sqr_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
	  update_stmt (sqr_stmt);
	}
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
	 r2 = t (the original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Replacing squaring multiplication\n");
	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	      fprintf (dump_file, "with assignment\n");
	    }
	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
	  fold_stmt_inplace (&gsi2);
	  update_stmt (mult_stmt);
	  if (dump_file)
	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	}
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
	= gimple_build_assign (x, MULT_EXPR,
			       orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
      update_stmt (stmt);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    release_ssa_name (x);
}

/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		     replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.  */

internal_fn
internal_fn_reciprocal (gcall *call)
{
  internal_fn ifn;

  switch (gimple_call_combined_fn (call))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      ifn = IFN_RSQRT;
      break;

    default:
      return IFN_LAST;
    }

  tree_pair types = direct_internal_fn_types (ifn, call);
  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
    return IFN_LAST;

  return ifn;
}

/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    return optimize && flag_reciprocal_math;
  }
  unsigned int execute (function *) final override;

}; // class pass_cse_reciprocals

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
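      /* For instance (SSA names illustrative), when all uses of t_1 are
	 divisions by it,

	     t_1 = sqrtf (b_2);
	     x_3 = a_4 / t_1;

	 can become

	     t_1 = .RSQRT (b_2);
	     x_3 = a_4 * t_1;

	 using the IFN_RSQRT mapping from internal_fn_reciprocal above,
	 or a target-provided reciprocal built-in when no internal
	 function is available.  */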
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}

/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.  */
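
/* For example (SSA names and types illustrative), given two equivalent
   casts of the same source

       _2 = (float) x_1;   <-- prevails (dominates the other cast)
       ...
       _5 = (float) x_1;   <-- redundant; uses of _5 are replaced by _2

   the dominated conversion is removed.  This is a sketch of the effect,
   not of the exact statements produced.  */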

static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
	    gsi_next (&gsi);
	  use_dominates = !gsi_end_p (gsi);
	}
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
			       gimple_bb (def_stmt)))
	use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
			       gimple_bb (use_stmt)))
	use_dominates = true;
      else
	continue;

      if (use_dominates)
	{
	  std::swap (name, lhs);
	  std::swap (def_stmt, use_stmt);
	}
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
	{
	  sincos_stats.conv_removed++;

	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  replace_uses_by (lhs, name);
	  if (gsi_remove (&gsi, true)
	      && gimple_purge_dead_eh_edges (use_bb))
	    *cfg_changed = true;
	  release_defs (use_stmt);
	}
    }

  return name;
}

/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB, dominates it, or if *TOP_BB is
   not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi, CSEing the result in this case.
   We first walk over all immediate uses of the argument, collecting
   in a vector the statements that we can CSE; in a second pass we
   replace each statement's rhs with a REALPART or IMAGPART expression
   on the result of the cexpi call we insert before the use statement
   that dominates all other candidates.  */
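
/* As a sketch of the transformation (SSA names illustrative), for a
   double argument x_1:

       s_2 = sin (x_1);
       c_3 = cos (x_1);

   becomes

       sincostmp_4 = __builtin_cexpi (x_1);
       s_2 = IMAGPART_EXPR <sincostmp_4>;
       c_3 = REALPART_EXPR <sincostmp_4>;  */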

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type = NULL_TREE;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  name = execute_cse_conv_1 (name, &cfg_changed);

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	  continue;
	}

      tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
      if (!type)
	{
	  type = t;
	  t = TREE_TYPE (name);
	}
      /* This checks that NAME has the right type in the first round,
	 and, in subsequent rounds, that the built_in type is the same
	 type, or a compatible type.  */
      if (type != t && !types_compatible_p (type, t))
	return false;
    }
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits, of the window used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
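
/* For example, with the 3-bit window, an exponent of 259 (0b100000011)
   has its low odd digit 3 split off first: x**259 = x**256 * x**3.
   x**256 is then computed by repeated squaring and x**3 via the
   look-up table below.  (This walkthrough is illustrative; see
   powi_as_mults_1 for the actual recursion.)  */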

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
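
/* For instance, powi_table[5] == 3 below, so x**5 is evaluated as
   x**3 * x**2; recursively, x**3 = x**2 * x and x**2 = x * x, giving
   powi_cost (5) == 3 multiplications in total.  */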

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };


/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = absu_hwi (n);

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}

/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 unsigned HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_one_cst (type);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  if ((n >= -1 && n <= 2)
      || (optimize_function_for_speed_p (cfun)
	  && powi_cost (n) <= POWI_MAX_MULTS))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
1586 | |
1587 | /* Build a gimple call statement that calls FN with argument ARG. |
1588 | Set the lhs of the call statement to a fresh SSA name. Insert the |
1589 | statement prior to GSI's current position, and return the fresh |
1590 | SSA name. */ |
1591 | |
1592 | static tree |
1593 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
1594 | tree fn, tree arg) |
1595 | { |
1596 | gcall *call_stmt; |
1597 | tree ssa_target; |
1598 | |
1599 | call_stmt = gimple_build_call (fn, 1, arg); |
ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
gimple_set_lhs (call_stmt, ssa_target);
gimple_set_location (call_stmt, loc);
1603 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); |
1604 | |
1605 | return ssa_target; |
1606 | } |
1607 | |
1608 | /* Build a gimple binary operation with the given CODE and arguments |
1609 | ARG0, ARG1, assigning the result to a new SSA name for variable |
1610 | TARGET. Insert the statement prior to GSI's current position, and |
return the fresh SSA name. */
1612 | |
1613 | static tree |
1614 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, |
1615 | const char *name, enum tree_code code, |
1616 | tree arg0, tree arg1) |
1617 | { |
1618 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
1619 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
gimple_set_location (stmt, loc);
1621 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1622 | return result; |
1623 | } |
1624 | |
1625 | /* Build a gimple reference operation with the given CODE and argument |
1626 | ARG, assigning the result to a new SSA name of TYPE with NAME. |
1627 | Insert the statement prior to GSI's current position, and return |
1628 | the fresh SSA name. */ |
1629 | |
1630 | static inline tree |
1631 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, |
1632 | const char *name, enum tree_code code, tree arg0) |
1633 | { |
1634 | tree result = make_temp_ssa_name (type, NULL, name); |
1635 | gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
gimple_set_location (stmt, loc);
1637 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1638 | return result; |
1639 | } |
1640 | |
1641 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
1642 | prior to GSI's current position, and return the fresh SSA name. */ |
1643 | |
1644 | static tree |
1645 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, |
1646 | tree type, tree val) |
1647 | { |
tree result = make_ssa_name (type);
gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
gimple_set_location (stmt, loc);
1651 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1652 | return result; |
1653 | } |
1654 | |
1655 | struct pow_synth_sqrt_info |
1656 | { |
1657 | bool *factors; |
1658 | unsigned int deepest; |
1659 | unsigned int num_mults; |
1660 | }; |
1661 | |
1662 | /* Return true iff the real value C can be represented as a |
1663 | sum of powers of 0.5 up to N. That is: |
1664 | C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. |
1665 | Record in INFO the various parameters of the synthesis algorithm such |
1666 | as the factors a[i], the maximum 0.5 power and the number of |
1667 | multiplications that will be required. */ |
1668 | |
1669 | bool |
1670 | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, |
1671 | struct pow_synth_sqrt_info *info) |
1672 | { |
1673 | REAL_VALUE_TYPE factor = dconsthalf; |
1674 | REAL_VALUE_TYPE remainder = c; |
1675 | |
1676 | info->deepest = 0; |
1677 | info->num_mults = 0; |
memset (info->factors, 0, n * sizeof (bool));
1679 | |
1680 | for (unsigned i = 0; i < n; i++) |
1681 | { |
1682 | REAL_VALUE_TYPE res; |
1683 | |
/* If something inexact happened, bail out now. */
1685 | if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) |
1686 | return false; |
1687 | |
1688 | /* We have hit zero. The number is representable as a sum |
1689 | of powers of 0.5. */ |
1690 | if (real_equal (&res, &dconst0)) |
1691 | { |
1692 | info->factors[i] = true; |
1693 | info->deepest = i + 1; |
1694 | return true; |
1695 | } |
1696 | else if (!REAL_VALUE_NEGATIVE (res)) |
1697 | { |
1698 | remainder = res; |
1699 | info->factors[i] = true; |
1700 | info->num_mults++; |
1701 | } |
1702 | else |
1703 | info->factors[i] = false; |
1704 | |
1705 | real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); |
1706 | } |
1707 | return false; |
1708 | } |
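
/* Worked example: C == 0.625 == binary 0.101 == 0.5 + 0.125, so with
   N >= 3 the loop above produces factors == { 1, 0, 1 }, deepest == 3
   and num_mults == 1 (the term that makes the remainder hit zero is
   not counted as a multiplication).  The synthesized expression is
   then sqrt (x) * sqrt (sqrt (sqrt (x))), i.e. one multiplication.  */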
1709 | |
1710 | /* Return the tree corresponding to FN being applied |
1711 | to ARG N times at GSI and LOC. |
1712 | Look up previous results from CACHE if need be. |
1713 | cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ |
1714 | |
1715 | static tree |
1716 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, |
1717 | tree fn, location_t loc, tree *cache) |
1718 | { |
1719 | tree res = cache[n]; |
1720 | if (!res) |
1721 | { |
tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
res = build_and_insert_call (gsi, loc, fn, prev);
1724 | cache[n] = res; |
1725 | } |
1726 | |
1727 | return res; |
1728 | } |
1729 | |
1730 | /* Print to STREAM the repeated application of function FNAME to ARG |
1731 | N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print: |
1732 | "foo (foo (x))". */ |
1733 | |
1734 | static void |
1735 | print_nested_fn (FILE* stream, const char *fname, const char* arg, |
1736 | unsigned int n) |
1737 | { |
1738 | if (n == 0) |
fprintf (stream, "%s", arg);
else
{
fprintf (stream, "%s (", fname);
print_nested_fn (stream, fname, arg, n - 1);
fprintf (stream, ")");
1745 | } |
1746 | } |
1747 | |
1748 | /* Print to STREAM the fractional sequence of sqrt chains |
1749 | applied to ARG, described by INFO. Used for the dump file. */ |
1750 | |
1751 | static void |
1752 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, |
1753 | struct pow_synth_sqrt_info *info) |
1754 | { |
1755 | for (unsigned int i = 0; i < info->deepest; i++) |
1756 | { |
1757 | bool is_set = info->factors[i]; |
1758 | if (is_set) |
1759 | { |
print_nested_fn (stream, "sqrt", arg, i + 1);
if (i != info->deepest - 1)
fprintf (stream, " * ");
1763 | } |
1764 | } |
1765 | } |
1766 | |
1767 | /* Print to STREAM a representation of raising ARG to an integer |
1768 | power N. Used for the dump file. */ |
1769 | |
1770 | static void |
1771 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) |
1772 | { |
1773 | if (n > 1) |
fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
else if (n == 1)
fprintf (stream, "%s", arg);
1777 | } |
1778 | |
1779 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of |
1780 | square roots. Place at GSI and LOC. Limit the maximum depth |
1781 | of the sqrt chains to MAX_DEPTH. Return the tree holding the |
1782 | result of the expanded sequence or NULL_TREE if the expansion failed. |
1783 | |
1784 | This routine assumes that ARG1 is a real number with a fractional part |
1785 | (the integer exponent case will have been handled earlier in |
1786 | gimple_expand_builtin_pow). |
1787 | |
1788 | For ARG1 > 0.0: |
1789 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part |
1790 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and |
1791 | FRAC_PART == ARG1 - WHOLE_PART: |
1792 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where |
1793 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains |
1794 | if it can be expressed as such, that is if FRAC_PART satisfies: |
1795 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) |
1796 | where integer a[i] is either 0 or 1. |
1797 | |
1798 | Example: |
1799 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) |
1800 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) |
1801 | |
1802 | For ARG1 < 0.0 there are two approaches: |
1803 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) |
1804 | is calculated as above. |
1805 | |
1806 | Example: |
1807 | POW (x, -5.625) == 1.0 / POW (x, 5.625) |
1808 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) |
1809 | |
1810 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) |
1811 | FRAC_PART := ARG1 - WHOLE_PART |
1812 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). |
1813 | Example: |
1814 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) |
1815 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) |
1816 | |
1817 | For ARG1 < 0.0 we choose between (A) and (B) depending on |
1818 | how many multiplications we'd have to do. |
1819 | So, for the example in (B): POW (x, -5.875), if we were to |
1820 | follow algorithm (A) we would produce: |
1821 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) |
1822 | which contains more multiplications than approach (B). |
1823 | |
1824 | Hopefully, this approach will eliminate potentially expensive POW library |
1825 | calls when unsafe floating point math is enabled and allow the compiler to |
1826 | further optimise the multiplies, square roots and divides produced by this |
1827 | function. */ |
1828 | |
1829 | static tree |
1830 | expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, |
1831 | tree arg0, tree arg1, HOST_WIDE_INT max_depth) |
1832 | { |
1833 | tree type = TREE_TYPE (arg0); |
1834 | machine_mode mode = TYPE_MODE (type); |
tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1836 | bool one_over = true; |
1837 | |
1838 | if (!sqrtfn) |
1839 | return NULL_TREE; |
1840 | |
1841 | if (TREE_CODE (arg1) != REAL_CST) |
1842 | return NULL_TREE; |
1843 | |
1844 | REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1); |
1845 | |
1846 | gcc_assert (max_depth > 0); |
1847 | tree *cache = XALLOCAVEC (tree, max_depth + 1); |
1848 | |
1849 | struct pow_synth_sqrt_info synth_info; |
1850 | synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
1851 | synth_info.deepest = 0; |
1852 | synth_info.num_mults = 0; |
1853 | |
1854 | bool neg_exp = REAL_VALUE_NEGATIVE (exp_init); |
1855 | REAL_VALUE_TYPE exp = real_value_abs (&exp_init); |
1856 | |
1857 | /* The whole and fractional parts of exp. */ |
1858 | REAL_VALUE_TYPE whole_part; |
1859 | REAL_VALUE_TYPE frac_part; |
1860 | |
1861 | real_floor (&whole_part, mode, &exp); |
1862 | real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part); |
1863 | |
1864 | |
1865 | REAL_VALUE_TYPE ceil_whole = dconst0; |
1866 | REAL_VALUE_TYPE ceil_fract = dconst0; |
1867 | |
1868 | if (neg_exp) |
1869 | { |
1870 | real_ceil (&ceil_whole, mode, &exp); |
1871 | real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp); |
1872 | } |
1873 | |
if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1875 | return NULL_TREE; |
1876 | |
1877 | /* Check whether it's more profitable to not use 1.0 / ... */ |
1878 | if (neg_exp) |
1879 | { |
1880 | struct pow_synth_sqrt_info alt_synth_info; |
1881 | alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
1882 | alt_synth_info.deepest = 0; |
1883 | alt_synth_info.num_mults = 0; |
1884 | |
if (representable_as_half_series_p (ceil_fract, max_depth,
&alt_synth_info)
1887 | && alt_synth_info.deepest <= synth_info.deepest |
1888 | && alt_synth_info.num_mults < synth_info.num_mults) |
1889 | { |
1890 | whole_part = ceil_whole; |
1891 | frac_part = ceil_fract; |
1892 | synth_info.deepest = alt_synth_info.deepest; |
1893 | synth_info.num_mults = alt_synth_info.num_mults; |
memcpy (synth_info.factors, alt_synth_info.factors,
(max_depth + 1) * sizeof (bool));
1896 | one_over = false; |
1897 | } |
1898 | } |
1899 | |
1900 | HOST_WIDE_INT n = real_to_integer (&whole_part); |
1901 | REAL_VALUE_TYPE cint; |
1902 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
1903 | |
1904 | if (!real_identical (&whole_part, &cint)) |
1905 | return NULL_TREE; |
1906 | |
1907 | if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS) |
1908 | return NULL_TREE; |
1909 | |
memset (cache, 0, (max_depth + 1) * sizeof (tree));
1911 | |
1912 | tree integer_res = n == 0 ? build_real (type, dconst1) : arg0; |
1913 | |
1914 | /* Calculate the integer part of the exponent. */ |
1915 | if (n > 1) |
1916 | { |
1917 | integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n); |
1918 | if (!integer_res) |
1919 | return NULL_TREE; |
1920 | } |
1921 | |
1922 | if (dump_file) |
1923 | { |
1924 | char string[64]; |
1925 | |
1926 | real_to_decimal (string, &exp_init, sizeof (string), 0, 1); |
fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1928 | |
1929 | if (neg_exp) |
1930 | { |
1931 | if (one_over) |
1932 | { |
fprintf (dump_file, "1.0 / (");
dump_integer_part (dump_file, "x", n);
if (n > 0)
fprintf (dump_file, " * ");
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
fprintf (dump_file, ")");
}
else
{
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
fprintf (dump_file, " / (");
dump_integer_part (dump_file, "x", n);
fprintf (dump_file, ")");
1946 | } |
1947 | } |
1948 | else |
1949 | { |
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
if (n > 0)
fprintf (dump_file, " * ");
dump_integer_part (dump_file, "x", n);
1954 | } |
1955 | |
fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1957 | } |
1958 | |
1959 | |
1960 | tree fract_res = NULL_TREE; |
1961 | cache[0] = arg0; |
1962 | |
1963 | /* Calculate the fractional part of the exponent. */ |
1964 | for (unsigned i = 0; i < synth_info.deepest; i++) |
1965 | { |
1966 | if (synth_info.factors[i]) |
1967 | { |
tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1969 | |
1970 | if (!fract_res) |
1971 | fract_res = sqrt_chain; |
1972 | |
1973 | else |
fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
fract_res, sqrt_chain);
1976 | } |
1977 | } |
1978 | |
1979 | tree res = NULL_TREE; |
1980 | |
1981 | if (neg_exp) |
1982 | { |
1983 | if (one_over) |
1984 | { |
1985 | if (n > 0) |
res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
fract_res, integer_res);
1988 | else |
1989 | res = fract_res; |
1990 | |
res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
build_real (type, dconst1), res);
1993 | } |
1994 | else |
1995 | { |
res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
fract_res, integer_res);
1998 | } |
1999 | } |
2000 | else |
res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
fract_res, integer_res);
2003 | return res; |
2004 | } |
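
/* As a concrete instance of the (A)/(B) choice described above, take
   POW (x, -5.875).  With (A), FRAC_PART == 0.875 == binary 0.111, a
   depth-3 sqrt chain with two multiplications inside the fractional
   part; with (B), CEIL_FRACT == 0.125 == binary 0.001, the same depth
   but no multiplications, so (B) is chosen and we emit
   SQRT (SQRT (SQRT (x))) / POWI (x, 6).  */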
2005 | |
2006 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |
2007 | with location info LOC. If possible, create an equivalent and |
2008 | less expensive sequence of statements prior to GSI, and return an |
expression holding the result. */
2010 | |
2011 | static tree |
2012 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, |
2013 | tree arg0, tree arg1) |
2014 | { |
2015 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |
2016 | REAL_VALUE_TYPE c2, dconst3; |
2017 | HOST_WIDE_INT n; |
2018 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |
2019 | machine_mode mode; |
bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2021 | bool hw_sqrt_exists, c_is_int, c2_is_int; |
2022 | |
2023 | dconst1_4 = dconst1; |
2024 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); |
2025 | |
2026 | /* If the exponent isn't a constant, there's nothing of interest |
2027 | to be done. */ |
2028 | if (TREE_CODE (arg1) != REAL_CST) |
2029 | return NULL_TREE; |
2030 | |
2031 | /* Don't perform the operation if flag_signaling_nans is on |
2032 | and the operand is a signaling NaN. */ |
2033 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) |
2034 | && ((TREE_CODE (arg0) == REAL_CST |
2035 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) |
2036 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |
2037 | return NULL_TREE; |
2038 | |
2039 | /* If the exponent is equivalent to an integer, expand to an optimal |
2040 | multiplication sequence when profitable. */ |
2041 | c = TREE_REAL_CST (arg1); |
2042 | n = real_to_integer (&c); |
2043 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
2044 | c_is_int = real_identical (&c, &cint); |
2045 | |
2046 | if (c_is_int |
2047 | && ((n >= -1 && n <= 2) |
2048 | || (flag_unsafe_math_optimizations |
2049 | && speed_p |
2050 | && powi_cost (n) <= POWI_MAX_MULTS))) |
2051 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); |
2052 | |
2053 | /* Attempt various optimizations using sqrt and cbrt. */ |
2054 | type = TREE_TYPE (arg0); |
2055 | mode = TYPE_MODE (type); |
sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2057 | |
2058 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe |
2059 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while |
2060 | sqrt(-0) = -0. */ |
2061 | if (sqrtfn |
2062 | && real_equal (&c, &dconsthalf) |
2063 | && !HONOR_SIGNED_ZEROS (mode)) |
return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2065 | |
hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2067 | |
2068 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |
2069 | optimizations since 1./3. is not exactly representable. If x |
2070 | is negative and finite, the correct value of pow(x,1./3.) is |
2071 | a NaN with the "invalid" exception raised, because the value |
2072 | of 1./3. actually has an even denominator. The correct value |
2073 | of cbrt(x) is a negative real value. */ |
cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2075 | dconst1_3 = real_value_truncate (mode, dconst_third ()); |
2076 | |
2077 | if (flag_unsafe_math_optimizations |
2078 | && cbrtfn |
2079 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
2080 | && real_equal (&c, &dconst1_3)) |
return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2082 | |
2083 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization |
2084 | if we don't have a hardware sqrt insn. */ |
2085 | dconst1_6 = dconst1_3; |
2086 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); |
2087 | |
2088 | if (flag_unsafe_math_optimizations |
2089 | && sqrtfn |
2090 | && cbrtfn |
2091 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
2092 | && speed_p |
2093 | && hw_sqrt_exists |
2094 | && real_equal (&c, &dconst1_6)) |
2095 | { |
2096 | /* sqrt(x) */ |
sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

/* cbrt(sqrt(x)) */
return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2101 | } |
2102 | |
2103 | |
2104 | /* Attempt to expand the POW as a product of square root chains. |
Expand the 0.25 case even when optimising for size. */
2106 | if (flag_unsafe_math_optimizations |
2107 | && sqrtfn |
2108 | && hw_sqrt_exists |
2109 | && (speed_p || real_equal (&c, &dconst1_4)) |
2110 | && !HONOR_SIGNED_ZEROS (mode)) |
2111 | { |
2112 | unsigned int max_depth = speed_p |
2113 | ? param_max_pow_sqrt_depth |
2114 | : 2; |
2115 | |
2116 | tree expand_with_sqrts |
2117 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); |
2118 | |
2119 | if (expand_with_sqrts) |
2120 | return expand_with_sqrts; |
2121 | } |
2122 | |
2123 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |
2124 | n = real_to_integer (&c2); |
2125 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
2126 | c2_is_int = real_identical (&c2, &cint); |
2127 | |
2128 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |
2129 | |
2130 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; |
2131 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. |
2132 | |
2133 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is |
2134 | different from pow(x, 1./3.) due to rounding and behavior with |
2135 | negative x, we need to constrain this transformation to unsafe |
2136 | math and positive x or finite math. */ |
2137 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |
2138 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |
2139 | real_round (&c2, mode, &c2); |
2140 | n = real_to_integer (&c2); |
2141 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
2142 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |
2143 | real_convert (&c2, mode, &c2); |
2144 | |
2145 | if (flag_unsafe_math_optimizations |
2146 | && cbrtfn |
2147 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
2148 | && real_identical (&c2, &c) |
2149 | && !c2_is_int |
2150 | && optimize_function_for_speed_p (cfun) |
&& powi_cost (n / 3) <= POWI_MAX_MULTS)
2152 | { |
2153 | tree powi_x_ndiv3 = NULL_TREE; |
2154 | |
2155 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not |
2156 | possible or profitable, give up. Skip the degenerate case when |
2157 | abs(n) < 3, where the result is always 1. */ |
if (absu_hwi (n) >= 3)
2159 | { |
powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
abs_hwi (n / 3));
2162 | if (!powi_x_ndiv3) |
2163 | return NULL_TREE; |
2164 | } |
2165 | |
2166 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi |
2167 | as that creates an unnecessary variable. Instead, just produce |
2168 | either cbrt(x) or cbrt(x) * cbrt(x). */ |
cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

if (absu_hwi (n) % 3 == 1)
powi_cbrt_x = cbrt_x;
else
powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
cbrt_x, cbrt_x);
2176 | |
2177 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ |
if (absu_hwi (n) < 3)
2179 | result = powi_cbrt_x; |
2180 | else |
result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
powi_x_ndiv3, powi_cbrt_x);
2183 | |
2184 | /* If n is negative, reciprocate the result. */ |
2185 | if (n < 0) |
result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
build_real (type, dconst1), result);
2188 | |
2189 | return result; |
2190 | } |
2191 | |
2192 | /* No optimizations succeeded. */ |
2193 | return NULL_TREE; |
2194 | } |
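
/* For instance, pow (x, 2./3.) reaches the 3c = n case above with
   n == 2: the powi factor is skipped because abs (n) < 3, and the
   emitted sequence is simply cbrt (x) * cbrt (x).  */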
2195 | |
2196 | /* ARG is the argument to a cabs builtin call in GSI with location info |
2197 | LOC. Create a sequence of statements prior to GSI that calculates |
2198 | sqrt(R*R + I*I), where R and I are the real and imaginary components |
2199 | of ARG, respectively. Return an expression holding the result. */ |
2200 | |
2201 | static tree |
2202 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) |
2203 | { |
2204 | tree real_part, imag_part, addend1, addend2, sum, result; |
2205 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2207 | machine_mode mode = TYPE_MODE (type); |
2208 | |
2209 | if (!flag_unsafe_math_optimizations |
|| !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
|| !sqrtfn
|| optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2213 | return NULL_TREE; |
2214 | |
real_part = build_and_insert_ref (gsi, loc, type, "cabs",
REALPART_EXPR, arg);
addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
real_part, real_part);
imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
IMAGPART_EXPR, arg);
addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
imag_part, imag_part);
sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2225 | |
2226 | return result; |
2227 | } |
2228 | |
2229 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |
2230 | on the SSA_NAME argument of each of them. */ |
2231 | |
2232 | namespace { |
2233 | |
2234 | const pass_data pass_data_cse_sincos = |
2235 | { |
GIMPLE_PASS, /* type */
"sincos", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_TREE_SINCOS, /* tv_id */
PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
2244 | TODO_update_ssa, /* todo_flags_finish */ |
2245 | }; |
2246 | |
2247 | class pass_cse_sincos : public gimple_opt_pass |
2248 | { |
2249 | public: |
2250 | pass_cse_sincos (gcc::context *ctxt) |
2251 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) |
2252 | {} |
2253 | |
2254 | /* opt_pass methods: */ |
2255 | bool gate (function *) final override |
2256 | { |
2257 | return optimize; |
2258 | } |
2259 | |
2260 | unsigned int execute (function *) final override; |
2261 | |
2262 | }; // class pass_cse_sincos |
2263 | |
2264 | unsigned int |
2265 | pass_cse_sincos::execute (function *fun) |
2266 | { |
2267 | basic_block bb; |
2268 | bool cfg_changed = false; |
2269 | |
2270 | calculate_dominance_info (CDI_DOMINATORS); |
memset (&sincos_stats, 0, sizeof (sincos_stats));
2272 | |
2273 | FOR_EACH_BB_FN (bb, fun) |
2274 | { |
2275 | gimple_stmt_iterator gsi; |
2276 | |
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);

if (is_gimple_call (stmt)
&& gimple_call_lhs (stmt))
2283 | { |
2284 | tree arg; |
2285 | switch (gimple_call_combined_fn (stmt)) |
2286 | { |
2287 | CASE_CFN_COS: |
2288 | CASE_CFN_SIN: |
2289 | CASE_CFN_CEXPI: |
arg = gimple_call_arg (stmt, 0);
2291 | /* Make sure we have either sincos or cexp. */ |
2292 | if (!targetm.libc_has_function (function_c99_math_complex, |
2293 | TREE_TYPE (arg)) |
2294 | && !targetm.libc_has_function (function_sincos, |
2295 | TREE_TYPE (arg))) |
2296 | break; |
2297 | |
2298 | if (TREE_CODE (arg) == SSA_NAME) |
cfg_changed |= execute_cse_sincos_1 (arg);
2300 | break; |
2301 | default: |
2302 | break; |
2303 | } |
2304 | } |
2305 | } |
2306 | } |
2307 | |
statistics_counter_event (fun, "sincos statements inserted",
sincos_stats.inserted);
statistics_counter_event (fun, "conv statements removed",
2311 | sincos_stats.conv_removed); |
2312 | |
2313 | return cfg_changed ? TODO_cleanup_cfg : 0; |
2314 | } |
2315 | |
2316 | } // anon namespace |
2317 | |
2318 | gimple_opt_pass * |
2319 | make_pass_cse_sincos (gcc::context *ctxt) |
2320 | { |
2321 | return new pass_cse_sincos (ctxt); |
2322 | } |
2323 | |
2324 | /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant. |
2325 | Also expand CABS. */ |
2326 | namespace { |
2327 | |
2328 | const pass_data pass_data_expand_powcabs = |
2329 | { |
GIMPLE_PASS, /* type */
"powcabs", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_TREE_POWCABS, /* tv_id */
PROP_ssa, /* properties_required */
PROP_gimple_opt_math, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
2338 | TODO_update_ssa, /* todo_flags_finish */ |
2339 | }; |
2340 | |
2341 | class pass_expand_powcabs : public gimple_opt_pass |
2342 | { |
2343 | public: |
2344 | pass_expand_powcabs (gcc::context *ctxt) |
2345 | : gimple_opt_pass (pass_data_expand_powcabs, ctxt) |
2346 | {} |
2347 | |
2348 | /* opt_pass methods: */ |
2349 | bool gate (function *) final override |
2350 | { |
2351 | return optimize; |
2352 | } |
2353 | |
2354 | unsigned int execute (function *) final override; |
2355 | |
2356 | }; // class pass_expand_powcabs |
2357 | |
2358 | unsigned int |
2359 | pass_expand_powcabs::execute (function *fun) |
2360 | { |
2361 | basic_block bb; |
2362 | bool cfg_changed = false; |
2363 | |
2364 | calculate_dominance_info (CDI_DOMINATORS); |
2365 | |
2366 | FOR_EACH_BB_FN (bb, fun) |
2367 | { |
2368 | gimple_stmt_iterator gsi; |
2369 | bool cleanup_eh = false; |
2370 | |
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
2374 | |
2375 | /* Only the last stmt in a bb could throw, no need to call |
2376 | gimple_purge_dead_eh_edges if we change something in the middle |
2377 | of a basic block. */ |
2378 | cleanup_eh = false; |
2379 | |
if (is_gimple_call (stmt)
&& gimple_call_lhs (stmt))
2382 | { |
2383 | tree arg0, arg1, result; |
2384 | HOST_WIDE_INT n; |
2385 | location_t loc; |
2386 | |
2387 | switch (gimple_call_combined_fn (stmt)) |
2388 | { |
2389 | CASE_CFN_POW: |
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);

loc = gimple_location (stmt);
result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2395 | |
2396 | if (result) |
2397 | { |
2398 | tree lhs = gimple_get_lhs (stmt); |
2399 | gassign *new_stmt = gimple_build_assign (lhs, result); |
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
2406 | } |
2407 | break; |
2408 | |
2409 | CASE_CFN_POWI: |
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
loc = gimple_location (stmt);
2413 | |
2414 | if (real_minus_onep (arg0)) |
2415 | { |
2416 | tree t0, t1, cond, one, minus_one; |
2417 | gassign *stmt; |
2418 | |
2419 | t0 = TREE_TYPE (arg0); |
2420 | t1 = TREE_TYPE (arg1); |
2421 | one = build_real (t0, dconst1); |
2422 | minus_one = build_real (t0, dconstm1); |
2423 | |
cond = make_temp_ssa_name (t1, NULL, "powi_cond");
stmt = gimple_build_assign (cond, BIT_AND_EXPR,
arg1, build_int_cst (t1, 1));
gimple_set_location (stmt, loc);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

result = make_temp_ssa_name (t0, NULL, "powi");
stmt = gimple_build_assign (result, COND_EXPR, cond,
minus_one, one);
gimple_set_location (stmt, loc);
2434 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
2435 | } |
2436 | else |
2437 | { |
2438 | if (!tree_fits_shwi_p (arg1)) |
2439 | break; |
2440 | |
2441 | n = tree_to_shwi (arg1); |
result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2443 | } |
2444 | |
2445 | if (result) |
2446 | { |
2447 | tree lhs = gimple_get_lhs (stmt); |
2448 | gassign *new_stmt = gimple_build_assign (lhs, result); |
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
2455 | } |
2456 | break; |
2457 | |
2458 | CASE_CFN_CABS: |
2459 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
2460 | loc = gimple_location (g: stmt); |
result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2462 | |
2463 | if (result) |
2464 | { |
2465 | tree lhs = gimple_get_lhs (stmt); |
2466 | gassign *new_stmt = gimple_build_assign (lhs, result); |
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
2473 | } |
2474 | break; |
2475 | |
2476 | default:; |
2477 | } |
2478 | } |
2479 | } |
2480 | if (cleanup_eh) |
2481 | cfg_changed |= gimple_purge_dead_eh_edges (bb); |
2482 | } |
2483 | |
2484 | return cfg_changed ? TODO_cleanup_cfg : 0; |
2485 | } |
2486 | |
2487 | } // anon namespace |
2488 | |
2489 | gimple_opt_pass * |
2490 | make_pass_expand_powcabs (gcc::context *ctxt) |
2491 | { |
2492 | return new pass_expand_powcabs (ctxt); |
2493 | } |
2494 | |
2495 | /* Return true if stmt is a type conversion operation that can be stripped |
2496 | when used in a widening multiply operation. */ |
2497 | static bool |
2498 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
2499 | { |
enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2501 | |
2502 | if (TREE_CODE (result_type) == INTEGER_TYPE) |
2503 | { |
2504 | tree op_type; |
2505 | tree inner_op_type; |
2506 | |
2507 | if (!CONVERT_EXPR_CODE_P (rhs_code)) |
2508 | return false; |
2509 | |
2510 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2511 | |
2512 | /* If the type of OP has the same precision as the result, then |
2513 | we can strip this conversion. The multiply operation will be |
2514 | selected to create the correct extension as a by-product. */ |
2515 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) |
2516 | return true; |
2517 | |
2518 | /* We can also strip a conversion if it preserves the signed-ness of |
2519 | the operation and doesn't narrow the range. */ |
2520 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |
2521 | |
2522 | /* If the inner-most type is unsigned, then we can strip any |
2523 | intermediate widening operation. If it's signed, then the |
2524 | intermediate widening operation must also be signed. */ |
2525 | if ((TYPE_UNSIGNED (inner_op_type) |
2526 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) |
2527 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
2528 | return true; |
2529 | |
2530 | return false; |
2531 | } |
2532 | |
2533 | return rhs_code == FIXED_CONVERT_EXPR; |
2534 | } |
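
/* For example, when the multiplication computes a 64-bit result and
   one operand is defined by

     t = (long long) c;   where c has unsigned int type

   the conversion is strippable because t has the same precision as
   the result.  Likewise

     t = (int) c;   where c has unsigned char type

   is strippable because the inner type is unsigned and strictly
   narrower than int.  */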
2535 | |
2536 | /* Return true if RHS is a suitable operand for a widening multiplication, |
2537 | assuming a target type of TYPE. |
2538 | There are two cases: |
2539 | |
2540 | - RHS makes some value at least twice as wide. Store that value |
2541 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. |
2542 | |
2543 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, |
2544 | but leave *TYPE_OUT untouched. */ |
2545 | |
2546 | static bool |
2547 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
2548 | tree *new_rhs_out) |
2549 | { |
2550 | gimple *stmt; |
2551 | tree type1, rhs1; |
2552 | |
2553 | if (TREE_CODE (rhs) == SSA_NAME) |
2554 | { |
2555 | stmt = SSA_NAME_DEF_STMT (rhs); |
if (is_gimple_assign (stmt))
{
if (! widening_mult_conversion_strippable_p (type, stmt))
rhs1 = rhs;
else
{
rhs1 = gimple_assign_rhs1 (stmt);
2563 | |
2564 | if (TREE_CODE (rhs1) == INTEGER_CST) |
2565 | { |
2566 | *new_rhs_out = rhs1; |
2567 | *type_out = NULL; |
2568 | return true; |
2569 | } |
2570 | } |
2571 | } |
2572 | else |
2573 | rhs1 = rhs; |
2574 | |
2575 | type1 = TREE_TYPE (rhs1); |
2576 | |
2577 | if (TREE_CODE (type1) != TREE_CODE (type) |
2578 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
2579 | return false; |
2580 | |
2581 | *new_rhs_out = rhs1; |
2582 | *type_out = type1; |
2583 | return true; |
2584 | } |
2585 | |
2586 | if (TREE_CODE (rhs) == INTEGER_CST) |
2587 | { |
2588 | *new_rhs_out = rhs; |
2589 | *type_out = NULL; |
2590 | return true; |
2591 | } |
2592 | |
2593 | return false; |
2594 | } |
2595 | |
2596 | /* Return true if STMT performs a widening multiplication, assuming the |
2597 | output type is TYPE. If so, store the unwidened types of the operands |
2598 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and |
2599 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT |
2600 | and *TYPE2_OUT would give the operands of the multiplication. */ |
2601 | |
2602 | static bool |
2603 | is_widening_mult_p (gimple *stmt, |
2604 | tree *type1_out, tree *rhs1_out, |
2605 | tree *type2_out, tree *rhs2_out) |
2606 | { |
2607 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2608 | |
2609 | if (TREE_CODE (type) == INTEGER_TYPE) |
2610 | { |
2611 | if (TYPE_OVERFLOW_TRAPS (type)) |
2612 | return false; |
2613 | } |
2614 | else if (TREE_CODE (type) != FIXED_POINT_TYPE) |
2615 | return false; |
2616 | |
if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
rhs1_out))
return false;

if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
rhs2_out))
2623 | return false; |
2624 | |
2625 | if (*type1_out == NULL) |
2626 | { |
2627 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
2628 | return false; |
2629 | *type1_out = *type2_out; |
2630 | } |
2631 | |
2632 | if (*type2_out == NULL) |
2633 | { |
2634 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
2635 | return false; |
2636 | *type2_out = *type1_out; |
2637 | } |
2638 | |
2639 | /* Ensure that the larger of the two operands comes first. */ |
2640 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) |
2641 | { |
std::swap (*type1_out, *type2_out);
std::swap (*rhs1_out, *rhs2_out);
2644 | } |
2645 | |
2646 | return true; |
2647 | } |
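
/* For example, given int a and short b,

     long long r = (long long) a * (long long) b;

   this returns true with *TYPE1_OUT == int, *RHS1_OUT == a,
   *TYPE2_OUT == short and *RHS2_OUT == b.  A constant operand, as in
   (long long) a * 10, instead takes the type of the other operand,
   provided the constant fits in it.  */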
2648 | |
2649 | /* Check to see if the CALL statement is an invocation of copysign |
with 1.0 as its first argument. */
2651 | static bool |
2652 | is_copysign_call_with_1 (gimple *call) |
2653 | { |
gcall *c = dyn_cast <gcall *> (call);
2655 | if (! c) |
2656 | return false; |
2657 | |
2658 | enum combined_fn code = gimple_call_combined_fn (c); |
2659 | |
2660 | if (code == CFN_LAST) |
2661 | return false; |
2662 | |
2663 | if (builtin_fn_p (code)) |
2664 | { |
2665 | switch (as_builtin_fn (code)) |
2666 | { |
2667 | CASE_FLT_FN (BUILT_IN_COPYSIGN): |
2668 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): |
return real_onep (gimple_call_arg (c, 0));
2670 | default: |
2671 | return false; |
2672 | } |
2673 | } |
2674 | |
2675 | if (internal_fn_p (code)) |
2676 | { |
2677 | switch (as_internal_fn (code)) |
2678 | { |
2679 | case IFN_COPYSIGN: |
return real_onep (gimple_call_arg (c, 0));
2681 | default: |
2682 | return false; |
2683 | } |
2684 | } |
2685 | |
2686 | return false; |
2687 | } |
2688 | |
2689 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). |
This only happens when the xorsign optab is defined.  If the
pattern is not a xorsign pattern or if expansion fails, FALSE is
returned; otherwise TRUE is returned. */
2693 | static bool |
2694 | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) |
2695 | { |
2696 | tree treeop0, treeop1, lhs, type; |
location_t loc = gimple_location (stmt);
lhs = gimple_assign_lhs (stmt);
treeop0 = gimple_assign_rhs1 (stmt);
treeop1 = gimple_assign_rhs2 (stmt);
2701 | type = TREE_TYPE (lhs); |
2702 | machine_mode mode = TYPE_MODE (type); |
2703 | |
2704 | if (HONOR_SNANS (type)) |
2705 | return false; |
2706 | |
2707 | if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) |
2708 | { |
2709 | gimple *call0 = SSA_NAME_DEF_STMT (treeop0); |
if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
{
call0 = SSA_NAME_DEF_STMT (treeop1);
if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2714 | return false; |
2715 | |
2716 | treeop1 = treeop0; |
2717 | } |
if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2719 | return false; |
2720 | |
gcall *c = as_a<gcall*> (call0);
treeop0 = gimple_call_arg (c, 1);
2723 | |
2724 | gcall *call_stmt |
2725 | = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0); |
2726 | gimple_set_lhs (call_stmt, lhs); |
gimple_set_location (call_stmt, loc);
2728 | gsi_replace (gsi, call_stmt, true); |
2729 | return true; |
2730 | } |
2731 | |
2732 | return false; |
2733 | } |
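
/* For example (with illustrative SSA names), given

     t_1 = __builtin_copysign (1.0, y_2);
     z_3 = x_4 * t_1;

   where t_1 has no other use, the multiplication is replaced by

     z_3 = .XORSIGN (x_4, y_2);

   provided the target implements the xorsign optab for the mode.  */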
2734 | |
2735 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
2736 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return |
2737 | value is true iff we converted the statement. */ |
2738 | |
2739 | static bool |
2740 | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
2741 | { |
2742 | tree lhs, rhs1, rhs2, type, type1, type2; |
2743 | enum insn_code handler; |
2744 | scalar_int_mode to_mode, from_mode, actual_mode; |
2745 | optab op; |
2746 | int actual_precision; |
location_t loc = gimple_location (stmt);
2748 | bool from_unsigned1, from_unsigned2; |
2749 | |
lhs = gimple_assign_lhs (stmt);
2751 | type = TREE_TYPE (lhs); |
2752 | if (TREE_CODE (type) != INTEGER_TYPE) |
2753 | return false; |
2754 | |
if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2756 | return false; |
2757 | |
/* If either rhs1 or rhs2 is subject to abnormal coalescing,
avoid the transform. */
2760 | if ((TREE_CODE (rhs1) == SSA_NAME |
2761 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) |
2762 | || (TREE_CODE (rhs2) == SSA_NAME |
2763 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))) |
2764 | return false; |
2765 | |
2766 | to_mode = SCALAR_INT_TYPE_MODE (type); |
2767 | from_mode = SCALAR_INT_TYPE_MODE (type1); |
2768 | if (to_mode == from_mode) |
2769 | return false; |
2770 | |
2771 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2772 | from_unsigned2 = TYPE_UNSIGNED (type2); |
2773 | |
2774 | if (from_unsigned1 && from_unsigned2) |
2775 | op = umul_widen_optab; |
2776 | else if (!from_unsigned1 && !from_unsigned2) |
2777 | op = smul_widen_optab; |
2778 | else |
2779 | op = usmul_widen_optab; |
2780 | |
handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
&actual_mode);
2783 | |
2784 | if (handler == CODE_FOR_nothing) |
2785 | { |
2786 | if (op != smul_widen_optab) |
2787 | { |
2788 | /* We can use a signed multiply with unsigned types as long as |
2789 | there is a wider mode to use, or it is the smaller of the two |
2790 | types that is unsigned. Note that type1 >= type2, always. */ |
2791 | if ((TYPE_UNSIGNED (type1) |
&& TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
|| (TYPE_UNSIGNED (type2)
&& TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
{
if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
|| GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2798 | return false; |
2799 | } |
2800 | |
2801 | op = smul_widen_optab; |
2802 | handler = find_widening_optab_handler_and_mode (op, to_mode, |
2803 | from_mode, |
&actual_mode);
2805 | |
2806 | if (handler == CODE_FOR_nothing) |
2807 | return false; |
2808 | |
2809 | from_unsigned1 = from_unsigned2 = false; |
2810 | } |
2811 | else |
2812 | { |
2813 | /* Expand can synthesize smul_widen_optab if the target |
2814 | supports umul_widen_optab. */ |
2815 | op = umul_widen_optab; |
2816 | handler = find_widening_optab_handler_and_mode (op, to_mode, |
2817 | from_mode, |
&actual_mode);
2819 | if (handler == CODE_FOR_nothing) |
2820 | return false; |
2821 | } |
2822 | } |
2823 | |
/* Ensure that the inputs to the handler are in the correct precision
for the opcode. This will be the full mode size. */
actual_precision = GET_MODE_PRECISION (actual_mode);
2827 | if (2 * actual_precision > TYPE_PRECISION (type)) |
2828 | return false; |
2829 | if (actual_precision != TYPE_PRECISION (type1) |
2830 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
2831 | rhs1 = build_and_insert_cast (gsi, loc, |
build_nonstandard_integer_type
(actual_precision, from_unsigned1), rhs1);
2834 | if (actual_precision != TYPE_PRECISION (type2) |
2835 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
2836 | rhs2 = build_and_insert_cast (gsi, loc, |
build_nonstandard_integer_type
(actual_precision, from_unsigned2), rhs2);
2839 | |
2840 | /* Handle constants. */ |
2841 | if (TREE_CODE (rhs1) == INTEGER_CST) |
2842 | rhs1 = fold_convert (type1, rhs1); |
2843 | if (TREE_CODE (rhs2) == INTEGER_CST) |
2844 | rhs2 = fold_convert (type2, rhs2); |
2845 | |
gimple_assign_set_rhs1 (stmt, rhs1);
gimple_assign_set_rhs2 (stmt, rhs2);
gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
update_stmt (stmt);
2850 | widen_mul_stats.widen_mults_inserted++; |
2851 | return true; |
2852 | } |
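
/* For example (with illustrative SSA names), given short a_2 and b_3,

     _5 = (int) a_2;
     _6 = (int) b_3;
     r_7 = _5 * _6;

   becomes

     r_7 = a_2 w* b_3;   <-- WIDEN_MULT_EXPR

   when the target provides a suitable widening-multiply optab handler
   for the mode pair.  */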
2853 | |
2854 | /* Process a single gimple statement STMT, which is found at the |
iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2856 | rhs (given by CODE), and try to convert it into a |
2857 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value |
2858 | is true iff we converted the statement. */ |
2859 | |
2860 | static bool |
2861 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, |
2862 | enum tree_code code) |
2863 | { |
2864 | gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL; |
2865 | gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt; |
2866 | tree type, type1, type2, optype; |
2867 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
2868 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; |
2869 | optab this_optab; |
2870 | enum tree_code wmult_code; |
2871 | enum insn_code handler; |
2872 | scalar_mode to_mode, from_mode, actual_mode; |
location_t loc = gimple_location (stmt);
2874 | int actual_precision; |
2875 | bool from_unsigned1, from_unsigned2; |
2876 | |
lhs = gimple_assign_lhs (stmt);
2878 | type = TREE_TYPE (lhs); |
2879 | if (TREE_CODE (type) != INTEGER_TYPE |
2880 | && TREE_CODE (type) != FIXED_POINT_TYPE) |
2881 | return false; |
2882 | |
2883 | if (code == MINUS_EXPR) |
2884 | wmult_code = WIDEN_MULT_MINUS_EXPR; |
2885 | else |
2886 | wmult_code = WIDEN_MULT_PLUS_EXPR; |
2887 | |
rhs1 = gimple_assign_rhs1 (stmt);
rhs2 = gimple_assign_rhs2 (stmt);
2890 | |
2891 | if (TREE_CODE (rhs1) == SSA_NAME) |
2892 | { |
2893 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); |
if (is_gimple_assign (rhs1_stmt))
rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2896 | } |
2897 | |
2898 | if (TREE_CODE (rhs2) == SSA_NAME) |
2899 | { |
2900 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); |
if (is_gimple_assign (rhs2_stmt))
rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2903 | } |
2904 | |
/* Allow for one conversion statement between the multiply
and addition/subtraction statement. If there is more than
one conversion then we assume they would invalidate this
transformation. If that's not the case then they should have
been folded before now. */
2910 | if (CONVERT_EXPR_CODE_P (rhs1_code)) |
2911 | { |
2912 | conv1_stmt = rhs1_stmt; |
rhs1 = gimple_assign_rhs1 (rhs1_stmt);
if (TREE_CODE (rhs1) == SSA_NAME)
{
rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
if (is_gimple_assign (rhs1_stmt))
rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2919 | } |
2920 | else |
2921 | return false; |
2922 | } |
2923 | if (CONVERT_EXPR_CODE_P (rhs2_code)) |
2924 | { |
2925 | conv2_stmt = rhs2_stmt; |
rhs2 = gimple_assign_rhs1 (rhs2_stmt);
if (TREE_CODE (rhs2) == SSA_NAME)
{
rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
if (is_gimple_assign (rhs2_stmt))
rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2932 | } |
2933 | else |
2934 | return false; |
2935 | } |
2936 | |
/* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
is_widening_mult_p, but we still need the returned rhs values.

It might also appear that it would be sufficient to use the existing
operands of the widening multiply, but that would limit the choice of
multiply-and-accumulate instructions.

If the widened-multiplication result has more than one use, it is
probably wiser not to do the conversion. Also restrict this operation
to a single basic block to avoid moving the multiply to a different
block with a higher execution frequency. */
2948 | if (code == PLUS_EXPR |
2949 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) |
2950 | { |
if (!has_single_use (rhs1)
|| gimple_bb (rhs1_stmt) != gimple_bb (stmt)
|| !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
2955 | return false; |
2956 | add_rhs = rhs2; |
2957 | conv_stmt = conv1_stmt; |
2958 | } |
2959 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
2960 | { |
if (!has_single_use (rhs2)
|| gimple_bb (rhs2_stmt) != gimple_bb (stmt)
|| !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
2965 | return false; |
2966 | add_rhs = rhs1; |
2967 | conv_stmt = conv2_stmt; |
2968 | } |
2969 | else |
2970 | return false; |
2971 | |
2972 | to_mode = SCALAR_TYPE_MODE (type); |
2973 | from_mode = SCALAR_TYPE_MODE (type1); |
2974 | if (to_mode == from_mode) |
2975 | return false; |
2976 | |
2977 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2978 | from_unsigned2 = TYPE_UNSIGNED (type2); |
2979 | optype = type1; |
2980 | |
2981 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
2982 | if (from_unsigned1 != from_unsigned2) |
2983 | { |
2984 | if (!INTEGRAL_TYPE_P (type)) |
2985 | return false; |
2986 | /* We can use a signed multiply with unsigned types as long as |
2987 | there is a wider mode to use, or it is the smaller of the two |
2988 | types that is unsigned. Note that type1 >= type2, always. */ |
2989 | if ((from_unsigned1 |
&& TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
|| (from_unsigned2
&& TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
{
if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
|| GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2996 | return false; |
2997 | } |
2998 | |
2999 | from_unsigned1 = from_unsigned2 = false; |
optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3001 | false); |
3002 | } |
3003 | |
/* If there was a conversion between the multiply and addition
then we need to make sure it fits a multiply-and-accumulate.
There should be a single mode change which does not change the
value. */
3008 | if (conv_stmt) |
3009 | { |
3010 | /* We use the original, unmodified data types for this. */ |
3011 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
3012 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); |
3013 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); |
3014 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); |
3015 | |
3016 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) |
3017 | { |
3018 | /* Conversion is a truncate. */ |
3019 | if (TYPE_PRECISION (to_type) < data_size) |
3020 | return false; |
3021 | } |
3022 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) |
3023 | { |
3024 | /* Conversion is an extend. Check it's the right sort. */ |
3025 | if (TYPE_UNSIGNED (from_type) != is_unsigned |
3026 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) |
3027 | return false; |
3028 | } |
3029 | /* else convert is a no-op for our purposes. */ |
3030 | } |
3031 | |
3032 | /* Verify that the machine can perform a widening multiply |
3033 | accumulate in this mode/signedness combination, otherwise |
3034 | this transformation is likely to pessimize code. */ |
3035 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
from_mode, &actual_mode);
3038 | |
3039 | if (handler == CODE_FOR_nothing) |
3040 | return false; |
3041 | |
/* Ensure that the inputs to the handler are in the correct precision
for the opcode. This will be the full mode size. */
actual_precision = GET_MODE_PRECISION (actual_mode);
3045 | if (actual_precision != TYPE_PRECISION (type1) |
3046 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
3047 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
build_nonstandard_integer_type
(actual_precision, from_unsigned1),
mult_rhs1);
3051 | if (actual_precision != TYPE_PRECISION (type2) |
3052 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
3053 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
build_nonstandard_integer_type
(actual_precision, from_unsigned2),
mult_rhs2);
3057 | |
3058 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3060 | |
3061 | /* Handle constants. */ |
3062 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) |
3063 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
3064 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
3065 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
3066 | |
3067 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
3068 | add_rhs); |
update_stmt (gsi_stmt (*gsi));
3070 | widen_mul_stats.maccs_inserted++; |
3071 | return true; |
3072 | } |
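
/* For example (with illustrative SSA names), the pair

     _5 = a_2 w* b_3;
     r_6 = _5 + acc_4;

   becomes

     r_6 = WIDEN_MULT_PLUS_EXPR <a_2, b_3, acc_4>;

   when the target provides a widening multiply-accumulate pattern for
   this mode and signedness combination.  */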
3073 | |
3074 | /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and |
3075 | OP2 and which we know is used in statements that can be, together with the |
3076 | multiplication, converted to FMAs, perform the transformation. */ |
3077 | |
3078 | static void |
3079 | convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) |
3080 | { |
3081 | tree type = TREE_TYPE (mul_result); |
3082 | gimple *use_stmt; |
3083 | imm_use_iterator imm_iter; |
3084 | gcall *fma_stmt; |
3085 | |
3086 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) |
3087 | { |
3088 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
3089 | tree addop, mulop1 = op1, result = mul_result; |
3090 | bool negate_p = false; |
3091 | gimple_seq seq = NULL; |
3092 | |
if (is_gimple_debug (use_stmt))
continue;

if (is_gimple_assign (use_stmt)
&& gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3098 | { |
result = gimple_assign_lhs (use_stmt);
use_operand_p use_p;
gimple *neguse_stmt;
single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3103 | gsi_remove (&gsi, true); |
3104 | release_defs (use_stmt); |
3105 | |
3106 | use_stmt = neguse_stmt; |
3107 | gsi = gsi_for_stmt (use_stmt); |
3108 | negate_p = true; |
3109 | } |
3110 | |
3111 | tree cond, else_value, ops[3], len, bias; |
3112 | tree_code code; |
3113 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, |
3114 | ops, &else_value, |
3115 | &len, &bias)) |
3116 | gcc_unreachable (); |
3117 | addop = ops[0] == result ? ops[1] : ops[0]; |
3118 | |
3119 | if (code == MINUS_EXPR) |
3120 | { |
3121 | if (ops[0] == result) |
3122 | /* a * b - c -> a * b + (-c) */ |
3123 | addop = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: addop); |
3124 | else |
3125 | /* a - b * c -> (-b) * c + a */ |
3126 | negate_p = !negate_p; |
3127 | } |
3128 | |
3129 | if (negate_p) |
3130 | mulop1 = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: mulop1); |
3131 | |
3132 | if (seq) |
3133 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
3134 | |
3135 | if (len) |
3136 | fma_stmt |
3137 | = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2, |
3138 | addop, else_value, len, bias); |
3139 | else if (cond) |
3140 | fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, |
3141 | op2, addop, else_value); |
3142 | else |
3143 | fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); |
3144 | gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); |
3145 | gimple_call_set_nothrow (s: fma_stmt, nothrow_p: !stmt_can_throw_internal (cfun, |
3146 | use_stmt)); |
3147 | gsi_replace (&gsi, fma_stmt, true); |
3148 | /* Follow all SSA edges so that we generate FMS, FNMA and FNMS |
3149 | regardless of where the negation occurs. */ |
3150 | gimple *orig_stmt = gsi_stmt (i: gsi); |
3151 | if (fold_stmt (&gsi, follow_all_ssa_edges)) |
3152 | { |
3153 | if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (i: gsi))) |
3154 | gcc_unreachable (); |
3155 | update_stmt (s: gsi_stmt (i: gsi)); |
3156 | } |
3157 | |
3158 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3159 | { |
fprintf (dump_file, "Generated FMA ");
print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
fprintf (dump_file, "\n");
3163 | } |
3164 | |
3165 | /* If the FMA result is negated in a single use, fold the negation |
3166 | too. */ |
3167 | orig_stmt = gsi_stmt (i: gsi); |
3168 | use_operand_p use_p; |
3169 | gimple *neg_stmt; |
3170 | if (is_gimple_call (gs: orig_stmt) |
3171 | && gimple_call_internal_p (gs: orig_stmt) |
3172 | && gimple_call_lhs (gs: orig_stmt) |
3173 | && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME |
3174 | && single_imm_use (var: gimple_call_lhs (gs: orig_stmt), use_p: &use_p, stmt: &neg_stmt) |
3175 | && is_gimple_assign (gs: neg_stmt) |
3176 | && gimple_assign_rhs_code (gs: neg_stmt) == NEGATE_EXPR |
3177 | && !stmt_could_throw_p (cfun, neg_stmt)) |
3178 | { |
3179 | gsi = gsi_for_stmt (neg_stmt); |
3180 | if (fold_stmt (&gsi, follow_all_ssa_edges)) |
3181 | { |
3182 | if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (i: gsi))) |
3183 | gcc_unreachable (); |
3184 | update_stmt (s: gsi_stmt (i: gsi)); |
3185 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3186 | { |
fprintf (dump_file, "Folded FMA negation ");
print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
fprintf (dump_file, "\n");
3190 | } |
3191 | } |
3192 | } |
3193 | |
3194 | widen_mul_stats.fmas_inserted++; |
3195 | } |
3196 | } |
3197 | |
/* Data necessary to perform the actual transformation from a multiplication
and an addition to an FMA once the decision has been made to do it, and to
then delete the multiplication statement from the function IL. */
3201 | |
3202 | struct fma_transformation_info |
3203 | { |
3204 | gimple *mul_stmt; |
3205 | tree mul_result; |
3206 | tree op1; |
3207 | tree op2; |
3208 | }; |
3209 | |
3210 | /* Structure containing the current state of FMA deferring, i.e. whether we are |
3211 | deferring, whether to continue deferring, and all data necessary to come |
3212 | back and perform all deferred transformations. */ |
3213 | |
3214 | class fma_deferring_state |
3215 | { |
3216 | public: |
3217 | /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually |
3218 | do any deferring. */ |
3219 | |
3220 | fma_deferring_state (bool perform_deferring) |
3221 | : m_candidates (), m_mul_result_set (), m_initial_phi (NULL), |
3222 | m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {} |
3223 | |
/* List of FMA candidates for which the transformation has been determined
to be possible, but which at this point in the BB analysis we do not yet
consider beneficial. */
3227 | auto_vec<fma_transformation_info, 8> m_candidates; |
3228 | |
/* Set of multiplication results that are part of already deferred FMA
candidates. */
3231 | hash_set<tree> m_mul_result_set; |
3232 | |
/* The PHI that supposedly feeds back the result of one FMA to another
across a loop boundary. */
3235 | gphi *m_initial_phi; |
3236 | |
/* Result of the last produced FMA candidate or NULL_TREE if there has not
been one. */
3239 | tree m_last_result; |
3240 | |
3241 | /* If true, deferring might still be profitable. If false, transform all |
3242 | candidates and no longer defer. */ |
3243 | bool m_deferring_p; |
3244 | }; |
3245 | |
3246 | /* Transform all deferred FMA candidates and mark STATE as no longer |
3247 | deferring. */ |
3248 | |
3249 | static void |
3250 | cancel_fma_deferring (fma_deferring_state *state) |
3251 | { |
3252 | if (!state->m_deferring_p) |
3253 | return; |
3254 | |
3255 | for (unsigned i = 0; i < state->m_candidates.length (); i++) |
3256 | { |
3257 | if (dump_file && (dump_flags & TDF_DETAILS)) |
fprintf (dump_file, "Generating deferred FMA\n");
3259 | |
3260 | const fma_transformation_info &fti = state->m_candidates[i]; |
3261 | convert_mult_to_fma_1 (mul_result: fti.mul_result, op1: fti.op1, op2: fti.op2); |
3262 | |
3263 | gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt); |
3264 | gsi_remove (&gsi, true); |
3265 | release_defs (fti.mul_stmt); |
3266 | } |
3267 | state->m_deferring_p = false; |
3268 | } |
3269 | |
3270 | /* If OP is an SSA name defined by a PHI node, return the PHI statement. |
3271 | Otherwise return NULL. */ |
3272 | |
3273 | static gphi * |
3274 | result_of_phi (tree op) |
3275 | { |
3276 | if (TREE_CODE (op) != SSA_NAME) |
3277 | return NULL; |
3278 | |
3279 | return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op)); |
3280 | } |
3281 | |
/* After processing statements of a BB and recording STATE, return true if the
initial phi is fed by the last FMA candidate result or one such result from
previously processed BBs marked in LAST_RESULT_SET. */
3285 | |
3286 | static bool |
3287 | last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, |
3288 | hash_set<tree> *last_result_set) |
3289 | { |
3290 | ssa_op_iter iter; |
3291 | use_operand_p use; |
3292 | FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE) |
3293 | { |
3294 | tree t = USE_FROM_PTR (use); |
3295 | if (t == state->m_last_result |
3296 | || last_result_set->contains (k: t)) |
3297 | return true; |
3298 | } |
3299 | |
3300 | return false; |
3301 | } |
3302 | |
3303 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
3304 | with uses in additions and subtractions to form fused multiply-add |
3305 | operations. Returns true if successful and MUL_STMT should be removed. |
3306 | If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional |
3307 | on MUL_COND, otherwise it is unconditional. |
3308 | |
3309 | If STATE indicates that we are deferring FMA transformation, that means |
3310 | that we do not produce FMAs for basic blocks which look like: |
3311 | |
3312 | <bb 6> |
3313 | # accumulator_111 = PHI <0.0(5), accumulator_66(6)> |
3314 | _65 = _14 * _16; |
3315 | accumulator_66 = _65 + accumulator_111; |
3316 | |
or its unrolled version, i.e. with several FMA candidates that feed the
result of one into the addend of another. Instead, we add them to a list in
STATE and if we later discover an FMA candidate that is not part of such a
chain, we go back and perform all previously deferred candidates. */
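/* In C terms, the deferred shape above corresponds to a reduction such as
(an illustrative sketch):

double dot (const double *a, const double *b, int n)
{
double acc = 0.0;
for (int i = 0; i < n; i++)
acc = a[i] * b[i] + acc;
return acc;
}

where each candidate FMA feeds the addend of the next one, so turning
every step into an FMA serializes the loop on the FMA latency. Deferring
is only considered for types no wider than param_avoid_fma_max_bits. */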
3321 | |
3322 | static bool |
3323 | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, |
3324 | fma_deferring_state *state, tree mul_cond = NULL_TREE, |
3325 | tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE) |
3326 | { |
3327 | tree mul_result = gimple_get_lhs (mul_stmt); |
3328 | /* If there isn't a LHS then this can't be an FMA. There can be no LHS |
3329 | if the statement was left just for the side-effects. */ |
3330 | if (!mul_result) |
3331 | return false; |
3332 | tree type = TREE_TYPE (mul_result); |
3333 | gimple *use_stmt, *neguse_stmt; |
3334 | use_operand_p use_p; |
3335 | imm_use_iterator imm_iter; |
3336 | |
3337 | if (FLOAT_TYPE_P (type) |
3338 | && flag_fp_contract_mode != FP_CONTRACT_FAST) |
3339 | return false; |
3340 | |
3341 | /* We don't want to do bitfield reduction ops. */ |
3342 | if (INTEGRAL_TYPE_P (type) |
3343 | && (!type_has_mode_precision_p (t: type) || TYPE_OVERFLOW_TRAPS (type))) |
3344 | return false; |
3345 | |
3346 | /* If the target doesn't support it, don't generate it. We assume that |
3347 | if fma isn't available then fms, fnma or fnms are not either. */ |
3348 | optimization_type opt_type = bb_optimization_type (gimple_bb (g: mul_stmt)); |
3349 | if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type)) |
3350 | return false; |
3351 | |
/* If the multiplication has zero uses, it is kept around probably because
of -fnon-call-exceptions. Don't optimize it away in that case,
that is DCE's job. */
3355 | if (has_zero_uses (var: mul_result)) |
3356 | return false; |
3357 | |
3358 | bool check_defer |
3359 | = (state->m_deferring_p |
3360 | && maybe_le (a: tree_to_poly_int64 (TYPE_SIZE (type)), |
3361 | param_avoid_fma_max_bits)); |
3362 | bool defer = check_defer; |
3363 | bool seen_negate_p = false; |
3364 | |
3365 | /* There is no numerical difference between fused and unfused integer FMAs, |
3366 | and the assumption below that FMA is as cheap as addition is unlikely |
3367 | to be true, especially if the multiplication occurs multiple times on |
3368 | the same chain. E.g., for something like: |
3369 | |
3370 | (((a * b) + c) >> 1) + (a * b) |
3371 | |
3372 | we do not want to duplicate the a * b into two additions, not least |
3373 | because the result is not a natural FMA chain. */ |
3374 | if (ANY_INTEGRAL_TYPE_P (type) |
3375 | && !has_single_use (var: mul_result)) |
3376 | return false; |
3377 | |
3378 | if (!dbg_cnt (index: form_fma)) |
3379 | return false; |
3380 | |
3381 | /* Make sure that the multiplication statement becomes dead after |
3382 | the transformation, thus that all uses are transformed to FMAs. |
3383 | This means we assume that an FMA operation has the same cost |
3384 | as an addition. */ |
3385 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) |
3386 | { |
3387 | tree result = mul_result; |
3388 | bool negate_p = false; |
3389 | |
3390 | use_stmt = USE_STMT (use_p); |
3391 | |
3392 | if (is_gimple_debug (gs: use_stmt)) |
3393 | continue; |
3394 | |
/* For now restrict this operation to single basic blocks. In theory
3396 | we would want to support sinking the multiplication in |
3397 | m = a*b; |
3398 | if () |
3399 | ma = m + c; |
3400 | else |
3401 | d = m; |
to form an FMA in the then block and sink the multiplication to the
3403 | else block. */ |
3404 | if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt)) |
3405 | return false; |
3406 | |
3407 | /* A negate on the multiplication leads to FNMA. */ |
3408 | if (is_gimple_assign (gs: use_stmt) |
3409 | && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR) |
3410 | { |
3411 | ssa_op_iter iter; |
3412 | use_operand_p usep; |
3413 | |
3414 | /* If (due to earlier missed optimizations) we have two |
3415 | negates of the same value, treat them as equivalent |
3416 | to a single negate with multiple uses. */ |
3417 | if (seen_negate_p) |
3418 | return false; |
3419 | |
3420 | result = gimple_assign_lhs (gs: use_stmt); |
3421 | |
3422 | /* Make sure the negate statement becomes dead with this |
3423 | single transformation. */ |
3424 | if (!single_imm_use (var: gimple_assign_lhs (gs: use_stmt), |
3425 | use_p: &use_p, stmt: &neguse_stmt)) |
3426 | return false; |
3427 | |
3428 | /* Make sure the multiplication isn't also used on that stmt. */ |
3429 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
3430 | if (USE_FROM_PTR (usep) == mul_result) |
3431 | return false; |
3432 | |
3433 | /* Re-validate. */ |
3434 | use_stmt = neguse_stmt; |
3435 | if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt)) |
3436 | return false; |
3437 | |
3438 | negate_p = seen_negate_p = true; |
3439 | } |
3440 | |
3441 | tree cond, else_value, ops[3], len, bias; |
3442 | tree_code code; |
3443 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops, |
3444 | &else_value, &len, &bias)) |
3445 | return false; |
3446 | |
3447 | switch (code) |
3448 | { |
3449 | case MINUS_EXPR: |
3450 | if (ops[1] == result) |
3451 | negate_p = !negate_p; |
3452 | break; |
3453 | case PLUS_EXPR: |
3454 | break; |
3455 | default: |
3456 | /* FMA can only be formed from PLUS and MINUS. */ |
3457 | return false; |
3458 | } |
3459 | |
3460 | if (len) |
3461 | { |
/* For COND_LEN_* operations, we may have a dummy mask which is
the all-true mask. In that case mul_cond and cond may be
distinct trees, yet we still consider them equal. */
3465 | if (mul_cond && cond != mul_cond |
3466 | && !(integer_truep (mul_cond) && integer_truep (cond))) |
3467 | return false; |
3468 | |
3469 | if (else_value == result) |
3470 | return false; |
3471 | |
3472 | if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type, |
3473 | opt_type)) |
3474 | return false; |
3475 | |
3476 | if (mul_len) |
3477 | { |
3478 | poly_int64 mul_value, value; |
3479 | if (poly_int_tree_p (t: mul_len, value: &mul_value) |
3480 | && poly_int_tree_p (t: len, value: &value) |
3481 | && maybe_ne (a: mul_value, b: value)) |
3482 | return false; |
3483 | else if (mul_len != len) |
3484 | return false; |
3485 | |
3486 | if (wi::to_widest (t: mul_bias) != wi::to_widest (t: bias)) |
3487 | return false; |
3488 | } |
3489 | } |
3490 | else |
3491 | { |
3492 | if (mul_cond && cond != mul_cond) |
3493 | return false; |
3494 | |
3495 | if (cond) |
3496 | { |
3497 | if (cond == result || else_value == result) |
3498 | return false; |
3499 | if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, |
3500 | opt_type)) |
3501 | return false; |
3502 | } |
3503 | } |
3504 | |
3505 | /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that |
3506 | we'll visit later, we might be able to get a more profitable |
3507 | match with fnma. |
OTOH, if we don't, a negate / fma pair likely has lower latency
than a mult / subtract pair. */
3510 | if (code == MINUS_EXPR |
3511 | && !negate_p |
3512 | && ops[0] == result |
3513 | && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type) |
3514 | && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type) |
3515 | && TREE_CODE (ops[1]) == SSA_NAME |
3516 | && has_single_use (var: ops[1])) |
3517 | { |
3518 | gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]); |
3519 | if (is_gimple_assign (gs: stmt2) |
3520 | && gimple_assign_rhs_code (gs: stmt2) == MULT_EXPR) |
3521 | return false; |
3522 | } |
3523 | |
3524 | /* We can't handle a * b + a * b. */ |
3525 | if (ops[0] == ops[1]) |
3526 | return false; |
3527 | /* If deferring, make sure we are not looking at an instruction that |
3528 | wouldn't have existed if we were not. */ |
3529 | if (state->m_deferring_p |
3530 | && (state->m_mul_result_set.contains (k: ops[0]) |
3531 | || state->m_mul_result_set.contains (k: ops[1]))) |
3532 | return false; |
3533 | |
3534 | if (check_defer) |
3535 | { |
3536 | tree use_lhs = gimple_get_lhs (use_stmt); |
3537 | if (state->m_last_result) |
3538 | { |
3539 | if (ops[1] == state->m_last_result |
3540 | || ops[0] == state->m_last_result) |
3541 | defer = true; |
3542 | else |
3543 | defer = false; |
3544 | } |
3545 | else |
3546 | { |
3547 | gcc_checking_assert (!state->m_initial_phi); |
3548 | gphi *phi; |
3549 | if (ops[0] == result) |
3550 | phi = result_of_phi (op: ops[1]); |
3551 | else |
3552 | { |
3553 | gcc_assert (ops[1] == result); |
3554 | phi = result_of_phi (op: ops[0]); |
3555 | } |
3556 | |
3557 | if (phi) |
3558 | { |
3559 | state->m_initial_phi = phi; |
3560 | defer = true; |
3561 | } |
3562 | else |
3563 | defer = false; |
3564 | } |
3565 | |
3566 | state->m_last_result = use_lhs; |
3567 | check_defer = false; |
3568 | } |
3569 | else |
3570 | defer = false; |
3571 | |
3572 | /* While it is possible to validate whether or not the exact form that |
3573 | we've recognized is available in the backend, the assumption is that |
3574 | if the deferring logic above did not trigger, the transformation is |
3575 | never a loss. For instance, suppose the target only has the plain FMA |
3576 | pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged |
3577 | MUL+SUB for FMA+NEG, which is still two operations. Consider |
3578 | -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA |
3579 | form the two NEGs are independent and could be run in parallel. */ |
3580 | } |
3581 | |
3582 | if (defer) |
3583 | { |
3584 | fma_transformation_info fti; |
3585 | fti.mul_stmt = mul_stmt; |
3586 | fti.mul_result = mul_result; |
3587 | fti.op1 = op1; |
3588 | fti.op2 = op2; |
3589 | state->m_candidates.safe_push (obj: fti); |
3590 | state->m_mul_result_set.add (k: mul_result); |
3591 | |
3592 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3593 | { |
fprintf (dump_file, "Deferred generating FMA for multiplication ");
print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
fprintf (dump_file, "\n");
3597 | } |
3598 | |
3599 | return false; |
3600 | } |
3601 | else |
3602 | { |
3603 | if (state->m_deferring_p) |
3604 | cancel_fma_deferring (state); |
3605 | convert_mult_to_fma_1 (mul_result, op1, op2); |
3606 | return true; |
3607 | } |
3608 | } |
3609 | |
3610 | |
3611 | /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have |
3612 | a check for non-zero like: |
3613 | _1 = x_4(D) * y_5(D); |
3614 | *res_7(D) = _1; |
3615 | if (x_4(D) != 0) |
3616 | goto <bb 3>; [50.00%] |
3617 | else |
3618 | goto <bb 4>; [50.00%] |
3619 | |
3620 | <bb 3> [local count: 536870913]: |
3621 | _2 = _1 / x_4(D); |
3622 | _9 = _2 != y_5(D); |
3623 | _10 = (int) _9; |
3624 | |
3625 | <bb 4> [local count: 1073741824]: |
3626 | # iftmp.0_3 = PHI <_10(3), 0(2)> |
3627 | then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also |
3628 | optimize the x_4(D) != 0 condition to 1. */ |
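/* At the source level this typically comes from an idiom like
(an illustrative sketch):

int mul_ovf (unsigned x, unsigned y, unsigned *res)
{
*res = x * y;
return x != 0 && *res / x != y;
}

where x != 0 only guards the division against dividing by zero; once
the division is folded into .MUL_OVERFLOW, the guard can be resolved
to true. */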
3629 | |
3630 | static void |
3631 | maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt, |
3632 | gimple *div_stmt, bool *cfg_changed) |
3633 | { |
3634 | basic_block bb = gimple_bb (g: cond_stmt); |
3635 | if (gimple_bb (g: div_stmt) != bb || !single_pred_p (bb)) |
3636 | return; |
3637 | edge pred_edge = single_pred_edge (bb); |
3638 | basic_block pred_bb = pred_edge->src; |
3639 | if (EDGE_COUNT (pred_bb->succs) != 2) |
3640 | return; |
3641 | edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge); |
3642 | edge other_succ_edge = NULL; |
3643 | if (gimple_code (g: cond_stmt) == GIMPLE_COND) |
3644 | { |
3645 | if (EDGE_COUNT (bb->succs) != 2) |
3646 | return; |
3647 | other_succ_edge = EDGE_SUCC (bb, 0); |
3648 | if (gimple_cond_code (gs: cond_stmt) == NE_EXPR) |
3649 | { |
3650 | if (other_succ_edge->flags & EDGE_TRUE_VALUE) |
3651 | other_succ_edge = EDGE_SUCC (bb, 1); |
3652 | } |
3653 | else if (other_succ_edge->flags & EDGE_FALSE_VALUE) |
3654 | other_succ_edge = EDGE_SUCC (bb, 0); |
3655 | if (other_edge->dest != other_succ_edge->dest) |
3656 | return; |
3657 | } |
3658 | else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb)) |
3659 | return; |
3660 | gcond *zero_cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred_bb)); |
3661 | if (zero_cond == NULL |
3662 | || (gimple_cond_code (gs: zero_cond) |
3663 | != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR)) |
3664 | || !integer_zerop (gimple_cond_rhs (gs: zero_cond))) |
3665 | return; |
3666 | tree zero_cond_lhs = gimple_cond_lhs (gs: zero_cond); |
3667 | if (TREE_CODE (zero_cond_lhs) != SSA_NAME) |
3668 | return; |
3669 | if (gimple_assign_rhs2 (gs: div_stmt) != zero_cond_lhs) |
3670 | { |
3671 | /* Allow the divisor to be result of a same precision cast |
3672 | from zero_cond_lhs. */ |
3673 | tree rhs2 = gimple_assign_rhs2 (gs: div_stmt); |
3674 | if (TREE_CODE (rhs2) != SSA_NAME) |
3675 | return; |
3676 | gimple *g = SSA_NAME_DEF_STMT (rhs2); |
3677 | if (!gimple_assign_cast_p (s: g) |
3678 | || gimple_assign_rhs1 (gs: g) != gimple_cond_lhs (gs: zero_cond) |
3679 | || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs)) |
3680 | || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs)) |
3681 | != TYPE_PRECISION (TREE_TYPE (rhs2)))) |
3682 | return; |
3683 | } |
3684 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
3685 | mul_stmts.quick_push (obj: div_stmt); |
3686 | if (is_gimple_debug (gs: gsi_stmt (i: gsi))) |
3687 | gsi_next_nondebug (i: &gsi); |
3688 | unsigned cast_count = 0; |
3689 | while (gsi_stmt (i: gsi) != cond_stmt) |
3690 | { |
/* If the original mul_stmt has a single use, allow it in the same bb;
we are then looking just at __builtin_mul_overflow_p.
3693 | Though, in that case the original mul_stmt will be replaced |
3694 | by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */ |
3695 | gimple *mul_stmt; |
3696 | unsigned int i; |
3697 | bool ok = false; |
3698 | FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt) |
3699 | { |
3700 | if (gsi_stmt (i: gsi) == mul_stmt) |
3701 | { |
3702 | ok = true; |
3703 | break; |
3704 | } |
3705 | } |
3706 | if (!ok && gimple_assign_cast_p (s: gsi_stmt (i: gsi)) && ++cast_count < 4) |
3707 | ok = true; |
3708 | if (!ok) |
3709 | return; |
3710 | gsi_next_nondebug (i: &gsi); |
3711 | } |
3712 | if (gimple_code (g: cond_stmt) == GIMPLE_COND) |
3713 | { |
3714 | basic_block succ_bb = other_edge->dest; |
3715 | for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (i: gpi); |
3716 | gsi_next (i: &gpi)) |
3717 | { |
3718 | gphi *phi = gpi.phi (); |
3719 | tree v1 = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx); |
3720 | tree v2 = gimple_phi_arg_def (gs: phi, index: other_succ_edge->dest_idx); |
3721 | if (!operand_equal_p (v1, v2, flags: 0)) |
3722 | return; |
3723 | } |
3724 | } |
3725 | else |
3726 | { |
3727 | tree lhs = gimple_assign_lhs (gs: cond_stmt); |
3728 | if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))) |
3729 | return; |
3730 | gsi_next_nondebug (i: &gsi); |
3731 | if (!gsi_end_p (i: gsi)) |
3732 | { |
3733 | if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR) |
3734 | return; |
3735 | gimple *cast_stmt = gsi_stmt (i: gsi); |
3736 | if (!gimple_assign_cast_p (s: cast_stmt)) |
3737 | return; |
3738 | tree new_lhs = gimple_assign_lhs (gs: cast_stmt); |
3739 | gsi_next_nondebug (i: &gsi); |
3740 | if (!gsi_end_p (i: gsi) |
3741 | || !new_lhs |
3742 | || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs)) |
3743 | || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1) |
3744 | return; |
3745 | lhs = new_lhs; |
3746 | } |
3747 | edge succ_edge = single_succ_edge (bb); |
3748 | basic_block succ_bb = succ_edge->dest; |
3749 | gsi = gsi_start_phis (succ_bb); |
3750 | if (gsi_end_p (i: gsi)) |
3751 | return; |
3752 | gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi)); |
3753 | gsi_next (i: &gsi); |
3754 | if (!gsi_end_p (i: gsi)) |
3755 | return; |
3756 | if (gimple_phi_arg_def (gs: phi, index: succ_edge->dest_idx) != lhs) |
3757 | return; |
3758 | tree other_val = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx); |
3759 | if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR) |
3760 | { |
3761 | tree cond = gimple_assign_rhs1 (gs: cond_stmt); |
3762 | if (TREE_CODE (cond) == NE_EXPR) |
3763 | { |
3764 | if (!operand_equal_p (other_val, |
3765 | gimple_assign_rhs3 (gs: cond_stmt), flags: 0)) |
3766 | return; |
3767 | } |
3768 | else if (!operand_equal_p (other_val, |
3769 | gimple_assign_rhs2 (gs: cond_stmt), flags: 0)) |
3770 | return; |
3771 | } |
3772 | else if (gimple_assign_rhs_code (gs: cond_stmt) == NE_EXPR) |
3773 | { |
3774 | if (!integer_zerop (other_val)) |
3775 | return; |
3776 | } |
3777 | else if (!integer_onep (other_val)) |
3778 | return; |
3779 | } |
3780 | if (pred_edge->flags & EDGE_TRUE_VALUE) |
3781 | gimple_cond_make_true (gs: zero_cond); |
3782 | else |
3783 | gimple_cond_make_false (gs: zero_cond); |
3784 | update_stmt (s: zero_cond); |
3785 | *cfg_changed = true; |
3786 | } |
3787 | |
/* Helper function for arith_overflow_check_p. Return true
if VAL1 is equal to VAL2 cast to the corresponding integral type
with the other signedness, or vice versa. */
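/* E.g. (an illustrative sketch), given an unsigned int SSA name u_2 and

s_3 = (int) u_2;

the pair (u_2, s_3) compares equal here in either order, because one is
defined as a cast of the other. */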
3791 | |
3792 | static bool |
3793 | arith_cast_equal_p (tree val1, tree val2) |
3794 | { |
3795 | if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST) |
3796 | return wi::eq_p (x: wi::to_wide (t: val1), y: wi::to_wide (t: val2)); |
3797 | else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME) |
3798 | return false; |
3799 | if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1)) |
3800 | && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2) |
3801 | return true; |
3802 | if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2)) |
3803 | && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1) |
3804 | return true; |
3805 | return false; |
3806 | } |
3807 | |
/* Helper function of match_arith_overflow. Return 1
if USE_STMT is an unsigned overflow check of the form ovf != 0 for
STMT, -1 if USE_STMT is an unsigned overflow check of the form
ovf == 0, and 0 otherwise. */
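/* E.g. for unsigned a and b (an illustrative C sketch):

r = a + b; if (r < a) ... is an overflow check, return 1;
r = a - b; if (r <= a) ... is a no-overflow check, return -1;

since unsigned addition overflowed iff the result wrapped below one of
the operands, and unsigned subtraction overflowed iff the result is
greater than the minuend. */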
3812 | |
3813 | static int |
3814 | arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, |
3815 | tree maxval, tree *other) |
3816 | { |
3817 | enum tree_code ccode = ERROR_MARK; |
3818 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; |
3819 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
3820 | tree lhs = gimple_assign_lhs (gs: cast_stmt ? cast_stmt : stmt); |
3821 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
3822 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
3823 | tree multop = NULL_TREE, divlhs = NULL_TREE; |
3824 | gimple *cur_use_stmt = use_stmt; |
3825 | |
3826 | if (code == MULT_EXPR) |
3827 | { |
3828 | if (!is_gimple_assign (gs: use_stmt)) |
3829 | return 0; |
3830 | if (gimple_assign_rhs_code (gs: use_stmt) != TRUNC_DIV_EXPR) |
3831 | return 0; |
3832 | if (gimple_assign_rhs1 (gs: use_stmt) != lhs) |
3833 | return 0; |
3834 | if (cast_stmt) |
3835 | { |
3836 | if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs1)) |
3837 | multop = rhs2; |
3838 | else if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs2)) |
3839 | multop = rhs1; |
3840 | else |
3841 | return 0; |
3842 | } |
3843 | else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1) |
3844 | multop = rhs2; |
3845 | else if (operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), rhs2, flags: 0)) |
3846 | multop = rhs1; |
3847 | else |
3848 | return 0; |
3849 | if (stmt_ends_bb_p (use_stmt)) |
3850 | return 0; |
3851 | divlhs = gimple_assign_lhs (gs: use_stmt); |
3852 | if (!divlhs) |
3853 | return 0; |
3854 | use_operand_p use; |
3855 | if (!single_imm_use (var: divlhs, use_p: &use, stmt: &cur_use_stmt)) |
3856 | return 0; |
3857 | if (cast_stmt && gimple_assign_cast_p (s: cur_use_stmt)) |
3858 | { |
3859 | tree cast_lhs = gimple_assign_lhs (gs: cur_use_stmt); |
3860 | if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs)) |
3861 | && TYPE_UNSIGNED (TREE_TYPE (cast_lhs)) |
3862 | && (TYPE_PRECISION (TREE_TYPE (cast_lhs)) |
3863 | == TYPE_PRECISION (TREE_TYPE (divlhs))) |
3864 | && single_imm_use (var: cast_lhs, use_p: &use, stmt: &cur_use_stmt)) |
3865 | { |
3866 | cast_stmt = NULL; |
3867 | divlhs = cast_lhs; |
3868 | } |
3869 | else |
3870 | return 0; |
3871 | } |
3872 | } |
3873 | if (gimple_code (g: cur_use_stmt) == GIMPLE_COND) |
3874 | { |
3875 | ccode = gimple_cond_code (gs: cur_use_stmt); |
3876 | crhs1 = gimple_cond_lhs (gs: cur_use_stmt); |
3877 | crhs2 = gimple_cond_rhs (gs: cur_use_stmt); |
3878 | } |
3879 | else if (is_gimple_assign (gs: cur_use_stmt)) |
3880 | { |
3881 | if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS) |
3882 | { |
3883 | ccode = gimple_assign_rhs_code (gs: cur_use_stmt); |
3884 | crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt); |
3885 | crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt); |
3886 | } |
3887 | else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR) |
3888 | { |
3889 | tree cond = gimple_assign_rhs1 (gs: cur_use_stmt); |
3890 | if (COMPARISON_CLASS_P (cond)) |
3891 | { |
3892 | ccode = TREE_CODE (cond); |
3893 | crhs1 = TREE_OPERAND (cond, 0); |
3894 | crhs2 = TREE_OPERAND (cond, 1); |
3895 | } |
3896 | else |
3897 | return 0; |
3898 | } |
3899 | else |
3900 | return 0; |
3901 | } |
3902 | else |
3903 | return 0; |
3904 | |
3905 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) |
3906 | return 0; |
3907 | |
3908 | switch (ccode) |
3909 | { |
3910 | case GT_EXPR: |
3911 | case LE_EXPR: |
3912 | if (maxval) |
3913 | { |
3914 | /* r = a + b; r > maxval or r <= maxval */ |
3915 | if (crhs1 == lhs |
3916 | && TREE_CODE (crhs2) == INTEGER_CST |
3917 | && tree_int_cst_equal (crhs2, maxval)) |
3918 | return ccode == GT_EXPR ? 1 : -1; |
3919 | break; |
3920 | } |
3921 | /* r = a - b; r > a or r <= a |
3922 | r = a + b; a > r or a <= r or b > r or b <= r. */ |
3923 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) |
3924 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) |
3925 | && crhs2 == lhs)) |
3926 | return ccode == GT_EXPR ? 1 : -1; |
3927 | /* r = ~a; b > r or b <= r. */ |
3928 | if (code == BIT_NOT_EXPR && crhs2 == lhs) |
3929 | { |
3930 | if (other) |
3931 | *other = crhs1; |
3932 | return ccode == GT_EXPR ? 1 : -1; |
3933 | } |
3934 | break; |
3935 | case LT_EXPR: |
3936 | case GE_EXPR: |
3937 | if (maxval) |
3938 | break; |
3939 | /* r = a - b; a < r or a >= r |
3940 | r = a + b; r < a or r >= a or r < b or r >= b. */ |
3941 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) |
3942 | || (code == PLUS_EXPR && crhs1 == lhs |
3943 | && (crhs2 == rhs1 || crhs2 == rhs2))) |
3944 | return ccode == LT_EXPR ? 1 : -1; |
3945 | /* r = ~a; r < b or r >= b. */ |
3946 | if (code == BIT_NOT_EXPR && crhs1 == lhs) |
3947 | { |
3948 | if (other) |
3949 | *other = crhs2; |
3950 | return ccode == LT_EXPR ? 1 : -1; |
3951 | } |
3952 | break; |
3953 | case EQ_EXPR: |
3954 | case NE_EXPR: |
3955 | /* r = a * b; _1 = r / a; _1 == b |
3956 | r = a * b; _1 = r / b; _1 == a |
3957 | r = a * b; _1 = r / a; _1 != b |
3958 | r = a * b; _1 = r / b; _1 != a. */ |
3959 | if (code == MULT_EXPR) |
3960 | { |
3961 | if (cast_stmt) |
3962 | { |
3963 | if ((crhs1 == divlhs && arith_cast_equal_p (val1: crhs2, val2: multop)) |
3964 | || (crhs2 == divlhs && arith_cast_equal_p (val1: crhs1, val2: multop))) |
3965 | { |
3966 | use_stmt = cur_use_stmt; |
3967 | return ccode == NE_EXPR ? 1 : -1; |
3968 | } |
3969 | } |
3970 | else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, flags: 0)) |
3971 | || (crhs2 == divlhs && crhs1 == multop)) |
3972 | { |
3973 | use_stmt = cur_use_stmt; |
3974 | return ccode == NE_EXPR ? 1 : -1; |
3975 | } |
3976 | } |
3977 | break; |
3978 | default: |
3979 | break; |
3980 | } |
3981 | return 0; |
3982 | } |
3983 | |
3984 | /* Recognize for unsigned x |
3985 | x = y - z; |
3986 | if (x > y) |
3987 | where there are other uses of x and replace it with |
3988 | _7 = .SUB_OVERFLOW (y, z); |
3989 | x = REALPART_EXPR <_7>; |
3990 | _8 = IMAGPART_EXPR <_7>; |
3991 | if (_8) |
3992 | and similarly for addition. |
3993 | |
3994 | Also recognize: |
3995 | yc = (type) y; |
3996 | zc = (type) z; |
3997 | x = yc + zc; |
3998 | if (x > max) |
3999 | where y and z have unsigned types with maximum max |
4000 | and there are other uses of x and all of those cast x |
4001 | back to that unsigned type and again replace it with |
4002 | _7 = .ADD_OVERFLOW (y, z); |
4003 | _9 = REALPART_EXPR <_7>; |
4004 | _8 = IMAGPART_EXPR <_7>; |
4005 | if (_8) |
4006 | and replace (utype) x with _9. |
4007 | |
4008 | Also recognize: |
4009 | x = ~z; |
4010 | if (y > x) |
4011 | and replace it with |
4012 | _7 = .ADD_OVERFLOW (y, z); |
4013 | _8 = IMAGPART_EXPR <_7>; |
4014 | if (_8) |
4015 | |
4016 | And also recognize: |
4017 | z = x * y; |
4018 | if (x != 0) |
4019 | goto <bb 3>; [50.00%] |
4020 | else |
4021 | goto <bb 4>; [50.00%] |
4022 | |
4023 | <bb 3> [local count: 536870913]: |
4024 | _2 = z / x; |
4025 | _9 = _2 != y; |
4026 | _10 = (int) _9; |
4027 | |
4028 | <bb 4> [local count: 1073741824]: |
4029 | # iftmp.0_3 = PHI <_10(3), 0(2)> |
4030 | and replace it with |
4031 | _7 = .MUL_OVERFLOW (x, y); |
z = REALPART_EXPR <_7>;
4033 | _8 = IMAGPART_EXPR <_7>; |
4034 | _9 = _8 != 0; |
4035 | iftmp.0_3 = (int) _9; */ |
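/* At the source level the first pattern above corresponds to code like
(an illustrative sketch):

unsigned sat_sub (unsigned y, unsigned z)
{
unsigned x = y - z;
if (x > y)
x = 0;
return x;
}

which performs the same check as __builtin_sub_overflow (y, z, &x);
that builtin is represented internally as .SUB_OVERFLOW. */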
4036 | |
4037 | static bool |
4038 | match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, |
4039 | enum tree_code code, bool *cfg_changed) |
4040 | { |
4041 | tree lhs = gimple_assign_lhs (gs: stmt); |
4042 | tree type = TREE_TYPE (lhs); |
4043 | use_operand_p use_p; |
4044 | imm_use_iterator iter; |
4045 | bool use_seen = false; |
4046 | bool ovf_use_seen = false; |
4047 | gimple *use_stmt; |
4048 | gimple *add_stmt = NULL; |
4049 | bool add_first = false; |
4050 | gimple *cond_stmt = NULL; |
4051 | gimple *cast_stmt = NULL; |
4052 | tree cast_lhs = NULL_TREE; |
4053 | |
4054 | gcc_checking_assert (code == PLUS_EXPR |
4055 | || code == MINUS_EXPR |
4056 | || code == MULT_EXPR |
4057 | || code == BIT_NOT_EXPR); |
4058 | if (!INTEGRAL_TYPE_P (type) |
4059 | || !TYPE_UNSIGNED (type) |
4060 | || has_zero_uses (var: lhs) |
4061 | || (code != PLUS_EXPR |
4062 | && code != MULT_EXPR |
4063 | && optab_handler (op: code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, |
4064 | TYPE_MODE (type)) == CODE_FOR_nothing)) |
4065 | return false; |
4066 | |
4067 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
4068 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
4069 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
4070 | { |
4071 | use_stmt = USE_STMT (use_p); |
4072 | if (is_gimple_debug (gs: use_stmt)) |
4073 | continue; |
4074 | |
4075 | tree other = NULL_TREE; |
4076 | if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, other: &other)) |
4077 | { |
4078 | if (code == BIT_NOT_EXPR) |
4079 | { |
4080 | gcc_assert (other); |
4081 | if (TREE_CODE (other) != SSA_NAME) |
4082 | return false; |
4083 | if (rhs2 == NULL) |
4084 | rhs2 = other; |
4085 | else |
4086 | return false; |
4087 | cond_stmt = use_stmt; |
4088 | } |
4089 | ovf_use_seen = true; |
4090 | } |
4091 | else |
4092 | { |
4093 | use_seen = true; |
4094 | if (code == MULT_EXPR |
4095 | && cast_stmt == NULL |
4096 | && gimple_assign_cast_p (s: use_stmt)) |
4097 | { |
4098 | cast_lhs = gimple_assign_lhs (gs: use_stmt); |
4099 | if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs)) |
4100 | && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs)) |
4101 | && (TYPE_PRECISION (TREE_TYPE (cast_lhs)) |
4102 | == TYPE_PRECISION (TREE_TYPE (lhs)))) |
4103 | cast_stmt = use_stmt; |
4104 | else |
4105 | cast_lhs = NULL_TREE; |
4106 | } |
4107 | } |
4108 | if (ovf_use_seen && use_seen) |
4109 | break; |
4110 | } |
4111 | |
4112 | if (!ovf_use_seen |
4113 | && code == MULT_EXPR |
4114 | && cast_stmt) |
4115 | { |
4116 | if (TREE_CODE (rhs1) != SSA_NAME |
4117 | || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST)) |
4118 | return false; |
4119 | FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs) |
4120 | { |
4121 | use_stmt = USE_STMT (use_p); |
4122 | if (is_gimple_debug (gs: use_stmt)) |
4123 | continue; |
4124 | |
4125 | if (arith_overflow_check_p (stmt, cast_stmt, use_stmt, |
4126 | NULL_TREE, NULL)) |
4127 | ovf_use_seen = true; |
4128 | } |
4129 | } |
4130 | else |
4131 | { |
4132 | cast_stmt = NULL; |
4133 | cast_lhs = NULL_TREE; |
4134 | } |
4135 | |
4136 | tree maxval = NULL_TREE; |
4137 | if (!ovf_use_seen |
4138 | || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) |
4139 | || (code == PLUS_EXPR |
4140 | && optab_handler (op: uaddv4_optab, |
4141 | TYPE_MODE (type)) == CODE_FOR_nothing) |
4142 | || (code == MULT_EXPR |
4143 | && optab_handler (op: cast_stmt ? mulv4_optab : umulv4_optab, |
4144 | TYPE_MODE (type)) == CODE_FOR_nothing |
4145 | && (use_seen |
4146 | || cast_stmt |
4147 | || !can_mult_highpart_p (TYPE_MODE (type), true)))) |
4148 | { |
4149 | if (code != PLUS_EXPR) |
4150 | return false; |
4151 | if (TREE_CODE (rhs1) != SSA_NAME |
4152 | || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1))) |
4153 | return false; |
4154 | rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1)); |
4155 | tree type1 = TREE_TYPE (rhs1); |
4156 | if (!INTEGRAL_TYPE_P (type1) |
4157 | || !TYPE_UNSIGNED (type1) |
4158 | || TYPE_PRECISION (type1) >= TYPE_PRECISION (type) |
4159 | || (TYPE_PRECISION (type1) |
4160 | != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1)))) |
4161 | return false; |
4162 | if (TREE_CODE (rhs2) == INTEGER_CST) |
4163 | { |
4164 | if (wi::ne_p (x: wi::rshift (x: wi::to_wide (t: rhs2), |
4165 | TYPE_PRECISION (type1), |
4166 | sgn: UNSIGNED), y: 0)) |
4167 | return false; |
4168 | rhs2 = fold_convert (type1, rhs2); |
4169 | } |
4170 | else |
4171 | { |
4172 | if (TREE_CODE (rhs2) != SSA_NAME |
4173 | || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2))) |
4174 | return false; |
4175 | rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2)); |
4176 | tree type2 = TREE_TYPE (rhs2); |
4177 | if (!INTEGRAL_TYPE_P (type2) |
4178 | || !TYPE_UNSIGNED (type2) |
4179 | || TYPE_PRECISION (type2) >= TYPE_PRECISION (type) |
4180 | || (TYPE_PRECISION (type2) |
4181 | != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2)))) |
4182 | return false; |
4183 | } |
4184 | if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2))) |
4185 | type = type1; |
4186 | else |
4187 | type = TREE_TYPE (rhs2); |
4188 | |
4189 | if (TREE_CODE (type) != INTEGER_TYPE |
4190 | || optab_handler (op: uaddv4_optab, |
4191 | TYPE_MODE (type)) == CODE_FOR_nothing) |
4192 | return false; |
4193 | |
4194 | maxval = wide_int_to_tree (type, cst: wi::max_value (TYPE_PRECISION (type), |
4195 | UNSIGNED)); |
4196 | ovf_use_seen = false; |
4197 | use_seen = false; |
4198 | basic_block use_bb = NULL; |
4199 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
4200 | { |
4201 | use_stmt = USE_STMT (use_p); |
4202 | if (is_gimple_debug (gs: use_stmt)) |
4203 | continue; |
4204 | |
4205 | if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL)) |
4206 | { |
4207 | ovf_use_seen = true; |
4208 | use_bb = gimple_bb (g: use_stmt); |
4209 | } |
4210 | else |
4211 | { |
4212 | if (!gimple_assign_cast_p (s: use_stmt) |
4213 | || gimple_assign_rhs_code (gs: use_stmt) == VIEW_CONVERT_EXPR) |
4214 | return false; |
4215 | tree use_lhs = gimple_assign_lhs (gs: use_stmt); |
4216 | if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs)) |
4217 | || (TYPE_PRECISION (TREE_TYPE (use_lhs)) |
4218 | > TYPE_PRECISION (type))) |
4219 | return false; |
4220 | use_seen = true; |
4221 | } |
4222 | } |
4223 | if (!ovf_use_seen) |
4224 | return false; |
4225 | if (!useless_type_conversion_p (type, TREE_TYPE (rhs1))) |
4226 | { |
4227 | if (!use_seen) |
4228 | return false; |
4229 | tree new_rhs1 = make_ssa_name (var: type); |
4230 | gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1); |
4231 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
4232 | rhs1 = new_rhs1; |
4233 | } |
4234 | else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2))) |
4235 | { |
4236 | if (!use_seen) |
4237 | return false; |
4238 | tree new_rhs2 = make_ssa_name (var: type); |
4239 | gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2); |
4240 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
4241 | rhs2 = new_rhs2; |
4242 | } |
4243 | else if (!use_seen) |
4244 | { |
/* If there are no uses of the wider addition, check whether
forwprop has created a narrower addition.
4247 | Require it to be in the same bb as the overflow check. */ |
4248 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1) |
4249 | { |
4250 | use_stmt = USE_STMT (use_p); |
4251 | if (is_gimple_debug (gs: use_stmt)) |
4252 | continue; |
4253 | |
4254 | if (use_stmt == stmt) |
4255 | continue; |
4256 | |
4257 | if (!is_gimple_assign (gs: use_stmt) |
4258 | || gimple_bb (g: use_stmt) != use_bb |
4259 | || gimple_assign_rhs_code (gs: use_stmt) != PLUS_EXPR) |
4260 | continue; |
4261 | |
4262 | if (gimple_assign_rhs1 (gs: use_stmt) == rhs1) |
4263 | { |
4264 | if (!operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), |
4265 | rhs2, flags: 0)) |
4266 | continue; |
4267 | } |
4268 | else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1) |
4269 | { |
4270 | if (gimple_assign_rhs1 (gs: use_stmt) != rhs2) |
4271 | continue; |
4272 | } |
4273 | else |
4274 | continue; |
4275 | |
4276 | add_stmt = use_stmt; |
4277 | break; |
4278 | } |
4279 | if (add_stmt == NULL) |
4280 | return false; |
4281 | |
4282 | /* If stmt and add_stmt are in the same bb, we need to find out |
4283 | which one is earlier. If they are in different bbs, we've |
4284 | checked add_stmt is in the same bb as one of the uses of the |
4285 | stmt lhs, so stmt needs to dominate add_stmt too. */ |
4286 | if (gimple_bb (g: stmt) == gimple_bb (g: add_stmt)) |
4287 | { |
4288 | gimple_stmt_iterator gsif = *gsi; |
4289 | gimple_stmt_iterator gsib = *gsi; |
4290 | int i; |
4291 | /* Search both forward and backward from stmt and have a small |
4292 | upper bound. */ |
4293 | for (i = 0; i < 128; i++) |
4294 | { |
4295 | if (!gsi_end_p (i: gsib)) |
4296 | { |
4297 | gsi_prev_nondebug (i: &gsib); |
4298 | if (gsi_stmt (i: gsib) == add_stmt) |
4299 | { |
4300 | add_first = true; |
4301 | break; |
4302 | } |
4303 | } |
4304 | else if (gsi_end_p (i: gsif)) |
4305 | break; |
4306 | if (!gsi_end_p (i: gsif)) |
4307 | { |
4308 | gsi_next_nondebug (i: &gsif); |
4309 | if (gsi_stmt (i: gsif) == add_stmt) |
4310 | break; |
4311 | } |
4312 | } |
4313 | if (i == 128) |
4314 | return false; |
4315 | if (add_first) |
4316 | *gsi = gsi_for_stmt (add_stmt); |
4317 | } |
4318 | } |
4319 | } |
4320 | |
4321 | if (code == BIT_NOT_EXPR) |
4322 | *gsi = gsi_for_stmt (cond_stmt); |
4323 | |
4324 | auto_vec<gimple *, 8> mul_stmts; |
4325 | if (code == MULT_EXPR && cast_stmt) |
4326 | { |
4327 | type = TREE_TYPE (cast_lhs); |
4328 | gimple *g = SSA_NAME_DEF_STMT (rhs1); |
4329 | if (gimple_assign_cast_p (s: g) |
4330 | && useless_type_conversion_p (type, |
4331 | TREE_TYPE (gimple_assign_rhs1 (g))) |
4332 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) |
4333 | rhs1 = gimple_assign_rhs1 (gs: g); |
4334 | else |
4335 | { |
4336 | g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs1); |
4337 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
4338 | rhs1 = gimple_assign_lhs (gs: g); |
4339 | mul_stmts.quick_push (obj: g); |
4340 | } |
4341 | if (TREE_CODE (rhs2) == INTEGER_CST) |
4342 | rhs2 = fold_convert (type, rhs2); |
4343 | else |
4344 | { |
4345 | g = SSA_NAME_DEF_STMT (rhs2); |
4346 | if (gimple_assign_cast_p (s: g) |
4347 | && useless_type_conversion_p (type, |
4348 | TREE_TYPE (gimple_assign_rhs1 (g))) |
4349 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) |
4350 | rhs2 = gimple_assign_rhs1 (gs: g); |
4351 | else |
4352 | { |
4353 | g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs2); |
4354 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
4355 | rhs2 = gimple_assign_lhs (gs: g); |
4356 | mul_stmts.quick_push (obj: g); |
4357 | } |
4358 | } |
4359 | } |
4360 | tree ctype = build_complex_type (type); |
4361 | gcall *g = gimple_build_call_internal (code == MULT_EXPR |
4362 | ? IFN_MUL_OVERFLOW |
4363 | : code != MINUS_EXPR |
4364 | ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, |
4365 | 2, rhs1, rhs2); |
4366 | tree ctmp = make_ssa_name (var: ctype); |
4367 | gimple_call_set_lhs (gs: g, lhs: ctmp); |
4368 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
4369 | tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (var: type) : lhs; |
4370 | gassign *g2; |
4371 | if (code != BIT_NOT_EXPR) |
4372 | { |
4373 | g2 = gimple_build_assign (new_lhs, REALPART_EXPR, |
4374 | build1 (REALPART_EXPR, type, ctmp)); |
4375 | if (maxval || cast_stmt) |
4376 | { |
4377 | gsi_insert_before (gsi, g2, GSI_SAME_STMT); |
4378 | if (add_first) |
4379 | *gsi = gsi_for_stmt (stmt); |
4380 | } |
4381 | else |
4382 | gsi_replace (gsi, g2, true); |
4383 | if (code == MULT_EXPR) |
4384 | { |
4385 | mul_stmts.quick_push (obj: g); |
4386 | mul_stmts.quick_push (obj: g2); |
4387 | if (cast_stmt) |
4388 | { |
4389 | g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs); |
4390 | gsi_replace (gsi, g2, true); |
4391 | mul_stmts.quick_push (obj: g2); |
4392 | } |
4393 | } |
4394 | } |
4395 | tree ovf = make_ssa_name (var: type); |
4396 | g2 = gimple_build_assign (ovf, IMAGPART_EXPR, |
4397 | build1 (IMAGPART_EXPR, type, ctmp)); |
4398 | if (code != BIT_NOT_EXPR) |
4399 | gsi_insert_after (gsi, g2, GSI_NEW_STMT); |
4400 | else |
4401 | gsi_insert_before (gsi, g2, GSI_SAME_STMT); |
4402 | if (code == MULT_EXPR) |
4403 | mul_stmts.quick_push (obj: g2); |
4404 | |
4405 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs) |
4406 | { |
4407 | if (is_gimple_debug (gs: use_stmt)) |
4408 | continue; |
4409 | |
4410 | gimple *orig_use_stmt = use_stmt; |
4411 | int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt, |
4412 | maxval, NULL); |
4413 | if (ovf_use == 0) |
4414 | { |
4415 | gcc_assert (code != BIT_NOT_EXPR); |
4416 | if (maxval) |
4417 | { |
4418 | tree use_lhs = gimple_assign_lhs (gs: use_stmt); |
4419 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: new_lhs); |
4420 | if (useless_type_conversion_p (TREE_TYPE (use_lhs), |
4421 | TREE_TYPE (new_lhs))) |
4422 | gimple_assign_set_rhs_code (s: use_stmt, code: SSA_NAME); |
4423 | update_stmt (s: use_stmt); |
4424 | } |
4425 | continue; |
4426 | } |
4427 | if (gimple_code (g: use_stmt) == GIMPLE_COND) |
4428 | { |
4429 | gcond *cond_stmt = as_a <gcond *> (p: use_stmt); |
4430 | gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf); |
4431 | gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0)); |
4432 | gimple_cond_set_code (gs: cond_stmt, code: ovf_use == 1 ? NE_EXPR : EQ_EXPR); |
4433 | } |
4434 | else |
4435 | { |
4436 | gcc_checking_assert (is_gimple_assign (use_stmt)); |
4437 | if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS) |
4438 | { |
4439 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf); |
4440 | gimple_assign_set_rhs2 (gs: use_stmt, rhs: build_int_cst (type, 0)); |
4441 | gimple_assign_set_rhs_code (s: use_stmt, |
4442 | code: ovf_use == 1 ? NE_EXPR : EQ_EXPR); |
4443 | } |
4444 | else |
4445 | { |
4446 | gcc_checking_assert (gimple_assign_rhs_code (use_stmt) |
4447 | == COND_EXPR); |
4448 | tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, |
4449 | boolean_type_node, ovf, |
4450 | build_int_cst (type, 0)); |
4451 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond); |
4452 | } |
4453 | } |
4454 | update_stmt (s: use_stmt); |
4455 | if (code == MULT_EXPR && use_stmt != orig_use_stmt) |
4456 | { |
4457 | gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt); |
4458 | maybe_optimize_guarding_check (mul_stmts, cond_stmt: use_stmt, div_stmt: orig_use_stmt, |
4459 | cfg_changed); |
4460 | use_operand_p use; |
4461 | gimple *cast_stmt; |
4462 | if (single_imm_use (var: gimple_assign_lhs (gs: orig_use_stmt), use_p: &use, |
4463 | stmt: &cast_stmt) |
4464 | && gimple_assign_cast_p (s: cast_stmt)) |
4465 | { |
4466 | gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt); |
4467 | gsi_remove (&gsi3, true); |
4468 | release_ssa_name (name: gimple_assign_lhs (gs: cast_stmt)); |
4469 | } |
4470 | gsi_remove (&gsi2, true); |
4471 | release_ssa_name (name: gimple_assign_lhs (gs: orig_use_stmt)); |
4472 | } |
4473 | } |
4474 | if (maxval) |
4475 | { |
4476 | gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); |
4477 | gsi_remove (&gsi2, true); |
4478 | if (add_stmt) |
4479 | { |
4480 | gimple *g = gimple_build_assign (gimple_assign_lhs (gs: add_stmt), |
4481 | new_lhs); |
4482 | gsi2 = gsi_for_stmt (add_stmt); |
4483 | gsi_replace (&gsi2, g, true); |
4484 | } |
4485 | } |
4486 | else if (code == BIT_NOT_EXPR) |
4487 | { |
4488 | *gsi = gsi_for_stmt (stmt); |
4489 | gsi_remove (gsi, true); |
4490 | release_ssa_name (name: lhs); |
4491 | return true; |
4492 | } |
4493 | return false; |
4494 | } |
4495 | |
/* Helper of match_uaddc_usubc. Look through an integral cast
which should preserve a [0, 1] range value (unless the source has
a 1-bit signed type), provided the cast has a single use. */
4499 | |
4500 | static gimple * |
4501 | uaddc_cast (gimple *g) |
4502 | { |
4503 | if (!gimple_assign_cast_p (s: g)) |
4504 | return g; |
4505 | tree op = gimple_assign_rhs1 (gs: g); |
4506 | if (TREE_CODE (op) == SSA_NAME |
4507 | && INTEGRAL_TYPE_P (TREE_TYPE (op)) |
4508 | && (TYPE_PRECISION (TREE_TYPE (op)) > 1 |
4509 | || TYPE_UNSIGNED (TREE_TYPE (op))) |
4510 | && has_single_use (var: gimple_assign_lhs (gs: g))) |
4511 | return SSA_NAME_DEF_STMT (op); |
4512 | return g; |
4513 | } |
4514 | |
/* Helper of match_uaddc_usubc. Look through a NE_EXPR
comparison with 0, which also preserves the [0, 1] value range. */
4517 | |
4518 | static gimple * |
4519 | uaddc_ne0 (gimple *g) |
4520 | { |
4521 | if (is_gimple_assign (gs: g) |
4522 | && gimple_assign_rhs_code (gs: g) == NE_EXPR |
4523 | && integer_zerop (gimple_assign_rhs2 (gs: g)) |
4524 | && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME |
4525 | && has_single_use (var: gimple_assign_lhs (gs: g))) |
4526 | return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g)); |
4527 | return g; |
4528 | } |
4529 | |
4530 | /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME |
4531 | operand. */ |
4532 | |
4533 | static bool |
4534 | uaddc_is_cplxpart (gimple *g, tree_code part) |
4535 | { |
4536 | return (is_gimple_assign (gs: g) |
4537 | && gimple_assign_rhs_code (gs: g) == part |
4538 | && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME); |
4539 | } |
4540 | |
4541 | /* Try to match e.g. |
4542 | _29 = .ADD_OVERFLOW (_3, _4); |
4543 | _30 = REALPART_EXPR <_29>; |
4544 | _31 = IMAGPART_EXPR <_29>; |
4545 | _32 = .ADD_OVERFLOW (_30, _38); |
4546 | _33 = REALPART_EXPR <_32>; |
4547 | _34 = IMAGPART_EXPR <_32>; |
4548 | _35 = _31 + _34; |
4549 | as |
4550 | _36 = .UADDC (_3, _4, _38); |
4551 | _33 = REALPART_EXPR <_36>; |
4552 | _35 = IMAGPART_EXPR <_36>; |
4553 | or |
4554 | _22 = .SUB_OVERFLOW (_6, _5); |
4555 | _23 = REALPART_EXPR <_22>; |
4556 | _24 = IMAGPART_EXPR <_22>; |
4557 | _25 = .SUB_OVERFLOW (_23, _37); |
4558 | _26 = REALPART_EXPR <_25>; |
4559 | _27 = IMAGPART_EXPR <_25>; |
4560 | _28 = _24 | _27; |
4561 | as |
4562 | _29 = .USUBC (_6, _5, _37); |
4563 | _26 = REALPART_EXPR <_29>; |
_28 = IMAGPART_EXPR <_29>;
provided _38 or _37 above have a [0, 1] range
and _3, _4 and _30 or _6, _5 and _23 have unsigned
integral types with the same precision. Whether +, | or ^ is
used on the IMAGPART_EXPR results doesn't matter; with one of the
added or subtracted operands in the [0, 1] range, at most one
.ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
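/* Such chains typically come from multi-limb arithmetic, where a middle
limb is computed as (an illustrative C sketch):

unsigned add_limb (unsigned a, unsigned b, unsigned cin, unsigned *cout)
{
unsigned t, r;
unsigned o1 = __builtin_add_overflow (a, b, &t);
unsigned o2 = __builtin_add_overflow (t, cin, &r);
*cout = o1 + o2;
return r;
}

With cin in [0, 1] at most one of o1 and o2 can be set, and
.UADDC (a, b, cin) computes exactly this: the real part of the result
is the sum r and the imaginary part the carry-out. */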
4571 | |
4572 | static bool |
4573 | match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code) |
4574 | { |
4575 | tree rhs[4]; |
4576 | rhs[0] = gimple_assign_rhs1 (gs: stmt); |
4577 | rhs[1] = gimple_assign_rhs2 (gs: stmt); |
4578 | rhs[2] = NULL_TREE; |
4579 | rhs[3] = NULL_TREE; |
4580 | tree type = TREE_TYPE (rhs[0]); |
4581 | if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type)) |
4582 | return false; |
4583 | |
4584 | auto_vec<gimple *, 2> temp_stmts; |
4585 | if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR) |
4586 | { |
4587 | /* If overflow flag is ignored on the MSB limb, we can end up with |
4588 | the most significant limb handled as r = op1 + op2 + ovf1 + ovf2; |
4589 | or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions |
4590 | thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize |
4591 | the limb below the MSB, but also create another .UADDC/.USUBC call |
4592 | for the last limb. |
4593 | |
First look through assignments with the same rhs code as CODE,
with the exception that subtraction of a constant is canonicalized
into addition of its negation. rhs[0] will be the minuend for
subtractions and one of the addends for addition; all other assigned
rhs[i] operands will be subtrahends or other addends. */
4599 | while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3]) |
4600 | { |
4601 | gimple *g = SSA_NAME_DEF_STMT (rhs[0]); |
4602 | if (has_single_use (var: rhs[0]) |
4603 | && is_gimple_assign (gs: g) |
4604 | && (gimple_assign_rhs_code (gs: g) == code |
4605 | || (code == MINUS_EXPR |
4606 | && gimple_assign_rhs_code (gs: g) == PLUS_EXPR |
4607 | && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST))) |
4608 | { |
4609 | tree r2 = gimple_assign_rhs2 (gs: g); |
4610 | if (gimple_assign_rhs_code (gs: g) != code) |
4611 | { |
4612 | r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2); |
4613 | if (!r2) |
4614 | break; |
4615 | } |
4616 | rhs[0] = gimple_assign_rhs1 (gs: g); |
4617 | tree &r = rhs[2] ? rhs[3] : rhs[2]; |
4618 | r = r2; |
4619 | temp_stmts.quick_push (obj: g); |
4620 | } |
4621 | else |
4622 | break; |
4623 | } |
4624 | for (int i = 1; i <= 2; ++i) |
4625 | while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3]) |
4626 | { |
4627 | gimple *g = SSA_NAME_DEF_STMT (rhs[i]); |
4628 | if (has_single_use (var: rhs[i]) |
4629 | && is_gimple_assign (gs: g) |
4630 | && gimple_assign_rhs_code (gs: g) == PLUS_EXPR) |
4631 | { |
4632 | rhs[i] = gimple_assign_rhs1 (gs: g); |
4633 | if (rhs[2]) |
4634 | rhs[3] = gimple_assign_rhs2 (gs: g); |
4635 | else |
4636 | rhs[2] = gimple_assign_rhs2 (gs: g); |
4637 | temp_stmts.quick_push (obj: g); |
4638 | } |
4639 | else |
4640 | break; |
4641 | } |
4642 | /* If there are just 3 addends or one minuend and two subtrahends, |
4643 | check for UADDC or USUBC being pattern recognized earlier. |
4644 | Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part |
4645 | got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3) |
4646 | etc. */ |
4647 | if (rhs[2] && !rhs[3]) |
4648 | { |
4649 | for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i) |
4650 | if (TREE_CODE (rhs[i]) == SSA_NAME) |
4651 | { |
4652 | gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); |
4653 | im = uaddc_ne0 (g: im); |
4654 | if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR)) |
4655 | { |
4656 | /* We found one of the 3 addends or 2 subtrahends to be |
4657 | __imag__ of something, verify it is .UADDC/.USUBC. */ |
4658 | tree rhs1 = gimple_assign_rhs1 (gs: im); |
4659 | gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0)); |
4660 | tree ovf_lhs = NULL_TREE; |
4661 | tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE; |
4662 | if (gimple_call_internal_p (gs: ovf, fn: code == PLUS_EXPR |
4663 | ? IFN_ADD_OVERFLOW |
4664 | : IFN_SUB_OVERFLOW)) |
4665 | { |
4666 | /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW. |
4667 | This is for the case of 2 chained .UADDC/.USUBC, |
4668 | where the first one uses 0 carry-in and the second |
4669 | one ignores the carry-out. |
4670 | So, something like: |
4671 | _16 = .ADD_OVERFLOW (_1, _2); |
4672 | _17 = REALPART_EXPR <_16>; |
4673 | _18 = IMAGPART_EXPR <_16>; |
4674 | _15 = _3 + _4; |
4675 | _12 = _15 + _18; |
4676 | where the first 3 statements come from the lower |
4677 | limb addition and the last 2 from the higher limb |
4678 | which ignores carry-out. */ |
4679 | ovf_lhs = gimple_call_lhs (gs: ovf); |
4680 | tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs)); |
4681 | ovf_arg1 = gimple_call_arg (gs: ovf, index: 0); |
4682 | ovf_arg2 = gimple_call_arg (gs: ovf, index: 1); |
/* In that case we need to punt if the types
mismatch. */
4685 | if (!types_compatible_p (type1: type, type2: ovf_lhs_type) |
4686 | || !types_compatible_p (type1: type, TREE_TYPE (ovf_arg1)) |
4687 | || !types_compatible_p (type1: type, |
4688 | TREE_TYPE (ovf_arg2))) |
4689 | ovf_lhs = NULL_TREE; |
4690 | else |
4691 | { |
4692 | for (int i = (code == PLUS_EXPR ? 1 : 0); |
4693 | i >= 0; --i) |
4694 | { |
4695 | tree r = gimple_call_arg (gs: ovf, index: i); |
4696 | if (TREE_CODE (r) != SSA_NAME) |
4697 | continue; |
if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
REALPART_EXPR))
4700 | { |
/* Punt if one of the args which isn't
subtracted isn't __real__; that could
otherwise prevent a better match later.
4704 | Consider: |
4705 | _3 = .ADD_OVERFLOW (_1, _2); |
4706 | _4 = REALPART_EXPR <_3>; |
4707 | _5 = IMAGPART_EXPR <_3>; |
4708 | _7 = .ADD_OVERFLOW (_4, _6); |
4709 | _8 = REALPART_EXPR <_7>; |
4710 | _9 = IMAGPART_EXPR <_7>; |
4711 | _12 = _10 + _11; |
4712 | _13 = _12 + _9; |
4713 | _14 = _13 + _5; |
4714 | We want to match this when called on |
4715 | the last stmt as a pair of .UADDC calls, |
4716 | but without this check we could turn |
4717 | that prematurely on _13 = _12 + _9; |
4718 | stmt into .UADDC with 0 carry-in just |
4719 | on the second .ADD_OVERFLOW call and |
4720 | another replacing the _12 and _13 |
4721 | additions. */ |
4722 | ovf_lhs = NULL_TREE; |
4723 | break; |
4724 | } |
4725 | } |
4726 | } |
4727 | if (ovf_lhs) |
4728 | { |
4729 | use_operand_p use_p; |
4730 | imm_use_iterator iter; |
4731 | tree re_lhs = NULL_TREE; |
4732 | FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs) |
4733 | { |
4734 | gimple *use_stmt = USE_STMT (use_p); |
if (is_gimple_debug (use_stmt))
4736 | continue; |
4737 | if (use_stmt == im) |
4738 | continue; |
if (!uaddc_is_cplxpart (use_stmt,
REALPART_EXPR))
4741 | { |
4742 | ovf_lhs = NULL_TREE; |
4743 | break; |
4744 | } |
re_lhs = gimple_assign_lhs (use_stmt);
4746 | } |
4747 | if (ovf_lhs && re_lhs) |
4748 | { |
4749 | FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs) |
4750 | { |
4751 | gimple *use_stmt = USE_STMT (use_p); |
if (is_gimple_debug (use_stmt))
4753 | continue; |
internal_fn ifn
= gimple_call_internal_fn (ovf);
4756 | /* Punt if the __real__ of lhs is used |
4757 | in the same .*_OVERFLOW call. |
4758 | Consider: |
4759 | _3 = .ADD_OVERFLOW (_1, _2); |
4760 | _4 = REALPART_EXPR <_3>; |
4761 | _5 = IMAGPART_EXPR <_3>; |
4762 | _7 = .ADD_OVERFLOW (_4, _6); |
4763 | _8 = REALPART_EXPR <_7>; |
4764 | _9 = IMAGPART_EXPR <_7>; |
4765 | _12 = _10 + _11; |
4766 | _13 = _12 + _5; |
4767 | _14 = _13 + _9; |
4768 | We want to match this when called on |
4769 | the last stmt as a pair of .UADDC calls, |
4770 | but without this check we could turn |
4771 | that prematurely on _13 = _12 + _5; |
4772 | stmt into .UADDC with 0 carry-in just |
4773 | on the first .ADD_OVERFLOW call and |
4774 | another replacing the _12 and _13 |
4775 | additions. */ |
if (gimple_call_internal_p (use_stmt, ifn))
4777 | { |
4778 | ovf_lhs = NULL_TREE; |
4779 | break; |
4780 | } |
4781 | } |
4782 | } |
4783 | } |
4784 | } |
if ((ovf_lhs
|| gimple_call_internal_p (ovf,
code == PLUS_EXPR
? IFN_UADDC : IFN_USUBC))
&& (optab_handler (code == PLUS_EXPR
? uaddc5_optab : usubc5_optab,
TYPE_MODE (type))
!= CODE_FOR_nothing))
{
/* And in that case build another .UADDC/.USUBC
call for the most significant limb addition.
The overflow bit is ignored here.  */
if (i != 2)
std::swap (rhs[i], rhs[2]);
4799 | gimple *g |
4800 | = gimple_build_call_internal (code == PLUS_EXPR |
4801 | ? IFN_UADDC |
4802 | : IFN_USUBC, |
4803 | 3, rhs[0], rhs[1], |
4804 | rhs[2]); |
tree nlhs = make_ssa_name (build_complex_type (type));
gimple_call_set_lhs (g, nlhs);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
tree ilhs = gimple_assign_lhs (stmt);
4809 | g = gimple_build_assign (ilhs, REALPART_EXPR, |
4810 | build1 (REALPART_EXPR, |
4811 | TREE_TYPE (ilhs), |
4812 | nlhs)); |
4813 | gsi_replace (gsi, g, true); |
4814 | /* And if it is initialized from result of __imag__ |
4815 | of .{ADD,SUB}_OVERFLOW call, replace that |
4816 | call with .U{ADD,SUB}C call with the same arguments, |
4817 | just 0 added as third argument. This isn't strictly |
4818 | necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0) |
4819 | produce the same result, but may result in better |
generated code on some targets where the backend can
better prepare for how the result will be used.  */
4822 | if (ovf_lhs) |
4823 | { |
4824 | tree zero = build_zero_cst (type); |
4825 | g = gimple_build_call_internal (code == PLUS_EXPR |
4826 | ? IFN_UADDC |
4827 | : IFN_USUBC, |
4828 | 3, ovf_arg1, |
4829 | ovf_arg2, zero); |
gimple_call_set_lhs (g, ovf_lhs);
4831 | gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf); |
4832 | gsi_replace (&gsi2, g, true); |
4833 | } |
4834 | return true; |
4835 | } |
4836 | } |
4837 | } |
4838 | return false; |
4839 | } |
4840 | if (code == MINUS_EXPR && !rhs[2]) |
4841 | return false; |
4842 | if (code == MINUS_EXPR) |
/* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
So, for MINUS_EXPR swap the single added rhs operand (the others
are subtracted) into rhs[3].  */
std::swap (rhs[0], rhs[3]);
4847 | } |
/* Walk from both operands of STMT (for +/- sometimes even from
all 4 addends or 3 subtrahends), look through casts and != 0
statements which would preserve the [0, 1] range of values, and
check which is initialized from __imag__.  */
4852 | gimple *im1 = NULL, *im2 = NULL; |
4853 | for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++) |
4854 | if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME) |
4855 | { |
4856 | gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); |
im = uaddc_ne0 (im);
if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4859 | { |
4860 | if (im1 == NULL) |
4861 | { |
4862 | im1 = im; |
4863 | if (i != 0) |
std::swap (rhs[0], rhs[i]);
4865 | } |
4866 | else |
4867 | { |
4868 | im2 = im; |
4869 | if (i != 1) |
std::swap (rhs[1], rhs[i]);
4871 | break; |
4872 | } |
4873 | } |
4874 | } |
4875 | /* If we don't find at least two, punt. */ |
4876 | if (!im2) |
4877 | return false; |
4878 | /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results, |
4879 | either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have |
4880 | uaddc5/usubc5 named pattern for the corresponding mode. */ |
4881 | gimple *ovf1 |
4882 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0)); |
4883 | gimple *ovf2 |
4884 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0)); |
4885 | internal_fn ifn; |
if (!is_gimple_call (ovf1)
|| !gimple_call_internal_p (ovf1)
|| ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
&& ifn != IFN_SUB_OVERFLOW)
|| !gimple_call_internal_p (ovf2, ifn)
|| optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
TYPE_MODE (type)) == CODE_FOR_nothing
|| (rhs[2]
&& optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
4895 | TYPE_MODE (type)) == CODE_FOR_nothing)) |
4896 | return false; |
4897 | tree arg1, arg2, arg3 = NULL_TREE; |
4898 | gimple *re1 = NULL, *re2 = NULL; |
4899 | /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments |
4900 | should be initialized from __real__ of the other of the two calls. |
4901 | Though, for .SUB_OVERFLOW, it has to be the first argument, not the |
4902 | second one. */ |
4903 | for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i) |
4904 | for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL)) |
4905 | { |
tree arg = gimple_call_arg (ovf, i);
4907 | if (TREE_CODE (arg) != SSA_NAME) |
4908 | continue; |
4909 | re1 = SSA_NAME_DEF_STMT (arg); |
if (uaddc_is_cplxpart (re1, REALPART_EXPR)
4911 | && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0)) |
4912 | == (ovf == ovf1 ? ovf2 : ovf1))) |
4913 | { |
4914 | if (ovf == ovf1) |
4915 | { |
4916 | /* Make sure ovf2 is the .*_OVERFLOW call with argument |
4917 | initialized from __real__ of ovf1. */ |
std::swap (rhs[0], rhs[1]);
std::swap (im1, im2);
std::swap (ovf1, ovf2);
}
arg3 = gimple_call_arg (ovf, 1 - i);
4923 | i = -1; |
4924 | break; |
4925 | } |
4926 | } |
4927 | if (!arg3) |
4928 | return false; |
arg1 = gimple_call_arg (ovf1, 0);
arg2 = gimple_call_arg (ovf1, 1);
if (!types_compatible_p (type, TREE_TYPE (arg1)))
4932 | return false; |
4933 | int kind[2] = { 0, 0 }; |
4934 | tree arg_im[2] = { NULL_TREE, NULL_TREE }; |
4935 | /* At least one of arg2 and arg3 should have type compatible |
4936 | with arg1/rhs[0], and the other one should have value in [0, 1] |
4937 | range. If both are in [0, 1] range and type compatible with |
4938 | arg1/rhs[0], try harder to find after looking through casts, |
4939 | != 0 comparisons which one is initialized to __imag__ of |
4940 | .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */ |
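/* The KIND bits computed in the loop below are: bit 0 - type
compatible with TYPE, bit 1 - value known to be in the [0, 1]
range, bit 2 - __imag__ of an .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C
call result.  */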
4941 | for (int i = 0; i < 2; ++i) |
4942 | { |
4943 | tree arg = i == 0 ? arg2 : arg3; |
if (types_compatible_p (type, TREE_TYPE (arg)))
4945 | kind[i] = 1; |
4946 | if (!INTEGRAL_TYPE_P (TREE_TYPE (arg)) |
4947 | || (TYPE_PRECISION (TREE_TYPE (arg)) == 1 |
4948 | && !TYPE_UNSIGNED (TREE_TYPE (arg)))) |
4949 | continue; |
4950 | if (tree_zero_one_valued_p (arg)) |
4951 | kind[i] |= 2; |
4952 | if (TREE_CODE (arg) == SSA_NAME) |
4953 | { |
4954 | gimple *g = SSA_NAME_DEF_STMT (arg); |
if (gimple_assign_cast_p (g))
{
tree op = gimple_assign_rhs1 (g);
4958 | if (TREE_CODE (op) == SSA_NAME |
4959 | && INTEGRAL_TYPE_P (TREE_TYPE (op))) |
4960 | g = SSA_NAME_DEF_STMT (op); |
4961 | } |
4962 | g = uaddc_ne0 (g); |
if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
continue;
arg_im[i] = gimple_assign_lhs (g);
g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
if (!is_gimple_call (g) || !gimple_call_internal_p (g))
continue;
switch (gimple_call_internal_fn (g))
4970 | { |
4971 | case IFN_ADD_OVERFLOW: |
4972 | case IFN_SUB_OVERFLOW: |
4973 | case IFN_UADDC: |
4974 | case IFN_USUBC: |
4975 | break; |
4976 | default: |
4977 | continue; |
4978 | } |
4979 | kind[i] |= 4; |
4980 | } |
4981 | } |
/* Make arg2 the one with compatible type and arg3 the one
with [0, 1] range.  If both are true for both operands,
prefer as arg3 the result of __imag__ of some ifn.  */
if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
{
std::swap (arg2, arg3);
std::swap (kind[0], kind[1]);
std::swap (arg_im[0], arg_im[1]);
4990 | } |
4991 | if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0) |
4992 | return false; |
if (!has_single_use (gimple_assign_lhs (im1))
|| !has_single_use (gimple_assign_lhs (im2))
|| !has_single_use (gimple_assign_lhs (re1))
|| num_imm_uses (gimple_call_lhs (ovf1)) != 2)
4997 | return false; |
4998 | /* Check that ovf2's result is used in __real__ and set re2 |
4999 | to that statement. */ |
5000 | use_operand_p use_p; |
5001 | imm_use_iterator iter; |
tree lhs = gimple_call_lhs (ovf2);
5003 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
5004 | { |
5005 | gimple *use_stmt = USE_STMT (use_p); |
if (is_gimple_debug (use_stmt))
5007 | continue; |
5008 | if (use_stmt == im2) |
5009 | continue; |
5010 | if (re2) |
5011 | return false; |
if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5013 | return false; |
5014 | re2 = use_stmt; |
5015 | } |
5016 | /* Build .UADDC/.USUBC call which will be placed before the stmt. */ |
5017 | gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2); |
5018 | gimple *g; |
if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5020 | arg3 = arg_im[1]; |
5021 | if ((kind[1] & 1) == 0) |
5022 | { |
5023 | if (TREE_CODE (arg3) == INTEGER_CST) |
5024 | arg3 = fold_convert (type, arg3); |
5025 | else |
5026 | { |
g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
arg3 = gimple_assign_lhs (g);
5030 | } |
5031 | } |
5032 | g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW |
5033 | ? IFN_UADDC : IFN_USUBC, |
5034 | 3, arg1, arg2, arg3); |
5035 | tree nlhs = make_ssa_name (TREE_TYPE (lhs)); |
gimple_call_set_lhs (g, nlhs);
5037 | gsi_insert_before (&gsi2, g, GSI_SAME_STMT); |
5038 | /* In the case where stmt is | or ^ of two overflow flags |
5039 | or addition of those, replace stmt with __imag__ of the above |
5040 | added call. In case of arg1 + arg2 + (ovf1 + ovf2) or |
5041 | arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */ |
tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5043 | g = gimple_build_assign (ilhs, IMAGPART_EXPR, |
5044 | build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs)); |
5045 | if (rhs[2]) |
5046 | { |
5047 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
5048 | /* Remove some further statements which can't be kept in the IL because |
5049 | they can use SSA_NAMEs whose setter is going to be removed too. */ |
5050 | while (temp_stmts.length ()) |
5051 | { |
5052 | g = temp_stmts.pop (); |
5053 | gsi2 = gsi_for_stmt (g); |
5054 | gsi_remove (&gsi2, true); |
5055 | } |
5056 | } |
5057 | else |
5058 | gsi_replace (gsi, g, true); |
/* Remove some statements which can't be kept in the IL because they
use SSA_NAMEs whose setters are going to be removed too.  */
5061 | tree rhs1 = rhs[1]; |
5062 | for (int i = 0; i < 2; i++) |
if (rhs1 == gimple_assign_lhs (im2))
5064 | break; |
5065 | else |
5066 | { |
5067 | g = SSA_NAME_DEF_STMT (rhs1); |
rhs1 = gimple_assign_rhs1 (g);
5069 | gsi2 = gsi_for_stmt (g); |
5070 | gsi_remove (&gsi2, true); |
5071 | } |
5072 | gcc_checking_assert (rhs1 == gimple_assign_lhs (im2)); |
5073 | gsi2 = gsi_for_stmt (im2); |
5074 | gsi_remove (&gsi2, true); |
5075 | /* Replace the re2 statement with __real__ of the newly added |
5076 | .UADDC/.USUBC call. */ |
5077 | if (re2) |
5078 | { |
5079 | gsi2 = gsi_for_stmt (re2); |
tree rlhs = gimple_assign_lhs (re2);
5081 | g = gimple_build_assign (rlhs, REALPART_EXPR, |
5082 | build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs)); |
5083 | gsi_replace (&gsi2, g, true); |
5084 | } |
5085 | if (rhs[2]) |
5086 | { |
5087 | /* If this is the arg1 + arg2 + (ovf1 + ovf2) or |
5088 | arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb, |
5089 | replace stmt with __real__ of another .UADDC/.USUBC call which |
5090 | handles the most significant limb. Overflow flag from this is |
5091 | ignored. */ |
5092 | g = gimple_build_call_internal (code == PLUS_EXPR |
5093 | ? IFN_UADDC : IFN_USUBC, |
5094 | 3, rhs[3], rhs[2], ilhs); |
5095 | nlhs = make_ssa_name (TREE_TYPE (lhs)); |
gimple_call_set_lhs (g, nlhs);
5097 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
ilhs = gimple_assign_lhs (stmt);
5099 | g = gimple_build_assign (ilhs, REALPART_EXPR, |
5100 | build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs)); |
5101 | gsi_replace (gsi, g, true); |
5102 | } |
5103 | if (TREE_CODE (arg3) == SSA_NAME) |
5104 | { |
5105 | /* When pattern recognizing the second least significant limb |
5106 | above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb), |
check whether the [0, 1] range argument (i.e. the carry-in) is the
result of another .{ADD,SUB}_OVERFLOW call (one handling the
least significant limb).  Again look through casts and != 0.  */
5110 | gimple *im3 = SSA_NAME_DEF_STMT (arg3); |
5111 | for (int i = 0; i < 2; ++i) |
5112 | { |
gimple *im4 = uaddc_cast (im3);
5114 | if (im4 == im3) |
5115 | break; |
5116 | else |
5117 | im3 = im4; |
5118 | } |
im3 = uaddc_ne0 (im3);
if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5121 | { |
5122 | gimple *ovf3 |
5123 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0)); |
if (gimple_call_internal_p (ovf3, ifn))
5125 | { |
lhs = gimple_call_lhs (ovf3);
arg1 = gimple_call_arg (ovf3, 0);
arg2 = gimple_call_arg (ovf3, 1);
if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
&& types_compatible_p (type, TREE_TYPE (arg1))
&& types_compatible_p (type, TREE_TYPE (arg2)))
5132 | { |
5133 | /* And if it is initialized from result of __imag__ |
5134 | of .{ADD,SUB}_OVERFLOW call, replace that |
5135 | call with .U{ADD,SUB}C call with the same arguments, |
5136 | just 0 added as third argument. This isn't strictly |
5137 | necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0) |
5138 | produce the same result, but may result in better |
generated code on some targets where the backend can
better prepare for how the result will be used.  */
5141 | g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW |
5142 | ? IFN_UADDC : IFN_USUBC, |
5143 | 3, arg1, arg2, |
5144 | build_zero_cst (type)); |
gimple_call_set_lhs (g, lhs);
5146 | gsi2 = gsi_for_stmt (ovf3); |
5147 | gsi_replace (&gsi2, g, true); |
5148 | } |
5149 | } |
5150 | } |
5151 | } |
5152 | return true; |
5153 | } |
5154 | |
5155 | /* Return true if target has support for divmod. */ |
5156 | |
5157 | static bool |
5158 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) |
5159 | { |
5160 | /* If target supports hardware divmod insn, use it for divmod. */ |
if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5162 | return true; |
5163 | |
5164 | /* Check if libfunc for divmod is available. */ |
5165 | rtx libfunc = optab_libfunc (divmod_optab, mode); |
5166 | if (libfunc != NULL_RTX) |
5167 | { |
/* If optab_handler exists for div_optab, perhaps in a wider mode,
we don't want to use the libfunc even if it exists for the given
mode.  */
machine_mode div_mode;
FOR_EACH_MODE_FROM (div_mode, mode)
if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5173 | return false; |
5174 | |
5175 | return targetm.expand_divmod_libfunc != NULL; |
5176 | } |
5177 | |
5178 | return false; |
5179 | } |
5180 | |
/* Check if stmt is a candidate for the divmod transform.  */
5182 | |
5183 | static bool |
5184 | divmod_candidate_p (gassign *stmt) |
5185 | { |
5186 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
5187 | machine_mode mode = TYPE_MODE (type); |
5188 | optab divmod_optab, div_optab; |
5189 | |
5190 | if (TYPE_UNSIGNED (type)) |
5191 | { |
5192 | divmod_optab = udivmod_optab; |
5193 | div_optab = udiv_optab; |
5194 | } |
5195 | else |
5196 | { |
5197 | divmod_optab = sdivmod_optab; |
5198 | div_optab = sdiv_optab; |
5199 | } |
5200 | |
tree op1 = gimple_assign_rhs1 (stmt);
tree op2 = gimple_assign_rhs2 (stmt);
5203 | |
/* Disable the transform if either operand is a constant, since
division by a constant may have a specialized expansion.  */
5206 | if (CONSTANT_CLASS_P (op1)) |
5207 | return false; |
5208 | |
5209 | if (CONSTANT_CLASS_P (op2)) |
5210 | { |
5211 | if (integer_pow2p (op2)) |
5212 | return false; |
5213 | |
5214 | if (element_precision (type) <= HOST_BITS_PER_WIDE_INT |
5215 | && element_precision (type) <= BITS_PER_WORD) |
5216 | return false; |
5217 | |
/* If the divisor is not a power of 2 and the precision is wider
than a HOST_WIDE_INT, expand_divmod punts on that, so in that
case it is better to use the divmod optab or libfunc.  Similarly
if choose_multiplier might need pre/post shifts of BITS_PER_WORD
or more.  */
5222 | } |
5223 | |
5224 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should |
5225 | expand using the [su]divv optabs. */ |
5226 | if (TYPE_OVERFLOW_TRAPS (type)) |
5227 | return false; |
5228 | |
5229 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) |
5230 | return false; |
5231 | |
5232 | return true; |
5233 | } |
5234 | |
5235 | /* This function looks for: |
5236 | t1 = a TRUNC_DIV_EXPR b; |
5237 | t2 = a TRUNC_MOD_EXPR b; |
5238 | and transforms it to the following sequence: |
5239 | complex_tmp = DIVMOD (a, b); |
t1 = REALPART_EXPR(complex_tmp);
t2 = IMAGPART_EXPR(complex_tmp);
5242 | For conditions enabling the transform see divmod_candidate_p(). |
5243 | |
5244 | The pass has three parts: |
1) Find top_stmt which is a trunc_div or trunc_mod stmt and dominates all
other trunc_div_expr and trunc_mod_expr stmts.
2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
to the stmts vector.
3) Insert a DIVMOD call just before top_stmt and update entries in the
stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
IMAGPART_EXPR for mod).  */
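
/* As a source-level sketch (hypothetical variable names), for
int q = x / y;
int r = x % y;
with both statements using the same operands, a single
complex_tmp = DIVMOD (x, y) call is emitted and q and r are
rewritten to its real and imaginary parts.  */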
5252 | |
5253 | static bool |
5254 | convert_to_divmod (gassign *stmt) |
5255 | { |
5256 | if (stmt_can_throw_internal (cfun, stmt) |
5257 | || !divmod_candidate_p (stmt)) |
5258 | return false; |
5259 | |
tree op1 = gimple_assign_rhs1 (stmt);
tree op2 = gimple_assign_rhs2 (stmt);
5262 | |
5263 | imm_use_iterator use_iter; |
5264 | gimple *use_stmt; |
5265 | auto_vec<gimple *> stmts; |
5266 | |
5267 | gimple *top_stmt = stmt; |
basic_block top_bb = gimple_bb (stmt);
5269 | |
/* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
at least stmt and possibly other trunc_div/trunc_mod stmts
having the same operands as stmt.  */
5273 | |
5274 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) |
5275 | { |
if (is_gimple_assign (use_stmt)
&& (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
|| gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
&& operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
&& operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
5281 | { |
5282 | if (stmt_can_throw_internal (cfun, use_stmt)) |
5283 | continue; |
5284 | |
basic_block bb = gimple_bb (use_stmt);
5286 | |
5287 | if (bb == top_bb) |
5288 | { |
if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
5290 | top_stmt = use_stmt; |
5291 | } |
5292 | else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) |
5293 | { |
5294 | top_bb = bb; |
5295 | top_stmt = use_stmt; |
5296 | } |
5297 | } |
5298 | } |
5299 | |
tree top_op1 = gimple_assign_rhs1 (top_stmt);
tree top_op2 = gimple_assign_rhs2 (top_stmt);

stmts.safe_push (top_stmt);
bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
5305 | |
/* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
to the stmts vector.  The 2nd loop will always add stmt to the stmts
vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
2nd loop ends up adding at least the single trunc_mod_expr stmt.  */
5310 | |
5311 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) |
5312 | { |
if (is_gimple_assign (use_stmt)
&& (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
|| gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
&& operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
&& operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
5318 | { |
5319 | if (use_stmt == top_stmt |
5320 | || stmt_can_throw_internal (cfun, use_stmt) |
|| !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
5322 | continue; |
5323 | |
stmts.safe_push (use_stmt);
if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
5326 | div_seen = true; |
5327 | } |
5328 | } |
5329 | |
5330 | if (!div_seen) |
5331 | return false; |
5332 | |
/* Part 3: Create a call to the internal fn DIVMOD:
divmod_tmp = DIVMOD (op1, op2).  */
5335 | |
5336 | gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); |
tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
call_stmt, "divmod_tmp");
gimple_call_set_lhs (call_stmt, res);
/* We rejected throwing statements above.  */
gimple_call_set_nothrow (call_stmt, true);
5342 | |
5343 | /* Insert the call before top_stmt. */ |
5344 | gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); |
5345 | gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); |
5346 | |
5347 | widen_mul_stats.divmod_calls_inserted++; |
5348 | |
5349 | /* Update all statements in stmts vector: |
5350 | lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp> |
5351 | lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */ |
5352 | |
for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
5354 | { |
5355 | tree new_rhs; |
5356 | |
switch (gimple_assign_rhs_code (use_stmt))
5358 | { |
5359 | case TRUNC_DIV_EXPR: |
5360 | new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); |
5361 | break; |
5362 | |
5363 | case TRUNC_MOD_EXPR: |
5364 | new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); |
5365 | break; |
5366 | |
5367 | default: |
5368 | gcc_unreachable (); |
5369 | } |
5370 | |
5371 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
5372 | gimple_assign_set_rhs_from_tree (&gsi, new_rhs); |
update_stmt (use_stmt);
5374 | } |
5375 | |
5376 | return true; |
5377 | } |
5378 | |
5379 | /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as |
5380 | its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return |
5381 | value is true iff we converted the statement. */ |
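
/* For instance, a high-part multiply written at the source level as
uint64_t mulhi (uint64_t a, uint64_t b)
{
return ((unsigned __int128) a * b) >> 64;
}
is a WIDEN_MULT_EXPR feeding a RSHIFT_EXPR by the operand precision,
which is rewritten here into a MULT_HIGHPART_EXPR when the target
provides umul_highpart (a sketch assuming a 64-bit target with
__int128).  */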
5382 | |
5383 | static bool |
5384 | convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi) |
5385 | { |
tree lhs = gimple_assign_lhs (stmt);
tree stype = TREE_TYPE (lhs);
tree sarg0 = gimple_assign_rhs1 (stmt);
tree sarg1 = gimple_assign_rhs2 (stmt);
5390 | |
5391 | if (TREE_CODE (stype) != INTEGER_TYPE |
5392 | || TREE_CODE (sarg1) != INTEGER_CST |
5393 | || TREE_CODE (sarg0) != SSA_NAME |
5394 | || !tree_fits_uhwi_p (sarg1) |
|| !has_single_use (sarg0))
5396 | return false; |
5397 | |
5398 | gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0)); |
5399 | if (!def) |
5400 | return false; |
5401 | |
enum tree_code mcode = gimple_assign_rhs_code (def);
5403 | if (mcode == NOP_EXPR) |
5404 | { |
tree tmp = gimple_assign_rhs1 (def);
if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
5407 | return false; |
5408 | def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp)); |
5409 | if (!def) |
5410 | return false; |
mcode = gimple_assign_rhs_code (def);
5412 | } |
5413 | |
5414 | if (mcode != WIDEN_MULT_EXPR |
|| gimple_bb (def) != gimple_bb (stmt))
5416 | return false; |
5417 | tree mtype = TREE_TYPE (gimple_assign_lhs (def)); |
5418 | if (TREE_CODE (mtype) != INTEGER_TYPE |
5419 | || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype)) |
5420 | return false; |
5421 | |
tree mop1 = gimple_assign_rhs1 (def);
tree mop2 = gimple_assign_rhs2 (def);
5424 | tree optype = TREE_TYPE (mop1); |
5425 | bool unsignedp = TYPE_UNSIGNED (optype); |
5426 | unsigned int prec = TYPE_PRECISION (optype); |
5427 | |
5428 | if (unsignedp != TYPE_UNSIGNED (mtype) |
5429 | || TYPE_PRECISION (mtype) != 2 * prec) |
5430 | return false; |
5431 | |
5432 | unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1); |
5433 | if (bits < prec || bits >= 2 * prec) |
5434 | return false; |
5435 | |
/* For the time being, require the operands to have the same signedness.  */
5437 | if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2))) |
5438 | return false; |
5439 | |
5440 | machine_mode mode = TYPE_MODE (optype); |
5441 | optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |
if (optab_handler (tab, mode) == CODE_FOR_nothing)
5443 | return false; |
5444 | |
location_t loc = gimple_location (stmt);
tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
MULT_HIGHPART_EXPR, mop1, mop2);
5448 | tree highpart2 = highpart1; |
5449 | tree ntype = optype; |
5450 | |
5451 | if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype)) |
5452 | { |
5453 | ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype) |
5454 | : signed_type_for (optype); |
highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
5456 | } |
5457 | if (bits > prec) |
highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
RSHIFT_EXPR, highpart2,
build_int_cst (ntype, bits - prec));
5461 | |
5462 | gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2); |
5463 | gsi_replace (gsi, new_stmt, true); |
5464 | |
5465 | widen_mul_stats.highpart_mults_inserted++; |
5466 | return true; |
5467 | } |
5468 | |
5469 | /* If target has spaceship<MODE>3 expander, pattern recognize |
5470 | <bb 2> [local count: 1073741824]: |
5471 | if (a_2(D) == b_3(D)) |
5472 | goto <bb 6>; [34.00%] |
5473 | else |
5474 | goto <bb 3>; [66.00%] |
5475 | |
5476 | <bb 3> [local count: 708669601]: |
5477 | if (a_2(D) < b_3(D)) |
5478 | goto <bb 6>; [1.04%] |
5479 | else |
5480 | goto <bb 4>; [98.96%] |
5481 | |
5482 | <bb 4> [local count: 701299439]: |
5483 | if (a_2(D) > b_3(D)) |
5484 | goto <bb 5>; [48.89%] |
5485 | else |
5486 | goto <bb 6>; [51.11%] |
5487 | |
5488 | <bb 5> [local count: 342865295]: |
5489 | |
5490 | <bb 6> [local count: 1073741824]: |
5491 | and turn it into: |
5492 | <bb 2> [local count: 1073741824]: |
5493 | _1 = .SPACESHIP (a_2(D), b_3(D)); |
5494 | if (_1 == 0) |
5495 | goto <bb 6>; [34.00%] |
5496 | else |
5497 | goto <bb 3>; [66.00%] |
5498 | |
5499 | <bb 3> [local count: 708669601]: |
5500 | if (_1 == -1) |
5501 | goto <bb 6>; [1.04%] |
5502 | else |
5503 | goto <bb 4>; [98.96%] |
5504 | |
5505 | <bb 4> [local count: 701299439]: |
5506 | if (_1 == 1) |
5507 | goto <bb 5>; [48.89%] |
5508 | else |
5509 | goto <bb 6>; [51.11%] |
5510 | |
5511 | <bb 5> [local count: 342865295]: |
5512 | |
5513 | <bb 6> [local count: 1073741824]: |
5514 | so that the backend can emit optimal comparison and |
5515 | conditional jump sequence. */ |
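
/* Such a CFG typically arises e.g. from the C++20 three-way
comparison on floating point operands,
auto c = a <=> b;
lowered by the front end into the ==/</> chain above (an
illustrative source form; any equivalent comparison chain is
matched).  */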
5516 | |
5517 | static void |
5518 | optimize_spaceship (gcond *stmt) |
5519 | { |
enum tree_code code = gimple_cond_code (stmt);
5521 | if (code != EQ_EXPR && code != NE_EXPR) |
5522 | return; |
tree arg1 = gimple_cond_lhs (stmt);
tree arg2 = gimple_cond_rhs (stmt);
5525 | if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) |
|| optab_handler (spaceship_optab,
5527 | TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing |
|| operand_equal_p (arg1, arg2, 0))
5529 | return; |
5530 | |
basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
5532 | edge em1 = NULL, e1 = NULL, e2 = NULL; |
5533 | bb1 = EDGE_SUCC (bb0, 1)->dest; |
5534 | if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR)) |
5535 | bb1 = EDGE_SUCC (bb0, 0)->dest; |
5536 | |
gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
if (g == NULL
|| !single_pred_p (bb1)
|| (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
: (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
|| !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
|| !cond_only_block_p (bb1))
5545 | return; |
5546 | |
enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
? LT_EXPR : GT_EXPR);
switch (gimple_cond_code (g))
5550 | { |
5551 | case LT_EXPR: |
5552 | case LE_EXPR: |
5553 | break; |
5554 | case GT_EXPR: |
5555 | case GE_EXPR: |
5556 | ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR; |
5557 | break; |
5558 | default: |
5559 | return; |
5560 | } |
5561 | |
5562 | for (int i = 0; i < 2; ++i) |
5563 | { |
/* With NaNs, </<=/>/>= are false, so we need to look for the
third comparison on the false edge of whatever non-equality
comparison the second comparison is.  */
5567 | if (HONOR_NANS (TREE_TYPE (arg1)) |
5568 | && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0) |
5569 | continue; |
5570 | |
5571 | bb2 = EDGE_SUCC (bb1, i)->dest; |
g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
if (g == NULL
|| !single_pred_p (bb2)
|| (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
: (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
|| !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
|| !cond_only_block_p (bb2)
|| EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
continue;

enum tree_code ccode2
= (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
switch (gimple_cond_code (g))
5586 | { |
5587 | case LT_EXPR: |
5588 | case LE_EXPR: |
5589 | break; |
5590 | case GT_EXPR: |
5591 | case GE_EXPR: |
5592 | ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR; |
5593 | break; |
5594 | default: |
5595 | continue; |
5596 | } |
5597 | if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2) |
5598 | continue; |
5599 | |
5600 | if ((ccode == LT_EXPR) |
5601 | ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)) |
5602 | { |
5603 | em1 = EDGE_SUCC (bb1, 1 - i); |
5604 | e1 = EDGE_SUCC (bb2, 0); |
5605 | e2 = EDGE_SUCC (bb2, 1); |
5606 | if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0)) |
std::swap (e1, e2);
5608 | } |
5609 | else |
5610 | { |
5611 | e1 = EDGE_SUCC (bb1, 1 - i); |
5612 | em1 = EDGE_SUCC (bb2, 0); |
5613 | e2 = EDGE_SUCC (bb2, 1); |
5614 | if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0)) |
std::swap (em1, e2);
5616 | } |
5617 | break; |
5618 | } |
5619 | |
5620 | if (em1 == NULL) |
5621 | { |
5622 | if ((ccode == LT_EXPR) |
5623 | ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0)) |
5624 | { |
5625 | em1 = EDGE_SUCC (bb1, 1); |
5626 | e1 = EDGE_SUCC (bb1, 0); |
5627 | e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; |
5628 | } |
5629 | else |
5630 | { |
5631 | em1 = EDGE_SUCC (bb1, 0); |
5632 | e1 = EDGE_SUCC (bb1, 1); |
5633 | e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; |
5634 | } |
5635 | } |
5636 | |
5637 | gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2); |
5638 | tree lhs = make_ssa_name (integer_type_node); |
gimple_call_set_lhs (gc, lhs);
5640 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
5641 | gsi_insert_before (&gsi, gc, GSI_SAME_STMT); |
5642 | |
gimple_cond_set_lhs (stmt, lhs);
gimple_cond_set_rhs (stmt, integer_zero_node);
update_stmt (stmt);
5646 | |
gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
gimple_cond_set_lhs (cond, lhs);
5649 | if (em1->src == bb1 && e2 != em1) |
5650 | { |
gimple_cond_set_rhs (cond, integer_minus_one_node);
gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
? EQ_EXPR : NE_EXPR);
5654 | } |
5655 | else |
5656 | { |
5657 | gcc_assert (e1->src == bb1 && e2 != e1); |
gimple_cond_set_rhs (cond, integer_one_node);
gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
? EQ_EXPR : NE_EXPR);
5661 | } |
update_stmt (cond);
5663 | |
5664 | if (e2 != e1 && e2 != em1) |
5665 | { |
cond = as_a <gcond *> (*gsi_last_bb (bb2));
gimple_cond_set_lhs (cond, lhs);
if (em1->src == bb2)
gimple_cond_set_rhs (cond, integer_minus_one_node);
else
{
gcc_assert (e1->src == bb2);
gimple_cond_set_rhs (cond, integer_one_node);
}
gimple_cond_set_code (cond,
(e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
update_stmt (cond);
5678 | } |
5679 | |
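/* The .SPACESHIP result is -1, 0, 1 or 2 (the last one for unordered
operands), so record that range on LHS.  */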
5680 | wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node)); |
5681 | wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node)); |
5682 | value_range vr (TREE_TYPE (lhs), wm1, w2); |
5683 | set_range_info (lhs, vr); |
5684 | } |
5685 | |
5686 | |
5687 | /* Find integer multiplications where the operands are extended from |
5688 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR |
5689 | or MULT_HIGHPART_EXPR where appropriate. */ |
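
/* For example (a source-level sketch),
int32_t a, b;
int64_t p = (int64_t) a * (int64_t) b;
becomes a single WIDEN_MULT_EXPR on targets that provide a widening
multiply pattern, instead of two extensions and a double-width
multiply.  */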
5690 | |
5691 | namespace { |
5692 | |
5693 | const pass_data pass_data_optimize_widening_mul = |
5694 | { |
GIMPLE_PASS, /* type */
"widening_mul", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_TREE_WIDEN_MUL, /* tv_id */
PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_update_ssa, /* todo_flags_finish */
5704 | }; |
5705 | |
5706 | class pass_optimize_widening_mul : public gimple_opt_pass |
5707 | { |
5708 | public: |
5709 | pass_optimize_widening_mul (gcc::context *ctxt) |
5710 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) |
5711 | {} |
5712 | |
5713 | /* opt_pass methods: */ |
5714 | bool gate (function *) final override |
5715 | { |
5716 | return flag_expensive_optimizations && optimize; |
5717 | } |
5718 | |
5719 | unsigned int execute (function *) final override; |
5720 | |
5721 | }; // class pass_optimize_widening_mul |
5722 | |
5723 | /* Walker class to perform the transformation in reverse dominance order. */ |
5724 | |
5725 | class math_opts_dom_walker : public dom_walker |
5726 | { |
5727 | public: |
/* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
if walking modifies the CFG.  */
5730 | |
5731 | math_opts_dom_walker (bool *cfg_changed_p) |
5732 | : dom_walker (CDI_DOMINATORS), m_last_result_set (), |
5733 | m_cfg_changed_p (cfg_changed_p) {} |
5734 | |
5735 | /* The actual actions performed in the walk. */ |
5736 | |
5737 | void after_dom_children (basic_block) final override; |
5738 | |
5739 | /* Set of results of chains of multiply and add statement combinations that |
5740 | were not transformed into FMAs because of active deferring. */ |
5741 | hash_set<tree> m_last_result_set; |
5742 | |
5743 | /* Pointer to a flag of the user that needs to be set if CFG has been |
5744 | modified. */ |
5745 | bool *m_cfg_changed_p; |
5746 | }; |
5747 | |
5748 | void |
5749 | math_opts_dom_walker::after_dom_children (basic_block bb) |
5750 | { |
5751 | gimple_stmt_iterator gsi; |
5752 | |
5753 | fma_deferring_state fma_state (param_avoid_fma_max_bits > 0); |
5754 | |
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
5758 | enum tree_code code; |
5759 | |
if (is_gimple_assign (stmt))
{
code = gimple_assign_rhs_code (stmt);
5763 | switch (code) |
5764 | { |
5765 | case MULT_EXPR: |
if (!convert_mult_to_widen (stmt, &gsi)
&& !convert_expand_mult_copysign (stmt, &gsi)
&& convert_mult_to_fma (stmt,
gimple_assign_rhs1 (stmt),
gimple_assign_rhs2 (stmt),
&fma_state))
5772 | { |
5773 | gsi_remove (&gsi, true); |
5774 | release_defs (stmt); |
5775 | continue; |
5776 | } |
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5778 | break; |
5779 | |
5780 | case PLUS_EXPR: |
5781 | case MINUS_EXPR: |
if (!convert_plusminus_to_widen (&gsi, stmt, code))
{
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
if (gsi_stmt (gsi) == stmt)
match_uaddc_usubc (&gsi, stmt, code);
5787 | } |
5788 | break; |
5789 | |
5790 | case BIT_NOT_EXPR: |
if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
5792 | continue; |
5793 | break; |
5794 | |
5795 | case TRUNC_MOD_EXPR: |
convert_to_divmod (as_a <gassign *> (stmt));
5797 | break; |
5798 | |
5799 | case RSHIFT_EXPR: |
convert_mult_to_highpart (as_a <gassign *> (stmt), &gsi);
5801 | break; |
5802 | |
5803 | case BIT_IOR_EXPR: |
5804 | case BIT_XOR_EXPR: |
match_uaddc_usubc (&gsi, stmt, code);
5806 | break; |
5807 | |
5808 | default:; |
5809 | } |
5810 | } |
else if (is_gimple_call (stmt))
5812 | { |
5813 | switch (gimple_call_combined_fn (stmt)) |
5814 | { |
5815 | CASE_CFN_POW: |
if (gimple_call_lhs (stmt)
&& TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
&& real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
&dconst2)
&& convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 0),
&fma_state))
5824 | { |
5825 | unlink_stmt_vdef (stmt); |
5826 | if (gsi_remove (&gsi, true) |
5827 | && gimple_purge_dead_eh_edges (bb)) |
5828 | *m_cfg_changed_p = true; |
5829 | release_defs (stmt); |
5830 | continue; |
5831 | } |
5832 | break; |
5833 | |
5834 | case CFN_COND_MUL: |
if (convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 1),
gimple_call_arg (stmt, 2),
&fma_state,
gimple_call_arg (stmt, 0)))
5840 | |
5841 | { |
5842 | gsi_remove (&gsi, true); |
5843 | release_defs (stmt); |
5844 | continue; |
5845 | } |
5846 | break; |
5847 | |
5848 | case CFN_COND_LEN_MUL: |
if (convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 1),
gimple_call_arg (stmt, 2),
&fma_state,
gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 4),
gimple_call_arg (stmt, 5)))
5856 | |
5857 | { |
5858 | gsi_remove (&gsi, true); |
5859 | release_defs (stmt); |
5860 | continue; |
5861 | } |
5862 | break; |
5863 | |
5864 | case CFN_LAST: |
cancel_fma_deferring (&fma_state);
5866 | break; |
5867 | |
5868 | default: |
5869 | break; |
5870 | } |
5871 | } |
else if (gimple_code (stmt) == GIMPLE_COND)
optimize_spaceship (as_a <gcond *> (stmt));
gsi_next (&gsi);
5875 | } |
5876 | if (fma_state.m_deferring_p |
5877 | && fma_state.m_initial_phi) |
5878 | { |
5879 | gcc_checking_assert (fma_state.m_last_result); |
if (!last_fma_candidate_feeds_initial_phi (&fma_state,
&m_last_result_set))
cancel_fma_deferring (&fma_state);
else
m_last_result_set.add (fma_state.m_last_result);
5885 | } |
5886 | } |
5887 | |
5888 | |
5889 | unsigned int |
5890 | pass_optimize_widening_mul::execute (function *fun) |
5891 | { |
5892 | bool cfg_changed = false; |
5893 | |
memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5895 | calculate_dominance_info (CDI_DOMINATORS); |
5896 | renumber_gimple_stmt_uids (cfun); |
5897 | |
5898 | math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
5899 | |
statistics_counter_event (fun, "widening multiplications inserted",
widen_mul_stats.widen_mults_inserted);
statistics_counter_event (fun, "widening maccs inserted",
widen_mul_stats.maccs_inserted);
statistics_counter_event (fun, "fused multiply-adds inserted",
widen_mul_stats.fmas_inserted);
statistics_counter_event (fun, "divmod calls inserted",
widen_mul_stats.divmod_calls_inserted);
statistics_counter_event (fun, "highpart multiplications inserted",
widen_mul_stats.highpart_mults_inserted);
5910 | |
5911 | return cfg_changed ? TODO_cleanup_cfg : 0; |
5912 | } |
5913 | |
5914 | } // anon namespace |
5915 | |
5916 | gimple_opt_pass * |
5917 | make_pass_optimize_widening_mul (gcc::context *ctxt) |
5918 | { |
5919 | return new pass_optimize_widening_mul (ctxt); |
5920 | } |
5921 | |