| 1 | /* Global, SSA-based optimizations using mathematical identities. |
| 2 | Copyright (C) 2005-2025 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of GCC. |
| 5 | |
| 6 | GCC is free software; you can redistribute it and/or modify it |
| 7 | under the terms of the GNU General Public License as published by the |
| 8 | Free Software Foundation; either version 3, or (at your option) any |
| 9 | later version. |
| 10 | |
| 11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
| 12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with GCC; see the file COPYING3. If not see |
| 18 | <http://www.gnu.org/licenses/>. */ |
| 19 | |
/* The first mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:
| 22 | |
| 23 | modulus = sqrt(x*x + y*y + z*z); |
| 24 | x = x / modulus; |
| 25 | y = y / modulus; |
| 26 | z = z / modulus; |
| 27 | |
| 28 | that can be optimized to |
| 29 | |
| 30 | modulus = sqrt(x*x + y*y + z*z); |
| 31 | rmodulus = 1.0 / modulus; |
| 32 | x = x * rmodulus; |
| 33 | y = y * rmodulus; |
| 34 | z = z * rmodulus; |
| 35 | |
| 36 | We do this for loop invariant divisors, and with this pass whenever |
| 37 | we notice that a division has the same divisor multiple times. |
| 38 | |
| 39 | Of course, like in PRE, we don't insert a division if a dominator |
| 40 | already has one. However, this cannot be done as an extension of |
| 41 | PRE for several reasons. |
| 42 | |
   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   profitable to optimize two divisions by the same number.  We make
   this threshold a parameter, and it shall be called N in the
   remainder of this comment.
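
   For instance (an illustrative sketch; the actual value of N comes
   from the min_divisions_for_recip_mul target hook), with N = 3 a
   sequence containing only two divisions by the same divisor, such as

     x = x / modulus;
     y = y / modulus;

   is left alone, while the three-division sequence above is rewritten
   to use 1.0 / modulus.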
| 50 | |
   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions do not trap, we may instead insert them
   elsewhere, namely in blocks that are common dominators of the
   blocks containing the divisions.)
| 56 | |
   We really don't want to compute the reciprocal unless a division
   will be found.  To do this, we won't insert the division in a basic
   block that has fewer than N divisions *post-dominating* it.
| 60 | |
   The algorithm constructs a subset of the dominator tree, holding
   the blocks containing the divisions and their common dominators,
   and walks it twice.  The first walk is in post-order, and it
   annotates each block with the number of divisions that post-dominate
   it: this gives information on where divisions can be inserted
   profitably.  The second walk is in pre-order; it inserts divisions
   as explained above and replaces the original divisions by
   multiplications.
| 68 | |
   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is dominated by creating the dominator tree
   subset, at O(n_basic_blocks ^ 2); however this can only happen for
   n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).
| 75 | |
| 76 | More practically, the cost will be small because there are few |
| 77 | divisions, and they tend to be in the same basic block, so insert_bb |
| 78 | is called very few times. |
| 79 | |
| 80 | If we did this using domwalk.cc, an efficient implementation would have |
| 81 | to work on all the variables in a single pass, because we could not |
| 82 | work on just a subset of the dominator tree, as we do now, and the |
| 83 | cost would also be something like O(n_statements * n_basic_blocks). |
| 84 | The data structures would be more complex in order to work on all the |
| 85 | variables in a single pass. */ |
| 86 | |
| 87 | #include "config.h" |
| 88 | #include "system.h" |
| 89 | #include "coretypes.h" |
| 90 | #include "backend.h" |
| 91 | #include "target.h" |
| 92 | #include "rtl.h" |
| 93 | #include "tree.h" |
| 94 | #include "gimple.h" |
| 95 | #include "predict.h" |
| 96 | #include "alloc-pool.h" |
| 97 | #include "tree-pass.h" |
| 98 | #include "ssa.h" |
| 99 | #include "optabs-tree.h" |
| 100 | #include "gimple-pretty-print.h" |
| 101 | #include "alias.h" |
| 102 | #include "fold-const.h" |
| 103 | #include "gimple-iterator.h" |
| 104 | #include "gimple-fold.h" |
| 105 | #include "gimplify.h" |
| 106 | #include "gimplify-me.h" |
| 107 | #include "stor-layout.h" |
| 108 | #include "tree-cfg.h" |
| 109 | #include "tree-dfa.h" |
| 110 | #include "tree-ssa.h" |
| 111 | #include "builtins.h" |
| 112 | #include "internal-fn.h" |
| 113 | #include "case-cfn-macros.h" |
| 114 | #include "optabs-libfuncs.h" |
| 115 | #include "tree-eh.h" |
| 116 | #include "targhooks.h" |
| 117 | #include "domwalk.h" |
| 118 | #include "tree-ssa-math-opts.h" |
| 119 | #include "dbgcnt.h" |
| 120 | #include "cfghooks.h" |
| 121 | |
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
| 125 | struct occurrence { |
| 126 | /* The basic block represented by this structure. */ |
| 127 | basic_block bb = basic_block(); |
| 128 | |
| 129 | /* If non-NULL, the SSA_NAME holding the definition for a reciprocal |
| 130 | inserted in BB. */ |
| 131 | tree recip_def = tree(); |
| 132 | |
| 133 | /* If non-NULL, the SSA_NAME holding the definition for a squared |
| 134 | reciprocal inserted in BB. */ |
| 135 | tree square_recip_def = tree(); |
| 136 | |
| 137 | /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
| 138 | was inserted in BB. */ |
| 139 | gimple *recip_def_stmt = nullptr; |
| 140 | |
| 141 | /* Pointer to a list of "struct occurrence"s for blocks dominated |
| 142 | by BB. */ |
| 143 | struct occurrence *children = nullptr; |
| 144 | |
  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
| 147 | struct occurrence *next = nullptr; |
| 148 | |
| 149 | /* The number of divisions that are in BB before compute_merit. The |
| 150 | number of divisions that are in BB or post-dominate it after |
| 151 | compute_merit. */ |
| 152 | int num_divisions = 0; |
| 153 | |
| 154 | /* True if the basic block has a division, false if it is a common |
| 155 | dominator for basic blocks that do. If it is false and trapping |
| 156 | math is active, BB is not a candidate for inserting a reciprocal. */ |
| 157 | bool bb_has_division = false; |
| 158 | |
  /* Construct an occurrence for basic block BB whose children list
     is headed by CHILDREN.  */
| 161 | occurrence (basic_block bb, struct occurrence *children) |
| 162 | : bb (bb), children (children) |
| 163 | { |
| 164 | bb->aux = this; |
| 165 | } |
| 166 | |
| 167 | /* Destroy a struct occurrence and remove it from its basic block. */ |
| 168 | ~occurrence () |
| 169 | { |
| 170 | bb->aux = nullptr; |
| 171 | } |
| 172 | |
| 173 | /* Allocate memory for a struct occurrence from OCC_POOL. */ |
| 174 | static void* operator new (size_t); |
| 175 | |
| 176 | /* Return memory for a struct occurrence to OCC_POOL. */ |
| 177 | static void operator delete (void*, size_t); |
| 178 | }; |
| 179 | |
| 180 | static struct |
| 181 | { |
| 182 | /* Number of 1.0/X ops inserted. */ |
| 183 | int rdivs_inserted; |
| 184 | |
| 185 | /* Number of 1.0/FUNC ops inserted. */ |
| 186 | int rfuncs_inserted; |
| 187 | } reciprocal_stats; |
| 188 | |
| 189 | static struct |
| 190 | { |
| 191 | /* Number of cexpi calls inserted. */ |
| 192 | int inserted; |
| 193 | |
| 194 | /* Number of conversions removed. */ |
| 195 | int conv_removed; |
| 196 | |
| 197 | } sincos_stats; |
| 198 | |
| 199 | static struct |
| 200 | { |
| 201 | /* Number of widening multiplication ops inserted. */ |
| 202 | int widen_mults_inserted; |
| 203 | |
| 204 | /* Number of integer multiply-and-accumulate ops inserted. */ |
| 205 | int maccs_inserted; |
| 206 | |
| 207 | /* Number of fp fused multiply-add ops inserted. */ |
| 208 | int fmas_inserted; |
| 209 | |
| 210 | /* Number of divmod calls inserted. */ |
| 211 | int divmod_calls_inserted; |
| 212 | |
| 213 | /* Number of highpart multiplication ops inserted. */ |
| 214 | int highpart_mults_inserted; |
| 215 | } widen_mul_stats; |
| 216 | |
/* Head of the list of "struct occurrence"s representing the topmost
   interesting blocks in the dominator tree.  */
| 219 | static struct occurrence *occ_head; |
| 220 | |
| 221 | /* Allocation pool for getting instances of "struct occurrence". */ |
| 222 | static object_allocator<occurrence> *occ_pool; |
| 223 | |
| 224 | void* occurrence::operator new (size_t n) |
| 225 | { |
| 226 | gcc_assert (n == sizeof(occurrence)); |
| 227 | return occ_pool->allocate_raw (); |
| 228 | } |
| 229 | |
| 230 | void occurrence::operator delete (void *occ, size_t n) |
| 231 | { |
| 232 | gcc_assert (n == sizeof(occurrence)); |
| 233 | occ_pool->remove_raw (object: occ); |
| 234 | } |
| 235 | |
| 236 | /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a |
| 237 | list of "struct occurrence"s, one per basic block, having IDOM as |
| 238 | their common dominator. |
| 239 | |
| 240 | We try to insert NEW_OCC as deep as possible in the tree, and we also |
| 241 | insert any other block that is a common dominator for BB and one |
| 242 | block already in the tree. */ |
| 243 | |
| 244 | static void |
| 245 | insert_bb (struct occurrence *new_occ, basic_block idom, |
| 246 | struct occurrence **p_head) |
| 247 | { |
| 248 | struct occurrence *occ, **p_occ; |
| 249 | |
| 250 | for (p_occ = p_head; (occ = *p_occ) != NULL; ) |
| 251 | { |
| 252 | basic_block bb = new_occ->bb, occ_bb = occ->bb; |
| 253 | basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); |
| 254 | if (dom == bb) |
| 255 | { |
| 256 | /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC |
| 257 | from its list. */ |
| 258 | *p_occ = occ->next; |
| 259 | occ->next = new_occ->children; |
| 260 | new_occ->children = occ; |
| 261 | |
| 262 | /* Try the next block (it may as well be dominated by BB). */ |
| 263 | } |
| 264 | |
| 265 | else if (dom == occ_bb) |
| 266 | { |
| 267 | /* OCC_BB dominates BB. Tail recurse to look deeper. */ |
| 268 | insert_bb (new_occ, idom: dom, p_head: &occ->children); |
| 269 | return; |
| 270 | } |
| 271 | |
| 272 | else if (dom != idom) |
| 273 | { |
| 274 | gcc_assert (!dom->aux); |
| 275 | |
| 276 | /* There is a dominator between IDOM and BB, add it and make |
| 277 | two children out of NEW_OCC and OCC. First, remove OCC from |
| 278 | its list. */ |
| 279 | *p_occ = occ->next; |
| 280 | new_occ->next = occ; |
| 281 | occ->next = NULL; |
| 282 | |
| 283 | /* None of the previous blocks has DOM as a dominator: if we tail |
| 284 | recursed, we would reexamine them uselessly. Just switch BB with |
| 285 | DOM, and go on looking for blocks dominated by DOM. */ |
| 286 | new_occ = new occurrence (dom, new_occ); |
| 287 | } |
| 288 | |
| 289 | else |
| 290 | { |
| 291 | /* Nothing special, go on with the next element. */ |
| 292 | p_occ = &occ->next; |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ |
| 297 | new_occ->next = *p_head; |
| 298 | *p_head = new_occ; |
| 299 | } |
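
/* For example (an illustrative sketch), suppose divisions have been
   registered in bb_4 and bb_5, which lie in different branches below
   their nearest common dominator bb_2.  After inserting a node for
   bb_4 and then one for bb_5, the subset rooted at *P_HEAD looks like

       bb_2
        +-- bb_5
        +-- bb_4

   i.e. a node for bb_2 is created on the fly (it carries no division
   itself) and the two existing nodes become its children.  */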
| 300 | |
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weight to give that division.
   Use IMPORTANCE = 2 to register a single division.  If the division
   is going to be registered multiple times, use 1 (as is done for
   divisions by a square, which are visited once per use of the
   squared operand).  */
| 306 | |
| 307 | static inline void |
| 308 | register_division_in (basic_block bb, int importance) |
| 309 | { |
| 310 | struct occurrence *occ; |
| 311 | |
| 312 | occ = (struct occurrence *) bb->aux; |
| 313 | if (!occ) |
| 314 | { |
| 315 | occ = new occurrence (bb, NULL); |
| 316 | insert_bb (new_occ: occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), p_head: &occ_head); |
| 317 | } |
| 318 | |
| 319 | occ->bb_has_division = true; |
| 320 | occ->num_divisions += importance; |
| 321 | } |
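
/* For example (an illustrative sketch), a statement y = a / x is
   registered once with IMPORTANCE = 2, while y = a / (x * x) is seen
   twice by the caller (once through each use of x in the square) and
   registered twice with IMPORTANCE = 1; either way one division
   contributes a total weight of 2, which is why insert_reciprocals
   later divides NUM_DIVISIONS by two before comparing it with the
   threshold.  */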
| 322 | |
| 323 | |
| 324 | /* Compute the number of divisions that postdominate each block in OCC and |
| 325 | its children. */ |
| 326 | |
| 327 | static void |
| 328 | compute_merit (struct occurrence *occ) |
| 329 | { |
| 330 | struct occurrence *occ_child; |
| 331 | basic_block dom = occ->bb; |
| 332 | |
| 333 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
| 334 | { |
| 335 | basic_block bb; |
| 336 | if (occ_child->children) |
| 337 | compute_merit (occ: occ_child); |
| 338 | |
| 339 | if (flag_exceptions) |
| 340 | bb = single_noncomplex_succ (bb: dom); |
| 341 | else |
| 342 | bb = dom; |
| 343 | |
| 344 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) |
| 345 | occ->num_divisions += occ_child->num_divisions; |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | |
| 350 | /* Return whether USE_STMT is a floating-point division by DEF. */ |
| 351 | static inline bool |
| 352 | is_division_by (gimple *use_stmt, tree def) |
| 353 | { |
| 354 | return is_gimple_assign (gs: use_stmt) |
| 355 | && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR |
| 356 | && gimple_assign_rhs2 (gs: use_stmt) == def |
	 /* Do not recognize x / x as a valid division, as we would get
	    confused later when replacing all immediate uses of x in
	    such a stmt.  */
| 360 | && gimple_assign_rhs1 (gs: use_stmt) != def |
| 361 | && !stmt_can_throw_internal (cfun, use_stmt); |
| 362 | } |
| 363 | |
| 364 | /* Return TRUE if USE_STMT is a multiplication of DEF by A. */ |
| 365 | static inline bool |
| 366 | is_mult_by (gimple *use_stmt, tree def, tree a) |
| 367 | { |
| 368 | if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN |
| 369 | && gimple_assign_rhs_code (gs: use_stmt) == MULT_EXPR) |
| 370 | { |
| 371 | tree op0 = gimple_assign_rhs1 (gs: use_stmt); |
| 372 | tree op1 = gimple_assign_rhs2 (gs: use_stmt); |
| 373 | |
| 374 | return (op0 == def && op1 == a) |
| 375 | || (op0 == a && op1 == def); |
| 376 | } |
  return false;
| 378 | } |
| 379 | |
| 380 | /* Return whether USE_STMT is DEF * DEF. */ |
| 381 | static inline bool |
| 382 | is_square_of (gimple *use_stmt, tree def) |
| 383 | { |
| 384 | return is_mult_by (use_stmt, def, a: def); |
| 385 | } |
| 386 | |
| 387 | /* Return whether USE_STMT is a floating-point division by |
| 388 | DEF * DEF. */ |
| 389 | static inline bool |
| 390 | is_division_by_square (gimple *use_stmt, tree def) |
| 391 | { |
| 392 | if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN |
| 393 | && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR |
| 394 | && gimple_assign_rhs1 (gs: use_stmt) != gimple_assign_rhs2 (gs: use_stmt) |
| 395 | && !stmt_can_throw_internal (cfun, use_stmt)) |
| 396 | { |
| 397 | tree denominator = gimple_assign_rhs2 (gs: use_stmt); |
| 398 | if (TREE_CODE (denominator) == SSA_NAME) |
| 399 | return is_square_of (SSA_NAME_DEF_STMT (denominator), def); |
| 400 | } |
  return false;
| 402 | } |
| 403 | |
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */
| 416 | |
| 417 | static void |
| 418 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
| 419 | tree def, tree recip_def, tree square_recip_def, |
| 420 | int should_insert_square_recip, int threshold) |
| 421 | { |
| 422 | tree type; |
| 423 | gassign *new_stmt, *new_square_stmt; |
| 424 | gimple_stmt_iterator gsi; |
| 425 | struct occurrence *occ_child; |
| 426 | |
| 427 | if (!recip_def |
| 428 | && (occ->bb_has_division || !flag_trapping_math) |
| 429 | /* Divide by two as all divisions are counted twice in |
| 430 | the costing loop. */ |
| 431 | && occ->num_divisions / 2 >= threshold) |
| 432 | { |
| 433 | /* Make a variable with the replacement and substitute it. */ |
| 434 | type = TREE_TYPE (def); |
| 435 | recip_def = create_tmp_reg (type, "reciptmp" ); |
| 436 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
| 437 | build_one_cst (type), def); |
| 438 | |
| 439 | if (should_insert_square_recip) |
| 440 | { |
| 441 | square_recip_def = create_tmp_reg (type, "powmult_reciptmp" ); |
| 442 | new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, |
| 443 | recip_def, recip_def); |
| 444 | } |
| 445 | |
| 446 | if (occ->bb_has_division) |
| 447 | { |
| 448 | /* Case 1: insert before an existing division. */ |
| 449 | gsi = gsi_after_labels (bb: occ->bb); |
| 450 | while (!gsi_end_p (i: gsi) |
| 451 | && (!is_division_by (use_stmt: gsi_stmt (i: gsi), def)) |
| 452 | && (!is_division_by_square (use_stmt: gsi_stmt (i: gsi), def))) |
| 453 | gsi_next (i: &gsi); |
| 454 | |
| 455 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
| 456 | if (should_insert_square_recip) |
| 457 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); |
| 458 | } |
| 459 | else if (def_gsi && occ->bb == gsi_bb (i: *def_gsi)) |
| 460 | { |
| 461 | /* Case 2: insert right after the definition. Note that this will |
| 462 | never happen if the definition statement can throw, because in |
| 463 | that case the sole successor of the statement's basic block will |
| 464 | dominate all the uses as well. */ |
| 465 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
| 466 | if (should_insert_square_recip) |
| 467 | gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT); |
| 468 | } |
| 469 | else |
| 470 | { |
| 471 | /* Case 3: insert in a basic block not containing defs/uses. */ |
| 472 | gsi = gsi_after_labels (bb: occ->bb); |
| 473 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
| 474 | if (should_insert_square_recip) |
| 475 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); |
| 476 | } |
| 477 | |
| 478 | reciprocal_stats.rdivs_inserted++; |
| 479 | |
| 480 | occ->recip_def_stmt = new_stmt; |
| 481 | } |
| 482 | |
| 483 | occ->recip_def = recip_def; |
| 484 | occ->square_recip_def = square_recip_def; |
| 485 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
| 486 | insert_reciprocals (def_gsi, occ: occ_child, def, recip_def, |
| 487 | square_recip_def, should_insert_square_recip, |
| 488 | threshold); |
| 489 | } |
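
/* For example (an illustrative sketch, assuming the block is optimized
   for speed and the target threshold is met), for a block containing

     t1 = a / x;
     t2 = b / (x * x);

   the pass inserts, in front of the first division,

     reciptmp = 1.0 / x;
     powmult_reciptmp = reciptmp * reciptmp;

   and the replacement step below then turns the divisions into
   t1 = a * reciptmp and t2 = b * powmult_reciptmp.  */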
| 490 | |
| 491 | /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). |
| 492 | Take as argument the use for (x * x). */ |
| 493 | static inline void |
| 494 | replace_reciprocal_squares (use_operand_p use_p) |
| 495 | { |
| 496 | gimple *use_stmt = USE_STMT (use_p); |
| 497 | basic_block bb = gimple_bb (g: use_stmt); |
| 498 | struct occurrence *occ = (struct occurrence *) bb->aux; |
| 499 | |
| 500 | if (optimize_bb_for_speed_p (bb) && occ->square_recip_def |
| 501 | && occ->recip_def) |
| 502 | { |
| 503 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 504 | gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR); |
| 505 | gimple_assign_set_rhs2 (gs: use_stmt, rhs: occ->square_recip_def); |
| 506 | SET_USE (use_p, occ->square_recip_def); |
| 507 | fold_stmt_inplace (&gsi); |
| 508 | update_stmt (s: use_stmt); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | |
| 513 | /* Replace the division at USE_P with a multiplication by the reciprocal, if |
| 514 | possible. */ |
| 515 | |
| 516 | static inline void |
| 517 | replace_reciprocal (use_operand_p use_p) |
| 518 | { |
| 519 | gimple *use_stmt = USE_STMT (use_p); |
| 520 | basic_block bb = gimple_bb (g: use_stmt); |
| 521 | struct occurrence *occ = (struct occurrence *) bb->aux; |
| 522 | |
| 523 | if (optimize_bb_for_speed_p (bb) |
| 524 | && occ->recip_def && use_stmt != occ->recip_def_stmt) |
| 525 | { |
| 526 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 527 | gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR); |
| 528 | SET_USE (use_p, occ->recip_def); |
| 529 | fold_stmt_inplace (&gsi); |
| 530 | update_stmt (s: use_stmt); |
| 531 | } |
| 532 | } |
| 533 | |
| 534 | |
| 535 | /* Free OCC and return one more "struct occurrence" to be freed. */ |
| 536 | |
| 537 | static struct occurrence * |
| 538 | free_bb (struct occurrence *occ) |
| 539 | { |
| 540 | struct occurrence *child, *next; |
| 541 | |
| 542 | /* First get the two pointers hanging off OCC. */ |
| 543 | next = occ->next; |
| 544 | child = occ->children; |
| 545 | delete occ; |
| 546 | |
| 547 | /* Now ensure that we don't recurse unless it is necessary. */ |
| 548 | if (!child) |
| 549 | return next; |
| 550 | else |
| 551 | { |
| 552 | while (next) |
| 553 | next = free_bb (occ: next); |
| 554 | |
| 555 | return child; |
| 556 | } |
| 557 | } |
| 558 | |
| 559 | /* Transform sequences like |
| 560 | t = sqrt (a) |
| 561 | x = 1.0 / t; |
| 562 | r1 = x * x; |
| 563 | r2 = a * x; |
| 564 | into: |
| 565 | t = sqrt (a) |
| 566 | r1 = 1.0 / a; |
| 567 | r2 = t; |
| 568 | x = r1 * r2; |
| 569 | depending on the uses of x, r1, r2. This removes one multiplication and |
| 570 | allows the sqrt and division operations to execute in parallel. |
| 571 | DEF_GSI is the gsi of the initial division by sqrt that defines |
| 572 | DEF (x in the example above). */ |
| 573 | |
| 574 | static void |
| 575 | optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def) |
| 576 | { |
| 577 | gimple *use_stmt; |
| 578 | imm_use_iterator use_iter; |
| 579 | gimple *stmt = gsi_stmt (i: *def_gsi); |
| 580 | tree x = def; |
| 581 | tree orig_sqrt_ssa_name = gimple_assign_rhs2 (gs: stmt); |
| 582 | tree div_rhs1 = gimple_assign_rhs1 (gs: stmt); |
| 583 | |
| 584 | if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME |
| 585 | || TREE_CODE (div_rhs1) != REAL_CST |
| 586 | || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1)) |
| 587 | return; |
| 588 | |
| 589 | gcall *sqrt_stmt |
| 590 | = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name)); |
| 591 | |
| 592 | if (!sqrt_stmt || !gimple_call_lhs (gs: sqrt_stmt)) |
| 593 | return; |
| 594 | |
| 595 | switch (gimple_call_combined_fn (sqrt_stmt)) |
| 596 | { |
| 597 | CASE_CFN_SQRT: |
| 598 | CASE_CFN_SQRT_FN: |
| 599 | break; |
| 600 | |
| 601 | default: |
| 602 | return; |
| 603 | } |
| 604 | tree a = gimple_call_arg (gs: sqrt_stmt, index: 0); |
| 605 | |
| 606 | /* We have 'a' and 'x'. Now analyze the uses of 'x'. */ |
| 607 | |
| 608 | /* Statements that use x in x * x. */ |
| 609 | auto_vec<gimple *> sqr_stmts; |
| 610 | /* Statements that use x in a * x. */ |
| 611 | auto_vec<gimple *> mult_stmts; |
| 612 | bool has_other_use = false; |
| 613 | bool mult_on_main_path = false; |
| 614 | |
| 615 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x) |
| 616 | { |
| 617 | if (is_gimple_debug (gs: use_stmt)) |
| 618 | continue; |
| 619 | if (is_square_of (use_stmt, def: x)) |
| 620 | { |
| 621 | sqr_stmts.safe_push (obj: use_stmt); |
| 622 | if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt)) |
| 623 | mult_on_main_path = true; |
| 624 | } |
| 625 | else if (is_mult_by (use_stmt, def: x, a)) |
| 626 | { |
| 627 | mult_stmts.safe_push (obj: use_stmt); |
| 628 | if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt)) |
| 629 | mult_on_main_path = true; |
| 630 | } |
| 631 | else |
| 632 | has_other_use = true; |
| 633 | } |
| 634 | |
| 635 | /* In the x * x and a * x cases we just rewire stmt operands or |
| 636 | remove multiplications. In the has_other_use case we introduce |
| 637 | a multiplication so make sure we don't introduce a multiplication |
| 638 | on a path where there was none. */ |
| 639 | if (has_other_use && !mult_on_main_path) |
| 640 | return; |
| 641 | |
| 642 | if (sqr_stmts.is_empty () && mult_stmts.is_empty ()) |
| 643 | return; |
| 644 | |
| 645 | /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want |
| 646 | to be able to compose it from the sqr and mult cases. */ |
| 647 | if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ())) |
| 648 | return; |
| 649 | |
| 650 | if (dump_file) |
| 651 | { |
| 652 | fprintf (stream: dump_file, format: "Optimizing reciprocal sqrt multiplications of\n" ); |
| 653 | print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE); |
| 654 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); |
| 655 | fprintf (stream: dump_file, format: "\n" ); |
| 656 | } |
| 657 | |
| 658 | bool delete_div = !has_other_use; |
| 659 | tree sqr_ssa_name = NULL_TREE; |
| 660 | if (!sqr_stmts.is_empty ()) |
| 661 | { |
| 662 | /* r1 = x * x. Transform the original |
| 663 | x = 1.0 / t |
| 664 | into |
| 665 | tmp1 = 1.0 / a |
| 666 | r1 = tmp1. */ |
| 667 | |
| 668 | sqr_ssa_name |
| 669 | = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "recip_sqrt_sqr" ); |
| 670 | |
| 671 | if (dump_file) |
| 672 | { |
| 673 | fprintf (stream: dump_file, format: "Replacing original division\n" ); |
| 674 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); |
| 675 | fprintf (stream: dump_file, format: "with new division\n" ); |
| 676 | } |
| 677 | stmt |
| 678 | = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (gs: stmt), |
| 679 | gimple_assign_rhs1 (gs: stmt), a); |
| 680 | gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT); |
| 681 | gsi_remove (def_gsi, true); |
| 682 | *def_gsi = gsi_for_stmt (stmt); |
| 683 | fold_stmt_inplace (def_gsi); |
| 684 | update_stmt (s: stmt); |
| 685 | |
| 686 | if (dump_file) |
| 687 | print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); |
| 688 | |
| 689 | delete_div = false; |
| 690 | gimple *sqr_stmt; |
| 691 | unsigned int i; |
| 692 | FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt) |
| 693 | { |
| 694 | gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt); |
| 695 | gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name); |
| 696 | update_stmt (s: sqr_stmt); |
| 697 | } |
| 698 | } |
| 699 | if (!mult_stmts.is_empty ()) |
| 700 | { |
| 701 | /* r2 = a * x. Transform this into: |
| 702 | r2 = t (The original sqrt (a)). */ |
| 703 | unsigned int i; |
| 704 | gimple *mult_stmt = NULL; |
| 705 | FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt) |
| 706 | { |
| 707 | gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt); |
| 708 | |
| 709 | if (dump_file) |
| 710 | { |
| 711 | fprintf (stream: dump_file, format: "Replacing squaring multiplication\n" ); |
| 712 | print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); |
| 713 | fprintf (stream: dump_file, format: "with assignment\n" ); |
| 714 | } |
| 715 | gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name); |
| 716 | fold_stmt_inplace (&gsi2); |
| 717 | update_stmt (s: mult_stmt); |
| 718 | if (dump_file) |
| 719 | print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); |
| 720 | } |
| 721 | } |
| 722 | |
| 723 | if (has_other_use) |
| 724 | { |
| 725 | /* Using the two temporaries tmp1, tmp2 from above |
| 726 | the original x is now: |
| 727 | x = tmp1 * tmp2. */ |
| 728 | gcc_assert (orig_sqrt_ssa_name); |
| 729 | gcc_assert (sqr_ssa_name); |
| 730 | |
| 731 | gimple *new_stmt |
| 732 | = gimple_build_assign (x, MULT_EXPR, |
| 733 | orig_sqrt_ssa_name, sqr_ssa_name); |
| 734 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
| 735 | update_stmt (s: stmt); |
| 736 | } |
| 737 | else if (delete_div) |
| 738 | { |
| 739 | /* Remove the original division. */ |
| 740 | gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); |
| 741 | gsi_remove (&gsi2, true); |
| 742 | release_defs (stmt); |
| 743 | } |
| 744 | else |
| 745 | release_ssa_name (name: x); |
| 746 | } |
| 747 | |
| 748 | /* Look for floating-point divisions among DEF's uses, and try to |
| 749 | replace them by multiplications with the reciprocal. Add |
| 750 | as many statements computing the reciprocal as needed. |
| 751 | |
| 752 | DEF must be a GIMPLE register of a floating-point type. */ |
| 753 | |
| 754 | static void |
| 755 | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
| 756 | { |
| 757 | use_operand_p use_p, square_use_p; |
| 758 | imm_use_iterator use_iter, square_use_iter; |
| 759 | tree square_def; |
| 760 | struct occurrence *occ; |
| 761 | int count = 0; |
| 762 | int threshold; |
| 763 | int square_recip_count = 0; |
| 764 | int sqrt_recip_count = 0; |
| 765 | |
| 766 | gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME); |
| 767 | threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); |
| 768 | |
| 769 | /* If DEF is a square (x * x), count the number of divisions by x. |
| 770 | If there are more divisions by x than by (DEF * DEF), prefer to optimize |
| 771 | the reciprocal of x instead of DEF. This improves cases like: |
| 772 | def = x * x |
| 773 | t0 = a / def |
| 774 | t1 = b / def |
| 775 | t2 = c / x |
| 776 | Reciprocal optimization of x results in 1 division rather than 2 or 3. */ |
| 777 | gimple *def_stmt = SSA_NAME_DEF_STMT (def); |
| 778 | |
| 779 | if (is_gimple_assign (gs: def_stmt) |
| 780 | && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR |
| 781 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME |
| 782 | && gimple_assign_rhs1 (gs: def_stmt) == gimple_assign_rhs2 (gs: def_stmt)) |
| 783 | { |
| 784 | tree op0 = gimple_assign_rhs1 (gs: def_stmt); |
| 785 | |
| 786 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0) |
| 787 | { |
| 788 | gimple *use_stmt = USE_STMT (use_p); |
| 789 | if (is_division_by (use_stmt, def: op0)) |
| 790 | sqrt_recip_count++; |
| 791 | } |
| 792 | } |
| 793 | |
| 794 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) |
| 795 | { |
| 796 | gimple *use_stmt = USE_STMT (use_p); |
| 797 | if (is_division_by (use_stmt, def)) |
| 798 | { |
| 799 | register_division_in (bb: gimple_bb (g: use_stmt), importance: 2); |
| 800 | count++; |
| 801 | } |
| 802 | |
| 803 | if (is_square_of (use_stmt, def)) |
| 804 | { |
| 805 | square_def = gimple_assign_lhs (gs: use_stmt); |
| 806 | FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def) |
| 807 | { |
| 808 | gimple *square_use_stmt = USE_STMT (square_use_p); |
| 809 | if (is_division_by (use_stmt: square_use_stmt, def: square_def)) |
| 810 | { |
| 811 | /* This is executed twice for each division by a square. */ |
| 812 | register_division_in (bb: gimple_bb (g: square_use_stmt), importance: 1); |
| 813 | square_recip_count++; |
| 814 | } |
| 815 | } |
| 816 | } |
| 817 | } |
| 818 | |
| 819 | /* Square reciprocals were counted twice above. */ |
| 820 | square_recip_count /= 2; |
| 821 | |
| 822 | /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */ |
| 823 | if (sqrt_recip_count > square_recip_count) |
| 824 | goto out; |
| 825 | |
| 826 | /* Do the expensive part only if we can hope to optimize something. */ |
| 827 | if (count + square_recip_count >= threshold && count >= 1) |
| 828 | { |
| 829 | gimple *use_stmt; |
| 830 | for (occ = occ_head; occ; occ = occ->next) |
| 831 | { |
| 832 | compute_merit (occ); |
| 833 | insert_reciprocals (def_gsi, occ, def, NULL, NULL, |
| 834 | should_insert_square_recip: square_recip_count, threshold); |
| 835 | } |
| 836 | |
| 837 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
| 838 | { |
| 839 | if (is_division_by (use_stmt, def)) |
| 840 | { |
| 841 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) |
| 842 | replace_reciprocal (use_p); |
| 843 | } |
| 844 | else if (square_recip_count > 0 && is_square_of (use_stmt, def)) |
| 845 | { |
| 846 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) |
| 847 | { |
| 848 | /* Find all uses of the square that are divisions and |
| 849 | * replace them by multiplications with the inverse. */ |
| 850 | imm_use_iterator square_iterator; |
| 851 | gimple *powmult_use_stmt = USE_STMT (use_p); |
| 852 | tree powmult_def_name = gimple_assign_lhs (gs: powmult_use_stmt); |
| 853 | |
| 854 | FOR_EACH_IMM_USE_STMT (powmult_use_stmt, |
| 855 | square_iterator, powmult_def_name) |
| 856 | FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator) |
| 857 | { |
| 858 | gimple *powmult_use_stmt = USE_STMT (square_use_p); |
| 859 | if (is_division_by (use_stmt: powmult_use_stmt, def: powmult_def_name)) |
| 860 | replace_reciprocal_squares (use_p: square_use_p); |
| 861 | } |
| 862 | } |
| 863 | } |
| 864 | } |
| 865 | } |
| 866 | |
| 867 | out: |
| 868 | for (occ = occ_head; occ; ) |
| 869 | occ = free_bb (occ); |
| 870 | |
| 871 | occ_head = NULL; |
| 872 | } |
| 873 | |
| 874 | /* Return an internal function that implements the reciprocal of CALL, |
| 875 | or IFN_LAST if there is no such function that the target supports. */ |
| 876 | |
| 877 | internal_fn |
| 878 | internal_fn_reciprocal (gcall *call) |
| 879 | { |
| 880 | internal_fn ifn; |
| 881 | |
| 882 | switch (gimple_call_combined_fn (call)) |
| 883 | { |
| 884 | CASE_CFN_SQRT: |
| 885 | CASE_CFN_SQRT_FN: |
| 886 | ifn = IFN_RSQRT; |
| 887 | break; |
| 888 | |
| 889 | default: |
| 890 | return IFN_LAST; |
| 891 | } |
| 892 | |
| 893 | tree_pair types = direct_internal_fn_types (ifn, call); |
| 894 | if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) |
| 895 | return IFN_LAST; |
| 896 | |
| 897 | return ifn; |
| 898 | } |
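
/* For example (an illustrative sketch, assuming the target supports
   IFN_RSQRT for the mode in question), the scan in
   pass_cse_reciprocals::execute below rewrites

     t_1 = sqrtf (b_2);
     x_3 = a_4 / t_1;

   into

     t_1 = .RSQRT (b_2);
     x_3 = a_4 * t_1;

   provided every use of t_1 is such a division and the block is
   optimized for speed; machine-specific builtins are handled the same
   way through targetm.builtin_reciprocal.  */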
| 899 | |
| 900 | /* Go through all the floating-point SSA_NAMEs, and call |
| 901 | execute_cse_reciprocals_1 on each of them. */ |
| 902 | namespace { |
| 903 | |
const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
| 916 | |
| 917 | class pass_cse_reciprocals : public gimple_opt_pass |
| 918 | { |
| 919 | public: |
| 920 | pass_cse_reciprocals (gcc::context *ctxt) |
| 921 | : gimple_opt_pass (pass_data_cse_reciprocals, ctxt) |
| 922 | {} |
| 923 | |
| 924 | /* opt_pass methods: */ |
| 925 | bool gate (function *) final override |
| 926 | { |
| 927 | return optimize && flag_reciprocal_math; |
| 928 | } |
| 929 | unsigned int execute (function *) final override; |
| 930 | |
| 931 | }; // class pass_cse_reciprocals |
| 932 | |
| 933 | unsigned int |
| 934 | pass_cse_reciprocals::execute (function *fun) |
| 935 | { |
| 936 | basic_block bb; |
| 937 | tree arg; |
| 938 | |
| 939 | occ_pool = new object_allocator<occurrence> ("dominators for recip" ); |
| 940 | |
| 941 | memset (s: &reciprocal_stats, c: 0, n: sizeof (reciprocal_stats)); |
| 942 | calculate_dominance_info (CDI_DOMINATORS); |
| 943 | calculate_dominance_info (CDI_POST_DOMINATORS); |
| 944 | |
| 945 | if (flag_checking) |
| 946 | FOR_EACH_BB_FN (bb, fun) |
| 947 | gcc_assert (!bb->aux); |
| 948 | |
| 949 | for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg)) |
| 950 | if (FLOAT_TYPE_P (TREE_TYPE (arg)) |
| 951 | && is_gimple_reg (arg)) |
| 952 | { |
| 953 | tree name = ssa_default_def (fun, arg); |
| 954 | if (name) |
| 955 | execute_cse_reciprocals_1 (NULL, def: name); |
| 956 | } |
| 957 | |
| 958 | FOR_EACH_BB_FN (bb, fun) |
| 959 | { |
| 960 | tree def; |
| 961 | |
| 962 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); |
| 963 | gsi_next (i: &gsi)) |
| 964 | { |
| 965 | gphi *phi = gsi.phi (); |
| 966 | def = PHI_RESULT (phi); |
| 967 | if (! virtual_operand_p (op: def) |
| 968 | && FLOAT_TYPE_P (TREE_TYPE (def))) |
| 969 | execute_cse_reciprocals_1 (NULL, def); |
| 970 | } |
| 971 | |
| 972 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); |
| 973 | gsi_next (i: &gsi)) |
| 974 | { |
| 975 | gimple *stmt = gsi_stmt (i: gsi); |
| 976 | |
| 977 | if (gimple_has_lhs (stmt) |
| 978 | && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
| 979 | && FLOAT_TYPE_P (TREE_TYPE (def)) |
| 980 | && TREE_CODE (def) == SSA_NAME) |
| 981 | { |
| 982 | execute_cse_reciprocals_1 (def_gsi: &gsi, def); |
| 983 | stmt = gsi_stmt (i: gsi); |
| 984 | if (flag_unsafe_math_optimizations |
| 985 | && is_gimple_assign (gs: stmt) |
| 986 | && gimple_assign_lhs (gs: stmt) == def |
| 987 | && !stmt_can_throw_internal (cfun, stmt) |
| 988 | && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR) |
| 989 | optimize_recip_sqrt (def_gsi: &gsi, def); |
| 990 | } |
| 991 | } |
| 992 | |
| 993 | if (optimize_bb_for_size_p (bb)) |
| 994 | continue; |
| 995 | |
| 996 | /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ |
| 997 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); |
| 998 | gsi_next (i: &gsi)) |
| 999 | { |
| 1000 | gimple *stmt = gsi_stmt (i: gsi); |
| 1001 | |
| 1002 | if (is_gimple_assign (gs: stmt) |
| 1003 | && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR) |
| 1004 | { |
| 1005 | tree arg1 = gimple_assign_rhs2 (gs: stmt); |
| 1006 | gimple *stmt1; |
| 1007 | |
| 1008 | if (TREE_CODE (arg1) != SSA_NAME) |
| 1009 | continue; |
| 1010 | |
| 1011 | stmt1 = SSA_NAME_DEF_STMT (arg1); |
| 1012 | |
| 1013 | if (is_gimple_call (gs: stmt1) |
| 1014 | && gimple_call_lhs (gs: stmt1)) |
| 1015 | { |
| 1016 | bool fail; |
| 1017 | imm_use_iterator ui; |
| 1018 | use_operand_p use_p; |
| 1019 | tree fndecl = NULL_TREE; |
| 1020 | |
| 1021 | gcall *call = as_a <gcall *> (p: stmt1); |
| 1022 | internal_fn ifn = internal_fn_reciprocal (call); |
| 1023 | if (ifn == IFN_LAST) |
| 1024 | { |
| 1025 | fndecl = gimple_call_fndecl (gs: call); |
| 1026 | if (!fndecl |
| 1027 | || !fndecl_built_in_p (node: fndecl, klass: BUILT_IN_MD)) |
| 1028 | continue; |
| 1029 | fndecl = targetm.builtin_reciprocal (fndecl); |
| 1030 | if (!fndecl) |
| 1031 | continue; |
| 1032 | } |
| 1033 | |
| 1034 | /* Check that all uses of the SSA name are divisions, |
| 1035 | otherwise replacing the defining statement will do |
| 1036 | the wrong thing. */ |
| 1037 | fail = false; |
| 1038 | FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) |
| 1039 | { |
| 1040 | gimple *stmt2 = USE_STMT (use_p); |
| 1041 | if (is_gimple_debug (gs: stmt2)) |
| 1042 | continue; |
| 1043 | if (!is_gimple_assign (gs: stmt2) |
| 1044 | || gimple_assign_rhs_code (gs: stmt2) != RDIV_EXPR |
| 1045 | || gimple_assign_rhs1 (gs: stmt2) == arg1 |
| 1046 | || gimple_assign_rhs2 (gs: stmt2) != arg1) |
| 1047 | { |
| 1048 | fail = true; |
| 1049 | break; |
| 1050 | } |
| 1051 | } |
| 1052 | if (fail) |
| 1053 | continue; |
| 1054 | |
| 1055 | gimple_replace_ssa_lhs (call, arg1); |
| 1056 | if (gimple_call_internal_p (gs: call) != (ifn != IFN_LAST)) |
| 1057 | { |
| 1058 | auto_vec<tree, 4> args; |
| 1059 | for (unsigned int i = 0; |
| 1060 | i < gimple_call_num_args (gs: call); i++) |
| 1061 | args.safe_push (obj: gimple_call_arg (gs: call, index: i)); |
| 1062 | gcall *stmt2; |
| 1063 | if (ifn == IFN_LAST) |
| 1064 | stmt2 = gimple_build_call_vec (fndecl, args); |
| 1065 | else |
| 1066 | stmt2 = gimple_build_call_internal_vec (ifn, args); |
| 1067 | gimple_call_set_lhs (gs: stmt2, lhs: arg1); |
| 1068 | gimple_move_vops (stmt2, call); |
| 1069 | gimple_call_set_nothrow (s: stmt2, |
| 1070 | nothrow_p: gimple_call_nothrow_p (s: call)); |
| 1071 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); |
| 1072 | gsi_replace (&gsi2, stmt2, true); |
| 1073 | } |
| 1074 | else |
| 1075 | { |
| 1076 | if (ifn == IFN_LAST) |
| 1077 | gimple_call_set_fndecl (gs: call, decl: fndecl); |
| 1078 | else |
| 1079 | gimple_call_set_internal_fn (call_stmt: call, fn: ifn); |
| 1080 | update_stmt (s: call); |
| 1081 | } |
| 1082 | reciprocal_stats.rfuncs_inserted++; |
| 1083 | |
| 1084 | FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) |
| 1085 | { |
| 1086 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
| 1087 | gimple_assign_set_rhs_code (s: stmt, code: MULT_EXPR); |
| 1088 | fold_stmt_inplace (&gsi); |
| 1089 | update_stmt (s: stmt); |
| 1090 | } |
| 1091 | } |
| 1092 | } |
| 1093 | } |
| 1094 | } |
| 1095 | |
| 1096 | statistics_counter_event (fun, "reciprocal divs inserted" , |
| 1097 | reciprocal_stats.rdivs_inserted); |
| 1098 | statistics_counter_event (fun, "reciprocal functions inserted" , |
| 1099 | reciprocal_stats.rfuncs_inserted); |
| 1100 | |
| 1101 | free_dominance_info (CDI_DOMINATORS); |
| 1102 | free_dominance_info (CDI_POST_DOMINATORS); |
| 1103 | delete occ_pool; |
| 1104 | return 0; |
| 1105 | } |
| 1106 | |
| 1107 | } // anon namespace |
| 1108 | |
| 1109 | gimple_opt_pass * |
| 1110 | make_pass_cse_reciprocals (gcc::context *ctxt) |
| 1111 | { |
| 1112 | return new pass_cse_reciprocals (ctxt); |
| 1113 | } |
| 1114 | |
| 1115 | /* If NAME is the result of a type conversion, look for other |
| 1116 | equivalent dominating or dominated conversions, and replace all |
| 1117 | uses with the earliest dominating name, removing the redundant |
| 1118 | conversions. Return the prevailing name. */ |
| 1119 | |
| 1120 | static tree |
| 1121 | execute_cse_conv_1 (tree name, bool *cfg_changed) |
| 1122 | { |
| 1123 | if (SSA_NAME_IS_DEFAULT_DEF (name) |
| 1124 | || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) |
| 1125 | return name; |
| 1126 | |
| 1127 | gimple *def_stmt = SSA_NAME_DEF_STMT (name); |
| 1128 | |
| 1129 | if (!gimple_assign_cast_p (s: def_stmt)) |
| 1130 | return name; |
| 1131 | |
| 1132 | tree src = gimple_assign_rhs1 (gs: def_stmt); |
| 1133 | |
| 1134 | if (TREE_CODE (src) != SSA_NAME) |
| 1135 | return name; |
| 1136 | |
| 1137 | imm_use_iterator use_iter; |
| 1138 | gimple *use_stmt; |
| 1139 | |
| 1140 | /* Find the earliest dominating def. */ |
| 1141 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src) |
| 1142 | { |
| 1143 | if (use_stmt == def_stmt |
| 1144 | || !gimple_assign_cast_p (s: use_stmt)) |
| 1145 | continue; |
| 1146 | |
| 1147 | tree lhs = gimple_assign_lhs (gs: use_stmt); |
| 1148 | |
| 1149 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs) |
| 1150 | || (gimple_assign_rhs1 (gs: use_stmt) |
| 1151 | != gimple_assign_rhs1 (gs: def_stmt)) |
| 1152 | || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs))) |
| 1153 | continue; |
| 1154 | |
| 1155 | bool use_dominates; |
| 1156 | if (gimple_bb (g: def_stmt) == gimple_bb (g: use_stmt)) |
| 1157 | { |
| 1158 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 1159 | while (!gsi_end_p (i: gsi) && gsi_stmt (i: gsi) != def_stmt) |
| 1160 | gsi_next (i: &gsi); |
| 1161 | use_dominates = !gsi_end_p (i: gsi); |
| 1162 | } |
| 1163 | else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt), |
| 1164 | gimple_bb (g: def_stmt))) |
| 1165 | use_dominates = false; |
| 1166 | else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: def_stmt), |
| 1167 | gimple_bb (g: use_stmt))) |
| 1168 | use_dominates = true; |
| 1169 | else |
| 1170 | continue; |
| 1171 | |
| 1172 | if (use_dominates) |
| 1173 | { |
| 1174 | std::swap (a&: name, b&: lhs); |
| 1175 | std::swap (a&: def_stmt, b&: use_stmt); |
| 1176 | } |
| 1177 | } |
| 1178 | |
| 1179 | /* Now go through all uses of SRC again, replacing the equivalent |
| 1180 | dominated conversions. We may replace defs that were not |
| 1181 | dominated by the then-prevailing defs when we first visited |
| 1182 | them. */ |
| 1183 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src) |
| 1184 | { |
| 1185 | if (use_stmt == def_stmt |
| 1186 | || !gimple_assign_cast_p (s: use_stmt)) |
| 1187 | continue; |
| 1188 | |
| 1189 | tree lhs = gimple_assign_lhs (gs: use_stmt); |
| 1190 | |
| 1191 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs) |
| 1192 | || (gimple_assign_rhs1 (gs: use_stmt) |
| 1193 | != gimple_assign_rhs1 (gs: def_stmt)) |
| 1194 | || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs))) |
| 1195 | continue; |
| 1196 | |
| 1197 | basic_block use_bb = gimple_bb (g: use_stmt); |
| 1198 | if (gimple_bb (g: def_stmt) == use_bb |
| 1199 | || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (g: def_stmt))) |
| 1200 | { |
| 1201 | sincos_stats.conv_removed++; |
| 1202 | |
| 1203 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 1204 | replace_uses_by (lhs, name); |
| 1205 | if (gsi_remove (&gsi, true) |
| 1206 | && gimple_purge_dead_eh_edges (use_bb)) |
| 1207 | *cfg_changed = true; |
| 1208 | release_defs (use_stmt); |
| 1209 | } |
| 1210 | } |
| 1211 | |
| 1212 | return name; |
| 1213 | } |
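
/* For example (an illustrative sketch), given

     s_1 = (float) d_2;
     ...
     s_3 = (float) d_2;

   where the first conversion dominates the second, the second cast is
   removed and all uses of s_3 are replaced by s_1, so that the
   sin/cos/cexpi CSE below sees a single argument.  */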
| 1214 | |
/* Record an occurrence at statement USE_STMT in the vector of
   statements STMTS if it is dominated by *TOP_BB, dominates it, or
   *TOP_BB is not yet initialized.  Return true if the occurrence was
   pushed on the vector.  Adjust *TOP_BB to be the basic block
   dominating all statements in the vector.  */
| 1220 | |
| 1221 | static bool |
| 1222 | maybe_record_sincos (vec<gimple *> *stmts, |
| 1223 | basic_block *top_bb, gimple *use_stmt) |
| 1224 | { |
| 1225 | basic_block use_bb = gimple_bb (g: use_stmt); |
| 1226 | if (*top_bb |
| 1227 | && (*top_bb == use_bb |
| 1228 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) |
| 1229 | stmts->safe_push (obj: use_stmt); |
| 1230 | else if (!*top_bb |
| 1231 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) |
| 1232 | { |
| 1233 | stmts->safe_push (obj: use_stmt); |
| 1234 | *top_bb = use_bb; |
| 1235 | } |
| 1236 | else |
| 1237 | return false; |
| 1238 | |
| 1239 | return true; |
| 1240 | } |
| 1241 | |
/* Look for sin, cos and cexpi calls with the same argument NAME and
   CSE them into a single call to cexpi.  We first walk over all
   immediate uses of the argument, collecting in a vector the
   statements that we can CSE; in a second pass we replace each such
   call with an assignment of a REALPART_EXPR or IMAGPART_EXPR of the
   result of the cexpi call, which is inserted before the use
   statement that dominates all other candidates.  */
| 1249 | |
| 1250 | static bool |
| 1251 | execute_cse_sincos_1 (tree name) |
| 1252 | { |
| 1253 | gimple_stmt_iterator gsi; |
| 1254 | imm_use_iterator use_iter; |
| 1255 | tree fndecl, res, type = NULL_TREE; |
| 1256 | gimple *def_stmt, *use_stmt, *stmt; |
| 1257 | int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; |
| 1258 | auto_vec<gimple *> stmts; |
| 1259 | basic_block top_bb = NULL; |
| 1260 | int i; |
| 1261 | bool cfg_changed = false; |
| 1262 | |
| 1263 | name = execute_cse_conv_1 (name, cfg_changed: &cfg_changed); |
| 1264 | |
| 1265 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) |
| 1266 | { |
| 1267 | if (gimple_code (g: use_stmt) != GIMPLE_CALL |
| 1268 | || !gimple_call_lhs (gs: use_stmt)) |
| 1269 | continue; |
| 1270 | |
| 1271 | switch (gimple_call_combined_fn (use_stmt)) |
| 1272 | { |
| 1273 | CASE_CFN_COS: |
| 1274 | seen_cos |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0; |
| 1275 | break; |
| 1276 | |
| 1277 | CASE_CFN_SIN: |
| 1278 | seen_sin |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0; |
| 1279 | break; |
| 1280 | |
| 1281 | CASE_CFN_CEXPI: |
| 1282 | seen_cexpi |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0; |
| 1283 | break; |
| 1284 | |
| 1285 | default:; |
| 1286 | continue; |
| 1287 | } |
| 1288 | |
| 1289 | tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt)); |
| 1290 | if (!type) |
| 1291 | { |
| 1292 | type = t; |
| 1293 | t = TREE_TYPE (name); |
| 1294 | } |
| 1295 | /* This checks that NAME has the right type in the first round, |
| 1296 | and, in subsequent rounds, that the built_in type is the same |
| 1297 | type, or a compatible type. */ |
| 1298 | if (type != t && !types_compatible_p (type1: type, type2: t)) |
| 1299 | return false; |
| 1300 | } |
| 1301 | if (seen_cos + seen_sin + seen_cexpi <= 1) |
| 1302 | return false; |
| 1303 | |
| 1304 | /* Simply insert cexpi at the beginning of top_bb but not earlier than |
| 1305 | the name def statement. */ |
| 1306 | fndecl = mathfn_built_in (type, fn: BUILT_IN_CEXPI); |
| 1307 | if (!fndecl) |
| 1308 | return false; |
| 1309 | stmt = gimple_build_call (fndecl, 1, name); |
| 1310 | res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, name: "sincostmp" ); |
| 1311 | gimple_call_set_lhs (gs: stmt, lhs: res); |
| 1312 | |
| 1313 | def_stmt = SSA_NAME_DEF_STMT (name); |
| 1314 | if (!SSA_NAME_IS_DEFAULT_DEF (name) |
| 1315 | && gimple_code (g: def_stmt) != GIMPLE_PHI |
| 1316 | && gimple_bb (g: def_stmt) == top_bb) |
| 1317 | { |
| 1318 | gsi = gsi_for_stmt (def_stmt); |
| 1319 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
| 1320 | } |
| 1321 | else |
| 1322 | { |
| 1323 | gsi = gsi_after_labels (bb: top_bb); |
| 1324 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 1325 | } |
| 1326 | sincos_stats.inserted++; |
| 1327 | |
| 1328 | /* And adjust the recorded old call sites. */ |
| 1329 | for (i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i) |
| 1330 | { |
| 1331 | tree rhs = NULL; |
| 1332 | |
| 1333 | switch (gimple_call_combined_fn (use_stmt)) |
| 1334 | { |
| 1335 | CASE_CFN_COS: |
| 1336 | rhs = fold_build1 (REALPART_EXPR, type, res); |
| 1337 | break; |
| 1338 | |
| 1339 | CASE_CFN_SIN: |
| 1340 | rhs = fold_build1 (IMAGPART_EXPR, type, res); |
| 1341 | break; |
| 1342 | |
| 1343 | CASE_CFN_CEXPI: |
| 1344 | rhs = res; |
| 1345 | break; |
| 1346 | |
| 1347 | default:; |
| 1348 | gcc_unreachable (); |
| 1349 | } |
| 1350 | |
| 1351 | /* Replace call with a copy. */ |
| 1352 | stmt = gimple_build_assign (gimple_call_lhs (gs: use_stmt), rhs); |
| 1353 | |
| 1354 | gsi = gsi_for_stmt (use_stmt); |
| 1355 | gsi_replace (&gsi, stmt, true); |
| 1356 | if (gimple_purge_dead_eh_edges (gimple_bb (g: stmt))) |
| 1357 | cfg_changed = true; |
| 1358 | } |
| 1359 | |
| 1360 | return cfg_changed; |
| 1361 | } |
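
/* For example (an illustrative sketch with made-up SSA names), the
   calls

     c_2 = cos (a_1);
     s_3 = sin (a_1);

   are rewritten as

     sincostmp_4 = cexpi (a_1);
     c_2 = REALPART_EXPR <sincostmp_4>;
     s_3 = IMAGPART_EXPR <sincostmp_4>;

   with the cexpi call inserted in TOP_BB, no earlier than the
   definition of a_1.  */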
| 1362 | |
| 1363 | /* To evaluate powi(x,n), the floating point value x raised to the |
| 1364 | constant integer exponent n, we use a hybrid algorithm that |
| 1365 | combines the "window method" with look-up tables. For an |
| 1366 | introduction to exponentiation algorithms and "addition chains", |
| 1367 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, |
| 1368 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", |
| 1369 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation |
| 1370 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ |
| 1371 | |
| 1372 | /* Provide a default value for POWI_MAX_MULTS, the maximum number of |
| 1373 | multiplications to inline before calling the system library's pow |
| 1374 | function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, |
| 1375 | so this default never requires calling pow, powf or powl. */ |
| 1376 | |
| 1377 | #ifndef POWI_MAX_MULTS |
| 1378 | #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) |
| 1379 | #endif |
| 1380 | |
| 1381 | /* The size of the "optimal power tree" lookup table. All |
| 1382 | exponents less than this value are simply looked up in the |
| 1383 | powi_table below. This threshold is also used to size the |
| 1384 | cache of pseudo registers that hold intermediate results. */ |
| 1385 | #define POWI_TABLE_SIZE 256 |
| 1386 | |
| 1387 | /* The size, in bits of the window, used in the "window method" |
| 1388 | exponentiation algorithm. This is equivalent to a radix of |
| 1389 | (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */ |
| 1390 | #define POWI_WINDOW_SIZE 3 |
| 1391 | |
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
| 1398 | |
| 1399 | static const unsigned char powi_table[POWI_TABLE_SIZE] = |
| 1400 | { |
| 1401 | 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */ |
| 1402 | 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */ |
| 1403 | 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */ |
| 1404 | 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */ |
| 1405 | 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */ |
| 1406 | 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */ |
| 1407 | 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */ |
| 1408 | 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */ |
| 1409 | 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */ |
| 1410 | 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */ |
| 1411 | 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */ |
| 1412 | 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */ |
| 1413 | 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */ |
| 1414 | 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */ |
| 1415 | 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */ |
| 1416 | 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */ |
| 1417 | 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */ |
| 1418 | 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */ |
| 1419 | 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */ |
| 1420 | 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */ |
| 1421 | 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */ |
| 1422 | 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */ |
| 1423 | 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */ |
| 1424 | 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */ |
| 1425 | 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */ |
| 1426 | 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */ |
| 1427 | 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */ |
| 1428 | 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */ |
| 1429 | 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */ |
| 1430 | 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */ |
| 1431 | 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */ |
| 1432 | 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */ |
| 1433 | }; |
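
/* For example (an illustrative walk through the table), powi_table[15]
   is 9, so x**15 is evaluated as x**9 * x**6; recursing in the same
   way yields the chain

     x2 = x * x;   x3 = x * x2;   x6 = x3 * x3;
     x9 = x3 * x6; x15 = x6 * x9;

   i.e. 5 multiplications, one fewer than the 6 needed by the plain
   square-and-multiply method.  powi_cost and powi_as_mults_1 below
   compute respectively the length and the statements of such a chain,
   switching to the sliding-window scheme for exponents of
   POWI_TABLE_SIZE or more.  */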
| 1434 | |
| 1435 | |
| 1436 | /* Return the number of multiplications required to calculate |
| 1437 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a |
| 1438 | subroutine of powi_cost. CACHE is an array indicating |
| 1439 | which exponents have already been calculated. */ |
| 1440 | |
| 1441 | static int |
| 1442 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) |
| 1443 | { |
| 1444 | /* If we've already calculated this exponent, then this evaluation |
| 1445 | doesn't require any additional multiplications. */ |
| 1446 | if (cache[n]) |
| 1447 | return 0; |
| 1448 | |
| 1449 | cache[n] = true; |
| 1450 | return powi_lookup_cost (n: n - powi_table[n], cache) |
| 1451 | + powi_lookup_cost (n: powi_table[n], cache) + 1; |
| 1452 | } |
| 1453 | |
| 1454 | /* Return the number of multiplications required to calculate |
| 1455 | powi(x,n) for an arbitrary x, given the exponent N. This |
| 1456 | function needs to be kept in sync with powi_as_mults below. */ |
| 1457 | |
| 1458 | static int |
| 1459 | powi_cost (HOST_WIDE_INT n) |
| 1460 | { |
| 1461 | bool cache[POWI_TABLE_SIZE]; |
| 1462 | unsigned HOST_WIDE_INT digit; |
| 1463 | unsigned HOST_WIDE_INT val; |
| 1464 | int result; |
| 1465 | |
| 1466 | if (n == 0) |
| 1467 | return 0; |
| 1468 | |
| 1469 | /* Ignore the reciprocal when calculating the cost. */ |
| 1470 | val = absu_hwi (x: n); |
| 1471 | |
| 1472 | /* Initialize the exponent cache. */ |
| 1473 | memset (s: cache, c: 0, POWI_TABLE_SIZE * sizeof (bool)); |
| 1474 | cache[1] = true; |
| 1475 | |
| 1476 | result = 0; |
| 1477 | |
| 1478 | while (val >= POWI_TABLE_SIZE) |
| 1479 | { |
| 1480 | if (val & 1) |
| 1481 | { |
| 1482 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); |
| 1483 | result += powi_lookup_cost (n: digit, cache) |
| 1484 | + POWI_WINDOW_SIZE + 1; |
| 1485 | val >>= POWI_WINDOW_SIZE; |
| 1486 | } |
| 1487 | else |
| 1488 | { |
| 1489 | val >>= 1; |
| 1490 | result++; |
| 1491 | } |
| 1492 | } |
| 1493 | |
| 1494 | return result + powi_lookup_cost (n: val, cache); |
| 1495 | } |
| 1496 | |
| 1497 | /* Recursive subroutine of powi_as_mults. This function takes the |
| 1498 | array, CACHE, of already calculated exponents and an exponent N and |
| 1499 | returns a tree that corresponds to CACHE[1]**N, with type TYPE. */ |
| 1500 | |
| 1501 | static tree |
| 1502 | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, |
| 1503 | unsigned HOST_WIDE_INT n, tree *cache) |
| 1504 | { |
| 1505 | tree op0, op1, ssa_target; |
| 1506 | unsigned HOST_WIDE_INT digit; |
| 1507 | gassign *mult_stmt; |
| 1508 | |
| 1509 | if (n < POWI_TABLE_SIZE && cache[n]) |
| 1510 | return cache[n]; |
| 1511 | |
| 1512 | ssa_target = make_temp_ssa_name (type, NULL, name: "powmult" ); |
| 1513 | |
| 1514 | if (n < POWI_TABLE_SIZE) |
| 1515 | { |
| 1516 | cache[n] = ssa_target; |
| 1517 | op0 = powi_as_mults_1 (gsi, loc, type, n: n - powi_table[n], cache); |
| 1518 | op1 = powi_as_mults_1 (gsi, loc, type, n: powi_table[n], cache); |
| 1519 | } |
| 1520 | else if (n & 1) |
| 1521 | { |
| 1522 | digit = n & ((1 << POWI_WINDOW_SIZE) - 1); |
| 1523 | op0 = powi_as_mults_1 (gsi, loc, type, n: n - digit, cache); |
| 1524 | op1 = powi_as_mults_1 (gsi, loc, type, n: digit, cache); |
| 1525 | } |
| 1526 | else |
| 1527 | { |
| 1528 | op0 = powi_as_mults_1 (gsi, loc, type, n: n >> 1, cache); |
| 1529 | op1 = op0; |
| 1530 | } |
| 1531 | |
| 1532 | mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1); |
| 1533 | gimple_set_location (g: mult_stmt, location: loc); |
| 1534 | gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); |
| 1535 | |
| 1536 | return ssa_target; |
| 1537 | } |
| 1538 | |
| 1539 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. |
| 1540 | This function needs to be kept in sync with powi_cost above. */ |
| 1541 | |
| 1542 | tree |
| 1543 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, |
| 1544 | tree arg0, HOST_WIDE_INT n) |
| 1545 | { |
| 1546 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |
| 1547 | gassign *div_stmt; |
| 1548 | tree target; |
| 1549 | |
| 1550 | if (n == 0) |
| 1551 | return build_one_cst (type); |
| 1552 | |
| 1553 | memset (s: cache, c: 0, n: sizeof (cache)); |
| 1554 | cache[1] = arg0; |
| 1555 | |
| 1556 | result = powi_as_mults_1 (gsi, loc, type, n: absu_hwi (x: n), cache); |
| 1557 | if (n >= 0) |
| 1558 | return result; |
| 1559 | |
| 1560 | /* If the original exponent was negative, reciprocate the result. */ |
| 1561 | target = make_temp_ssa_name (type, NULL, name: "powmult" ); |
| 1562 | div_stmt = gimple_build_assign (target, RDIV_EXPR, |
| 1563 | build_real (type, dconst1), result); |
| 1564 | gimple_set_location (g: div_stmt, location: loc); |
| 1565 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |
| 1566 | |
| 1567 | return target; |
| 1568 | } |
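|  | 
|  | /* Illustrative sketch (added commentary, not from the original sources):
|  |    if powi_table[5] selects the split 5 = 2 + 3, then
|  |    powi_as_mults (gsi, loc, x, -5) emits a sequence equivalent to
|  | 
|  |      powmult_1 = x * x;                   <-- x**2
|  |      powmult_2 = x * powmult_1;           <-- x**3
|  |      powmult_3 = powmult_1 * powmult_2;   <-- x**5
|  |      powmult_4 = 1.0 / powmult_3;         <-- x**-5
|  | 
|  |    i.e. three multiplications from powi_as_mults_1 plus one RDIV_EXPR for
|  |    the negative exponent.  The exact factorization depends on the
|  |    powi_table entries.  */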
| 1569 | |
| 1570 | /* ARG0 and N are the two arguments to a powi builtin in GSI with |
| 1571 | location info LOC. If the arguments are appropriate, create an |
| 1572 | equivalent sequence of statements prior to GSI using an optimal |
| 1573 | number of multiplications, and return an expression holding the
| 1574 | result. */ |
| 1575 | |
| 1576 | static tree |
| 1577 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, |
| 1578 | tree arg0, HOST_WIDE_INT n) |
| 1579 | { |
| 1580 | if ((n >= -1 && n <= 2) |
| 1581 | || (optimize_function_for_speed_p (cfun) |
| 1582 | && powi_cost (n) <= POWI_MAX_MULTS)) |
| 1583 | return powi_as_mults (gsi, loc, arg0, n); |
| 1584 | |
| 1585 | return NULL_TREE; |
| 1586 | } |
| 1587 | |
| 1588 | /* Build a gimple call statement that calls FN with argument ARG. |
| 1589 | Set the lhs of the call statement to a fresh SSA name. Insert the |
| 1590 | statement prior to GSI's current position, and return the fresh |
| 1591 | SSA name. */ |
| 1592 | |
| 1593 | static tree |
| 1594 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
| 1595 | tree fn, tree arg) |
| 1596 | { |
| 1597 | gcall *call_stmt; |
| 1598 | tree ssa_target; |
| 1599 | |
| 1600 | call_stmt = gimple_build_call (fn, 1, arg); |
| 1601 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, name: "powroot" ); |
| 1602 | gimple_set_lhs (call_stmt, ssa_target); |
| 1603 | gimple_set_location (g: call_stmt, location: loc); |
| 1604 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); |
| 1605 | |
| 1606 | return ssa_target; |
| 1607 | } |
| 1608 | |
| 1609 | /* Build a gimple binary operation with the given CODE and arguments |
| 1610 | ARG0, ARG1, assigning the result to a new SSA name for variable |
| 1611 | TARGET. Insert the statement prior to GSI's current position, and |
| 1612 | return the fresh SSA name. */
| 1613 | |
| 1614 | static tree |
| 1615 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, |
| 1616 | const char *name, enum tree_code code, |
| 1617 | tree arg0, tree arg1) |
| 1618 | { |
| 1619 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
| 1620 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
| 1621 | gimple_set_location (g: stmt, location: loc); |
| 1622 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
| 1623 | return result; |
| 1624 | } |
| 1625 | |
| 1626 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
| 1627 | prior to GSI's current position, and return the fresh SSA name. */ |
| 1628 | |
| 1629 | static tree |
| 1630 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, |
| 1631 | tree type, tree val) |
| 1632 | { |
| 1633 | tree result = make_ssa_name (var: type); |
| 1634 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |
| 1635 | gimple_set_location (g: stmt, location: loc); |
| 1636 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
| 1637 | return result; |
| 1638 | } |
| 1639 | |
| 1640 | struct pow_synth_sqrt_info |
| 1641 | { |
| 1642 | bool *factors; |
| 1643 | unsigned int deepest; |
| 1644 | unsigned int num_mults; |
| 1645 | }; |
| 1646 | |
| 1647 | /* Return true iff the real value C can be represented as a |
| 1648 | sum of powers of 0.5 up to N. That is: |
| 1649 | C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. |
| 1650 | Record in INFO the various parameters of the synthesis algorithm such |
| 1651 | as the factors a[i], the maximum 0.5 power and the number of |
| 1652 | multiplications that will be required. */ |
| 1653 | |
| 1654 | bool |
| 1655 | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, |
| 1656 | struct pow_synth_sqrt_info *info) |
| 1657 | { |
| 1658 | REAL_VALUE_TYPE factor = dconsthalf; |
| 1659 | REAL_VALUE_TYPE remainder = c; |
| 1660 | |
| 1661 | info->deepest = 0; |
| 1662 | info->num_mults = 0; |
| 1663 | memset (s: info->factors, c: 0, n: n * sizeof (bool)); |
| 1664 | |
| 1665 | for (unsigned i = 0; i < n; i++) |
| 1666 | { |
| 1667 | REAL_VALUE_TYPE res; |
| 1668 | |
| 1669 | /* If something inexact happened, bail out now. */
| 1670 | if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) |
| 1671 | return false; |
| 1672 | |
| 1673 | /* We have hit zero. The number is representable as a sum |
| 1674 | of powers of 0.5. */ |
| 1675 | if (real_equal (&res, &dconst0)) |
| 1676 | { |
| 1677 | info->factors[i] = true; |
| 1678 | info->deepest = i + 1; |
| 1679 | return true; |
| 1680 | } |
| 1681 | else if (!REAL_VALUE_NEGATIVE (res)) |
| 1682 | { |
| 1683 | remainder = res; |
| 1684 | info->factors[i] = true; |
| 1685 | info->num_mults++; |
| 1686 | } |
| 1687 | else |
| 1688 | info->factors[i] = false; |
| 1689 | |
| 1690 | real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); |
| 1691 | } |
| 1692 | return false; |
| 1693 | } |
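|  | 
|  | /* Worked example (added commentary, not from the original sources):
|  |    for C == 0.625 and N >= 3 the loop above computes
|  | 
|  |      0.625 - 0.5   =  0.125   -> factors[0] = true, num_mults = 1
|  |      0.125 - 0.25  = -0.125   -> factors[1] = false
|  |      0.125 - 0.125 =  0.0     -> factors[2] = true, deepest = 3
|  | 
|  |    so 0.625 == 0.5 + 0.125 is representable; the deepest sqrt chain is 3
|  |    and one multiplication, sqrt (x) * sqrt (sqrt (sqrt (x))), is needed
|  |    to combine the chains.  */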
| 1694 | |
| 1695 | /* Return the tree corresponding to FN being applied |
| 1696 | to ARG N times at GSI and LOC. |
| 1697 | Look up previous results from CACHE if need be. |
| 1698 | cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ |
| 1699 | |
| 1700 | static tree |
| 1701 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, |
| 1702 | tree fn, location_t loc, tree *cache) |
| 1703 | { |
| 1704 | tree res = cache[n]; |
| 1705 | if (!res) |
| 1706 | { |
| 1707 | tree prev = get_fn_chain (arg, n: n - 1, gsi, fn, loc, cache); |
| 1708 | res = build_and_insert_call (gsi, loc, fn, arg: prev); |
| 1709 | cache[n] = res; |
| 1710 | } |
| 1711 | |
| 1712 | return res; |
| 1713 | } |
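|  | 
|  | /* For instance (added commentary, not from the original sources), with an
|  |    otherwise empty cache and cache[0] == x,
|  |    get_fn_chain (x, 2, gsi, sqrtfn, loc, cache) inserts
|  | 
|  |      powroot_1 = sqrt (x);
|  |      powroot_2 = sqrt (powroot_1);
|  | 
|  |    and memoizes both results, so a later request for depth 3 adds only one
|  |    further sqrt call.  */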
| 1714 | |
| 1715 | /* Print to STREAM the repeated application of function FNAME to ARG |
| 1716 | N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print: |
| 1717 | "foo (foo (x))". */ |
| 1718 | |
| 1719 | static void |
| 1720 | print_nested_fn (FILE* stream, const char *fname, const char* arg, |
| 1721 | unsigned int n) |
| 1722 | { |
| 1723 | if (n == 0) |
| 1724 | fprintf (stream: stream, format: "%s" , arg); |
| 1725 | else |
| 1726 | { |
| 1727 | fprintf (stream: stream, format: "%s (" , fname); |
| 1728 | print_nested_fn (stream, fname, arg, n: n - 1); |
| 1729 | fprintf (stream: stream, format: ")" ); |
| 1730 | } |
| 1731 | } |
| 1732 | |
| 1733 | /* Print to STREAM the fractional sequence of sqrt chains |
| 1734 | applied to ARG, described by INFO. Used for the dump file. */ |
| 1735 | |
| 1736 | static void |
| 1737 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, |
| 1738 | struct pow_synth_sqrt_info *info) |
| 1739 | { |
| 1740 | for (unsigned int i = 0; i < info->deepest; i++) |
| 1741 | { |
| 1742 | bool is_set = info->factors[i]; |
| 1743 | if (is_set) |
| 1744 | { |
| 1745 | print_nested_fn (stream, fname: "sqrt" , arg, n: i + 1); |
| 1746 | if (i != info->deepest - 1) |
| 1747 | fprintf (stream: stream, format: " * " ); |
| 1748 | } |
| 1749 | } |
| 1750 | } |
| 1751 | |
| 1752 | /* Print to STREAM a representation of raising ARG to an integer |
| 1753 | power N. Used for the dump file. */ |
| 1754 | |
| 1755 | static void |
| 1756 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) |
| 1757 | { |
| 1758 | if (n > 1) |
| 1759 | fprintf (stream: stream, format: "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")" , arg, n); |
| 1760 | else if (n == 1) |
| 1761 | fprintf (stream: stream, format: "%s" , arg); |
| 1762 | } |
| 1763 | |
| 1764 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of |
| 1765 | square roots. Place at GSI and LOC. Limit the maximum depth |
| 1766 | of the sqrt chains to MAX_DEPTH. Return the tree holding the |
| 1767 | result of the expanded sequence or NULL_TREE if the expansion failed. |
| 1768 | |
| 1769 | This routine assumes that ARG1 is a real number with a fractional part |
| 1770 | (the integer exponent case will have been handled earlier in |
| 1771 | gimple_expand_builtin_pow). |
| 1772 | |
| 1773 | For ARG1 > 0.0: |
| 1774 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part |
| 1775 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and |
| 1776 | FRAC_PART == ARG1 - WHOLE_PART: |
| 1777 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where |
| 1778 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains |
| 1779 | if it can be expressed as such, that is if FRAC_PART satisfies: |
| 1780 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) |
| 1781 | where integer a[i] is either 0 or 1. |
| 1782 | |
| 1783 | Example: |
| 1784 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) |
| 1785 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) |
| 1786 | |
| 1787 | For ARG1 < 0.0 there are two approaches: |
| 1788 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) |
| 1789 | is calculated as above. |
| 1790 | |
| 1791 | Example: |
| 1792 | POW (x, -5.625) == 1.0 / POW (x, 5.625) |
| 1793 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) |
| 1794 | |
| 1795 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) |
| 1796 | FRAC_PART := ARG1 - WHOLE_PART |
| 1797 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). |
| 1798 | Example: |
| 1799 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) |
| 1800 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) |
| 1801 | |
| 1802 | For ARG1 < 0.0 we choose between (A) and (B) depending on |
| 1803 | how many multiplications we'd have to do. |
| 1804 | So, for the example in (B): POW (x, -5.875), if we were to |
| 1805 | follow algorithm (A) we would produce: |
| 1806 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) |
| 1807 | which contains more multiplications than approach (B). |
| 1808 | |
| 1809 | Hopefully, this approach will eliminate potentially expensive POW library |
| 1810 | calls when unsafe floating point math is enabled and allow the compiler to |
| 1811 | further optimise the multiplies, square roots and divides produced by this |
| 1812 | function. */ |
| 1813 | |
| 1814 | static tree |
| 1815 | expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, |
| 1816 | tree arg0, tree arg1, HOST_WIDE_INT max_depth) |
| 1817 | { |
| 1818 | tree type = TREE_TYPE (arg0); |
| 1819 | machine_mode mode = TYPE_MODE (type); |
| 1820 | tree sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT); |
| 1821 | bool one_over = true; |
| 1822 | |
| 1823 | if (!sqrtfn) |
| 1824 | return NULL_TREE; |
| 1825 | |
| 1826 | if (TREE_CODE (arg1) != REAL_CST) |
| 1827 | return NULL_TREE; |
| 1828 | |
| 1829 | REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1); |
| 1830 | |
| 1831 | gcc_assert (max_depth > 0); |
| 1832 | tree *cache = XALLOCAVEC (tree, max_depth + 1); |
| 1833 | |
| 1834 | struct pow_synth_sqrt_info synth_info; |
| 1835 | synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
| 1836 | synth_info.deepest = 0; |
| 1837 | synth_info.num_mults = 0; |
| 1838 | |
| 1839 | bool neg_exp = REAL_VALUE_NEGATIVE (exp_init); |
| 1840 | REAL_VALUE_TYPE exp = real_value_abs (&exp_init); |
| 1841 | |
| 1842 | /* The whole and fractional parts of exp. */ |
| 1843 | REAL_VALUE_TYPE whole_part; |
| 1844 | REAL_VALUE_TYPE frac_part; |
| 1845 | |
| 1846 | real_floor (&whole_part, mode, &exp); |
| 1847 | real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part); |
| 1848 | |
| 1849 | |
| 1850 | REAL_VALUE_TYPE ceil_whole = dconst0; |
| 1851 | REAL_VALUE_TYPE ceil_fract = dconst0; |
| 1852 | |
| 1853 | if (neg_exp) |
| 1854 | { |
| 1855 | real_ceil (&ceil_whole, mode, &exp); |
| 1856 | real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp); |
| 1857 | } |
| 1858 | |
| 1859 | if (!representable_as_half_series_p (c: frac_part, n: max_depth, info: &synth_info)) |
| 1860 | return NULL_TREE; |
| 1861 | |
| 1862 | /* Check whether it's more profitable to not use 1.0 / ... */ |
| 1863 | if (neg_exp) |
| 1864 | { |
| 1865 | struct pow_synth_sqrt_info alt_synth_info; |
| 1866 | alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
| 1867 | alt_synth_info.deepest = 0; |
| 1868 | alt_synth_info.num_mults = 0; |
| 1869 | |
| 1870 | if (representable_as_half_series_p (c: ceil_fract, n: max_depth, |
| 1871 | info: &alt_synth_info) |
| 1872 | && alt_synth_info.deepest <= synth_info.deepest |
| 1873 | && alt_synth_info.num_mults < synth_info.num_mults) |
| 1874 | { |
| 1875 | whole_part = ceil_whole; |
| 1876 | frac_part = ceil_fract; |
| 1877 | synth_info.deepest = alt_synth_info.deepest; |
| 1878 | synth_info.num_mults = alt_synth_info.num_mults; |
| 1879 | memcpy (dest: synth_info.factors, src: alt_synth_info.factors, |
| 1880 | n: (max_depth + 1) * sizeof (bool)); |
| 1881 | one_over = false; |
| 1882 | } |
| 1883 | } |
| 1884 | |
| 1885 | HOST_WIDE_INT n = real_to_integer (&whole_part); |
| 1886 | REAL_VALUE_TYPE cint; |
| 1887 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
| 1888 | |
| 1889 | if (!real_identical (&whole_part, &cint)) |
| 1890 | return NULL_TREE; |
| 1891 | |
| 1892 | if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS) |
| 1893 | return NULL_TREE; |
| 1894 | |
| 1895 | memset (s: cache, c: 0, n: (max_depth + 1) * sizeof (tree)); |
| 1896 | |
| 1897 | tree integer_res = n == 0 ? build_real (type, dconst1) : arg0; |
| 1898 | |
| 1899 | /* Calculate the integer part of the exponent. */ |
| 1900 | if (n > 1) |
| 1901 | { |
| 1902 | integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n); |
| 1903 | if (!integer_res) |
| 1904 | return NULL_TREE; |
| 1905 | } |
| 1906 | |
| 1907 | if (dump_file) |
| 1908 | { |
| 1909 | char string[64]; |
| 1910 | |
| 1911 | real_to_decimal (string, &exp_init, sizeof (string), 0, 1); |
| 1912 | fprintf (stream: dump_file, format: "synthesizing pow (x, %s) as:\n" , string); |
| 1913 | |
| 1914 | if (neg_exp) |
| 1915 | { |
| 1916 | if (one_over) |
| 1917 | { |
| 1918 | fprintf (stream: dump_file, format: "1.0 / (" ); |
| 1919 | dump_integer_part (stream: dump_file, arg: "x" , n); |
| 1920 | if (n > 0) |
| 1921 | fprintf (stream: dump_file, format: " * " ); |
| 1922 | dump_fractional_sqrt_sequence (stream: dump_file, arg: "x" , info: &synth_info); |
| 1923 | fprintf (stream: dump_file, format: ")" ); |
| 1924 | } |
| 1925 | else |
| 1926 | { |
| 1927 | dump_fractional_sqrt_sequence (stream: dump_file, arg: "x" , info: &synth_info); |
| 1928 | fprintf (stream: dump_file, format: " / (" ); |
| 1929 | dump_integer_part (stream: dump_file, arg: "x" , n); |
| 1930 | fprintf (stream: dump_file, format: ")" ); |
| 1931 | } |
| 1932 | } |
| 1933 | else |
| 1934 | { |
| 1935 | dump_fractional_sqrt_sequence (stream: dump_file, arg: "x" , info: &synth_info); |
| 1936 | if (n > 0) |
| 1937 | fprintf (stream: dump_file, format: " * " ); |
| 1938 | dump_integer_part (stream: dump_file, arg: "x" , n); |
| 1939 | } |
| 1940 | |
| 1941 | fprintf (stream: dump_file, format: "\ndeepest sqrt chain: %d\n" , synth_info.deepest); |
| 1942 | } |
| 1943 | |
| 1944 | |
| 1945 | tree fract_res = NULL_TREE; |
| 1946 | cache[0] = arg0; |
| 1947 | |
| 1948 | /* Calculate the fractional part of the exponent. */ |
| 1949 | for (unsigned i = 0; i < synth_info.deepest; i++) |
| 1950 | { |
| 1951 | if (synth_info.factors[i]) |
| 1952 | { |
| 1953 | tree sqrt_chain = get_fn_chain (arg: arg0, n: i + 1, gsi, fn: sqrtfn, loc, cache); |
| 1954 | |
| 1955 | if (!fract_res) |
| 1956 | fract_res = sqrt_chain; |
| 1957 | |
| 1958 | else |
| 1959 | fract_res = build_and_insert_binop (gsi, loc, name: "powroot" , code: MULT_EXPR, |
| 1960 | arg0: fract_res, arg1: sqrt_chain); |
| 1961 | } |
| 1962 | } |
| 1963 | |
| 1964 | tree res = NULL_TREE; |
| 1965 | |
| 1966 | if (neg_exp) |
| 1967 | { |
| 1968 | if (one_over) |
| 1969 | { |
| 1970 | if (n > 0) |
| 1971 | res = build_and_insert_binop (gsi, loc, name: "powroot" , code: MULT_EXPR, |
| 1972 | arg0: fract_res, arg1: integer_res); |
| 1973 | else |
| 1974 | res = fract_res; |
| 1975 | |
| 1976 | res = build_and_insert_binop (gsi, loc, name: "powrootrecip" , code: RDIV_EXPR, |
| 1977 | arg0: build_real (type, dconst1), arg1: res); |
| 1978 | } |
| 1979 | else |
| 1980 | { |
| 1981 | res = build_and_insert_binop (gsi, loc, name: "powroot" , code: RDIV_EXPR, |
| 1982 | arg0: fract_res, arg1: integer_res); |
| 1983 | } |
| 1984 | } |
| 1985 | else |
| 1986 | res = build_and_insert_binop (gsi, loc, name: "powroot" , code: MULT_EXPR, |
| 1987 | arg0: fract_res, arg1: integer_res); |
| 1988 | return res; |
| 1989 | } |
| 1990 | |
| 1991 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |
| 1992 | with location info LOC. If possible, create an equivalent and |
| 1993 | less expensive sequence of statements prior to GSI, and return an |
| 1994 | expression holding the result. */
| 1995 | |
| 1996 | static tree |
| 1997 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, |
| 1998 | tree arg0, tree arg1) |
| 1999 | { |
| 2000 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |
| 2001 | REAL_VALUE_TYPE c2, dconst3; |
| 2002 | HOST_WIDE_INT n; |
| 2003 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |
| 2004 | machine_mode mode; |
| 2005 | bool speed_p = optimize_bb_for_speed_p (gsi_bb (i: *gsi)); |
| 2006 | bool hw_sqrt_exists, c_is_int, c2_is_int; |
| 2007 | |
| 2008 | dconst1_4 = dconst1; |
| 2009 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); |
| 2010 | |
| 2011 | /* If the exponent isn't a constant, there's nothing of interest |
| 2012 | to be done. */ |
| 2013 | if (TREE_CODE (arg1) != REAL_CST) |
| 2014 | return NULL_TREE; |
| 2015 | |
| 2016 | /* Don't perform the operation if flag_signaling_nans is on |
| 2017 | and the operand is a signaling NaN. */ |
| 2018 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) |
| 2019 | && ((TREE_CODE (arg0) == REAL_CST |
| 2020 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) |
| 2021 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |
| 2022 | return NULL_TREE; |
| 2023 | |
| 2024 | /* If the exponent is equivalent to an integer, expand to an optimal |
| 2025 | multiplication sequence when profitable. */ |
| 2026 | c = TREE_REAL_CST (arg1); |
| 2027 | n = real_to_integer (&c); |
| 2028 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
| 2029 | c_is_int = real_identical (&c, &cint); |
| 2030 | |
| 2031 | if (c_is_int |
| 2032 | && ((n >= -1 && n <= 2) |
| 2033 | || (flag_unsafe_math_optimizations |
| 2034 | && speed_p |
| 2035 | && powi_cost (n) <= POWI_MAX_MULTS))) |
| 2036 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); |
| 2037 | |
| 2038 | /* Attempt various optimizations using sqrt and cbrt. */ |
| 2039 | type = TREE_TYPE (arg0); |
| 2040 | mode = TYPE_MODE (type); |
| 2041 | sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT); |
| 2042 | |
| 2043 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe |
| 2044 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while |
| 2045 | sqrt(-0) = -0. */ |
| 2046 | if (sqrtfn |
| 2047 | && real_equal (&c, &dconsthalf) |
| 2048 | && !HONOR_SIGNED_ZEROS (mode)) |
| 2049 | return build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0); |
| 2050 | |
| 2051 | hw_sqrt_exists = optab_handler (op: sqrt_optab, mode) != CODE_FOR_nothing; |
| 2052 | |
| 2053 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |
| 2054 | optimizations since 1./3. is not exactly representable. If x |
| 2055 | is negative and finite, the correct value of pow(x,1./3.) is |
| 2056 | a NaN with the "invalid" exception raised, because the value |
| 2057 | of 1./3. actually has an even denominator. The correct value |
| 2058 | of cbrt(x) is a negative real value. */ |
| 2059 | cbrtfn = mathfn_built_in (type, fn: BUILT_IN_CBRT); |
| 2060 | dconst1_3 = real_value_truncate (mode, dconst_third ()); |
| 2061 | |
| 2062 | if (flag_unsafe_math_optimizations |
| 2063 | && cbrtfn |
| 2064 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
| 2065 | && real_equal (&c, &dconst1_3)) |
| 2066 | return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0); |
| 2067 | |
| 2068 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization |
| 2069 | if we don't have a hardware sqrt insn. */ |
| 2070 | dconst1_6 = dconst1_3; |
| 2071 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); |
| 2072 | |
| 2073 | if (flag_unsafe_math_optimizations |
| 2074 | && sqrtfn |
| 2075 | && cbrtfn |
| 2076 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
| 2077 | && speed_p |
| 2078 | && hw_sqrt_exists |
| 2079 | && real_equal (&c, &dconst1_6)) |
| 2080 | { |
| 2081 | /* sqrt(x) */ |
| 2082 | sqrt_arg0 = build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0); |
| 2083 | |
| 2084 | /* cbrt(sqrt(x)) */ |
| 2085 | return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: sqrt_arg0); |
| 2086 | } |
| 2087 | |
| 2088 | |
| 2089 | /* Attempt to expand the POW as a product of square root chains. |
| 2090 | Expand the 0.25 case even when optimising for size. */
| 2091 | if (flag_unsafe_math_optimizations |
| 2092 | && sqrtfn |
| 2093 | && hw_sqrt_exists |
| 2094 | && (speed_p || real_equal (&c, &dconst1_4)) |
| 2095 | && !HONOR_SIGNED_ZEROS (mode)) |
| 2096 | { |
| 2097 | unsigned int max_depth = speed_p |
| 2098 | ? param_max_pow_sqrt_depth |
| 2099 | : 2; |
| 2100 | |
| 2101 | tree expand_with_sqrts |
| 2102 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); |
| 2103 | |
| 2104 | if (expand_with_sqrts) |
| 2105 | return expand_with_sqrts; |
| 2106 | } |
| 2107 | |
| 2108 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |
| 2109 | n = real_to_integer (&c2); |
| 2110 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
| 2111 | c2_is_int = real_identical (&c2, &cint); |
| 2112 | |
| 2113 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |
| 2114 | |
| 2115 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; |
| 2116 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. |
| 2117 | |
| 2118 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is |
| 2119 | different from pow(x, 1./3.) due to rounding and behavior with |
| 2120 | negative x, we need to constrain this transformation to unsafe |
| 2121 | math and to either nonnegative x or finite math (no NaNs honored). */
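|  | 
|  | /* For example (added commentary, not from the original sources):
|  |    pow (x, 5./3.) has 3c == 5, so n == 5, n/3 == 1 and n%3 == 2, giving
|  | 
|  |      cbrt_x = cbrt (x);
|  |      powroot_1 = cbrt_x * cbrt_x;    <-- powi (cbrt (x), 2)
|  |      result = x * powroot_1;         <-- powi (x, 1) * powi (cbrt (x), 2)
|  | 
|  |    which computes x**(5/3) without any pow call.  */
|  | 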
| 2122 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |
| 2123 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |
| 2124 | real_round (&c2, mode, &c2); |
| 2125 | n = real_to_integer (&c2); |
| 2126 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
| 2127 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |
| 2128 | real_convert (&c2, mode, &c2); |
| 2129 | |
| 2130 | if (flag_unsafe_math_optimizations |
| 2131 | && cbrtfn |
| 2132 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
| 2133 | && real_identical (&c2, &c) |
| 2134 | && !c2_is_int |
| 2135 | && optimize_function_for_speed_p (cfun) |
| 2136 | && powi_cost (n: n / 3) <= POWI_MAX_MULTS) |
| 2137 | { |
| 2138 | tree powi_x_ndiv3 = NULL_TREE; |
| 2139 | |
| 2140 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not |
| 2141 | possible or profitable, give up. Skip the degenerate case when |
| 2142 | abs(n) < 3, where the result is always 1. */ |
| 2143 | if (absu_hwi (x: n) >= 3) |
| 2144 | { |
| 2145 | powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0, |
| 2146 | n: abs_hwi (x: n / 3)); |
| 2147 | if (!powi_x_ndiv3) |
| 2148 | return NULL_TREE; |
| 2149 | } |
| 2150 | |
| 2151 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi |
| 2152 | as that creates an unnecessary variable. Instead, just produce |
| 2153 | either cbrt(x) or cbrt(x) * cbrt(x). */ |
| 2154 | cbrt_x = build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0); |
| 2155 | |
| 2156 | if (absu_hwi (x: n) % 3 == 1) |
| 2157 | powi_cbrt_x = cbrt_x; |
| 2158 | else |
| 2159 | powi_cbrt_x = build_and_insert_binop (gsi, loc, name: "powroot" , code: MULT_EXPR, |
| 2160 | arg0: cbrt_x, arg1: cbrt_x); |
| 2161 | |
| 2162 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ |
| 2163 | if (absu_hwi (x: n) < 3) |
| 2164 | result = powi_cbrt_x; |
| 2165 | else |
| 2166 | result = build_and_insert_binop (gsi, loc, name: "powroot" , code: MULT_EXPR, |
| 2167 | arg0: powi_x_ndiv3, arg1: powi_cbrt_x); |
| 2168 | |
| 2169 | /* If n is negative, reciprocate the result. */ |
| 2170 | if (n < 0) |
| 2171 | result = build_and_insert_binop (gsi, loc, name: "powroot" , code: RDIV_EXPR, |
| 2172 | arg0: build_real (type, dconst1), arg1: result); |
| 2173 | |
| 2174 | return result; |
| 2175 | } |
| 2176 | |
| 2177 | /* No optimizations succeeded. */ |
| 2178 | return NULL_TREE; |
| 2179 | } |
| 2180 | |
| 2181 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |
| 2182 | on the SSA_NAME argument of each of them. */ |
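|  | 
|  | /* Illustrative sketch of the rewrite (added commentary, not from the
|  |    original sources): assuming the target provides cexpi or sincos,
|  | 
|  |      s = sin (x);
|  |      c = cos (x);
|  | 
|  |    is turned by execute_cse_sincos_1 into something like
|  | 
|  |      t = __builtin_cexpi (x);
|  |      s = IMAGPART_EXPR <t>;
|  |      c = REALPART_EXPR <t>;
|  | 
|  |    so only a single trigonometric computation remains.  */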
| 2183 | |
| 2184 | namespace { |
| 2185 | |
| 2186 | const pass_data pass_data_cse_sincos = |
| 2187 | { |
| 2188 | .type: GIMPLE_PASS, /* type */ |
| 2189 | .name: "sincos" , /* name */ |
| 2190 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
| 2191 | .tv_id: TV_TREE_SINCOS, /* tv_id */ |
| 2192 | PROP_ssa, /* properties_required */ |
| 2193 | .properties_provided: 0, /* properties_provided */ |
| 2194 | .properties_destroyed: 0, /* properties_destroyed */ |
| 2195 | .todo_flags_start: 0, /* todo_flags_start */ |
| 2196 | TODO_update_ssa, /* todo_flags_finish */ |
| 2197 | }; |
| 2198 | |
| 2199 | class pass_cse_sincos : public gimple_opt_pass |
| 2200 | { |
| 2201 | public: |
| 2202 | pass_cse_sincos (gcc::context *ctxt) |
| 2203 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) |
| 2204 | {} |
| 2205 | |
| 2206 | /* opt_pass methods: */ |
| 2207 | bool gate (function *) final override |
| 2208 | { |
| 2209 | return optimize; |
| 2210 | } |
| 2211 | |
| 2212 | unsigned int execute (function *) final override; |
| 2213 | |
| 2214 | }; // class pass_cse_sincos |
| 2215 | |
| 2216 | unsigned int |
| 2217 | pass_cse_sincos::execute (function *fun) |
| 2218 | { |
| 2219 | basic_block bb; |
| 2220 | bool cfg_changed = false; |
| 2221 | |
| 2222 | calculate_dominance_info (CDI_DOMINATORS); |
| 2223 | memset (s: &sincos_stats, c: 0, n: sizeof (sincos_stats)); |
| 2224 | |
| 2225 | FOR_EACH_BB_FN (bb, fun) |
| 2226 | { |
| 2227 | gimple_stmt_iterator gsi; |
| 2228 | |
| 2229 | for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 2230 | { |
| 2231 | gimple *stmt = gsi_stmt (i: gsi); |
| 2232 | |
| 2233 | if (is_gimple_call (gs: stmt) |
| 2234 | && gimple_call_lhs (gs: stmt)) |
| 2235 | { |
| 2236 | tree arg; |
| 2237 | switch (gimple_call_combined_fn (stmt)) |
| 2238 | { |
| 2239 | CASE_CFN_COS: |
| 2240 | CASE_CFN_SIN: |
| 2241 | CASE_CFN_CEXPI: |
| 2242 | arg = gimple_call_arg (gs: stmt, index: 0); |
| 2243 | /* Make sure we have either sincos or cexp. */ |
| 2244 | if (!targetm.libc_has_function (function_c99_math_complex, |
| 2245 | TREE_TYPE (arg)) |
| 2246 | && !targetm.libc_has_function (function_sincos, |
| 2247 | TREE_TYPE (arg))) |
| 2248 | break; |
| 2249 | |
| 2250 | if (TREE_CODE (arg) == SSA_NAME) |
| 2251 | cfg_changed |= execute_cse_sincos_1 (name: arg); |
| 2252 | break; |
| 2253 | default: |
| 2254 | break; |
| 2255 | } |
| 2256 | } |
| 2257 | } |
| 2258 | } |
| 2259 | |
| 2260 | statistics_counter_event (fun, "sincos statements inserted" , |
| 2261 | sincos_stats.inserted); |
| 2262 | statistics_counter_event (fun, "conv statements removed" , |
| 2263 | sincos_stats.conv_removed); |
| 2264 | |
| 2265 | return cfg_changed ? TODO_cleanup_cfg : 0; |
| 2266 | } |
| 2267 | |
| 2268 | } // anon namespace |
| 2269 | |
| 2270 | gimple_opt_pass * |
| 2271 | make_pass_cse_sincos (gcc::context *ctxt) |
| 2272 | { |
| 2273 | return new pass_cse_sincos (ctxt); |
| 2274 | } |
| 2275 | |
| 2276 | /* Expand powi(x,n) into an optimal number of multiplies, when n is a |
| 2277 | constant. */ |
| 2278 | namespace { |
| 2279 | |
| 2280 | const pass_data pass_data_expand_pow = |
| 2281 | { |
| 2282 | .type: GIMPLE_PASS, /* type */ |
| 2283 | .name: "pow" , /* name */ |
| 2284 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
| 2285 | .tv_id: TV_TREE_POW, /* tv_id */ |
| 2286 | PROP_ssa, /* properties_required */ |
| 2287 | PROP_gimple_opt_math, /* properties_provided */ |
| 2288 | .properties_destroyed: 0, /* properties_destroyed */ |
| 2289 | .todo_flags_start: 0, /* todo_flags_start */ |
| 2290 | TODO_update_ssa, /* todo_flags_finish */ |
| 2291 | }; |
| 2292 | |
| 2293 | class pass_expand_pow : public gimple_opt_pass |
| 2294 | { |
| 2295 | public: |
| 2296 | pass_expand_pow (gcc::context *ctxt) |
| 2297 | : gimple_opt_pass (pass_data_expand_pow, ctxt) |
| 2298 | {} |
| 2299 | |
| 2300 | /* opt_pass methods: */ |
| 2301 | bool gate (function *) final override |
| 2302 | { |
| 2303 | return optimize; |
| 2304 | } |
| 2305 | |
| 2306 | unsigned int execute (function *) final override; |
| 2307 | |
| 2308 | }; // class pass_expand_pow |
| 2309 | |
| 2310 | unsigned int |
| 2311 | pass_expand_pow::execute (function *fun) |
| 2312 | { |
| 2313 | basic_block bb; |
| 2314 | bool cfg_changed = false; |
| 2315 | |
| 2316 | calculate_dominance_info (CDI_DOMINATORS); |
| 2317 | |
| 2318 | FOR_EACH_BB_FN (bb, fun) |
| 2319 | { |
| 2320 | gimple_stmt_iterator gsi; |
| 2321 | bool cleanup_eh = false; |
| 2322 | |
| 2323 | for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 2324 | { |
| 2325 | gimple *stmt = gsi_stmt (i: gsi); |
| 2326 | |
| 2327 | /* Only the last stmt in a bb could throw, no need to call |
| 2328 | gimple_purge_dead_eh_edges if we change something in the middle |
| 2329 | of a basic block. */ |
| 2330 | cleanup_eh = false; |
| 2331 | |
| 2332 | if (is_gimple_call (gs: stmt) |
| 2333 | && gimple_call_lhs (gs: stmt)) |
| 2334 | { |
| 2335 | tree arg0, arg1, result; |
| 2336 | HOST_WIDE_INT n; |
| 2337 | location_t loc; |
| 2338 | |
| 2339 | switch (gimple_call_combined_fn (stmt)) |
| 2340 | { |
| 2341 | CASE_CFN_POW: |
| 2342 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 2343 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 2344 | |
| 2345 | loc = gimple_location (g: stmt); |
| 2346 | result = gimple_expand_builtin_pow (gsi: &gsi, loc, arg0, arg1); |
| 2347 | |
| 2348 | if (result) |
| 2349 | { |
| 2350 | tree lhs = gimple_get_lhs (stmt); |
| 2351 | gassign *new_stmt = gimple_build_assign (lhs, result); |
| 2352 | gimple_set_location (g: new_stmt, location: loc); |
| 2353 | unlink_stmt_vdef (stmt); |
| 2354 | gsi_replace (&gsi, new_stmt, true); |
| 2355 | cleanup_eh = true; |
| 2356 | if (gimple_vdef (g: stmt)) |
| 2357 | release_ssa_name (name: gimple_vdef (g: stmt)); |
| 2358 | } |
| 2359 | break; |
| 2360 | |
| 2361 | CASE_CFN_POWI: |
| 2362 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
| 2363 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
| 2364 | loc = gimple_location (g: stmt); |
| 2365 | |
| 2366 | if (real_minus_onep (arg0)) |
| 2367 | { |
| 2368 | tree t0, t1, cond, one, minus_one; |
| 2369 | gassign *stmt; |
| 2370 | |
| 2371 | t0 = TREE_TYPE (arg0); |
| 2372 | t1 = TREE_TYPE (arg1); |
| 2373 | one = build_real (t0, dconst1); |
| 2374 | minus_one = build_real (t0, dconstm1); |
| 2375 | |
| 2376 | cond = make_temp_ssa_name (type: t1, NULL, name: "powi_cond" ); |
| 2377 | stmt = gimple_build_assign (cond, BIT_AND_EXPR, |
| 2378 | arg1, build_int_cst (t1, 1)); |
| 2379 | gimple_set_location (g: stmt, location: loc); |
| 2380 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 2381 | |
| 2382 | result = make_temp_ssa_name (type: t0, NULL, name: "powi" ); |
| 2383 | stmt = gimple_build_assign (result, COND_EXPR, cond, |
| 2384 | minus_one, one); |
| 2385 | gimple_set_location (g: stmt, location: loc); |
| 2386 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 2387 | } |
| 2388 | else |
| 2389 | { |
| 2390 | if (!tree_fits_shwi_p (arg1)) |
| 2391 | break; |
| 2392 | |
| 2393 | n = tree_to_shwi (arg1); |
| 2394 | result = gimple_expand_builtin_powi (gsi: &gsi, loc, arg0, n); |
| 2395 | } |
| 2396 | |
| 2397 | if (result) |
| 2398 | { |
| 2399 | tree lhs = gimple_get_lhs (stmt); |
| 2400 | gassign *new_stmt = gimple_build_assign (lhs, result); |
| 2401 | gimple_set_location (g: new_stmt, location: loc); |
| 2402 | unlink_stmt_vdef (stmt); |
| 2403 | gsi_replace (&gsi, new_stmt, true); |
| 2404 | cleanup_eh = true; |
| 2405 | if (gimple_vdef (g: stmt)) |
| 2406 | release_ssa_name (name: gimple_vdef (g: stmt)); |
| 2407 | } |
| 2408 | break; |
| 2409 | |
| 2410 | default:; |
| 2411 | } |
| 2412 | } |
| 2413 | } |
| 2414 | if (cleanup_eh) |
| 2415 | cfg_changed |= gimple_purge_dead_eh_edges (bb); |
| 2416 | } |
| 2417 | |
| 2418 | return cfg_changed ? TODO_cleanup_cfg : 0; |
| 2419 | } |
| 2420 | |
| 2421 | } // anon namespace |
| 2422 | |
| 2423 | gimple_opt_pass * |
| 2424 | make_pass_expand_pow (gcc::context *ctxt) |
| 2425 | { |
| 2426 | return new pass_expand_pow (ctxt); |
| 2427 | } |
| 2428 | |
| 2429 | /* Return true if stmt is a type conversion operation that can be stripped |
| 2430 | when used in a widening multiply operation. */ |
| 2431 | static bool |
| 2432 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
| 2433 | { |
| 2434 | enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt); |
| 2435 | |
| 2436 | if (TREE_CODE (result_type) == INTEGER_TYPE) |
| 2437 | { |
| 2438 | tree op_type; |
| 2439 | tree inner_op_type; |
| 2440 | |
| 2441 | if (!CONVERT_EXPR_CODE_P (rhs_code)) |
| 2442 | return false; |
| 2443 | |
| 2444 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); |
| 2445 | |
| 2446 | /* If the type of OP has the same precision as the result, then |
| 2447 | we can strip this conversion. The multiply operation will be |
| 2448 | selected to create the correct extension as a by-product. */ |
| 2449 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) |
| 2450 | return true; |
| 2451 | |
| 2452 | /* We can also strip a conversion if it preserves the signed-ness of |
| 2453 | the operation and doesn't narrow the range. */ |
| 2454 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |
| 2455 | |
| 2456 | /* If the inner-most type is unsigned, then we can strip any |
| 2457 | intermediate widening operation. If it's signed, then the |
| 2458 | intermediate widening operation must also be signed. */ |
| 2459 | if ((TYPE_UNSIGNED (inner_op_type) |
| 2460 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) |
| 2461 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
| 2462 | return true; |
| 2463 | |
| 2464 | return false; |
| 2465 | } |
| 2466 | |
| 2467 | return rhs_code == FIXED_CONVERT_EXPR; |
| 2468 | } |
| 2469 | |
| 2470 | /* Return true if RHS is a suitable operand for a widening multiplication, |
| 2471 | assuming a target type of TYPE. |
| 2472 | There are two cases: |
| 2473 | |
| 2474 | - RHS makes some value at least twice as wide. Store that value |
| 2475 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. |
| 2476 | |
| 2477 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, |
| 2478 | and store NULL in *TYPE_OUT. */
| 2479 | |
| 2480 | static bool |
| 2481 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
| 2482 | tree *new_rhs_out) |
| 2483 | { |
| 2484 | gimple *stmt; |
| 2485 | tree type1, rhs1; |
| 2486 | |
| 2487 | if (TREE_CODE (rhs) == SSA_NAME) |
| 2488 | { |
| 2489 | /* Use tree_nonzero_bits to see if this operand is zero-extended
| 2490 | for unsigned widening multiplications or non-negative for |
| 2491 | signed widening multiplications. */ |
| 2492 | if (TREE_CODE (type) == INTEGER_TYPE |
| 2493 | && (TYPE_PRECISION (type) & 1) == 0 |
| 2494 | && int_mode_for_size (TYPE_PRECISION (type) / 2, limit: 1).exists ()) |
| 2495 | { |
| 2496 | unsigned int prec = TYPE_PRECISION (type); |
| 2497 | unsigned int hprec = prec / 2; |
| 2498 | wide_int bits = wide_int::from (x: tree_nonzero_bits (rhs), precision: prec, |
| 2499 | TYPE_SIGN (TREE_TYPE (rhs))); |
| 2500 | if (TYPE_UNSIGNED (type) |
| 2501 | && wi::bit_and (x: bits, y: wi::mask (width: hprec, negate_p: true, precision: prec)) == 0) |
| 2502 | { |
| 2503 | *type_out = build_nonstandard_integer_type (hprec, true); |
| 2504 | /* X & MODE_MASK can be simplified to (T)X. */ |
| 2505 | stmt = SSA_NAME_DEF_STMT (rhs); |
| 2506 | if (is_gimple_assign (gs: stmt) |
| 2507 | && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR |
| 2508 | && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST |
| 2509 | && wide_int::from (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)), |
| 2510 | precision: prec, TYPE_SIGN (TREE_TYPE (rhs))) |
| 2511 | == wi::mask (width: hprec, negate_p: false, precision: prec)) |
| 2512 | *new_rhs_out = gimple_assign_rhs1 (gs: stmt); |
| 2513 | else |
| 2514 | *new_rhs_out = rhs; |
| 2515 | return true; |
| 2516 | } |
| 2517 | else if (!TYPE_UNSIGNED (type) |
| 2518 | && wi::bit_and (x: bits, y: wi::mask (width: hprec - 1, negate_p: true, precision: prec)) == 0) |
| 2519 | { |
| 2520 | *type_out = build_nonstandard_integer_type (hprec, false); |
| 2521 | *new_rhs_out = rhs; |
| 2522 | return true; |
| 2523 | } |
| 2524 | } |
| 2525 | |
| 2526 | stmt = SSA_NAME_DEF_STMT (rhs); |
| 2527 | if (is_gimple_assign (gs: stmt)) |
| 2528 | { |
| 2529 | |
| 2530 | if (widening_mult_conversion_strippable_p (result_type: type, stmt)) |
| 2531 | { |
| 2532 | rhs1 = gimple_assign_rhs1 (gs: stmt); |
| 2533 | |
| 2534 | if (TREE_CODE (rhs1) == INTEGER_CST) |
| 2535 | { |
| 2536 | *new_rhs_out = rhs1; |
| 2537 | *type_out = NULL; |
| 2538 | return true; |
| 2539 | } |
| 2540 | } |
| 2541 | else |
| 2542 | rhs1 = rhs; |
| 2543 | } |
| 2544 | else |
| 2545 | rhs1 = rhs; |
| 2546 | |
| 2547 | type1 = TREE_TYPE (rhs1); |
| 2548 | |
| 2549 | if (TREE_CODE (type1) != TREE_CODE (type) |
| 2550 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
| 2551 | return false; |
| 2552 | |
| 2553 | *new_rhs_out = rhs1; |
| 2554 | *type_out = type1; |
| 2555 | return true; |
| 2556 | } |
| 2557 | |
| 2558 | if (TREE_CODE (rhs) == INTEGER_CST) |
| 2559 | { |
| 2560 | *new_rhs_out = rhs; |
| 2561 | *type_out = NULL; |
| 2562 | return true; |
| 2563 | } |
| 2564 | |
| 2565 | return false; |
| 2566 | } |
| 2567 | |
| 2568 | /* Return true if STMT performs a widening multiplication, assuming the |
| 2569 | output type is TYPE. If so, store the unwidened types of the operands |
| 2570 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and |
| 2571 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT |
| 2572 | and *TYPE2_OUT would give the operands of the multiplication. */ |
| 2573 | |
| 2574 | static bool |
| 2575 | is_widening_mult_p (gimple *stmt, |
| 2576 | tree *type1_out, tree *rhs1_out, |
| 2577 | tree *type2_out, tree *rhs2_out) |
| 2578 | { |
| 2579 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
| 2580 | |
| 2581 | if (TREE_CODE (type) == INTEGER_TYPE) |
| 2582 | { |
| 2583 | if (TYPE_OVERFLOW_TRAPS (type)) |
| 2584 | return false; |
| 2585 | } |
| 2586 | else if (TREE_CODE (type) != FIXED_POINT_TYPE) |
| 2587 | return false; |
| 2588 | |
| 2589 | if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs1 (gs: stmt), type_out: type1_out, |
| 2590 | new_rhs_out: rhs1_out)) |
| 2591 | return false; |
| 2592 | |
| 2593 | if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs2 (gs: stmt), type_out: type2_out, |
| 2594 | new_rhs_out: rhs2_out)) |
| 2595 | return false; |
| 2596 | |
| 2597 | if (*type1_out == NULL) |
| 2598 | { |
| 2599 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
| 2600 | return false; |
| 2601 | *type1_out = *type2_out; |
| 2602 | } |
| 2603 | |
| 2604 | if (*type2_out == NULL) |
| 2605 | { |
| 2606 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
| 2607 | return false; |
| 2608 | *type2_out = *type1_out; |
| 2609 | } |
| 2610 | |
| 2611 | /* Ensure that the larger of the two operands comes first. */ |
| 2612 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) |
| 2613 | { |
| 2614 | std::swap (a&: *type1_out, b&: *type2_out); |
| 2615 | std::swap (a&: *rhs1_out, b&: *rhs2_out); |
| 2616 | } |
| 2617 | |
| 2618 | return true; |
| 2619 | } |
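|  | 
|  | /* Example (added commentary, not from the original sources): with 32-bit
|  |    int and 16-bit short,
|  | 
|  |      short a, b;
|  |      int p = (int) a * (int) b;
|  | 
|  |    is recognized as a widening multiplication: *TYPE1_OUT and *TYPE2_OUT
|  |    are both short, and *RHS1_OUT and *RHS2_OUT are the unwidened a and b,
|  |    because each conversion is strippable and short is at most half the
|  |    width of int.  */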
| 2620 | |
| 2621 | /* Check to see if the CALL statement is an invocation of copysign |
| 2622 | with 1.0 as its first argument. */
| 2623 | static bool |
| 2624 | is_copysign_call_with_1 (gimple *call) |
| 2625 | { |
| 2626 | gcall *c = dyn_cast <gcall *> (p: call); |
| 2627 | if (! c) |
| 2628 | return false; |
| 2629 | |
| 2630 | enum combined_fn code = gimple_call_combined_fn (c); |
| 2631 | |
| 2632 | if (code == CFN_LAST) |
| 2633 | return false; |
| 2634 | |
| 2635 | if (builtin_fn_p (code)) |
| 2636 | { |
| 2637 | switch (as_builtin_fn (code)) |
| 2638 | { |
| 2639 | CASE_FLT_FN (BUILT_IN_COPYSIGN): |
| 2640 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): |
| 2641 | return real_onep (gimple_call_arg (gs: c, index: 0)); |
| 2642 | default: |
| 2643 | return false; |
| 2644 | } |
| 2645 | } |
| 2646 | |
| 2647 | if (internal_fn_p (code)) |
| 2648 | { |
| 2649 | switch (as_internal_fn (code)) |
| 2650 | { |
| 2651 | case IFN_COPYSIGN: |
| 2652 | return real_onep (gimple_call_arg (gs: c, index: 0)); |
| 2653 | default: |
| 2654 | return false; |
| 2655 | } |
| 2656 | } |
| 2657 | |
| 2658 | return false; |
| 2659 | } |
| 2660 | |
| 2661 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). |
| 2662 | This is only done when the xorsign optab is defined. If the
| 2663 | pattern is not a xorsign pattern or if expansion fails, FALSE is
| 2664 | returned; otherwise TRUE is returned. */
| 2665 | static bool |
| 2666 | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) |
| 2667 | { |
| 2668 | tree treeop0, treeop1, lhs, type; |
| 2669 | location_t loc = gimple_location (g: stmt); |
| 2670 | lhs = gimple_assign_lhs (gs: stmt); |
| 2671 | treeop0 = gimple_assign_rhs1 (gs: stmt); |
| 2672 | treeop1 = gimple_assign_rhs2 (gs: stmt); |
| 2673 | type = TREE_TYPE (lhs); |
| 2674 | machine_mode mode = TYPE_MODE (type); |
| 2675 | |
| 2676 | if (HONOR_SNANS (type)) |
| 2677 | return false; |
| 2678 | |
| 2679 | if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) |
| 2680 | { |
| 2681 | gimple *call0 = SSA_NAME_DEF_STMT (treeop0); |
| 2682 | if (!has_single_use (var: treeop0) || !is_copysign_call_with_1 (call: call0)) |
| 2683 | { |
| 2684 | call0 = SSA_NAME_DEF_STMT (treeop1); |
| 2685 | if (!has_single_use (var: treeop1) || !is_copysign_call_with_1 (call: call0)) |
| 2686 | return false; |
| 2687 | |
| 2688 | treeop1 = treeop0; |
| 2689 | } |
| 2690 | if (optab_handler (op: xorsign_optab, mode) == CODE_FOR_nothing) |
| 2691 | return false; |
| 2692 | |
| 2693 | gcall *c = as_a<gcall*> (p: call0); |
| 2694 | treeop0 = gimple_call_arg (gs: c, index: 1); |
| 2695 | |
| 2696 | gcall *call_stmt |
| 2697 | = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0); |
| 2698 | gimple_set_lhs (call_stmt, lhs); |
| 2699 | gimple_set_location (g: call_stmt, location: loc); |
| 2700 | gsi_replace (gsi, call_stmt, true); |
| 2701 | return true; |
| 2702 | } |
| 2703 | |
| 2704 | return false; |
| 2705 | } |
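|  | 
|  | /* Illustrative sketch (added commentary, not from the original sources):
|  |    on a target providing a xorsign<mode> pattern,
|  | 
|  |      t = __builtin_copysign (1.0, y);
|  |      z = x * t;
|  | 
|  |    is collapsed into the single internal call
|  | 
|  |      z = .XORSIGN (x, y);
|  | 
|  |    which can be expanded as a sign-bit xor instead of a multiply.  */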
| 2706 | |
| 2707 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
| 2708 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return |
| 2709 | value is true iff we converted the statement. */ |
| 2710 | |
| 2711 | static bool |
| 2712 | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
| 2713 | { |
| 2714 | tree lhs, rhs1, rhs2, type, type1, type2; |
| 2715 | enum insn_code handler; |
| 2716 | scalar_int_mode to_mode, from_mode, actual_mode; |
| 2717 | optab op; |
| 2718 | int actual_precision; |
| 2719 | location_t loc = gimple_location (g: stmt); |
| 2720 | bool from_unsigned1, from_unsigned2; |
| 2721 | |
| 2722 | lhs = gimple_assign_lhs (gs: stmt); |
| 2723 | type = TREE_TYPE (lhs); |
| 2724 | if (TREE_CODE (type) != INTEGER_TYPE) |
| 2725 | return false; |
| 2726 | |
| 2727 | if (!is_widening_mult_p (stmt, type1_out: &type1, rhs1_out: &rhs1, type2_out: &type2, rhs2_out: &rhs2)) |
| 2728 | return false; |
| 2729 | |
| 2730 | /* If either rhs1 or rhs2 is subject to abnormal coalescing,
| 2731 | avoid the transform. */
| 2732 | if ((TREE_CODE (rhs1) == SSA_NAME |
| 2733 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) |
| 2734 | || (TREE_CODE (rhs2) == SSA_NAME |
| 2735 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))) |
| 2736 | return false; |
| 2737 | |
| 2738 | to_mode = SCALAR_INT_TYPE_MODE (type); |
| 2739 | from_mode = SCALAR_INT_TYPE_MODE (type1); |
| 2740 | if (to_mode == from_mode) |
| 2741 | return false; |
| 2742 | |
| 2743 | from_unsigned1 = TYPE_UNSIGNED (type1); |
| 2744 | from_unsigned2 = TYPE_UNSIGNED (type2); |
| 2745 | |
| 2746 | if (from_unsigned1 && from_unsigned2) |
| 2747 | op = umul_widen_optab; |
| 2748 | else if (!from_unsigned1 && !from_unsigned2) |
| 2749 | op = smul_widen_optab; |
| 2750 | else |
| 2751 | op = usmul_widen_optab; |
| 2752 | |
| 2753 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
| 2754 | found_mode: &actual_mode); |
| 2755 | |
| 2756 | if (handler == CODE_FOR_nothing) |
| 2757 | { |
| 2758 | if (op != smul_widen_optab) |
| 2759 | { |
| 2760 | /* We can use a signed multiply with unsigned types as long as |
| 2761 | there is a wider mode to use, or it is the smaller of the two |
| 2762 | types that is unsigned. Note that type1 >= type2, always. */ |
| 2763 | if ((TYPE_UNSIGNED (type1) |
| 2764 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode)) |
| 2765 | || (TYPE_UNSIGNED (type2) |
| 2766 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode))) |
| 2767 | { |
| 2768 | if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode) |
| 2769 | || GET_MODE_SIZE (mode: to_mode) <= GET_MODE_SIZE (mode: from_mode)) |
| 2770 | return false; |
| 2771 | } |
| 2772 | |
| 2773 | op = smul_widen_optab; |
| 2774 | handler = find_widening_optab_handler_and_mode (op, to_mode, |
| 2775 | from_mode, |
| 2776 | found_mode: &actual_mode); |
| 2777 | |
| 2778 | if (handler == CODE_FOR_nothing) |
| 2779 | return false; |
| 2780 | |
| 2781 | from_unsigned1 = from_unsigned2 = false; |
| 2782 | } |
| 2783 | else |
| 2784 | { |
| 2785 | /* Expand can synthesize smul_widen_optab if the target |
| 2786 | supports umul_widen_optab. */ |
| 2787 | op = umul_widen_optab; |
| 2788 | handler = find_widening_optab_handler_and_mode (op, to_mode, |
| 2789 | from_mode, |
| 2790 | found_mode: &actual_mode); |
| 2791 | if (handler == CODE_FOR_nothing) |
| 2792 | return false; |
| 2793 | } |
| 2794 | } |
| 2795 | |
| 2796 | /* Ensure that the inputs to the handler are in the correct precision
| 2797 | for the opcode. This will be the full mode size. */ |
| 2798 | actual_precision = GET_MODE_PRECISION (mode: actual_mode); |
| 2799 | if (2 * actual_precision > TYPE_PRECISION (type)) |
| 2800 | return false; |
| 2801 | if (actual_precision != TYPE_PRECISION (type1) |
| 2802 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
| 2803 | { |
| 2804 | if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1))) |
| 2805 | { |
| 2806 | if (TREE_CODE (rhs1) == INTEGER_CST) |
| 2807 | rhs1 = fold_convert (type1, rhs1); |
| 2808 | else |
| 2809 | rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: rhs1); |
| 2810 | } |
| 2811 | type1 = build_nonstandard_integer_type (actual_precision, |
| 2812 | from_unsigned1); |
| 2813 | } |
| 2814 | if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1))) |
| 2815 | { |
| 2816 | if (TREE_CODE (rhs1) == INTEGER_CST) |
| 2817 | rhs1 = fold_convert (type1, rhs1); |
| 2818 | else |
| 2819 | rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: rhs1); |
| 2820 | } |
| 2821 | if (actual_precision != TYPE_PRECISION (type2) |
| 2822 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
| 2823 | { |
| 2824 | if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2))) |
| 2825 | { |
| 2826 | if (TREE_CODE (rhs2) == INTEGER_CST) |
| 2827 | rhs2 = fold_convert (type2, rhs2); |
| 2828 | else |
| 2829 | rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: rhs2); |
| 2830 | } |
| 2831 | type2 = build_nonstandard_integer_type (actual_precision, |
| 2832 | from_unsigned2); |
| 2833 | } |
| 2834 | if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2))) |
| 2835 | { |
| 2836 | if (TREE_CODE (rhs2) == INTEGER_CST) |
| 2837 | rhs2 = fold_convert (type2, rhs2); |
| 2838 | else |
| 2839 | rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: rhs2); |
| 2840 | } |
| 2841 | |
| 2842 | gimple_assign_set_rhs1 (gs: stmt, rhs: rhs1); |
| 2843 | gimple_assign_set_rhs2 (gs: stmt, rhs: rhs2); |
| 2844 | gimple_assign_set_rhs_code (s: stmt, code: WIDEN_MULT_EXPR); |
| 2845 | update_stmt (s: stmt); |
| 2846 | widen_mul_stats.widen_mults_inserted++; |
| 2847 | return true; |
| 2848 | } |
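|  | 
|  | /* Example (added commentary, not from the original sources): on a target
|  |    with a 32x32->64 widening multiply pattern,
|  | 
|  |      unsigned int a, b;
|  |      unsigned long long p = (unsigned long long) a * b;
|  | 
|  |    has its MULT_EXPR rewritten in place into
|  | 
|  |      p = a w* b;    <-- WIDEN_MULT_EXPR in the gimple dumps
|  | 
|  |    so that expand can emit a single widening multiply instruction.  */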
| 2849 | |
| 2850 | /* Process a single gimple statement STMT, which is found at the |
| 2851 | iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its |
| 2852 | rhs (given by CODE), and try to convert it into a |
| 2853 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value |
| 2854 | is true iff we converted the statement. */ |
| 2855 | |
| 2856 | static bool |
| 2857 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, |
| 2858 | enum tree_code code) |
| 2859 | { |
| 2860 | gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL; |
| 2861 | gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt; |
| 2862 | tree type, type1, type2, optype; |
| 2863 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
| 2864 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; |
| 2865 | optab this_optab; |
| 2866 | enum tree_code wmult_code; |
| 2867 | enum insn_code handler; |
| 2868 | scalar_mode to_mode, from_mode, actual_mode; |
| 2869 | location_t loc = gimple_location (g: stmt); |
| 2870 | int actual_precision; |
| 2871 | bool from_unsigned1, from_unsigned2; |
| 2872 | |
| 2873 | lhs = gimple_assign_lhs (gs: stmt); |
| 2874 | type = TREE_TYPE (lhs); |
| 2875 | if ((TREE_CODE (type) != INTEGER_TYPE |
| 2876 | && TREE_CODE (type) != FIXED_POINT_TYPE) |
| 2877 | || !type_has_mode_precision_p (t: type)) |
| 2878 | return false; |
| 2879 | |
| 2880 | if (code == MINUS_EXPR) |
| 2881 | wmult_code = WIDEN_MULT_MINUS_EXPR; |
| 2882 | else |
| 2883 | wmult_code = WIDEN_MULT_PLUS_EXPR; |
| 2884 | |
| 2885 | rhs1 = gimple_assign_rhs1 (gs: stmt); |
| 2886 | rhs2 = gimple_assign_rhs2 (gs: stmt); |
| 2887 | |
| 2888 | if (TREE_CODE (rhs1) == SSA_NAME) |
| 2889 | { |
| 2890 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); |
| 2891 | if (is_gimple_assign (gs: rhs1_stmt)) |
| 2892 | rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt); |
| 2893 | } |
| 2894 | |
| 2895 | if (TREE_CODE (rhs2) == SSA_NAME) |
| 2896 | { |
| 2897 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); |
| 2898 | if (is_gimple_assign (gs: rhs2_stmt)) |
| 2899 | rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt); |
| 2900 | } |
| 2901 | |
| 2902 | /* Allow for one conversion statement between the multiply |
| 2903 | and addition/subtraction statement. If there is more than
| 2904 | one conversion then we assume they would invalidate this
| 2905 | transformation. If that's not the case then they should have |
| 2906 | been folded before now. */ |
| 2907 | if (CONVERT_EXPR_CODE_P (rhs1_code)) |
| 2908 | { |
| 2909 | conv1_stmt = rhs1_stmt; |
| 2910 | rhs1 = gimple_assign_rhs1 (gs: rhs1_stmt); |
| 2911 | if (TREE_CODE (rhs1) == SSA_NAME) |
| 2912 | { |
| 2913 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); |
| 2914 | if (is_gimple_assign (gs: rhs1_stmt)) |
| 2915 | rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt); |
| 2916 | } |
| 2917 | else |
| 2918 | return false; |
| 2919 | } |
| 2920 | if (CONVERT_EXPR_CODE_P (rhs2_code)) |
| 2921 | { |
| 2922 | conv2_stmt = rhs2_stmt; |
| 2923 | rhs2 = gimple_assign_rhs1 (gs: rhs2_stmt); |
| 2924 | if (TREE_CODE (rhs2) == SSA_NAME) |
| 2925 | { |
| 2926 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); |
| 2927 | if (is_gimple_assign (gs: rhs2_stmt)) |
| 2928 | rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt); |
| 2929 | } |
| 2930 | else |
| 2931 | return false; |
| 2932 | } |
| 2933 | |
| 2934 | /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
| 2935 | is_widening_mult_p, but we still need the operand values it returns.
| 2936 | |
| 2937 | It might also appear that it would be sufficient to use the existing |
| 2938 | operands of the widening multiply, but that would limit the choice of |
| 2939 | multiply-and-accumulate instructions. |
| 2940 | |
| 2941 | If the widened-multiplication result has more than one use, it is
| 2942 | probably wiser not to do the conversion. Also restrict this operation |
| 2943 | to single basic block to avoid moving the multiply to a different block |
| 2944 | with a higher execution frequency. */ |
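|  | 
|  | /* For instance (added commentary, not from the original sources), on a
|  |    target with a multiply-and-accumulate pattern this turns
|  | 
|  |      acc = acc + (int) a * (int) b;    <-- a, b of type short, acc int
|  | 
|  |    into a single WIDEN_MULT_PLUS_EXPR <a, b, acc>, provided the multiply
|  |    result feeds only this addition and is in the same basic block.  */
|  | 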
| 2945 | if (code == PLUS_EXPR |
| 2946 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) |
| 2947 | { |
| 2948 | if (!has_single_use (var: rhs1) |
| 2949 | || gimple_bb (g: rhs1_stmt) != gimple_bb (g: stmt) |
| 2950 | || !is_widening_mult_p (stmt: rhs1_stmt, type1_out: &type1, rhs1_out: &mult_rhs1, |
| 2951 | type2_out: &type2, rhs2_out: &mult_rhs2)) |
| 2952 | return false; |
| 2953 | add_rhs = rhs2; |
| 2954 | conv_stmt = conv1_stmt; |
| 2955 | } |
| 2956 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
| 2957 | { |
| 2958 | if (!has_single_use (var: rhs2) |
| 2959 | || gimple_bb (g: rhs2_stmt) != gimple_bb (g: stmt) |
| 2960 | || !is_widening_mult_p (stmt: rhs2_stmt, type1_out: &type1, rhs1_out: &mult_rhs1, |
| 2961 | type2_out: &type2, rhs2_out: &mult_rhs2)) |
| 2962 | return false; |
| 2963 | add_rhs = rhs1; |
| 2964 | conv_stmt = conv2_stmt; |
| 2965 | } |
| 2966 | else |
| 2967 | return false; |
| 2968 | |
| 2969 | to_mode = SCALAR_TYPE_MODE (type); |
| 2970 | from_mode = SCALAR_TYPE_MODE (type1); |
| 2971 | if (to_mode == from_mode) |
| 2972 | return false; |
| 2973 | |
| 2974 | from_unsigned1 = TYPE_UNSIGNED (type1); |
| 2975 | from_unsigned2 = TYPE_UNSIGNED (type2); |
| 2976 | optype = type1; |
| 2977 | |
| 2978 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
| 2979 | if (from_unsigned1 != from_unsigned2) |
| 2980 | { |
| 2981 | if (!INTEGRAL_TYPE_P (type)) |
| 2982 | return false; |
| 2983 | /* We can use a signed multiply with unsigned types as long as |
| 2984 | there is a wider mode to use, or it is the smaller of the two |
| 2985 | types that is unsigned. Note that type1 >= type2, always. */ |
| 2986 | if ((from_unsigned1 |
| 2987 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode)) |
| 2988 | || (from_unsigned2 |
| 2989 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode))) |
| 2990 | { |
| 2991 | if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode) |
| 2992 | || GET_MODE_SIZE (mode: from_mode) >= GET_MODE_SIZE (mode: to_mode)) |
| 2993 | return false; |
| 2994 | } |
| 2995 | |
| 2996 | from_unsigned1 = from_unsigned2 = false; |
| 2997 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (mode: from_mode), |
| 2998 | false); |
| 2999 | } |
| 3000 | |
| 3001 | /* If there was a conversion between the multiply and addition |
| 3002 | then we need to make sure it fits a multiply-and-accumulate. |
| 3003 | There should be a single mode change which does not change the |
| 3004 | value. */ |
| 3005 | if (conv_stmt) |
| 3006 | { |
| 3007 | /* We use the original, unmodified data types for this. */ |
| 3008 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
| 3009 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); |
| 3010 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); |
| 3011 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); |
| 3012 | |
| 3013 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) |
| 3014 | { |
| 3015 | /* Conversion is a truncate. */ |
| 3016 | if (TYPE_PRECISION (to_type) < data_size) |
| 3017 | return false; |
| 3018 | } |
| 3019 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) |
| 3020 | { |
| 3021 | /* Conversion is an extend. Check it's the right sort. */ |
| 3022 | if (TYPE_UNSIGNED (from_type) != is_unsigned |
| 3023 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) |
| 3024 | return false; |
| 3025 | } |
| 3026 | /* else convert is a no-op for our purposes. */ |
| 3027 | } |
| 3028 | |
| 3029 | /* Verify that the machine can perform a widening multiply |
| 3030 | accumulate in this mode/signedness combination, otherwise |
| 3031 | this transformation is likely to pessimize code. */ |
| 3032 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
| 3033 | handler = find_widening_optab_handler_and_mode (op: this_optab, to_mode, |
| 3034 | from_mode, found_mode: &actual_mode); |
| 3035 | |
| 3036 | if (handler == CODE_FOR_nothing) |
| 3037 | return false; |
| 3038 | |
| 3039 | /* Ensure that the inputs to the handler are in the correct precision |
| 3040 | for the opcode. This will be the full mode size. */ |
| 3041 | actual_precision = GET_MODE_PRECISION (mode: actual_mode); |
| 3042 | if (actual_precision != TYPE_PRECISION (type1) |
| 3043 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
| 3044 | { |
| 3045 | if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1))) |
| 3046 | { |
| 3047 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) |
| 3048 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
| 3049 | else |
| 3050 | mult_rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: mult_rhs1); |
| 3051 | } |
| 3052 | type1 = build_nonstandard_integer_type (actual_precision, |
| 3053 | from_unsigned1); |
| 3054 | } |
| 3055 | if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1))) |
| 3056 | { |
| 3057 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) |
| 3058 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
| 3059 | else |
| 3060 | mult_rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: mult_rhs1); |
| 3061 | } |
| 3062 | if (actual_precision != TYPE_PRECISION (type2) |
| 3063 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
| 3064 | { |
| 3065 | if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2))) |
| 3066 | { |
| 3067 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
| 3068 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
| 3069 | else |
| 3070 | mult_rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: mult_rhs2); |
| 3071 | } |
| 3072 | type2 = build_nonstandard_integer_type (actual_precision, |
| 3073 | from_unsigned2); |
| 3074 | } |
| 3075 | if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2))) |
| 3076 | { |
| 3077 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
| 3078 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
| 3079 | else |
| 3080 | mult_rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: mult_rhs2); |
| 3081 | } |
| 3082 | |
| 3083 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
| 3084 | add_rhs = build_and_insert_cast (gsi, loc, type, val: add_rhs); |
| 3085 | |
| 3086 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
| 3087 | add_rhs); |
| 3088 | update_stmt (s: gsi_stmt (i: *gsi)); |
| 3089 | widen_mul_stats.maccs_inserted++; |
| 3090 | return true; |
| 3091 | } |
| 3092 | |
| 3093 | /* Given a result MUL_RESULT which is the result of a multiplication of OP1 and |
| 3094 | OP2, and which we know is used in statements that can be, together with the |
| 3095 | multiplication, converted to FMAs, perform the transformation. */ |
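| | /* Illustrative example only (the function below is hypothetical, not part |
| | of GCC or its testsuite): when FP contraction is allowed |
| | (-ffp-contract=fast), a source function such as |
| | |
| | double madd (double a, double b, double c) { return a * b + c; } |
| | |
| | has its multiply and add fused into a single .FMA internal call here, and |
| | the related shapes a * b - c, c - a * b and -(a * b) - c end up as the |
| | FMS, FNMA and FNMS forms via the follow_all_ssa_edges folding below. */ |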
| 3096 | |
| 3097 | static void |
| 3098 | convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) |
| 3099 | { |
| 3100 | tree type = TREE_TYPE (mul_result); |
| 3101 | gimple *use_stmt; |
| 3102 | imm_use_iterator imm_iter; |
| 3103 | gcall *fma_stmt; |
| 3104 | |
| 3105 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) |
| 3106 | { |
| 3107 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 3108 | tree addop, mulop1 = op1, result = mul_result; |
| 3109 | bool negate_p = false; |
| 3110 | gimple_seq seq = NULL; |
| 3111 | |
| 3112 | if (is_gimple_debug (gs: use_stmt)) |
| 3113 | continue; |
| 3114 | |
| 3115 | if (is_gimple_assign (gs: use_stmt) |
| 3116 | && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR) |
| 3117 | { |
| 3118 | result = gimple_assign_lhs (gs: use_stmt); |
| 3119 | use_operand_p use_p; |
| 3120 | gimple *neguse_stmt; |
| 3121 | single_imm_use (var: gimple_assign_lhs (gs: use_stmt), use_p: &use_p, stmt: &neguse_stmt); |
| 3122 | gsi_remove (&gsi, true); |
| 3123 | release_defs (use_stmt); |
| 3124 | |
| 3125 | use_stmt = neguse_stmt; |
| 3126 | gsi = gsi_for_stmt (use_stmt); |
| 3127 | negate_p = true; |
| 3128 | } |
| 3129 | |
| 3130 | tree cond, else_value, ops[3], len, bias; |
| 3131 | tree_code code; |
| 3132 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, |
| 3133 | ops, &else_value, |
| 3134 | &len, &bias)) |
| 3135 | gcc_unreachable (); |
| 3136 | addop = ops[0] == result ? ops[1] : ops[0]; |
| 3137 | |
| 3138 | if (code == MINUS_EXPR) |
| 3139 | { |
| 3140 | if (ops[0] == result) |
| 3141 | /* a * b - c -> a * b + (-c) */ |
| 3142 | addop = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: addop); |
| 3143 | else |
| 3144 | /* a - b * c -> (-b) * c + a */ |
| 3145 | negate_p = !negate_p; |
| 3146 | } |
| 3147 | |
| 3148 | if (negate_p) |
| 3149 | mulop1 = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: mulop1); |
| 3150 | |
| 3151 | if (seq) |
| 3152 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
| 3153 | |
| 3154 | if (len) |
| 3155 | fma_stmt |
| 3156 | = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2, |
| 3157 | addop, else_value, len, bias); |
| 3158 | else if (cond) |
| 3159 | fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, |
| 3160 | op2, addop, else_value); |
| 3161 | else |
| 3162 | fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); |
| 3163 | gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); |
| 3164 | gimple_call_set_nothrow (s: fma_stmt, nothrow_p: !stmt_can_throw_internal (cfun, |
| 3165 | use_stmt)); |
| 3166 | gsi_replace (&gsi, fma_stmt, true); |
| 3167 | /* Follow all SSA edges so that we generate FMS, FNMA and FNMS |
| 3168 | regardless of where the negation occurs. */ |
| 3169 | gimple *orig_stmt = gsi_stmt (i: gsi); |
| 3170 | if (fold_stmt (&gsi, follow_all_ssa_edges)) |
| 3171 | { |
| 3172 | if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (i: gsi))) |
| 3173 | gcc_unreachable (); |
| 3174 | update_stmt (s: gsi_stmt (i: gsi)); |
| 3175 | } |
| 3176 | |
| 3177 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3178 | { |
| 3179 | fprintf (stream: dump_file, format: "Generated FMA " ); |
| 3180 | print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE); |
| 3181 | fprintf (stream: dump_file, format: "\n" ); |
| 3182 | } |
| 3183 | |
| 3184 | /* If the FMA result is negated in a single use, fold the negation |
| 3185 | too. */ |
| 3186 | orig_stmt = gsi_stmt (i: gsi); |
| 3187 | use_operand_p use_p; |
| 3188 | gimple *neg_stmt; |
| 3189 | if (is_gimple_call (gs: orig_stmt) |
| 3190 | && gimple_call_internal_p (gs: orig_stmt) |
| 3191 | && gimple_call_lhs (gs: orig_stmt) |
| 3192 | && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME |
| 3193 | && single_imm_use (var: gimple_call_lhs (gs: orig_stmt), use_p: &use_p, stmt: &neg_stmt) |
| 3194 | && is_gimple_assign (gs: neg_stmt) |
| 3195 | && gimple_assign_rhs_code (gs: neg_stmt) == NEGATE_EXPR |
| 3196 | && !stmt_could_throw_p (cfun, neg_stmt)) |
| 3197 | { |
| 3198 | gsi = gsi_for_stmt (neg_stmt); |
| 3199 | if (fold_stmt (&gsi, follow_all_ssa_edges)) |
| 3200 | { |
| 3201 | if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (i: gsi))) |
| 3202 | gcc_unreachable (); |
| 3203 | update_stmt (s: gsi_stmt (i: gsi)); |
| 3204 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3205 | { |
| 3206 | fprintf (stream: dump_file, format: "Folded FMA negation " ); |
| 3207 | print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE); |
| 3208 | fprintf (stream: dump_file, format: "\n" ); |
| 3209 | } |
| 3210 | } |
| 3211 | } |
| 3212 | |
| 3213 | widen_mul_stats.fmas_inserted++; |
| 3214 | } |
| 3215 | } |
| 3216 | |
| 3217 | /* Data necessary to perform the actual transformation from a multiplication |
| 3218 | and an addition to an FMA once the decision has been made to do it, and to |
| 3219 | then delete the multiplication statement from the function IL. */ |
| 3220 | |
| 3221 | struct fma_transformation_info |
| 3222 | { |
| 3223 | gimple *mul_stmt; |
| 3224 | tree mul_result; |
| 3225 | tree op1; |
| 3226 | tree op2; |
| 3227 | }; |
| 3228 | |
| 3229 | /* Structure containing the current state of FMA deferring, i.e. whether we are |
| 3230 | deferring, whether to continue deferring, and all data necessary to come |
| 3231 | back and perform all deferred transformations. */ |
| 3232 | |
| 3233 | class fma_deferring_state |
| 3234 | { |
| 3235 | public: |
| 3236 | /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually |
| 3237 | do any deferring. */ |
| 3238 | |
| 3239 | fma_deferring_state (bool perform_deferring) |
| 3240 | : m_candidates (), m_mul_result_set (), m_initial_phi (NULL), |
| 3241 | m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {} |
| 3242 | |
| 3243 | /* List of FMA candidates for which the transformation has been determined |
| 3244 | to be possible but which, at this point in the BB analysis, we do not |
| 3245 | consider beneficial. */ |
| 3246 | auto_vec<fma_transformation_info, 8> m_candidates; |
| 3247 | |
| 3248 | /* Set of results of multiplications that are part of already deferred FMA |
| 3249 | candidates. */ |
| 3250 | hash_set<tree> m_mul_result_set; |
| 3251 | |
| 3252 | /* The PHI that supposedly feeds back the result of an FMA to another over |
| 3253 | a loop boundary. */ |
| 3254 | gphi *m_initial_phi; |
| 3255 | |
| 3256 | /* Result of the last produced FMA candidate or NULL if there has not been |
| 3257 | one. */ |
| 3258 | tree m_last_result; |
| 3259 | |
| 3260 | /* If true, deferring might still be profitable. If false, transform all |
| 3261 | candidates and no longer defer. */ |
| 3262 | bool m_deferring_p; |
| 3263 | }; |
| 3264 | |
| 3265 | /* Transform all deferred FMA candidates and mark STATE as no longer |
| 3266 | deferring. */ |
| 3267 | |
| 3268 | static void |
| 3269 | cancel_fma_deferring (fma_deferring_state *state) |
| 3270 | { |
| 3271 | if (!state->m_deferring_p) |
| 3272 | return; |
| 3273 | |
| 3274 | for (unsigned i = 0; i < state->m_candidates.length (); i++) |
| 3275 | { |
| 3276 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3277 | fprintf (stream: dump_file, format: "Generating deferred FMA\n" ); |
| 3278 | |
| 3279 | const fma_transformation_info &fti = state->m_candidates[i]; |
| 3280 | convert_mult_to_fma_1 (mul_result: fti.mul_result, op1: fti.op1, op2: fti.op2); |
| 3281 | |
| 3282 | gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt); |
| 3283 | gsi_remove (&gsi, true); |
| 3284 | release_defs (fti.mul_stmt); |
| 3285 | } |
| 3286 | state->m_deferring_p = false; |
| 3287 | } |
| 3288 | |
| 3289 | /* If OP is an SSA name defined by a PHI node, return the PHI statement. |
| 3290 | Otherwise return NULL. */ |
| 3291 | |
| 3292 | static gphi * |
| 3293 | result_of_phi (tree op) |
| 3294 | { |
| 3295 | if (TREE_CODE (op) != SSA_NAME) |
| 3296 | return NULL; |
| 3297 | |
| 3298 | return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op)); |
| 3299 | } |
| 3300 | |
| 3301 | /* After processing statements of a BB and recording STATE, return true if the |
| 3302 | initial phi is fed by the last FMA candidate result or one such result from |
| 3303 | previously processed BBs marked in LAST_RESULT_SET. */ |
| 3304 | |
| 3305 | static bool |
| 3306 | last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, |
| 3307 | hash_set<tree> *last_result_set) |
| 3308 | { |
| 3309 | ssa_op_iter iter; |
| 3310 | use_operand_p use; |
| 3311 | FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE) |
| 3312 | { |
| 3313 | tree t = USE_FROM_PTR (use); |
| 3314 | if (t == state->m_last_result |
| 3315 | || last_result_set->contains (k: t)) |
| 3316 | return true; |
| 3317 | } |
| 3318 | |
| 3319 | return false; |
| 3320 | } |
| 3321 | |
| 3322 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
| 3323 | with uses in additions and subtractions to form fused multiply-add |
| 3324 | operations. Returns true if successful and MUL_STMT should be removed. |
| 3325 | If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional |
| 3326 | on MUL_COND, otherwise it is unconditional. |
| 3327 | |
| 3328 | If STATE indicates that we are deferring FMA transformation, that means |
| 3329 | that we do not produce FMAs for basic blocks which look like: |
| 3330 | |
| 3331 | <bb 6> |
| 3332 | # accumulator_111 = PHI <0.0(5), accumulator_66(6)> |
| 3333 | _65 = _14 * _16; |
| 3334 | accumulator_66 = _65 + accumulator_111; |
| 3335 | |
| 3336 | or its unrolled version, i.e. with several FMA candidates that feed the |
| 3337 | result of one into the addend of another.  Instead, we add them to a list |
| 3338 | in STATE and if we later discover an FMA candidate that is not part of such |
| 3339 | a chain, we go back and perform all the previously deferred candidates. */ |
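| | /* A hedged source-level sketch of the kind of code the deferring targets |
| | (the function and names below are illustrative, not from a testcase): |
| | |
| | double dot (const double *a, const double *b, int n) |
| | { |
| |   double acc = 0.0; |
| |   for (int i = 0; i < n; i++) |
| |     acc += a[i] * b[i]; |
| |   return acc; |
| | } |
| | |
| | Forming an FMA for every iteration keeps the multiply on the loop-carried |
| | dependency chain through ACC, which on some targets is slower than |
| | independent multiplies feeding cheap adds; hence the |
| | param_avoid_fma_max_bits guard and the deferral logic in this function. */ |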
| 3340 | |
| 3341 | static bool |
| 3342 | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, |
| 3343 | fma_deferring_state *state, tree mul_cond = NULL_TREE, |
| 3344 | tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE) |
| 3345 | { |
| 3346 | tree mul_result = gimple_get_lhs (mul_stmt); |
| 3347 | /* If there isn't a LHS then this can't be an FMA. There can be no LHS |
| 3348 | if the statement was left just for the side-effects. */ |
| 3349 | if (!mul_result) |
| 3350 | return false; |
| 3351 | tree type = TREE_TYPE (mul_result); |
| 3352 | gimple *use_stmt, *neguse_stmt; |
| 3353 | use_operand_p use_p; |
| 3354 | imm_use_iterator imm_iter; |
| 3355 | |
| 3356 | if (FLOAT_TYPE_P (type) |
| 3357 | && flag_fp_contract_mode != FP_CONTRACT_FAST) |
| 3358 | return false; |
| 3359 | |
| 3360 | /* We don't want to do bitfield reduction ops. */ |
| 3361 | if (INTEGRAL_TYPE_P (type) |
| 3362 | && (!type_has_mode_precision_p (t: type) || TYPE_OVERFLOW_TRAPS (type))) |
| 3363 | return false; |
| 3364 | |
| 3365 | /* If the target doesn't support it, don't generate it. We assume that |
| 3366 | if fma isn't available then fms, fnma or fnms are not either. */ |
| 3367 | optimization_type opt_type = bb_optimization_type (gimple_bb (g: mul_stmt)); |
| 3368 | if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type)) |
| 3369 | return false; |
| 3370 | |
| 3371 | /* If the multiplication has zero uses, it is kept around probably because |
| 3372 | of -fnon-call-exceptions. Don't optimize it away in that case, |
| 3373 | that is DCE's job. */ |
| 3374 | if (has_zero_uses (var: mul_result)) |
| 3375 | return false; |
| 3376 | |
| 3377 | bool check_defer |
| 3378 | = (state->m_deferring_p |
| 3379 | && maybe_le (a: tree_to_poly_int64 (TYPE_SIZE (type)), |
| 3380 | param_avoid_fma_max_bits)); |
| 3381 | bool defer = check_defer; |
| 3382 | bool seen_negate_p = false; |
| 3383 | |
| 3384 | /* There is no numerical difference between fused and unfused integer FMAs, |
| 3385 | and the assumption below that FMA is as cheap as addition is unlikely |
| 3386 | to be true, especially if the multiplication occurs multiple times on |
| 3387 | the same chain. E.g., for something like: |
| 3388 | |
| 3389 | (((a * b) + c) >> 1) + (a * b) |
| 3390 | |
| 3391 | we do not want to duplicate the a * b into two additions, not least |
| 3392 | because the result is not a natural FMA chain. */ |
| 3393 | if (ANY_INTEGRAL_TYPE_P (type) |
| 3394 | && !has_single_use (var: mul_result)) |
| 3395 | return false; |
| 3396 | |
| 3397 | if (!dbg_cnt (index: form_fma)) |
| 3398 | return false; |
| 3399 | |
| 3400 | /* Make sure that the multiplication statement becomes dead after |
| 3401 | the transformation, i.e. that all uses are transformed to FMAs. |
| 3402 | This means we assume that an FMA operation has the same cost |
| 3403 | as an addition. */ |
| 3404 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) |
| 3405 | { |
| 3406 | tree result = mul_result; |
| 3407 | bool negate_p = false; |
| 3408 | |
| 3409 | use_stmt = USE_STMT (use_p); |
| 3410 | |
| 3411 | if (is_gimple_debug (gs: use_stmt)) |
| 3412 | continue; |
| 3413 | |
| 3414 | /* For now restrict this operation to single basic blocks.  In theory |
| 3415 | we would want to support sinking the multiplication in |
| 3416 | m = a*b; |
| 3417 | if () |
| 3418 | ma = m + c; |
| 3419 | else |
| 3420 | d = m; |
| 3421 | to form an fma in the then block and sink the multiplication to the |
| 3422 | else block. */ |
| 3423 | if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt)) |
| 3424 | return false; |
| 3425 | |
| 3426 | /* A negate on the multiplication leads to FNMA. */ |
| 3427 | if (is_gimple_assign (gs: use_stmt) |
| 3428 | && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR) |
| 3429 | { |
| 3430 | ssa_op_iter iter; |
| 3431 | use_operand_p usep; |
| 3432 | |
| 3433 | /* If (due to earlier missed optimizations) we have two |
| 3434 | negates of the same value, treat them as equivalent |
| 3435 | to a single negate with multiple uses. */ |
| 3436 | if (seen_negate_p) |
| 3437 | return false; |
| 3438 | |
| 3439 | result = gimple_assign_lhs (gs: use_stmt); |
| 3440 | |
| 3441 | /* Make sure the negate statement becomes dead with this |
| 3442 | single transformation. */ |
| 3443 | if (!single_imm_use (var: gimple_assign_lhs (gs: use_stmt), |
| 3444 | use_p: &use_p, stmt: &neguse_stmt)) |
| 3445 | return false; |
| 3446 | |
| 3447 | /* Make sure the multiplication isn't also used on that stmt. */ |
| 3448 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
| 3449 | if (USE_FROM_PTR (usep) == mul_result) |
| 3450 | return false; |
| 3451 | |
| 3452 | /* Re-validate. */ |
| 3453 | use_stmt = neguse_stmt; |
| 3454 | if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt)) |
| 3455 | return false; |
| 3456 | |
| 3457 | negate_p = seen_negate_p = true; |
| 3458 | } |
| 3459 | |
| 3460 | tree cond, else_value, ops[3], len, bias; |
| 3461 | tree_code code; |
| 3462 | if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops, |
| 3463 | &else_value, &len, &bias)) |
| 3464 | return false; |
| 3465 | |
| 3466 | switch (code) |
| 3467 | { |
| 3468 | case MINUS_EXPR: |
| 3469 | if (ops[1] == result) |
| 3470 | negate_p = !negate_p; |
| 3471 | break; |
| 3472 | case PLUS_EXPR: |
| 3473 | break; |
| 3474 | default: |
| 3475 | /* FMA can only be formed from PLUS and MINUS. */ |
| 3476 | return false; |
| 3477 | } |
| 3478 | |
| 3479 | if (len) |
| 3480 | { |
| 3481 | /* For COND_LEN_* operations, we may have a dummy mask which is |
| 3482 | the all-true mask.  In that case mul_cond and cond may be |
| 3483 | different trees but we still consider them equal. */ |
| 3484 | if (mul_cond && cond != mul_cond |
| 3485 | && !(integer_truep (mul_cond) && integer_truep (cond))) |
| 3486 | return false; |
| 3487 | |
| 3488 | if (else_value == result) |
| 3489 | return false; |
| 3490 | |
| 3491 | if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type, |
| 3492 | opt_type)) |
| 3493 | return false; |
| 3494 | |
| 3495 | if (mul_len) |
| 3496 | { |
| 3497 | poly_int64 mul_value, value; |
| 3498 | if (poly_int_tree_p (t: mul_len, value: &mul_value) |
| 3499 | && poly_int_tree_p (t: len, value: &value) |
| 3500 | && maybe_ne (a: mul_value, b: value)) |
| 3501 | return false; |
| 3502 | else if (mul_len != len) |
| 3503 | return false; |
| 3504 | |
| 3505 | if (wi::to_widest (t: mul_bias) != wi::to_widest (t: bias)) |
| 3506 | return false; |
| 3507 | } |
| 3508 | } |
| 3509 | else |
| 3510 | { |
| 3511 | if (mul_cond && cond != mul_cond) |
| 3512 | return false; |
| 3513 | |
| 3514 | if (cond) |
| 3515 | { |
| 3516 | if (cond == result || else_value == result) |
| 3517 | return false; |
| 3518 | if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, |
| 3519 | opt_type)) |
| 3520 | return false; |
| 3521 | } |
| 3522 | } |
| 3523 | |
| 3524 | /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that |
| 3525 | we'll visit later, we might be able to get a more profitable |
| 3526 | match with fnma. |
| 3527 | OTOH, if we don't, a negate / fma pair likely has lower latency |
| 3528 | than a mult / subtract pair. */ |
| 3529 | if (code == MINUS_EXPR |
| 3530 | && !negate_p |
| 3531 | && ops[0] == result |
| 3532 | && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type) |
| 3533 | && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type) |
| 3534 | && TREE_CODE (ops[1]) == SSA_NAME |
| 3535 | && has_single_use (var: ops[1])) |
| 3536 | { |
| 3537 | gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]); |
| 3538 | if (is_gimple_assign (gs: stmt2) |
| 3539 | && gimple_assign_rhs_code (gs: stmt2) == MULT_EXPR) |
| 3540 | return false; |
| 3541 | } |
| 3542 | |
| 3543 | /* We can't handle a * b + a * b. */ |
| 3544 | if (ops[0] == ops[1]) |
| 3545 | return false; |
| 3546 | /* If deferring, make sure we are not looking at an instruction that |
| 3547 | wouldn't have existed if we were not. */ |
| 3548 | if (state->m_deferring_p |
| 3549 | && (state->m_mul_result_set.contains (k: ops[0]) |
| 3550 | || state->m_mul_result_set.contains (k: ops[1]))) |
| 3551 | return false; |
| 3552 | |
| 3553 | if (check_defer) |
| 3554 | { |
| 3555 | tree use_lhs = gimple_get_lhs (use_stmt); |
| 3556 | if (state->m_last_result) |
| 3557 | { |
| 3558 | if (ops[1] == state->m_last_result |
| 3559 | || ops[0] == state->m_last_result) |
| 3560 | defer = true; |
| 3561 | else |
| 3562 | defer = false; |
| 3563 | } |
| 3564 | else |
| 3565 | { |
| 3566 | gcc_checking_assert (!state->m_initial_phi); |
| 3567 | gphi *phi; |
| 3568 | if (ops[0] == result) |
| 3569 | phi = result_of_phi (op: ops[1]); |
| 3570 | else |
| 3571 | { |
| 3572 | gcc_assert (ops[1] == result); |
| 3573 | phi = result_of_phi (op: ops[0]); |
| 3574 | } |
| 3575 | |
| 3576 | if (phi) |
| 3577 | { |
| 3578 | state->m_initial_phi = phi; |
| 3579 | defer = true; |
| 3580 | } |
| 3581 | else |
| 3582 | defer = false; |
| 3583 | } |
| 3584 | |
| 3585 | state->m_last_result = use_lhs; |
| 3586 | check_defer = false; |
| 3587 | } |
| 3588 | else |
| 3589 | defer = false; |
| 3590 | |
| 3591 | /* While it is possible to validate whether or not the exact form that |
| 3592 | we've recognized is available in the backend, the assumption is that |
| 3593 | if the deferring logic above did not trigger, the transformation is |
| 3594 | never a loss. For instance, suppose the target only has the plain FMA |
| 3595 | pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged |
| 3596 | MUL+SUB for FMA+NEG, which is still two operations. Consider |
| 3597 | -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA |
| 3598 | form the two NEGs are independent and could be run in parallel. */ |
| 3599 | } |
| 3600 | |
| 3601 | if (defer) |
| 3602 | { |
| 3603 | fma_transformation_info fti; |
| 3604 | fti.mul_stmt = mul_stmt; |
| 3605 | fti.mul_result = mul_result; |
| 3606 | fti.op1 = op1; |
| 3607 | fti.op2 = op2; |
| 3608 | state->m_candidates.safe_push (obj: fti); |
| 3609 | state->m_mul_result_set.add (k: mul_result); |
| 3610 | |
| 3611 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 3612 | { |
| 3613 | fprintf (stream: dump_file, format: "Deferred generating FMA for multiplication " ); |
| 3614 | print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE); |
| 3615 | fprintf (stream: dump_file, format: "\n" ); |
| 3616 | } |
| 3617 | |
| 3618 | return false; |
| 3619 | } |
| 3620 | else |
| 3621 | { |
| 3622 | if (state->m_deferring_p) |
| 3623 | cancel_fma_deferring (state); |
| 3624 | convert_mult_to_fma_1 (mul_result, op1, op2); |
| 3625 | return true; |
| 3626 | } |
| 3627 | } |
| 3628 | |
| 3629 | |
| 3630 | /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have |
| 3631 | a check for non-zero like: |
| 3632 | _1 = x_4(D) * y_5(D); |
| 3633 | *res_7(D) = _1; |
| 3634 | if (x_4(D) != 0) |
| 3635 | goto <bb 3>; [50.00%] |
| 3636 | else |
| 3637 | goto <bb 4>; [50.00%] |
| 3638 | |
| 3639 | <bb 3> [local count: 536870913]: |
| 3640 | _2 = _1 / x_4(D); |
| 3641 | _9 = _2 != y_5(D); |
| 3642 | _10 = (int) _9; |
| 3643 | |
| 3644 | <bb 4> [local count: 1073741824]: |
| 3645 | # iftmp.0_3 = PHI <_10(3), 0(2)> |
| 3646 | then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also |
| 3647 | optimize the x_4(D) != 0 condition to 1. */ |
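| | /* An illustrative C-level origin of this shape (hypothetical example, not |
| | taken from a testcase): |
| | |
| | int mul_wraps (unsigned x, unsigned y, unsigned *res) |
| | { |
| |   *res = x * y; |
| |   return x != 0 && *res / x != y; |
| | } |
| | |
| | Once the multiplication and division are rewritten to use .MUL_OVERFLOW, |
| | the x != 0 guard only protected the division and can be folded away. */ |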
| 3648 | |
| 3649 | static void |
| 3650 | maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt, |
| 3651 | gimple *div_stmt, bool *cfg_changed) |
| 3652 | { |
| 3653 | basic_block bb = gimple_bb (g: cond_stmt); |
| 3654 | if (gimple_bb (g: div_stmt) != bb || !single_pred_p (bb)) |
| 3655 | return; |
| 3656 | edge pred_edge = single_pred_edge (bb); |
| 3657 | basic_block pred_bb = pred_edge->src; |
| 3658 | if (EDGE_COUNT (pred_bb->succs) != 2) |
| 3659 | return; |
| 3660 | edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge); |
| 3661 | edge other_succ_edge = NULL; |
| 3662 | if (gimple_code (g: cond_stmt) == GIMPLE_COND) |
| 3663 | { |
| 3664 | if (EDGE_COUNT (bb->succs) != 2) |
| 3665 | return; |
| 3666 | other_succ_edge = EDGE_SUCC (bb, 0); |
| 3667 | if (gimple_cond_code (gs: cond_stmt) == NE_EXPR) |
| 3668 | { |
| 3669 | if (other_succ_edge->flags & EDGE_TRUE_VALUE) |
| 3670 | other_succ_edge = EDGE_SUCC (bb, 1); |
| 3671 | } |
| 3672 | else if (other_succ_edge->flags & EDGE_FALSE_VALUE) |
| 3673 | other_succ_edge = EDGE_SUCC (bb, 0); |
| 3674 | if (other_edge->dest != other_succ_edge->dest) |
| 3675 | return; |
| 3676 | } |
| 3677 | else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb)) |
| 3678 | return; |
| 3679 | gcond *zero_cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred_bb)); |
| 3680 | if (zero_cond == NULL |
| 3681 | || (gimple_cond_code (gs: zero_cond) |
| 3682 | != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR)) |
| 3683 | || !integer_zerop (gimple_cond_rhs (gs: zero_cond))) |
| 3684 | return; |
| 3685 | tree zero_cond_lhs = gimple_cond_lhs (gs: zero_cond); |
| 3686 | if (TREE_CODE (zero_cond_lhs) != SSA_NAME) |
| 3687 | return; |
| 3688 | if (gimple_assign_rhs2 (gs: div_stmt) != zero_cond_lhs) |
| 3689 | { |
| 3690 | /* Allow the divisor to be the result of a same precision cast |
| 3691 | from zero_cond_lhs. */ |
| 3692 | tree rhs2 = gimple_assign_rhs2 (gs: div_stmt); |
| 3693 | if (TREE_CODE (rhs2) != SSA_NAME) |
| 3694 | return; |
| 3695 | gimple *g = SSA_NAME_DEF_STMT (rhs2); |
| 3696 | if (!gimple_assign_cast_p (s: g) |
| 3697 | || gimple_assign_rhs1 (gs: g) != gimple_cond_lhs (gs: zero_cond) |
| 3698 | || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs)) |
| 3699 | || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs)) |
| 3700 | != TYPE_PRECISION (TREE_TYPE (rhs2)))) |
| 3701 | return; |
| 3702 | } |
| 3703 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
| 3704 | mul_stmts.quick_push (obj: div_stmt); |
| 3705 | if (is_gimple_debug (gs: gsi_stmt (i: gsi))) |
| 3706 | gsi_next_nondebug (i: &gsi); |
| 3707 | unsigned cast_count = 0; |
| 3708 | while (gsi_stmt (i: gsi) != cond_stmt) |
| 3709 | { |
| 3710 | /* If the original mul_stmt has a single use, allow it in the same bb; |
| 3711 | we are then looking just at __builtin_mul_overflow_p. |
| 3712 | Though, in that case the original mul_stmt will be replaced |
| 3713 | by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */ |
| 3714 | gimple *mul_stmt; |
| 3715 | unsigned int i; |
| 3716 | bool ok = false; |
| 3717 | FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt) |
| 3718 | { |
| 3719 | if (gsi_stmt (i: gsi) == mul_stmt) |
| 3720 | { |
| 3721 | ok = true; |
| 3722 | break; |
| 3723 | } |
| 3724 | } |
| 3725 | if (!ok && gimple_assign_cast_p (s: gsi_stmt (i: gsi)) && ++cast_count < 4) |
| 3726 | ok = true; |
| 3727 | if (!ok) |
| 3728 | return; |
| 3729 | gsi_next_nondebug (i: &gsi); |
| 3730 | } |
| 3731 | if (gimple_code (g: cond_stmt) == GIMPLE_COND) |
| 3732 | { |
| 3733 | basic_block succ_bb = other_edge->dest; |
| 3734 | for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (i: gpi); |
| 3735 | gsi_next (i: &gpi)) |
| 3736 | { |
| 3737 | gphi *phi = gpi.phi (); |
| 3738 | tree v1 = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx); |
| 3739 | tree v2 = gimple_phi_arg_def (gs: phi, index: other_succ_edge->dest_idx); |
| 3740 | if (!operand_equal_p (v1, v2, flags: 0)) |
| 3741 | return; |
| 3742 | } |
| 3743 | } |
| 3744 | else |
| 3745 | { |
| 3746 | tree lhs = gimple_assign_lhs (gs: cond_stmt); |
| 3747 | if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))) |
| 3748 | return; |
| 3749 | gsi_next_nondebug (i: &gsi); |
| 3750 | if (!gsi_end_p (i: gsi)) |
| 3751 | { |
| 3752 | if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR) |
| 3753 | return; |
| 3754 | gimple *cast_stmt = gsi_stmt (i: gsi); |
| 3755 | if (!gimple_assign_cast_p (s: cast_stmt)) |
| 3756 | return; |
| 3757 | tree new_lhs = gimple_assign_lhs (gs: cast_stmt); |
| 3758 | gsi_next_nondebug (i: &gsi); |
| 3759 | if (!gsi_end_p (i: gsi) |
| 3760 | || !new_lhs |
| 3761 | || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs)) |
| 3762 | || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1) |
| 3763 | return; |
| 3764 | lhs = new_lhs; |
| 3765 | } |
| 3766 | edge succ_edge = single_succ_edge (bb); |
| 3767 | basic_block succ_bb = succ_edge->dest; |
| 3768 | gsi = gsi_start_phis (succ_bb); |
| 3769 | if (gsi_end_p (i: gsi)) |
| 3770 | return; |
| 3771 | gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi)); |
| 3772 | gsi_next (i: &gsi); |
| 3773 | if (!gsi_end_p (i: gsi)) |
| 3774 | return; |
| 3775 | if (gimple_phi_arg_def (gs: phi, index: succ_edge->dest_idx) != lhs) |
| 3776 | return; |
| 3777 | tree other_val = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx); |
| 3778 | if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR) |
| 3779 | { |
| 3780 | tree cond = gimple_assign_rhs1 (gs: cond_stmt); |
| 3781 | if (TREE_CODE (cond) == NE_EXPR) |
| 3782 | { |
| 3783 | if (!operand_equal_p (other_val, |
| 3784 | gimple_assign_rhs3 (gs: cond_stmt), flags: 0)) |
| 3785 | return; |
| 3786 | } |
| 3787 | else if (!operand_equal_p (other_val, |
| 3788 | gimple_assign_rhs2 (gs: cond_stmt), flags: 0)) |
| 3789 | return; |
| 3790 | } |
| 3791 | else if (gimple_assign_rhs_code (gs: cond_stmt) == NE_EXPR) |
| 3792 | { |
| 3793 | if (!integer_zerop (other_val)) |
| 3794 | return; |
| 3795 | } |
| 3796 | else if (!integer_onep (other_val)) |
| 3797 | return; |
| 3798 | } |
| 3799 | if (pred_edge->flags & EDGE_TRUE_VALUE) |
| 3800 | gimple_cond_make_true (gs: zero_cond); |
| 3801 | else |
| 3802 | gimple_cond_make_false (gs: zero_cond); |
| 3803 | update_stmt (s: zero_cond); |
| 3804 | *cfg_changed = true; |
| 3805 | } |
| 3806 | |
| 3807 | /* Helper function for arith_overflow_check_p. Return true |
| 3808 | if VAL1 is equal to VAL2 cast to the corresponding integral type |
| 3809 | with the other signedness, or vice versa. */ |
| 3810 | |
| 3811 | static bool |
| 3812 | arith_cast_equal_p (tree val1, tree val2) |
| 3813 | { |
| 3814 | if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST) |
| 3815 | return wi::eq_p (x: wi::to_wide (t: val1), y: wi::to_wide (t: val2)); |
| 3816 | else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME) |
| 3817 | return false; |
| 3818 | if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1)) |
| 3819 | && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2) |
| 3820 | return true; |
| 3821 | if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2)) |
| 3822 | && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1) |
| 3823 | return true; |
| 3824 | return false; |
| 3825 | } |
| 3826 | |
| 3827 | /* Helper function of match_arith_overflow. Return 1 |
| 3828 | if USE_STMT is an unsigned overflow check ovf != 0 for |
| 3829 | STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0, |
| 3830 | and 0 otherwise. */ |
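| | /* Two illustrative shapes (assumed examples mirroring the cases handled |
| | below): for |
| | |
| | r_1 = a_2 + b_3; |
| | if (r_1 < a_2) |
| | |
| | the condition is true exactly when the unsigned addition wrapped, so we |
| | return 1; for the inverted test if (r_1 >= a_2) we return -1; anything |
| | that is not recognized yields 0. */ |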
| 3831 | |
| 3832 | static int |
| 3833 | arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, |
| 3834 | tree maxval, tree *other) |
| 3835 | { |
| 3836 | enum tree_code ccode = ERROR_MARK; |
| 3837 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; |
| 3838 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
| 3839 | tree lhs = gimple_assign_lhs (gs: cast_stmt ? cast_stmt : stmt); |
| 3840 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
| 3841 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
| 3842 | tree multop = NULL_TREE, divlhs = NULL_TREE; |
| 3843 | gimple *cur_use_stmt = use_stmt; |
| 3844 | |
| 3845 | if (code == MULT_EXPR) |
| 3846 | { |
| 3847 | if (!is_gimple_assign (gs: use_stmt)) |
| 3848 | return 0; |
| 3849 | if (gimple_assign_rhs_code (gs: use_stmt) != TRUNC_DIV_EXPR) |
| 3850 | return 0; |
| 3851 | if (gimple_assign_rhs1 (gs: use_stmt) != lhs) |
| 3852 | return 0; |
| 3853 | if (cast_stmt) |
| 3854 | { |
| 3855 | if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs1)) |
| 3856 | multop = rhs2; |
| 3857 | else if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs2)) |
| 3858 | multop = rhs1; |
| 3859 | else |
| 3860 | return 0; |
| 3861 | } |
| 3862 | else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1) |
| 3863 | multop = rhs2; |
| 3864 | else if (operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), rhs2, flags: 0)) |
| 3865 | multop = rhs1; |
| 3866 | else |
| 3867 | return 0; |
| 3868 | if (stmt_ends_bb_p (use_stmt)) |
| 3869 | return 0; |
| 3870 | divlhs = gimple_assign_lhs (gs: use_stmt); |
| 3871 | if (!divlhs) |
| 3872 | return 0; |
| 3873 | use_operand_p use; |
| 3874 | if (!single_imm_use (var: divlhs, use_p: &use, stmt: &cur_use_stmt)) |
| 3875 | return 0; |
| 3876 | if (cast_stmt && gimple_assign_cast_p (s: cur_use_stmt)) |
| 3877 | { |
| 3878 | tree cast_lhs = gimple_assign_lhs (gs: cur_use_stmt); |
| 3879 | if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs)) |
| 3880 | && TYPE_UNSIGNED (TREE_TYPE (cast_lhs)) |
| 3881 | && (TYPE_PRECISION (TREE_TYPE (cast_lhs)) |
| 3882 | == TYPE_PRECISION (TREE_TYPE (divlhs))) |
| 3883 | && single_imm_use (var: cast_lhs, use_p: &use, stmt: &cur_use_stmt)) |
| 3884 | { |
| 3885 | cast_stmt = NULL; |
| 3886 | divlhs = cast_lhs; |
| 3887 | } |
| 3888 | else |
| 3889 | return 0; |
| 3890 | } |
| 3891 | } |
| 3892 | if (gimple_code (g: cur_use_stmt) == GIMPLE_COND) |
| 3893 | { |
| 3894 | ccode = gimple_cond_code (gs: cur_use_stmt); |
| 3895 | crhs1 = gimple_cond_lhs (gs: cur_use_stmt); |
| 3896 | crhs2 = gimple_cond_rhs (gs: cur_use_stmt); |
| 3897 | } |
| 3898 | else if (is_gimple_assign (gs: cur_use_stmt)) |
| 3899 | { |
| 3900 | if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS) |
| 3901 | { |
| 3902 | ccode = gimple_assign_rhs_code (gs: cur_use_stmt); |
| 3903 | crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt); |
| 3904 | crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt); |
| 3905 | } |
| 3906 | else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR) |
| 3907 | { |
| 3908 | tree cond = gimple_assign_rhs1 (gs: cur_use_stmt); |
| 3909 | if (COMPARISON_CLASS_P (cond)) |
| 3910 | { |
| 3911 | ccode = TREE_CODE (cond); |
| 3912 | crhs1 = TREE_OPERAND (cond, 0); |
| 3913 | crhs2 = TREE_OPERAND (cond, 1); |
| 3914 | } |
| 3915 | else |
| 3916 | return 0; |
| 3917 | } |
| 3918 | else |
| 3919 | return 0; |
| 3920 | } |
| 3921 | else |
| 3922 | return 0; |
| 3923 | |
| 3924 | if (maxval |
| 3925 | && ccode == RSHIFT_EXPR |
| 3926 | && crhs1 == lhs |
| 3927 | && TREE_CODE (crhs2) == INTEGER_CST |
| 3928 | && wi::to_widest (t: crhs2) == TYPE_PRECISION (TREE_TYPE (maxval))) |
| 3929 | { |
| 3930 | tree shiftlhs = gimple_assign_lhs (gs: use_stmt); |
| 3931 | if (!shiftlhs) |
| 3932 | return 0; |
| 3933 | use_operand_p use; |
| 3934 | if (!single_imm_use (var: shiftlhs, use_p: &use, stmt: &cur_use_stmt)) |
| 3935 | return 0; |
| 3936 | if (gimple_code (g: cur_use_stmt) == GIMPLE_COND) |
| 3937 | { |
| 3938 | ccode = gimple_cond_code (gs: cur_use_stmt); |
| 3939 | crhs1 = gimple_cond_lhs (gs: cur_use_stmt); |
| 3940 | crhs2 = gimple_cond_rhs (gs: cur_use_stmt); |
| 3941 | } |
| 3942 | else if (is_gimple_assign (gs: cur_use_stmt)) |
| 3943 | { |
| 3944 | if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS) |
| 3945 | { |
| 3946 | ccode = gimple_assign_rhs_code (gs: cur_use_stmt); |
| 3947 | crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt); |
| 3948 | crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt); |
| 3949 | } |
| 3950 | else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR) |
| 3951 | { |
| 3952 | tree cond = gimple_assign_rhs1 (gs: cur_use_stmt); |
| 3953 | if (COMPARISON_CLASS_P (cond)) |
| 3954 | { |
| 3955 | ccode = TREE_CODE (cond); |
| 3956 | crhs1 = TREE_OPERAND (cond, 0); |
| 3957 | crhs2 = TREE_OPERAND (cond, 1); |
| 3958 | } |
| 3959 | else |
| 3960 | return 0; |
| 3961 | } |
| 3962 | else |
| 3963 | { |
| 3964 | enum tree_code sc = gimple_assign_rhs_code (gs: cur_use_stmt); |
| 3965 | tree castlhs = gimple_assign_lhs (gs: cur_use_stmt); |
| 3966 | if (!CONVERT_EXPR_CODE_P (sc) |
| 3967 | || !castlhs |
| 3968 | || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs)) |
| 3969 | || (TYPE_PRECISION (TREE_TYPE (castlhs)) |
| 3970 | > TYPE_PRECISION (TREE_TYPE (maxval)))) |
| 3971 | return 0; |
| 3972 | return 1; |
| 3973 | } |
| 3974 | } |
| 3975 | else |
| 3976 | return 0; |
| 3977 | if ((ccode != EQ_EXPR && ccode != NE_EXPR) |
| 3978 | || crhs1 != shiftlhs |
| 3979 | || !integer_zerop (crhs2)) |
| 3980 | return 0; |
| 3981 | return 1; |
| 3982 | } |
| 3983 | |
| 3984 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) |
| 3985 | return 0; |
| 3986 | |
| 3987 | switch (ccode) |
| 3988 | { |
| 3989 | case GT_EXPR: |
| 3990 | case LE_EXPR: |
| 3991 | if (maxval) |
| 3992 | { |
| 3993 | /* r = a + b; r > maxval or r <= maxval */ |
| 3994 | if (crhs1 == lhs |
| 3995 | && TREE_CODE (crhs2) == INTEGER_CST |
| 3996 | && tree_int_cst_equal (crhs2, maxval)) |
| 3997 | return ccode == GT_EXPR ? 1 : -1; |
| 3998 | break; |
| 3999 | } |
| 4000 | /* r = a - b; r > a or r <= a |
| 4001 | r = a + b; a > r or a <= r or b > r or b <= r. */ |
| 4002 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) |
| 4003 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) |
| 4004 | && crhs2 == lhs)) |
| 4005 | return ccode == GT_EXPR ? 1 : -1; |
| 4006 | /* r = ~a; b > r or b <= r. */ |
| 4007 | if (code == BIT_NOT_EXPR && crhs2 == lhs) |
| 4008 | { |
| 4009 | if (other) |
| 4010 | *other = crhs1; |
| 4011 | return ccode == GT_EXPR ? 1 : -1; |
| 4012 | } |
| 4013 | break; |
| 4014 | case LT_EXPR: |
| 4015 | case GE_EXPR: |
| 4016 | if (maxval) |
| 4017 | break; |
| 4018 | /* r = a - b; a < r or a >= r |
| 4019 | r = a + b; r < a or r >= a or r < b or r >= b. */ |
| 4020 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) |
| 4021 | || (code == PLUS_EXPR && crhs1 == lhs |
| 4022 | && (crhs2 == rhs1 || crhs2 == rhs2))) |
| 4023 | return ccode == LT_EXPR ? 1 : -1; |
| 4024 | /* r = ~a; r < b or r >= b. */ |
| 4025 | if (code == BIT_NOT_EXPR && crhs1 == lhs) |
| 4026 | { |
| 4027 | if (other) |
| 4028 | *other = crhs2; |
| 4029 | return ccode == LT_EXPR ? 1 : -1; |
| 4030 | } |
| 4031 | break; |
| 4032 | case EQ_EXPR: |
| 4033 | case NE_EXPR: |
| 4034 | /* r = a * b; _1 = r / a; _1 == b |
| 4035 | r = a * b; _1 = r / b; _1 == a |
| 4036 | r = a * b; _1 = r / a; _1 != b |
| 4037 | r = a * b; _1 = r / b; _1 != a. */ |
| 4038 | if (code == MULT_EXPR) |
| 4039 | { |
| 4040 | if (cast_stmt) |
| 4041 | { |
| 4042 | if ((crhs1 == divlhs && arith_cast_equal_p (val1: crhs2, val2: multop)) |
| 4043 | || (crhs2 == divlhs && arith_cast_equal_p (val1: crhs1, val2: multop))) |
| 4044 | { |
| 4045 | use_stmt = cur_use_stmt; |
| 4046 | return ccode == NE_EXPR ? 1 : -1; |
| 4047 | } |
| 4048 | } |
| 4049 | else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, flags: 0)) |
| 4050 | || (crhs2 == divlhs && crhs1 == multop)) |
| 4051 | { |
| 4052 | use_stmt = cur_use_stmt; |
| 4053 | return ccode == NE_EXPR ? 1 : -1; |
| 4054 | } |
| 4055 | } |
| 4056 | break; |
| 4057 | default: |
| 4058 | break; |
| 4059 | } |
| 4060 | return 0; |
| 4061 | } |
| 4062 | |
| 4063 | extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); |
| 4064 | extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); |
| 4065 | extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); |
| 4066 | |
| 4067 | extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); |
| 4068 | extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree)); |
| 4069 | extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree)); |
| 4070 | |
| 4071 | static void |
| 4072 | build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi, |
| 4073 | internal_fn fn, tree lhs, |
| 4074 | tree op_0, tree op_1) |
| 4075 | { |
| 4076 | if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) |
| 4077 | { |
| 4078 | gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); |
| 4079 | gimple_call_set_lhs (gs: call, lhs); |
| 4080 | gsi_replace (gsi, call, /* update_eh_info */ true); |
| 4081 | } |
| 4082 | } |
| 4083 | |
| 4084 | static bool |
| 4085 | build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi, |
| 4086 | internal_fn fn, tree lhs, |
| 4087 | tree op_0, tree op_1) |
| 4088 | { |
| 4089 | if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH)) |
| 4090 | return false; |
| 4091 | |
| 4092 | gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); |
| 4093 | gimple_call_set_lhs (gs: call, lhs); |
| 4094 | gsi_insert_before (gsi, call, GSI_SAME_STMT); |
| 4095 | |
| 4096 | return true; |
| 4097 | } |
| 4098 | |
| 4099 | /* |
| 4100 | * Try to match saturation unsigned add with assign. |
| 4101 | * _7 = _4 + _6; |
| 4102 | * _8 = _4 > _7; |
| 4103 | * _9 = (long unsigned int) _8; |
| 4104 | * _10 = -_9; |
| 4105 | * _12 = _7 | _10; |
| 4106 | * => |
| 4107 | * _12 = .SAT_ADD (_4, _6); |
| 4108 | * |
| 4109 | * Try to match IMM=-1 saturation signed add with assign. |
| 4110 | * <bb 2> [local count: 1073741824]: |
| 4111 | * x.0_1 = (unsigned char) x_5(D); |
| 4112 | * _3 = -x.0_1; |
| 4113 | * _10 = (signed char) _3; |
| 4114 | * _8 = x_5(D) & _10; |
| 4115 | * if (_8 < 0) |
| 4116 | * goto <bb 4>; [1.40%] |
| 4117 | * else |
| 4118 | * goto <bb 3>; [98.60%] |
| 4119 | * <bb 3> [local count: 434070867]: |
| 4120 | * _2 = x.0_1 + 255; |
| 4121 | * <bb 4> [local count: 1073741824]: |
| 4122 | * # _9 = PHI <_2(3), 128(2)> |
| 4123 | * _4 = (int8_t) _9; |
| 4124 | * => |
| 4125 | * _4 = .SAT_ADD (x_5, -1); */ |
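| | /* A hedged source-level example of the first (branchless) shape above; |
| | the function below is hypothetical, not from the testsuite: |
| | |
| | uint64_t sat_addu (uint64_t a, uint64_t b) |
| | { |
| |   uint64_t sum = a + b; |
| |   return sum | -(uint64_t)(sum < a); |
| | } |
| | |
| | which this matcher rewrites to a single .SAT_ADD (a, b) call when the |
| | target supports the operation. */ |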
| 4126 | |
| 4127 | static void |
| 4128 | match_saturation_add_with_assign (gimple_stmt_iterator *gsi, gassign *stmt) |
| 4129 | { |
| 4130 | tree ops[2]; |
| 4131 | tree lhs = gimple_assign_lhs (gs: stmt); |
| 4132 | |
| 4133 | if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) |
| 4134 | || gimple_signed_integer_sat_add (lhs, ops, NULL)) |
| 4135 | build_saturation_binary_arith_call_and_replace (gsi, fn: IFN_SAT_ADD, lhs, |
| 4136 | op_0: ops[0], op_1: ops[1]); |
| 4137 | } |
| 4138 | |
| 4139 | /* |
| 4140 | * Try to match saturation add with PHI. |
| 4141 | * For unsigned integer: |
| 4142 | * <bb 2> : |
| 4143 | * _1 = x_3(D) + y_4(D); |
| 4144 | * if (_1 >= x_3(D)) |
| 4145 | * goto <bb 3>; [INV] |
| 4146 | * else |
| 4147 | * goto <bb 4>; [INV] |
| 4148 | * |
| 4149 | * <bb 3> : |
| 4150 | * |
| 4151 | * <bb 4> : |
| 4152 | * # _2 = PHI <255(2), _1(3)> |
| 4153 | * => |
| 4154 | * <bb 4> [local count: 1073741824]: |
| 4155 | * _2 = .SAT_ADD (x_4(D), y_5(D)); |
| 4156 | * |
| 4157 | * For signed integer: |
| 4158 | * x.0_1 = (long unsigned int) x_7(D); |
| 4159 | * y.1_2 = (long unsigned int) y_8(D); |
| 4160 | * _3 = x.0_1 + y.1_2; |
| 4161 | * sum_9 = (int64_t) _3; |
| 4162 | * _4 = x_7(D) ^ y_8(D); |
| 4163 | * _5 = x_7(D) ^ sum_9; |
| 4164 | * _15 = ~_4; |
| 4165 | * _16 = _5 & _15; |
| 4166 | * if (_16 < 0) |
| 4167 | * goto <bb 3>; [41.00%] |
| 4168 | * else |
| 4169 | * goto <bb 4>; [59.00%] |
| 4170 | * _11 = x_7(D) < 0; |
| 4171 | * _12 = (long int) _11; |
| 4172 | * _13 = -_12; |
| 4173 | * _14 = _13 ^ 9223372036854775807; |
| 4174 | * # _6 = PHI <_14(3), sum_9(2)> |
| 4175 | * => |
| 4176 | * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */ |
| 4177 | |
| 4178 | static bool |
| 4179 | match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi) |
| 4180 | { |
| 4181 | if (gimple_phi_num_args (gs: phi) != 2) |
| 4182 | return false; |
| 4183 | |
| 4184 | tree ops[2]; |
| 4185 | tree phi_result = gimple_phi_result (gs: phi); |
| 4186 | |
| 4187 | if (!gimple_unsigned_integer_sat_add (phi_result, ops, NULL) |
| 4188 | && !gimple_signed_integer_sat_add (phi_result, ops, NULL)) |
| 4189 | return false; |
| 4190 | |
| 4191 | if (!TYPE_UNSIGNED (TREE_TYPE (ops[0])) && TREE_CODE (ops[1]) == INTEGER_CST) |
| 4192 | ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); |
| 4193 | |
| 4194 | return build_saturation_binary_arith_call_and_insert (gsi, fn: IFN_SAT_ADD, |
| 4195 | lhs: phi_result, op_0: ops[0], |
| 4196 | op_1: ops[1]); |
| 4197 | } |
| 4198 | |
| 4199 | /* |
| 4200 | * Try to match saturation unsigned sub. |
| 4201 | * _1 = _4 >= _5; |
| 4202 | * _3 = _4 - _5; |
| 4203 | * _6 = _1 ? _3 : 0; |
| 4204 | * => |
| 4205 | * _6 = .SAT_SUB (_4, _5); */ |
| 4206 | |
| 4207 | static void |
| 4208 | match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) |
| 4209 | { |
| 4210 | tree ops[2]; |
| 4211 | tree lhs = gimple_assign_lhs (gs: stmt); |
| 4212 | |
| 4213 | if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) |
| 4214 | build_saturation_binary_arith_call_and_replace (gsi, fn: IFN_SAT_SUB, lhs, |
| 4215 | op_0: ops[0], op_1: ops[1]); |
| 4216 | } |
| 4217 | |
| 4218 | /* |
| 4219 | * Try to match saturation sub with PHI. |
| 4220 | * <bb 2> [local count: 1073741824]: |
| 4221 | * if (x_2(D) > y_3(D)) |
| 4222 | * goto <bb 3>; [50.00%] |
| 4223 | * else |
| 4224 | * goto <bb 4>; [50.00%] |
| 4225 | * |
| 4226 | * <bb 3> [local count: 536870912]: |
| 4227 | * _4 = x_2(D) - y_3(D); |
| 4228 | * |
| 4229 | * <bb 4> [local count: 1073741824]: |
| 4230 | * # _1 = PHI <0(2), _4(3)> |
| 4231 | * => |
| 4232 | * <bb 4> [local count: 1073741824]: |
| 4233 | * _1 = .SAT_SUB (x_2(D), y_3(D)); */ |
| 4234 | static bool |
| 4235 | match_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi) |
| 4236 | { |
| 4237 | if (gimple_phi_num_args (gs: phi) != 2) |
| 4238 | return false; |
| 4239 | |
| 4240 | tree ops[2]; |
| 4241 | tree phi_result = gimple_phi_result (gs: phi); |
| 4242 | |
| 4243 | if (!gimple_unsigned_integer_sat_sub (phi_result, ops, NULL) |
| 4244 | && !gimple_signed_integer_sat_sub (phi_result, ops, NULL)) |
| 4245 | return false; |
| 4246 | |
| 4247 | return build_saturation_binary_arith_call_and_insert (gsi, fn: IFN_SAT_SUB, |
| 4248 | lhs: phi_result, op_0: ops[0], |
| 4249 | op_1: ops[1]); |
| 4250 | } |
| 4251 | |
| 4252 | /* |
| 4253 | * Try to match unsigned saturation truncate. |
| 4254 | * uint16_t x_4(D); |
| 4255 | * uint8_t _6; |
| 4256 | * overflow_5 = x_4(D) > 255; |
| 4257 | * _1 = (unsigned char) x_4(D); |
| 4258 | * _2 = (unsigned char) overflow_5; |
| 4259 | * _3 = -_2; |
| 4260 | * _6 = _1 | _3; |
| 4261 | * => |
| 4262 | * _6 = .SAT_TRUNC (x_4(D)); |
| 4263 | * */ |
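| | /* Hedged illustration of a source form that produces the shape above (the |
| | function is made up for exposition): |
| | |
| | uint8_t sat_truncu (uint16_t x) |
| | { |
| |   uint8_t lo = (uint8_t) x; |
| |   uint8_t sat = -(uint8_t)(x > 255); |
| |   return lo | sat; |
| | } |
| | |
| | i.e. the low byte, forced to 255 whenever X does not fit. */ |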
| 4264 | static void |
| 4265 | match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt) |
| 4266 | { |
| 4267 | tree ops[1]; |
| 4268 | tree lhs = gimple_assign_lhs (gs: stmt); |
| 4269 | tree type = TREE_TYPE (lhs); |
| 4270 | |
| 4271 | if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL) |
| 4272 | && direct_internal_fn_supported_p (IFN_SAT_TRUNC, |
| 4273 | tree_pair (type, TREE_TYPE (ops[0])), |
| 4274 | OPTIMIZE_FOR_BOTH)) |
| 4275 | { |
| 4276 | gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]); |
| 4277 | gimple_call_set_lhs (gs: call, lhs); |
| 4278 | gsi_replace (gsi, call, /* update_eh_info */ true); |
| 4279 | } |
| 4280 | } |
| 4281 | |
| 4282 | /* |
| 4283 | * Try to match saturation truncate. |
| 4284 | * Aka: |
| 4285 | * x.0_1 = (unsigned long) x_4(D); |
| 4286 | * _2 = x.0_1 + 2147483648; |
| 4287 | * if (_2 > 4294967295) |
| 4288 | * goto <bb 4>; [50.00%] |
| 4289 | * else |
| 4290 | * goto <bb 3>; [50.00%] |
| 4291 | * ;; succ: 4 |
| 4292 | * ;; 3 |
| 4293 | * |
| 4294 | * ;; basic block 3, loop depth 0 |
| 4295 | * ;; pred: 2 |
| 4296 | * trunc_5 = (int32_t) x_4(D); |
| 4297 | * goto <bb 5>; [100.00%] |
| 4298 | * ;; succ: 5 |
| 4299 | * |
| 4300 | * ;; basic block 4, loop depth 0 |
| 4301 | * ;; pred: 2 |
| 4302 | * _7 = x_4(D) < 0; |
| 4303 | * _8 = (int) _7; |
| 4304 | * _9 = -_8; |
| 4305 | * _10 = _9 ^ 2147483647; |
| 4306 | * ;; succ: 5 |
| 4307 | * |
| 4308 | * ;; basic block 5, loop depth 0 |
| 4309 | * ;; pred: 3 |
| 4310 | * ;; 4 |
| 4311 | * # _3 = PHI <trunc_5(3), _10(4)> |
| 4312 | * => |
| 4313 | * _6 = .SAT_TRUNC (x_4(D)); |
| 4314 | */ |
| 4315 | |
| 4316 | static bool |
| 4317 | match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi) |
| 4318 | { |
| 4319 | if (gimple_phi_num_args (gs: phi) != 2) |
| 4320 | return false; |
| 4321 | |
| 4322 | tree ops[1]; |
| 4323 | tree phi_result = gimple_phi_result (gs: phi); |
| 4324 | tree type = TREE_TYPE (phi_result); |
| 4325 | |
| 4326 | if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL) |
| 4327 | && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL)) |
| 4328 | return false; |
| 4329 | |
| 4330 | if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC, |
| 4331 | tree_pair (type, TREE_TYPE (ops[0])), |
| 4332 | OPTIMIZE_FOR_BOTH)) |
| 4333 | return false; |
| 4334 | |
| 4335 | gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]); |
| 4336 | gimple_call_set_lhs (gs: call, lhs: phi_result); |
| 4337 | gsi_insert_before (gsi, call, GSI_SAME_STMT); |
| 4338 | |
| 4339 | return true; |
| 4340 | } |
| 4341 | |
| 4342 | /* Recognize for unsigned x |
| 4343 | x = y - z; |
| 4344 | if (x > y) |
| 4345 | where there are other uses of x and replace it with |
| 4346 | _7 = .SUB_OVERFLOW (y, z); |
| 4347 | x = REALPART_EXPR <_7>; |
| 4348 | _8 = IMAGPART_EXPR <_7>; |
| 4349 | if (_8) |
| 4350 | and similarly for addition. |
| 4351 | |
| 4352 | Also recognize: |
| 4353 | yc = (type) y; |
| 4354 | zc = (type) z; |
| 4355 | x = yc + zc; |
| 4356 | if (x > max) |
| 4357 | where y and z have unsigned types with maximum max |
| 4358 | and there are other uses of x and all of those cast x |
| 4359 | back to that unsigned type and again replace it with |
| 4360 | _7 = .ADD_OVERFLOW (y, z); |
| 4361 | _9 = REALPART_EXPR <_7>; |
| 4362 | _8 = IMAGPART_EXPR <_7>; |
| 4363 | if (_8) |
| 4364 | and replace (utype) x with _9. |
| 4365 | Or with x >> popcount (max) instead of x > max. |
| 4366 | |
| 4367 | Also recognize: |
| 4368 | x = ~z; |
| 4369 | if (y > x) |
| 4370 | and replace it with |
| 4371 | _7 = .ADD_OVERFLOW (y, z); |
| 4372 | _8 = IMAGPART_EXPR <_7>; |
| 4373 | if (_8) |
| 4374 | |
| 4375 | And also recognize: |
| 4376 | z = x * y; |
| 4377 | if (x != 0) |
| 4378 | goto <bb 3>; [50.00%] |
| 4379 | else |
| 4380 | goto <bb 4>; [50.00%] |
| 4381 | |
| 4382 | <bb 3> [local count: 536870913]: |
| 4383 | _2 = z / x; |
| 4384 | _9 = _2 != y; |
| 4385 | _10 = (int) _9; |
| 4386 | |
| 4387 | <bb 4> [local count: 1073741824]: |
| 4388 | # iftmp.0_3 = PHI <_10(3), 0(2)> |
| 4389 | and replace it with |
| 4390 | _7 = .MUL_OVERFLOW (x, y); |
| 4391 | z = REALPART_EXPR <_7>; |
| 4392 | _8 = IMAGPART_EXPR <_7>; |
| 4393 | _9 = _8 != 0; |
| 4394 | iftmp.0_3 = (int) _9; */ |
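| | /* Purely for illustration (hypothetical source, not from the testsuite), |
| | the unsigned idioms this matcher turns into .ADD_OVERFLOW and |
| | .SUB_OVERFLOW are the classic manual checks: |
| | |
| | unsigned add_ovf (unsigned y, unsigned z, int *ovf) |
| | { |
| |   unsigned x = y + z; |
| |   *ovf = x < y; |
| |   return x; |
| | } |
| | |
| | unsigned sub_ovf (unsigned y, unsigned z, int *ovf) |
| | { |
| |   unsigned x = y - z; |
| |   *ovf = x > y; |
| |   return x; |
| | } |
| | |
| | after which the comparison simply reads the IMAGPART_EXPR of the |
| | internal-call result instead of recomputing the condition. */ |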
| 4395 | |
| 4396 | static bool |
| 4397 | match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, |
| 4398 | enum tree_code code, bool *cfg_changed) |
| 4399 | { |
| 4400 | tree lhs = gimple_assign_lhs (gs: stmt); |
| 4401 | tree type = TREE_TYPE (lhs); |
| 4402 | use_operand_p use_p; |
| 4403 | imm_use_iterator iter; |
| 4404 | bool use_seen = false; |
| 4405 | bool ovf_use_seen = false; |
| 4406 | gimple *use_stmt; |
| 4407 | gimple *add_stmt = NULL; |
| 4408 | bool add_first = false; |
| 4409 | gimple *cond_stmt = NULL; |
| 4410 | gimple *cast_stmt = NULL; |
| 4411 | tree cast_lhs = NULL_TREE; |
| 4412 | |
| 4413 | gcc_checking_assert (code == PLUS_EXPR |
| 4414 | || code == MINUS_EXPR |
| 4415 | || code == MULT_EXPR |
| 4416 | || code == BIT_NOT_EXPR); |
| 4417 | if (!INTEGRAL_TYPE_P (type) |
| 4418 | || !TYPE_UNSIGNED (type) |
| 4419 | || has_zero_uses (var: lhs) |
| 4420 | || (code != PLUS_EXPR |
| 4421 | && code != MULT_EXPR |
| 4422 | && optab_handler (op: code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, |
| 4423 | TYPE_MODE (type)) == CODE_FOR_nothing)) |
| 4424 | return false; |
| 4425 | |
| 4426 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
| 4427 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
| 4428 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
| 4429 | { |
| 4430 | use_stmt = USE_STMT (use_p); |
| 4431 | if (is_gimple_debug (gs: use_stmt)) |
| 4432 | continue; |
| 4433 | |
| 4434 | tree other = NULL_TREE; |
| 4435 | if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, other: &other)) |
| 4436 | { |
| 4437 | if (code == BIT_NOT_EXPR) |
| 4438 | { |
| 4439 | gcc_assert (other); |
| 4440 | if (TREE_CODE (other) != SSA_NAME) |
| 4441 | return false; |
| 4442 | if (rhs2 == NULL) |
| 4443 | rhs2 = other; |
| 4444 | else |
| 4445 | return false; |
| 4446 | cond_stmt = use_stmt; |
| 4447 | } |
| 4448 | ovf_use_seen = true; |
| 4449 | } |
| 4450 | else |
| 4451 | { |
| 4452 | use_seen = true; |
| 4453 | if (code == MULT_EXPR |
| 4454 | && cast_stmt == NULL |
| 4455 | && gimple_assign_cast_p (s: use_stmt)) |
| 4456 | { |
| 4457 | cast_lhs = gimple_assign_lhs (gs: use_stmt); |
| 4458 | if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs)) |
| 4459 | && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs)) |
| 4460 | && (TYPE_PRECISION (TREE_TYPE (cast_lhs)) |
| 4461 | == TYPE_PRECISION (TREE_TYPE (lhs)))) |
| 4462 | cast_stmt = use_stmt; |
| 4463 | else |
| 4464 | cast_lhs = NULL_TREE; |
| 4465 | } |
| 4466 | } |
| 4467 | if (ovf_use_seen && use_seen) |
| 4468 | break; |
| 4469 | } |
| 4470 | |
| 4471 | if (!ovf_use_seen |
| 4472 | && code == MULT_EXPR |
| 4473 | && cast_stmt) |
| 4474 | { |
| 4475 | if (TREE_CODE (rhs1) != SSA_NAME |
| 4476 | || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST)) |
| 4477 | return false; |
| 4478 | FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs) |
| 4479 | { |
| 4480 | use_stmt = USE_STMT (use_p); |
| 4481 | if (is_gimple_debug (gs: use_stmt)) |
| 4482 | continue; |
| 4483 | |
| 4484 | if (arith_overflow_check_p (stmt, cast_stmt, use_stmt, |
| 4485 | NULL_TREE, NULL)) |
| 4486 | ovf_use_seen = true; |
| 4487 | } |
| 4488 | } |
| 4489 | else |
| 4490 | { |
| 4491 | cast_stmt = NULL; |
| 4492 | cast_lhs = NULL_TREE; |
| 4493 | } |
| 4494 | |
| 4495 | tree maxval = NULL_TREE; |
| 4496 | if (!ovf_use_seen |
| 4497 | || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) |
| 4498 | || (code == PLUS_EXPR |
| 4499 | && optab_handler (op: uaddv4_optab, |
| 4500 | TYPE_MODE (type)) == CODE_FOR_nothing) |
| 4501 | || (code == MULT_EXPR |
| 4502 | && optab_handler (op: cast_stmt ? mulv4_optab : umulv4_optab, |
| 4503 | TYPE_MODE (type)) == CODE_FOR_nothing |
| 4504 | && (use_seen |
| 4505 | || cast_stmt |
| 4506 | || !can_mult_highpart_p (TYPE_MODE (type), true)))) |
| 4507 | { |
| 4508 | if (code != PLUS_EXPR) |
| 4509 | return false; |
| 4510 | if (TREE_CODE (rhs1) != SSA_NAME |
| 4511 | || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1))) |
| 4512 | return false; |
| 4513 | rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1)); |
| 4514 | tree type1 = TREE_TYPE (rhs1); |
| 4515 | if (!INTEGRAL_TYPE_P (type1) |
| 4516 | || !TYPE_UNSIGNED (type1) |
| 4517 | || TYPE_PRECISION (type1) >= TYPE_PRECISION (type) |
| 4518 | || (TYPE_PRECISION (type1) |
| 4519 | != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1)))) |
| 4520 | return false; |
| 4521 | if (TREE_CODE (rhs2) == INTEGER_CST) |
| 4522 | { |
| 4523 | if (wi::ne_p (x: wi::rshift (x: wi::to_wide (t: rhs2), |
| 4524 | TYPE_PRECISION (type1), |
| 4525 | sgn: UNSIGNED), y: 0)) |
| 4526 | return false; |
| 4527 | rhs2 = fold_convert (type1, rhs2); |
| 4528 | } |
| 4529 | else |
| 4530 | { |
| 4531 | if (TREE_CODE (rhs2) != SSA_NAME |
| 4532 | || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2))) |
| 4533 | return false; |
| 4534 | rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2)); |
| 4535 | tree type2 = TREE_TYPE (rhs2); |
| 4536 | if (!INTEGRAL_TYPE_P (type2) |
| 4537 | || !TYPE_UNSIGNED (type2) |
| 4538 | || TYPE_PRECISION (type2) >= TYPE_PRECISION (type) |
| 4539 | || (TYPE_PRECISION (type2) |
| 4540 | != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2)))) |
| 4541 | return false; |
| 4542 | } |
| 4543 | if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2))) |
| 4544 | type = type1; |
| 4545 | else |
| 4546 | type = TREE_TYPE (rhs2); |
| 4547 | |
| 4548 | if (TREE_CODE (type) != INTEGER_TYPE |
| 4549 | || optab_handler (op: uaddv4_optab, |
| 4550 | TYPE_MODE (type)) == CODE_FOR_nothing) |
| 4551 | return false; |
| 4552 | |
| 4553 | maxval = wide_int_to_tree (type, cst: wi::max_value (TYPE_PRECISION (type), |
| 4554 | UNSIGNED)); |
| 4555 | ovf_use_seen = false; |
| 4556 | use_seen = false; |
| 4557 | basic_block use_bb = NULL; |
| 4558 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
| 4559 | { |
| 4560 | use_stmt = USE_STMT (use_p); |
| 4561 | if (is_gimple_debug (gs: use_stmt)) |
| 4562 | continue; |
| 4563 | |
| 4564 | if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL)) |
| 4565 | { |
| 4566 | ovf_use_seen = true; |
| 4567 | use_bb = gimple_bb (g: use_stmt); |
| 4568 | } |
| 4569 | else |
| 4570 | { |
| 4571 | if (!gimple_assign_cast_p (s: use_stmt) |
| 4572 | || gimple_assign_rhs_code (gs: use_stmt) == VIEW_CONVERT_EXPR) |
| 4573 | return false; |
| 4574 | tree use_lhs = gimple_assign_lhs (gs: use_stmt); |
| 4575 | if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs)) |
| 4576 | || (TYPE_PRECISION (TREE_TYPE (use_lhs)) |
| 4577 | > TYPE_PRECISION (type))) |
| 4578 | return false; |
| 4579 | use_seen = true; |
| 4580 | } |
| 4581 | } |
| 4582 | if (!ovf_use_seen) |
| 4583 | return false; |
| 4584 | if (!useless_type_conversion_p (type, TREE_TYPE (rhs1))) |
| 4585 | { |
| 4586 | if (!use_seen) |
| 4587 | return false; |
| 4588 | tree new_rhs1 = make_ssa_name (var: type); |
| 4589 | gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1); |
| 4590 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 4591 | rhs1 = new_rhs1; |
| 4592 | } |
| 4593 | else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2))) |
| 4594 | { |
| 4595 | if (!use_seen) |
| 4596 | return false; |
| 4597 | tree new_rhs2 = make_ssa_name (var: type); |
| 4598 | gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2); |
| 4599 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 4600 | rhs2 = new_rhs2; |
| 4601 | } |
| 4602 | else if (!use_seen) |
| 4603 | { |
| 4604 | /* If there are no uses of the wider addition, check whether |
| 4605 | forwprop has created a narrower addition instead. |
| 4606 | Require it to be in the same bb as the overflow check. */ |
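| | /* A hypothetical example of this shape at the source level: |
| |      unsigned char y, z, sum; |
| |      unsigned int x = (unsigned int) y + z; |
| |      if (x > 255) ... |
| |      sum = y + z; |
| |    where the wide sum x has no use besides the overflow check and the |
| |    narrower addition computing sum is what is searched for below.  */ |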
| 4607 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1) |
| 4608 | { |
| 4609 | use_stmt = USE_STMT (use_p); |
| 4610 | if (is_gimple_debug (gs: use_stmt)) |
| 4611 | continue; |
| 4612 | |
| 4613 | if (use_stmt == stmt) |
| 4614 | continue; |
| 4615 | |
| 4616 | if (!is_gimple_assign (gs: use_stmt) |
| 4617 | || gimple_bb (g: use_stmt) != use_bb |
| 4618 | || gimple_assign_rhs_code (gs: use_stmt) != PLUS_EXPR) |
| 4619 | continue; |
| 4620 | |
| 4621 | if (gimple_assign_rhs1 (gs: use_stmt) == rhs1) |
| 4622 | { |
| 4623 | if (!operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), |
| 4624 | rhs2, flags: 0)) |
| 4625 | continue; |
| 4626 | } |
| 4627 | else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1) |
| 4628 | { |
| 4629 | if (gimple_assign_rhs1 (gs: use_stmt) != rhs2) |
| 4630 | continue; |
| 4631 | } |
| 4632 | else |
| 4633 | continue; |
| 4634 | |
| 4635 | add_stmt = use_stmt; |
| 4636 | break; |
| 4637 | } |
| 4638 | if (add_stmt == NULL) |
| 4639 | return false; |
| 4640 | |
| 4641 | /* If stmt and add_stmt are in the same bb, we need to find out |
| 4642 | which one is earlier. If they are in different bbs, we've |
| 4643 | checked add_stmt is in the same bb as one of the uses of the |
| 4644 | stmt lhs, so stmt needs to dominate add_stmt too. */ |
| 4645 | if (gimple_bb (g: stmt) == gimple_bb (g: add_stmt)) |
| 4646 | { |
| 4647 | gimple_stmt_iterator gsif = *gsi; |
| 4648 | gimple_stmt_iterator gsib = *gsi; |
| 4649 | int i; |
| 4650 | /* Search both forward and backward from stmt and have a small |
| 4651 | upper bound. */ |
| 4652 | for (i = 0; i < 128; i++) |
| 4653 | { |
| 4654 | if (!gsi_end_p (i: gsib)) |
| 4655 | { |
| 4656 | gsi_prev_nondebug (i: &gsib); |
| 4657 | if (gsi_stmt (i: gsib) == add_stmt) |
| 4658 | { |
| 4659 | add_first = true; |
| 4660 | break; |
| 4661 | } |
| 4662 | } |
| 4663 | else if (gsi_end_p (i: gsif)) |
| 4664 | break; |
| 4665 | if (!gsi_end_p (i: gsif)) |
| 4666 | { |
| 4667 | gsi_next_nondebug (i: &gsif); |
| 4668 | if (gsi_stmt (i: gsif) == add_stmt) |
| 4669 | break; |
| 4670 | } |
| 4671 | } |
| 4672 | if (i == 128) |
| 4673 | return false; |
| 4674 | if (add_first) |
| 4675 | *gsi = gsi_for_stmt (add_stmt); |
| 4676 | } |
| 4677 | } |
| 4678 | } |
| 4679 | |
| 4680 | if (code == BIT_NOT_EXPR) |
| 4681 | *gsi = gsi_for_stmt (cond_stmt); |
| 4682 | |
| 4683 | auto_vec<gimple *, 8> mul_stmts; |
| 4684 | if (code == MULT_EXPR && cast_stmt) |
| 4685 | { |
| 4686 | type = TREE_TYPE (cast_lhs); |
| 4687 | gimple *g = SSA_NAME_DEF_STMT (rhs1); |
| 4688 | if (gimple_assign_cast_p (s: g) |
| 4689 | && useless_type_conversion_p (type, |
| 4690 | TREE_TYPE (gimple_assign_rhs1 (g))) |
| 4691 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) |
| 4692 | rhs1 = gimple_assign_rhs1 (gs: g); |
| 4693 | else |
| 4694 | { |
| 4695 | g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs1); |
| 4696 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 4697 | rhs1 = gimple_assign_lhs (gs: g); |
| 4698 | mul_stmts.quick_push (obj: g); |
| 4699 | } |
| 4700 | if (TREE_CODE (rhs2) == INTEGER_CST) |
| 4701 | rhs2 = fold_convert (type, rhs2); |
| 4702 | else |
| 4703 | { |
| 4704 | g = SSA_NAME_DEF_STMT (rhs2); |
| 4705 | if (gimple_assign_cast_p (s: g) |
| 4706 | && useless_type_conversion_p (type, |
| 4707 | TREE_TYPE (gimple_assign_rhs1 (g))) |
| 4708 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) |
| 4709 | rhs2 = gimple_assign_rhs1 (gs: g); |
| 4710 | else |
| 4711 | { |
| 4712 | g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs2); |
| 4713 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 4714 | rhs2 = gimple_assign_lhs (gs: g); |
| 4715 | mul_stmts.quick_push (obj: g); |
| 4716 | } |
| 4717 | } |
| 4718 | } |
| 4719 | tree ctype = build_complex_type (type); |
| 4720 | gcall *g = gimple_build_call_internal (code == MULT_EXPR |
| 4721 | ? IFN_MUL_OVERFLOW |
| 4722 | : code != MINUS_EXPR |
| 4723 | ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, |
| 4724 | 2, rhs1, rhs2); |
| 4725 | tree ctmp = make_ssa_name (var: ctype); |
| 4726 | gimple_call_set_lhs (gs: g, lhs: ctmp); |
| 4727 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 4728 | tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (var: type) : lhs; |
| 4729 | gassign *g2; |
| 4730 | if (code != BIT_NOT_EXPR) |
| 4731 | { |
| 4732 | g2 = gimple_build_assign (new_lhs, REALPART_EXPR, |
| 4733 | build1 (REALPART_EXPR, type, ctmp)); |
| 4734 | if (maxval || cast_stmt) |
| 4735 | { |
| 4736 | gsi_insert_before (gsi, g2, GSI_SAME_STMT); |
| 4737 | if (add_first) |
| 4738 | *gsi = gsi_for_stmt (stmt); |
| 4739 | } |
| 4740 | else |
| 4741 | gsi_replace (gsi, g2, true); |
| 4742 | if (code == MULT_EXPR) |
| 4743 | { |
| 4744 | mul_stmts.quick_push (obj: g); |
| 4745 | mul_stmts.quick_push (obj: g2); |
| 4746 | if (cast_stmt) |
| 4747 | { |
| 4748 | g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs); |
| 4749 | gsi_replace (gsi, g2, true); |
| 4750 | mul_stmts.quick_push (obj: g2); |
| 4751 | } |
| 4752 | } |
| 4753 | } |
| 4754 | tree ovf = make_ssa_name (var: type); |
| 4755 | g2 = gimple_build_assign (ovf, IMAGPART_EXPR, |
| 4756 | build1 (IMAGPART_EXPR, type, ctmp)); |
| 4757 | if (code != BIT_NOT_EXPR) |
| 4758 | gsi_insert_after (gsi, g2, GSI_NEW_STMT); |
| 4759 | else |
| 4760 | gsi_insert_before (gsi, g2, GSI_SAME_STMT); |
| 4761 | if (code == MULT_EXPR) |
| 4762 | mul_stmts.quick_push (obj: g2); |
| 4763 | |
| 4764 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs) |
| 4765 | { |
| 4766 | if (is_gimple_debug (gs: use_stmt)) |
| 4767 | continue; |
| 4768 | |
| 4769 | gimple *orig_use_stmt = use_stmt; |
| 4770 | int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt, |
| 4771 | maxval, NULL); |
| 4772 | if (ovf_use == 0) |
| 4773 | { |
| 4774 | gcc_assert (code != BIT_NOT_EXPR); |
| 4775 | if (maxval) |
| 4776 | { |
| 4777 | tree use_lhs = gimple_assign_lhs (gs: use_stmt); |
| 4778 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: new_lhs); |
| 4779 | if (useless_type_conversion_p (TREE_TYPE (use_lhs), |
| 4780 | TREE_TYPE (new_lhs))) |
| 4781 | gimple_assign_set_rhs_code (s: use_stmt, code: SSA_NAME); |
| 4782 | update_stmt (s: use_stmt); |
| 4783 | } |
| 4784 | continue; |
| 4785 | } |
| 4786 | if (gimple_code (g: use_stmt) == GIMPLE_COND) |
| 4787 | { |
| 4788 | gcond *cond_stmt = as_a <gcond *> (p: use_stmt); |
| 4789 | gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf); |
| 4790 | gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0)); |
| 4791 | gimple_cond_set_code (gs: cond_stmt, code: ovf_use == 1 ? NE_EXPR : EQ_EXPR); |
| 4792 | } |
| 4793 | else |
| 4794 | { |
| 4795 | gcc_checking_assert (is_gimple_assign (use_stmt)); |
| 4796 | if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS) |
| 4797 | { |
| 4798 | if (gimple_assign_rhs_code (gs: use_stmt) == RSHIFT_EXPR) |
| 4799 | { |
| 4800 | g2 = gimple_build_assign (make_ssa_name (boolean_type_node), |
| 4801 | ovf_use == 1 ? NE_EXPR : EQ_EXPR, |
| 4802 | ovf, build_int_cst (type, 0)); |
| 4803 | gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt); |
| 4804 | gsi_insert_before (&gsiu, g2, GSI_SAME_STMT); |
| 4805 | gimple_assign_set_rhs_with_ops (gsi: &gsiu, code: NOP_EXPR, |
| 4806 | op1: gimple_assign_lhs (gs: g2)); |
| 4807 | update_stmt (s: use_stmt); |
| 4808 | use_operand_p use; |
| 4809 | single_imm_use (var: gimple_assign_lhs (gs: use_stmt), use_p: &use, |
| 4810 | stmt: &use_stmt); |
| 4811 | if (gimple_code (g: use_stmt) == GIMPLE_COND) |
| 4812 | { |
| 4813 | gcond *cond_stmt = as_a <gcond *> (p: use_stmt); |
| 4814 | gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf); |
| 4815 | gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0)); |
| 4816 | } |
| 4817 | else |
| 4818 | { |
| 4819 | gcc_checking_assert (is_gimple_assign (use_stmt)); |
| 4820 | if (gimple_assign_rhs_class (gs: use_stmt) |
| 4821 | == GIMPLE_BINARY_RHS) |
| 4822 | { |
| 4823 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf); |
| 4824 | gimple_assign_set_rhs2 (gs: use_stmt, |
| 4825 | rhs: build_int_cst (type, 0)); |
| 4826 | } |
| 4827 | else if (gimple_assign_cast_p (s: use_stmt)) |
| 4828 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf); |
| 4829 | else |
| 4830 | { |
| 4831 | tree_code sc = gimple_assign_rhs_code (gs: use_stmt); |
| 4832 | gcc_checking_assert (sc == COND_EXPR); |
| 4833 | tree cond = gimple_assign_rhs1 (gs: use_stmt); |
| 4834 | cond = build2 (TREE_CODE (cond), |
| 4835 | boolean_type_node, ovf, |
| 4836 | build_int_cst (type, 0)); |
| 4837 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond); |
| 4838 | } |
| 4839 | } |
| 4840 | update_stmt (s: use_stmt); |
| 4841 | gsi_remove (&gsiu, true); |
| 4842 | gsiu = gsi_for_stmt (g2); |
| 4843 | gsi_remove (&gsiu, true); |
| 4844 | continue; |
| 4845 | } |
| 4846 | else |
| 4847 | { |
| 4848 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf); |
| 4849 | gimple_assign_set_rhs2 (gs: use_stmt, rhs: build_int_cst (type, 0)); |
| 4850 | gimple_assign_set_rhs_code (s: use_stmt, |
| 4851 | code: ovf_use == 1 |
| 4852 | ? NE_EXPR : EQ_EXPR); |
| 4853 | } |
| 4854 | } |
| 4855 | else |
| 4856 | { |
| 4857 | gcc_checking_assert (gimple_assign_rhs_code (use_stmt) |
| 4858 | == COND_EXPR); |
| 4859 | tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, |
| 4860 | boolean_type_node, ovf, |
| 4861 | build_int_cst (type, 0)); |
| 4862 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond); |
| 4863 | } |
| 4864 | } |
| 4865 | update_stmt (s: use_stmt); |
| 4866 | if (code == MULT_EXPR && use_stmt != orig_use_stmt) |
| 4867 | { |
| 4868 | gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt); |
| 4869 | maybe_optimize_guarding_check (mul_stmts, cond_stmt: use_stmt, div_stmt: orig_use_stmt, |
| 4870 | cfg_changed); |
| 4871 | use_operand_p use; |
| 4872 | gimple *cast_stmt; |
| 4873 | if (single_imm_use (var: gimple_assign_lhs (gs: orig_use_stmt), use_p: &use, |
| 4874 | stmt: &cast_stmt) |
| 4875 | && gimple_assign_cast_p (s: cast_stmt)) |
| 4876 | { |
| 4877 | gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt); |
| 4878 | gsi_remove (&gsi3, true); |
| 4879 | release_ssa_name (name: gimple_assign_lhs (gs: cast_stmt)); |
| 4880 | } |
| 4881 | gsi_remove (&gsi2, true); |
| 4882 | release_ssa_name (name: gimple_assign_lhs (gs: orig_use_stmt)); |
| 4883 | } |
| 4884 | } |
| 4885 | if (maxval) |
| 4886 | { |
| 4887 | gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); |
| 4888 | gsi_remove (&gsi2, true); |
| 4889 | if (add_stmt) |
| 4890 | { |
| 4891 | gimple *g = gimple_build_assign (gimple_assign_lhs (gs: add_stmt), |
| 4892 | new_lhs); |
| 4893 | gsi2 = gsi_for_stmt (add_stmt); |
| 4894 | gsi_replace (&gsi2, g, true); |
| 4895 | } |
| 4896 | } |
| 4897 | else if (code == BIT_NOT_EXPR) |
| 4898 | { |
| 4899 | *gsi = gsi_for_stmt (stmt); |
| 4900 | gsi_remove (gsi, true); |
| 4901 | release_ssa_name (name: lhs); |
| 4902 | return true; |
| 4903 | } |
| 4904 | return false; |
| 4905 | } |
| 4906 | |
| 4907 | /* Helper of match_uaddc_usubc. Look through an integral cast |
| 4908 | which should preserve values in the [0, 1] range (unless the source |
| 4909 | has a 1-bit signed type), provided the cast result has a single use. */ |
| 4910 | |
| 4911 | static gimple * |
| 4912 | uaddc_cast (gimple *g) |
| 4913 | { |
| 4914 | if (!gimple_assign_cast_p (s: g)) |
| 4915 | return g; |
| 4916 | tree op = gimple_assign_rhs1 (gs: g); |
| 4917 | if (TREE_CODE (op) == SSA_NAME |
| 4918 | && INTEGRAL_TYPE_P (TREE_TYPE (op)) |
| 4919 | && (TYPE_PRECISION (TREE_TYPE (op)) > 1 |
| 4920 | || TYPE_UNSIGNED (TREE_TYPE (op))) |
| 4921 | && has_single_use (var: gimple_assign_lhs (gs: g))) |
| 4922 | return SSA_NAME_DEF_STMT (op); |
| 4923 | return g; |
| 4924 | } |
| 4925 | |
| 4926 | /* Helper of match_uaddc_usubc. Look through a NE_EXPR |
| 4927 | comparison with 0 which also preserves [0, 1] value range. */ |
| 4928 | |
| 4929 | static gimple * |
| 4930 | uaddc_ne0 (gimple *g) |
| 4931 | { |
| 4932 | if (is_gimple_assign (gs: g) |
| 4933 | && gimple_assign_rhs_code (gs: g) == NE_EXPR |
| 4934 | && integer_zerop (gimple_assign_rhs2 (gs: g)) |
| 4935 | && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME |
| 4936 | && has_single_use (var: gimple_assign_lhs (gs: g))) |
| 4937 | return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g)); |
| 4938 | return g; |
| 4939 | } |
| 4940 | |
| 4941 | /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME |
| 4942 | operand. */ |
| 4943 | |
| 4944 | static bool |
| 4945 | uaddc_is_cplxpart (gimple *g, tree_code part) |
| 4946 | { |
| 4947 | return (is_gimple_assign (gs: g) |
| 4948 | && gimple_assign_rhs_code (gs: g) == part |
| 4949 | && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME); |
| 4950 | } |
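| | |
| | /* For example (hypothetical SSA names), given |
| |      _5 = IMAGPART_EXPR <_4>; |
| |      _6 = _5 != 0; |
| |      _7 = (unsigned int) _6; |
| |    uaddc_cast on the definition of _7 returns the definition of _6, |
| |    uaddc_ne0 on that returns the definition of _5, and uaddc_is_cplxpart |
| |    then confirms _5 is the __imag__ (carry) part of a call result.  */ |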
| 4951 | |
| 4952 | /* Try to match e.g. |
| 4953 | _29 = .ADD_OVERFLOW (_3, _4); |
| 4954 | _30 = REALPART_EXPR <_29>; |
| 4955 | _31 = IMAGPART_EXPR <_29>; |
| 4956 | _32 = .ADD_OVERFLOW (_30, _38); |
| 4957 | _33 = REALPART_EXPR <_32>; |
| 4958 | _34 = IMAGPART_EXPR <_32>; |
| 4959 | _35 = _31 + _34; |
| 4960 | as |
| 4961 | _36 = .UADDC (_3, _4, _38); |
| 4962 | _33 = REALPART_EXPR <_36>; |
| 4963 | _35 = IMAGPART_EXPR <_36>; |
| 4964 | or |
| 4965 | _22 = .SUB_OVERFLOW (_6, _5); |
| 4966 | _23 = REALPART_EXPR <_22>; |
| 4967 | _24 = IMAGPART_EXPR <_22>; |
| 4968 | _25 = .SUB_OVERFLOW (_23, _37); |
| 4969 | _26 = REALPART_EXPR <_25>; |
| 4970 | _27 = IMAGPART_EXPR <_25>; |
| 4971 | _28 = _24 | _27; |
| 4972 | as |
| 4973 | _29 = .USUBC (_6, _5, _37); |
| 4974 | _26 = REALPART_EXPR <_29>; |
| 4975 | _28 = IMAGPART_EXPR <_29>; |
| 4976 | provided _38 or _37 above have [0, 1] range |
| 4977 | and _3, _4 and _30 or _6, _5 and _23 are unsigned |
| 4978 | integral types with the same precision.  Whether +, | or ^ is |
| 4979 | used on the IMAGPART_EXPR results doesn't matter: with one of the |
| 4980 | added or subtracted operands in the [0, 1] range, at most one |
| 4981 | .ADD_OVERFLOW or .SUB_OVERFLOW call will indicate overflow. */ |
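| | |
| | /* A rough source-level illustration (hypothetical example, assuming |
| |    unsigned long limbs): a two-limb add-with-carry such as |
| | |
| |      unsigned long lo, hi1, hi2; |
| |      int c1 = __builtin_add_overflow (alo, blo, &lo); |
| |      int c2 = __builtin_add_overflow (ahi, bhi, &hi1); |
| |      int c3 = __builtin_add_overflow (hi1, (unsigned long) c1, &hi2); |
| |      int carry_out = c2 | c3; |
| | |
| |    produces exactly the two chained .ADD_OVERFLOW calls whose |
| |    IMAGPART_EXPR results are combined with | (or + or ^) as matched |
| |    here; the second and third calls together then become a single |
| |    .UADDC (ahi, bhi, c1).  */ |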
| 4982 | |
| 4983 | static bool |
| 4984 | match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code) |
| 4985 | { |
| 4986 | tree rhs[4]; |
| 4987 | rhs[0] = gimple_assign_rhs1 (gs: stmt); |
| 4988 | rhs[1] = gimple_assign_rhs2 (gs: stmt); |
| 4989 | rhs[2] = NULL_TREE; |
| 4990 | rhs[3] = NULL_TREE; |
| 4991 | tree type = TREE_TYPE (rhs[0]); |
| 4992 | if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type)) |
| 4993 | return false; |
| 4994 | |
| 4995 | auto_vec<gimple *, 2> temp_stmts; |
| 4996 | if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR) |
| 4997 | { |
| 4998 | /* If overflow flag is ignored on the MSB limb, we can end up with |
| 4999 | the most significant limb handled as r = op1 + op2 + ovf1 + ovf2; |
| 5000 | or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions |
| 5001 | thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize |
| 5002 | the limb below the MSB, but also create another .UADDC/.USUBC call |
| 5003 | for the last limb. |
| 5004 | |
| 5005 | First look through assignments with the same rhs code as CODE, |
| 5006 | with the exception that subtraction of a constant is canonicalized |
| 5007 | into addition of its negation. rhs[0] will be minuend for |
| 5008 | subtractions and one of addends for addition, all other assigned |
| 5009 | rhs[i] operands will be subtrahends or other addends. */ |
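| | /* For example (hypothetical SSA names), if the statement being matched |
| |    is the last one of |
| |      _7 = op1_1 + op2_2; |
| |      _8 = _7 + ovf1_3; |
| |      r_9 = _8 + ovf2_4; |
| |    the loops below peel the single-use additions so that rhs[] ends up |
| |    holding op1_1, ovf2_4, ovf1_3 and op2_2.  */ |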
| 5010 | while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3]) |
| 5011 | { |
| 5012 | gimple *g = SSA_NAME_DEF_STMT (rhs[0]); |
| 5013 | if (has_single_use (var: rhs[0]) |
| 5014 | && is_gimple_assign (gs: g) |
| 5015 | && (gimple_assign_rhs_code (gs: g) == code |
| 5016 | || (code == MINUS_EXPR |
| 5017 | && gimple_assign_rhs_code (gs: g) == PLUS_EXPR |
| 5018 | && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST))) |
| 5019 | { |
| 5020 | tree r2 = gimple_assign_rhs2 (gs: g); |
| 5021 | if (gimple_assign_rhs_code (gs: g) != code) |
| 5022 | { |
| 5023 | r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2); |
| 5024 | if (!r2) |
| 5025 | break; |
| 5026 | } |
| 5027 | rhs[0] = gimple_assign_rhs1 (gs: g); |
| 5028 | tree &r = rhs[2] ? rhs[3] : rhs[2]; |
| 5029 | r = r2; |
| 5030 | temp_stmts.quick_push (obj: g); |
| 5031 | } |
| 5032 | else |
| 5033 | break; |
| 5034 | } |
| 5035 | for (int i = 1; i <= 2; ++i) |
| 5036 | while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3]) |
| 5037 | { |
| 5038 | gimple *g = SSA_NAME_DEF_STMT (rhs[i]); |
| 5039 | if (has_single_use (var: rhs[i]) |
| 5040 | && is_gimple_assign (gs: g) |
| 5041 | && gimple_assign_rhs_code (gs: g) == PLUS_EXPR) |
| 5042 | { |
| 5043 | rhs[i] = gimple_assign_rhs1 (gs: g); |
| 5044 | if (rhs[2]) |
| 5045 | rhs[3] = gimple_assign_rhs2 (gs: g); |
| 5046 | else |
| 5047 | rhs[2] = gimple_assign_rhs2 (gs: g); |
| 5048 | temp_stmts.quick_push (obj: g); |
| 5049 | } |
| 5050 | else |
| 5051 | break; |
| 5052 | } |
| 5053 | /* If there are just 3 addends or one minuend and two subtrahends, |
| 5054 | check for UADDC or USUBC being pattern recognized earlier. |
| 5055 | Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part |
| 5056 | got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3) |
| 5057 | etc. */ |
| 5058 | if (rhs[2] && !rhs[3]) |
| 5059 | { |
| 5060 | for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i) |
| 5061 | if (TREE_CODE (rhs[i]) == SSA_NAME) |
| 5062 | { |
| 5063 | gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); |
| 5064 | im = uaddc_ne0 (g: im); |
| 5065 | if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR)) |
| 5066 | { |
| 5067 | /* We found one of the 3 addends or 2 subtrahends to be |
| 5068 | __imag__ of something, verify it is .UADDC/.USUBC. */ |
| 5069 | tree rhs1 = gimple_assign_rhs1 (gs: im); |
| 5070 | gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0)); |
| 5071 | tree ovf_lhs = NULL_TREE; |
| 5072 | tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE; |
| 5073 | if (gimple_call_internal_p (gs: ovf, fn: code == PLUS_EXPR |
| 5074 | ? IFN_ADD_OVERFLOW |
| 5075 | : IFN_SUB_OVERFLOW)) |
| 5076 | { |
| 5077 | /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW. |
| 5078 | This is for the case of 2 chained .UADDC/.USUBC, |
| 5079 | where the first one uses 0 carry-in and the second |
| 5080 | one ignores the carry-out. |
| 5081 | So, something like: |
| 5082 | _16 = .ADD_OVERFLOW (_1, _2); |
| 5083 | _17 = REALPART_EXPR <_16>; |
| 5084 | _18 = IMAGPART_EXPR <_16>; |
| 5085 | _15 = _3 + _4; |
| 5086 | _12 = _15 + _18; |
| 5087 | where the first 3 statements come from the lower |
| 5088 | limb addition and the last 2 from the higher limb |
| 5089 | which ignores carry-out. */ |
| 5090 | ovf_lhs = gimple_call_lhs (gs: ovf); |
| 5091 | tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs)); |
| 5092 | ovf_arg1 = gimple_call_arg (gs: ovf, index: 0); |
| 5093 | ovf_arg2 = gimple_call_arg (gs: ovf, index: 1); |
| 5094 | /* In that case we need to punt if the types don't |
| 5095 | match. */ |
| 5096 | if (!types_compatible_p (type1: type, type2: ovf_lhs_type) |
| 5097 | || !types_compatible_p (type1: type, TREE_TYPE (ovf_arg1)) |
| 5098 | || !types_compatible_p (type1: type, |
| 5099 | TREE_TYPE (ovf_arg2))) |
| 5100 | ovf_lhs = NULL_TREE; |
| 5101 | else |
| 5102 | { |
| 5103 | for (int i = (code == PLUS_EXPR ? 1 : 0); |
| 5104 | i >= 0; --i) |
| 5105 | { |
| 5106 | tree r = gimple_call_arg (gs: ovf, index: i); |
| 5107 | if (TREE_CODE (r) != SSA_NAME) |
| 5108 | continue; |
| 5109 | if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r), |
| 5110 | part: REALPART_EXPR)) |
| 5111 | { |
| 5112 | /* Punt if one of the args which isn't |
| 5113 | subtracted isn't __real__; that could |
| 5114 | then prevent better match later. |
| 5115 | Consider: |
| 5116 | _3 = .ADD_OVERFLOW (_1, _2); |
| 5117 | _4 = REALPART_EXPR <_3>; |
| 5118 | _5 = IMAGPART_EXPR <_3>; |
| 5119 | _7 = .ADD_OVERFLOW (_4, _6); |
| 5120 | _8 = REALPART_EXPR <_7>; |
| 5121 | _9 = IMAGPART_EXPR <_7>; |
| 5122 | _12 = _10 + _11; |
| 5123 | _13 = _12 + _9; |
| 5124 | _14 = _13 + _5; |
| 5125 | We want to match this when called on |
| 5126 | the last stmt as a pair of .UADDC calls, |
| 5127 | but without this check we could turn |
| 5128 | that prematurely on _13 = _12 + _9; |
| 5129 | stmt into .UADDC with 0 carry-in just |
| 5130 | on the second .ADD_OVERFLOW call and |
| 5131 | another replacing the _12 and _13 |
| 5132 | additions. */ |
| 5133 | ovf_lhs = NULL_TREE; |
| 5134 | break; |
| 5135 | } |
| 5136 | } |
| 5137 | } |
| 5138 | if (ovf_lhs) |
| 5139 | { |
| 5140 | use_operand_p use_p; |
| 5141 | imm_use_iterator iter; |
| 5142 | tree re_lhs = NULL_TREE; |
| 5143 | FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs) |
| 5144 | { |
| 5145 | gimple *use_stmt = USE_STMT (use_p); |
| 5146 | if (is_gimple_debug (gs: use_stmt)) |
| 5147 | continue; |
| 5148 | if (use_stmt == im) |
| 5149 | continue; |
| 5150 | if (!uaddc_is_cplxpart (g: use_stmt, |
| 5151 | part: REALPART_EXPR)) |
| 5152 | { |
| 5153 | ovf_lhs = NULL_TREE; |
| 5154 | break; |
| 5155 | } |
| 5156 | re_lhs = gimple_assign_lhs (gs: use_stmt); |
| 5157 | } |
| 5158 | if (ovf_lhs && re_lhs) |
| 5159 | { |
| 5160 | FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs) |
| 5161 | { |
| 5162 | gimple *use_stmt = USE_STMT (use_p); |
| 5163 | if (is_gimple_debug (gs: use_stmt)) |
| 5164 | continue; |
| 5165 | internal_fn ifn |
| 5166 | = gimple_call_internal_fn (gs: ovf); |
| 5167 | /* Punt if the __real__ of lhs is used |
| 5168 | in the same .*_OVERFLOW call. |
| 5169 | Consider: |
| 5170 | _3 = .ADD_OVERFLOW (_1, _2); |
| 5171 | _4 = REALPART_EXPR <_3>; |
| 5172 | _5 = IMAGPART_EXPR <_3>; |
| 5173 | _7 = .ADD_OVERFLOW (_4, _6); |
| 5174 | _8 = REALPART_EXPR <_7>; |
| 5175 | _9 = IMAGPART_EXPR <_7>; |
| 5176 | _12 = _10 + _11; |
| 5177 | _13 = _12 + _5; |
| 5178 | _14 = _13 + _9; |
| 5179 | We want to match this when called on |
| 5180 | the last stmt as a pair of .UADDC calls, |
| 5181 | but without this check we could turn |
| 5182 | that prematurely on _13 = _12 + _5; |
| 5183 | stmt into .UADDC with 0 carry-in just |
| 5184 | on the first .ADD_OVERFLOW call and |
| 5185 | another replacing the _12 and _13 |
| 5186 | additions. */ |
| 5187 | if (gimple_call_internal_p (gs: use_stmt, fn: ifn)) |
| 5188 | { |
| 5189 | ovf_lhs = NULL_TREE; |
| 5190 | break; |
| 5191 | } |
| 5192 | } |
| 5193 | } |
| 5194 | } |
| 5195 | } |
| 5196 | if ((ovf_lhs |
| 5197 | || gimple_call_internal_p (gs: ovf, |
| 5198 | fn: code == PLUS_EXPR |
| 5199 | ? IFN_UADDC : IFN_USUBC)) |
| 5200 | && (optab_handler (op: code == PLUS_EXPR |
| 5201 | ? uaddc5_optab : usubc5_optab, |
| 5202 | TYPE_MODE (type)) |
| 5203 | != CODE_FOR_nothing)) |
| 5204 | { |
| 5205 | /* And in that case build another .UADDC/.USUBC |
| 5206 | call for the most significant limb addition. |
| 5207 | Overflow bit is ignored here. */ |
| 5208 | if (i != 2) |
| 5209 | std::swap (a&: rhs[i], b&: rhs[2]); |
| 5210 | gimple *g |
| 5211 | = gimple_build_call_internal (code == PLUS_EXPR |
| 5212 | ? IFN_UADDC |
| 5213 | : IFN_USUBC, |
| 5214 | 3, rhs[0], rhs[1], |
| 5215 | rhs[2]); |
| 5216 | tree nlhs = make_ssa_name (var: build_complex_type (type)); |
| 5217 | gimple_call_set_lhs (gs: g, lhs: nlhs); |
| 5218 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 5219 | tree ilhs = gimple_assign_lhs (gs: stmt); |
| 5220 | g = gimple_build_assign (ilhs, REALPART_EXPR, |
| 5221 | build1 (REALPART_EXPR, |
| 5222 | TREE_TYPE (ilhs), |
| 5223 | nlhs)); |
| 5224 | gsi_replace (gsi, g, true); |
| 5225 | /* And if it is initialized from result of __imag__ |
| 5226 | of .{ADD,SUB}_OVERFLOW call, replace that |
| 5227 | call with .U{ADD,SUB}C call with the same arguments, |
| 5228 | just 0 added as third argument. This isn't strictly |
| 5229 | necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0) |
| 5230 | produce the same result, but may result in better |
| 5231 | generated code on some targets where the backend can |
| 5232 | better prepare in how the result will be used. */ |
| 5233 | if (ovf_lhs) |
| 5234 | { |
| 5235 | tree zero = build_zero_cst (type); |
| 5236 | g = gimple_build_call_internal (code == PLUS_EXPR |
| 5237 | ? IFN_UADDC |
| 5238 | : IFN_USUBC, |
| 5239 | 3, ovf_arg1, |
| 5240 | ovf_arg2, zero); |
| 5241 | gimple_call_set_lhs (gs: g, lhs: ovf_lhs); |
| 5242 | gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf); |
| 5243 | gsi_replace (&gsi2, g, true); |
| 5244 | } |
| 5245 | return true; |
| 5246 | } |
| 5247 | } |
| 5248 | } |
| 5249 | return false; |
| 5250 | } |
| 5251 | if (code == MINUS_EXPR && !rhs[2]) |
| 5252 | return false; |
| 5253 | if (code == MINUS_EXPR) |
| 5254 | /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs. |
| 5255 | So, for MINUS_EXPR swap the single added rhs operand (others are |
| 5256 | subtracted) to rhs[3]. */ |
| 5257 | std::swap (a&: rhs[0], b&: rhs[3]); |
| 5258 | } |
| 5259 | /* Walk from both operands of STMT (for +/- even sometimes from |
| 5260 | all the 4 addends or 3 subtrahends), see through casts and != 0 |
| 5261 | statements which would preserve [0, 1] range of values and |
| 5262 | check which is initialized from __imag__. */ |
| 5263 | gimple *im1 = NULL, *im2 = NULL; |
| 5264 | for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++) |
| 5265 | if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME) |
| 5266 | { |
| 5267 | gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); |
| 5268 | im = uaddc_ne0 (g: im); |
| 5269 | if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR)) |
| 5270 | { |
| 5271 | if (im1 == NULL) |
| 5272 | { |
| 5273 | im1 = im; |
| 5274 | if (i != 0) |
| 5275 | std::swap (a&: rhs[0], b&: rhs[i]); |
| 5276 | } |
| 5277 | else |
| 5278 | { |
| 5279 | im2 = im; |
| 5280 | if (i != 1) |
| 5281 | std::swap (a&: rhs[1], b&: rhs[i]); |
| 5282 | break; |
| 5283 | } |
| 5284 | } |
| 5285 | } |
| 5286 | /* If we don't find at least two, punt. */ |
| 5287 | if (!im2) |
| 5288 | return false; |
| 5289 | /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results, |
| 5290 | either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have |
| 5291 | uaddc5/usubc5 named pattern for the corresponding mode. */ |
| 5292 | gimple *ovf1 |
| 5293 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0)); |
| 5294 | gimple *ovf2 |
| 5295 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0)); |
| 5296 | internal_fn ifn; |
| 5297 | if (!is_gimple_call (gs: ovf1) |
| 5298 | || !gimple_call_internal_p (gs: ovf1) |
| 5299 | || ((ifn = gimple_call_internal_fn (gs: ovf1)) != IFN_ADD_OVERFLOW |
| 5300 | && ifn != IFN_SUB_OVERFLOW) |
| 5301 | || !gimple_call_internal_p (gs: ovf2, fn: ifn) |
| 5302 | || optab_handler (op: ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab, |
| 5303 | TYPE_MODE (type)) == CODE_FOR_nothing |
| 5304 | || (rhs[2] |
| 5305 | && optab_handler (op: code == PLUS_EXPR ? uaddc5_optab : usubc5_optab, |
| 5306 | TYPE_MODE (type)) == CODE_FOR_nothing)) |
| 5307 | return false; |
| 5308 | tree arg1, arg2, arg3 = NULL_TREE; |
| 5309 | gimple *re1 = NULL, *re2 = NULL; |
| 5310 | /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments |
| 5311 | should be initialized from __real__ of the other of the two calls. |
| 5312 | Though, for .SUB_OVERFLOW, it has to be the first argument, not the |
| 5313 | second one. */ |
| 5314 | for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i) |
| 5315 | for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL)) |
| 5316 | { |
| 5317 | tree arg = gimple_call_arg (gs: ovf, index: i); |
| 5318 | if (TREE_CODE (arg) != SSA_NAME) |
| 5319 | continue; |
| 5320 | re1 = SSA_NAME_DEF_STMT (arg); |
| 5321 | if (uaddc_is_cplxpart (g: re1, part: REALPART_EXPR) |
| 5322 | && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0)) |
| 5323 | == (ovf == ovf1 ? ovf2 : ovf1))) |
| 5324 | { |
| 5325 | if (ovf == ovf1) |
| 5326 | { |
| 5327 | /* Make sure ovf2 is the .*_OVERFLOW call with argument |
| 5328 | initialized from __real__ of ovf1. */ |
| 5329 | std::swap (a&: rhs[0], b&: rhs[1]); |
| 5330 | std::swap (a&: im1, b&: im2); |
| 5331 | std::swap (a&: ovf1, b&: ovf2); |
| 5332 | } |
| 5333 | arg3 = gimple_call_arg (gs: ovf, index: 1 - i); |
| 5334 | i = -1; |
| 5335 | break; |
| 5336 | } |
| 5337 | } |
| 5338 | if (!arg3) |
| 5339 | return false; |
| 5340 | arg1 = gimple_call_arg (gs: ovf1, index: 0); |
| 5341 | arg2 = gimple_call_arg (gs: ovf1, index: 1); |
| 5342 | if (!types_compatible_p (type1: type, TREE_TYPE (arg1))) |
| 5343 | return false; |
| 5344 | int kind[2] = { 0, 0 }; |
| 5345 | tree arg_im[2] = { NULL_TREE, NULL_TREE }; |
| 5346 | /* At least one of arg2 and arg3 should have type compatible |
| 5347 | with arg1/rhs[0], and the other one should have value in [0, 1] |
| 5348 | range. If both are in [0, 1] range and type compatible with |
| 5349 | arg1/rhs[0], try harder to find after looking through casts, |
| 5350 | != 0 comparisons which one is initialized to __imag__ of |
| 5351 | .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */ |
| 5352 | for (int i = 0; i < 2; ++i) |
| 5353 | { |
| 5354 | tree arg = i == 0 ? arg2 : arg3; |
| 5355 | if (types_compatible_p (type1: type, TREE_TYPE (arg))) |
| 5356 | kind[i] = 1; |
| 5357 | if (!INTEGRAL_TYPE_P (TREE_TYPE (arg)) |
| 5358 | || (TYPE_PRECISION (TREE_TYPE (arg)) == 1 |
| 5359 | && !TYPE_UNSIGNED (TREE_TYPE (arg)))) |
| 5360 | continue; |
| 5361 | if (tree_zero_one_valued_p (arg)) |
| 5362 | kind[i] |= 2; |
| 5363 | if (TREE_CODE (arg) == SSA_NAME) |
| 5364 | { |
| 5365 | gimple *g = SSA_NAME_DEF_STMT (arg); |
| 5366 | if (gimple_assign_cast_p (s: g)) |
| 5367 | { |
| 5368 | tree op = gimple_assign_rhs1 (gs: g); |
| 5369 | if (TREE_CODE (op) == SSA_NAME |
| 5370 | && INTEGRAL_TYPE_P (TREE_TYPE (op))) |
| 5371 | g = SSA_NAME_DEF_STMT (op); |
| 5372 | } |
| 5373 | g = uaddc_ne0 (g); |
| 5374 | if (!uaddc_is_cplxpart (g, part: IMAGPART_EXPR)) |
| 5375 | continue; |
| 5376 | arg_im[i] = gimple_assign_lhs (gs: g); |
| 5377 | g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0)); |
| 5378 | if (!is_gimple_call (gs: g) || !gimple_call_internal_p (gs: g)) |
| 5379 | continue; |
| 5380 | switch (gimple_call_internal_fn (gs: g)) |
| 5381 | { |
| 5382 | case IFN_ADD_OVERFLOW: |
| 5383 | case IFN_SUB_OVERFLOW: |
| 5384 | case IFN_UADDC: |
| 5385 | case IFN_USUBC: |
| 5386 | break; |
| 5387 | default: |
| 5388 | continue; |
| 5389 | } |
| 5390 | kind[i] |= 4; |
| 5391 | } |
| 5392 | } |
| 5393 | /* Make arg2 the one with compatible type and arg3 the one |
| 5394 | with [0, 1] range.  If both are true for both operands, |
| 5395 | prefer as arg3 result of __imag__ of some ifn. */ |
| 5396 | if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1])) |
| 5397 | { |
| 5398 | std::swap (a&: arg2, b&: arg3); |
| 5399 | std::swap (a&: kind[0], b&: kind[1]); |
| 5400 | std::swap (a&: arg_im[0], b&: arg_im[1]); |
| 5401 | } |
| 5402 | if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0) |
| 5403 | return false; |
| 5404 | if (!has_single_use (var: gimple_assign_lhs (gs: im1)) |
| 5405 | || !has_single_use (var: gimple_assign_lhs (gs: im2)) |
| 5406 | || !has_single_use (var: gimple_assign_lhs (gs: re1)) |
| 5407 | || num_imm_uses (var: gimple_call_lhs (gs: ovf1)) != 2) |
| 5408 | return false; |
| 5409 | /* Check that ovf2's result is used in __real__ and set re2 |
| 5410 | to that statement. */ |
| 5411 | use_operand_p use_p; |
| 5412 | imm_use_iterator iter; |
| 5413 | tree lhs = gimple_call_lhs (gs: ovf2); |
| 5414 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
| 5415 | { |
| 5416 | gimple *use_stmt = USE_STMT (use_p); |
| 5417 | if (is_gimple_debug (gs: use_stmt)) |
| 5418 | continue; |
| 5419 | if (use_stmt == im2) |
| 5420 | continue; |
| 5421 | if (re2) |
| 5422 | return false; |
| 5423 | if (!uaddc_is_cplxpart (g: use_stmt, part: REALPART_EXPR)) |
| 5424 | return false; |
| 5425 | re2 = use_stmt; |
| 5426 | } |
| 5427 | /* Build .UADDC/.USUBC call which will be placed before the stmt. */ |
| 5428 | gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2); |
| 5429 | gimple *g; |
| 5430 | if ((kind[1] & 4) != 0 && types_compatible_p (type1: type, TREE_TYPE (arg_im[1]))) |
| 5431 | arg3 = arg_im[1]; |
| 5432 | if ((kind[1] & 1) == 0) |
| 5433 | { |
| 5434 | if (TREE_CODE (arg3) == INTEGER_CST) |
| 5435 | arg3 = fold_convert (type, arg3); |
| 5436 | else |
| 5437 | { |
| 5438 | g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, arg3); |
| 5439 | gsi_insert_before (&gsi2, g, GSI_SAME_STMT); |
| 5440 | arg3 = gimple_assign_lhs (gs: g); |
| 5441 | } |
| 5442 | } |
| 5443 | g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW |
| 5444 | ? IFN_UADDC : IFN_USUBC, |
| 5445 | 3, arg1, arg2, arg3); |
| 5446 | tree nlhs = make_ssa_name (TREE_TYPE (lhs)); |
| 5447 | gimple_call_set_lhs (gs: g, lhs: nlhs); |
| 5448 | gsi_insert_before (&gsi2, g, GSI_SAME_STMT); |
| 5449 | /* In the case where stmt is | or ^ of two overflow flags |
| 5450 | or addition of those, replace stmt with __imag__ of the above |
| 5451 | added call. In case of arg1 + arg2 + (ovf1 + ovf2) or |
| 5452 | arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */ |
| 5453 | tree ilhs = rhs[2] ? make_ssa_name (var: type) : gimple_assign_lhs (gs: stmt); |
| 5454 | g = gimple_build_assign (ilhs, IMAGPART_EXPR, |
| 5455 | build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs)); |
| 5456 | if (rhs[2]) |
| 5457 | { |
| 5458 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 5459 | /* Remove some further statements which can't be kept in the IL because |
| 5460 | they can use SSA_NAMEs whose setter is going to be removed too. */ |
| 5461 | for (gimple *g2 : temp_stmts) |
| 5462 | { |
| 5463 | gsi2 = gsi_for_stmt (g2); |
| 5464 | gsi_remove (&gsi2, true); |
| 5465 | release_defs (g2); |
| 5466 | } |
| 5467 | } |
| 5468 | else |
| 5469 | gsi_replace (gsi, g, true); |
| 5470 | /* Remove some statements which can't be kept in the IL because they |
| 5471 | use SSA_NAME whose setter is going to be removed too. */ |
| 5472 | tree rhs1 = rhs[1]; |
| 5473 | for (int i = 0; i < 2; i++) |
| 5474 | if (rhs1 == gimple_assign_lhs (gs: im2)) |
| 5475 | break; |
| 5476 | else |
| 5477 | { |
| 5478 | g = SSA_NAME_DEF_STMT (rhs1); |
| 5479 | rhs1 = gimple_assign_rhs1 (gs: g); |
| 5480 | gsi2 = gsi_for_stmt (g); |
| 5481 | gsi_remove (&gsi2, true); |
| 5482 | release_defs (g); |
| 5483 | } |
| 5484 | gcc_checking_assert (rhs1 == gimple_assign_lhs (im2)); |
| 5485 | gsi2 = gsi_for_stmt (im2); |
| 5486 | gsi_remove (&gsi2, true); |
| 5487 | release_defs (im2); |
| 5488 | /* Replace the re2 statement with __real__ of the newly added |
| 5489 | .UADDC/.USUBC call. */ |
| 5490 | if (re2) |
| 5491 | { |
| 5492 | gsi2 = gsi_for_stmt (re2); |
| 5493 | tree rlhs = gimple_assign_lhs (gs: re2); |
| 5494 | g = gimple_build_assign (rlhs, REALPART_EXPR, |
| 5495 | build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs)); |
| 5496 | gsi_replace (&gsi2, g, true); |
| 5497 | } |
| 5498 | if (rhs[2]) |
| 5499 | { |
| 5500 | /* If this is the arg1 + arg2 + (ovf1 + ovf2) or |
| 5501 | arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb, |
| 5502 | replace stmt with __real__ of another .UADDC/.USUBC call which |
| 5503 | handles the most significant limb. Overflow flag from this is |
| 5504 | ignored. */ |
| 5505 | g = gimple_build_call_internal (code == PLUS_EXPR |
| 5506 | ? IFN_UADDC : IFN_USUBC, |
| 5507 | 3, rhs[3], rhs[2], ilhs); |
| 5508 | nlhs = make_ssa_name (TREE_TYPE (lhs)); |
| 5509 | gimple_call_set_lhs (gs: g, lhs: nlhs); |
| 5510 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 5511 | ilhs = gimple_assign_lhs (gs: stmt); |
| 5512 | g = gimple_build_assign (ilhs, REALPART_EXPR, |
| 5513 | build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs)); |
| 5514 | gsi_replace (gsi, g, true); |
| 5515 | } |
| 5516 | if (TREE_CODE (arg3) == SSA_NAME) |
| 5517 | { |
| 5518 | /* When pattern recognizing the second least significant limb |
| 5519 | above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb), |
| 5520 | check if the [0, 1] range argument (i.e. carry in) isn't the |
| 5521 | result of another .{ADD,SUB}_OVERFLOW call (one handling the |
| 5522 | least significant limb). Again look through casts and != 0. */ |
| 5523 | gimple *im3 = SSA_NAME_DEF_STMT (arg3); |
| 5524 | for (int i = 0; i < 2; ++i) |
| 5525 | { |
| 5526 | gimple *im4 = uaddc_cast (g: im3); |
| 5527 | if (im4 == im3) |
| 5528 | break; |
| 5529 | else |
| 5530 | im3 = im4; |
| 5531 | } |
| 5532 | im3 = uaddc_ne0 (g: im3); |
| 5533 | if (uaddc_is_cplxpart (g: im3, part: IMAGPART_EXPR)) |
| 5534 | { |
| 5535 | gimple *ovf3 |
| 5536 | = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0)); |
| 5537 | if (gimple_call_internal_p (gs: ovf3, fn: ifn)) |
| 5538 | { |
| 5539 | lhs = gimple_call_lhs (gs: ovf3); |
| 5540 | arg1 = gimple_call_arg (gs: ovf3, index: 0); |
| 5541 | arg2 = gimple_call_arg (gs: ovf3, index: 1); |
| 5542 | if (types_compatible_p (type1: type, TREE_TYPE (TREE_TYPE (lhs))) |
| 5543 | && types_compatible_p (type1: type, TREE_TYPE (arg1)) |
| 5544 | && types_compatible_p (type1: type, TREE_TYPE (arg2))) |
| 5545 | { |
| 5546 | /* And if it is initialized from result of __imag__ |
| 5547 | of .{ADD,SUB}_OVERFLOW call, replace that |
| 5548 | call with .U{ADD,SUB}C call with the same arguments, |
| 5549 | just 0 added as third argument. This isn't strictly |
| 5550 | necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0) |
| 5551 | produce the same result, but may result in better |
| 5552 | generated code on some targets where the backend can |
| 5553 | better prepare in how the result will be used. */ |
| 5554 | g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW |
| 5555 | ? IFN_UADDC : IFN_USUBC, |
| 5556 | 3, arg1, arg2, |
| 5557 | build_zero_cst (type)); |
| 5558 | gimple_call_set_lhs (gs: g, lhs); |
| 5559 | gsi2 = gsi_for_stmt (ovf3); |
| 5560 | gsi_replace (&gsi2, g, true); |
| 5561 | } |
| 5562 | } |
| 5563 | } |
| 5564 | } |
| 5565 | return true; |
| 5566 | } |
| 5567 | |
| 5568 | /* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with |
| 5569 | (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 if .POPCOUNT |
| 5570 | isn't a direct optab; when x is known non-zero, or for the `<=` 1 |
| 5571 | and `>` 1 forms, use (x & (x - 1)) ==/!= 0 instead. */ |
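| | |
| | /* A rough source-level illustration (hypothetical example): without a |
| |    popcount optab, |
| | |
| |      int single_bit_p (unsigned int x) |
| |      { |
| |        return __builtin_popcount (x) == 1; |
| |      } |
| | |
| |    is rewritten into the branch-free |
| | |
| |      int single_bit_p (unsigned int x) |
| |      { |
| |        return (x ^ (x - 1)) > x - 1; |
| |      } |
| | |
| |    which is false for x == 0 because x - 1 wraps to UINT_MAX there.  */ |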
| 5572 | |
| 5573 | static void |
| 5574 | match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt) |
| 5575 | { |
| 5576 | tree clhs, crhs; |
| 5577 | enum tree_code code; |
| 5578 | bool was_le = false; |
| 5579 | if (gimple_code (g: stmt) == GIMPLE_COND) |
| 5580 | { |
| 5581 | clhs = gimple_cond_lhs (gs: stmt); |
| 5582 | crhs = gimple_cond_rhs (gs: stmt); |
| 5583 | code = gimple_cond_code (gs: stmt); |
| 5584 | } |
| 5585 | else |
| 5586 | { |
| 5587 | clhs = gimple_assign_rhs1 (gs: stmt); |
| 5588 | crhs = gimple_assign_rhs2 (gs: stmt); |
| 5589 | code = gimple_assign_rhs_code (gs: stmt); |
| 5590 | } |
| 5591 | if (code != LE_EXPR && code != GT_EXPR |
| 5592 | && code != EQ_EXPR && code != NE_EXPR) |
| 5593 | return; |
| 5594 | if (code == LE_EXPR || code == GT_EXPR) |
| 5595 | was_le = true; |
| 5596 | if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs)) |
| 5597 | return; |
| 5598 | gimple *call = SSA_NAME_DEF_STMT (clhs); |
| 5599 | combined_fn cfn = gimple_call_combined_fn (call); |
| 5600 | switch (cfn) |
| 5601 | { |
| 5602 | CASE_CFN_POPCOUNT: |
| 5603 | break; |
| 5604 | default: |
| 5605 | return; |
| 5606 | } |
| 5607 | if (!has_single_use (var: clhs)) |
| 5608 | return; |
| 5609 | tree arg = gimple_call_arg (gs: call, index: 0); |
| 5610 | tree type = TREE_TYPE (arg); |
| 5611 | if (!INTEGRAL_TYPE_P (type)) |
| 5612 | return; |
| 5613 | bool nonzero_arg = tree_expr_nonzero_p (arg); |
| 5614 | if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH)) |
| 5615 | { |
| 5616 | /* Tell expand_POPCOUNT the popcount result is only used in equality |
| 5617 | comparison with one, so that it can decide based on rtx costs. */ |
| 5618 | gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg, |
| 5619 | was_le ? integer_minus_one_node |
| 5620 | : (nonzero_arg ? integer_zero_node |
| 5621 | : integer_one_node)); |
| 5622 | gimple_call_set_lhs (gs: g, lhs: gimple_call_lhs (gs: call)); |
| 5623 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); |
| 5624 | gsi_replace (&gsi2, g, true); |
| 5625 | return; |
| 5626 | } |
| 5627 | tree argm1 = make_ssa_name (var: type); |
| 5628 | gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg, |
| 5629 | build_int_cst (type, -1)); |
| 5630 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 5631 | g = gimple_build_assign (make_ssa_name (var: type), |
| 5632 | (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR, |
| 5633 | arg, argm1); |
| 5634 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
| 5635 | tree_code cmpcode; |
| 5636 | if (was_le) |
| 5637 | { |
| 5638 | argm1 = build_zero_cst (type); |
| 5639 | cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR; |
| 5640 | } |
| 5641 | else if (nonzero_arg) |
| 5642 | { |
| 5643 | argm1 = build_zero_cst (type); |
| 5644 | cmpcode = code; |
| 5645 | } |
| 5646 | else |
| 5647 | cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR; |
| 5648 | if (gcond *cond = dyn_cast <gcond *> (p: stmt)) |
| 5649 | { |
| 5650 | gimple_cond_set_lhs (gs: cond, lhs: gimple_assign_lhs (gs: g)); |
| 5651 | gimple_cond_set_rhs (gs: cond, rhs: argm1); |
| 5652 | gimple_cond_set_code (gs: cond, code: cmpcode); |
| 5653 | } |
| 5654 | else |
| 5655 | { |
| 5656 | gimple_assign_set_rhs1 (gs: stmt, rhs: gimple_assign_lhs (gs: g)); |
| 5657 | gimple_assign_set_rhs2 (gs: stmt, rhs: argm1); |
| 5658 | gimple_assign_set_rhs_code (s: stmt, code: cmpcode); |
| 5659 | } |
| 5660 | update_stmt (s: stmt); |
| 5661 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); |
| 5662 | gsi_remove (&gsi2, true); |
| 5663 | release_defs (call); |
| 5664 | } |
| 5665 | |
| 5666 | /* Return true if target has support for divmod. */ |
| 5667 | |
| 5668 | static bool |
| 5669 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) |
| 5670 | { |
| 5671 | /* If target supports hardware divmod insn, use it for divmod. */ |
| 5672 | if (optab_handler (op: divmod_optab, mode) != CODE_FOR_nothing) |
| 5673 | return true; |
| 5674 | |
| 5675 | /* Check if libfunc for divmod is available. */ |
| 5676 | rtx libfunc = optab_libfunc (divmod_optab, mode); |
| 5677 | if (libfunc != NULL_RTX) |
| 5678 | { |
| 5679 | /* If optab_handler exists for div_optab, perhaps in a wider mode, |
| 5680 | we don't want to use the libfunc even if it exists for given mode. */ |
| 5681 | machine_mode div_mode; |
| 5682 | FOR_EACH_MODE_FROM (div_mode, mode) |
| 5683 | if (optab_handler (op: div_optab, mode: div_mode) != CODE_FOR_nothing) |
| 5684 | return false; |
| 5685 | |
| 5686 | return targetm.expand_divmod_libfunc != NULL; |
| 5687 | } |
| 5688 | |
| 5689 | return false; |
| 5690 | } |
| 5691 | |
| 5692 | /* Check if stmt is candidate for divmod transform. */ |
| 5693 | |
| 5694 | static bool |
| 5695 | divmod_candidate_p (gassign *stmt) |
| 5696 | { |
| 5697 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
| 5698 | machine_mode mode = TYPE_MODE (type); |
| 5699 | optab divmod_optab, div_optab; |
| 5700 | |
| 5701 | if (TYPE_UNSIGNED (type)) |
| 5702 | { |
| 5703 | divmod_optab = udivmod_optab; |
| 5704 | div_optab = udiv_optab; |
| 5705 | } |
| 5706 | else |
| 5707 | { |
| 5708 | divmod_optab = sdivmod_optab; |
| 5709 | div_optab = sdiv_optab; |
| 5710 | } |
| 5711 | |
| 5712 | tree op1 = gimple_assign_rhs1 (gs: stmt); |
| 5713 | tree op2 = gimple_assign_rhs2 (gs: stmt); |
| 5714 | |
| 5715 | /* Disable the transform if either is a constant, since division-by-constant |
| 5716 | may have specialized expansion. */ |
| 5717 | if (CONSTANT_CLASS_P (op1)) |
| 5718 | return false; |
| 5719 | |
| 5720 | if (CONSTANT_CLASS_P (op2)) |
| 5721 | { |
| 5722 | if (integer_pow2p (op2)) |
| 5723 | return false; |
| 5724 | |
| 5725 | if (element_precision (type) <= HOST_BITS_PER_WIDE_INT |
| 5726 | && element_precision (type) <= BITS_PER_WORD) |
| 5727 | return false; |
| 5728 | |
| 5729 | /* If the divisor is not power of 2 and the precision wider than |
| 5730 | HWI, expand_divmod punts on that, so in that case it is better |
| 5731 | to use divmod optab or libfunc. Similarly if choose_multiplier |
| 5732 | might need pre/post shifts of BITS_PER_WORD or more. */ |
| 5733 | } |
| 5734 | |
| 5735 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should |
| 5736 | expand using the [su]divv optabs. */ |
| 5737 | if (TYPE_OVERFLOW_TRAPS (type)) |
| 5738 | return false; |
| 5739 | |
| 5740 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) |
| 5741 | return false; |
| 5742 | |
| 5743 | return true; |
| 5744 | } |
| 5745 | |
| 5746 | /* This function looks for: |
| 5747 | t1 = a TRUNC_DIV_EXPR b; |
| 5748 | t2 = a TRUNC_MOD_EXPR b; |
| 5749 | and transforms it to the following sequence: |
| 5750 | complex_tmp = DIVMOD (a, b); |
| 5751 | t1 = REALPART_EXPR(complex_tmp); |
| 5752 | t2 = IMAGPART_EXPR(complex_tmp); |
| 5753 | For conditions enabling the transform see divmod_candidate_p(). |
| 5754 | |
| 5755 | The pass has three parts: |
| 5756 | 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all |
| 5757 | other trunc_div_expr and trunc_mod_expr stmts. |
| 5758 | 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt |
| 5759 | to stmts vector. |
| 5760 | 3) Insert DIVMOD call just before top_stmt and update entries in |
| 5761 | stmts vector to use return value of DIVMOD (REALPART_EXPR for div, |
| 5762 | IMAGPART_EXPR for mod). */ |
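| | |
| | /* A rough source-level illustration (hypothetical example): on a target |
| |    where the transform applies, |
| | |
| |      void quotrem (int a, int b, int *q, int *r) |
| |      { |
| |        *q = a / b; |
| |        *r = a % b; |
| |      } |
| | |
| |    would otherwise expand into two separate division and modulo |
| |    operations; after this transform both results are read back from the |
| |    single DIVMOD internal-function call via REALPART_EXPR and |
| |    IMAGPART_EXPR.  */ |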
| 5763 | |
| 5764 | static bool |
| 5765 | convert_to_divmod (gassign *stmt) |
| 5766 | { |
| 5767 | if (stmt_can_throw_internal (cfun, stmt) |
| 5768 | || !divmod_candidate_p (stmt)) |
| 5769 | return false; |
| 5770 | |
| 5771 | tree op1 = gimple_assign_rhs1 (gs: stmt); |
| 5772 | tree op2 = gimple_assign_rhs2 (gs: stmt); |
| 5773 | |
| 5774 | imm_use_iterator use_iter; |
| 5775 | gimple *use_stmt; |
| 5776 | auto_vec<gimple *> stmts; |
| 5777 | |
| 5778 | gimple *top_stmt = stmt; |
| 5779 | basic_block top_bb = gimple_bb (g: stmt); |
| 5780 | |
| 5781 | /* Part 1: Try to set top_stmt to "topmost" stmt that dominates |
| 5782 | at least stmt and possibly other trunc_div/trunc_mod stmts |
| 5783 | having same operands as stmt. */ |
| 5784 | |
| 5785 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) |
| 5786 | { |
| 5787 | if (is_gimple_assign (gs: use_stmt) |
| 5788 | && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR |
| 5789 | || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR) |
| 5790 | && operand_equal_p (op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0) |
| 5791 | && operand_equal_p (op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0)) |
| 5792 | { |
| 5793 | if (stmt_can_throw_internal (cfun, use_stmt)) |
| 5794 | continue; |
| 5795 | |
| 5796 | basic_block bb = gimple_bb (g: use_stmt); |
| 5797 | |
| 5798 | if (bb == top_bb) |
| 5799 | { |
| 5800 | if (gimple_uid (g: use_stmt) < gimple_uid (g: top_stmt)) |
| 5801 | top_stmt = use_stmt; |
| 5802 | } |
| 5803 | else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) |
| 5804 | { |
| 5805 | top_bb = bb; |
| 5806 | top_stmt = use_stmt; |
| 5807 | } |
| 5808 | } |
| 5809 | } |
| 5810 | |
| 5811 | tree top_op1 = gimple_assign_rhs1 (gs: top_stmt); |
| 5812 | tree top_op2 = gimple_assign_rhs2 (gs: top_stmt); |
| 5813 | |
| 5814 | stmts.safe_push (obj: top_stmt); |
| 5815 | bool div_seen = (gimple_assign_rhs_code (gs: top_stmt) == TRUNC_DIV_EXPR); |
| 5816 | |
| 5817 | /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb |
| 5818 | to stmts vector.  The 2nd loop will always add stmt to stmts vector, since |
| 5819 | gimple_bb (top_stmt) dominates gimple_bb (stmt), so the |
| 5820 | 2nd loop ends up adding at least a single trunc_mod_expr stmt. */ |
| 5821 | |
| 5822 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) |
| 5823 | { |
| 5824 | if (is_gimple_assign (gs: use_stmt) |
| 5825 | && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR |
| 5826 | || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR) |
| 5827 | && operand_equal_p (top_op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0) |
| 5828 | && operand_equal_p (top_op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0)) |
| 5829 | { |
| 5830 | if (use_stmt == top_stmt |
| 5831 | || stmt_can_throw_internal (cfun, use_stmt) |
| 5832 | || !dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt), top_bb)) |
| 5833 | continue; |
| 5834 | |
| 5835 | stmts.safe_push (obj: use_stmt); |
| 5836 | if (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR) |
| 5837 | div_seen = true; |
| 5838 | } |
| 5839 | } |
| 5840 | |
| 5841 | if (!div_seen) |
| 5842 | return false; |
| 5843 | |
| 5844 | /* Part 3: Create a call to the internal fn DIVMOD: |
| 5845 | divmod_tmp = DIVMOD (op1, op2). */ |
| 5846 | |
| 5847 | gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); |
| 5848 | tree res = make_temp_ssa_name (type: build_complex_type (TREE_TYPE (op1)), |
| 5849 | stmt: call_stmt, name: "divmod_tmp" ); |
| 5850 | gimple_call_set_lhs (gs: call_stmt, lhs: res); |
| 5851 | /* We rejected throwing statements above. */ |
| 5852 | gimple_call_set_nothrow (s: call_stmt, nothrow_p: true); |
| 5853 | |
| 5854 | /* Insert the call before top_stmt. */ |
| 5855 | gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); |
| 5856 | gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); |
| 5857 | |
| 5858 | widen_mul_stats.divmod_calls_inserted++; |
| 5859 | |
| 5860 | /* Update all statements in stmts vector: |
| 5861 | lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp> |
| 5862 | lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */ |
| 5863 | |
| 5864 | for (unsigned i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i) |
| 5865 | { |
| 5866 | tree new_rhs; |
| 5867 | |
| 5868 | switch (gimple_assign_rhs_code (gs: use_stmt)) |
| 5869 | { |
| 5870 | case TRUNC_DIV_EXPR: |
| 5871 | new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); |
| 5872 | break; |
| 5873 | |
| 5874 | case TRUNC_MOD_EXPR: |
| 5875 | new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); |
| 5876 | break; |
| 5877 | |
| 5878 | default: |
| 5879 | gcc_unreachable (); |
| 5880 | } |
| 5881 | |
| 5882 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
| 5883 | gimple_assign_set_rhs_from_tree (&gsi, new_rhs); |
| 5884 | update_stmt (s: use_stmt); |
| 5885 | } |
| 5886 | |
| 5887 | return true; |
| 5888 | } |
| 5889 | |
| 5890 | /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as |
| 5891 | its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return |
| 5892 | value is true iff we converted the statement. */ |
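| | |
| | /* A rough source-level illustration (hypothetical example, assuming |
| |    32-bit int and 64-bit long long): |
| | |
| |      unsigned int mulhi (unsigned int a, unsigned int b) |
| |      { |
| |        return ((unsigned long long) a * b) >> 32; |
| |      } |
| | |
| |    The widening multiply followed by a shift by the source precision is |
| |    replaced by a single MULT_HIGHPART_EXPR when the target provides the |
| |    [us]mul_highpart pattern, avoiding the full double-width multiply.  */ |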
| 5893 | |
| 5894 | static bool |
| 5895 | convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi) |
| 5896 | { |
| 5897 | tree lhs = gimple_assign_lhs (gs: stmt); |
| 5898 | tree stype = TREE_TYPE (lhs); |
| 5899 | tree sarg0 = gimple_assign_rhs1 (gs: stmt); |
| 5900 | tree sarg1 = gimple_assign_rhs2 (gs: stmt); |
| 5901 | |
| 5902 | if (TREE_CODE (stype) != INTEGER_TYPE |
| 5903 | || TREE_CODE (sarg1) != INTEGER_CST |
| 5904 | || TREE_CODE (sarg0) != SSA_NAME |
| 5905 | || !tree_fits_uhwi_p (sarg1) |
| 5906 | || !has_single_use (var: sarg0)) |
| 5907 | return false; |
| 5908 | |
| 5909 | gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0)); |
| 5910 | if (!def) |
| 5911 | return false; |
| 5912 | |
| 5913 | enum tree_code mcode = gimple_assign_rhs_code (gs: def); |
| 5914 | if (mcode == NOP_EXPR) |
| 5915 | { |
| 5916 | tree tmp = gimple_assign_rhs1 (gs: def); |
| 5917 | if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (var: tmp)) |
| 5918 | return false; |
| 5919 | def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp)); |
| 5920 | if (!def) |
| 5921 | return false; |
| 5922 | mcode = gimple_assign_rhs_code (gs: def); |
| 5923 | } |
| 5924 | |
| 5925 | if (mcode != WIDEN_MULT_EXPR |
| 5926 | || gimple_bb (g: def) != gimple_bb (g: stmt)) |
| 5927 | return false; |
| 5928 | tree mtype = TREE_TYPE (gimple_assign_lhs (def)); |
| 5929 | if (TREE_CODE (mtype) != INTEGER_TYPE |
| 5930 | || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype)) |
| 5931 | return false; |
| 5932 | |
| 5933 | tree mop1 = gimple_assign_rhs1 (gs: def); |
| 5934 | tree mop2 = gimple_assign_rhs2 (gs: def); |
| 5935 | tree optype = TREE_TYPE (mop1); |
| 5936 | bool unsignedp = TYPE_UNSIGNED (optype); |
| 5937 | unsigned int prec = TYPE_PRECISION (optype); |
| 5938 | |
| 5939 | if (unsignedp != TYPE_UNSIGNED (mtype) |
| 5940 | || TYPE_PRECISION (mtype) != 2 * prec) |
| 5941 | return false; |
| 5942 | |
| 5943 | unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1); |
| 5944 | if (bits < prec || bits >= 2 * prec) |
| 5945 | return false; |
| 5946 | |
| 5947 | /* For the time being, require operands to have the same sign. */ |
| 5948 | if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2))) |
| 5949 | return false; |
| 5950 | |
| 5951 | machine_mode mode = TYPE_MODE (optype); |
| 5952 | optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |
| 5953 | if (optab_handler (op: tab, mode) == CODE_FOR_nothing) |
| 5954 | return false; |
| 5955 | |
| 5956 | location_t loc = gimple_location (g: stmt); |
| 5957 | tree highpart1 = build_and_insert_binop (gsi, loc, name: "highparttmp" , |
| 5958 | code: MULT_HIGHPART_EXPR, arg0: mop1, arg1: mop2); |
| 5959 | tree highpart2 = highpart1; |
| 5960 | tree ntype = optype; |
| 5961 | |
| 5962 | if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype)) |
| 5963 | { |
| 5964 | ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype) |
| 5965 | : signed_type_for (optype); |
| 5966 | highpart2 = build_and_insert_cast (gsi, loc, type: ntype, val: highpart1); |
| 5967 | } |
| 5968 | if (bits > prec) |
| 5969 | highpart2 = build_and_insert_binop (gsi, loc, name: "highparttmp" , |
| 5970 | code: RSHIFT_EXPR, arg0: highpart2, |
| 5971 | arg1: build_int_cst (ntype, bits - prec)); |
| 5972 | |
| 5973 | gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2); |
| 5974 | gsi_replace (gsi, new_stmt, true); |
| 5975 | |
| 5976 | widen_mul_stats.highpart_mults_inserted++; |
| 5977 | return true; |
| 5978 | } |
| 5979 | |
| 5980 | /* If target has spaceship<MODE>3 expander, pattern recognize |
| 5981 | <bb 2> [local count: 1073741824]: |
| 5982 | if (a_2(D) == b_3(D)) |
| 5983 | goto <bb 6>; [34.00%] |
| 5984 | else |
| 5985 | goto <bb 3>; [66.00%] |
| 5986 | |
| 5987 | <bb 3> [local count: 708669601]: |
| 5988 | if (a_2(D) < b_3(D)) |
| 5989 | goto <bb 6>; [1.04%] |
| 5990 | else |
| 5991 | goto <bb 4>; [98.96%] |
| 5992 | |
| 5993 | <bb 4> [local count: 701299439]: |
| 5994 | if (a_2(D) > b_3(D)) |
| 5995 | goto <bb 5>; [48.89%] |
| 5996 | else |
| 5997 | goto <bb 6>; [51.11%] |
| 5998 | |
| 5999 | <bb 5> [local count: 342865295]: |
| 6000 | |
| 6001 | <bb 6> [local count: 1073741824]: |
| 6002 | and turn it into: |
| 6003 | <bb 2> [local count: 1073741824]: |
| 6004 | _1 = .SPACESHIP (a_2(D), b_3(D), 0); |
| 6005 | if (_1 == 0) |
| 6006 | goto <bb 6>; [34.00%] |
| 6007 | else |
| 6008 | goto <bb 3>; [66.00%] |
| 6009 | |
| 6010 | <bb 3> [local count: 708669601]: |
| 6011 | if (_1 == -1) |
| 6012 | goto <bb 6>; [1.04%] |
| 6013 | else |
| 6014 | goto <bb 4>; [98.96%] |
| 6015 | |
| 6016 | <bb 4> [local count: 701299439]: |
| 6017 | if (_1 == 1) |
| 6018 | goto <bb 5>; [48.89%] |
| 6019 | else |
| 6020 | goto <bb 6>; [51.11%] |
| 6021 | |
| 6022 | <bb 5> [local count: 342865295]: |
| 6023 | |
| 6024 | <bb 6> [local count: 1073741824]: |
   so that the backend can emit an optimal comparison and conditional
   jump sequence.  If the
| 6027 | <bb 6> [local count: 1073741824]: |
| 6028 | above has a single PHI like: |
| 6029 | # _27 = PHI<0(2), -1(3), 2(4), 1(5)> |
| 6030 | then replace it with effectively |
| 6031 | _1 = .SPACESHIP (a_2(D), b_3(D), 1); |
| 6032 | _27 = _1; */ |
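/* A shape like the above typically results from inlining a C++ three-way
   comparison, e.g. (a sketch)
     auto c = a <=> b;
   where the PHI arguments encode the comparison-category constants
   (e.g. -1, 0, 1 and 2 for unordered in libstdc++).  */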
| 6033 | |
| 6034 | static void |
| 6035 | optimize_spaceship (gcond *stmt) |
| 6036 | { |
| 6037 | enum tree_code code = gimple_cond_code (gs: stmt); |
| 6038 | if (code != EQ_EXPR && code != NE_EXPR) |
| 6039 | return; |
| 6040 | tree arg1 = gimple_cond_lhs (gs: stmt); |
| 6041 | tree arg2 = gimple_cond_rhs (gs: stmt); |
| 6042 | if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) |
| 6043 | && !INTEGRAL_TYPE_P (TREE_TYPE (arg1))) |
| 6044 | || optab_handler (op: spaceship_optab, |
| 6045 | TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing |
| 6046 | || operand_equal_p (arg1, arg2, flags: 0)) |
| 6047 | return; |
| 6048 | |
| 6049 | basic_block bb0 = gimple_bb (g: stmt), bb1, bb2 = NULL; |
| 6050 | edge em1 = NULL, e1 = NULL, e2 = NULL; |
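  /* BB1 is the successor reached when ARG1 == ARG2 is false; it should
     contain the second comparison.  */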
| 6051 | bb1 = EDGE_SUCC (bb0, 1)->dest; |
| 6052 | if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR)) |
| 6053 | bb1 = EDGE_SUCC (bb0, 0)->dest; |
| 6054 | |
| 6055 | gcond *g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb1)); |
| 6056 | if (g == NULL |
| 6057 | || !single_pred_p (bb: bb1) |
| 6058 | || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) |
| 6059 | ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0) |
| 6060 | : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0) |
| 6061 | || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0))) |
| 6062 | || !cond_only_block_p (bb1)) |
| 6063 | return; |
| 6064 | |
| 6065 | enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) |
| 6066 | ? LT_EXPR : GT_EXPR); |
| 6067 | switch (gimple_cond_code (gs: g)) |
| 6068 | { |
| 6069 | case LT_EXPR: |
| 6070 | case LE_EXPR: |
| 6071 | break; |
| 6072 | case GT_EXPR: |
| 6073 | case GE_EXPR: |
| 6074 | ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR; |
| 6075 | break; |
| 6076 | default: |
| 6077 | return; |
| 6078 | } |
| 6079 | |
| 6080 | for (int i = 0; i < 2; ++i) |
| 6081 | { |
      /* With NaNs, </<=/>/>= are false, so the third comparison has to be
	 looked for on the false edge of whichever non-equality comparison
	 the second comparison turned out to be.  */
| 6085 | if (HONOR_NANS (TREE_TYPE (arg1)) |
| 6086 | && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0) |
| 6087 | continue; |
| 6088 | |
| 6089 | bb2 = EDGE_SUCC (bb1, i)->dest; |
| 6090 | g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb2)); |
| 6091 | if (g == NULL |
| 6092 | || !single_pred_p (bb: bb2) |
| 6093 | || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) |
| 6094 | ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0) |
| 6095 | : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0) |
| 6096 | || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0))) |
| 6097 | || !cond_only_block_p (bb2) |
| 6098 | || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest) |
| 6099 | continue; |
| 6100 | |
| 6101 | enum tree_code ccode2 |
| 6102 | = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) ? LT_EXPR : GT_EXPR); |
| 6103 | switch (gimple_cond_code (gs: g)) |
| 6104 | { |
| 6105 | case LT_EXPR: |
| 6106 | case LE_EXPR: |
| 6107 | break; |
| 6108 | case GT_EXPR: |
| 6109 | case GE_EXPR: |
| 6110 | ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR; |
| 6111 | break; |
| 6112 | default: |
| 6113 | continue; |
| 6114 | } |
| 6115 | if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2) |
| 6116 | continue; |
| 6117 | |
| 6118 | if ((ccode == LT_EXPR) |
| 6119 | ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)) |
| 6120 | { |
| 6121 | em1 = EDGE_SUCC (bb1, 1 - i); |
| 6122 | e1 = EDGE_SUCC (bb2, 0); |
| 6123 | e2 = EDGE_SUCC (bb2, 1); |
| 6124 | if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0)) |
| 6125 | std::swap (a&: e1, b&: e2); |
| 6126 | } |
| 6127 | else |
| 6128 | { |
| 6129 | e1 = EDGE_SUCC (bb1, 1 - i); |
| 6130 | em1 = EDGE_SUCC (bb2, 0); |
| 6131 | e2 = EDGE_SUCC (bb2, 1); |
| 6132 | if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0)) |
| 6133 | std::swap (a&: em1, b&: e2); |
| 6134 | } |
| 6135 | break; |
| 6136 | } |
| 6137 | |
| 6138 | if (em1 == NULL) |
| 6139 | { |
| 6140 | if ((ccode == LT_EXPR) |
| 6141 | ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0)) |
| 6142 | { |
| 6143 | em1 = EDGE_SUCC (bb1, 1); |
| 6144 | e1 = EDGE_SUCC (bb1, 0); |
| 6145 | e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; |
| 6146 | } |
| 6147 | else |
| 6148 | { |
| 6149 | em1 = EDGE_SUCC (bb1, 0); |
| 6150 | e1 = EDGE_SUCC (bb1, 1); |
| 6151 | e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; |
| 6152 | } |
| 6153 | } |
| 6154 | |
  /* Check if there is a single bb into which all failed conditions
     jump (perhaps through an empty block) and if it results in
     a single integral PHI which just sets it to -1, 0, 1, X
     (or -1, 0, 1 when NaNs can't happen).  In that case use 1 rather
     than 0 as the last .SPACESHIP argument to tell backends they might
     consider different code generation and just cast the result
     of .SPACESHIP to the PHI result.  X above is some value
     other than -1, 0, 1: 2 for libstdc++, -127 for libc++.  */
| 6163 | tree arg3 = integer_zero_node; |
| 6164 | edge e = EDGE_SUCC (bb0, 0); |
| 6165 | if (e->dest == bb1) |
| 6166 | e = EDGE_SUCC (bb0, 1); |
| 6167 | basic_block bbp = e->dest; |
| 6168 | gphi *phi = NULL; |
| 6169 | for (gphi_iterator psi = gsi_start_phis (bbp); |
| 6170 | !gsi_end_p (i: psi); gsi_next (i: &psi)) |
| 6171 | { |
| 6172 | gphi *gp = psi.phi (); |
| 6173 | tree res = gimple_phi_result (gs: gp); |
| 6174 | |
| 6175 | if (phi != NULL |
| 6176 | || virtual_operand_p (op: res) |
| 6177 | || !INTEGRAL_TYPE_P (TREE_TYPE (res)) |
| 6178 | || TYPE_PRECISION (TREE_TYPE (res)) < 2) |
| 6179 | { |
| 6180 | phi = NULL; |
| 6181 | break; |
| 6182 | } |
| 6183 | phi = gp; |
| 6184 | } |
| 6185 | if (phi |
| 6186 | && integer_zerop (gimple_phi_arg_def_from_edge (gs: phi, e)) |
| 6187 | && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3)) |
| 6188 | { |
| 6189 | HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1; |
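      /* Verify that the PHI arguments coming from E1, EM1 and E2 (possibly
	 through an empty forwarding block) are 1, -1 and, when NaNs are
	 honored, some X outside of [-1, 1], respectively.  */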
| 6190 | for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i) |
| 6191 | { |
| 6192 | edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2; |
| 6193 | if (e3->dest != bbp) |
| 6194 | { |
| 6195 | if (!empty_block_p (e3->dest) |
| 6196 | || !single_succ_p (bb: e3->dest) |
| 6197 | || single_succ (bb: e3->dest) != bbp) |
| 6198 | { |
| 6199 | phi = NULL; |
| 6200 | break; |
| 6201 | } |
| 6202 | e3 = single_succ_edge (bb: e3->dest); |
| 6203 | } |
| 6204 | tree a = gimple_phi_arg_def_from_edge (gs: phi, e: e3); |
| 6205 | if (TREE_CODE (a) != INTEGER_CST |
| 6206 | || (i == 0 && !integer_onep (a)) |
| 6207 | || (i == 1 && !integer_all_onesp (a))) |
| 6208 | { |
| 6209 | phi = NULL; |
| 6210 | break; |
| 6211 | } |
| 6212 | if (i == 2) |
| 6213 | { |
| 6214 | tree minv = TYPE_MIN_VALUE (signed_char_type_node); |
| 6215 | tree maxv = TYPE_MAX_VALUE (signed_char_type_node); |
| 6216 | widest_int w = widest_int::from (x: wi::to_wide (t: a), sgn: SIGNED); |
| 6217 | if ((w >= -1 && w <= 1) |
| 6218 | || w < wi::to_widest (t: minv) |
| 6219 | || w > wi::to_widest (t: maxv)) |
| 6220 | { |
| 6221 | phi = NULL; |
| 6222 | break; |
| 6223 | } |
| 6224 | argval = w.to_shwi (); |
| 6225 | } |
| 6226 | } |
| 6227 | if (phi) |
| 6228 | arg3 = build_int_cst (integer_type_node, |
| 6229 | TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval); |
| 6230 | } |
| 6231 | |
  /* For integral <=> comparisons, only use .SPACESHIP if its result is
     actually turned into an integer (-1, 0, 1) through the PHI handled
     above.  */
| 6234 | if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node) |
| 6235 | return; |
| 6236 | |
| 6237 | gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3); |
| 6238 | tree lhs = make_ssa_name (integer_type_node); |
| 6239 | gimple_call_set_lhs (gs: gc, lhs); |
| 6240 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
| 6241 | gsi_insert_before (&gsi, gc, GSI_SAME_STMT); |
| 6242 | |
| 6243 | wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node)); |
| 6244 | wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node)); |
| 6245 | if (HONOR_NANS (TREE_TYPE (arg1))) |
| 6246 | { |
| 6247 | if (arg3 == integer_zero_node) |
| 6248 | wmax = wi::two (TYPE_PRECISION (integer_type_node)); |
| 6249 | else if (tree_int_cst_sgn (arg3) < 0) |
| 6250 | wmin = wi::to_wide (t: arg3); |
| 6251 | else |
| 6252 | wmax = wi::to_wide (t: arg3); |
| 6253 | } |
| 6254 | int_range<1> vr (TREE_TYPE (lhs), wmin, wmax); |
| 6255 | set_range_info (lhs, vr); |
| 6256 | |
| 6257 | if (arg3 != integer_zero_node) |
| 6258 | { |
| 6259 | tree type = TREE_TYPE (gimple_phi_result (phi)); |
| 6260 | if (!useless_type_conversion_p (type, integer_type_node)) |
| 6261 | { |
| 6262 | tree tem = make_ssa_name (var: type); |
| 6263 | gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs); |
| 6264 | gsi_insert_before (&gsi, gcv, GSI_SAME_STMT); |
| 6265 | lhs = tem; |
| 6266 | } |
| 6267 | SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs); |
| 6268 | gimple_cond_set_lhs (gs: stmt, boolean_false_node); |
| 6269 | gimple_cond_set_rhs (gs: stmt, boolean_false_node); |
| 6270 | gimple_cond_set_code (gs: stmt, code: (e->flags & EDGE_TRUE_VALUE) |
| 6271 | ? EQ_EXPR : NE_EXPR); |
| 6272 | update_stmt (s: stmt); |
| 6273 | return; |
| 6274 | } |
| 6275 | |
| 6276 | gimple_cond_set_lhs (gs: stmt, lhs); |
| 6277 | gimple_cond_set_rhs (gs: stmt, integer_zero_node); |
| 6278 | update_stmt (s: stmt); |
| 6279 | |
| 6280 | gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb1)); |
| 6281 | gimple_cond_set_lhs (gs: cond, lhs); |
| 6282 | if (em1->src == bb1 && e2 != em1) |
| 6283 | { |
| 6284 | gimple_cond_set_rhs (gs: cond, integer_minus_one_node); |
| 6285 | gimple_cond_set_code (gs: cond, code: (em1->flags & EDGE_TRUE_VALUE) |
| 6286 | ? EQ_EXPR : NE_EXPR); |
| 6287 | } |
| 6288 | else |
| 6289 | { |
| 6290 | gcc_assert (e1->src == bb1 && e2 != e1); |
| 6291 | gimple_cond_set_rhs (gs: cond, integer_one_node); |
| 6292 | gimple_cond_set_code (gs: cond, code: (e1->flags & EDGE_TRUE_VALUE) |
| 6293 | ? EQ_EXPR : NE_EXPR); |
| 6294 | } |
| 6295 | update_stmt (s: cond); |
| 6296 | |
| 6297 | if (e2 != e1 && e2 != em1) |
| 6298 | { |
| 6299 | cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb2)); |
| 6300 | gimple_cond_set_lhs (gs: cond, lhs); |
| 6301 | if (em1->src == bb2) |
| 6302 | gimple_cond_set_rhs (gs: cond, integer_minus_one_node); |
| 6303 | else |
| 6304 | { |
| 6305 | gcc_assert (e1->src == bb2); |
| 6306 | gimple_cond_set_rhs (gs: cond, integer_one_node); |
| 6307 | } |
| 6308 | gimple_cond_set_code (gs: cond, |
| 6309 | code: (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR); |
| 6310 | update_stmt (s: cond); |
| 6311 | } |
| 6312 | } |
| 6313 | |
| 6314 | |
| 6315 | /* Find integer multiplications where the operands are extended from |
| 6316 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR |
| 6317 | or MULT_HIGHPART_EXPR where appropriate. */ |
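/* For example (a sketch, with 32-bit int and 64-bit long long):

     long long p = (long long) a * b;

   multiplies two sign-extended 32-bit values, so the MULT_EXPR can be
   replaced by a WIDEN_MULT_EXPR operating directly on A and B.  */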
| 6318 | |
| 6319 | namespace { |
| 6320 | |
| 6321 | const pass_data pass_data_optimize_widening_mul = |
| 6322 | { |
| 6323 | .type: GIMPLE_PASS, /* type */ |
| 6324 | .name: "widening_mul" , /* name */ |
| 6325 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
| 6326 | .tv_id: TV_TREE_WIDEN_MUL, /* tv_id */ |
| 6327 | PROP_ssa, /* properties_required */ |
| 6328 | .properties_provided: 0, /* properties_provided */ |
| 6329 | .properties_destroyed: 0, /* properties_destroyed */ |
| 6330 | .todo_flags_start: 0, /* todo_flags_start */ |
| 6331 | TODO_update_ssa, /* todo_flags_finish */ |
| 6332 | }; |
| 6333 | |
| 6334 | class pass_optimize_widening_mul : public gimple_opt_pass |
| 6335 | { |
| 6336 | public: |
| 6337 | pass_optimize_widening_mul (gcc::context *ctxt) |
| 6338 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) |
| 6339 | {} |
| 6340 | |
| 6341 | /* opt_pass methods: */ |
| 6342 | bool gate (function *) final override |
| 6343 | { |
| 6344 | return flag_expensive_optimizations && optimize; |
| 6345 | } |
| 6346 | |
| 6347 | unsigned int execute (function *) final override; |
| 6348 | |
| 6349 | }; // class pass_optimize_widening_mul |
| 6350 | |
| 6351 | /* Walker class to perform the transformation in reverse dominance order. */ |
| 6352 | |
| 6353 | class math_opts_dom_walker : public dom_walker |
| 6354 | { |
| 6355 | public: |
  /* Constructor.  CFG_CHANGED_P is a pointer to a boolean flag that will
     be set if walking modifies the CFG.  */
| 6358 | |
| 6359 | math_opts_dom_walker (bool *cfg_changed_p) |
| 6360 | : dom_walker (CDI_DOMINATORS), m_last_result_set (), |
| 6361 | m_cfg_changed_p (cfg_changed_p) {} |
| 6362 | |
| 6363 | /* The actual actions performed in the walk. */ |
| 6364 | |
| 6365 | void after_dom_children (basic_block) final override; |
| 6366 | |
| 6367 | /* Set of results of chains of multiply and add statement combinations that |
| 6368 | were not transformed into FMAs because of active deferring. */ |
| 6369 | hash_set<tree> m_last_result_set; |
| 6370 | |
  /* Pointer to a caller-provided flag that needs to be set if the CFG has
     been modified.  */
| 6373 | bool *m_cfg_changed_p; |
| 6374 | }; |
| 6375 | |
| 6376 | void |
| 6377 | math_opts_dom_walker::after_dom_children (basic_block bb) |
| 6378 | { |
| 6379 | gimple_stmt_iterator gsi; |
| 6380 | |
| 6381 | fma_deferring_state fma_state (param_avoid_fma_max_bits > 0); |
| 6382 | |
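  /* Saturation patterns that produce their result through a PHI are
     matched first; when one is recognized, the PHI is removed.  */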
| 6383 | for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (i: psi); |
| 6384 | psi = psi_next) |
| 6385 | { |
| 6386 | psi_next = psi; |
| 6387 | gsi_next (i: &psi_next); |
| 6388 | |
| 6389 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
| 6390 | gphi *phi = psi.phi (); |
| 6391 | |
| 6392 | if (match_saturation_add (gsi: &gsi, phi) |
| 6393 | || match_saturation_sub (gsi: &gsi, phi) |
| 6394 | || match_saturation_trunc (gsi: &gsi, phi)) |
| 6395 | remove_phi_node (&psi, /* release_lhs_p */ false); |
| 6396 | } |
| 6397 | |
| 6398 | for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);) |
| 6399 | { |
| 6400 | gimple *stmt = gsi_stmt (i: gsi); |
| 6401 | enum tree_code code; |
| 6402 | |
| 6403 | if (is_gimple_assign (gs: stmt)) |
| 6404 | { |
| 6405 | code = gimple_assign_rhs_code (gs: stmt); |
| 6406 | switch (code) |
| 6407 | { |
| 6408 | case MULT_EXPR: |
| 6409 | if (!convert_mult_to_widen (stmt, gsi: &gsi) |
| 6410 | && !convert_expand_mult_copysign (stmt, gsi: &gsi) |
| 6411 | && convert_mult_to_fma (mul_stmt: stmt, |
| 6412 | op1: gimple_assign_rhs1 (gs: stmt), |
| 6413 | op2: gimple_assign_rhs2 (gs: stmt), |
| 6414 | state: &fma_state)) |
| 6415 | { |
| 6416 | gsi_remove (&gsi, true); |
| 6417 | release_defs (stmt); |
| 6418 | continue; |
| 6419 | } |
| 6420 | match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p); |
| 6421 | match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6422 | break; |
| 6423 | |
| 6424 | case PLUS_EXPR: |
| 6425 | match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6426 | match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6427 | /* fall-through */ |
| 6428 | case MINUS_EXPR: |
| 6429 | if (!convert_plusminus_to_widen (gsi: &gsi, stmt, code)) |
| 6430 | { |
| 6431 | match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p); |
| 6432 | if (gsi_stmt (i: gsi) == stmt) |
| 6433 | match_uaddc_usubc (gsi: &gsi, stmt, code); |
| 6434 | } |
| 6435 | break; |
| 6436 | |
| 6437 | case BIT_NOT_EXPR: |
| 6438 | if (match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p)) |
| 6439 | continue; |
| 6440 | break; |
| 6441 | |
| 6442 | case TRUNC_MOD_EXPR: |
| 6443 | convert_to_divmod (stmt: as_a<gassign *> (p: stmt)); |
| 6444 | break; |
| 6445 | |
| 6446 | case RSHIFT_EXPR: |
| 6447 | convert_mult_to_highpart (stmt: as_a<gassign *> (p: stmt), gsi: &gsi); |
| 6448 | break; |
| 6449 | |
| 6450 | case BIT_IOR_EXPR: |
| 6451 | match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6452 | match_unsigned_saturation_trunc (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6453 | /* fall-through */ |
| 6454 | case BIT_XOR_EXPR: |
| 6455 | match_uaddc_usubc (gsi: &gsi, stmt, code); |
| 6456 | break; |
| 6457 | |
| 6458 | case EQ_EXPR: |
| 6459 | case NE_EXPR: |
| 6460 | case LE_EXPR: |
| 6461 | case GT_EXPR: |
| 6462 | match_single_bit_test (gsi: &gsi, stmt); |
| 6463 | break; |
| 6464 | |
| 6465 | case COND_EXPR: |
| 6466 | case BIT_AND_EXPR: |
| 6467 | match_unsigned_saturation_sub (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6468 | break; |
| 6469 | |
| 6470 | case NOP_EXPR: |
| 6471 | match_unsigned_saturation_trunc (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6472 | match_saturation_add_with_assign (gsi: &gsi, stmt: as_a<gassign *> (p: stmt)); |
| 6473 | break; |
| 6474 | |
| 6475 | default:; |
| 6476 | } |
| 6477 | } |
| 6478 | else if (is_gimple_call (gs: stmt)) |
| 6479 | { |
| 6480 | switch (gimple_call_combined_fn (stmt)) |
| 6481 | { |
| 6482 | CASE_CFN_POW: |
| 6483 | if (gimple_call_lhs (gs: stmt) |
| 6484 | && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST |
| 6485 | && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), |
| 6486 | &dconst2) |
| 6487 | && convert_mult_to_fma (mul_stmt: stmt, |
| 6488 | op1: gimple_call_arg (gs: stmt, index: 0), |
| 6489 | op2: gimple_call_arg (gs: stmt, index: 0), |
| 6490 | state: &fma_state)) |
| 6491 | { |
| 6492 | unlink_stmt_vdef (stmt); |
| 6493 | if (gsi_remove (&gsi, true) |
| 6494 | && gimple_purge_dead_eh_edges (bb)) |
| 6495 | *m_cfg_changed_p = true; |
| 6496 | release_defs (stmt); |
| 6497 | continue; |
| 6498 | } |
| 6499 | break; |
| 6500 | |
| 6501 | case CFN_COND_MUL: |
| 6502 | if (convert_mult_to_fma (mul_stmt: stmt, |
| 6503 | op1: gimple_call_arg (gs: stmt, index: 1), |
| 6504 | op2: gimple_call_arg (gs: stmt, index: 2), |
| 6505 | state: &fma_state, |
| 6506 | mul_cond: gimple_call_arg (gs: stmt, index: 0))) |
| 6507 | |
| 6508 | { |
| 6509 | gsi_remove (&gsi, true); |
| 6510 | release_defs (stmt); |
| 6511 | continue; |
| 6512 | } |
| 6513 | break; |
| 6514 | |
| 6515 | case CFN_COND_LEN_MUL: |
| 6516 | if (convert_mult_to_fma (mul_stmt: stmt, |
| 6517 | op1: gimple_call_arg (gs: stmt, index: 1), |
| 6518 | op2: gimple_call_arg (gs: stmt, index: 2), |
| 6519 | state: &fma_state, |
| 6520 | mul_cond: gimple_call_arg (gs: stmt, index: 0), |
| 6521 | mul_len: gimple_call_arg (gs: stmt, index: 4), |
| 6522 | mul_bias: gimple_call_arg (gs: stmt, index: 5))) |
| 6523 | |
| 6524 | { |
| 6525 | gsi_remove (&gsi, true); |
| 6526 | release_defs (stmt); |
| 6527 | continue; |
| 6528 | } |
| 6529 | break; |
| 6530 | |
| 6531 | case CFN_LAST: |
| 6532 | cancel_fma_deferring (state: &fma_state); |
| 6533 | break; |
| 6534 | |
| 6535 | default: |
| 6536 | break; |
| 6537 | } |
| 6538 | } |
| 6539 | else if (gimple_code (g: stmt) == GIMPLE_COND) |
| 6540 | { |
| 6541 | match_single_bit_test (gsi: &gsi, stmt); |
| 6542 | optimize_spaceship (stmt: as_a <gcond *> (p: stmt)); |
| 6543 | } |
| 6544 | gsi_next (i: &gsi); |
| 6545 | } |
| 6546 | if (fma_state.m_deferring_p |
| 6547 | && fma_state.m_initial_phi) |
| 6548 | { |
| 6549 | gcc_checking_assert (fma_state.m_last_result); |
| 6550 | if (!last_fma_candidate_feeds_initial_phi (state: &fma_state, |
| 6551 | last_result_set: &m_last_result_set)) |
| 6552 | cancel_fma_deferring (state: &fma_state); |
| 6553 | else |
| 6554 | m_last_result_set.add (k: fma_state.m_last_result); |
| 6555 | } |
| 6556 | } |
| 6557 | |
| 6558 | |
| 6559 | unsigned int |
| 6560 | pass_optimize_widening_mul::execute (function *fun) |
| 6561 | { |
| 6562 | bool cfg_changed = false; |
| 6563 | |
| 6564 | memset (s: &widen_mul_stats, c: 0, n: sizeof (widen_mul_stats)); |
| 6565 | calculate_dominance_info (CDI_DOMINATORS); |
| 6566 | renumber_gimple_stmt_uids (cfun); |
| 6567 | |
| 6568 | math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
| 6569 | |
| 6570 | statistics_counter_event (fun, "widening multiplications inserted" , |
| 6571 | widen_mul_stats.widen_mults_inserted); |
| 6572 | statistics_counter_event (fun, "widening maccs inserted" , |
| 6573 | widen_mul_stats.maccs_inserted); |
| 6574 | statistics_counter_event (fun, "fused multiply-adds inserted" , |
| 6575 | widen_mul_stats.fmas_inserted); |
| 6576 | statistics_counter_event (fun, "divmod calls inserted" , |
| 6577 | widen_mul_stats.divmod_calls_inserted); |
| 6578 | statistics_counter_event (fun, "highpart multiplications inserted" , |
| 6579 | widen_mul_stats.highpart_mults_inserted); |
| 6580 | |
| 6581 | return cfg_changed ? TODO_cleanup_cfg : 0; |
| 6582 | } |
| 6583 | |
| 6584 | } // anon namespace |
| 6585 | |
| 6586 | gimple_opt_pass * |
| 6587 | make_pass_optimize_widening_mul (gcc::context *ctxt) |
| 6588 | { |
| 6589 | return new pass_optimize_widening_mul (ctxt); |
| 6590 | } |
| 6591 | |