/* Generic partial redundancy elimination with lazy code motion support.
   Copyright (C) 1998-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* These routines are meant to be used by various optimization
   passes which can be modeled as lazy code motion problems, including,
   but not limited to:

     * Traditional partial redundancy elimination.

     * Placement of caller/callee register save/restores.

     * Load/store motion.

     * Copy motion.

     * Conversion of flat register files to a stacked register
       model.

     * Dead load/store elimination.

   These routines accept as input:

     * Basic block information (number of blocks, lists of
       predecessors and successors).  Note the granularity
       does not need to be basic blocks; the nodes could instead
       be statements or functions.

     * Bitmaps of local properties (computed, transparent and
       anticipatable expressions).

   The output of these routines is a bitmap of redundant computations
   and a bitmap of optimal placement points.  */
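
/* As a rough illustration of the intended use (a sketch only: the
   property vectors and the rewriting step are supplied by the client
   pass, and N_EXPRS, TRANSP, AVLOC, ANTLOC and KILL below stand for
   whatever that pass computed):

     sbitmap *insert, *del;
     struct edge_list *edges
       = pre_edge_lcm (n_exprs, transp, avloc, antloc, kill,
                       &insert, &del);
     ... insert expression I on edge E when bit I of insert[E] is set,
         and delete the computation in block B when bit I of del[B]
         is set ...
     sbitmap_vector_free (insert);
     sbitmap_vector_free (del);
     free_edge_list (edges);  */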

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfganal.h"
#include "lcm.h"

/* Edge based LCM routines.  */
static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
                             sbitmap *, sbitmap *);
static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
                                   sbitmap *, sbitmap *, sbitmap *, sbitmap *);

/* Edge based LCM routines on a reverse flowgraph.  */
static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
                              sbitmap *, sbitmap *, sbitmap *);
static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
                               sbitmap *, sbitmap *);
static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
                                       sbitmap *, sbitmap *, sbitmap *,
                                       sbitmap *);

/* Edge based LCM routines.  */

/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.  */
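
/* In equation form, the fixed point computed below is:

     ANTOUT(bb) = 0 if bb is a predecessor of the EXIT block,
                  otherwise the intersection of ANTIN(succ) over
                  all successors of bb
     ANTIN(bb)  = ANTLOC(bb) | (TRANSP(bb) & ANTOUT(bb))

   iterated from an optimistic all-ones initialization of ANTIN.  */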

void
compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
                       sbitmap *antout)
{
  basic_block bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  bitmap_vector_ones (antin, last_basic_block_for_fn (cfun));

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  Use reverse postorder
     on the inverted graph to make the backward dataflow problem require
     fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int n = inverted_rev_post_order_compute (cfun, rpo);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
          || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        continue;
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    e->src->aux = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR_FOR_FN (cfun))
        /* Do not clear the aux field for blocks which are predecessors of
           the EXIT block.  That way we never add them to the worklist
           again.  */
        bitmap_clear (antout[bb->index]);
      else
        {
          /* Clear the aux field of this block so that it can be added to
             the worklist again if necessary.  */
          bb->aux = NULL;
          bitmap_intersection_of_succs (antout[bb->index], antin, bb);
        }

      if (bitmap_or_and (antin[bb->index], antloc[bb->index],
                         transp[bb->index], antout[bb->index]))
        /* If the in state of this block changed, then we need
           to add the predecessors of this block to the worklist
           if they are not already on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (!e->src->aux && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->src;
              e->src->aux = e;
              qlen++;
              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the earliest vector for edge based LCM.  */
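
/* In equation form, for an edge E = (pred, succ):

     EARLIEST(E) = ANTIN(succ) & ~AVOUT(pred)
                   & (KILL(pred) | ~ANTOUT(pred))

   with the boundary cases EARLIEST(E) = ANTIN(succ) when pred is the
   ENTRY block and EARLIEST(E) = 0 when succ is the EXIT block.  */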

void
compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
                  sbitmap *antout, sbitmap *avout, sbitmap *kill,
                  sbitmap *earliest)
{
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  auto_sbitmap difference (n_exprs), temp_bitmap (n_exprs);
  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (pred == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        bitmap_copy (earliest[x], antin[succ->index]);
      else
        {
          if (succ == EXIT_BLOCK_PTR_FOR_FN (cfun))
            bitmap_clear (earliest[x]);
          else
            {
              bitmap_and_compl (difference, antin[succ->index],
                                avout[pred->index]);
              bitmap_not (temp_bitmap, antout[pred->index]);
              bitmap_and_or (earliest[x], difference,
                             kill[pred->index], temp_bitmap);
            }
        }
    }
}

/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

     laterin(ENTRY) is defined as all 0's
     later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
     laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, any time we change later(e) for an edge e out
   of bb, we need to add e's destination to the worklist if it is
   not already on the worklist.

   Boundary conditions:

     We prime the worklist with all the normal basic blocks.  The ENTRY
     block can never be added to the worklist since it is never the
     successor of any block.  We explicitly prevent the EXIT block from
     being added to the worklist.

     We optimistically initialize LATER.  That is the only time this
     routine will compute LATER for an edge out of the entry block since
     the entry block is never on the worklist.  Thus, LATERIN is neither
     used nor computed for the ENTRY block.

     Since the EXIT block is never added to the worklist, we will neither
     use nor compute LATERIN for the exit block.  Edges which reach the
     EXIT block are handled in the normal fashion inside the loop.
     However, the insertion/deletion computation needs LATERIN(EXIT),
     so we have to compute it.  */
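
/* In equation form, for a basic block b and an edge E = (p, s):

     LATERIN(b) = intersection of LATER(E) over all edges E entering b
     LATER(E)   = EARLIEST(E) | (LATERIN(p) & ~ANTLOC(p))

   iterated to a maximal fixed point from an all-ones LATER, with
   LATER pinned to EARLIEST on the edges out of the ENTRY block.  */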

static void
compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
                 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  bitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    bitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  */
  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
        qout = worklist;

      /* Compute LATERIN as the intersection of LATER for each incoming
         edge to BB.  */
      bitmap_ones (laterin[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->preds)
        bitmap_and (laterin[bb->index], laterin[bb->index],
                    later[(size_t) e->aux]);

      /* Calculate LATER for all outgoing edges of BB.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bitmap_ior_and_compl (later[(size_t) e->aux],
                                  earliest[(size_t) e->aux],
                                  laterin[bb->index],
                                  antloc[bb->index])
            /* If LATER for an outgoing edge was changed, then we need
               to add the target of the outgoing edge to the worklist.  */
            && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0)
          {
            *qin++ = e->dest;
            e->dest->aux = e;
            qlen++;
            if (qin >= qend)
              qin = worklist;
          }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  bitmap_ones (laterin[last_basic_block_for_fn (cfun)]);
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    bitmap_and (laterin[last_basic_block_for_fn (cfun)],
                laterin[last_basic_block_for_fn (cfun)],
                later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}

/* Compute the insertion and deletion points for edge based LCM.  */
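
/* In equation form, for a basic block b and an edge E = (p, s):

     DELETE(b) = ANTLOC(b) & ~LATERIN(b)
     INSERT(E) = LATER(E) & ~LATERIN(s)

   where LATERIN(EXIT) lives in the extra element of the LATERIN
   vector.  */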

static void
compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
                       sbitmap *later, sbitmap *laterin, sbitmap *insert,
                       sbitmap *del)
{
  int x;
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    bitmap_and_compl (del[bb->index], antloc[bb->index],
                      laterin[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);

      if (b == EXIT_BLOCK_PTR_FOR_FN (cfun))
        bitmap_and_compl (insert[x], later[x],
                          laterin[last_basic_block_for_fn (cfun)]);
      else
        bitmap_and_compl (insert[x], later[x], laterin[b->index]);
    }
}

/* Given local properties TRANSP, AVLOC, ANTLOC, KILL, return the insert
   and delete vectors for edge-based LCM and fill in the AVIN and AVOUT
   bitmaps.  Returns an edge list which is used to map the insert vector
   to what edge an expression should be inserted on.  */
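
/* This routine chains the pieces above together: it computes global
   availability and anticipatability, derives EARLIEST for every edge,
   iterates LATER/LATERIN, and finally emits the INSERT and DELETE
   vectors.  */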

struct edge_list *
pre_edge_lcm_avs (int n_exprs, sbitmap *transp,
                  sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
                  sbitmap *avin, sbitmap *avout,
                  sbitmap **insert, sbitmap **del)
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      fprintf (dump_file, "Edge List:\n");
      verify_edge_list (dump_file, edge_list);
      print_edge_list (dump_file, edge_list);
      dump_bitmap_vector (dump_file, "transp", "", transp,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "antloc", "", antloc,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "avloc", "", avloc,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "kill", "", kill,
                          last_basic_block_for_fn (cfun));
    }
#endif

  /* Compute global availability.  */
  compute_available (avloc, kill, avout, avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "antin", "", antin,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "antout", "", antout,
                          last_basic_block_for_fn (cfun));
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    dump_bitmap_vector (dump_file, "earliest", "", earliest, num_edges);
#endif

  sbitmap_vector_free (antout);
  sbitmap_vector_free (antin);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun) + 1,
                                  n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "laterin", "", laterin,
                          last_basic_block_for_fn (cfun) + 1);
      dump_bitmap_vector (dump_file, "later", "", later, num_edges);
    }
#endif

  sbitmap_vector_free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *del = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  bitmap_vector_clear (*insert, num_edges);
  bitmap_vector_clear (*del, last_basic_block_for_fn (cfun));
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *del);

  sbitmap_vector_free (laterin);
  sbitmap_vector_free (later);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "pre_insert_map", "", *insert, num_edges);
      dump_bitmap_vector (dump_file, "pre_delete_map", "", *del,
                          last_basic_block_for_fn (cfun));
    }
#endif

  return edge_list;
}

/* Wrapper to allocate avin/avout and call pre_edge_lcm_avs.  */

struct edge_list *
pre_edge_lcm (int n_exprs, sbitmap *transp,
              sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
              sbitmap **insert, sbitmap **del)
{
  struct edge_list *edge_list;
  sbitmap *avin, *avout;

  avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);

  edge_list = pre_edge_lcm_avs (n_exprs, transp, avloc, antloc, kill,
                                avin, avout, insert, del);

  sbitmap_vector_free (avout);
  sbitmap_vector_free (avin);

  return edge_list;
}

/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors,
   iterating until a fixed point is reached.  */
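
/* In equation form, the fixed point computed below is:

     AVIN(bb)  = 0 if bb is a successor of the ENTRY block,
                 otherwise the intersection of AVOUT(pred) over
                 all predecessors of bb
     AVOUT(bb) = AVLOC(bb) | (AVIN(bb) & ~KILL(bb))

   iterated from an optimistic all-ones initialization of AVOUT.  */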

void
compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
                   sbitmap *avin)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist =
    XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* We want a maximal solution.  */
  bitmap_vector_ones (avout, last_basic_block_for_fn (cfun));

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  Use reverse postorder
     to make the forward dataflow problem require fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
         intersection of avouts is the null set.  We can identify such blocks
         by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        /* Do not clear the aux field for blocks which are successors of the
           ENTRY block.  That way we never add them to the worklist again.  */
        bitmap_clear (avin[bb->index]);
      else
        {
          /* Clear the aux field of this block so that it can be added to
             the worklist again if necessary.  */
          bb->aux = NULL;
          bitmap_intersection_of_preds (avin[bb->index], avout, bb);
        }

      if (bitmap_ior_and_compl (avout[bb->index], avloc[bb->index],
                                avin[bb->index], kill[bb->index]))
        /* If the out state of this block changed, then we need
           to add the successors of this block to the worklist
           if they are not already on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->dest;
              e->dest->aux = e;
              qlen++;

              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the farthest vector for edge based LCM.  */
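
/* In equation form, for an edge E = (pred, succ) on the reverse problem:

     FARTHEST(E) = ST_AVOUT(pred) & ~ST_ANTIN(succ)
                   & (KILL(succ) | ~ST_AVIN(succ))

   with the boundary cases FARTHEST(E) = ST_AVOUT(pred) when succ is
   the EXIT block and FARTHEST(E) = 0 when pred is the ENTRY block.  */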

static void
compute_farthest (struct edge_list *edge_list, int n_exprs,
                  sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
                  sbitmap *kill, sbitmap *farthest)
{
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  auto_sbitmap difference (n_exprs), temp_bitmap (n_exprs);
  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (succ == EXIT_BLOCK_PTR_FOR_FN (cfun))
        bitmap_copy (farthest[x], st_avout[pred->index]);
      else
        {
          if (pred == ENTRY_BLOCK_PTR_FOR_FN (cfun))
            bitmap_clear (farthest[x]);
          else
            {
              bitmap_and_compl (difference, st_avout[pred->index],
                                st_antin[succ->index]);
              bitmap_not (temp_bitmap, st_avin[succ->index]);
              bitmap_and_or (farthest[x], difference,
                             kill[succ->index], temp_bitmap);
            }
        }
    }
}

/* Compute nearer and nearerout vectors for edge based LCM.

   This is the mirror of compute_laterin; additional comments on the
   implementation can be found before compute_laterin.  */
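
/* In equation form, for a basic block b and an edge E = (p, s):

     NEAREROUT(b) = intersection of NEARER(E) over all edges E leaving b
     NEARER(E)    = FARTHEST(E) | (NEAREROUT(s) & ~ST_AVLOC(s))

   iterated to a maximal fixed point from an all-ones NEARER, with
   NEARER pinned to FARTHEST on the edges into the EXIT block.  */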

static void
compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
                   sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *tos, bb;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1);

  /* Initialize NEARER for each edge and build a mapping from an edge to
     its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  bitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    bitmap_copy (nearer[(size_t) e->aux], farthest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      *tos++ = bb;
      bb->aux = bb;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the first entry off the worklist.  */
      bb = *--tos;
      bb->aux = NULL;

      /* Compute the intersection of NEARER for each outgoing edge from B.  */
      bitmap_ones (nearerout[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
        bitmap_and (nearerout[bb->index], nearerout[bb->index],
                    nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
        if (bitmap_ior_and_compl (nearer[(size_t) e->aux],
                                  farthest[(size_t) e->aux],
                                  nearerout[e->dest->index],
                                  st_avloc[e->dest->index])
            /* If NEARER for an incoming edge was changed, then we need
               to add the source of the incoming edge to the worklist.  */
            && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && e->src->aux == 0)
          {
            *tos++ = e->src;
            e->src->aux = e;
          }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  bitmap_ones (nearerout[last_basic_block_for_fn (cfun)]);
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    bitmap_and (nearerout[last_basic_block_for_fn (cfun)],
                nearerout[last_basic_block_for_fn (cfun)],
                nearer[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (tos);
}

/* Compute the insertion and deletion points for edge-based reverse LCM.  */
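
/* In equation form, for a basic block b and an edge E = (p, s):

     DELETE(b) = ST_AVLOC(b) & ~NEAREROUT(b)
     INSERT(E) = NEARER(E) & ~NEAREROUT(p)

   where NEAREROUT(ENTRY) lives in the extra element of the NEAREROUT
   vector.  */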

static void
compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
                           sbitmap *nearer, sbitmap *nearerout,
                           sbitmap *insert, sbitmap *del)
{
  int x;
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    bitmap_and_compl (del[bb->index], st_avloc[bb->index],
                      nearerout[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
      if (b == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        bitmap_and_compl (insert[x], nearer[x],
                          nearerout[last_basic_block_for_fn (cfun)]);
      else
        bitmap_and_compl (insert[x], nearer[x], nearerout[b->index]);
    }
}

/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL, return the
   insert and delete vectors for edge-based reverse LCM.  Returns an
   edge list which is used to map the insert vector to what edge
   an expression should be inserted on.  */
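
/* The reverse entry point is driven the same way as pre_edge_lcm in
   the sketch near the top of this file, only with the reverse-problem
   local properties; again a sketch, not a prescription:

     sbitmap *insert, *del;
     struct edge_list *edges
       = pre_edge_rev_lcm (n_exprs, transp, st_avloc, st_antloc, kill,
                           &insert, &del);
     ... rewrite using insert[] and del[], then free the vectors and
         the edge list as before ...  */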

struct edge_list *
pre_edge_rev_lcm (int n_exprs, sbitmap *transp,
                  sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
                  sbitmap **insert, sbitmap **del)
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  st_antout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  bitmap_vector_clear (st_antin, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (st_antout, last_basic_block_for_fn (cfun));
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global availability.  */
  st_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  st_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      fprintf (dump_file, "Edge List:\n");
      verify_edge_list (dump_file, edge_list);
      print_edge_list (dump_file, edge_list);
      dump_bitmap_vector (dump_file, "transp", "", transp,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_avloc", "", st_avloc,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_antloc", "", st_antloc,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_antin", "", st_antin,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_antout", "", st_antout,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_kill", "", kill,
                          last_basic_block_for_fn (cfun));
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "st_avout", "", st_avout,
                          last_basic_block_for_fn (cfun));
      dump_bitmap_vector (dump_file, "st_avin", "", st_avin,
                          last_basic_block_for_fn (cfun));
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
                    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    dump_bitmap_vector (dump_file, "farthest", "", farthest, num_edges);
#endif

  sbitmap_vector_free (st_antin);
  sbitmap_vector_free (st_antout);

  sbitmap_vector_free (st_avin);
  sbitmap_vector_free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun) + 1,
                                    n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "nearerout", "", nearerout,
                          last_basic_block_for_fn (cfun) + 1);
      dump_bitmap_vector (dump_file, "nearer", "", nearer, num_edges);
    }
#endif

  sbitmap_vector_free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *del = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
                             *insert, *del);

  sbitmap_vector_free (nearerout);
  sbitmap_vector_free (nearer);

#ifdef LCM_DEBUG_INFO
  if (dump_file)
    {
      dump_bitmap_vector (dump_file, "pre_insert_map", "", *insert, num_edges);
      dump_bitmap_vector (dump_file, "pre_delete_map", "", *del,
                          last_basic_block_for_fn (cfun));
    }
#endif
  return edge_list;
}