1 | /* Loop manipulation code for GNU compiler. |
2 | Copyright (C) 2002-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "rtl.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "cfghooks.h" |
28 | #include "cfganal.h" |
29 | #include "cfgloop.h" |
30 | #include "gimple-iterator.h" |
31 | #include "gimplify-me.h" |
32 | #include "tree-ssa-loop-manip.h" |
33 | #include "dumpfile.h" |
34 | #include "sreal.h" |
35 | |
36 | static void copy_loops_to (class loop **, int, |
37 | class loop *); |
38 | static void loop_redirect_edge (edge, basic_block); |
39 | static void remove_bbs (basic_block *, int); |
40 | static bool rpe_enum_p (const_basic_block, const void *); |
41 | static int find_path (edge, basic_block **); |
42 | static void fix_loop_placements (class loop *, bool *); |
43 | static bool fix_bb_placement (basic_block); |
44 | static void fix_bb_placements (basic_block, bool *, bitmap); |
45 | |
46 | /* Checks whether basic block BB is dominated by DATA. */ |
47 | static bool |
48 | rpe_enum_p (const_basic_block bb, const void *data) |
49 | { |
50 | return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data); |
51 | } |
52 | |
53 | /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */ |
54 | |
55 | static void |
56 | remove_bbs (basic_block *bbs, int nbbs) |
57 | { |
58 | int i; |
59 | |
60 | for (i = 0; i < nbbs; i++) |
61 | delete_basic_block (bbs[i]); |
62 | } |
63 | |
64 | /* Find path -- i.e. the basic blocks dominated by edge E and put them |
65 | into array BBS, that will be allocated large enough to contain them. |
66 | E->dest must have exactly one predecessor for this to work (it is |
67 | easy to achieve and we do not put it here because we do not want to |
68 | alter anything by this function). The number of basic blocks in the |
69 | path is returned. */ |
70 | static int |
71 | find_path (edge e, basic_block **bbs) |
72 | { |
73 | gcc_assert (EDGE_COUNT (e->dest->preds) <= 1); |
74 | |
75 | /* Find bbs in the path. */ |
76 | *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
77 | return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs, |
78 | n_basic_blocks_for_fn (cfun), e->dest); |
79 | } |
80 | |
81 | /* Fix placement of basic block BB inside loop hierarchy -- |
82 | Let L be a loop to that BB belongs. Then every successor of BB must either |
83 | 1) belong to some superloop of loop L, or |
84 | 2) be a header of loop K such that K->outer is superloop of L |
85 | Returns true if we had to move BB into other loop to enforce this condition, |
86 | false if the placement of BB was already correct (provided that placements |
87 | of its successors are correct). */ |
88 | static bool |
89 | fix_bb_placement (basic_block bb) |
90 | { |
91 | edge e; |
92 | edge_iterator ei; |
93 | class loop *loop = current_loops->tree_root, *act; |
94 | |
95 | FOR_EACH_EDGE (e, ei, bb->succs) |
96 | { |
97 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
98 | continue; |
99 | |
100 | act = e->dest->loop_father; |
101 | if (act->header == e->dest) |
102 | act = loop_outer (loop: act); |
103 | |
104 | if (flow_loop_nested_p (loop, act)) |
105 | loop = act; |
106 | } |
107 | |
108 | if (loop == bb->loop_father) |
109 | return false; |
110 | |
111 | remove_bb_from_loops (bb); |
112 | add_bb_to_loop (bb, loop); |
113 | |
114 | return true; |
115 | } |
116 | |
117 | /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop |
118 | of LOOP to that leads at least one exit edge of LOOP, and set it |
119 | as the immediate superloop of LOOP. Return true if the immediate superloop |
120 | of LOOP changed. |
121 | |
122 | IRRED_INVALIDATED is set to true if a change in the loop structures might |
123 | invalidate the information about irreducible regions. */ |
124 | |
125 | static bool |
126 | fix_loop_placement (class loop *loop, bool *irred_invalidated) |
127 | { |
128 | unsigned i; |
129 | edge e; |
130 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
131 | class loop *father = current_loops->tree_root, *act; |
132 | bool ret = false; |
133 | |
134 | FOR_EACH_VEC_ELT (exits, i, e) |
135 | { |
136 | act = find_common_loop (loop, e->dest->loop_father); |
137 | if (flow_loop_nested_p (father, act)) |
138 | father = act; |
139 | } |
140 | |
141 | if (father != loop_outer (loop)) |
142 | { |
143 | for (act = loop_outer (loop); act != father; act = loop_outer (loop: act)) |
144 | act->num_nodes -= loop->num_nodes; |
145 | flow_loop_tree_node_remove (loop); |
146 | flow_loop_tree_node_add (father, loop); |
147 | |
148 | /* The exit edges of LOOP no longer exits its original immediate |
149 | superloops; remove them from the appropriate exit lists. */ |
150 | FOR_EACH_VEC_ELT (exits, i, e) |
151 | { |
152 | /* We may need to recompute irreducible loops. */ |
153 | if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
154 | *irred_invalidated = true; |
155 | rescan_loop_exit (e, false, false); |
156 | } |
157 | |
158 | ret = true; |
159 | } |
160 | |
161 | return ret; |
162 | } |
163 | |
164 | /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e. |
165 | enforce condition stated in description of fix_bb_placement. We |
166 | start from basic block FROM that had some of its successors removed, so that |
167 | his placement no longer has to be correct, and iteratively fix placement of |
168 | its predecessors that may change if placement of FROM changed. Also fix |
169 | placement of subloops of FROM->loop_father, that might also be altered due |
170 | to this change; the condition for them is similar, except that instead of |
171 | successors we consider edges coming out of the loops. |
172 | |
173 | If the changes may invalidate the information about irreducible regions, |
174 | IRRED_INVALIDATED is set to true. |
175 | |
176 | If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with |
177 | changed loop_father are collected there. */ |
178 | |
179 | static void |
180 | fix_bb_placements (basic_block from, |
181 | bool *irred_invalidated, |
182 | bitmap loop_closed_ssa_invalidated) |
183 | { |
184 | basic_block *queue, *qtop, *qbeg, *qend; |
185 | class loop *base_loop, *target_loop; |
186 | edge e; |
187 | |
188 | /* We pass through blocks back-reachable from FROM, testing whether some |
189 | of their successors moved to outer loop. It may be necessary to |
190 | iterate several times, but it is finite, as we stop unless we move |
191 | the basic block up the loop structure. The whole story is a bit |
192 | more complicated due to presence of subloops, those are moved using |
193 | fix_loop_placement. */ |
194 | |
195 | base_loop = from->loop_father; |
196 | /* If we are already in the outermost loop, the basic blocks cannot be moved |
197 | outside of it. If FROM is the header of the base loop, it cannot be moved |
198 | outside of it, either. In both cases, we can end now. */ |
199 | if (base_loop == current_loops->tree_root |
200 | || from == base_loop->header) |
201 | return; |
202 | |
203 | auto_sbitmap in_queue (last_basic_block_for_fn (cfun)); |
204 | bitmap_clear (in_queue); |
205 | bitmap_set_bit (map: in_queue, bitno: from->index); |
206 | /* Prevent us from going out of the base_loop. */ |
207 | bitmap_set_bit (map: in_queue, bitno: base_loop->header->index); |
208 | |
209 | queue = XNEWVEC (basic_block, base_loop->num_nodes + 1); |
210 | qtop = queue + base_loop->num_nodes + 1; |
211 | qbeg = queue; |
212 | qend = queue + 1; |
213 | *qbeg = from; |
214 | |
215 | while (qbeg != qend) |
216 | { |
217 | edge_iterator ei; |
218 | from = *qbeg; |
219 | qbeg++; |
220 | if (qbeg == qtop) |
221 | qbeg = queue; |
222 | bitmap_clear_bit (map: in_queue, bitno: from->index); |
223 | |
224 | if (from->loop_father->header == from) |
225 | { |
226 | /* Subloop header, maybe move the loop upward. */ |
227 | if (!fix_loop_placement (loop: from->loop_father, irred_invalidated)) |
228 | continue; |
229 | target_loop = loop_outer (loop: from->loop_father); |
230 | if (loop_closed_ssa_invalidated) |
231 | { |
232 | basic_block *bbs = get_loop_body (from->loop_father); |
233 | for (unsigned i = 0; i < from->loop_father->num_nodes; ++i) |
234 | bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index); |
235 | free (ptr: bbs); |
236 | } |
237 | } |
238 | else |
239 | { |
240 | /* Ordinary basic block. */ |
241 | if (!fix_bb_placement (bb: from)) |
242 | continue; |
243 | target_loop = from->loop_father; |
244 | if (loop_closed_ssa_invalidated) |
245 | bitmap_set_bit (loop_closed_ssa_invalidated, from->index); |
246 | } |
247 | |
248 | FOR_EACH_EDGE (e, ei, from->succs) |
249 | { |
250 | if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
251 | *irred_invalidated = true; |
252 | } |
253 | |
254 | /* Something has changed, insert predecessors into queue. */ |
255 | FOR_EACH_EDGE (e, ei, from->preds) |
256 | { |
257 | basic_block pred = e->src; |
258 | class loop *nca; |
259 | |
260 | if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
261 | *irred_invalidated = true; |
262 | |
263 | if (bitmap_bit_p (map: in_queue, bitno: pred->index)) |
264 | continue; |
265 | |
266 | /* If it is subloop, then it either was not moved, or |
267 | the path up the loop tree from base_loop do not contain |
268 | it. */ |
269 | nca = find_common_loop (pred->loop_father, base_loop); |
270 | if (pred->loop_father != base_loop |
271 | && (nca == base_loop |
272 | || nca != pred->loop_father)) |
273 | pred = pred->loop_father->header; |
274 | else if (!flow_loop_nested_p (target_loop, pred->loop_father)) |
275 | { |
276 | /* If PRED is already higher in the loop hierarchy than the |
277 | TARGET_LOOP to that we moved FROM, the change of the position |
278 | of FROM does not affect the position of PRED, so there is no |
279 | point in processing it. */ |
280 | continue; |
281 | } |
282 | |
283 | if (bitmap_bit_p (map: in_queue, bitno: pred->index)) |
284 | continue; |
285 | |
286 | /* Schedule the basic block. */ |
287 | *qend = pred; |
288 | qend++; |
289 | if (qend == qtop) |
290 | qend = queue; |
291 | bitmap_set_bit (map: in_queue, bitno: pred->index); |
292 | } |
293 | } |
294 | free (ptr: queue); |
295 | } |
296 | |
297 | /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E |
298 | and update loop structures and dominators. Return true if we were able |
299 | to remove the path, false otherwise (and nothing is affected then). */ |
300 | bool |
301 | remove_path (edge e, bool *irred_invalidated, |
302 | bitmap loop_closed_ssa_invalidated) |
303 | { |
304 | edge ae; |
305 | basic_block *rem_bbs, *bord_bbs, from, bb; |
306 | vec<basic_block> dom_bbs; |
307 | int i, nrem, n_bord_bbs; |
308 | bool local_irred_invalidated = false; |
309 | edge_iterator ei; |
310 | class loop *l, *f; |
311 | |
312 | if (! irred_invalidated) |
313 | irred_invalidated = &local_irred_invalidated; |
314 | |
315 | if (!can_remove_branch_p (e)) |
316 | return false; |
317 | |
318 | /* Keep track of whether we need to update information about irreducible |
319 | regions. This is the case if the removed area is a part of the |
320 | irreducible region, or if the set of basic blocks that belong to a loop |
321 | that is inside an irreducible region is changed, or if such a loop is |
322 | removed. */ |
323 | if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
324 | *irred_invalidated = true; |
325 | |
326 | /* We need to check whether basic blocks are dominated by the edge |
327 | e, but we only have basic block dominators. This is easy to |
328 | fix -- when e->dest has exactly one predecessor, this corresponds |
329 | to blocks dominated by e->dest, if not, split the edge. */ |
330 | if (!single_pred_p (bb: e->dest)) |
331 | e = single_pred_edge (bb: split_edge (e)); |
332 | |
333 | /* It may happen that by removing path we remove one or more loops |
334 | we belong to. In this case first unloop the loops, then proceed |
335 | normally. We may assume that e->dest is not a header of any loop, |
336 | as it now has exactly one predecessor. */ |
337 | for (l = e->src->loop_father; loop_outer (loop: l); l = f) |
338 | { |
339 | f = loop_outer (loop: l); |
340 | if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest)) |
341 | unloop (l, irred_invalidated, loop_closed_ssa_invalidated); |
342 | } |
343 | |
344 | /* Identify the path. */ |
345 | nrem = find_path (e, bbs: &rem_bbs); |
346 | |
347 | n_bord_bbs = 0; |
348 | bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
349 | auto_sbitmap seen (last_basic_block_for_fn (cfun)); |
350 | bitmap_clear (seen); |
351 | |
352 | /* Find "border" hexes -- i.e. those with predecessor in removed path. */ |
353 | for (i = 0; i < nrem; i++) |
354 | bitmap_set_bit (map: seen, bitno: rem_bbs[i]->index); |
355 | if (!*irred_invalidated) |
356 | FOR_EACH_EDGE (ae, ei, e->src->succs) |
357 | if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) |
358 | && !bitmap_bit_p (map: seen, bitno: ae->dest->index) |
359 | && ae->flags & EDGE_IRREDUCIBLE_LOOP) |
360 | { |
361 | *irred_invalidated = true; |
362 | break; |
363 | } |
364 | |
365 | for (i = 0; i < nrem; i++) |
366 | { |
367 | FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs) |
368 | if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) |
369 | && !bitmap_bit_p (map: seen, bitno: ae->dest->index)) |
370 | { |
371 | bitmap_set_bit (map: seen, bitno: ae->dest->index); |
372 | bord_bbs[n_bord_bbs++] = ae->dest; |
373 | |
374 | if (ae->flags & EDGE_IRREDUCIBLE_LOOP) |
375 | *irred_invalidated = true; |
376 | } |
377 | } |
378 | |
379 | /* Remove the path. */ |
380 | from = e->src; |
381 | remove_branch (e); |
382 | dom_bbs.create (nelems: 0); |
383 | |
384 | /* Cancel loops contained in the path. */ |
385 | for (i = 0; i < nrem; i++) |
386 | if (rem_bbs[i]->loop_father->header == rem_bbs[i]) |
387 | cancel_loop_tree (rem_bbs[i]->loop_father); |
388 | |
389 | remove_bbs (bbs: rem_bbs, nbbs: nrem); |
390 | free (ptr: rem_bbs); |
391 | |
392 | /* Find blocks whose dominators may be affected. */ |
393 | bitmap_clear (seen); |
394 | for (i = 0; i < n_bord_bbs; i++) |
395 | { |
396 | basic_block ldom; |
397 | |
398 | bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]); |
399 | if (bitmap_bit_p (map: seen, bitno: bb->index)) |
400 | continue; |
401 | bitmap_set_bit (map: seen, bitno: bb->index); |
402 | |
403 | for (ldom = first_dom_son (CDI_DOMINATORS, bb); |
404 | ldom; |
405 | ldom = next_dom_son (CDI_DOMINATORS, ldom)) |
406 | if (!dominated_by_p (CDI_DOMINATORS, from, ldom)) |
407 | dom_bbs.safe_push (obj: ldom); |
408 | } |
409 | |
410 | /* Recount dominators. */ |
411 | iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true); |
412 | dom_bbs.release (); |
413 | free (ptr: bord_bbs); |
414 | |
415 | /* Fix placements of basic blocks inside loops and the placement of |
416 | loops in the loop tree. */ |
417 | fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated); |
418 | fix_loop_placements (from->loop_father, irred_invalidated); |
419 | |
420 | if (local_irred_invalidated |
421 | && loops_state_satisfies_p (flags: LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) |
422 | mark_irreducible_loops (); |
423 | |
424 | return true; |
425 | } |
426 | |
427 | /* Creates place for a new LOOP in loops structure of FN. */ |
428 | |
429 | void |
430 | place_new_loop (struct function *fn, class loop *loop) |
431 | { |
432 | loop->num = number_of_loops (fn); |
433 | vec_safe_push (v&: loops_for_fn (fn)->larray, obj: loop); |
434 | } |
435 | |
436 | /* Given LOOP structure with filled header and latch, find the body of the |
437 | corresponding loop and add it to loops tree. Insert the LOOP as a son of |
438 | outer. */ |
439 | |
440 | void |
441 | add_loop (class loop *loop, class loop *outer) |
442 | { |
443 | basic_block *bbs; |
444 | int i, n; |
445 | class loop *subloop; |
446 | edge e; |
447 | edge_iterator ei; |
448 | |
449 | /* Add it to loop structure. */ |
450 | place_new_loop (cfun, loop); |
451 | flow_loop_tree_node_add (outer, loop); |
452 | |
453 | /* Find its nodes. */ |
454 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
455 | n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun)); |
456 | |
457 | for (i = 0; i < n; i++) |
458 | { |
459 | if (bbs[i]->loop_father == outer) |
460 | { |
461 | remove_bb_from_loops (bbs[i]); |
462 | add_bb_to_loop (bbs[i], loop); |
463 | continue; |
464 | } |
465 | |
466 | loop->num_nodes++; |
467 | |
468 | /* If we find a direct subloop of OUTER, move it to LOOP. */ |
469 | subloop = bbs[i]->loop_father; |
470 | if (loop_outer (loop: subloop) == outer |
471 | && subloop->header == bbs[i]) |
472 | { |
473 | flow_loop_tree_node_remove (subloop); |
474 | flow_loop_tree_node_add (loop, subloop); |
475 | } |
476 | } |
477 | |
478 | /* Update the information about loop exit edges. */ |
479 | for (i = 0; i < n; i++) |
480 | { |
481 | FOR_EACH_EDGE (e, ei, bbs[i]->succs) |
482 | { |
483 | rescan_loop_exit (e, false, false); |
484 | } |
485 | } |
486 | |
487 | free (ptr: bbs); |
488 | } |
489 | |
490 | /* Scale profile of loop by P. */ |
491 | |
492 | void |
493 | scale_loop_frequencies (class loop *loop, profile_probability p) |
494 | { |
495 | basic_block *bbs; |
496 | |
497 | bbs = get_loop_body (loop); |
498 | scale_bbs_frequencies (bbs, loop->num_nodes, p); |
499 | free (ptr: bbs); |
500 | } |
501 | |
502 | /* Scales the frequencies of all basic blocks in LOOP that are strictly |
503 | dominated by BB by NUM/DEN. */ |
504 | |
505 | void |
506 | scale_dominated_blocks_in_loop (class loop *loop, basic_block bb, |
507 | profile_count num, profile_count den) |
508 | { |
509 | basic_block son; |
510 | |
511 | if (!den.nonzero_p () && !(num == profile_count::zero ())) |
512 | return; |
513 | auto_vec <basic_block, 8> worklist; |
514 | worklist.safe_push (obj: bb); |
515 | |
516 | while (!worklist.is_empty ()) |
517 | for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ()); |
518 | son; |
519 | son = next_dom_son (CDI_DOMINATORS, son)) |
520 | { |
521 | if (!flow_bb_inside_loop_p (loop, son)) |
522 | continue; |
523 | son->count = son->count.apply_scale (num, den); |
524 | worklist.safe_push (obj: son); |
525 | } |
526 | } |
527 | |
528 | /* Return exit that suitable for update when loop iterations |
529 | changed. */ |
530 | |
531 | static edge |
532 | loop_exit_for_scaling (class loop *loop) |
533 | { |
534 | edge exit_edge = single_exit (loop); |
535 | if (!exit_edge) |
536 | { |
537 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
538 | exit_edge = single_likely_exit (loop, exits); |
539 | } |
540 | return exit_edge; |
541 | } |
542 | |
543 | /* Assume that loop's entry count and profile up to a given EXIT_EDGE is |
544 | consistent. Update exit probability so loop exists with PROFILE_COUNT |
545 | and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src. |
546 | |
547 | This is useful after number of iteraitons of loop has changed. |
548 | If EXIT_EDGE is NULL, the function will try to identify suitable exit. |
549 | If DESIRED_COUNT is NULL, loop entry count will be used. |
550 | In consistent profile usually loop exists as many times as it is entred. |
551 | |
552 | Return updated exit if successfull and NULL otherwise. */ |
553 | |
554 | edge |
555 | update_loop_exit_probability_scale_dom_bbs (class loop *loop, |
556 | edge exit_edge, |
557 | profile_count desired_count) |
558 | { |
559 | if (!exit_edge) |
560 | exit_edge = loop_exit_for_scaling (loop); |
561 | if (!exit_edge) |
562 | { |
563 | if (dump_file && (dump_flags & TDF_DETAILS)) |
564 | { |
565 | fprintf (stream: dump_file, format: ";; Not updating exit probability of loop %i;" |
566 | " it has no single exit\n" , |
567 | loop->num); |
568 | } |
569 | return NULL; |
570 | } |
571 | /* If exit is inside another loop, adjusting its probability will also |
572 | adjust number of the inner loop iterations. Just do noting for now. */ |
573 | if (!just_once_each_iteration_p (loop, exit_edge->src)) |
574 | { |
575 | if (dump_file && (dump_flags & TDF_DETAILS)) |
576 | { |
577 | fprintf (stream: dump_file, format: ";; Not updating exit probability of loop %i;" |
578 | " exit is inside inner loop\n" , |
579 | loop->num); |
580 | } |
581 | return NULL; |
582 | } |
583 | /* Normal loops exit as many times as they are entered. */ |
584 | if (!desired_count.initialized_p ()) |
585 | desired_count = loop_count_in (loop); |
586 | /* See if everything is already perfect. */ |
587 | if (desired_count == exit_edge->count ()) |
588 | return exit_edge; |
589 | profile_count old_exit_count = exit_edge->count (); |
590 | /* See if update is possible. |
591 | Avoid turning probability to 0 or 1 just trying to reach impossible |
592 | value. |
593 | |
594 | Natural profile update after some loop will happily scale header count to |
595 | be less than count of entering the loop. This is bad idea and they should |
596 | special case maybe_flat_loop_profile. |
597 | |
598 | It may also happen that the source basic block of the exit edge is |
599 | inside in-loop condition: |
600 | |
601 | +-> header |
602 | | | |
603 | | B1 |
604 | | / \ |
605 | | | B2--exit_edge--> |
606 | | \ / |
607 | | B3 |
608 | +__/ |
609 | |
610 | If B2 count is smaller than desired exit edge count |
611 | the profile was inconsistent with the newly discovered upper bound. |
612 | Probablity of edge B1->B2 is too low. We do not attempt to fix |
613 | that (as it is hard in general). */ |
614 | if (desired_count > exit_edge->src->count |
615 | && exit_edge->src->count.differs_from_p (other: desired_count)) |
616 | { |
617 | if (dump_file) |
618 | { |
619 | fprintf (stream: dump_file, format: ";; Source bb of loop %i has count " , |
620 | loop->num); |
621 | exit_edge->src->count.dump (f: dump_file, cfun); |
622 | fprintf (stream: dump_file, |
623 | format: " which is smaller then desired count of exitting loop " ); |
624 | desired_count.dump (f: dump_file, cfun); |
625 | fprintf (stream: dump_file, format: ". Profile update is impossible.\n" ); |
626 | } |
627 | /* Drop quality of probability since we know it likely |
628 | bad. */ |
629 | exit_edge->probability = exit_edge->probability.guessed (); |
630 | return NULL; |
631 | } |
632 | if (!exit_edge->src->count.nonzero_p ()) |
633 | { |
634 | if (dump_file) |
635 | { |
636 | fprintf (stream: dump_file, format: ";; Not updating exit edge probability" |
637 | " in loop %i since profile is zero " , |
638 | loop->num); |
639 | } |
640 | return NULL; |
641 | } |
642 | set_edge_probability_and_rescale_others |
643 | (exit_edge, desired_count.probability_in (overall: exit_edge->src->count)); |
644 | /* Rescale the remaining edge probabilities and see if there is only |
645 | one. */ |
646 | edge other_edge = NULL; |
647 | bool found = false; |
648 | edge e; |
649 | edge_iterator ei; |
650 | FOR_EACH_EDGE (e, ei, exit_edge->src->succs) |
651 | if (!(e->flags & EDGE_FAKE) |
652 | && !loop_exit_edge_p (loop, e)) |
653 | { |
654 | if (found) |
655 | { |
656 | other_edge = NULL; |
657 | break; |
658 | } |
659 | other_edge = e; |
660 | found = true; |
661 | } |
662 | /* If there is only loop latch after other edge, |
663 | update its profile. This is tiny bit more precise |
664 | than scaling. */ |
665 | if (other_edge && other_edge->dest == loop->latch) |
666 | { |
667 | if (single_pred_p (bb: loop->latch)) |
668 | loop->latch->count = loop->latch->count |
669 | + old_exit_count - exit_edge->count (); |
670 | } |
671 | else |
672 | /* If there are multple blocks, just scale. */ |
673 | scale_dominated_blocks_in_loop (loop, bb: exit_edge->src, |
674 | num: exit_edge->src->count - exit_edge->count (), |
675 | den: exit_edge->src->count - old_exit_count); |
676 | return exit_edge; |
677 | } |
678 | |
679 | /* Scale profile in LOOP by P. |
680 | If ITERATION_BOUND is not -1, scale even further if loop is predicted |
681 | to iterate too many times. |
682 | Before caling this function, preheader block profile should be already |
683 | scaled to final count. This is necessary because loop iterations are |
684 | determined by comparing header edge count to latch ege count and thus |
685 | they need to be scaled synchronously. */ |
686 | |
687 | void |
688 | scale_loop_profile (class loop *loop, profile_probability p, |
689 | gcov_type iteration_bound) |
690 | { |
691 | if (!(p == profile_probability::always ())) |
692 | { |
693 | if (dump_file && (dump_flags & TDF_DETAILS)) |
694 | { |
695 | fprintf (stream: dump_file, format: ";; Scaling loop %i with scale " , |
696 | loop->num); |
697 | p.dump (f: dump_file); |
698 | fprintf (stream: dump_file, format: "\n" ); |
699 | } |
700 | |
701 | /* Scale the probabilities. */ |
702 | scale_loop_frequencies (loop, p); |
703 | } |
704 | |
705 | if (iteration_bound == -1) |
706 | return; |
707 | |
708 | sreal iterations; |
709 | if (!expected_loop_iterations_by_profile (loop, ret: &iterations)) |
710 | return; |
711 | |
712 | if (dump_file && (dump_flags & TDF_DETAILS)) |
713 | { |
714 | fprintf (stream: dump_file, |
715 | format: ";; Guessed iterations of loop %i is %f. New upper bound %i.\n" , |
716 | loop->num, |
717 | iterations.to_double (), |
718 | (int)iteration_bound); |
719 | } |
720 | |
721 | /* See if loop is predicted to iterate too many times. */ |
722 | if (iterations <= (sreal)iteration_bound) |
723 | return; |
724 | |
725 | profile_count count_in = loop_count_in (loop); |
726 | |
727 | /* Now scale the loop body so header count is |
728 | count_in * (iteration_bound + 1) */ |
729 | profile_probability scale_prob |
730 | = (count_in * (iteration_bound + 1)).probability_in (overall: loop->header->count); |
731 | if (dump_file && (dump_flags & TDF_DETAILS)) |
732 | { |
733 | fprintf (stream: dump_file, format: ";; Scaling loop %i with scale " , |
734 | loop->num); |
735 | scale_prob.dump (f: dump_file); |
736 | fprintf (stream: dump_file, format: " to reach upper bound %i\n" , |
737 | (int)iteration_bound); |
738 | } |
739 | /* Finally attempt to fix exit edge probability. */ |
740 | edge exit_edge = loop_exit_for_scaling (loop); |
741 | |
742 | /* In a consistent profile unadjusted_exit_count should be same as count_in, |
743 | however to preserve as much of the original info, avoid recomputing |
744 | it. */ |
745 | profile_count unadjusted_exit_count = profile_count::uninitialized (); |
746 | if (exit_edge) |
747 | unadjusted_exit_count = exit_edge->count (); |
748 | scale_loop_frequencies (loop, p: scale_prob); |
749 | update_loop_exit_probability_scale_dom_bbs (loop, exit_edge, |
750 | desired_count: unadjusted_exit_count); |
751 | } |
752 | |
753 | /* Recompute dominance information for basic blocks outside LOOP. */ |
754 | |
755 | static void |
756 | update_dominators_in_loop (class loop *loop) |
757 | { |
758 | vec<basic_block> dom_bbs = vNULL; |
759 | basic_block *body; |
760 | unsigned i; |
761 | |
762 | auto_sbitmap seen (last_basic_block_for_fn (cfun)); |
763 | bitmap_clear (seen); |
764 | body = get_loop_body (loop); |
765 | |
766 | for (i = 0; i < loop->num_nodes; i++) |
767 | bitmap_set_bit (map: seen, bitno: body[i]->index); |
768 | |
769 | for (i = 0; i < loop->num_nodes; i++) |
770 | { |
771 | basic_block ldom; |
772 | |
773 | for (ldom = first_dom_son (CDI_DOMINATORS, body[i]); |
774 | ldom; |
775 | ldom = next_dom_son (CDI_DOMINATORS, ldom)) |
776 | if (!bitmap_bit_p (map: seen, bitno: ldom->index)) |
777 | { |
778 | bitmap_set_bit (map: seen, bitno: ldom->index); |
779 | dom_bbs.safe_push (obj: ldom); |
780 | } |
781 | } |
782 | |
783 | iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); |
784 | free (ptr: body); |
785 | dom_bbs.release (); |
786 | } |
787 | |
788 | /* Creates an if region as shown above. CONDITION is used to create |
789 | the test for the if. |
790 | |
791 | | |
792 | | ------------- ------------- |
793 | | | pred_bb | | pred_bb | |
794 | | ------------- ------------- |
795 | | | | |
796 | | | | ENTRY_EDGE |
797 | | | ENTRY_EDGE V |
798 | | | ====> ------------- |
799 | | | | cond_bb | |
800 | | | | CONDITION | |
801 | | | ------------- |
802 | | V / \ |
803 | | ------------- e_false / \ e_true |
804 | | | succ_bb | V V |
805 | | ------------- ----------- ----------- |
806 | | | false_bb | | true_bb | |
807 | | ----------- ----------- |
808 | | \ / |
809 | | \ / |
810 | | V V |
811 | | ------------- |
812 | | | join_bb | |
813 | | ------------- |
814 | | | exit_edge (result) |
815 | | V |
816 | | ----------- |
817 | | | succ_bb | |
818 | | ----------- |
819 | | |
820 | */ |
821 | |
822 | edge |
823 | create_empty_if_region_on_edge (edge entry_edge, tree condition) |
824 | { |
825 | |
826 | basic_block cond_bb, true_bb, false_bb, join_bb; |
827 | edge e_true, e_false, exit_edge; |
828 | gcond *cond_stmt; |
829 | tree simple_cond; |
830 | gimple_stmt_iterator gsi; |
831 | |
832 | cond_bb = split_edge (entry_edge); |
833 | |
834 | /* Insert condition in cond_bb. */ |
835 | gsi = gsi_last_bb (bb: cond_bb); |
836 | simple_cond = |
837 | force_gimple_operand_gsi (&gsi, condition, true, NULL, |
838 | false, GSI_NEW_STMT); |
839 | cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE); |
840 | gsi = gsi_last_bb (bb: cond_bb); |
841 | gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); |
842 | |
843 | join_bb = split_edge (single_succ_edge (bb: cond_bb)); |
844 | |
845 | e_true = single_succ_edge (bb: cond_bb); |
846 | true_bb = split_edge (e_true); |
847 | |
848 | e_false = make_edge (cond_bb, join_bb, 0); |
849 | false_bb = split_edge (e_false); |
850 | |
851 | e_true->flags &= ~EDGE_FALLTHRU; |
852 | e_true->flags |= EDGE_TRUE_VALUE; |
853 | e_false->flags &= ~EDGE_FALLTHRU; |
854 | e_false->flags |= EDGE_FALSE_VALUE; |
855 | |
856 | set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src); |
857 | set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb); |
858 | set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb); |
859 | set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); |
860 | |
861 | exit_edge = single_succ_edge (bb: join_bb); |
862 | |
863 | if (single_pred_p (bb: exit_edge->dest)) |
864 | set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb); |
865 | |
866 | return exit_edge; |
867 | } |
868 | |
869 | /* create_empty_loop_on_edge |
870 | | |
871 | | - pred_bb - ------ pred_bb ------ |
872 | | | | | iv0 = initial_value | |
873 | | -----|----- ---------|----------- |
874 | | | ______ | entry_edge |
875 | | | entry_edge / | | |
876 | | | ====> | -V---V- loop_header ------------- |
877 | | V | | iv_before = phi (iv0, iv_after) | |
878 | | - succ_bb - | ---|----------------------------- |
879 | | | | | | |
880 | | ----------- | ---V--- loop_body --------------- |
881 | | | | iv_after = iv_before + stride | |
882 | | | | if (iv_before < upper_bound) | |
883 | | | ---|--------------\-------------- |
884 | | | | \ exit_e |
885 | | | V \ |
886 | | | - loop_latch - V- succ_bb - |
887 | | | | | | | |
888 | | | /------------- ----------- |
889 | | \ ___ / |
890 | |
891 | Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME |
892 | that is used before the increment of IV. IV_BEFORE should be used for |
893 | adding code to the body that uses the IV. OUTER is the outer loop in |
894 | which the new loop should be inserted. |
895 | |
896 | Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and |
897 | inserted on the loop entry edge. This implies that this function |
898 | should be used only when the UPPER_BOUND expression is a loop |
899 | invariant. */ |
900 | |
901 | class loop * |
902 | create_empty_loop_on_edge (edge entry_edge, |
903 | tree initial_value, |
904 | tree stride, tree upper_bound, |
905 | tree iv, |
906 | tree *iv_before, |
907 | tree *iv_after, |
908 | class loop *outer) |
909 | { |
910 | basic_block , loop_latch, succ_bb, pred_bb; |
911 | class loop *loop; |
912 | gimple_stmt_iterator gsi; |
913 | gimple_seq stmts; |
914 | gcond *cond_expr; |
915 | tree exit_test; |
916 | edge exit_e; |
917 | |
918 | gcc_assert (entry_edge && initial_value && stride && upper_bound && iv); |
919 | |
920 | /* Create header, latch and wire up the loop. */ |
921 | pred_bb = entry_edge->src; |
922 | loop_header = split_edge (entry_edge); |
923 | loop_latch = split_edge (single_succ_edge (bb: loop_header)); |
924 | succ_bb = single_succ (bb: loop_latch); |
925 | make_edge (loop_header, succ_bb, 0); |
926 | redirect_edge_succ_nodup (single_succ_edge (bb: loop_latch), loop_header); |
927 | |
928 | /* Set immediate dominator information. */ |
929 | set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb); |
930 | set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header); |
931 | set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header); |
932 | |
933 | /* Initialize a loop structure and put it in a loop hierarchy. */ |
934 | loop = alloc_loop (); |
935 | loop->header = loop_header; |
936 | loop->latch = loop_latch; |
937 | add_loop (loop, outer); |
938 | |
939 | /* TODO: Fix counts. */ |
940 | scale_loop_frequencies (loop, p: profile_probability::even ()); |
941 | |
942 | /* Update dominators. */ |
943 | update_dominators_in_loop (loop); |
944 | |
945 | /* Modify edge flags. */ |
946 | exit_e = single_exit (loop); |
947 | exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE; |
948 | single_pred_edge (bb: loop_latch)->flags = EDGE_TRUE_VALUE; |
949 | |
950 | /* Construct IV code in loop. */ |
951 | initial_value = force_gimple_operand (initial_value, &stmts, true, iv); |
952 | if (stmts) |
953 | { |
954 | gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); |
955 | gsi_commit_edge_inserts (); |
956 | } |
957 | |
958 | upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL); |
959 | if (stmts) |
960 | { |
961 | gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); |
962 | gsi_commit_edge_inserts (); |
963 | } |
964 | |
965 | gsi = gsi_last_bb (bb: loop_header); |
966 | create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false, |
967 | iv_before, iv_after); |
968 | |
969 | /* Insert loop exit condition. */ |
970 | cond_expr = gimple_build_cond |
971 | (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE); |
972 | |
973 | exit_test = gimple_cond_lhs (gs: cond_expr); |
974 | exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL, |
975 | false, GSI_NEW_STMT); |
976 | gimple_cond_set_lhs (gs: cond_expr, lhs: exit_test); |
977 | gsi = gsi_last_bb (bb: exit_e->src); |
978 | gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT); |
979 | |
980 | split_block_after_labels (loop_header); |
981 | |
982 | return loop; |
983 | } |
984 | |
985 | /* Remove the latch edge of a LOOP and update loops to indicate that |
986 | the LOOP was removed. After this function, original loop latch will |
987 | have no successor, which caller is expected to fix somehow. |
988 | |
989 | If this may cause the information about irreducible regions to become |
990 | invalid, IRRED_INVALIDATED is set to true. |
991 | |
992 | LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store |
993 | basic blocks that had non-trivial update on their loop_father.*/ |
994 | |
995 | void |
996 | unloop (class loop *loop, bool *irred_invalidated, |
997 | bitmap loop_closed_ssa_invalidated) |
998 | { |
999 | basic_block *body; |
1000 | class loop *ploop; |
1001 | unsigned i, n; |
1002 | basic_block latch = loop->latch; |
1003 | bool dummy = false; |
1004 | |
1005 | if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) |
1006 | *irred_invalidated = true; |
1007 | |
1008 | /* This is relatively straightforward. The dominators are unchanged, as |
1009 | loop header dominates loop latch, so the only thing we have to care of |
1010 | is the placement of loops and basic blocks inside the loop tree. We |
1011 | move them all to the loop->outer, and then let fix_bb_placements do |
1012 | its work. */ |
1013 | |
1014 | body = get_loop_body (loop); |
1015 | n = loop->num_nodes; |
1016 | for (i = 0; i < n; i++) |
1017 | if (body[i]->loop_father == loop) |
1018 | { |
1019 | remove_bb_from_loops (body[i]); |
1020 | add_bb_to_loop (body[i], loop_outer (loop)); |
1021 | } |
1022 | free (ptr: body); |
1023 | |
1024 | while (loop->inner) |
1025 | { |
1026 | ploop = loop->inner; |
1027 | flow_loop_tree_node_remove (ploop); |
1028 | flow_loop_tree_node_add (loop_outer (loop), ploop); |
1029 | } |
1030 | |
1031 | /* Remove the loop and free its data. */ |
1032 | delete_loop (loop); |
1033 | |
1034 | remove_edge (single_succ_edge (bb: latch)); |
1035 | |
1036 | /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if |
1037 | there is an irreducible region inside the cancelled loop, the flags will |
1038 | be still correct. */ |
1039 | fix_bb_placements (from: latch, irred_invalidated: &dummy, loop_closed_ssa_invalidated); |
1040 | } |
1041 | |
1042 | /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that |
1043 | condition stated in description of fix_loop_placement holds for them. |
1044 | It is used in case when we removed some edges coming out of LOOP, which |
1045 | may cause the right placement of LOOP inside loop tree to change. |
1046 | |
1047 | IRRED_INVALIDATED is set to true if a change in the loop structures might |
1048 | invalidate the information about irreducible regions. */ |
1049 | |
1050 | static void |
1051 | fix_loop_placements (class loop *loop, bool *irred_invalidated) |
1052 | { |
1053 | class loop *outer; |
1054 | |
1055 | while (loop_outer (loop)) |
1056 | { |
1057 | outer = loop_outer (loop); |
1058 | if (!fix_loop_placement (loop, irred_invalidated)) |
1059 | break; |
1060 | |
1061 | /* Changing the placement of a loop in the loop tree may alter the |
1062 | validity of condition 2) of the description of fix_bb_placement |
1063 | for its preheader, because the successor is the header and belongs |
1064 | to the loop. So call fix_bb_placements to fix up the placement |
1065 | of the preheader and (possibly) of its predecessors. */ |
1066 | fix_bb_placements (from: loop_preheader_edge (loop)->src, |
1067 | irred_invalidated, NULL); |
1068 | loop = outer; |
1069 | } |
1070 | } |
1071 | |
1072 | /* Duplicate loop bounds and other information we store about |
1073 | the loop into its duplicate. */ |
1074 | |
1075 | void |
1076 | copy_loop_info (class loop *loop, class loop *target) |
1077 | { |
1078 | gcc_checking_assert (!target->any_upper_bound && !target->any_estimate); |
1079 | target->any_upper_bound = loop->any_upper_bound; |
1080 | target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound; |
1081 | target->any_likely_upper_bound = loop->any_likely_upper_bound; |
1082 | target->nb_iterations_likely_upper_bound |
1083 | = loop->nb_iterations_likely_upper_bound; |
1084 | target->any_estimate = loop->any_estimate; |
1085 | target->nb_iterations_estimate = loop->nb_iterations_estimate; |
1086 | target->estimate_state = loop->estimate_state; |
1087 | target->safelen = loop->safelen; |
1088 | target->simdlen = loop->simdlen; |
1089 | target->constraints = loop->constraints; |
1090 | target->can_be_parallel = loop->can_be_parallel; |
1091 | target->warned_aggressive_loop_optimizations |
1092 | |= loop->warned_aggressive_loop_optimizations; |
1093 | target->dont_vectorize = loop->dont_vectorize; |
1094 | target->force_vectorize = loop->force_vectorize; |
1095 | target->in_oacc_kernels_region = loop->in_oacc_kernels_region; |
1096 | target->finite_p = loop->finite_p; |
1097 | target->unroll = loop->unroll; |
1098 | target->owned_clique = loop->owned_clique; |
1099 | } |
1100 | |
1101 | /* Copies copy of LOOP as subloop of TARGET loop, placing newly |
1102 | created loop into loops structure. If AFTER is non-null |
1103 | the new loop is added at AFTER->next, otherwise in front of TARGETs |
1104 | sibling list. */ |
1105 | class loop * |
1106 | duplicate_loop (class loop *loop, class loop *target, class loop *after) |
1107 | { |
1108 | class loop *cloop; |
1109 | cloop = alloc_loop (); |
1110 | place_new_loop (cfun, loop: cloop); |
1111 | |
1112 | copy_loop_info (loop, target: cloop); |
1113 | |
1114 | /* Mark the new loop as copy of LOOP. */ |
1115 | set_loop_copy (loop, cloop); |
1116 | |
1117 | /* Add it to target. */ |
1118 | flow_loop_tree_node_add (target, cloop, after); |
1119 | |
1120 | return cloop; |
1121 | } |
1122 | |
1123 | /* Copies structure of subloops of LOOP into TARGET loop, placing |
1124 | newly created loops into loop tree at the end of TARGETs sibling |
1125 | list in the original order. */ |
1126 | void |
1127 | duplicate_subloops (class loop *loop, class loop *target) |
1128 | { |
1129 | class loop *aloop, *cloop, *tail; |
1130 | |
1131 | for (tail = target->inner; tail && tail->next; tail = tail->next) |
1132 | ; |
1133 | for (aloop = loop->inner; aloop; aloop = aloop->next) |
1134 | { |
1135 | cloop = duplicate_loop (loop: aloop, target, after: tail); |
1136 | tail = cloop; |
1137 | gcc_assert(!tail->next); |
1138 | duplicate_subloops (loop: aloop, target: cloop); |
1139 | } |
1140 | } |
1141 | |
1142 | /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, |
1143 | into TARGET loop, placing newly created loops into loop tree adding |
1144 | them to TARGETs sibling list at the end in order. */ |
1145 | static void |
1146 | copy_loops_to (class loop **copied_loops, int n, class loop *target) |
1147 | { |
1148 | class loop *aloop, *tail; |
1149 | int i; |
1150 | |
1151 | for (tail = target->inner; tail && tail->next; tail = tail->next) |
1152 | ; |
1153 | for (i = 0; i < n; i++) |
1154 | { |
1155 | aloop = duplicate_loop (loop: copied_loops[i], target, after: tail); |
1156 | tail = aloop; |
1157 | gcc_assert(!tail->next); |
1158 | duplicate_subloops (loop: copied_loops[i], target: aloop); |
1159 | } |
1160 | } |
1161 | |
1162 | /* Redirects edge E to basic block DEST. */ |
1163 | static void |
1164 | loop_redirect_edge (edge e, basic_block dest) |
1165 | { |
1166 | if (e->dest == dest) |
1167 | return; |
1168 | |
1169 | redirect_edge_and_branch_force (e, dest); |
1170 | } |
1171 | |
1172 | /* Check whether LOOP's body can be duplicated. */ |
1173 | bool |
1174 | can_duplicate_loop_p (const class loop *loop) |
1175 | { |
1176 | int ret; |
1177 | basic_block *bbs = get_loop_body (loop); |
1178 | |
1179 | ret = can_copy_bbs_p (bbs, loop->num_nodes); |
1180 | free (ptr: bbs); |
1181 | |
1182 | return ret; |
1183 | } |
1184 | |
1185 | /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating |
1186 | loop structure and dominators (order of inner subloops is retained). |
1187 | E's destination must be LOOP header for this to work, i.e. it must be entry |
1188 | or latch edge of this loop; these are unique, as the loops must have |
1189 | preheaders for this function to work correctly (in case E is latch, the |
1190 | function unrolls the loop, if E is entry edge, it peels the loop). Store |
1191 | edges created by copying ORIG edge from copies corresponding to set bits in |
1192 | WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies |
1193 | are numbered in order given by control flow through them) into TO_REMOVE |
1194 | array. Returns false if duplication is |
1195 | impossible. */ |
1196 | |
1197 | bool |
1198 | duplicate_loop_body_to_header_edge (class loop *loop, edge e, |
1199 | unsigned int ndupl, sbitmap wont_exit, |
1200 | edge orig, vec<edge> *to_remove, int flags) |
1201 | { |
1202 | class loop *target, *aloop; |
1203 | class loop **orig_loops; |
1204 | unsigned n_orig_loops; |
1205 | basic_block = loop->header, latch = loop->latch; |
1206 | basic_block *new_bbs, *bbs, *first_active; |
1207 | basic_block new_bb, bb, first_active_latch = NULL; |
1208 | edge ae, latch_edge; |
1209 | edge spec_edges[2], new_spec_edges[2]; |
1210 | const int SE_LATCH = 0; |
1211 | const int SE_ORIG = 1; |
1212 | unsigned i, j, n; |
1213 | int is_latch = (latch == e->src); |
1214 | profile_probability *scale_step = NULL; |
1215 | profile_probability scale_main = profile_probability::always (); |
1216 | profile_probability scale_act = profile_probability::always (); |
1217 | profile_count after_exit_num = profile_count::zero (), |
1218 | after_exit_den = profile_count::zero (); |
1219 | bool scale_after_exit = false; |
1220 | int add_irreducible_flag; |
1221 | basic_block place_after; |
1222 | bitmap bbs_to_scale = NULL; |
1223 | bitmap_iterator bi; |
1224 | |
1225 | gcc_assert (e->dest == loop->header); |
1226 | gcc_assert (ndupl > 0); |
1227 | |
1228 | if (orig) |
1229 | { |
1230 | /* Orig must be edge out of the loop. */ |
1231 | gcc_assert (flow_bb_inside_loop_p (loop, orig->src)); |
1232 | gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest)); |
1233 | } |
1234 | |
1235 | n = loop->num_nodes; |
1236 | bbs = get_loop_body_in_dom_order (loop); |
1237 | gcc_assert (bbs[0] == loop->header); |
1238 | gcc_assert (bbs[n - 1] == loop->latch); |
1239 | |
1240 | /* Check whether duplication is possible. */ |
1241 | if (!can_copy_bbs_p (bbs, loop->num_nodes)) |
1242 | { |
1243 | free (ptr: bbs); |
1244 | return false; |
1245 | } |
1246 | new_bbs = XNEWVEC (basic_block, loop->num_nodes); |
1247 | |
1248 | /* In case we are doing loop peeling and the loop is in the middle of |
1249 | irreducible region, the peeled copies will be inside it too. */ |
1250 | add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP; |
1251 | gcc_assert (!is_latch || !add_irreducible_flag); |
1252 | |
1253 | /* Find edge from latch. */ |
1254 | latch_edge = loop_latch_edge (loop); |
1255 | |
1256 | if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1257 | { |
1258 | /* Calculate coefficients by that we have to scale counts |
1259 | of duplicated loop bodies. */ |
1260 | profile_count count_in = header->count; |
1261 | profile_count count_le = latch_edge->count (); |
1262 | profile_count count_out_orig = orig ? orig->count () : count_in - count_le; |
1263 | profile_probability prob_pass_thru = count_le.probability_in (overall: count_in); |
1264 | profile_count new_count_le = count_le + count_out_orig; |
1265 | |
1266 | if (orig && orig->probability.initialized_p () |
1267 | && !(orig->probability == profile_probability::always ())) |
1268 | { |
1269 | /* The blocks that are dominated by a removed exit edge ORIG have |
1270 | frequencies scaled by this. */ |
1271 | if (orig->count ().initialized_p ()) |
1272 | { |
1273 | after_exit_num = orig->src->count; |
1274 | after_exit_den = after_exit_num - orig->count (); |
1275 | scale_after_exit = true; |
1276 | } |
1277 | bbs_to_scale = BITMAP_ALLOC (NULL); |
1278 | for (i = 0; i < n; i++) |
1279 | { |
1280 | if (bbs[i] != orig->src |
1281 | && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) |
1282 | bitmap_set_bit (bbs_to_scale, i); |
1283 | } |
1284 | /* Since we will scale up all basic blocks dominated by orig, exits |
1285 | will become more likely; compensate for that. */ |
1286 | if (after_exit_den.nonzero_p ()) |
1287 | { |
1288 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
1289 | for (edge ex : exits) |
1290 | if (ex != orig |
1291 | && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src)) |
1292 | new_count_le -= ex->count ().apply_scale (num: after_exit_num |
1293 | - after_exit_den, |
1294 | den: after_exit_den); |
1295 | } |
1296 | } |
1297 | profile_probability prob_pass_wont_exit = |
1298 | new_count_le.probability_in (overall: count_in); |
1299 | /* If profile count is 0, the probability will be uninitialized. |
1300 | We can set probability to any initialized value to avoid |
1301 | precision loss. If profile is sane, all counts will be 0 anyway. */ |
1302 | if (!count_in.nonzero_p ()) |
1303 | { |
1304 | prob_pass_thru |
1305 | = profile_probability::always ().apply_scale (num: 1, den: 2); |
1306 | prob_pass_wont_exit |
1307 | = profile_probability::always ().apply_scale (num: 1, den: 2); |
1308 | } |
1309 | |
1310 | scale_step = XNEWVEC (profile_probability, ndupl); |
1311 | |
1312 | for (i = 1; i <= ndupl; i++) |
1313 | scale_step[i - 1] = bitmap_bit_p (map: wont_exit, bitno: i) |
1314 | ? prob_pass_wont_exit |
1315 | : prob_pass_thru; |
1316 | |
1317 | /* Complete peeling is special as the probability of exit in last |
1318 | copy becomes 1. */ |
1319 | if (!count_in.nonzero_p ()) |
1320 | ; |
1321 | else if (flags & DLTHE_FLAG_COMPLETTE_PEEL) |
1322 | { |
1323 | profile_count wanted_count = e->count (); |
1324 | |
1325 | gcc_assert (!is_latch); |
1326 | /* First copy has count of incoming edge. Each subsequent |
1327 | count should be reduced by prob_pass_wont_exit. Caller |
1328 | should've managed the flags so all except for original loop |
1329 | has won't exist set. */ |
1330 | scale_act = wanted_count.probability_in (overall: count_in); |
1331 | |
1332 | /* Now simulate the duplication adjustments and compute header |
1333 | frequency of the last copy. */ |
1334 | for (i = 0; i < ndupl; i++) |
1335 | wanted_count = wanted_count.apply_probability (prob: scale_step [i]); |
1336 | scale_main = wanted_count.probability_in (overall: count_in); |
1337 | } |
1338 | /* Here we insert loop bodies inside the loop itself (for loop unrolling). |
1339 | First iteration will be original loop followed by duplicated bodies. |
1340 | It is necessary to scale down the original so we get right overall |
1341 | number of iterations. */ |
1342 | else if (is_latch) |
1343 | { |
1344 | profile_probability prob_pass_main = bitmap_bit_p (map: wont_exit, bitno: 0) |
1345 | ? prob_pass_wont_exit |
1346 | : prob_pass_thru; |
1347 | if (!(flags & DLTHE_FLAG_FLAT_PROFILE)) |
1348 | { |
1349 | profile_probability p = prob_pass_main; |
1350 | profile_count scale_main_den = count_in; |
1351 | for (i = 0; i < ndupl; i++) |
1352 | { |
1353 | scale_main_den += count_in.apply_probability (prob: p); |
1354 | p = p * scale_step[i]; |
1355 | } |
1356 | /* If original loop is executed COUNT_IN times, the unrolled |
1357 | loop will account SCALE_MAIN_DEN times. */ |
1358 | scale_main = count_in.probability_in (overall: scale_main_den); |
1359 | } |
1360 | else |
1361 | scale_main = profile_probability::always (); |
1362 | scale_act = scale_main * prob_pass_main; |
1363 | } |
1364 | else |
1365 | { |
1366 | profile_count = e->count (); |
1367 | for (i = 0; i < ndupl; i++) |
1368 | scale_main = scale_main * scale_step[i]; |
1369 | scale_act = preheader_count.probability_in (overall: count_in); |
1370 | } |
1371 | } |
1372 | |
1373 | /* Loop the new bbs will belong to. */ |
1374 | target = e->src->loop_father; |
1375 | |
1376 | /* Original loops. */ |
1377 | n_orig_loops = 0; |
1378 | for (aloop = loop->inner; aloop; aloop = aloop->next) |
1379 | n_orig_loops++; |
1380 | orig_loops = XNEWVEC (class loop *, n_orig_loops); |
1381 | for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++) |
1382 | orig_loops[i] = aloop; |
1383 | |
1384 | set_loop_copy (loop, target); |
1385 | |
1386 | first_active = XNEWVEC (basic_block, n); |
1387 | if (is_latch) |
1388 | { |
1389 | memcpy (dest: first_active, src: bbs, n: n * sizeof (basic_block)); |
1390 | first_active_latch = latch; |
1391 | } |
1392 | |
1393 | spec_edges[SE_ORIG] = orig; |
1394 | spec_edges[SE_LATCH] = latch_edge; |
1395 | |
1396 | place_after = e->src; |
1397 | for (j = 0; j < ndupl; j++) |
1398 | { |
1399 | /* Copy loops. */ |
1400 | copy_loops_to (copied_loops: orig_loops, n: n_orig_loops, target); |
1401 | |
1402 | /* Copy bbs. */ |
1403 | copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop, |
1404 | place_after, true); |
1405 | place_after = new_spec_edges[SE_LATCH]->src; |
1406 | |
1407 | if (flags & DLTHE_RECORD_COPY_NUMBER) |
1408 | for (i = 0; i < n; i++) |
1409 | { |
1410 | gcc_assert (!new_bbs[i]->aux); |
1411 | new_bbs[i]->aux = (void *)(size_t)(j + 1); |
1412 | } |
1413 | |
1414 | /* Note whether the blocks and edges belong to an irreducible loop. */ |
1415 | if (add_irreducible_flag) |
1416 | { |
1417 | for (i = 0; i < n; i++) |
1418 | new_bbs[i]->flags |= BB_DUPLICATED; |
1419 | for (i = 0; i < n; i++) |
1420 | { |
1421 | edge_iterator ei; |
1422 | new_bb = new_bbs[i]; |
1423 | if (new_bb->loop_father == target) |
1424 | new_bb->flags |= BB_IRREDUCIBLE_LOOP; |
1425 | |
1426 | FOR_EACH_EDGE (ae, ei, new_bb->succs) |
1427 | if ((ae->dest->flags & BB_DUPLICATED) |
1428 | && (ae->src->loop_father == target |
1429 | || ae->dest->loop_father == target)) |
1430 | ae->flags |= EDGE_IRREDUCIBLE_LOOP; |
1431 | } |
1432 | for (i = 0; i < n; i++) |
1433 | new_bbs[i]->flags &= ~BB_DUPLICATED; |
1434 | } |
1435 | |
1436 | /* Redirect the special edges. */ |
1437 | if (is_latch) |
1438 | { |
1439 | redirect_edge_and_branch_force (latch_edge, new_bbs[0]); |
1440 | redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], |
1441 | loop->header); |
1442 | set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch); |
1443 | latch = loop->latch = new_bbs[n - 1]; |
1444 | e = latch_edge = new_spec_edges[SE_LATCH]; |
1445 | } |
1446 | else |
1447 | { |
1448 | redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], |
1449 | loop->header); |
1450 | redirect_edge_and_branch_force (e, new_bbs[0]); |
1451 | set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src); |
1452 | e = new_spec_edges[SE_LATCH]; |
1453 | } |
1454 | |
1455 | /* Record exit edge in this copy. */ |
1456 | if (orig && bitmap_bit_p (map: wont_exit, bitno: j + 1)) |
1457 | { |
1458 | if (to_remove) |
1459 | to_remove->safe_push (obj: new_spec_edges[SE_ORIG]); |
1460 | force_edge_cold (new_spec_edges[SE_ORIG], true); |
1461 | |
1462 | /* Scale the frequencies of the blocks dominated by the exit. */ |
1463 | if (bbs_to_scale && scale_after_exit) |
1464 | { |
1465 | EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) |
1466 | scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num, |
1467 | after_exit_den); |
1468 | } |
1469 | } |
1470 | |
1471 | /* Record the first copy in the control flow order if it is not |
1472 | the original loop (i.e. in case of peeling). */ |
1473 | if (!first_active_latch) |
1474 | { |
1475 | memcpy (dest: first_active, src: new_bbs, n: n * sizeof (basic_block)); |
1476 | first_active_latch = new_bbs[n - 1]; |
1477 | } |
1478 | |
1479 | /* Set counts and frequencies. */ |
1480 | if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1481 | { |
1482 | scale_bbs_frequencies (new_bbs, n, scale_act); |
1483 | scale_act = scale_act * scale_step[j]; |
1484 | } |
1485 | } |
1486 | free (ptr: new_bbs); |
1487 | free (ptr: orig_loops); |
1488 | |
1489 | /* Record the exit edge in the original loop body, and update the frequencies. */ |
1490 | if (orig && bitmap_bit_p (map: wont_exit, bitno: 0)) |
1491 | { |
1492 | if (to_remove) |
1493 | to_remove->safe_push (obj: orig); |
1494 | force_edge_cold (orig, true); |
1495 | |
1496 | /* Scale the frequencies of the blocks dominated by the exit. */ |
1497 | if (bbs_to_scale && scale_after_exit) |
1498 | { |
1499 | EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) |
1500 | scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num, |
1501 | after_exit_den); |
1502 | } |
1503 | } |
1504 | |
1505 | /* Update the original loop. */ |
1506 | if (!is_latch) |
1507 | set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); |
1508 | if (flags & DLTHE_FLAG_UPDATE_FREQ) |
1509 | { |
1510 | scale_bbs_frequencies (bbs, n, scale_main); |
1511 | free (ptr: scale_step); |
1512 | } |
1513 | |
1514 | /* Update dominators of outer blocks if affected. */ |
1515 | for (i = 0; i < n; i++) |
1516 | { |
1517 | basic_block dominated, dom_bb; |
1518 | unsigned j; |
1519 | |
1520 | bb = bbs[i]; |
1521 | |
1522 | auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); |
1523 | FOR_EACH_VEC_ELT (dom_bbs, j, dominated) |
1524 | { |
1525 | if (flow_bb_inside_loop_p (loop, dominated)) |
1526 | continue; |
1527 | dom_bb = nearest_common_dominator ( |
1528 | CDI_DOMINATORS, first_active[i], first_active_latch); |
1529 | set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb); |
1530 | } |
1531 | } |
1532 | free (ptr: first_active); |
1533 | |
1534 | free (ptr: bbs); |
1535 | BITMAP_FREE (bbs_to_scale); |
1536 | |
1537 | return true; |
1538 | } |
1539 | |
1540 | /* A callback for make_forwarder block, to redirect all edges except for |
1541 | MFB_KJ_EDGE to the entry part. E is the edge for that we should decide |
1542 | whether to redirect it. */ |
1543 | |
1544 | edge mfb_kj_edge; |
1545 | bool |
1546 | mfb_keep_just (edge e) |
1547 | { |
1548 | return e != mfb_kj_edge; |
1549 | } |
1550 | |
1551 | /* True when a candidate preheader BLOCK has predecessors from LOOP. */ |
1552 | |
1553 | static bool |
1554 | has_preds_from_loop (basic_block block, class loop *loop) |
1555 | { |
1556 | edge e; |
1557 | edge_iterator ei; |
1558 | |
1559 | FOR_EACH_EDGE (e, ei, block->preds) |
1560 | if (e->src->loop_father == loop) |
1561 | return true; |
1562 | return false; |
1563 | } |
1564 | |
1565 | /* Creates a pre-header for a LOOP. Returns newly created block. Unless |
1566 | CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single |
1567 | entry; otherwise we also force preheader block to have only one successor. |
1568 | When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block |
1569 | to be a fallthru predecessor to the loop header and to have only |
1570 | predecessors from outside of the loop. |
1571 | The function also updates dominators. */ |
1572 | |
1573 | basic_block |
1574 | (class loop *loop, int flags) |
1575 | { |
1576 | edge e; |
1577 | basic_block dummy; |
1578 | int nentry = 0; |
1579 | bool irred = false; |
1580 | bool latch_edge_was_fallthru; |
1581 | edge one_succ_pred = NULL, single_entry = NULL; |
1582 | edge_iterator ei; |
1583 | |
1584 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
1585 | { |
1586 | if (e->src == loop->latch) |
1587 | continue; |
1588 | irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; |
1589 | nentry++; |
1590 | single_entry = e; |
1591 | if (single_succ_p (bb: e->src)) |
1592 | one_succ_pred = e; |
1593 | } |
1594 | gcc_assert (nentry); |
1595 | if (nentry == 1) |
1596 | { |
1597 | bool need_forwarder_block = false; |
1598 | |
1599 | /* We do not allow entry block to be the loop preheader, since we |
1600 | cannot emit code there. */ |
1601 | if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
1602 | need_forwarder_block = true; |
1603 | else |
1604 | { |
1605 | /* If we want simple preheaders, also force the preheader to have |
1606 | just a single successor and a normal edge. */ |
1607 | if ((flags & CP_SIMPLE_PREHEADERS) |
1608 | && ((single_entry->flags & EDGE_COMPLEX) |
1609 | || !single_succ_p (bb: single_entry->src))) |
1610 | need_forwarder_block = true; |
1611 | /* If we want fallthru preheaders, also create forwarder block when |
1612 | preheader ends with a jump or has predecessors from loop. */ |
1613 | else if ((flags & CP_FALLTHRU_PREHEADERS) |
1614 | && (JUMP_P (BB_END (single_entry->src)) |
1615 | || has_preds_from_loop (block: single_entry->src, loop))) |
1616 | need_forwarder_block = true; |
1617 | } |
1618 | if (! need_forwarder_block) |
1619 | return NULL; |
1620 | } |
1621 | |
1622 | mfb_kj_edge = loop_latch_edge (loop); |
1623 | latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; |
1624 | if (nentry == 1 |
1625 | && ((flags & CP_FALLTHRU_PREHEADERS) == 0 |
1626 | || (single_entry->flags & EDGE_CROSSING) == 0)) |
1627 | dummy = split_edge (single_entry); |
1628 | else |
1629 | { |
1630 | edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); |
1631 | dummy = fallthru->src; |
1632 | loop->header = fallthru->dest; |
1633 | } |
1634 | |
1635 | /* Try to be clever in placing the newly created preheader. The idea is to |
1636 | avoid breaking any "fallthruness" relationship between blocks. |
1637 | |
1638 | The preheader was created just before the header and all incoming edges |
1639 | to the header were redirected to the preheader, except the latch edge. |
1640 | So the only problematic case is when this latch edge was a fallthru |
1641 | edge: it is not anymore after the preheader creation so we have broken |
1642 | the fallthruness. We're therefore going to look for a better place. */ |
1643 | if (latch_edge_was_fallthru) |
1644 | { |
1645 | if (one_succ_pred) |
1646 | e = one_succ_pred; |
1647 | else |
1648 | e = EDGE_PRED (dummy, 0); |
1649 | |
1650 | move_block_after (dummy, e->src); |
1651 | } |
1652 | |
1653 | if (irred) |
1654 | { |
1655 | dummy->flags |= BB_IRREDUCIBLE_LOOP; |
1656 | single_succ_edge (bb: dummy)->flags |= EDGE_IRREDUCIBLE_LOOP; |
1657 | } |
1658 | |
1659 | if (dump_file) |
1660 | fprintf (stream: dump_file, format: "Created preheader block for loop %i\n" , |
1661 | loop->num); |
1662 | |
1663 | if (flags & CP_FALLTHRU_PREHEADERS) |
1664 | gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU) |
1665 | && !JUMP_P (BB_END (dummy))); |
1666 | |
1667 | return dummy; |
1668 | } |
1669 | |
1670 | /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */ |
1671 | |
1672 | void |
1673 | (int flags) |
1674 | { |
1675 | if (!current_loops) |
1676 | return; |
1677 | |
1678 | for (auto loop : loops_list (cfun, 0)) |
1679 | create_preheader (loop, flags); |
1680 | loops_state_set (flags: LOOPS_HAVE_PREHEADERS); |
1681 | } |
1682 | |
1683 | /* Forces all loop latches to have only single successor. */ |
1684 | |
1685 | void |
1686 | force_single_succ_latches (void) |
1687 | { |
1688 | edge e; |
1689 | |
1690 | for (auto loop : loops_list (cfun, 0)) |
1691 | { |
1692 | if (loop->latch != loop->header && single_succ_p (bb: loop->latch)) |
1693 | continue; |
1694 | |
1695 | e = find_edge (loop->latch, loop->header); |
1696 | gcc_checking_assert (e != NULL); |
1697 | |
1698 | split_edge (e); |
1699 | } |
1700 | loops_state_set (flags: LOOPS_HAVE_SIMPLE_LATCHES); |
1701 | } |
1702 | |
1703 | /* This function is called from loop_version. It splits the entry edge |
1704 | of the loop we want to version, adds the versioning condition, and |
1705 | adjust the edges to the two versions of the loop appropriately. |
1706 | e is an incoming edge. Returns the basic block containing the |
1707 | condition. |
1708 | |
1709 | --- edge e ---- > [second_head] |
1710 | |
1711 | Split it and insert new conditional expression and adjust edges. |
1712 | |
1713 | --- edge e ---> [cond expr] ---> [first_head] |
1714 | | |
1715 | +---------> [second_head] |
1716 | |
1717 | THEN_PROB is the probability of then branch of the condition. |
1718 | ELSE_PROB is the probability of else branch. Note that they may be both |
1719 | REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or |
1720 | IFN_LOOP_DIST_ALIAS. */ |
1721 | |
1722 | static basic_block |
1723 | lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head, |
1724 | edge e, void *cond_expr, |
1725 | profile_probability then_prob, |
1726 | profile_probability else_prob) |
1727 | { |
1728 | basic_block new_head = NULL; |
1729 | edge e1; |
1730 | |
1731 | gcc_assert (e->dest == second_head); |
1732 | |
1733 | /* Split edge 'e'. This will create a new basic block, where we can |
1734 | insert conditional expr. */ |
1735 | new_head = split_edge (e); |
1736 | |
1737 | lv_add_condition_to_bb (first_head, second_head, new_head, |
1738 | cond_expr); |
1739 | |
1740 | /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */ |
1741 | e = single_succ_edge (bb: new_head); |
1742 | e1 = make_edge (new_head, first_head, |
1743 | current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0); |
1744 | e1->probability = then_prob; |
1745 | e->probability = else_prob; |
1746 | |
1747 | set_immediate_dominator (CDI_DOMINATORS, first_head, new_head); |
1748 | set_immediate_dominator (CDI_DOMINATORS, second_head, new_head); |
1749 | |
1750 | /* Adjust loop header phi nodes. */ |
1751 | lv_adjust_loop_header_phi (first_head, second_head, new_head, e1); |
1752 | |
1753 | return new_head; |
1754 | } |
1755 | |
1756 | /* Main entry point for Loop Versioning transformation. |
1757 | |
1758 | This transformation given a condition and a loop, creates |
1759 | -if (condition) { loop_copy1 } else { loop_copy2 }, |
1760 | where loop_copy1 is the loop transformed in one way, and loop_copy2 |
1761 | is the loop transformed in another way (or unchanged). COND_EXPR |
1762 | may be a run time test for things that were not resolved by static |
1763 | analysis (overlapping ranges (anti-aliasing), alignment, etc.). |
1764 | |
1765 | If non-NULL, CONDITION_BB is set to the basic block containing the |
1766 | condition. |
1767 | |
1768 | THEN_PROB is the probability of the then edge of the if. THEN_SCALE |
1769 | is the ratio by that the frequencies in the original loop should |
1770 | be scaled. ELSE_SCALE is the ratio by that the frequencies in the |
1771 | new loop should be scaled. |
1772 | |
1773 | If PLACE_AFTER is true, we place the new loop after LOOP in the |
1774 | instruction stream, otherwise it is placed before LOOP. */ |
1775 | |
1776 | class loop * |
1777 | loop_version (class loop *loop, |
1778 | void *cond_expr, basic_block *condition_bb, |
1779 | profile_probability then_prob, profile_probability else_prob, |
1780 | profile_probability then_scale, profile_probability else_scale, |
1781 | bool place_after) |
1782 | { |
1783 | basic_block first_head, second_head; |
1784 | edge entry, latch_edge; |
1785 | int irred_flag; |
1786 | class loop *nloop; |
1787 | basic_block cond_bb; |
1788 | |
1789 | /* Record entry and latch edges for the loop */ |
1790 | entry = loop_preheader_edge (loop); |
1791 | irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; |
1792 | entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; |
1793 | |
1794 | /* Note down head of loop as first_head. */ |
1795 | first_head = entry->dest; |
1796 | |
1797 | /* 1) Duplicate loop on the entry edge. */ |
1798 | if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, ndupl: 1, NULL, NULL, |
1799 | NULL, flags: 0)) |
1800 | { |
1801 | entry->flags |= irred_flag; |
1802 | return NULL; |
1803 | } |
1804 | |
1805 | /* 2) loopify the duplicated new loop. */ |
1806 | latch_edge = single_succ_edge (bb: get_bb_copy (loop->latch)); |
1807 | nloop = alloc_loop (); |
1808 | class loop *outer = loop_outer (loop: latch_edge->dest->loop_father); |
1809 | edge = single_pred_edge (bb: get_bb_copy (loop->header)); |
1810 | nloop->header = new_header_edge->dest; |
1811 | nloop->latch = latch_edge->src; |
1812 | loop_redirect_edge (e: latch_edge, dest: nloop->header); |
1813 | |
1814 | /* Compute new loop. */ |
1815 | add_loop (loop: nloop, outer); |
1816 | copy_loop_info (loop, target: nloop); |
1817 | set_loop_copy (loop, nloop); |
1818 | |
1819 | /* loopify redirected latch_edge. Update its PENDING_STMTS. */ |
1820 | lv_flush_pending_stmts (latch_edge); |
1821 | |
1822 | /* After duplication entry edge now points to new loop head block. |
1823 | Note down new head as second_head. */ |
1824 | second_head = entry->dest; |
1825 | |
1826 | /* 3) Split loop entry edge and insert new block with cond expr. */ |
1827 | cond_bb = lv_adjust_loop_entry_edge (first_head, second_head, |
1828 | e: entry, cond_expr, then_prob, else_prob); |
1829 | if (condition_bb) |
1830 | *condition_bb = cond_bb; |
1831 | |
1832 | if (!cond_bb) |
1833 | { |
1834 | entry->flags |= irred_flag; |
1835 | return NULL; |
1836 | } |
1837 | |
1838 | /* Add cond_bb to appropriate loop. */ |
1839 | if (cond_bb->loop_father) |
1840 | remove_bb_from_loops (cond_bb); |
1841 | add_bb_to_loop (cond_bb, outer); |
1842 | |
1843 | /* 4) Scale the original loop and new loop frequency. */ |
1844 | scale_loop_frequencies (loop, p: then_scale); |
1845 | scale_loop_frequencies (loop: nloop, p: else_scale); |
1846 | update_dominators_in_loop (loop); |
1847 | update_dominators_in_loop (loop: nloop); |
1848 | |
1849 | /* Adjust irreducible flag. */ |
1850 | if (irred_flag) |
1851 | { |
1852 | cond_bb->flags |= BB_IRREDUCIBLE_LOOP; |
1853 | loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP; |
1854 | loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP; |
1855 | single_pred_edge (bb: cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP; |
1856 | } |
1857 | |
1858 | if (place_after) |
1859 | { |
1860 | basic_block *bbs = get_loop_body_in_dom_order (nloop), after; |
1861 | unsigned i; |
1862 | |
1863 | after = loop->latch; |
1864 | |
1865 | for (i = 0; i < nloop->num_nodes; i++) |
1866 | { |
1867 | move_block_after (bbs[i], after); |
1868 | after = bbs[i]; |
1869 | } |
1870 | free (ptr: bbs); |
1871 | } |
1872 | |
1873 | /* At this point condition_bb is loop preheader with two successors, |
1874 | first_head and second_head. Make sure that loop preheader has only |
1875 | one successor. */ |
1876 | split_edge (loop_preheader_edge (loop)); |
1877 | split_edge (loop_preheader_edge (nloop)); |
1878 | |
1879 | return nloop; |
1880 | } |
1881 | |