1/* Loop splitting.
2 Copyright (C) 2015-2023 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "tree.h"
25#include "gimple.h"
26#include "tree-pass.h"
27#include "ssa.h"
28#include "fold-const.h"
29#include "tree-cfg.h"
30#include "tree-ssa.h"
31#include "tree-ssa-loop-niter.h"
32#include "tree-ssa-loop.h"
33#include "tree-ssa-loop-manip.h"
34#include "tree-into-ssa.h"
35#include "tree-inline.h"
36#include "tree-cfgcleanup.h"
37#include "cfgloop.h"
38#include "tree-scalar-evolution.h"
39#include "gimple-iterator.h"
40#include "gimple-pretty-print.h"
41#include "cfghooks.h"
42#include "gimple-fold.h"
43#include "gimplify-me.h"
44#include "print-tree.h"
45#include "value-query.h"
46#include "sreal.h"
47
48/* This file implements two kinds of loop splitting.
49
50 One transformation of loops like:
51
52 for (i = 0; i < 100; i++)
53 {
54 if (i < 50)
55 A;
56 else
57 B;
58 }
59
60 into:
61
62 for (i = 0; i < 50; i++)
63 {
64 A;
65 }
66 for (; i < 100; i++)
67 {
68 B;
69 }
70
71 */
72
/* Check whether BB inside LOOP is a potential iteration space split
   point, i.e. whether it ends with a condition like "IV < comp" that
   is true on one side of the iteration space and false on the other,
   and whether the split point can be computed.  If so, return the
   controlling induction variable (an SSA name); additionally return
   the border point in *BORDER, the affine description of the IV in
   *IV and the (possibly canonicalized) comparison code in *GUARD_CODE.
   Return NULL_TREE if BB is not a usable split point.  */

static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
	       enum tree_code *guard_code)
{
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb));
  if (!stmt)
    return NULL_TREE;

  enum tree_code code = gimple_cond_code (gs: stmt);

  /* Only interior conditions are candidates; loop exit conditions are
     what gets patched, not what we split on.  */
  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (gs: stmt);
  tree op1 = gimple_cond_rhs (gs: stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  /* Both operands must be analyzable as affine IVs relative to LOOP.  */
  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  */
  if (!integer_zerop (iv2.step))
    {
      std::swap (a&: op0, b&: op1);
      std::swap (a&: *iv, b&: iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, lhs: op0, rhs: op1);
      update_stmt (s: stmt);
    }
  else if (integer_zerop (iv->step))
    /* Neither side evolves in the loop; nothing to split on.  */
    return NULL_TREE;
  /* Exactly one side may evolve, and it must not overflow.  */
  if (!integer_zerop (iv2.step))
    return NULL_TREE;
  if (!iv->no_overflow)
    return NULL_TREE;

  /* Only handle relational comparisons, for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      break;
    case NE_EXPR:
    case EQ_EXPR:
      /* If the test checks for the first iteration, we can handle NE/EQ
	 with only one split loop.  */
      if (operand_equal_p (iv->base, iv2.base, flags: 0))
	{
	  if (code == EQ_EXPR)
	    code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
	  else
	    code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
	  break;
	}
      /* Similarly when the test checks for minimal or maximal
	 value range.  */
      else
	{
	  int_range<2> r;
	  get_global_range_query ()->range_of_expr (r, expr: op0, stmt);
	  if (!r.varying_p () && !r.undefined_p ()
	      && TREE_CODE (op1) == INTEGER_CST)
	    {
	      wide_int val = wi::to_wide (t: op1);
	      if (known_eq (val, r.lower_bound ()))
		{
		  code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
		  break;
		}
	      else if (known_eq (val, r.upper_bound ()))
		{
		  code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
		  break;
		}
	    }
	}
      /* TODO: We can compare with exit condition; it seems that testing for
	 last iteration is common case.  */
      return NULL_TREE;
    default:
      return NULL_TREE;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (stream: dump_file, format: " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (stream: dump_file, format: " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (stream: dump_file, format: "\n");
    }

  *border = iv2.base;
  *guard_code = code;
  return op0;
}
189
/* Given a GUARD conditional stmt inside LOOP, which we want to make always
   true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
   (a post-increment IV) and NEXTVAL's bound NEWBOUND (the comparator),
   adjust the loop exit test statement to loop back only if the GUARD
   statement will also be true/false in the next iteration.  */

static void
patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
		 bool initial_true)
{
  edge exit = single_exit (loop);
  gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: exit->src));
  /* Replace the exit test with GUARD's comparison applied to the
     post-increment IV and the new bound.  */
  gimple_cond_set_condition (stmt, code: gimple_cond_code (gs: guard),
			     lhs: nextval, rhs: newbound);
  update_stmt (s: stmt);

  /* STAY is the other successor of the exit block, i.e. the edge that
     stays in the loop.  */
  edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);

  exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  /* When the guard starts out true we keep iterating while the new
     condition stays true, so the true edge stays in the loop; when it
     starts out false the polarity is reversed.  */
  if (initial_true)
    {
      exit->flags |= EDGE_FALSE_VALUE;
      stay->flags |= EDGE_TRUE_VALUE;
    }
  else
    {
      exit->flags |= EDGE_TRUE_VALUE;
      stay->flags |= EDGE_FALSE_VALUE;
    }
}
222
/* Given an induction variable GUARD_IV, and its affine descriptor IV,
   find the loop phi node in LOOP defining it directly, or create
   such phi node.  Return that phi node, or NULL if GUARD_IV is not
   directly defined by a PHI node in LOOP's header (creating one is
   not implemented yet, see the XXX below).  */

static gphi *
find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
{
  gimple *def = SSA_NAME_DEF_STMT (guard_iv);
  gphi *phi;
  if ((phi = dyn_cast <gphi *> (p: def))
      && gimple_bb (g: phi) == loop->header)
    return phi;

  /* XXX Create the PHI instead.  */
  return NULL;
}
239
/* Returns true if the exit values of all loop phi nodes can be
   determined easily (i.e. that connect_loop_phis can determine them).  */

static bool
easy_exit_values (class loop *loop)
{
  edge exit = single_exit (loop);
  edge latch = loop_latch_edge (loop);
  gphi_iterator psi;

  /* Currently we regard the exit values as easy if they are the same
     as the value over the backedge.  Which is the case if the definition
     of the backedge value dominates the exit edge.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
    {
      gphi *phi = psi.phi ();
      tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
      basic_block bb;
      /* Constants and names defined outside any block trivially pass;
	 only an SSA definition that does not dominate the exit source
	 makes the exit value hard to determine.  */
      if (TREE_CODE (next) == SSA_NAME
	  && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
	  && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	return false;
    }

  return true;
}
266
/* This function updates the SSA form after connect_loops made a new
   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
   conditional).  I.e. the second loop can now be entered either
   via the original entry or via NEW_E, so the entry values of LOOP2
   phi nodes are either the original ones or those at the exit
   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
   this.  The loops need to fulfill easy_exit_values ().  */

static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  /* SKIP_FIRST is the other predecessor of REST, the path that skipped
     LOOP1 entirely.  */
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  /* LOOP2 is a fresh copy of LOOP1, so the header PHI sequences
     correspond one-to-one and can be walked in lockstep.  */
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (i: psi_first);
       gsi_next (i: &psi_first), gsi_next (i: &psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using original variable as a base for the new ssa name.
	 This is necessary for virtual ops, and useful in order to avoid
	 losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
	  && useless_type_conversion_p (TREE_TYPE (next),
					TREE_TYPE (init)))
	new_init = copy_ssa_name (var: next);
      else if (TREE_CODE (init) == SSA_NAME
	       && useless_type_conversion_p (TREE_TYPE (init),
					     TREE_TYPE (next)))
	new_init = copy_ssa_name (var: init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
					  TREE_TYPE (init)))
	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
				       name: "unrinittmp");
      else
	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
				       name: "unrinittmp");

      /* On REST the value is INIT when entered from the original path and
	 NEXT (the backedge value, equal to LOOP1's exit value per
	 easy_exit_values) when entered from LOOP1.  */
      gphi * newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}
326
/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
   they are still equivalent and placed in two arms of a diamond, like so:

               .------if (cond)------.
               v                     v
             pre1                   pre2
              |                      |
      .--->h1                       h2<----.
      |     |                        |     |
      |    ex1---.            .---ex2      |
      |    /     |            |     \      |
      '---l1     X            |      l2---'
                 |            |
                 |            |
                 '--->join<---'

   This function transforms the program such that LOOP1 is conditionally
   falling through to LOOP2, or skipping it.  This is done by splitting
   the ex1->join edge at X in the diagram above, and inserting a condition
   whose one arm goes to pre2, resulting in this situation:

               .------if (cond)------.
               v                     v
             pre1       .---------->pre2
              |         |            |
      .--->h1           |           h2<----.
      |     |           |            |     |
      |    ex1---.      |    .---ex2       |
      |    /     v      |    |     \       |
      '---l1   skip-----'    |      l2----'
                 |           |
                 |           |
                 '--->join<--'


   The condition used is the exit condition of LOOP1, which effectively means
   that when the first loop exits (for whatever reason) but the real original
   exit expression is still false the second loop will be entered.
   The function returns the new edge cond->pre2.

   This doesn't update the SSA form, see connect_loop_phis for that.  */

static edge
connect_loops (class loop *loop1, class loop *loop2)
{
  edge exit = single_exit (loop1);
  basic_block skip_bb = split_edge (exit);
  gcond *skip_stmt;
  gimple_stmt_iterator gsi;
  edge new_e, skip_e;

  /* Copy LOOP1's exit condition into the new skip block (X above).  */
  gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: exit->src));
  skip_stmt = gimple_build_cond (gimple_cond_code (gs: stmt),
				 gimple_cond_lhs (gs: stmt),
				 gimple_cond_rhs (gs: stmt),
				 NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (bb: skip_bb);
  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);

  skip_e = EDGE_SUCC (skip_bb, 0);
  skip_e->flags &= ~EDGE_FALLTHRU;
  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
  /* Give the copied condition the same polarity as the original exit:
     the arm that left LOOP1 keeps going to join, the other enters
     LOOP2's preheader.  */
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      skip_e->flags |= EDGE_TRUE_VALUE;
      new_e->flags |= EDGE_FALSE_VALUE;
    }
  else
    {
      skip_e->flags |= EDGE_FALSE_VALUE;
      new_e->flags |= EDGE_TRUE_VALUE;
    }

  new_e->probability = profile_probability::very_likely ();
  skip_e->probability = new_e->probability.invert ();

  return new_e;
}
405
/* This returns the new bound for iterations given the original iteration
   space in NITER, an arbitrary new bound BORDER, assumed to be some
   comparison value with a different IV, the initial value GUARD_INIT of
   that other IV, and the comparison code GUARD_CODE that compares
   that other IV with BORDER.  We return an SSA name, and place any
   necessary statements for that computation into *STMTS.

   For example for such a loop:

     for (i = beg, j = guard_init; i < end; i++, j++)
       if (j < border)  // this is supposed to be true/false
         ...

   we want to return a new bound (on j) that makes the loop iterate
   as long as the condition j < border stays true.  We also don't want
   to iterate more often than the original loop, so we have to introduce
   some cut-off as well (via min/max), effectively resulting in:

     newend = min (end+guard_init-beg, border)
     for (i = beg; j = guard_init; j < newend; i++, j++)
       if (j < c)
         ...

   Depending on the direction of the IVs and if the exit tests
   are strict or non-strict we need to use MIN or MAX,
   and add or subtract 1.  This routine computes newend above.  */

static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
			 tree border,
			 enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV, we need
     the loop-enter base, so subtract STEP once.  */
  tree controlbase = force_gimple_operand (niter->control.base,
					   stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      /* There is no POINTER_MINUS_EXPR; add the negated step instead.  */
      controlstep = gimple_build (seq: stmts, code: NEGATE_EXPR,
				  TREE_TYPE (controlstep), ops: controlstep);
      enddiff = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR,
			      TREE_TYPE (controlbase),
			      ops: controlbase, ops: controlstep);
    }
  else
    enddiff = gimple_build (seq: stmts, code: MINUS_EXPR,
			    TREE_TYPE (controlbase),
			    ops: controlbase, ops: controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
				   true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      /* Pointer subtraction again goes via a negated POINTER_PLUS_EXPR,
	 using sizetype for the offset.  */
      tree tem = gimple_convert (seq: stmts, sizetype, op: enddiff);
      tem = gimple_build (seq: stmts, code: NEGATE_EXPR, sizetype, ops: tem);
      enddiff = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR,
			      TREE_TYPE (enddiff),
			      ops: end, ops: tem);
    }
  else
    enddiff = gimple_build (seq: stmts, code: MINUS_EXPR, TREE_TYPE (enddiff),
			    ops: end, ops: enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (seq: stmts, TREE_TYPE (guard_init), op: enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (seq: stmts, sizetype, op: enddiff);
      newbound = gimple_build (seq: stmts, code: POINTER_PLUS_EXPR,
			       TREE_TYPE (guard_init),
			       ops: guard_init, ops: enddiff);
    }
  else
    newbound = gimple_build (seq: stmts, code: PLUS_EXPR, TREE_TYPE (guard_init),
			     ops: guard_init, ops: enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
	addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
	addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
	type2 = sizetype;
      newbound = gimple_build (seq: stmts,
			       POINTER_TYPE_P (TREE_TYPE (newbound))
			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
			       TREE_TYPE (newbound),
			       ops: newbound,
			       ops: build_int_cst (type2, addbound));
    }

  tree newend = gimple_build (seq: stmts, code: minmax, TREE_TYPE (border),
			      ops: border, ops: newbound);
  return newend;
}
526
/* Fix the two loop's bb count after split based on the split edge
   probability, don't adjust the bbs dominated by true branches of that
   loop to avoid dropping 1s down.  */
static void
fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
			 edge false_edge)
{
  /* Proportion first loop's bb counts except those dominated by true
     branch to avoid drop 1s down.  */
  basic_block *bbs1, *bbs2;
  bbs1 = get_loop_body (loop1);
  unsigned j;
  for (j = 0; j < loop1->num_nodes; j++)
    if (bbs1[j] == loop1->latch
	/* Watch for case where the true conditional is empty.  */
	|| !single_pred_p (bb: true_edge->dest)
	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
      bbs1[j]->count
	= bbs1[j]->count.apply_probability (prob: true_edge->probability);
  free (ptr: bbs1);

  /* Proportion second loop's bb counts except those dominated by false
     branch to avoid drop 1s down.  BBI_COPY is LOOP2's copy of the
     false destination.  */
  basic_block bbi_copy = get_bb_copy (false_edge->dest);
  bbs2 = get_loop_body (loop2);
  for (j = 0; j < loop2->num_nodes; j++)
    if (bbs2[j] == loop2->latch
	/* Watch for case where the false conditional is empty.  */
	|| !single_pred_p (bb: bbi_copy)
	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
      bbs2[j]->count
	= bbs2[j]->count.apply_probability (prob: true_edge->probability.invert ());
  free (ptr: bbs2);
}
561
/* Checks if LOOP1 contains a conditional block whose condition
   depends on which side in the iteration space it is, and if so
   splits the iteration space into two loops.  Returns true if the
   loop was split.  LOOP1 must have a single exit whose iteration
   descriptor can be computed (done locally below).  */

static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
	 (not the exit edge), and put the new exit condition in the new block.
	 OTOH this executes some code unconditionally that might have been
	 skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop: loop1)
      || !number_of_iterations_exit (loop1, exit1, niter: &niter, false, every_iteration: true)
      || niter.cmp == ERROR_MARK)
    return false;
  /* Canonicalize a != exit test into < or > using the sign of the
     control IV step; this is only valid when that IV cannot overflow.  */
  if (niter.cmp == NE_EXPR)
    {
      if (!niter.control.no_overflow)
	return false;
      if (tree_int_cst_sign_bit (niter.control.step))
	niter.cmp = GT_EXPR;
      else
	niter.cmp = LT_EXPR;
    }

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (ptr: bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  enum tree_code guard_code;
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop: loop1, bb: bbs[i], border: &border, iv: &iv, guard_code: &guard_code)))
      {
	/* Handling opposite steps is not implemented yet.  Neither
	   is handling different step sizes.  */
	if ((tree_int_cst_sign_bit (iv.step)
	     != tree_int_cst_sign_bit (niter.control.step))
	    || !tree_int_cst_equal (iv.step, niter.control.step))
	  continue;

	/* Find a loop PHI node that defines guard_iv directly,
	   or create one doing that.  */
	gphi *phi = find_or_create_guard_phi (loop: loop1, guard_iv, &iv);
	if (!phi)
	  continue;
	gcond *guard_stmt = as_a<gcond *> (p: *gsi_last_bb (bb: bbs[i]));
	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
						 loop_preheader_edge (loop1));

	/* Loop splitting is implemented by versioning the loop, placing
	   the new loop after the old loop, make the first loop iterate
	   as long as the conditional stays true (or false) and let the
	   second (new) loop handle the rest of the iterations.

	   First we need to determine if the condition will start being true
	   or false in the first loop.  */
	bool initial_true;
	switch (guard_code)
	  {
	  case LT_EXPR:
	  case LE_EXPR:
	    initial_true = !tree_int_cst_sign_bit (iv.step);
	    break;
	  case GT_EXPR:
	  case GE_EXPR:
	    initial_true = tree_int_cst_sign_bit (iv.step);
	    break;
	  default:
	    gcc_unreachable ();
	  }

	/* Build a condition that will skip the first loop when the
	   guard condition won't ever be true (or false).  */
	gimple_seq stmts2;
	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
	if (stmts2)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts2);
	tree cond = fold_build2 (guard_code, boolean_type_node,
				 guard_init, border);
	if (!initial_true)
	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

	edge true_edge, false_edge;
	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

	/* Now version the loop, placing loop2 after loop1 connecting
	   them, and fix up SSA form for that.  */
	initialize_original_copy_tables ();
	basic_block cond_bb;

	profile_probability loop1_prob
	  = integer_onep (cond) ? profile_probability::always ()
				: true_edge->probability;
	/* TODO: It is commonly a case that we know that both loops will be
	   entered.  very_likely below is the probability that second loop
	   will be entered given by connect_loops.  We should work out the
	   common case it is always true.  */
	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
					  loop1_prob,
					  /* Pass always as we will later
					     redirect first loop to second
					     loop.  */
					  profile_probability::always (),
					  profile_probability::always (),
					  profile_probability::very_likely (),
					  true);
	gcc_assert (loop2);
	/* Correct probability of edge cond_bb->preheader_of_loop2.  */
	single_pred_edge
	  (bb: loop_preheader_edge (loop2)->src)->probability
	  = loop1_prob.invert ();

	fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
	/* If conditional we split on has reliable profile and both
	   preconditionals of loop1 and loop2 are constant true, we can
	   only redistribute the iteration counts to the split loops.

	   If the conditionals we insert before loop1 or loop2 are
	   non-trivial they increase expected loop count, so account this
	   accordingly.  If we do not know the probability of split
	   conditional, avoid reducing loop estimates, since we do not
	   really know how they are split between of the two new loops.
	   Keep original estimate since it is likely better than completely
	   dropping it.

	   TODO: If we know that one of the new loops has constant
	   number of iterations, we can do better.  We could also update
	   upper bounds.  */
	if (loop1->any_estimate
	    && wi::fits_shwi_p (x: loop1->nb_iterations_estimate))
	  {
	    sreal scale = true_edge->probability.reliable_p ()
	      ? true_edge->probability.to_sreal () : (sreal)1;
	    sreal scale2 = false_edge->probability.reliable_p ()
	      ? false_edge->probability.to_sreal () : (sreal)1;
	    sreal div1 = loop1_prob.to_sreal ();
	    /* +1 to get header iterations rather than latch iterations and
	       then -1 to convert back.  */
	    if (div1 != 0)
	      loop1->nb_iterations_estimate
		= MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
			* scale / div1).to_nearest_int () - 1, 0);
	    else
	      loop1->any_estimate = false;
	    loop2->nb_iterations_estimate
	      = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1)
		      * scale2
		      / profile_probability::very_likely ().to_sreal ())
		     .to_nearest_int () - 1, 0);
	  }
	update_loop_exit_probability_scale_dom_bbs (loop: loop1);
	update_loop_exit_probability_scale_dom_bbs (loop: loop2);

	edge new_e = connect_loops (loop1, loop2);
	connect_loop_phis (loop1, loop2, new_e);

	/* The iterations of the second loop is now already
	   exactly those that the first loop didn't do, but the
	   iteration space of the first loop is still the original one.
	   Compute the new bound for the guarding IV and patch the
	   loop exit to use it instead of original IV and bound.  */
	gimple_seq stmts = NULL;
	tree newend = compute_new_first_bound (stmts: &stmts, niter: &niter, border,
					       guard_code, guard_init);
	if (stmts)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts);
	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
	patch_loop_exit (loop: loop1, guard: guard_stmt, nextval: guard_next, newbound: newend, initial_true);

	/* Finally patch out the two copies of the condition to be always
	   true/false (or opposite).  */
	gcond *force_true = as_a<gcond *> (p: *gsi_last_bb (bb: bbs[i]));
	gcond *force_false = as_a<gcond *> (p: *gsi_last_bb (bb: get_bb_copy (bbs[i])));
	if (!initial_true)
	  std::swap (a&: force_true, b&: force_false);
	gimple_cond_make_true (gs: force_true);
	gimple_cond_make_false (gs: force_false);
	update_stmt (s: force_true);
	update_stmt (s: force_false);

	free_original_copy_tables ();

	changed = true;
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (stream: dump_file, format: ";; Loop split.\n");

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");

	/* Only deal with the first opportunity.  */
	break;
      }

  free (ptr: bbs);
  return changed;
}
777
778/* Another transformation of loops like:
779
780 for (i = INIT (); CHECK (i); i = NEXT ())
781 {
782 if (expr (a_1, a_2, ..., a_n)) // expr is pure
783 a_j = ...; // change at least one a_j
784 else
785 S; // not change any a_j
786 }
787
788 into:
789
790 for (i = INIT (); CHECK (i); i = NEXT ())
791 {
792 if (expr (a_1, a_2, ..., a_n))
793 a_j = ...;
794 else
795 {
796 S;
797 i = NEXT ();
798 break;
799 }
800 }
801
802 for (; CHECK (i); i = NEXT ())
803 {
804 S;
805 }
806
807 */
808
809/* Data structure to hold temporary information during loop split upon
810 semi-invariant conditional statement. */
811class split_info {
812public:
813 /* Array of all basic blocks in a loop, returned by get_loop_body(). */
814 basic_block *bbs;
815
816 /* All memory store/clobber statements in a loop. */
817 auto_vec<gimple *> memory_stores;
818
819 /* Whether above memory stores vector has been filled. */
820 int need_init;
821
822 /* Control dependencies of basic blocks in a loop. */
823 auto_vec<hash_set<basic_block> *> control_deps;
824
825 split_info () : bbs (NULL), need_init (true) { }
826
827 ~split_info ()
828 {
829 if (bbs)
830 free (ptr: bbs);
831
832 for (unsigned i = 0; i < control_deps.length (); i++)
833 delete control_deps[i];
834 }
835};
836
/* Find all statements with memory-write effect in LOOP, including memory
   store and non-pure function call, and keep those in a vector.  This work
   is only done one time, for the vector should be constant during analysis
   stage of semi-invariant condition.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate memory store vector has been filled.  */
  info->need_init = false;

  /* If loop contains memory operation, there must be a virtual PHI node in
     loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected to be a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in loop header just
     links the first and the last virtual SSA names, by using the last as
     PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (gs: vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name, who
     is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (obj: last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
	 and reversely start the process from the last SSA name towards the
	 first, which ensures that this do-while will not touch SSA names
	 defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (g: stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (p: stmt);

	  /* A PHI merges several virtual defs; walk all of them.  */
	  for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i)
	    {
	      tree arg = gimple_phi_arg_def (gs: stmt, index: i);

	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
		worklist.safe_push (obj: arg);
	    }
	}
      else
	{
	  tree prev = gimple_vuse (g: stmt);

	  /* Non-pure call statement is conservatively assumed to impact all
	     memory locations.  So place call statements ahead of other memory
	     stores in the vector with an idea of using them as shortcut
	     terminators to memory alias analysis.  */
	  if (gimple_code (g: stmt) == GIMPLE_CALL)
	    info->memory_stores.safe_push (obj: stmt);
	  else
	    other_stores.safe_push (obj: stmt);

	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
	    worklist.safe_push (obj: prev);
	}
    } while (!worklist.is_empty ());

  /* Calls were pushed first; append the plain stores after them.  */
  info->memory_stores.safe_splice (src: other_stores);
}
925
/* Two basic blocks have equivalent control dependency if one dominates
   the other, and it is post-dominated by the latter.  Given a basic block
   BB in LOOP, find the farthest equivalent dominating basic block.  For
   BB, there is a constraint that BB does not post-dominate the loop
   header of LOOP, this means BB is control-dependent on at least one
   basic block in LOOP.  */

static basic_block
get_control_equiv_head_block (struct loop *loop, basic_block bb)
{
  /* Stop early at a block whose ->aux is set: its control dependency
     set has already been computed and cached (see find_control_dep_blocks),
     making it a known equivalence-class head.  */
  while (!bb->aux)
    {
      basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);

      gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));

      /* Climb only while the dominator is also post-dominated by BB,
	 i.e. remains control-equivalent.  */
      if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
	break;

      bb = dom_bb;
    }
  return bb;
}
948
/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  The computed set is cached in BB->aux (shared with all
   basic blocks control-equivalent to BB) and also recorded in the loop's
   split_info so it can be reclaimed later.  Return NULL if BB is not
   control-dependent on any basic block inside LOOP.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* BB has same control dependency as loop header, then it is not control-
     dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
	 to BB.  No need to recompute that, and also set this information
	 to other equivalent basic blocks.  */
      for (; bb != equiv_head;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB, if a basic block is post-dominated by BB, extend
     current post-dominating path to this block, otherwise it is another
     one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (obj: equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      /* hash_set::add returns true if the element was already present;
	 skip blocks we have seen on an earlier post-dominating path.  */
      if (pdom_visited.add (k: pdom_bb))
	continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
	{
	  basic_block pred_bb = e->src;

	  /* PRED_BB is not post-dominated by BB, so by the rule above BB
	     is control-dependent on it.  */
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
	    {
	      dep_bbs->add (k: pred_bb);
	      continue;
	    }

	  /* Jump straight to the head of PRED_BB's control-equivalence
	     class, since all its members share one dependency set.  */
	  pred_bb = get_control_equiv_head_block (loop, bb: pred_bb);

	  if (pdom_visited.contains (k: pred_bb))
	    continue;

	  if (!pred_bb->aux)
	    {
	      pdom_worklist.safe_push (obj: pred_bb);
	      continue;
	    }

	  /* If control dependency of basic block is available, fast extend
	     post-dominating path using the information instead of advancing
	     forward step-by-step.  */
	  hash_set<basic_block> *pred_dep_bbs
	    = (hash_set<basic_block> *) pred_bb->aux;

	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
	       iter != pred_dep_bbs->end (); ++iter)
	    {
	      basic_block pred_dep_bb = *iter;

	      /* Basic blocks can either be in control dependency of BB, or
		 must be post-dominated by BB, if so, extend the path from
		 these basic blocks.  */
	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
		dep_bbs->add (k: pred_dep_bb);
	      else if (!pdom_visited.contains (k: pred_dep_bb))
		pdom_worklist.safe_push (obj: pred_dep_bb);
	    }
	}
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in loop so that we can reach them
     when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (obj: dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}
1051
/* Forward declaration; stmt_semi_invariant_p_1 is mutually recursive with
   the checking helpers defined below.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat);
1058
1059/* Given STMT, memory load or pure call statement, check whether it is impacted
1060 by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1061 trace is composed of SKIP_HEAD and those basic block dominated by it, always
1062 corresponds to one branch of a conditional statement). If SKIP_HEAD is
1063 NULL, all basic blocks of LOOP are checked. */
1064
1065static bool
1066vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1067 const_basic_block skip_head)
1068{
1069 split_info *info = (split_info *) loop->aux;
1070 tree rhs = NULL_TREE;
1071 ao_ref ref;
1072 gimple *store;
1073 unsigned i;
1074
1075 /* Collect memory store/clobber statements if haven't done that. */
1076 if (info->need_init)
1077 find_vdef_in_loop (loop);
1078
1079 if (is_gimple_assign (gs: stmt))
1080 rhs = gimple_assign_rhs1 (gs: stmt);
1081
1082 ao_ref_init (&ref, rhs);
1083
1084 FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1085 {
1086 /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */
1087 if (skip_head
1088 && dominated_by_p (CDI_DOMINATORS, gimple_bb (g: store), skip_head))
1089 continue;
1090
1091 if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1092 return false;
1093 }
1094
1095 return true;
1096}
1097
1098/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1099 certain iteration of LOOP, check whether an SSA name (NAME) remains
1100 unchanged in next iteration. We call this characteristic semi-
1101 invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1102 blocks and control flows in the loop will be considered. Semi-invariant
1103 state of checked statement is cached in hash map STMT_STAT to avoid
1104 redundant computation in possible following re-check. */
1105
1106static inline bool
1107ssa_semi_invariant_p (struct loop *loop, tree name,
1108 const_basic_block skip_head,
1109 hash_map<gimple *, bool> &stmt_stat)
1110{
1111 gimple *def = SSA_NAME_DEF_STMT (name);
1112 const_basic_block def_bb = gimple_bb (g: def);
1113
1114 /* An SSA name defined outside loop is definitely semi-invariant. */
1115 if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1116 return true;
1117
1118 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1119 return false;
1120
1121 return stmt_semi_invariant_p_1 (loop, stmt: def, skip_head, stmt_stat);
1122}
1123
/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
   are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
				const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (gs: loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* Loop iteration PHI node locates in loop header, and it has two source
     operands, one is an initial value coming from outside the loop, the other
     is a value through latch of the loop, which is derived in last iteration,
     we call the latter latch value.  From the PHI node to definition of latch
     value, if excluding branch trace starting from SKIP_HEAD, except copy-
     assignment or likewise, there is no other kind of value redefinition, SSA
     name defined by the PHI node is semi-invariant.

                         loop entry
                              |     .--- latch ---.
                              |     |             |
                              v     v             |
                  x_1  = PHI <x_0,  x_3>          |
                           |                      |
                           v                      |
              .------- if (cond) -------.         |
              |                         |         |
              |                     [ SKIP ]      |
              |                         |         |
              |                     x_2 = ...     |
              |                         |         |
              '---- T ---->.<---- F ----'         |
                           |                      |
                           v                      |
                  x_3 = PHI <x_1, x_2>            |
                           |                      |
                           '----------------------'

     Suppose in certain iteration, execution flow in above graph goes through
     true branch, which means that one source value to define x_3 in false
     branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
     iterations is defined by x_3, we know that x_1 will never changed if COND
     always chooses true branch from then on.  */

  /* Walk the use-def chain backwards from the latch value.  The walk
     succeeds once it closes the cycle back to NAME, proving that no new
     value can ever be merged in outside the skipped trace.  */
  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
	return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (g: stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
	return false;

      /* Reset FROM; each case below must supply the next link of the
	 chain, otherwise the walk fails.  */
      from = NULL_TREE;

      if (gimple_code (g: stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (p: stmt);

	  for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i)
	    {
	      if (skip_head)
		{
		  const_edge e = gimple_phi_arg_edge (phi, i);

		  /* Don't consider redefinitions in excluded basic blocks.  */
		  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
		    continue;
		}

	      tree arg = gimple_phi_arg_def (gs: phi, index: i);

	      if (!from)
		from = arg;
	      else if (!operand_equal_p (from, arg, flags: 0))
		/* There are more than one source operands that provide
		   different values to the SSA name, it is variant.  */
		return false;
	    }
	}
      else if (gimple_code (g: stmt) == GIMPLE_ASSIGN)
	{
	  /* For simple value copy, check its rhs instead.  */
	  if (gimple_assign_ssa_name_copy_p (stmt))
	    from = gimple_assign_rhs1 (gs: stmt);
	}

      /* Any other kind of definition is deemed to introduce a new value
	 to the SSA name.  */
      if (!from)
	return false;
    }
  return true;
}
1226
1227/* Check whether conditional predicates that BB is control-dependent on, are
1228 semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1229 are excluded from LOOP. Semi-invariant state of checked statement is cached
1230 in hash map STMT_STAT. */
1231
1232static bool
1233control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1234 const_basic_block skip_head,
1235 hash_map<gimple *, bool> &stmt_stat)
1236{
1237 hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1238
1239 if (!dep_bbs)
1240 return true;
1241
1242 for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1243 iter != dep_bbs->end (); ++iter)
1244 {
1245 gimple *last = *gsi_last_bb (bb: *iter);
1246 if (!last)
1247 return false;
1248
1249 /* Only check condition predicates. */
1250 if (gimple_code (g: last) != GIMPLE_COND
1251 && gimple_code (g: last) != GIMPLE_SWITCH)
1252 return false;
1253
1254 if (!stmt_semi_invariant_p_1 (loop, stmt: last, skip_head, stmt_stat))
1255 return false;
1256 }
1257
1258 return true;
1259}
1260
/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
   semi-invariant, consequently, all its defined values are semi-invariant.
   Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
   Semi-invariant state of checked statement is cached in hash map
   STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (k: stmt, existed: &existed);

  /* Return the cached result if STMT was already analyzed.  */
  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So set
     state of statement under check to be variant to ensure that.  */
  invar = false;

  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (p: stmt);

      if (gimple_bb (g: stmt) == loop->header)
	{
	  /* If the entry value is subject to abnormal coalescing
	     avoid the transform since we're going to duplicate the
	     loop header and thus likely introduce overlapping life-ranges
	     between the PHI def and the entry on the path when the
	     first loop is skipped.  */
	  tree entry_def
	    = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  if (TREE_CODE (entry_def) == SSA_NAME
	      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
	    return false;
	  invar = loop_iter_phi_semi_invariant_p (loop, loop_phi: phi, skip_head);
	  return invar;
	}

      /* For a loop PHI node that does not locate in loop header, it is semi-
	 invariant only if two conditions are met.  The first is its source
	 values are derived from CONSTANT (including loop-invariant value), or
	 from SSA name defined by semi-invariant loop iteration PHI node.  The
	 second is its source incoming edges are control-dependent on semi-
	 invariant conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i)
	{
	  const_edge e = gimple_phi_arg_edge (phi, i);
	  tree arg = gimple_phi_arg_def (gs: phi, index: i);

	  if (TREE_CODE (arg) == SSA_NAME)
	    {
	      if (!ssa_semi_invariant_p (loop, name: arg, skip_head, stmt_stat))
		return false;

	      /* If source value is defined in location from where the source
		 edge comes in, no need to check control dependency again
		 since this has been done in above SSA name check stage.  */
	      if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
		continue;
	    }

	  if (!control_dep_semi_invariant_p (loop, bb: e->src, skip_head,
					     stmt_stat))
	    return false;
	}
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* Volatile memory load or return of normal (non-const/non-pure) call
	 should not be treated as constant in each iteration of loop.  */
      if (gimple_has_side_effects (stmt))
	return false;

      /* Check if any memory store may kill memory load at this place.  */
      if (gimple_vuse (g: stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
	return false;

      /* Although operand of a statement might be SSA name, CONSTANT or
	 VARDECL, here we only need to check SSA name operands.  This is
	 because check on VARDECL operands, which involve memory loads,
	 must have been done prior to invocation of this function in
	 vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
	if (!ssa_semi_invariant_p (loop, name: use, skip_head, stmt_stat))
	  return false;
    }

  /* The statement itself must also sit under semi-invariant control flow.  */
  if (!control_dep_semi_invariant_p (loop, bb: gimple_bb (g: stmt), skip_head,
				     stmt_stat))
    return false;

  /* Here we SHOULD NOT use invar = true, since hash map might be changed due
     to new insertion, and thus invar may point to invalid memory.  */
  stmt_stat.put (k: stmt, v: true);
  return true;
}
1363
1364/* A helper function to check whether STMT is semi-invariant in LOOP. Basic
1365 blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */
1366
1367static bool
1368stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1369 const_basic_block skip_head)
1370{
1371 hash_map<gimple *, bool> stmt_stat;
1372 return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1373}
1374
1375/* Determine when conditional statement never transfers execution to one of its
1376 branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1377 and those basic blocks dominated by BRANCH_BB. */
1378
1379static bool
1380branch_removable_p (basic_block branch_bb)
1381{
1382 edge_iterator ei;
1383 edge e;
1384
1385 if (single_pred_p (bb: branch_bb))
1386 return true;
1387
1388 FOR_EACH_EDGE (e, ei, branch_bb->preds)
1389 {
1390 if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1391 continue;
1392
1393 if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1394 continue;
1395
1396 /* The branch can be reached from opposite branch, or from some
1397 statement not dominated by the conditional statement. */
1398 return false;
1399 }
1400
1401 return true;
1402}
1403
/* Find out which branch of a conditional statement (COND) is invariant in the
   execution context of LOOP.  That is: once the branch is selected in certain
   iteration of the loop, any operand that contributes to computation of the
   conditional statement remains unchanged in all following iterations.
   Return the semi-invariant edge, or NULL if there is none (or if splitting
   on it would not be useful).  */

static edge
get_cond_invariant_branch (struct loop *loop, gcond *cond)
{
  basic_block cond_bb = gimple_bb (g: cond);
  basic_block targ_bb[2];
  bool invar[2];
  unsigned invar_checks = 0;

  for (unsigned i = 0; i < 2; i++)
    {
      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;

      /* One branch directs to loop exit, no need to perform loop split upon
	 this conditional statement.  Firstly, it is trivial if the exit branch
	 is semi-invariant, for the statement is just to break loop.  Secondly,
	 if the opposite branch is semi-invariant, it means that the statement
	 is real loop-invariant, which is covered by loop unswitch.  */
      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
	return NULL;
    }

  for (unsigned i = 0; i < 2; i++)
    {
      /* invar[!i] records whether the branch OPPOSITE to targ_bb[i] is
	 semi-invariant when targ_bb[i]'s trace is excluded.  */
      invar[!i] = false;

      if (!branch_removable_p (branch_bb: targ_bb[i]))
	continue;

      /* Given a semi-invariant branch, if its opposite branch dominates
	 loop latch, it and its following trace will only be executed in
	 final iteration of loop, namely it is not part of repeated body
	 of the loop.  Similar to the above case that the branch is loop
	 exit, no need to split loop.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
	continue;

      invar[!i] = stmt_semi_invariant_p (loop, stmt: cond, skip_head: targ_bb[i]);
      invar_checks++;
    }

  /* With both branches being invariant (handled by loop unswitch) or
     variant is not what we want.  Note invar[0] ^ !invar[1] is true
     exactly when invar[0] == invar[1].  */
  if (invar[0] ^ !invar[1])
    return NULL;

  /* Found a real loop-invariant condition, do nothing.  (If both checks
     above already ran, the condition cannot be fully invariant, so the
     extra check is skipped.)  */
  if (invar_checks < 2 && stmt_semi_invariant_p (loop, stmt: cond, NULL))
    return NULL;

  return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
}
1460
/* Calculate increased code size measured by estimated insn number if applying
   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */

static int
compute_added_num_insns (struct loop *loop, const_edge branch_edge)
{
  basic_block cond_bb = branch_edge->src;
  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
  basic_block *bbs = ((split_info *) loop->aux)->bbs;
  int num = 0;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      /* Do not count basic blocks only in opposite branch.  */
      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
	continue;

      num += estimate_num_insns_seq (bb_seq (bb: bbs[i]), &eni_size_weights);
    }

  /* It is unnecessary to evaluate expression of the conditional statement
     in new loop that contains only invariant branch.  This expression should
     be constant value (either true or false).  Exclude code size of insns
     that contribute to computation of the expression.  */

  auto_vec<gimple *> worklist;
  hash_set<gimple *> removed;
  gimple *stmt = last_nondebug_stmt (cond_bb);

  worklist.safe_push (obj: stmt);
  removed.add (k: stmt);
  num -= estimate_num_insns (stmt, &eni_size_weights);

  do
    {
      ssa_op_iter opnd_iter;
      use_operand_p opnd_p;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
	{
	  tree opnd = USE_FROM_PTR (opnd_p);

	  /* Constants and default definitions carry no code size.  */
	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
	    continue;

	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
	  use_operand_p use_p;
	  imm_use_iterator use_iter;

	  if (removed.contains (k: opnd_stmt)
	      || !flow_bb_inside_loop_p (loop, gimple_bb (g: opnd_stmt)))
	    continue;

	  /* The defining statement is removable only if all of its
	     non-debug uses are themselves already marked removed.  */
	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
	    {
	      gimple *use_stmt = USE_STMT (use_p);

	      if (!is_gimple_debug (gs: use_stmt) && !removed.contains (k: use_stmt))
		{
		  opnd_stmt = NULL;
		  break;
		}
	    }

	  if (opnd_stmt)
	    {
	      worklist.safe_push (obj: opnd_stmt);
	      removed.add (k: opnd_stmt);
	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
	    }
	}
    } while (!worklist.is_empty ());

  gcc_assert (num >= 0);
  return num;
}
1539
1540/* Find out loop-invariant branch of a conditional statement (COND) if it has,
1541 and check whether it is eligible and profitable to perform loop split upon
1542 this branch in LOOP. */
1543
1544static edge
1545get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1546{
1547 edge invar_branch = get_cond_invariant_branch (loop, cond);
1548 if (!invar_branch)
1549 return NULL;
1550
1551 /* When accurate profile information is available, and execution
1552 frequency of the branch is too low, just let it go. */
1553 profile_probability prob = invar_branch->probability;
1554 if (prob.reliable_p ())
1555 {
1556 int thres = param_min_loop_cond_split_prob;
1557
1558 if (prob < profile_probability::always ().apply_scale (num: thres, den: 100))
1559 return NULL;
1560 }
1561
1562 /* Add a threshold for increased code size to disable loop split. */
1563 if (compute_added_num_insns (loop, branch_edge: invar_branch) > param_max_peeled_insns)
1564 return NULL;
1565
1566 return invar_branch;
1567}
1568
/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
   conditional statement, perform loop split transformation illustrated
   as the following graph.

               .-------T------ if (true) ------F------.
               |                    .---------------. |
               |                    |               | |
               v                    |               v v
          pre-header                |            pre-header
               | .------------.     |                 | .------------.
               | |            |     |                 | |            |
               | v            |     |                 | v            |
             header           |     |               header           |
               |              |     |                 |              |
       .--- if (cond) ---.    |     |         .--- if (true) ---.    |
       |                 |    |     |         |                 |    |
   invariant             |    |     |     invariant             |    |
       |                 |    |     |         |                 |    |
       '---T--->.<---F---'    |     |         '---T--->.<---F---'    |
               |              |    /                  |              |
             stmts            |   /                 stmts            |
               |              F  T                    |              |
              / \             | /                    / \             |
     .-------*   *       [ if (cond) ]      .-------*   *            |
     |           |            |             |           |            |
     |         latch          |             |         latch          |
     |           |            |             |           |            |
     |           '------------'             |           '------------'
     '------------------------. .-----------'
                loop1         | |                   loop2
                              v v
                             exits

   In the graph, loop1 represents the part derived from original one, and
   loop2 is duplicated using loop_version (), which corresponds to the part
   of original one being splitted out.  In original latch edge of loop1, we
   insert a new conditional statement duplicated from the semi-invariant cond,
   and one of its branch goes back to loop1 header as a latch edge, and the
   other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
   we abandon the variant branch of the conditional statement by setting a
   constant bool condition, based on which branch is semi-invariant.
   Return true on success, false if versioning the loop failed.  */

static bool
do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
{
  basic_block cond_bb = invar_branch->src;
  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
  gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: cond_bb));

  gcc_assert (cond_bb->loop_father == loop1);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
		     "loop split on semi-invariant condition at %s branch\n",
		     true_invar ? "true" : "false");

  initialize_original_copy_tables ();

  /* Create loop2, the copy that execution switches to once the condition
     becomes invariant.  */
  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
				     invar_branch->probability.invert (),
				     invar_branch->probability,
				     profile_probability::always (),
				     profile_probability::always (),
				     true);
  if (!loop2)
    {
      free_original_copy_tables ();
      return false;
    }

  basic_block cond_bb_copy = get_bb_copy (cond_bb);
  gcond *cond_copy = as_a<gcond *> (p: *gsi_last_bb (bb: cond_bb_copy));

  /* Replace the condition in loop2 with a bool constant to let PassManager
     remove the variant branch after current pass completes.  */
  if (true_invar)
    gimple_cond_make_true (gs: cond_copy);
  else
    gimple_cond_make_false (gs: cond_copy);

  update_stmt (s: cond_copy);

  /* Insert a new conditional statement on latch edge of loop1, its condition
     is duplicated from the semi-invariant.  This statement acts as a switch
     to transfer execution from loop1 to loop2, when loop1 enters into
     invariant state.  */
  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
  basic_block break_bb = split_edge (single_pred_edge (bb: latch_bb));
  gimple *break_cond = gimple_build_cond (gimple_cond_code(gs: cond),
					  gimple_cond_lhs (gs: cond),
					  gimple_cond_rhs (gs: cond),
					  NULL_TREE, NULL_TREE);

  gimple_stmt_iterator gsi = gsi_last_bb (bb: break_bb);
  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);

  edge to_loop1 = single_succ_edge (bb: break_bb);
  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);

  /* If the invariant branch is the true one, leaving loop1 happens on the
     true edge of the new condition, and staying on the false edge (and
     vice versa).  */
  to_loop1->flags &= ~EDGE_FALLTHRU;
  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;

  /* Due to introduction of a control flow edge from loop1 latch to loop2
     pre-header, we should update PHIs in loop2 to reflect this connection
     between loop1 and loop2.  */
  connect_loop_phis (loop1, loop2, new_e: to_loop2);

  edge true_edge, false_edge, skip_edge1, skip_edge2;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  skip_edge1 = true_invar ? false_edge : true_edge;
  skip_edge2 = true_invar ? true_edge : false_edge;
  fix_loop_bb_probability (loop1, loop2, true_edge: skip_edge1, false_edge: skip_edge2);

  /* Fix first loop's exit probability after scaling.  */
  to_loop1->probability = invar_branch->probability.invert ();
  to_loop2->probability = invar_branch->probability;

  free_original_copy_tables ();

  return true;
}
1692
1693/* Traverse all conditional statements in LOOP, to find out a good candidate
1694 upon which we can do loop split. */
1695
1696static bool
1697split_loop_on_cond (struct loop *loop)
1698{
1699 split_info *info = new split_info ();
1700 basic_block *bbs = info->bbs = get_loop_body (loop);
1701 bool do_split = false;
1702
1703 /* Allocate an area to keep temporary info, and associate its address
1704 with loop aux field. */
1705 loop->aux = info;
1706
1707 for (unsigned i = 0; i < loop->num_nodes; i++)
1708 bbs[i]->aux = NULL;
1709
1710 for (unsigned i = 0; i < loop->num_nodes; i++)
1711 {
1712 basic_block bb = bbs[i];
1713
1714 /* We only consider conditional statement, which be executed at most once
1715 in each iteration of the loop. So skip statements in inner loops. */
1716 if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1717 continue;
1718
1719 /* Actually this check is not a must constraint. With it, we can ensure
1720 conditional statement will always be executed in each iteration. */
1721 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1722 continue;
1723
1724 gcond *cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb));
1725 if (!cond)
1726 continue;
1727
1728 edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1729
1730 if (branch_edge)
1731 {
1732 do_split_loop_on_cond (loop1: loop, invar_branch: branch_edge);
1733 do_split = true;
1734 break;
1735 }
1736 }
1737
1738 delete info;
1739 loop->aux = NULL;
1740
1741 return do_split;
1742}
1743
/* Main entry point.  Perform loop splitting on all suitable loops.
   Return TODO flags for the pass manager (TODO_cleanup_cfg if any loop
   was split, 0 otherwise).  */

static unsigned int
tree_ssa_split_loops (void)
{
  bool changed = false;

  gcc_assert (scev_initialized_p ());

  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* The aux field is used below to mark loops whose inner loops were
     split; make sure all start out clear.  */
  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  /* Go through all loops starting from innermost.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (loop->aux)
	{
	  /* If any of our inner loops was split, don't split us,
	     and mark our containing loop as having had splits as well.
	     This allows for delaying SSA update.  */
	  loop_outer (loop)->aux = loop;
	  continue;
	}

      if (optimize_loop_for_size_p (loop))
	continue;

      if (split_loop (loop1: loop) || split_loop_on_cond (loop))
	{
	  /* Mark our containing loop as having had some split inner loops.  */
	  loop_outer (loop)->aux = loop;
	  changed = true;
	}
    }

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  clear_aux_for_blocks ();

  free_dominance_info (CDI_POST_DOMINATORS);

  if (changed)
    {
      /* SSA update was delayed while splitting; restore loop-closed SSA
	 form before subsequent loop passes run.  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      return TODO_cleanup_cfg;
    }
  return 0;
}
1795
/* Loop splitting pass.  */

namespace {

/* Pass descriptor; see tree-pass.h for the meaning of each field.  */

const pass_data pass_data_loop_split =
{
  .type: GIMPLE_PASS, /* type */
  .name: "lsplit", /* name */
  .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */
  .tv_id: TV_LOOP_SPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  .properties_provided: 0, /* properties_provided */
  .properties_destroyed: 0, /* properties_destroyed */
  .todo_flags_start: 0, /* todo_flags_start */
  .todo_flags_finish: 0, /* todo_flags_finish */
};

class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when loop splitting is enabled (-fsplit-loops).  */
  bool gate (function *) final override { return flag_split_loops != 0; }
  unsigned int execute (function *) final override;

}; // class pass_loop_split

/* Pass entry point; bail out early when FUN contains no real loops.  */

unsigned int
pass_loop_split::execute (function *fun)
{
  if (number_of_loops (fn: fun) <= 1)
    return 0;

  return tree_ssa_split_loops ();
}

} // anon namespace
1836
/* Factory function for the loop splitting pass; registered with the pass
   manager via tree-pass.h.  */

gimple_opt_pass *
make_pass_loop_split (gcc::context *ctxt)
{
  return new pass_loop_split (ctxt);
}
1842

/* Source: gcc/tree-ssa-loop-split.cc.  */