1 | /* High-level loop manipulation functions. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */ |
28 | #include "ssa.h" |
29 | #include "gimple-pretty-print.h" |
30 | #include "fold-const.h" |
31 | #include "cfganal.h" |
32 | #include "gimplify.h" |
33 | #include "gimple-iterator.h" |
34 | #include "gimplify-me.h" |
35 | #include "tree-cfg.h" |
36 | #include "tree-ssa-loop-ivopts.h" |
37 | #include "tree-ssa-loop-manip.h" |
38 | #include "tree-ssa-loop-niter.h" |
39 | #include "tree-ssa-loop.h" |
40 | #include "tree-into-ssa.h" |
41 | #include "tree-ssa.h" |
42 | #include "cfgloop.h" |
43 | #include "tree-scalar-evolution.h" |
44 | #include "tree-inline.h" |
45 | |
/* All bitmaps for rewriting into loop-closed SSA go on this obstack,
   so that we can free them all at once.  It is initialized and released
   in rewrite_into_loop_closed_ssa_1.  */
static bitmap_obstack loop_renamer_obstack;
49 | |
50 | /* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP. |
51 | If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration. |
52 | If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration. |
53 | It is expected that neither BASE nor STEP are shared with other expressions |
54 | (unless the sharing rules allow this). Use VAR as a base var_decl for it |
55 | (if NULL, a new temporary will be created). The increment will occur at |
56 | INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and |
57 | AFTER can be computed using standard_iv_increment_position. The ssa versions |
58 | of the variable before and after increment will be stored in VAR_BEFORE and |
59 | VAR_AFTER (unless they are NULL). */ |
60 | |
void
create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
	   gimple_stmt_iterator *incr_pos, bool after, tree *var_before,
	   tree *var_after)
{
  gassign *stmt;
  gphi *phi;
  tree initial, step1;
  gimple_seq stmts;
  tree vb, va;
  gcc_assert (incr_op == PLUS_EXPR || incr_op == MINUS_EXPR);
  edge pe = loop_preheader_edge (loop);

  /* VB/VA are the SSA versions of the variable before and after the
     increment; base them on VAR if one was given.  */
  if (var != NULL_TREE)
    {
      vb = make_ssa_name (var);
      va = make_ssa_name (var);
    }
  else
    {
      vb = make_temp_ssa_name (TREE_TYPE (base), NULL, name: "ivtmp");
      va = make_temp_ssa_name (TREE_TYPE (base), NULL, name: "ivtmp");
    }
  if (var_before)
    *var_before = vb;
  if (var_after)
    *var_after = va;

  /* For easier readability of the created code, produce MINUS_EXPRs
     when suitable.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (step)))
	{
	  step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  /* Prefer whichever of STEP and -STEP is the smaller constant.  */
	  if (tree_int_cst_lt (t1: step1, t2: step))
	    {
	      incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
	      step = step1;
	    }
	}
      else
	{
	  bool ovf;

	  /* Only negate a signed step when the negation cannot overflow.  */
	  if (!tree_expr_nonnegative_warnv_p (step, &ovf)
	      && may_negate_without_overflow_p (step))
	    {
	      incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
	      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	    }
	}
    }
  /* Pointer IVs are incremented with POINTER_PLUS_EXPR; fold a requested
     MINUS_EXPR into a negated (offset-typed) step instead.  */
  if (POINTER_TYPE_P (TREE_TYPE (base)))
    {
      if (TREE_CODE (base) == ADDR_EXPR)
	mark_addressable (TREE_OPERAND (base, 0));
      step = convert_to_ptrofftype (step);
      if (incr_op == MINUS_EXPR)
	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
      incr_op = POINTER_PLUS_EXPR;
    }
  /* Gimplify the step if necessary.  We put the computations in front of the
     loop (i.e. the step should be loop invariant).  */
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (pe, stmts);

  /* Build VA = VB <incr_op> STEP and insert it at the requested position.  */
  stmt = gimple_build_assign (va, incr_op, vb, step);
  /* Prevent the increment from inheriting a bogus location if it is not put
     immediately after a statement whose location is known.  */
  if (after)
    {
      gimple_stmt_iterator gsi = *incr_pos;
      if (!gsi_end_p (i: gsi))
	gsi_next_nondebug (i: &gsi);
      if (gsi_end_p (i: gsi))
	{
	  edge e = single_succ_edge (bb: gsi_bb (i: *incr_pos));
	  gimple_set_location (g: stmt, location: e->goto_locus);
	}
      gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
    }
  else
    {
      gimple_stmt_iterator gsi = *incr_pos;
      if (!gsi_end_p (i: gsi) && is_gimple_debug (gs: gsi_stmt (i: gsi)))
	gsi_next_nondebug (i: &gsi);
      if (!gsi_end_p (i: gsi))
	gimple_set_location (g: stmt, location: gimple_location (g: gsi_stmt (i: gsi)));
      gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
    }

  /* Gimplify the initial value on the preheader edge as well.  */
  initial = force_gimple_operand (base, &stmts, true, var);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (pe, stmts);

  /* Merge the initial value and the incremented value in the loop header.  */
  phi = create_phi_node (vb, loop->header);
  add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
  add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
}
162 | |
163 | /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of |
164 | both DEF_LOOP and USE_LOOP. */ |
165 | |
166 | static inline class loop * |
167 | find_sibling_superloop (class loop *use_loop, class loop *def_loop) |
168 | { |
169 | unsigned ud = loop_depth (loop: use_loop); |
170 | unsigned dd = loop_depth (loop: def_loop); |
171 | gcc_assert (ud > 0 && dd > 0); |
172 | if (ud > dd) |
173 | use_loop = superloop_at_depth (use_loop, dd); |
174 | if (ud < dd) |
175 | def_loop = superloop_at_depth (def_loop, ud); |
176 | while (loop_outer (loop: use_loop) != loop_outer (loop: def_loop)) |
177 | { |
178 | use_loop = loop_outer (loop: use_loop); |
179 | def_loop = loop_outer (loop: def_loop); |
180 | gcc_assert (use_loop && def_loop); |
181 | } |
182 | return use_loop; |
183 | } |
184 | |
/* DEF_BB is a basic block containing a DEF that needs rewriting into
   loop-closed SSA form.  USE_BLOCKS is the set of basic blocks containing
   uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
   USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_BB).
   DEF_LOOP_EXITS is a bitmap of loop exit blocks that exit the loop
   containing DEF_BB or its outer loops.

   Compute the subset of loop exit destinations that exit the loop
   containing DEF_BB or one of its loop fathers, in which DEF is live.
   This set is returned in the bitmap LIVE_EXITS.

   Instead of computing the complete livein set of the def, we use the loop
   nesting tree as a form of poor man's structure analysis.  This greatly
   speeds up the analysis, which is important because this function may be
   called on all SSA names that need rewriting, one at a time.  */

static void
compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
			 basic_block def_bb, bitmap def_loop_exits)
{
  unsigned i;
  bitmap_iterator bi;
  class loop *def_loop = def_bb->loop_father;
  unsigned def_loop_depth = loop_depth (loop: def_loop);

  /* Normally the work list size is bounded by the number of basic
     blocks in the largest loop.  We don't know this number, but we
     can be fairly sure that it will be relatively small.  */
  auto_vec<basic_block, 8> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));

  /* Seed the backward walk with the use blocks.  A use in a loop that is
     not nested inside DEF_LOOP is attributed to the header of the sibling
     superloop of that loop.  */
  EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
    {
      basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
      class loop *use_loop = use_bb->loop_father;
      gcc_checking_assert (def_loop != use_loop
			   && ! flow_loop_nested_p (def_loop, use_loop));
      if (! flow_loop_nested_p (use_loop, def_loop))
	use_bb = find_sibling_superloop (use_loop, def_loop)->header;
      if (bitmap_set_bit (live_exits, use_bb->index))
	worklist.safe_push (obj: use_bb);
    }

  /* Iterate until the worklist is empty.  */
  while (! worklist.is_empty ())
    {
      edge e;
      edge_iterator ei;

      /* Pull a block off the worklist.  */
      basic_block bb = worklist.pop ();

      /* Make sure we have at least enough room in the work list
	 for all predecessors of this block.  */
      worklist.reserve (EDGE_COUNT (bb->preds));

      /* For each predecessor block.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  basic_block pred = e->src;
	  class loop *pred_loop = pred->loop_father;
	  unsigned pred_loop_depth = loop_depth (loop: pred_loop);
	  bool pred_visited;

	  /* We should have met DEF_BB along the way.  */
	  gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));

	  if (pred_loop_depth >= def_loop_depth)
	    {
	      if (pred_loop_depth > def_loop_depth)
		pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
	      /* If we've reached DEF_LOOP, our train ends here.  */
	      if (pred_loop == def_loop)
		continue;
	    }
	  else if (! flow_loop_nested_p (pred_loop, def_loop))
	    pred = find_sibling_superloop (use_loop: pred_loop, def_loop)->header;

	  /* Add PRED to the LIVEIN set.  PRED_VISITED is true if
	     we had already added PRED to LIVEIN before.  */
	  pred_visited = !bitmap_set_bit (live_exits, pred->index);

	  /* If we have visited PRED before, don't add it to the worklist.
	     If BB dominates PRED, then we're probably looking at a loop.
	     We're only interested in looking up in the dominance tree
	     because DEF_BB dominates all the uses.  */
	  if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
	    continue;

	  worklist.quick_push (obj: pred);
	}
    }

  /* Keep only the blocks that are exit destinations of DEF_LOOP or one of
     its superloops.  */
  bitmap_and_into (live_exits, def_loop_exits);
}
280 | |
/* Add a loop-closing PHI for VAR in basic block EXIT.  */

static void
add_exit_phi (basic_block exit, tree var)
{
  gphi *phi;
  edge e;
  edge_iterator ei;

  /* Check that at least one of the edges entering the EXIT block exits
     the loop, or a superloop of that loop, that VAR is defined in.  */
  if (flag_checking)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      basic_block def_bb = gimple_bb (g: def_stmt);
      FOR_EACH_EDGE (e, ei, exit->preds)
	{
	  class loop *aloop = find_common_loop (def_bb->loop_father,
						e->src->loop_father);
	  if (!flow_bb_inside_loop_p (aloop, e->dest))
	    break;
	}
      gcc_assert (e);
    }

  /* Create the PHI with a fresh SSA name based on VAR as its result; each
     edge entering EXIT uses VAR itself as its argument.  */
  phi = create_phi_node (NULL_TREE, exit);
  create_new_def_for (var, phi, gimple_phi_result_ptr (gs: phi));
  FOR_EACH_EDGE (e, ei, exit->preds)
    add_phi_arg (phi, var, e, UNKNOWN_LOCATION);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: ";; Created LCSSA PHI: ");
      print_gimple_stmt (dump_file, phi, 0, dump_flags);
    }
}
317 | |
318 | /* Add exit phis for VAR that is used in LIVEIN. |
319 | Exits of the loops are stored in LOOP_EXITS. Returns the number |
320 | of PHIs added for VAR. */ |
321 | |
322 | static unsigned |
323 | add_exit_phis_var (tree var, bitmap use_blocks, bitmap def_loop_exits) |
324 | { |
325 | unsigned index; |
326 | bitmap_iterator bi; |
327 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
328 | |
329 | gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index)); |
330 | |
331 | auto_bitmap live_exits (&loop_renamer_obstack); |
332 | compute_live_loop_exits (live_exits, use_blocks, def_bb, def_loop_exits); |
333 | |
334 | unsigned cnt = 0; |
335 | EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi) |
336 | { |
337 | add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var); |
338 | cnt++; |
339 | } |
340 | return cnt; |
341 | } |
342 | |
/* qsort comparator for (loop number, SSA version) pairs: orders solely by
   the loop number, leaving entries of the same loop in unspecified
   relative order.  */

static int
loop_name_cmp (const void *p1, const void *p2)
{
  auto a = static_cast<const std::pair<int, int> *> (p1);
  auto b = static_cast<const std::pair<int, int> *> (p2);

  if (a->first != b->first)
    return a->first < b->first ? -1 : 1;
  return 0;
}
354 | |
/* Add exit phis for the names marked in NAMES_TO_RENAME.
   Sets of blocks where the ssa names are used are stored in USE_BLOCKS,
   indexed by SSA version.  Returns whether any name required multiple
   LC PHI nodes.  */

static bool
add_exit_phis (bitmap names_to_rename, bitmap *use_blocks)
{
  unsigned i;
  bitmap_iterator bi;
  bool multiple_p = false;

  /* Sort names_to_rename after definition loop so we can avoid re-computing
     def_loop_exits.  */
  auto_vec<std::pair<int, int> > names (bitmap_count_bits (names_to_rename));
  EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
    {
      tree name = ssa_name (i);
      loop_p def_loop = gimple_bb (SSA_NAME_DEF_STMT (name))->loop_father;
      names.quick_push (obj: std::make_pair (x&: def_loop->num, y&: i));
    }
  names.qsort (loop_name_cmp);

  /* DEF_LOOP_EXITS caches the exit destinations of the current definition
     loop and all its superloops; recomputed only when the definition loop
     changes, which the sort above makes infrequent.  */
  auto_bitmap def_loop_exits (&loop_renamer_obstack);
  loop_p last_def_loop = NULL;
  for (auto p : names)
    {
      loop_p def_loop = get_loop (cfun, num: p.first);
      if (def_loop != last_def_loop)
	{
	  bitmap_clear (def_loop_exits);
	  last_def_loop = def_loop;
	  for (class loop *loop = def_loop; loop != current_loops->tree_root;
	       loop = loop_outer (loop))
	    for (auto exit = loop->exits->next; exit->e; exit = exit->next)
	      bitmap_set_bit (def_loop_exits, exit->e->dest->index);
	}
      if (add_exit_phis_var (ssa_name (p.second), use_blocks: use_blocks[p.second],
			     def_loop_exits) > 1)
	multiple_p = true;
    }

  return multiple_p;
}
399 | |
400 | /* For USE in BB, if it is used outside of the loop it is defined in, |
401 | mark it for rewrite. Record basic block BB where it is used |
402 | to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap. |
403 | Note that for USEs in phis, BB should be the src of the edge corresponding to |
404 | the use, rather than the bb containing the phi. */ |
405 | |
406 | static void |
407 | find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks, |
408 | bitmap need_phis) |
409 | { |
410 | unsigned ver; |
411 | basic_block def_bb; |
412 | class loop *def_loop; |
413 | |
414 | if (TREE_CODE (use) != SSA_NAME) |
415 | return; |
416 | |
417 | ver = SSA_NAME_VERSION (use); |
418 | def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); |
419 | if (!def_bb) |
420 | return; |
421 | def_loop = def_bb->loop_father; |
422 | |
423 | /* If the definition is not inside a loop, it is not interesting. */ |
424 | if (!loop_outer (loop: def_loop)) |
425 | return; |
426 | |
427 | /* If the use is not outside of the loop it is defined in, it is not |
428 | interesting. */ |
429 | if (flow_bb_inside_loop_p (def_loop, bb)) |
430 | return; |
431 | |
432 | /* If we're seeing VER for the first time, we still have to allocate |
433 | a bitmap for its uses. */ |
434 | if (bitmap_set_bit (need_phis, ver)) |
435 | use_blocks[ver] = BITMAP_ALLOC (obstack: &loop_renamer_obstack); |
436 | bitmap_set_bit (use_blocks[ver], bb->index); |
437 | } |
438 | |
439 | /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the |
440 | loop they are defined to rewrite. Record the set of blocks in which the ssa |
441 | names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */ |
442 | |
443 | static void |
444 | find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis, |
445 | int use_flags) |
446 | { |
447 | ssa_op_iter iter; |
448 | tree var; |
449 | basic_block bb = gimple_bb (g: stmt); |
450 | |
451 | if (is_gimple_debug (gs: stmt)) |
452 | return; |
453 | |
454 | /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES |
455 | only. */ |
456 | if (use_flags == SSA_OP_VIRTUAL_USES) |
457 | { |
458 | tree vuse = gimple_vuse (g: stmt); |
459 | if (vuse != NULL_TREE) |
460 | find_uses_to_rename_use (bb, use: gimple_vuse (g: stmt), use_blocks, need_phis); |
461 | } |
462 | else |
463 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags) |
464 | find_uses_to_rename_use (bb, use: var, use_blocks, need_phis); |
465 | } |
466 | |
467 | /* Marks names matching USE_FLAGS that are used in BB and outside of the loop |
468 | they are defined in for rewrite. Records the set of blocks in which the ssa |
469 | names are used to USE_BLOCKS. Record the SSA names that will |
470 | need exit PHIs in NEED_PHIS. */ |
471 | |
472 | static void |
473 | find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis, |
474 | int use_flags) |
475 | { |
476 | edge e; |
477 | edge_iterator ei; |
478 | bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0; |
479 | bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0; |
480 | |
481 | FOR_EACH_EDGE (e, ei, bb->succs) |
482 | for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (i: bsi); |
483 | gsi_next (i: &bsi)) |
484 | { |
485 | gphi *phi = bsi.phi (); |
486 | bool virtual_p = virtual_operand_p (op: gimple_phi_result (gs: phi)); |
487 | if ((virtual_p && do_virtuals) |
488 | || (!virtual_p && do_nonvirtuals)) |
489 | find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e), |
490 | use_blocks, need_phis); |
491 | } |
492 | |
493 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); |
494 | gsi_next (i: &bsi)) |
495 | find_uses_to_rename_stmt (stmt: gsi_stmt (i: bsi), use_blocks, need_phis, |
496 | use_flags); |
497 | } |
498 | |
499 | /* Marks names matching USE_FLAGS that are used outside of the loop they are |
500 | defined in for rewrite. Records the set of blocks in which the ssa names are |
501 | used to USE_BLOCKS. Record the SSA names that will need exit PHIs in |
502 | NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */ |
503 | |
504 | static void |
505 | find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis, |
506 | int use_flags) |
507 | { |
508 | basic_block bb; |
509 | unsigned index; |
510 | bitmap_iterator bi; |
511 | |
512 | if (changed_bbs) |
513 | EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi) |
514 | { |
515 | bb = BASIC_BLOCK_FOR_FN (cfun, index); |
516 | if (bb) |
517 | find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); |
518 | } |
519 | else |
520 | FOR_EACH_BB_FN (bb, cfun) |
521 | find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); |
522 | } |
523 | |
524 | /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra |
525 | phi nodes to ensure that no variable is used outside the loop it is |
526 | defined in. |
527 | |
528 | This strengthening of the basic ssa form has several advantages: |
529 | |
530 | 1) Updating it during unrolling/peeling/versioning is trivial, since |
531 | we do not need to care about the uses outside of the loop. |
532 | The same applies to virtual operands which are also rewritten into |
533 | loop closed SSA form. Note that virtual operands are always live |
534 | until function exit. |
535 | 2) The behavior of all uses of an induction variable is the same. |
536 | Without this, you need to distinguish the case when the variable |
537 | is used outside of the loop it is defined in, for example |
538 | |
539 | for (i = 0; i < 100; i++) |
540 | { |
541 | for (j = 0; j < 100; j++) |
542 | { |
543 | k = i + j; |
544 | use1 (k); |
545 | } |
546 | use2 (k); |
547 | } |
548 | |
549 | Looking from the outer loop with the normal SSA form, the first use of k |
550 | is not well-behaved, while the second one is an induction variable with |
551 | base 99 and step 1. |
552 | |
553 | If CHANGED_BBS is not NULL, we look for uses outside loops only in the |
554 | basic blocks in this set. |
555 | |
556 | USE_FLAGS allows us to specify whether we want virtual, non-virtual or |
557 | both variables rewritten. |
558 | |
559 | UPDATE_FLAG is used in the call to update_ssa. See |
560 | TODO_update_ssa* for documentation. */ |
561 | |
static void
rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
				int use_flags)
{
  bitmap *use_blocks;
  bitmap names_to_rename;

  loops_state_set (flags: LOOP_CLOSED_SSA);
  /* Without any real loop there is nothing to rewrite.  */
  if (number_of_loops (cfun) <= 1)
    return;

  /* If the pass has caused the SSA form to be out-of-date, update it
     now.  */
  if (update_flag != 0)
    update_ssa (update_flag);
  else if (flag_checking)
    verify_ssa (true, true);

  bitmap_obstack_initialize (&loop_renamer_obstack);

  names_to_rename = BITMAP_ALLOC (obstack: &loop_renamer_obstack);

  /* Uses of names to rename.  We don't have to initialize this array,
     because we know that we will only have entries for the SSA names
     in NAMES_TO_RENAME.  */
  use_blocks = XNEWVEC (bitmap, num_ssa_names);
  find_uses_to_rename (changed_bbs, use_blocks, need_phis: names_to_rename, use_flags);

  if (!bitmap_empty_p (map: names_to_rename))
    {
      bool release_recorded_exits_p = false;
      if (!loops_state_satisfies_p (flags: LOOPS_HAVE_RECORDED_EXITS))
	{
	  /* Doing one scan over the whole function is cheaper than
	     traversing the loop tree and gathering BBs of each loop.  */
	  record_loop_exits ();
	  release_recorded_exits_p = true;
	}

      /* Add the PHI nodes on exits of the loops for the names we need to
	 rewrite.  When no variable required multiple LC PHI nodes to be
	 inserted then we know that all uses outside of the loop are
	 dominated by the single LC SSA definition and no further PHI
	 node insertions are required.  */
      bool need_phis_p = add_exit_phis (names_to_rename, use_blocks);

      if (release_recorded_exits_p)
	release_recorded_exits (cfun);

      /* Fix up all the names found to be used outside their original
	 loops.  */
      update_ssa (need_phis_p ? TODO_update_ssa : TODO_update_ssa_no_phi);
    }

  /* Releasing the obstack frees all the per-name bitmaps at once; only the
     array of pointers needs an explicit free.  */
  bitmap_obstack_release (&loop_renamer_obstack);
  free (ptr: use_blocks);
}
619 | |
/* Rewrites the defs and uses into a loop closed ssa form.  Both virtual
   and non-virtual operands are considered (SSA_OP_ALL_USES).
   If CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
   blocks in this set.  UPDATE_FLAG is used in the call to update_ssa.  See
   TODO_update_ssa* for documentation.  */

void
rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
{
  rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_ALL_USES);
}
630 | |
631 | /* Check invariants of the loop closed ssa form for the def in DEF_BB. */ |
632 | |
633 | static void |
634 | check_loop_closed_ssa_def (basic_block def_bb, tree def) |
635 | { |
636 | use_operand_p use_p; |
637 | imm_use_iterator iterator; |
638 | FOR_EACH_IMM_USE_FAST (use_p, iterator, def) |
639 | { |
640 | if (is_gimple_debug (USE_STMT (use_p))) |
641 | continue; |
642 | |
643 | basic_block use_bb = gimple_bb (USE_STMT (use_p)); |
644 | if (is_a <gphi *> (USE_STMT (use_p))) |
645 | use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; |
646 | |
647 | gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb)); |
648 | } |
649 | } |
650 | |
651 | /* Checks invariants of loop closed ssa form in BB. */ |
652 | |
653 | static void |
654 | check_loop_closed_ssa_bb (basic_block bb) |
655 | { |
656 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); |
657 | gsi_next (i: &bsi)) |
658 | { |
659 | gphi *phi = bsi.phi (); |
660 | |
661 | check_loop_closed_ssa_def (def_bb: bb, PHI_RESULT (phi)); |
662 | } |
663 | |
664 | for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (i: bsi); |
665 | gsi_next_nondebug (i: &bsi)) |
666 | { |
667 | ssa_op_iter iter; |
668 | tree var; |
669 | gimple *stmt = gsi_stmt (i: bsi); |
670 | |
671 | FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_DEFS) |
672 | check_loop_closed_ssa_def (def_bb: bb, def: var); |
673 | } |
674 | } |
675 | |
/* Checks that invariants of the loop closed ssa form are preserved.
   Call verify_ssa when VERIFY_SSA_P is true.  Note all loops are checked
   if LOOP is NULL, otherwise, only LOOP is checked.  */

DEBUG_FUNCTION void
verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
{
  if (number_of_loops (cfun) <= 1)
    return;

  timevar_push (tv: TV_VERIFY_LOOP_CLOSED);

  if (loop == NULL)
    {
      basic_block bb;

      if (verify_ssa_p)
	verify_ssa (false, true);

      /* Only blocks inside some real loop (num > 0) are subject to the
	 loop-closed invariant.  */
      FOR_EACH_BB_FN (bb, cfun)
	if (bb->loop_father && bb->loop_father->num > 0)
	  check_loop_closed_ssa_bb (bb);
    }
  else
    {
      basic_block *bbs = get_loop_body (loop);

      /* We do not have loop-local SSA verification so just
	 check there's no update queued.  */
      if (verify_ssa_p)
	gcc_assert (!need_ssa_update_p (cfun));

      for (unsigned i = 0; i < loop->num_nodes; ++i)
	check_loop_closed_ssa_bb (bb: bbs[i]);

      free (ptr: bbs);
    }

  timevar_pop (tv: TV_VERIFY_LOOP_CLOSED);
}
716 | |
/* Split loop exit edge EXIT.  The things are a bit complicated by a need to
   preserve the loop closed ssa form.  If COPY_CONSTANTS_P is true then
   forwarder PHIs are also created for constant arguments.
   The newly created block is returned.  */

basic_block
split_loop_exit_edge (edge exit, bool copy_constants_p)
{
  basic_block dest = exit->dest;
  basic_block bb = split_edge (exit);
  gphi *phi, *new_phi;
  tree new_name, name;
  use_operand_p op_p;
  gphi_iterator psi;
  location_t locus;

  /* For each PHI in the old destination, reroute the argument coming
     through the split edge via a new single-argument PHI in BB.  */
  for (psi = gsi_start_phis (dest); !gsi_end_p (i: psi); gsi_next (i: &psi))
    {
      phi = psi.phi ();
      op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
      locus = gimple_phi_arg_location_from_edge (phi, e: single_succ_edge (bb));

      name = USE_FROM_PTR (op_p);

      /* If the argument of the PHI node is a constant, we do not need
	 to keep it inside loop.  */
      if (TREE_CODE (name) != SSA_NAME
	  && !copy_constants_p)
	continue;

      /* Otherwise create an auxiliary phi node that will copy the value
	 of the SSA name out of the loop.  */
      new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
      new_phi = create_phi_node (new_name, bb);
      add_phi_arg (new_phi, name, exit, locus);
      SET_USE (op_p, new_name);
    }

  return bb;
}
757 | |
/* Returns the basic block in which statements should be emitted for
   induction variables incremented at the end of the LOOP.  */

basic_block
ip_end_pos (class loop *loop)
{
  return loop->latch;
}
766 | |
767 | /* Returns the basic block in that statements should be emitted for induction |
768 | variables incremented just before exit condition of a LOOP. */ |
769 | |
770 | basic_block |
771 | ip_normal_pos (class loop *loop) |
772 | { |
773 | basic_block bb; |
774 | edge exit; |
775 | |
776 | if (!single_pred_p (bb: loop->latch)) |
777 | return NULL; |
778 | |
779 | bb = single_pred (bb: loop->latch); |
780 | if (!safe_is_a <gcond *> (p: *gsi_last_bb (bb))) |
781 | return NULL; |
782 | |
783 | exit = EDGE_SUCC (bb, 0); |
784 | if (exit->dest == loop->latch) |
785 | exit = EDGE_SUCC (bb, 1); |
786 | |
787 | if (flow_bb_inside_loop_p (loop, exit->dest)) |
788 | return NULL; |
789 | |
790 | return bb; |
791 | } |
792 | |
/* Stores the standard position for induction variable increment in LOOP
   (just before the exit condition if it is available and latch block is empty,
   end of the latch block otherwise) to BSI.  INSERT_AFTER is set to true if
   the increment should be inserted after *BSI.  */

void
standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
				bool *insert_after)
{
  basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
  gimple *last = last_nondebug_stmt (latch);

  /* Use the end of the latch when there is no block with the exit
     condition, or when the latch is not empty (i.e. it contains a real
     statement, not just a label).  */
  if (!bb
      || (last && gimple_code (g: last) != GIMPLE_LABEL))
    {
      *bsi = gsi_last_bb (bb: latch);
      *insert_after = true;
    }
  else
    {
      /* Otherwise insert right before the exit condition.  */
      *bsi = gsi_last_bb (bb);
      *insert_after = false;
    }
}
817 | |
818 | /* Copies phi node arguments for duplicated blocks. The index of the first |
819 | duplicated block is FIRST_NEW_BLOCK. */ |
820 | |
821 | static void |
822 | copy_phi_node_args (unsigned first_new_block) |
823 | { |
824 | unsigned i; |
825 | |
826 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
827 | BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED; |
828 | |
829 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
830 | add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i)); |
831 | |
832 | for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
833 | BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED; |
834 | } |
835 | |
836 | |
/* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also
   updates the PHI nodes at start of the copied region.  In order to
   achieve this, only loops whose exits all lead to the same location
   are handled.

   Notice that we do not completely update the SSA web after
   duplication.  The caller is responsible for calling update_ssa
   after the loop has been duplicated.  */

bool
gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e,
					   unsigned int ndupl,
					   sbitmap wont_exit, edge orig,
					   vec<edge> *to_remove, int flags)
{
  unsigned first_new_block;

  /* This transformation relies on simple latches and preheaders.  */
  if (!loops_state_satisfies_p (flags: LOOPS_HAVE_SIMPLE_LATCHES))
    return false;
  if (!loops_state_satisfies_p (flags: LOOPS_HAVE_PREHEADERS))
    return false;

  /* Blocks created by the duplication get indices from here on.  */
  first_new_block = last_basic_block_for_fn (cfun);
  if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig,
					   to_remove, flags))
    return false;

  /* Readd the removed phi args for e.  */
  flush_pending_stmts (e);

  /* Copy the phi node arguments.  */
  copy_phi_node_args (first_new_block);

  /* Drop cached scalar evolutions.  */
  scev_reset ();

  return true;
}
874 | |
875 | /* Returns true if we can unroll LOOP FACTOR times. Number |
876 | of iterations of the loop is returned in NITER. */ |
877 | |
878 | bool |
879 | can_unroll_loop_p (class loop *loop, unsigned factor, |
880 | class tree_niter_desc *niter) |
881 | { |
882 | edge exit; |
883 | |
884 | /* Check whether unrolling is possible. We only want to unroll loops |
885 | for that we are able to determine number of iterations. We also |
886 | want to split the extra iterations of the loop from its end, |
887 | therefore we require that the loop has precisely one |
888 | exit. */ |
889 | |
890 | exit = single_dom_exit (loop); |
891 | if (!exit) |
892 | return false; |
893 | |
894 | if (!number_of_iterations_exit (loop, exit, niter, false) |
895 | || niter->cmp == ERROR_MARK |
896 | /* Scalar evolutions analysis might have copy propagated |
897 | the abnormal ssa names into these expressions, hence |
898 | emitting the computations based on them during loop |
899 | unrolling might create overlapping life ranges for |
900 | them, and failures in out-of-ssa. */ |
901 | || contains_abnormal_ssa_name_p (niter->may_be_zero) |
902 | || contains_abnormal_ssa_name_p (niter->control.base) |
903 | || contains_abnormal_ssa_name_p (niter->control.step) |
904 | || contains_abnormal_ssa_name_p (niter->bound)) |
905 | return false; |
906 | |
907 | /* And of course, we must be able to duplicate the loop. */ |
908 | if (!can_duplicate_loop_p (loop)) |
909 | return false; |
910 | |
911 | /* The final loop should be small enough. */ |
912 | if (tree_num_loop_insns (loop, &eni_size_weights) * factor |
913 | > (unsigned) param_max_unrolled_insns) |
914 | return false; |
915 | |
916 | return true; |
917 | } |
918 | |
/* Determines the conditions that control execution of LOOP unrolled FACTOR
   times.  DESC is number of iterations of LOOP.  ENTER_COND is set to
   condition that must be true if the main loop can be entered.
   If the loop does not always iterate an exact multiple of FACTOR times,
   EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
   how the exit from the unrolled loop should be controlled.  Otherwise,
   the trees are set to null and EXIT_CMP is set to ERROR_MARK.

   Side effect: any statements needed to compute the emitted values are
   inserted on the preheader edge of LOOP.  */

static void
determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
			   unsigned factor, tree *enter_cond,
			   tree *exit_base, tree *exit_step,
			   enum tree_code *exit_cmp, tree *exit_bound)
{
  gimple_seq stmts;
  tree base = desc->control.base;
  tree step = desc->control.step;
  tree bound = desc->bound;
  tree type = TREE_TYPE (step);
  tree bigstep, delta;
  tree min = lower_bound_in_type (type, type);
  tree max = upper_bound_in_type (type, type);
  enum tree_code cmp = desc->cmp;
  tree cond = boolean_true_node, assum;

  /* For pointers, do the arithmetics in the type of step.  */
  base = fold_convert (type, base);
  bound = fold_convert (type, bound);

  /* Start from the conservative answer ("never entered, no exit test")
     and refine below.  */
  *enter_cond = boolean_false_node;
  *exit_base = NULL_TREE;
  *exit_step = NULL_TREE;
  *exit_cmp = ERROR_MARK;
  *exit_bound = NULL_TREE;
  gcc_assert (cmp != ERROR_MARK);

  /* We only need to be correct when we answer question
     "Do at least FACTOR more iterations remain?" in the unrolled loop.
     Thus, transforming BASE + STEP * i <> BOUND to
     BASE + STEP * i < BOUND is ok.  */
  if (cmp == NE_EXPR)
    {
      /* The direction of the inequality follows the sign of STEP.  */
      if (tree_int_cst_sign_bit (step))
	cmp = GT_EXPR;
      else
	cmp = LT_EXPR;
    }
  else if (cmp == LT_EXPR)
    {
      gcc_assert (!tree_int_cst_sign_bit (step));
    }
  else if (cmp == GT_EXPR)
    {
      gcc_assert (tree_int_cst_sign_bit (step));
    }
  else
    gcc_unreachable ();

  /* The main body of the loop may be entered iff:

     1) desc->may_be_zero is false.
     2) it is possible to check that there are at least FACTOR iterations
	of the loop, i.e., BOUND - step * FACTOR does not overflow.
     3) # of iterations is at least FACTOR  */

  if (!integer_zerop (desc->may_be_zero))
    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			invert_truthvalue (desc->may_be_zero),
			cond);

  /* BIGSTEP is the advance of one unrolled iteration; DELTA is how far
     BOUND must stay from the type's extreme value so that BOUND - DELTA
     below cannot overflow (condition 2).  */
  bigstep = fold_build2 (MULT_EXPR, type, step,
			 build_int_cst_type (type, factor));
  delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
  if (cmp == LT_EXPR)
    assum = fold_build2 (GE_EXPR, boolean_type_node,
			 bound,
			 fold_build2 (PLUS_EXPR, type, min, delta));
  else
    assum = fold_build2 (LE_EXPR, boolean_type_node,
			 bound,
			 fold_build2 (PLUS_EXPR, type, max, delta));
  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);

  /* Condition 3: at least FACTOR iterations remain on entry, expressed
     against the adjusted bound.  */
  bound = fold_build2 (MINUS_EXPR, type, bound, delta);
  assum = fold_build2 (cmp, boolean_type_node, base, bound);
  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);

  if (integer_nonzerop (cond)
      && integer_zerop (desc->may_be_zero))
    {
      /* Convert the latch count to an iteration count.  */
      tree niter = fold_build2 (PLUS_EXPR, type, desc->niter,
				build_one_cst (type));
      /* If the iteration count is provably an exact multiple of FACTOR,
	 the unrolled loop needs no exit test at all: *EXIT_CMP stays
	 ERROR_MARK to tell the caller so.  */
      if (multiple_of_p (type, niter, build_int_cst (type, factor)))
	return;
    }

  /* Materialize the entry condition on the preheader edge.  */
  cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  /* cond now may be a gimple comparison, which would be OK, but also any
     other gimple rhs (say a && b).  In this case we need to force it to
     operand.  */
  if (!is_gimple_condexpr_for_cond (cond))
    {
      cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }
  *enter_cond = cond;

  /* Likewise materialize the exit-test operands before the loop.  */
  base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  *exit_base = base;
  *exit_step = bigstep;
  *exit_cmp = cmp;
  *exit_bound = bound;
}
1042 | |
1043 | /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge |
1044 | whose source block dominates the latch. DESC describes the number of |
1045 | iterations of LOOP. |
1046 | |
1047 | If N is number of iterations of the loop and MAY_BE_ZERO is the condition |
1048 | under that loop exits in the first iteration even if N != 0, |
1049 | |
1050 | while (1) |
1051 | { |
1052 | x = phi (init, next); |
1053 | |
1054 | pre; |
1055 | if (st) |
1056 | break; |
1057 | post; |
1058 | } |
1059 | |
1060 | becomes (with possibly the exit conditions formulated a bit differently, |
1061 | avoiding the need to create a new iv): |
1062 | |
1063 | if (MAY_BE_ZERO || N < FACTOR) |
1064 | goto rest; |
1065 | |
1066 | do |
1067 | { |
1068 | x = phi (init, next); |
1069 | |
1070 | pre; |
1071 | post; |
1072 | pre; |
1073 | post; |
1074 | ... |
1075 | pre; |
1076 | post; |
1077 | N -= FACTOR; |
1078 | |
1079 | } while (N >= FACTOR); |
1080 | |
1081 | rest: |
1082 | init' = phi (init, x); |
1083 | |
1084 | while (1) |
1085 | { |
1086 | x = phi (init', next); |
1087 | |
1088 | pre; |
1089 | if (st) |
1090 | break; |
1091 | post; |
1092 | } |
1093 | |
1094 | Before the loop is unrolled, TRANSFORM is called for it (only for the |
1095 | unrolled loop, but not for its versioned copy). DATA is passed to |
1096 | TRANSFORM. */ |
1097 | |
1098 | /* Probability in % that the unrolled loop is entered. Just a guess. */ |
1099 | #define PROB_UNROLLED_LOOP_ENTERED 90 |
1100 | |
void
tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
				class tree_niter_desc *desc,
				transform_callback transform,
				void *data)
{
  /* Remember whether we enter LOOP from an irreducible region, so new
     edges created below can carry the same flag.  */
  unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;

  enum tree_code exit_cmp;
  tree enter_main_cond, exit_base, exit_step, exit_bound;
  bool flat = maybe_flat_loop_profile (loop);
  determine_exit_conditions (loop, desc, factor,
			     enter_cond: &enter_main_cond, exit_base: &exit_base, exit_step: &exit_step,
			     exit_cmp: &exit_cmp, exit_bound: &exit_bound);
  /* A null EXIT_BASE means determine_exit_conditions proved the loop
     iterates an exact multiple of FACTOR times, so neither an epilogue
     loop nor a new exit IV is needed.  */
  bool single_loop_p = !exit_base;

  gcond *exit_if = nullptr;
  class loop *new_loop = nullptr;
  edge new_exit;
  if (!single_loop_p)
    {
      profile_count entry_count = loop_preheader_edge (loop)->src->count;
      /* Let us assume that the unrolled loop is quite likely to be entered.  */
      profile_probability prob_entry;
      if (integer_nonzerop (enter_main_cond))
	prob_entry = profile_probability::always ();
      else
	prob_entry = profile_probability::guessed_always ()
			.apply_scale (PROB_UNROLLED_LOOP_ENTERED, den: 100);


      /* The values for scales should keep profile consistent, and somewhat
	 close to correct.  */
      new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
			       prob_entry.invert (),
			       prob_entry,
			       /* We will later redirect exit from vectorized
				  loop to new_loop.  */
			       profile_probability::always (),
			       true);
      gcc_assert (new_loop != NULL);
      update_ssa (TODO_update_ssa_no_phi);

      /* Prepare the cfg and update the phi nodes.  Move the loop exit to the
	 loop latch (and make its condition dummy, for the moment).  */
      basic_block rest = loop_preheader_edge (new_loop)->src;
      edge precond_edge = single_pred_edge (bb: rest);
      split_edge (loop_latch_edge (loop));
      basic_block exit_bb = single_pred (bb: loop->latch);
      edge exit = single_dom_exit (loop);

      /* Since the exit edge will be removed, the frequency of all the blocks
	 in the loop that are dominated by it must be scaled.  */
      if (exit->probability.initialized_p ())
	scale_dominated_blocks_in_loop (loop, bb: exit->src,
					/* We are scaling up here so
					   probability does not fit.  */
					num: exit->src->count,
					den: exit->src->count - exit->count ());

      /* Insert a placeholder exit condition (0 == 0) at the end of the
	 new pre-latch block; its real operands are filled in at the
	 bottom of this function once the exit IV exists.  */
      gimple_stmt_iterator bsi = gsi_last_bb (bb: exit_bb);
      exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
				   integer_zero_node,
				   NULL_TREE, NULL_TREE);

      gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
      new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
      rescan_loop_exit (new_exit, true, false);

      /* Set the probability of new exit to the same of the old one.  Fix
	 the count of the latch block.  */
      new_exit->probability = exit->probability;
      edge new_nonexit = single_pred_edge (bb: loop->latch);
      new_nonexit->probability = exit->probability.invert ();
      new_nonexit->flags = EDGE_TRUE_VALUE;
      set_edge_probability_and_rescale_others
	      (exit, profile_probability::never ());
      loop->latch->count = new_nonexit->count ();

      /* For each PHI in the header of LOOP, create a matching join PHI
	 in REST merging the value on entry (main loop skipped) with the
	 value leaving the unrolled loop, and feed it to the epilogue.  */
      edge old_entry = loop_preheader_edge (loop);
      edge new_entry = loop_preheader_edge (new_loop);
      edge old_latch = loop_latch_edge (loop);
      for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header),
	   psi_new_loop = gsi_start_phis (new_loop->header);
	   !gsi_end_p (i: psi_old_loop);
	   gsi_next (i: &psi_old_loop), gsi_next (i: &psi_new_loop))
	{
	  gphi *phi_old_loop = psi_old_loop.phi ();
	  gphi *phi_new_loop = psi_new_loop.phi ();

	  tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
	  use_operand_p op
	    = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
	  gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
	  tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);

	  /* Prefer using original variable as a base for the new ssa name.
	     This is necessary for virtual ops, and useful in order to avoid
	     losing debug info for real ops.  */
	  tree new_init;
	  if (TREE_CODE (next) == SSA_NAME
	      && useless_type_conversion_p (TREE_TYPE (next),
					    TREE_TYPE (init)))
	    new_init = copy_ssa_name (var: next);
	  else if (TREE_CODE (init) == SSA_NAME
		   && useless_type_conversion_p (TREE_TYPE (init),
						 TREE_TYPE (next)))
	    new_init = copy_ssa_name (var: init);
	  else if (useless_type_conversion_p (TREE_TYPE (next),
					      TREE_TYPE (init)))
	    new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
					   name: "unrinittmp" );
	  else
	    new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
					   name: "unrinittmp" );

	  gphi *phi_rest = create_phi_node (new_init, rest);
	  add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
	  add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
	  SET_USE (op, new_init);
	}

      remove_path (exit);
      /* We will later redirect exit from vectorized loop to new_loop.  */
      loop_preheader_edge (new_loop)->src->count = entry_count;

      /* The epilog loop latch executes at most factor - 1 times.
	 Since the epilog is entered unconditionally it will need to handle
	 up to factor executions of its body.  */
      new_loop->any_upper_bound = true;
      new_loop->nb_iterations_upper_bound = factor - 1;
      /* We do not really know estimate on number of iterations, since we do not
	 track any estimates modulo unroll factor.
	 Drop estimate from loop_info and scale loop profile.
	 It may be more realistic to scale loop profile to factor / 2 - 1,
	 but vectorizer also uses factor - 1.  */
      new_loop->any_estimate = false;
      scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
    }
  else
    new_exit = single_dom_exit (loop);

  /* Transform the loop.  */
  if (transform)
    (*transform) (loop, data);

  /* Unroll the loop and remove the exits in all iterations except for the
     last one.  */
  auto_sbitmap wont_exit (factor);
  bitmap_ones (wont_exit);
  bitmap_clear_bit (map: wont_exit, bitno: factor - 1);

  auto_vec<edge> to_remove;
  bool ok
    = gimple_duplicate_loop_body_to_header_edge
	(loop, e: loop_latch_edge (loop), ndupl: factor - 1, wont_exit,
	 orig: new_exit, to_remove: &to_remove,
	 DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
  gcc_assert (ok);

  /* Remove the now-dead exit paths of the duplicated iterations.  */
  for (edge e : to_remove)
    {
      ok = remove_path (e);
      gcc_assert (ok);
    }
  update_ssa (TODO_update_ssa);

  new_exit = single_dom_exit (loop);
  /* gimple_duplicate_loop_body_to_header_edge depending on
     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
     or scales it down accordingly.
     However exit edge probability is kept as original.  Fix it if needed
     and compensate.  */
  update_loop_exit_probability_scale_dom_bbs (loop, exit_edge: new_exit);
  if (!single_loop_p)
    {
      /* Finally create the new counter for number of iterations and add
	 the new exit instruction.  */
      tree ctr_before, ctr_after;
      gimple_stmt_iterator bsi = gsi_last_nondebug_bb (bb: new_exit->src);
      exit_if = as_a <gcond *> (p: gsi_stmt (i: bsi));
      create_iv (base: exit_base, incr_op: PLUS_EXPR, step: exit_step, NULL_TREE, loop,
		 incr_pos: &bsi, after: false, var_before: &ctr_before, var_after: &ctr_after);
      gimple_cond_set_code (gs: exit_if, code: exit_cmp);
      gimple_cond_set_lhs (gs: exit_if, lhs: ctr_after);
      gimple_cond_set_rhs (gs: exit_if, rhs: exit_bound);
      update_stmt (s: exit_if);
    }
  /* The recorded iteration bounds are latch counts; convert to an
     iteration count, divide by FACTOR and convert back.  */
  if (loop->any_upper_bound)
    loop->nb_iterations_upper_bound = wi::udiv_floor
      (x: loop->nb_iterations_upper_bound + 1, y: factor) - 1;
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound = wi::udiv_floor
      (x: loop->nb_iterations_likely_upper_bound + 1, y: factor) - 1;
  if (loop->any_estimate)
    loop->nb_iterations_estimate = wi::udiv_floor
      (x: loop->nb_iterations_estimate + 1, y: factor) - 1;

  checking_verify_flow_info ();
  checking_verify_loop_structure ();
  checking_verify_loop_closed_ssa (verify_ssa_p: true, loop);
  if (new_loop)
    checking_verify_loop_closed_ssa (verify_ssa_p: true, loop: new_loop);
}
1305 | |
1306 | /* Wrapper over tree_transform_and_unroll_loop for case we do not |
1307 | want to transform the loop before unrolling. The meaning |
1308 | of the arguments is the same as for tree_transform_and_unroll_loop. */ |
1309 | |
1310 | void |
1311 | tree_unroll_loop (class loop *loop, unsigned factor, |
1312 | class tree_niter_desc *desc) |
1313 | { |
1314 | tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL); |
1315 | } |
1316 | |
1317 | /* Rewrite the phi node at position PSI in function of the main |
1318 | induction variable MAIN_IV and insert the generated code at GSI. */ |
1319 | |
1320 | static void |
1321 | rewrite_phi_with_iv (loop_p loop, |
1322 | gphi_iterator *psi, |
1323 | gimple_stmt_iterator *gsi, |
1324 | tree main_iv) |
1325 | { |
1326 | affine_iv iv; |
1327 | gassign *stmt; |
1328 | gphi *phi = psi->phi (); |
1329 | tree atype, mtype, val, res = PHI_RESULT (phi); |
1330 | |
1331 | if (virtual_operand_p (op: res) || res == main_iv) |
1332 | { |
1333 | gsi_next (i: psi); |
1334 | return; |
1335 | } |
1336 | |
1337 | if (!simple_iv (loop, loop, res, &iv, true)) |
1338 | { |
1339 | gsi_next (i: psi); |
1340 | return; |
1341 | } |
1342 | |
1343 | remove_phi_node (psi, false); |
1344 | |
1345 | atype = TREE_TYPE (res); |
1346 | mtype = POINTER_TYPE_P (atype) ? sizetype : atype; |
1347 | val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step), |
1348 | fold_convert (mtype, main_iv)); |
1349 | val = fold_build2 (POINTER_TYPE_P (atype) |
1350 | ? POINTER_PLUS_EXPR : PLUS_EXPR, |
1351 | atype, unshare_expr (iv.base), val); |
1352 | val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true, |
1353 | GSI_SAME_STMT); |
1354 | stmt = gimple_build_assign (res, val); |
1355 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1356 | } |
1357 | |
1358 | /* Rewrite all the phi nodes of LOOP in function of the main induction |
1359 | variable MAIN_IV. */ |
1360 | |
1361 | static void |
1362 | rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv) |
1363 | { |
1364 | unsigned i; |
1365 | basic_block *bbs = get_loop_body_in_dom_order (loop); |
1366 | gphi_iterator psi; |
1367 | |
1368 | for (i = 0; i < loop->num_nodes; i++) |
1369 | { |
1370 | basic_block bb = bbs[i]; |
1371 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
1372 | |
1373 | if (bb->loop_father != loop) |
1374 | continue; |
1375 | |
1376 | for (psi = gsi_start_phis (bb); !gsi_end_p (i: psi); ) |
1377 | rewrite_phi_with_iv (loop, psi: &psi, gsi: &gsi, main_iv); |
1378 | } |
1379 | |
1380 | free (ptr: bbs); |
1381 | } |
1382 | |
/* Bases all the induction variables in LOOP on a single induction variable
   (with base 0 and step 1), whose final value is compared with *NIT.  When the
   IV type precision has to be larger than *NIT type precision, *NIT is
   converted to the larger type, the conversion code is inserted before the
   loop, and *NIT is updated to the new definition.  When BUMP_IN_LATCH is true,
   the induction variable is incremented in the loop latch, otherwise it is
   incremented in the loop header.  Return the induction variable that was
   created.  */

tree
canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
{
  unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
  unsigned original_precision = precision;
  tree type, var_before;
  gimple_stmt_iterator gsi;
  gphi_iterator psi;
  gcond *stmt;
  edge exit = single_dom_exit (loop);
  gimple_seq stmts;
  bool unsigned_p = false;

  /* Find the widest precision among *NIT and all integral/pointer PHI
     results in the loop header, and decide the signedness of the
     canonical IV.  */
  for (psi = gsi_start_phis (loop->header);
       !gsi_end_p (i: psi); gsi_next (i: &psi))
    {
      gphi *phi = psi.phi ();
      tree res = PHI_RESULT (phi);
      bool uns;

      type = TREE_TYPE (res);
      if (virtual_operand_p (op: res)
	  || (!INTEGRAL_TYPE_P (type)
	      && !POINTER_TYPE_P (type))
	  || TYPE_PRECISION (type) < precision)
	continue;

      uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);

      /* A strictly wider type dictates the signedness outright; at
	 equal width, any unsigned candidate forces unsignedness.  */
      if (TYPE_PRECISION (type) > precision)
	unsigned_p = uns;
      else
	unsigned_p |= uns;

      precision = TYPE_PRECISION (type);
    }

  /* Round the precision up to that of the smallest machine mode that
     can hold it.  */
  scalar_int_mode mode = smallest_int_mode_for_size (size: precision);
  precision = GET_MODE_PRECISION (mode);
  type = build_nonstandard_integer_type (precision, unsigned_p);

  if (original_precision != precision
      || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
    {
      /* Convert *NIT to the chosen type, emitting the conversion code
	 on the preheader edge.  */
      *nit = fold_convert (type, *nit);
      *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  if (bump_in_latch)
    gsi = gsi_last_bb (bb: loop->latch);
  else
    gsi = gsi_last_nondebug_bb (bb: loop->header);
  /* Create the canonical IV: base 0, step 1.  */
  create_iv (base: build_int_cst_type (type, 0), incr_op: PLUS_EXPR, step: build_int_cst (type, 1),
	     NULL_TREE, loop, incr_pos: &gsi, after: bump_in_latch, var_before: &var_before, NULL);

  rewrite_all_phi_nodes_with_iv (loop, main_iv: var_before);

  stmt = as_a <gcond *> (p: *gsi_last_bb (bb: exit->src));
  /* Make the loop exit if the control condition is not satisfied.  */
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      edge te, fe;

      extract_true_false_edges_from_block (exit->src, &te, &fe);
      te->flags = EDGE_FALSE_VALUE;
      fe->flags = EDGE_TRUE_VALUE;
    }
  /* The loop now continues while VAR_BEFORE < *NIT.  */
  gimple_cond_set_code (gs: stmt, code: LT_EXPR);
  gimple_cond_set_lhs (gs: stmt, lhs: var_before);
  gimple_cond_set_rhs (gs: stmt, rhs: *nit);
  update_stmt (s: stmt);

  return var_before;
}
1468 | |