1 | /* Loop header copying on trees. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" |
28 | #include "gimple-ssa.h" |
29 | #include "gimple-iterator.h" |
30 | #include "tree-cfg.h" |
31 | #include "tree-into-ssa.h" |
32 | #include "cfgloop.h" |
33 | #include "tree-inline.h" |
34 | #include "tree-ssa-threadedge.h" |
35 | #include "tree-ssa-sccvn.h" |
36 | #include "tree-phinodes.h" |
37 | #include "ssa-iterators.h" |
38 | #include "value-range.h" |
39 | #include "gimple-range.h" |
40 | #include "gimple-range-path.h" |
41 | #include "gimple-pretty-print.h" |
42 | #include "cfganal.h" |
43 | |
44 | /* Return path query insteance for testing ranges of statements |
45 | in headers of LOOP contained in basic block BB. |
46 | Use RANGER instance. */ |
47 | |
48 | static path_range_query * |
49 | get_range_query (class loop *loop, |
50 | basic_block bb, |
51 | gimple_ranger &ranger) |
52 | { |
53 | auto_vec<basic_block, 8> path; |
54 | for (; bb != loop->header; bb = single_pred_edge (bb)->src) |
55 | path.safe_push (obj: bb); |
56 | path.safe_push (obj: loop->header); |
57 | path.safe_push (obj: loop_preheader_edge (loop)->src); |
58 | return new path_range_query (ranger, path); |
59 | } |
60 | |
61 | /* Return edge that is true in the first iteration of the loop |
62 | and NULL otherwise. |
63 | Formulate corrent ranger query to RANGER. */ |
64 | |
65 | static edge |
66 | static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger, |
67 | path_range_query *&query) |
68 | { |
69 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
70 | edge ret_e; |
71 | |
72 | if (!last) |
73 | return NULL; |
74 | |
75 | edge true_e, false_e; |
76 | extract_true_false_edges_from_block (bb, &true_e, &false_e); |
77 | |
78 | /* If neither edge is the exit edge, this is not a case we'd like to |
79 | special-case. */ |
80 | if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e)) |
81 | return NULL; |
82 | |
83 | int_range<1> desired_static_range; |
84 | if (loop_exit_edge_p (l, true_e)) |
85 | { |
86 | desired_static_range = range_false (); |
87 | ret_e = true_e; |
88 | } |
89 | else |
90 | { |
91 | desired_static_range = range_true (); |
92 | ret_e = false_e; |
93 | } |
94 | |
95 | if (!query) |
96 | query = get_range_query (loop: l, bb: gimple_bb (g: last), ranger); |
97 | |
98 | int_range<2> r; |
99 | if (!query->range_of_stmt (r, last)) |
100 | return NULL; |
101 | return r == desired_static_range ? ret_e : NULL; |
102 | } |
103 | |
104 | /* Return true if STMT is static in LOOP. This means that its value |
105 | is constant in the first iteration. |
106 | Use RANGER and formulate query cached in QUERY. */ |
107 | |
108 | static bool |
109 | loop_static_stmt_p (class loop *loop, |
110 | gimple_ranger &ranger, |
111 | path_range_query *&query, |
112 | gimple *stmt) |
113 | { |
114 | tree type = gimple_range_type (s: stmt); |
115 | if (!type || !Value_Range::supports_type_p (type)) |
116 | return false; |
117 | |
118 | if (!query) |
119 | query = get_range_query (loop, bb: gimple_bb (g: stmt), ranger); |
120 | |
121 | Value_Range r (gimple_range_type (s: stmt)); |
122 | if (!query->range_of_stmt (r, stmt)) |
123 | return false; |
124 | return r.singleton_p (); |
125 | } |
126 | |
127 | /* Return true if OP is invariant. */ |
128 | |
129 | static bool |
130 | loop_invariant_op_p (class loop *loop, |
131 | tree op) |
132 | { |
133 | if (is_gimple_min_invariant (op)) |
134 | return true; |
135 | if (SSA_NAME_IS_DEFAULT_DEF (op) |
136 | || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op)))) |
137 | return true; |
138 | return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1; |
139 | } |
140 | |
/* Return true if OP is static in LOOP, i.e. its value is known to be
   constant in the first iteration.  (The classification is kept in
   bit 1 of the defining statement's uid; see the header analysis that
   calls loop_static_stmt_p.)  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
154 | |
155 | |
/* Return true if OP combines outcome of static and
   loop invariant conditional.  (The classification is kept in bit 2
   of the defining statement's uid, set when a BIT_AND/IOR/XOR of a
   static and an invariant operand is recognized.)  */

static bool
loop_combined_static_and_iv_p (class loop *loop,
			       tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
		       && !SSA_NAME_IS_DEFAULT_DEF (op)
		       && flow_bb_inside_loop_p (loop,
						 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
}
170 | |
/* Decision about possibility of copying a given header.
   Ordered from least to most desirable; the analysis compares
   against ch_win to decide whether copying definitely pays off.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
187 | |
188 | /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT |
189 | instructions should be duplicated, limit is decreased by the actual |
190 | amount. */ |
191 | |
192 | static ch_decision |
193 | (basic_block , class loop *loop, |
194 | gimple_ranger *ranger, |
195 | int *limit, |
196 | hash_set <edge> *invariant_exits, |
197 | hash_set <edge> *static_exits) |
198 | { |
199 | gimple_stmt_iterator bsi; |
200 | |
201 | gcc_assert (!header->aux); |
202 | |
203 | gcc_assert (EDGE_COUNT (header->succs) > 0); |
204 | if (single_succ_p (bb: header)) |
205 | { |
206 | if (dump_file && (dump_flags & TDF_DETAILS)) |
207 | fprintf (stream: dump_file, |
208 | format: " Not duplicating bb %i: it is single succ.\n" , |
209 | header->index); |
210 | return ch_impossible; |
211 | } |
212 | |
213 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) |
214 | && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest)) |
215 | { |
216 | if (dump_file && (dump_flags & TDF_DETAILS)) |
217 | fprintf (stream: dump_file, |
218 | format: " Not duplicating bb %i: both successors are in loop.\n" , |
219 | loop->num); |
220 | return ch_impossible; |
221 | } |
222 | |
223 | /* If this is not the original loop header, we want it to have just |
224 | one predecessor in order to match the && pattern. */ |
225 | if (header != loop->header && !single_pred_p (bb: header)) |
226 | { |
227 | if (dump_file && (dump_flags & TDF_DETAILS)) |
228 | fprintf (stream: dump_file, |
229 | format: " Not duplicating bb %i: it has mutiple predecestors.\n" , |
230 | header->index); |
231 | return ch_impossible; |
232 | } |
233 | |
234 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: header)); |
235 | if (!last) |
236 | { |
237 | if (dump_file && (dump_flags & TDF_DETAILS)) |
238 | fprintf (stream: dump_file, |
239 | format: " Not duplicating bb %i: it does not end by conditional.\n" , |
240 | header->index); |
241 | return ch_impossible; |
242 | } |
243 | |
244 | path_range_query *query = NULL; |
245 | for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (i: psi); |
246 | gsi_next (i: &psi)) |
247 | if (!virtual_operand_p (op: gimple_phi_result (gs: psi.phi ()))) |
248 | { |
249 | bool static_p = loop_static_stmt_p (loop, ranger&: *ranger, |
250 | query, stmt: psi.phi ()); |
251 | gimple_set_uid (g: psi.phi (), uid: static_p ? 2 : 0); |
252 | } |
253 | bool code_size_cost = false; |
254 | |
255 | /* Count number of instructions and punt on calls. |
256 | Populate stmts INV flag to later apply heuristics to the |
257 | kind of conditions we want to copy. */ |
258 | for (bsi = gsi_start_bb (bb: header); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
259 | { |
260 | gimple *last = gsi_stmt (i: bsi); |
261 | |
262 | if (gimple_code (g: last) == GIMPLE_LABEL) |
263 | continue; |
264 | |
265 | if (is_gimple_debug (gs: last)) |
266 | continue; |
267 | |
268 | if (gimple_code (g: last) == GIMPLE_COND) |
269 | break; |
270 | |
271 | if (dump_file && (dump_flags & TDF_DETAILS)) |
272 | { |
273 | fprintf (stream: dump_file, format: " Analyzing: " ); |
274 | print_gimple_stmt (dump_file, last, 0, TDF_SLIM); |
275 | } |
276 | |
277 | if (gimple_code (g: last) == GIMPLE_CALL |
278 | && (!gimple_inexpensive_call_p (as_a <gcall *> (p: last)) |
279 | /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed |
280 | at current loop's header. Don't copy in this case. */ |
281 | || gimple_call_internal_p (gs: last, fn: IFN_LOOP_DIST_ALIAS))) |
282 | { |
283 | if (dump_file && (dump_flags & TDF_DETAILS)) |
284 | fprintf (stream: dump_file, |
285 | format: " Not duplicating bb %i: it contains call.\n" , |
286 | header->index); |
287 | if (query) |
288 | delete query; |
289 | return ch_impossible; |
290 | } |
291 | |
292 | /* Classify the stmt is invariant in the loop. */ |
293 | gimple_set_uid (g: last, uid: 0); |
294 | if (!gimple_vuse (g: last) |
295 | && gimple_code (g: last) != GIMPLE_ASM |
296 | && (gimple_code (g: last) != GIMPLE_CALL |
297 | || gimple_call_flags (last) & ECF_CONST)) |
298 | { |
299 | bool inv = true, static_p = false; |
300 | ssa_op_iter i; |
301 | tree op; |
302 | FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) |
303 | if (!loop_invariant_op_p (loop, op)) |
304 | inv = false; |
305 | /* Assume that code is reasonably optimized and invariant |
306 | constants are already identified. */ |
307 | if (!inv) |
308 | static_p = loop_static_stmt_p (loop, ranger&: *ranger, query, stmt: last); |
309 | gimple_set_uid (g: last, uid: (inv ? 1 : 0) | (static_p ? 2 : 0)); |
310 | if (dump_file && (dump_flags & TDF_DETAILS)) |
311 | { |
312 | if (inv) |
313 | fprintf (stream: dump_file, format: " Stmt is loop invariant\n" ); |
314 | if (static_p) |
315 | fprintf (stream: dump_file, format: " Stmt is static " |
316 | "(constant in the first iteration)\n" ); |
317 | } |
318 | /* Loop invariants will be optimized out in loop body after |
319 | duplication; do not account invariant computation in code |
320 | size costs. |
321 | |
322 | Similarly static computations will be optimized out in the |
323 | duplicatd header. */ |
324 | if (inv || static_p) |
325 | continue; |
326 | |
327 | /* Match the following: |
328 | _1 = i_1 < 10 <- static condtion |
329 | _2 = n != 0 <- loop invariant condition |
330 | _3 = _1 & _2 <- combined static and iv statement. */ |
331 | tree_code code; |
332 | if (gimple_code (g: last) == GIMPLE_ASSIGN |
333 | && ((code = gimple_assign_rhs_code (gs: last)) == BIT_AND_EXPR |
334 | || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR)) |
335 | { |
336 | tree op1 = gimple_assign_rhs1 (gs: last); |
337 | tree op2 = gimple_assign_rhs2 (gs: last); |
338 | |
339 | if ((loop_invariant_op_p (loop, op: op1) |
340 | || loop_combined_static_and_iv_p (loop, op: op1) |
341 | || loop_static_op_p (loop, op: op1)) |
342 | && (loop_invariant_op_p (loop, op: op2) |
343 | || loop_combined_static_and_iv_p (loop, op: op2) |
344 | || loop_static_op_p (loop, op: op2))) |
345 | { |
346 | /* Duplicating loop header with combned conditional will |
347 | remove this statement in each copy. But we account for |
348 | that later when seeing that condition. |
349 | |
350 | Note that this may be overly optimistic for bit operations |
351 | where the static parameter may still result in non-trivial |
352 | bit operation. */ |
353 | gimple_set_uid (g: last, uid: 4); |
354 | if (dump_file && (dump_flags & TDF_DETAILS)) |
355 | fprintf (stream: dump_file, |
356 | format: " Stmt combines static and invariant op\n" ); |
357 | continue; |
358 | } |
359 | } |
360 | } |
361 | |
362 | int insns = estimate_num_insns (last, &eni_size_weights); |
363 | *limit -= insns; |
364 | if (insns) |
365 | code_size_cost = true; |
366 | if (dump_file && (dump_flags & TDF_DETAILS)) |
367 | fprintf (stream: dump_file, |
368 | format: " Acconting stmt as %i insns\n" , insns); |
369 | if (*limit < 0) |
370 | { |
371 | if (dump_file && (dump_flags & TDF_DETAILS)) |
372 | fprintf (stream: dump_file, |
373 | format: " Not duplicating bb %i contains too many insns.\n" , |
374 | header->index); |
375 | if (query) |
376 | delete query; |
377 | return ch_impossible; |
378 | } |
379 | } |
380 | |
381 | if (dump_file && (dump_flags & TDF_DETAILS)) |
382 | { |
383 | fprintf (stream: dump_file, format: " Analyzing: " ); |
384 | print_gimple_stmt (dump_file, last, 0, TDF_SLIM); |
385 | } |
386 | |
387 | /* If the condition tests a non-IV loop variant we do not want to rotate |
388 | the loop further. Unless this is the original loop header. */ |
389 | tree lhs = gimple_cond_lhs (gs: last); |
390 | tree rhs = gimple_cond_rhs (gs: last); |
391 | bool lhs_invariant = loop_invariant_op_p (loop, op: lhs); |
392 | bool rhs_invariant = loop_invariant_op_p (loop, op: rhs); |
393 | |
394 | /* Combined conditional is a result of if combining: |
395 | |
396 | _1 = i_1 < 10 <- static condtion |
397 | _2 = n != 0 <- loop invariant condition |
398 | _3 = _1 & _2 <- combined static and iv statement |
399 | if (_3 != 0) <- combined conditional |
400 | |
401 | Combined conditionals will not be optimized out in either copy. |
402 | However duplicaed header simplifies to: |
403 | |
404 | if (n < 10) |
405 | |
406 | while loop body to |
407 | |
408 | if (i_1 < 10) |
409 | |
410 | So effectively the resulting code sequence will be of same length as |
411 | the original code. |
412 | |
413 | Combined conditionals are never static or invariant, so save some work |
414 | below. */ |
415 | if (lhs_invariant != rhs_invariant |
416 | && (lhs_invariant |
417 | || loop_combined_static_and_iv_p (loop, op: lhs)) |
418 | && (rhs_invariant |
419 | || loop_combined_static_and_iv_p (loop, op: rhs))) |
420 | { |
421 | if (query) |
422 | delete query; |
423 | if (dump_file && (dump_flags & TDF_DETAILS)) |
424 | fprintf (stream: dump_file, |
425 | format: " Conditional combines static and invariant op.\n" ); |
426 | return ch_win; |
427 | } |
428 | |
429 | edge static_exit = static_loop_exit (l: loop, bb: header, ranger&: *ranger, query); |
430 | if (query) |
431 | delete query; |
432 | |
433 | if (static_exit && static_exits) |
434 | { |
435 | static_exits->add (k: static_exit); |
436 | if (dump_file && (dump_flags & TDF_DETAILS)) |
437 | fprintf (stream: dump_file, |
438 | format: " Will eliminate peeled conditional in bb %d.\n" , |
439 | static_exit->src->index); |
440 | /* Still look for invariant exits; exit may be both. */ |
441 | } |
442 | if (lhs_invariant && rhs_invariant) |
443 | { |
444 | if (invariant_exits) |
445 | { |
446 | edge e; |
447 | edge_iterator ei; |
448 | FOR_EACH_EDGE (e, ei, header->succs) |
449 | if (loop_exit_edge_p (loop, e)) |
450 | { |
451 | if (dump_file && (dump_flags & TDF_DETAILS)) |
452 | fprintf (stream: dump_file, |
453 | format: " Will elliminate invariant exit %i->%i\n" , |
454 | e->src->index, e->dest->index); |
455 | invariant_exits->add (k: e); |
456 | } |
457 | } |
458 | return ch_win_invariant_exit; |
459 | } |
460 | |
461 | /* If the static exit fully optimize out, it is win to "duplicate" |
462 | it. |
463 | |
464 | TODO: Even if duplication costs some size we may opt to do so in case |
465 | exit probability is significant enough (do partial peeling). */ |
466 | if (static_exit) |
467 | return !code_size_cost ? ch_possible_zero_cost : ch_possible; |
468 | |
469 | /* We was not able to prove that conditional will be eliminated. */ |
470 | int insns = estimate_num_insns (last, &eni_size_weights); |
471 | *limit -= insns; |
472 | if (dump_file && (dump_flags & TDF_DETAILS)) |
473 | fprintf (stream: dump_file, |
474 | format: " Acconting stmt as %i insns\n" , insns); |
475 | if (*limit < 0) |
476 | { |
477 | if (dump_file && (dump_flags & TDF_DETAILS)) |
478 | fprintf (stream: dump_file, |
479 | format: " Not duplicating bb %i contains too many insns.\n" , |
480 | header->index); |
481 | return ch_impossible; |
482 | } |
483 | |
484 | return ch_possible; |
485 | } |
486 | |
487 | /* Checks whether LOOP is a do-while style loop. */ |
488 | |
489 | static bool |
490 | do_while_loop_p (class loop *loop) |
491 | { |
492 | gimple *stmt = last_nondebug_stmt (loop->latch); |
493 | |
494 | /* If the latch of the loop is not empty, it is not a do-while loop. */ |
495 | if (stmt |
496 | && gimple_code (g: stmt) != GIMPLE_LABEL) |
497 | { |
498 | if (dump_file && (dump_flags & TDF_DETAILS)) |
499 | fprintf (stream: dump_file, |
500 | format: "Loop %i is not do-while loop: latch is not empty.\n" , |
501 | loop->num); |
502 | return false; |
503 | } |
504 | |
505 | /* If the latch does not have a single predecessor, it is not a |
506 | do-while loop. */ |
507 | if (!single_pred_p (bb: loop->latch)) |
508 | { |
509 | if (dump_file && (dump_flags & TDF_DETAILS)) |
510 | fprintf (stream: dump_file, |
511 | format: "Loop %i is not do-while loop: latch has multiple " |
512 | "predecessors.\n" , loop->num); |
513 | return false; |
514 | } |
515 | basic_block pred = single_pred (bb: loop->latch); |
516 | |
517 | /* If the latch predecessor doesn't exit the loop, it is not a |
518 | do-while loop. */ |
519 | if (!loop_exits_from_bb_p (loop, pred)) |
520 | { |
521 | if (dump_file && (dump_flags & TDF_DETAILS)) |
522 | fprintf (stream: dump_file, |
523 | format: "Loop %i is not do-while loop: latch predecessor " |
524 | "does not exit loop.\n" , loop->num); |
525 | return false; |
526 | } |
527 | gcond *last = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred)); |
528 | if (last |
529 | && (gimple_cond_lhs (gs: last) == boolean_false_node |
530 | || gimple_cond_lhs (gs: last) == boolean_true_node) |
531 | && gimple_cond_rhs (gs: last) == boolean_false_node) |
532 | { |
533 | if (dump_file && (dump_flags & TDF_DETAILS)) |
534 | fprintf (stream: dump_file, |
535 | format: "Loop %i is not do-while loop: latch predecessor " |
536 | "contains exit we optimized out.\n" , loop->num); |
537 | return false; |
538 | } |
539 | |
540 | if (dump_file && (dump_flags & TDF_DETAILS)) |
541 | fprintf (stream: dump_file, format: "Loop %i is do-while loop\n" , loop->num); |
542 | |
543 | return true; |
544 | } |
545 | |
546 | /* Update profile after header copying of LOOP. |
547 | REGION is the original (in loop) sequence, REGION_COPY is the |
548 | duplicated header (now outside of loop). N_REGION is number of |
549 | bbs duplicated. |
550 | ELIMINATED_EDGE is edge to be removed from duplicated sequence. |
551 | INVARIANT_EXITS are edges in the loop body to be elimianted |
552 | since they are loop invariants |
553 | |
554 | So We expect the following: |
555 | |
556 | // region_copy_start entry will be scaled to entry_count |
557 | if (cond1) <- this condition will become false |
558 | and we update probabilities |
559 | goto loop_exit; |
560 | if (cond2) <- this condition is loop invariant |
561 | goto loop_exit; |
562 | goto loop_header <- this will be redirected to loop. |
563 | // region_copy_end |
564 | loop: |
565 | <body> |
566 | // region start |
567 | loop_header: |
568 | if (cond1) <- we need to update probabbility here |
569 | goto loop_exit; |
570 | if (cond2) <- and determine scaling factor here. |
571 | moreover cond2 is now always true |
572 | goto loop_exit; |
573 | else |
574 | goto loop; |
575 | // region end |
576 | |
577 | Adding support for more exits can be done similarly, |
578 | but only consumer so far is tree-ssa-loop-ch and it uses only this |
579 | to handle the common case of peeling headers which have |
580 | conditionals known to be always true upon entry. */ |
581 | |
582 | static void |
583 | update_profile_after_ch (class loop *loop, |
584 | basic_block *region, basic_block *region_copy, |
585 | unsigned n_region, |
586 | hash_set <edge> *invariant_exits, |
587 | hash_set <edge> *static_exits, |
588 | profile_count entry_count) |
589 | { |
590 | for (unsigned int i = 0; i < n_region; i++) |
591 | { |
592 | edge exit_e, exit_e_copy, e, e_copy; |
593 | if (EDGE_COUNT (region[i]->succs) == 1) |
594 | { |
595 | region_copy[i]->count = entry_count; |
596 | region[i]->count -= entry_count; |
597 | continue; |
598 | } |
599 | |
600 | gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2); |
601 | if (loop_exit_edge_p (loop, |
602 | EDGE_SUCC (region[i], 0))) |
603 | { |
604 | exit_e = EDGE_SUCC (region[i], 0); |
605 | exit_e_copy = EDGE_SUCC (region_copy[i], 0); |
606 | e = EDGE_SUCC (region[i], 1); |
607 | e_copy = EDGE_SUCC (region_copy[i], 1); |
608 | } |
609 | else |
610 | { |
611 | exit_e = EDGE_SUCC (region[i], 1); |
612 | exit_e_copy = EDGE_SUCC (region_copy[i], 1); |
613 | e = EDGE_SUCC (region[i], 0); |
614 | e_copy = EDGE_SUCC (region_copy[i], 0); |
615 | } |
616 | gcc_assert (i == n_region - 1 |
617 | || (e->dest == region[i + 1] |
618 | && e_copy->dest == region_copy[i + 1])); |
619 | region_copy[i]->count = entry_count; |
620 | profile_count exit_e_count = exit_e->count (); |
621 | bool was_static = false; |
622 | if (static_exits->contains (k: exit_e)) |
623 | { |
624 | /* Update profile and the conditional. |
625 | CFG update is done by caller. */ |
626 | static_exits->remove (k: exit_e); |
627 | was_static = true; |
628 | e_copy->probability = profile_probability::always (); |
629 | exit_e_copy->probability = profile_probability::never (); |
630 | gcond *cond_stmt |
631 | = as_a <gcond *>(p: *gsi_last_bb (bb: region_copy[i])); |
632 | if (e_copy->flags & EDGE_TRUE_VALUE) |
633 | gimple_cond_make_true (gs: cond_stmt); |
634 | else |
635 | gimple_cond_make_false (gs: cond_stmt); |
636 | update_stmt (s: cond_stmt); |
637 | /* Header copying is a special case of jump threading, so use |
638 | common code to update loop body exit condition. */ |
639 | update_bb_profile_for_threading (region[i], entry_count, e); |
640 | } |
641 | else |
642 | region[i]->count -= region_copy[i]->count; |
643 | if (invariant_exits->contains (k: exit_e)) |
644 | { |
645 | invariant_exits->remove (k: exit_e); |
646 | /* All exits will happen in exit_e_copy which is out of the |
647 | loop, so increase probability accordingly. |
648 | If the edge is eliminated_edge we already corrected |
649 | profile above. */ |
650 | if (entry_count.nonzero_p () && !was_static) |
651 | set_edge_probability_and_rescale_others |
652 | (exit_e_copy, exit_e_count.probability_in |
653 | (overall: entry_count)); |
654 | /* Eliminate in-loop conditional. */ |
655 | e->probability = profile_probability::always (); |
656 | exit_e->probability = profile_probability::never (); |
657 | gcond *cond_stmt = as_a <gcond *>(p: *gsi_last_bb (bb: region[i])); |
658 | if (e->flags & EDGE_TRUE_VALUE) |
659 | gimple_cond_make_true (gs: cond_stmt); |
660 | else |
661 | gimple_cond_make_false (gs: cond_stmt); |
662 | update_stmt (s: cond_stmt); |
663 | } |
664 | entry_count = e_copy->count (); |
665 | } |
666 | /* Be sure that we seen all invariant exit edges we are supposed to update. |
667 | We may have recorded some static exists we decided to not duplicate. */ |
668 | gcc_checking_assert (invariant_exits->is_empty ()); |
669 | } |
670 | |
671 | namespace { |
672 | |
/* Common superclass for both header-copying phases (pass_ch and
   pass_ch_vect); provides the shared copy_headers driver while each
   subclass decides which loops to process.  */
class ch_base : public gimple_opt_pass
{
protected:
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
};
687 | |
688 | const pass_data pass_data_ch = |
689 | { |
690 | .type: GIMPLE_PASS, /* type */ |
691 | .name: "ch" , /* name */ |
692 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
693 | .tv_id: TV_TREE_CH, /* tv_id */ |
694 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
695 | .properties_provided: 0, /* properties_provided */ |
696 | .properties_destroyed: 0, /* properties_destroyed */ |
697 | .todo_flags_start: 0, /* todo_flags_start */ |
698 | .todo_flags_finish: 0, /* todo_flags_finish */ |
699 | }; |
700 | |
/* The default loop header copying pass, gated on -ftree-ch.  */
class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers in between.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch
720 | |
721 | const pass_data pass_data_ch_vect = |
722 | { |
723 | .type: GIMPLE_PASS, /* type */ |
724 | .name: "ch_vect" , /* name */ |
725 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
726 | .tv_id: TV_TREE_CH, /* tv_id */ |
727 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
728 | .properties_provided: 0, /* properties_provided */ |
729 | .properties_destroyed: 0, /* properties_destroyed */ |
730 | .todo_flags_start: 0, /* todo_flags_start */ |
731 | .todo_flags_finish: 0, /* todo_flags_finish */ |
732 | }; |
733 | |
/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  */
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when header copying and loop vectorization (or forced
     vectorization) are both enabled.  */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
758 | |
759 | /* For all loops, copy the condition at the end of the loop body in front |
760 | of the loop. This is beneficial since it increases efficiency of |
761 | code motion optimizations. It also saves one jump on entry to the loop. */ |
762 | |
763 | unsigned int |
764 | ch_base:: (function *fun) |
765 | { |
766 | basic_block *bbs, *copied_bbs; |
767 | unsigned bbs_size; |
768 | bool changed = false; |
769 | |
770 | if (number_of_loops (fn: fun) <= 1) |
771 | return 0; |
772 | |
773 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); |
774 | copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); |
775 | bbs_size = n_basic_blocks_for_fn (fun); |
776 | |
777 | struct candidate |
778 | { |
779 | class loop *loop; |
780 | unsigned int ; |
781 | hash_set <edge> *invariant_exits, *static_exits; |
782 | }; |
783 | auto_vec<struct candidate> candidates; |
784 | auto_vec<std::pair<edge, loop_p> > copied; |
785 | auto_vec<class loop *> loops_to_unloop; |
786 | auto_vec<int> loops_to_unloop_nunroll; |
787 | |
788 | mark_dfs_back_edges (); |
789 | gimple_ranger *ranger = new gimple_ranger; |
790 | for (auto loop : loops_list (cfun, 0)) |
791 | { |
792 | int initial_limit = optimize_loop_for_speed_p (loop) |
793 | ? param_max_loop_header_insns : 0; |
794 | int remaining_limit = initial_limit; |
795 | if (dump_file && (dump_flags & TDF_DETAILS)) |
796 | fprintf (stream: dump_file, |
797 | format: "Analyzing loop %i\n" , loop->num); |
798 | |
799 | basic_block = loop->header; |
800 | if (!get_max_loop_iterations_int (loop)) |
801 | { |
802 | if (dump_file && (dump_flags & TDF_DETAILS)) |
803 | fprintf (stream: dump_file, format: "Loop %d never loops.\n" , loop->num); |
804 | scale_loop_profile (loop, profile_probability::always (), 0); |
805 | loops_to_unloop.safe_push (obj: loop); |
806 | loops_to_unloop_nunroll.safe_push (obj: 0); |
807 | continue; |
808 | } |
809 | |
810 | /* If the loop is already a do-while style one (either because it was |
811 | written as such, or because jump threading transformed it into one), |
812 | we might be in fact peeling the first iteration of the loop. This |
813 | in general is not a good idea. Also avoid touching infinite loops. */ |
814 | if (!loop_has_exit_edges (loop) |
815 | || !process_loop_p (loop)) |
816 | continue; |
817 | |
818 | /* Iterate the header copying up to limit; this takes care of the cases |
819 | like while (a && b) {...}, where we want to have both of the conditions |
820 | copied. TODO -- handle while (a || b) - like cases, by not requiring |
821 | the header to have just a single successor and copying up to |
822 | postdominator. */ |
823 | int = 0; |
824 | int = 0; |
825 | bool last_win_invariant_exit = false; |
826 | ch_decision ret; |
827 | auto_vec <ch_decision, 32> decision; |
828 | hash_set <edge> *invariant_exits = new hash_set <edge>; |
829 | hash_set <edge> *static_exits = new hash_set <edge>; |
830 | while ((ret = should_duplicate_loop_header_p (header, loop, ranger, |
831 | limit: &remaining_limit, |
832 | invariant_exits, |
833 | static_exits)) |
834 | != ch_impossible) |
835 | { |
836 | nheaders++; |
837 | decision.safe_push (obj: ret); |
838 | if (ret >= ch_win) |
839 | { |
840 | last_win_nheaders = nheaders; |
841 | last_win_invariant_exit = (ret == ch_win_invariant_exit); |
842 | if (dump_file && (dump_flags & TDF_DETAILS)) |
843 | fprintf (stream: dump_file, format: " Duplicating bb %i is a win\n" , |
844 | header->index); |
845 | } |
846 | else |
847 | if (dump_file && (dump_flags & TDF_DETAILS)) |
848 | fprintf (stream: dump_file, format: " May duplicate bb %i\n" , header->index); |
849 | |
850 | /* Find a successor of header that is inside a loop; i.e. the new |
851 | header after the condition is copied. */ |
852 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) |
853 | header = EDGE_SUCC (header, 0)->dest; |
854 | else |
855 | header = EDGE_SUCC (header, 1)->dest; |
856 | } |
857 | |
858 | /* Try to turn loop into do-while. This means ensuring that |
859 | last duplicated header will not have loop invariant exit. */ |
860 | if (last_win_nheaders && last_win_invariant_exit |
861 | && nheaders > last_win_nheaders) |
862 | { |
863 | last_win_nheaders++; |
864 | if (dump_file && (dump_flags & TDF_DETAILS)) |
865 | fprintf (stream: dump_file, |
866 | format: " Duplicating additional BB to obtain do-while loop\n" ); |
867 | } |
868 | else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop)) |
869 | { |
870 | last_win_nheaders = 1; |
871 | if (dump_file && (dump_flags & TDF_DETAILS)) |
872 | fprintf (stream: dump_file, |
873 | format: " Duplicating header BB to obtain do-while loop\n" ); |
874 | } |
875 | /* "Duplicate" all BBs with zero cost following last basic blocks we |
876 | decided to copy. */ |
877 | while (last_win_nheaders < (int)decision.length () |
878 | && decision[last_win_nheaders] == ch_possible_zero_cost) |
879 | { |
880 | if (dump_file && (dump_flags & TDF_DETAILS)) |
881 | fprintf (stream: dump_file, |
882 | format: " Duplicating extra bb is a win; it has zero cost\n" ); |
883 | last_win_nheaders++; |
884 | } |
885 | |
886 | if (last_win_nheaders) |
887 | candidates.safe_push (obj: {.loop: loop, .nheaders: last_win_nheaders, |
888 | .invariant_exits: invariant_exits, .static_exits: static_exits}); |
889 | else |
890 | { |
891 | delete invariant_exits; |
892 | delete static_exits; |
893 | } |
894 | } |
895 | /* Do not use ranger after we change the IL and not have updated SSA. */ |
896 | delete ranger; |
897 | |
898 | for (auto candidate : candidates) |
899 | { |
900 | class loop *loop = candidate.loop; |
901 | if (dump_file && (dump_flags & TDF_DETAILS)) |
902 | fprintf (stream: dump_file, |
903 | format: "Copying headers of loop %i\n" , loop->num); |
904 | |
905 | basic_block = loop->header; |
906 | edge nonexit = NULL; |
907 | edge exit = NULL; |
908 | unsigned int n_bbs = 0; |
909 | int nexits = 0; |
910 | profile_count exit_count = profile_count::zero (); |
911 | profile_count entry_count = profile_count::zero (); |
912 | edge e; |
913 | edge_iterator ei; |
914 | |
915 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
916 | if (e->src != loop->latch) |
917 | entry_count += e->count (); |
918 | while (n_bbs < candidate.nheaders) |
919 | { |
920 | if (dump_file && (dump_flags & TDF_DETAILS)) |
921 | fprintf (stream: dump_file, format: " Will duplicate bb %i\n" , header->index); |
922 | /* Find a successor of header that is inside a loop; i.e. the new |
923 | header after the condition is copied. */ |
924 | if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) |
925 | { |
926 | nonexit = EDGE_SUCC (header, 0); |
927 | exit = EDGE_SUCC (header, 1); |
928 | } |
929 | else |
930 | { |
931 | nonexit = EDGE_SUCC (header, 1); |
932 | exit = EDGE_SUCC (header, 0); |
933 | } |
934 | exit_count += exit->count (); |
935 | nexits++; |
936 | bbs[n_bbs++] = header; |
937 | gcc_assert (bbs_size > n_bbs); |
938 | header = nonexit->dest; |
939 | } |
940 | |
941 | if (dump_file && (dump_flags & TDF_DETAILS)) |
942 | fprintf (stream: dump_file, |
943 | format: "Duplicating header of the loop %d up to edge %d->%d\n" , |
944 | loop->num, exit->src->index, exit->dest->index); |
945 | |
946 | /* Ensure that the header will have just the latch as a predecessor |
947 | inside the loop. */ |
948 | if (!single_pred_p (bb: nonexit->dest)) |
949 | { |
950 | header = split_edge (nonexit); |
951 | exit = single_pred_edge (bb: header); |
952 | } |
953 | |
954 | edge entry = loop_preheader_edge (loop); |
955 | |
956 | propagate_threaded_block_debug_into (exit->dest, entry->dest); |
957 | if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs, |
958 | true)) |
959 | { |
960 | delete candidate.static_exits; |
961 | delete candidate.invariant_exits; |
962 | if (dump_file && (dump_flags & TDF_DETAILS)) |
963 | fprintf (stream: dump_file, format: "Duplication failed.\n" ); |
964 | continue; |
965 | } |
966 | if (loop->header->count.initialized_p ()) |
967 | update_profile_after_ch (loop, region: bbs, region_copy: copied_bbs, n_region: n_bbs, |
968 | invariant_exits: candidate.invariant_exits, |
969 | static_exits: candidate.static_exits, |
970 | entry_count); |
971 | delete candidate.static_exits; |
972 | delete candidate.invariant_exits; |
973 | copied.safe_push (obj: std::make_pair (x&: entry, y&: loop)); |
974 | |
975 | /* If the loop has the form "for (i = j; i < j + 10; i++)" then |
976 | this copying can introduce a case where we rely on undefined |
977 | signed overflow to eliminate the preheader condition, because |
978 | we assume that "j < j + 10" is true. We don't want to warn |
979 | about that case for -Wstrict-overflow, because in general we |
980 | don't warn about overflow involving loops. Prevent the |
981 | warning by setting the no_warning flag in the condition. */ |
982 | if (warn_strict_overflow > 0) |
983 | { |
984 | unsigned int i; |
985 | |
986 | for (i = 0; i < n_bbs; ++i) |
987 | { |
988 | gimple_stmt_iterator bsi; |
989 | |
990 | for (bsi = gsi_start_bb (bb: copied_bbs[i]); |
991 | !gsi_end_p (i: bsi); |
992 | gsi_next (i: &bsi)) |
993 | { |
994 | gimple *stmt = gsi_stmt (i: bsi); |
995 | if (gimple_code (g: stmt) == GIMPLE_COND) |
996 | { |
997 | tree lhs = gimple_cond_lhs (gs: stmt); |
998 | if (gimple_cond_code (gs: stmt) != EQ_EXPR |
999 | && gimple_cond_code (gs: stmt) != NE_EXPR |
1000 | && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
1001 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) |
1002 | suppress_warning (stmt, OPT_Wstrict_overflow_); |
1003 | } |
1004 | else if (is_gimple_assign (gs: stmt)) |
1005 | { |
1006 | enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt); |
1007 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
1008 | if (TREE_CODE_CLASS (rhs_code) == tcc_comparison |
1009 | && rhs_code != EQ_EXPR |
1010 | && rhs_code != NE_EXPR |
1011 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
1012 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))) |
1013 | suppress_warning (stmt, OPT_Wstrict_overflow_); |
1014 | } |
1015 | } |
1016 | } |
1017 | } |
1018 | |
1019 | /* Update header of the loop. */ |
1020 | loop->header = header; |
1021 | /* Find correct latch. We only duplicate chain of conditionals so |
1022 | there should be precisely two edges to the new header. One entry |
1023 | edge and one to latch. */ |
1024 | FOR_EACH_EDGE (e, ei, loop->header->preds) |
1025 | if (header != e->src) |
1026 | { |
1027 | loop->latch = e->src; |
1028 | break; |
1029 | } |
1030 | /* Ensure that the latch is simple. */ |
1031 | if (!single_succ_p (bb: loop_latch_edge (loop)->src)) |
1032 | split_edge (loop_latch_edge (loop)); |
1033 | |
1034 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1035 | { |
1036 | if (do_while_loop_p (loop)) |
1037 | fprintf (stream: dump_file, format: "Loop %d is now do-while loop.\n" , loop->num); |
1038 | else |
1039 | fprintf (stream: dump_file, format: "Loop %d is still not do-while loop.\n" , |
1040 | loop->num); |
1041 | fprintf (stream: dump_file, format: "Exit count: " ); |
1042 | exit_count.dump (f: dump_file); |
1043 | fprintf (stream: dump_file, format: "\nEntry count: " ); |
1044 | entry_count.dump (f: dump_file); |
1045 | fprintf (stream: dump_file, format: "\n" ); |
1046 | } |
1047 | |
1048 | /* We possibly decreased number of itrations by 1. */ |
1049 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
1050 | bool precise = (nexits == (int) exits.length ()); |
1051 | /* Check that loop may not terminate in other way than via |
1052 | basic blocks we duplicated. */ |
1053 | if (precise) |
1054 | { |
1055 | basic_block *bbs = get_loop_body (loop); |
1056 | for (unsigned i = 0; i < loop->num_nodes && precise; ++i) |
1057 | { |
1058 | basic_block bb = bbs[i]; |
1059 | bool found_exit = false; |
1060 | FOR_EACH_EDGE (e, ei, bb->succs) |
1061 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
1062 | { |
1063 | found_exit = true; |
1064 | break; |
1065 | } |
1066 | /* If BB has exit, it was duplicated. */ |
1067 | if (found_exit) |
1068 | continue; |
1069 | /* Give up on irreducible loops. */ |
1070 | if (bb->flags & BB_IRREDUCIBLE_LOOP) |
1071 | { |
1072 | precise = false; |
1073 | break; |
1074 | } |
1075 | /* Check that inner loops are finite. */ |
1076 | for (class loop *l = bb->loop_father; l != loop && precise; |
1077 | l = loop_outer (loop: l)) |
1078 | if (!l->finite_p) |
1079 | { |
1080 | precise = false; |
1081 | break; |
1082 | } |
1083 | /* Verify that there is no statement that may be terminate |
1084 | execution in a way not visible to CFG. */ |
1085 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); |
1086 | !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
1087 | if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi))) |
1088 | precise = false; |
1089 | } |
1090 | free (ptr: bbs); |
1091 | } |
1092 | if (precise |
1093 | && get_max_loop_iterations_int (loop) == 1) |
1094 | { |
1095 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1096 | fprintf (stream: dump_file, format: "Loop %d no longer loops.\n" , loop->num); |
1097 | scale_loop_profile (loop, profile_probability::always (), 0); |
1098 | loops_to_unloop.safe_push (obj: loop); |
1099 | loops_to_unloop_nunroll.safe_push (obj: 0); |
1100 | } |
1101 | else if (precise) |
1102 | { |
1103 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1104 | fprintf (stream: dump_file, |
1105 | format: "Peeled all exits:" |
1106 | " decreased number of iterations of loop %d by 1.\n" , |
1107 | loop->num); |
1108 | adjust_loop_info_after_peeling (loop, npeel: 1, precise: true); |
1109 | } |
1110 | else if (exit_count >= entry_count.apply_scale (num: 9, den: 10)) |
1111 | { |
1112 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1113 | fprintf (stream: dump_file, |
1114 | format: "Peeled likely exits: likely decreased number " |
1115 | "of iterations of loop %d by 1.\n" , loop->num); |
1116 | adjust_loop_info_after_peeling (loop, npeel: 1, precise: false); |
1117 | } |
1118 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
1119 | fprintf (stream: dump_file, |
1120 | format: "Not decreased number" |
1121 | " of iterations of loop %d; likely exits remains.\n" , |
1122 | loop->num); |
1123 | |
1124 | changed = true; |
1125 | } |
1126 | |
1127 | if (changed) |
1128 | { |
1129 | update_ssa (TODO_update_ssa); |
1130 | /* After updating SSA form perform CSE on the loop header |
1131 | copies. This is esp. required for the pass before |
1132 | vectorization since nothing cleans up copied exit tests |
1133 | that can now be simplified. CSE from the entry of the |
1134 | region we copied till all loop exit blocks but not |
1135 | entering the loop itself. */ |
1136 | for (unsigned i = 0; i < copied.length (); ++i) |
1137 | { |
1138 | edge entry = copied[i].first; |
1139 | loop_p loop = copied[i].second; |
1140 | auto_vec<edge> exit_edges = get_loop_exit_edges (loop); |
1141 | bitmap exit_bbs = BITMAP_ALLOC (NULL); |
1142 | for (unsigned j = 0; j < exit_edges.length (); ++j) |
1143 | bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); |
1144 | bitmap_set_bit (exit_bbs, loop->header->index); |
1145 | do_rpo_vn (cfun, entry, exit_bbs); |
1146 | BITMAP_FREE (exit_bbs); |
1147 | } |
1148 | } |
1149 | if (!loops_to_unloop.is_empty ()) |
1150 | { |
1151 | bool irred_invalidated; |
1152 | unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, NULL, irred_invalidated: &irred_invalidated); |
1153 | changed = true; |
1154 | } |
1155 | free (ptr: bbs); |
1156 | free (ptr: copied_bbs); |
1157 | |
1158 | return changed ? TODO_cleanup_cfg : 0; |
1159 | } |
1160 | |
1161 | /* Initialize the loop structures we need, and finalize after. */ |
1162 | |
1163 | unsigned int |
1164 | pass_ch::execute (function *fun) |
1165 | { |
1166 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
1167 | | LOOPS_HAVE_SIMPLE_LATCHES |
1168 | | LOOPS_HAVE_RECORDED_EXITS); |
1169 | |
1170 | unsigned int res = copy_headers (fun); |
1171 | |
1172 | loop_optimizer_finalize (); |
1173 | return res; |
1174 | } |
1175 | |
1176 | /* Assume an earlier phase has already initialized all the loop structures that |
1177 | we need here (and perhaps others too), and that these will be finalized by |
1178 | a later phase. */ |
1179 | |
1180 | unsigned int |
1181 | pass_ch_vect::execute (function *fun) |
1182 | { |
1183 | return copy_headers (fun); |
1184 | } |
1185 | |
/* For the generic pass, consider every loop a candidate for header copying;
   profitability is decided later per duplicated header.  */
1187 | |
1188 | bool |
1189 | pass_ch::process_loop_p (class loop *) |
1190 | { |
1191 | return true; |
1192 | } |
1193 | |
1194 | /* Apply header-copying to loops where we might enable vectorization. */ |
1195 | |
1196 | bool |
1197 | pass_ch_vect::process_loop_p (class loop *loop) |
1198 | { |
1199 | if (!flag_tree_loop_vectorize && !loop->force_vectorize) |
1200 | return false; |
1201 | |
1202 | if (loop->dont_vectorize) |
1203 | return false; |
1204 | |
1205 | /* The vectorizer won't handle anything with multiple exits, so skip. */ |
1206 | edge exit = single_exit (loop); |
1207 | if (!exit) |
1208 | return false; |
1209 | |
1210 | if (!do_while_loop_p (loop)) |
1211 | return true; |
1212 | |
1213 | return false; |
1214 | } |
1215 | |
1216 | } // anon namespace |
1217 | |
/* Factory used by the pass manager to instantiate the header-copying
   pass variant run before vectorization.  */

gimple_opt_pass *
make_pass_ch_vect (gcc::context *ctxt)
{
  return new pass_ch_vect (ctxt);
}
1223 | |
/* Factory used by the pass manager to instantiate the generic
   loop-header-copying pass.  */

gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
  return new pass_ch (ctxt);
}
1229 | |