1 | /* Shrink-wrapping related optimizations. |
2 | Copyright (C) 1987-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* This file handles shrink-wrapping related optimizations. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "cfghooks.h" |
30 | #include "df.h" |
31 | #include "memmodel.h" |
32 | #include "tm_p.h" |
33 | #include "regs.h" |
34 | #include "insn-config.h" |
35 | #include "emit-rtl.h" |
36 | #include "output.h" |
37 | #include "tree-pass.h" |
38 | #include "cfgrtl.h" |
39 | #include "cfgbuild.h" |
40 | #include "bb-reorder.h" |
41 | #include "shrink-wrap.h" |
42 | #include "regcprop.h" |
43 | #include "rtl-iter.h" |
44 | #include "valtrack.h" |
45 | #include "function-abi.h" |
46 | #include "print-rtl.h" |
47 | |
48 | /* Return true if INSN requires the stack frame to be set up. |
49 | PROLOGUE_USED contains the hard registers used in the function |
50 | prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the |
51 | prologue to set up for the function. */ |
52 | bool |
53 | requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, |
54 | HARD_REG_SET set_up_by_prologue) |
55 | { |
56 | df_ref def, use; |
57 | HARD_REG_SET hardregs; |
58 | unsigned regno; |
59 | |
60 | if (CALL_P (insn) && !FAKE_CALL_P (insn)) |
61 | return !SIBLING_CALL_P (insn); |
62 | |
63 | /* We need a frame to get the unique CFA expected by the unwinder. */ |
64 | if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) |
65 | return true; |
66 | |
67 | CLEAR_HARD_REG_SET (set&: hardregs); |
68 | FOR_EACH_INSN_DEF (def, insn) |
69 | { |
70 | rtx dreg = DF_REF_REG (def); |
71 | |
72 | if (!REG_P (dreg)) |
73 | continue; |
74 | |
75 | add_to_hard_reg_set (regs: &hardregs, GET_MODE (dreg), REGNO (dreg)); |
76 | } |
77 | if (hard_reg_set_intersect_p (x: hardregs, y: prologue_used)) |
78 | return true; |
79 | hardregs &= ~crtl->abi->full_reg_clobbers (); |
80 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
81 | if (TEST_HARD_REG_BIT (set: hardregs, bit: regno) |
82 | && df_regs_ever_live_p (regno)) |
83 | return true; |
84 | |
85 | FOR_EACH_INSN_USE (use, insn) |
86 | { |
87 | rtx reg = DF_REF_REG (use); |
88 | |
89 | if (!REG_P (reg)) |
90 | continue; |
91 | |
92 | add_to_hard_reg_set (regs: &hardregs, GET_MODE (reg), |
93 | REGNO (reg)); |
94 | } |
95 | if (hard_reg_set_intersect_p (x: hardregs, y: set_up_by_prologue)) |
96 | return true; |
97 | |
98 | return false; |
99 | } |
100 | |
101 | /* See whether there has a single live edge from BB, which dest uses |
102 | [REGNO, END_REGNO). Return the live edge if its dest bb has |
103 | one or two predecessors. Otherwise return NULL. */ |
104 | |
105 | static edge |
106 | live_edge_for_reg (basic_block bb, int regno, int end_regno) |
107 | { |
108 | edge e, live_edge; |
109 | edge_iterator ei; |
110 | bitmap live; |
111 | int i; |
112 | |
113 | live_edge = NULL; |
114 | FOR_EACH_EDGE (e, ei, bb->succs) |
115 | { |
116 | live = df_get_live_in (bb: e->dest); |
117 | for (i = regno; i < end_regno; i++) |
118 | if (REGNO_REG_SET_P (live, i)) |
119 | { |
120 | if (live_edge && live_edge != e) |
121 | return NULL; |
122 | live_edge = e; |
123 | } |
124 | } |
125 | |
126 | /* We can sometimes encounter dead code. Don't try to move it |
127 | into the exit block. */ |
128 | if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
129 | return NULL; |
130 | |
131 | /* Reject targets of abnormal edges. This is needed for correctness |
132 | on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on |
133 | exception edges even though it is generally treated as call-saved |
134 | for the majority of the compilation. Moving across abnormal edges |
135 | isn't going to be interesting for shrink-wrap usage anyway. */ |
136 | if (live_edge->flags & EDGE_ABNORMAL) |
137 | return NULL; |
138 | |
139 | /* When live_edge->dest->preds == 2, we can create a new block on |
140 | the edge to make it meet the requirement. */ |
141 | if (EDGE_COUNT (live_edge->dest->preds) > 2) |
142 | return NULL; |
143 | |
144 | return live_edge; |
145 | } |
146 | |
147 | /* Try to move INSN from BB to a successor. Return true on success. |
148 | USES and DEFS are the set of registers that are used and defined |
149 | after INSN in BB. SPLIT_P indicates whether a live edge from BB |
150 | is splitted or not. */ |
151 | |
152 | static bool |
153 | move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, |
154 | const_hard_reg_set uses, |
155 | const_hard_reg_set defs, |
156 | bool *split_p, |
157 | struct dead_debug_local *debug) |
158 | { |
159 | rtx set, src, dest; |
160 | bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL; |
161 | unsigned int i, dregno, end_dregno; |
162 | unsigned int sregno = FIRST_PSEUDO_REGISTER; |
163 | unsigned int end_sregno = FIRST_PSEUDO_REGISTER; |
164 | basic_block next_block; |
165 | edge live_edge; |
166 | rtx_insn *dinsn; |
167 | df_ref def; |
168 | |
169 | /* Look for a simple register assignment. We don't use single_set here |
170 | because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary |
171 | destinations. */ |
172 | if (!INSN_P (insn)) |
173 | return false; |
174 | set = PATTERN (insn); |
175 | if (GET_CODE (set) != SET) |
176 | return false; |
177 | src = SET_SRC (set); |
178 | dest = SET_DEST (set); |
179 | |
180 | /* For the destination, we want only a register. Also disallow STACK |
181 | or FRAME related adjustments. They are likely part of the prologue, |
182 | so keep them in the entry block. */ |
183 | if (!REG_P (dest) |
184 | || dest == stack_pointer_rtx |
185 | || dest == frame_pointer_rtx |
186 | || dest == hard_frame_pointer_rtx) |
187 | return false; |
188 | |
189 | /* For the source, we want one of: |
190 | (1) A (non-overlapping) register |
191 | (2) A constant, |
192 | (3) An expression involving no more than one register. |
193 | |
194 | That last point comes from the code following, which was originally |
195 | written to handle only register move operations, and still only handles |
196 | a single source register when checking for overlaps. Happily, the |
197 | same checks can be applied to expressions like (plus reg const). */ |
198 | |
199 | if (CONSTANT_P (src)) |
200 | ; |
201 | else if (!REG_P (src)) |
202 | { |
203 | rtx src_inner = NULL_RTX; |
204 | |
205 | if (can_throw_internal (insn)) |
206 | return false; |
207 | |
208 | subrtx_var_iterator::array_type array; |
209 | FOR_EACH_SUBRTX_VAR (iter, array, src, ALL) |
210 | { |
211 | rtx x = *iter; |
212 | switch (GET_RTX_CLASS (GET_CODE (x))) |
213 | { |
214 | case RTX_CONST_OBJ: |
215 | case RTX_COMPARE: |
216 | case RTX_COMM_COMPARE: |
217 | case RTX_BIN_ARITH: |
218 | case RTX_COMM_ARITH: |
219 | case RTX_UNARY: |
220 | case RTX_TERNARY: |
221 | /* Constant or expression. Continue. */ |
222 | break; |
223 | |
224 | case RTX_OBJ: |
225 | case RTX_EXTRA: |
226 | switch (GET_CODE (x)) |
227 | { |
228 | case UNSPEC: |
229 | case SUBREG: |
230 | case STRICT_LOW_PART: |
231 | case PC: |
232 | case LO_SUM: |
233 | /* Ok. Continue. */ |
234 | break; |
235 | |
236 | case REG: |
237 | /* Fail if we see a second inner register. */ |
238 | if (src_inner != NULL) |
239 | return false; |
240 | src_inner = x; |
241 | break; |
242 | |
243 | default: |
244 | return false; |
245 | } |
246 | break; |
247 | |
248 | default: |
249 | return false; |
250 | } |
251 | } |
252 | |
253 | if (src_inner != NULL) |
254 | src = src_inner; |
255 | } |
256 | |
257 | /* Make sure that the source register isn't defined later in BB. */ |
258 | if (REG_P (src)) |
259 | { |
260 | sregno = REGNO (src); |
261 | end_sregno = END_REGNO (x: src); |
262 | if (overlaps_hard_reg_set_p (regs: defs, GET_MODE (src), regno: sregno)) |
263 | return false; |
264 | } |
265 | |
266 | /* Make sure that the destination register isn't referenced later in BB. */ |
267 | dregno = REGNO (dest); |
268 | end_dregno = END_REGNO (x: dest); |
269 | if (overlaps_hard_reg_set_p (regs: uses, GET_MODE (dest), regno: dregno) |
270 | || overlaps_hard_reg_set_p (regs: defs, GET_MODE (dest), regno: dregno)) |
271 | return false; |
272 | |
273 | /* See whether there is a successor block to which we could move INSN. */ |
274 | live_edge = live_edge_for_reg (bb, regno: dregno, end_regno: end_dregno); |
275 | if (!live_edge) |
276 | return false; |
277 | |
278 | next_block = live_edge->dest; |
279 | /* Create a new basic block on the edge. */ |
280 | if (EDGE_COUNT (next_block->preds) == 2) |
281 | { |
282 | /* split_edge for a block with only one successor is meaningless. */ |
283 | if (EDGE_COUNT (bb->succs) == 1) |
284 | return false; |
285 | |
286 | /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */ |
287 | if (!df_live) |
288 | return false; |
289 | |
290 | basic_block old_dest = live_edge->dest; |
291 | next_block = split_edge (live_edge); |
292 | |
293 | /* We create a new basic block. Call df_grow_bb_info to make sure |
294 | all data structures are allocated. */ |
295 | df_grow_bb_info (df_live); |
296 | |
297 | bitmap_and (df_get_live_in (bb: next_block), df_get_live_out (bb), |
298 | df_get_live_in (bb: old_dest)); |
299 | df_set_bb_dirty (next_block); |
300 | |
301 | /* We should not split more than once for a function. */ |
302 | if (*split_p) |
303 | return false; |
304 | |
305 | *split_p = true; |
306 | } |
307 | |
308 | /* At this point we are committed to moving INSN, but let's try to |
309 | move it as far as we can. */ |
310 | do |
311 | { |
312 | if (MAY_HAVE_DEBUG_BIND_INSNS) |
313 | { |
314 | FOR_BB_INSNS_REVERSE (bb, dinsn) |
315 | if (DEBUG_BIND_INSN_P (dinsn)) |
316 | { |
317 | df_ref use; |
318 | FOR_EACH_INSN_USE (use, dinsn) |
319 | if (refers_to_regno_p (dregno, end_dregno, |
320 | DF_REF_REG (use), (rtx *) NULL)) |
321 | dead_debug_add (debug, use, DF_REF_REGNO (use)); |
322 | } |
323 | else if (dinsn == insn) |
324 | break; |
325 | } |
326 | live_out = df_get_live_out (bb); |
327 | live_in = df_get_live_in (bb: next_block); |
328 | bb = next_block; |
329 | |
330 | /* Check whether BB uses DEST or clobbers DEST. We need to add |
331 | INSN to BB if so. Either way, DEST is no longer live on entry, |
332 | except for any part that overlaps SRC (next loop). */ |
333 | if (!*split_p) |
334 | { |
335 | bb_uses = &DF_LR_BB_INFO (bb)->use; |
336 | bb_defs = &DF_LR_BB_INFO (bb)->def; |
337 | } |
338 | if (df_live) |
339 | { |
340 | for (i = dregno; i < end_dregno; i++) |
341 | { |
342 | if (*split_p |
343 | || REGNO_REG_SET_P (bb_uses, i) |
344 | || REGNO_REG_SET_P (bb_defs, i) |
345 | || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
346 | next_block = NULL; |
347 | CLEAR_REGNO_REG_SET (live_out, i); |
348 | CLEAR_REGNO_REG_SET (live_in, i); |
349 | } |
350 | |
351 | /* Check whether BB clobbers SRC. We need to add INSN to BB if so. |
352 | Either way, SRC is now live on entry. */ |
353 | for (i = sregno; i < end_sregno; i++) |
354 | { |
355 | if (*split_p |
356 | || REGNO_REG_SET_P (bb_defs, i) |
357 | || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
358 | next_block = NULL; |
359 | SET_REGNO_REG_SET (live_out, i); |
360 | SET_REGNO_REG_SET (live_in, i); |
361 | } |
362 | } |
363 | else |
364 | { |
365 | /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and |
366 | DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e. |
367 | at -O1, just give up searching NEXT_BLOCK. */ |
368 | next_block = NULL; |
369 | for (i = dregno; i < end_dregno; i++) |
370 | { |
371 | CLEAR_REGNO_REG_SET (live_out, i); |
372 | CLEAR_REGNO_REG_SET (live_in, i); |
373 | } |
374 | |
375 | for (i = sregno; i < end_sregno; i++) |
376 | { |
377 | SET_REGNO_REG_SET (live_out, i); |
378 | SET_REGNO_REG_SET (live_in, i); |
379 | } |
380 | } |
381 | |
382 | /* If we don't need to add the move to BB, look for a single |
383 | successor block. */ |
384 | if (next_block) |
385 | { |
386 | live_edge = live_edge_for_reg (bb: next_block, regno: dregno, end_regno: end_dregno); |
387 | if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1) |
388 | break; |
389 | next_block = live_edge->dest; |
390 | } |
391 | } |
392 | while (next_block); |
393 | |
394 | /* For the new created basic block, there is no dataflow info at all. |
395 | So skip the following dataflow update and check. */ |
396 | if (!(*split_p)) |
397 | { |
398 | /* BB now defines DEST. It only uses the parts of DEST that overlap SRC |
399 | (next loop). */ |
400 | for (i = dregno; i < end_dregno; i++) |
401 | { |
402 | CLEAR_REGNO_REG_SET (bb_uses, i); |
403 | SET_REGNO_REG_SET (bb_defs, i); |
404 | } |
405 | |
406 | /* BB now uses SRC. */ |
407 | for (i = sregno; i < end_sregno; i++) |
408 | SET_REGNO_REG_SET (bb_uses, i); |
409 | } |
410 | |
411 | /* Insert debug temps for dead REGs used in subsequent debug insns. */ |
412 | if (debug->used && !bitmap_empty_p (map: debug->used)) |
413 | FOR_EACH_INSN_DEF (def, insn) |
414 | dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn, |
415 | DEBUG_TEMP_BEFORE_WITH_VALUE); |
416 | |
417 | rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb)); |
418 | /* Update the LABEL_NUSES count on any referenced labels. The ideal |
419 | solution here would be to actually move the instruction instead |
420 | of copying/deleting it as this loses some notations on the |
421 | insn. */ |
422 | mark_jump_label (PATTERN (insn), insn_copy, 0); |
423 | delete_insn (insn); |
424 | return true; |
425 | } |
426 | |
427 | /* Look for register copies in the first block of the function, and move |
428 | them down into successor blocks if the register is used only on one |
429 | path. This exposes more opportunities for shrink-wrapping. These |
430 | kinds of sets often occur when incoming argument registers are moved |
431 | to call-saved registers because their values are live across one or |
432 | more calls during the function. */ |
433 | |
434 | static void |
435 | prepare_shrink_wrap (basic_block entry_block) |
436 | { |
437 | rtx_insn *insn, *curr; |
438 | rtx x; |
439 | HARD_REG_SET uses, defs; |
440 | df_ref def, use; |
441 | bool split_p = false; |
442 | unsigned int i; |
443 | struct dead_debug_local debug; |
444 | |
445 | if (JUMP_P (BB_END (entry_block))) |
446 | { |
447 | /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries |
448 | to sink the copies from parameter to callee saved register out of |
449 | entry block. copyprop_hardreg_forward_bb_without_debug_insn is called |
450 | to release some dependences. */ |
451 | copyprop_hardreg_forward_bb_without_debug_insn (bb: entry_block); |
452 | } |
453 | |
454 | dead_debug_local_init (&debug, NULL, NULL); |
455 | CLEAR_HARD_REG_SET (set&: uses); |
456 | CLEAR_HARD_REG_SET (set&: defs); |
457 | |
458 | FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) |
459 | if (NONDEBUG_INSN_P (insn) |
460 | && !move_insn_for_shrink_wrap (bb: entry_block, insn, uses, defs, |
461 | split_p: &split_p, debug: &debug)) |
462 | { |
463 | /* Add all defined registers to DEFs. */ |
464 | FOR_EACH_INSN_DEF (def, insn) |
465 | { |
466 | x = DF_REF_REG (def); |
467 | if (REG_P (x) && HARD_REGISTER_P (x)) |
468 | for (i = REGNO (x); i < END_REGNO (x); i++) |
469 | SET_HARD_REG_BIT (set&: defs, bit: i); |
470 | } |
471 | |
472 | /* Add all used registers to USESs. */ |
473 | FOR_EACH_INSN_USE (use, insn) |
474 | { |
475 | x = DF_REF_REG (use); |
476 | if (REG_P (x) && HARD_REGISTER_P (x)) |
477 | for (i = REGNO (x); i < END_REGNO (x); i++) |
478 | SET_HARD_REG_BIT (set&: uses, bit: i); |
479 | } |
480 | } |
481 | |
482 | dead_debug_local_finish (&debug, NULL); |
483 | } |
484 | |
485 | /* Return whether basic block PRO can get the prologue. It cannot if it |
486 | has incoming complex edges that need a prologue inserted (we make a new |
487 | block for the prologue, so those edges would need to be redirected, which |
488 | does not work). It also cannot if there exist registers live on entry |
489 | to PRO that are clobbered by the prologue. */ |
490 | |
491 | static bool |
492 | can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered) |
493 | { |
494 | edge e; |
495 | edge_iterator ei; |
496 | FOR_EACH_EDGE (e, ei, pro->preds) |
497 | if (e->flags & EDGE_COMPLEX |
498 | && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) |
499 | return false; |
500 | |
501 | HARD_REG_SET live; |
502 | REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro)); |
503 | if (hard_reg_set_intersect_p (x: live, y: prologue_clobbered)) |
504 | return false; |
505 | |
506 | return true; |
507 | } |
508 | |
509 | /* Return whether we can duplicate basic block BB for shrink wrapping. We |
510 | cannot if the block cannot be duplicated at all, or if any of its incoming |
511 | edges are complex and come from a block that does not require a prologue |
512 | (we cannot redirect such edges), or if the block is too big to copy. |
513 | PRO is the basic block before which we would put the prologue, MAX_SIZE is |
514 | the maximum size block we allow to be copied. */ |
515 | |
516 | static bool |
517 | can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size) |
518 | { |
519 | if (!can_duplicate_block_p (bb)) |
520 | return false; |
521 | |
522 | edge e; |
523 | edge_iterator ei; |
524 | FOR_EACH_EDGE (e, ei, bb->preds) |
525 | if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING) |
526 | && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) |
527 | return false; |
528 | |
529 | unsigned size = 0; |
530 | |
531 | rtx_insn *insn; |
532 | FOR_BB_INSNS (bb, insn) |
533 | if (NONDEBUG_INSN_P (insn)) |
534 | { |
535 | size += get_attr_min_length (insn); |
536 | if (size > max_size) |
537 | return false; |
538 | } |
539 | |
540 | return true; |
541 | } |
542 | |
543 | /* Do whatever needs to be done for exits that run without prologue. |
544 | Sibcalls need nothing done. Normal exits get a simple_return inserted. */ |
545 | |
546 | static void |
547 | handle_simple_exit (edge e) |
548 | { |
549 | |
550 | if (e->flags & EDGE_SIBCALL) |
551 | { |
552 | /* Tell function.cc to take no further action on this edge. */ |
553 | e->flags |= EDGE_IGNORE; |
554 | |
555 | e->flags &= ~EDGE_FALLTHRU; |
556 | emit_barrier_after_bb (bb: e->src); |
557 | return; |
558 | } |
559 | |
560 | /* If the basic block the edge comes from has multiple successors, |
561 | split the edge. */ |
562 | if (EDGE_COUNT (e->src->succs) > 1) |
563 | { |
564 | basic_block old_bb = e->src; |
565 | rtx_insn *end = BB_END (old_bb); |
566 | rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end); |
567 | basic_block new_bb = create_basic_block (note, note, old_bb); |
568 | BB_COPY_PARTITION (new_bb, old_bb); |
569 | BB_END (old_bb) = end; |
570 | |
571 | redirect_edge_succ (e, new_bb); |
572 | new_bb->count = e->count (); |
573 | e->flags |= EDGE_FALLTHRU; |
574 | |
575 | e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
576 | } |
577 | |
578 | e->flags &= ~EDGE_FALLTHRU; |
579 | rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (), |
580 | BB_END (e->src)); |
581 | JUMP_LABEL (ret) = simple_return_rtx; |
582 | emit_barrier_after_bb (bb: e->src); |
583 | |
584 | if (dump_file) |
585 | fprintf (stream: dump_file, format: "Made simple_return with UID %d in bb %d\n" , |
586 | INSN_UID (insn: ret), e->src->index); |
587 | } |
588 | |
589 | /* Try to perform a kind of shrink-wrapping, making sure the |
590 | prologue/epilogue is emitted only around those parts of the |
591 | function that require it. |
592 | |
593 | There will be exactly one prologue, and it will be executed either |
594 | zero or one time, on any path. Depending on where the prologue is |
595 | placed, some of the basic blocks can be reached via both paths with |
596 | and without a prologue. Such blocks will be duplicated here, and the |
597 | edges changed to match. |
598 | |
599 | Paths that go to the exit without going through the prologue will use |
600 | a simple_return instead of the epilogue. We maximize the number of |
601 | those, making sure to only duplicate blocks that can be duplicated. |
602 | If the prologue can then still be placed in multiple locations, we |
603 | place it as early as possible. |
604 | |
605 | An example, where we duplicate blocks with control flow (legend: |
606 | _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should |
607 | be taken to point down or to the right, to simplify the diagram; here, |
608 | block 3 needs a prologue, the rest does not): |
609 | |
610 | |
611 | B B |
612 | | | |
613 | 2 2 |
614 | |\ |\ |
615 | | 3 becomes | 3 |
616 | |/ | \ |
617 | 4 7 4 |
618 | |\ |\ |\ |
619 | | 5 | 8 | 5 |
620 | |/ |/ |/ |
621 | 6 9 6 |
622 | | | | |
623 | R S R |
624 | |
625 | |
626 | (bb 4 is duplicated to 7, and so on; the prologue is inserted on the |
627 | edge 2->3). |
628 | |
629 | Another example, where part of a loop is duplicated (again, bb 3 is |
630 | the only block that needs a prologue): |
631 | |
632 | |
633 | B 3<-- B ->3<-- |
634 | | | | | | | | |
635 | | v | becomes | | v | |
636 | 2---4--- 2---5-- 4--- |
637 | | | | |
638 | R S R |
639 | |
640 | |
641 | (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3). |
642 | |
643 | ENTRY_EDGE is the edge where the prologue will be placed, possibly |
644 | changed by this function. PROLOGUE_SEQ is the prologue we will insert. */ |
645 | |
646 | void |
647 | try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq) |
648 | { |
649 | /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes |
650 | no sense to shrink-wrap: then do not shrink-wrap! */ |
651 | |
652 | if (!SHRINK_WRAPPING_ENABLED) |
653 | return; |
654 | |
655 | if (crtl->profile && !targetm.profile_before_prologue ()) |
656 | return; |
657 | |
658 | if (crtl->calls_eh_return) |
659 | return; |
660 | |
661 | bool empty_prologue = true; |
662 | for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) |
663 | if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)) |
664 | { |
665 | empty_prologue = false; |
666 | break; |
667 | } |
668 | if (empty_prologue) |
669 | return; |
670 | |
671 | /* Move some code down to expose more shrink-wrapping opportunities. */ |
672 | |
673 | basic_block entry = (*entry_edge)->dest; |
674 | prepare_shrink_wrap (entry_block: entry); |
675 | |
676 | if (dump_file) |
677 | fprintf (stream: dump_file, format: "Attempting shrink-wrapping optimization.\n" ); |
678 | |
679 | /* Compute the registers set and used in the prologue. */ |
680 | |
681 | HARD_REG_SET prologue_clobbered, prologue_used; |
682 | CLEAR_HARD_REG_SET (set&: prologue_clobbered); |
683 | CLEAR_HARD_REG_SET (set&: prologue_used); |
684 | for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) |
685 | if (NONDEBUG_INSN_P (insn)) |
686 | { |
687 | HARD_REG_SET this_used; |
688 | CLEAR_HARD_REG_SET (set&: this_used); |
689 | note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); |
690 | this_used &= ~prologue_clobbered; |
691 | prologue_used |= this_used; |
692 | note_stores (insn, record_hard_reg_sets, &prologue_clobbered); |
693 | } |
694 | CLEAR_HARD_REG_BIT (set&: prologue_clobbered, STACK_POINTER_REGNUM); |
695 | if (frame_pointer_needed) |
696 | CLEAR_HARD_REG_BIT (set&: prologue_clobbered, HARD_FRAME_POINTER_REGNUM); |
697 | |
698 | /* Find out what registers are set up by the prologue; any use of these |
699 | cannot happen before the prologue. */ |
700 | |
701 | struct hard_reg_set_container set_up_by_prologue; |
702 | CLEAR_HARD_REG_SET (set&: set_up_by_prologue.set); |
703 | add_to_hard_reg_set (regs: &set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM); |
704 | add_to_hard_reg_set (regs: &set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); |
705 | if (frame_pointer_needed) |
706 | add_to_hard_reg_set (regs: &set_up_by_prologue.set, Pmode, |
707 | HARD_FRAME_POINTER_REGNUM); |
708 | if (pic_offset_table_rtx |
709 | && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) |
710 | add_to_hard_reg_set (regs: &set_up_by_prologue.set, Pmode, |
711 | PIC_OFFSET_TABLE_REGNUM); |
712 | if (crtl->drap_reg) |
713 | add_to_hard_reg_set (regs: &set_up_by_prologue.set, |
714 | GET_MODE (crtl->drap_reg), |
715 | REGNO (crtl->drap_reg)); |
716 | if (targetm.set_up_by_prologue) |
717 | targetm.set_up_by_prologue (&set_up_by_prologue); |
718 | |
719 | /* We will insert the prologue before the basic block PRO. PRO should |
720 | dominate all basic blocks that need the prologue to be executed |
721 | before them. First, make PRO the "tightest wrap" possible. */ |
722 | |
723 | calculate_dominance_info (CDI_DOMINATORS); |
724 | |
725 | basic_block pro = 0; |
726 | |
727 | basic_block bb; |
728 | edge e; |
729 | edge_iterator ei; |
730 | FOR_EACH_BB_FN (bb, cfun) |
731 | { |
732 | rtx_insn *insn; |
733 | FOR_BB_INSNS (bb, insn) |
734 | if (NONDEBUG_INSN_P (insn) |
735 | && requires_stack_frame_p (insn, prologue_used, |
736 | set_up_by_prologue: set_up_by_prologue.set)) |
737 | { |
738 | if (dump_file) |
739 | { |
740 | fprintf (stream: dump_file, format: "Block %d needs prologue due to insn %d:\n" , |
741 | bb->index, INSN_UID (insn)); |
742 | print_rtl_single (dump_file, insn); |
743 | } |
744 | pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb); |
745 | break; |
746 | } |
747 | } |
748 | |
749 | /* If nothing needs a prologue, just put it at the start. This really |
750 | shouldn't happen, but we cannot fix it here. */ |
751 | |
752 | if (pro == 0) |
753 | { |
754 | if (dump_file) |
755 | fprintf(stream: dump_file, format: "Nothing needs a prologue, but it isn't empty; " |
756 | "putting it at the start.\n" ); |
757 | pro = entry; |
758 | } |
759 | |
760 | if (dump_file) |
761 | fprintf (stream: dump_file, format: "After wrapping required blocks, PRO is now %d\n" , |
762 | pro->index); |
763 | |
764 | /* Now see if we can put the prologue at the start of PRO. Putting it |
765 | there might require duplicating a block that cannot be duplicated, |
766 | or in some cases we cannot insert the prologue there at all. If PRO |
767 | wont't do, try again with the immediate dominator of PRO, and so on. |
768 | |
769 | The blocks that need duplicating are those reachable from PRO but |
770 | not dominated by it. We keep in BB_WITH a bitmap of the blocks |
771 | reachable from PRO that we already found, and in VEC a stack of |
772 | those we still need to consider (to find successors). */ |
773 | |
774 | auto_bitmap bb_with; |
775 | bitmap_set_bit (bb_with, pro->index); |
776 | |
777 | vec<basic_block> vec; |
778 | vec.create (n_basic_blocks_for_fn (cfun)); |
779 | vec.quick_push (obj: pro); |
780 | |
781 | unsigned max_grow_size = get_uncond_jump_length (); |
782 | max_grow_size *= param_max_grow_copy_bb_insns; |
783 | |
784 | basic_block checked_pro = NULL; |
785 | |
786 | while (pro != entry) |
787 | { |
788 | if (pro != checked_pro) |
789 | { |
790 | while (pro != entry && !can_get_prologue (pro, prologue_clobbered)) |
791 | { |
792 | pro = get_immediate_dominator (CDI_DOMINATORS, pro); |
793 | |
794 | if (bitmap_set_bit (bb_with, pro->index)) |
795 | vec.quick_push (obj: pro); |
796 | } |
797 | checked_pro = pro; |
798 | } |
799 | |
800 | if (vec.is_empty ()) |
801 | break; |
802 | |
803 | basic_block bb = vec.pop (); |
804 | if (!can_dup_for_shrink_wrapping (bb, pro, max_size: max_grow_size)) |
805 | while (!dominated_by_p (CDI_DOMINATORS, bb, pro)) |
806 | { |
807 | gcc_assert (pro != entry); |
808 | |
809 | pro = get_immediate_dominator (CDI_DOMINATORS, pro); |
810 | |
811 | if (bitmap_set_bit (bb_with, pro->index)) |
812 | vec.quick_push (obj: pro); |
813 | } |
814 | |
815 | FOR_EACH_EDGE (e, ei, bb->succs) |
816 | if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) |
817 | && bitmap_set_bit (bb_with, e->dest->index)) |
818 | vec.quick_push (obj: e->dest); |
819 | } |
820 | |
821 | if (dump_file) |
822 | fprintf (stream: dump_file, format: "Avoiding non-duplicatable blocks, PRO is now %d\n" , |
823 | pro->index); |
824 | |
825 | /* If we can move PRO back without having to duplicate more blocks, do so. |
826 | We do this because putting the prologue earlier is better for scheduling. |
827 | |
828 | We can move back to a block PRE if every path from PRE will eventually |
829 | need a prologue, that is, PRO is a post-dominator of PRE. PRE needs |
830 | to dominate every block reachable from itself. We keep in BB_TMP a |
831 | bitmap of the blocks reachable from PRE that we already found, and in |
832 | VEC a stack of those we still need to consider. |
833 | |
834 | Any block reachable from PRE is also reachable from all predecessors |
835 | of PRE, so if we find we need to move PRE back further we can leave |
836 | everything not considered so far on the stack. Any block dominated |
837 | by PRE is also dominated by all other dominators of PRE, so anything |
838 | found good for some PRE does not need to be reconsidered later. |
839 | |
840 | We don't need to update BB_WITH because none of the new blocks found |
841 | can jump to a block that does not need the prologue. */ |
842 | |
843 | if (pro != entry) |
844 | { |
845 | calculate_dominance_info (CDI_POST_DOMINATORS); |
846 | |
847 | auto_bitmap bb_tmp; |
848 | bitmap_copy (bb_tmp, bb_with); |
849 | basic_block last_ok = pro; |
850 | vec.truncate (size: 0); |
851 | |
852 | while (pro != entry) |
853 | { |
854 | basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro); |
855 | if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro)) |
856 | break; |
857 | |
858 | if (bitmap_set_bit (bb_tmp, pre->index)) |
859 | vec.quick_push (obj: pre); |
860 | |
861 | bool ok = true; |
862 | while (!vec.is_empty ()) |
863 | { |
864 | if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre)) |
865 | { |
866 | ok = false; |
867 | break; |
868 | } |
869 | |
870 | basic_block bb = vec.pop (); |
871 | FOR_EACH_EDGE (e, ei, bb->succs) |
872 | if (bitmap_set_bit (bb_tmp, e->dest->index)) |
873 | vec.quick_push (obj: e->dest); |
874 | } |
875 | |
876 | if (ok && can_get_prologue (pro: pre, prologue_clobbered)) |
877 | last_ok = pre; |
878 | |
879 | pro = pre; |
880 | } |
881 | |
882 | pro = last_ok; |
883 | |
884 | free_dominance_info (CDI_POST_DOMINATORS); |
885 | } |
886 | |
887 | vec.release (); |
888 | |
889 | if (dump_file) |
890 | fprintf (stream: dump_file, format: "Bumping back to anticipatable blocks, PRO is now %d\n" , |
891 | pro->index); |
892 | |
893 | if (pro == entry) |
894 | { |
895 | free_dominance_info (CDI_DOMINATORS); |
896 | return; |
897 | } |
898 | |
899 | /* Compute what fraction of the frequency and count of the blocks that run |
900 | both with and without prologue are for running with prologue. This gives |
901 | the correct answer for reducible flow graphs; for irreducible flow graphs |
902 | our profile is messed up beyond repair anyway. */ |
903 | |
904 | profile_count num = profile_count::zero (); |
905 | profile_count den = profile_count::zero (); |
906 | |
907 | FOR_EACH_EDGE (e, ei, pro->preds) |
908 | if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) |
909 | { |
910 | if (e->count ().initialized_p ()) |
911 | num += e->count (); |
912 | if (e->src->count.initialized_p ()) |
913 | den += e->src->count; |
914 | } |
915 | |
916 | /* All is okay, so do it. */ |
917 | |
918 | crtl->shrink_wrapped = true; |
919 | if (dump_file) |
920 | fprintf (stream: dump_file, format: "Performing shrink-wrapping.\n" ); |
921 | |
922 | /* Copy the blocks that can run both with and without prologue. The |
923 | originals run with prologue, the copies without. Store a pointer to |
924 | the copy in the ->aux field of the original. */ |
925 | |
926 | FOR_EACH_BB_FN (bb, cfun) |
927 | if (bitmap_bit_p (bb_with, bb->index) |
928 | && !dominated_by_p (CDI_DOMINATORS, bb, pro)) |
929 | { |
930 | basic_block dup = duplicate_block (bb, 0, 0); |
931 | |
932 | bb->aux = dup; |
933 | |
934 | if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup))) |
935 | emit_barrier_after_bb (bb: dup); |
936 | |
937 | if (EDGE_COUNT (dup->succs) == 0) |
938 | emit_barrier_after_bb (bb: dup); |
939 | |
940 | if (dump_file) |
941 | fprintf (stream: dump_file, format: "Duplicated %d to %d\n" , bb->index, dup->index); |
942 | |
943 | if (num == profile_count::zero () || den.nonzero_p ()) |
944 | bb->count = bb->count.apply_scale (num, den); |
945 | dup->count -= bb->count; |
946 | } |
947 | |
948 | /* Now change the edges to point to the copies, where appropriate. */ |
949 | |
950 | FOR_EACH_BB_FN (bb, cfun) |
951 | if (!dominated_by_p (CDI_DOMINATORS, bb, pro)) |
952 | { |
953 | basic_block src = bb; |
954 | if (bitmap_bit_p (bb_with, bb->index)) |
955 | src = (basic_block) bb->aux; |
956 | |
957 | FOR_EACH_EDGE (e, ei, src->succs) |
958 | { |
959 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
960 | continue; |
961 | |
962 | if (bitmap_bit_p (bb_with, e->dest->index) |
963 | && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) |
964 | { |
965 | if (dump_file) |
966 | fprintf (stream: dump_file, format: "Redirecting edge %d->%d to %d\n" , |
967 | e->src->index, e->dest->index, |
968 | ((basic_block) e->dest->aux)->index); |
969 | redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); |
970 | } |
971 | else if (e->flags & EDGE_FALLTHRU |
972 | && bitmap_bit_p (bb_with, bb->index)) |
973 | force_nonfallthru (e); |
974 | } |
975 | } |
976 | |
977 | /* Also redirect the function entry edge if necessary. */ |
978 | |
979 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) |
980 | if (bitmap_bit_p (bb_with, e->dest->index) |
981 | && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) |
982 | { |
983 | basic_block split_bb = split_edge (e); |
984 | e = single_succ_edge (bb: split_bb); |
985 | redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); |
986 | } |
987 | |
988 | /* Make a simple_return for those exits that run without prologue. */ |
989 | |
990 | FOR_EACH_BB_REVERSE_FN (bb, cfun) |
991 | if (!bitmap_bit_p (bb_with, bb->index)) |
992 | FOR_EACH_EDGE (e, ei, bb->succs) |
993 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
994 | handle_simple_exit (e); |
995 | |
996 | /* Finally, we want a single edge to put the prologue on. Make a new |
997 | block before the PRO block; the edge beteen them is the edge we want. |
998 | Then redirect those edges into PRO that come from blocks without the |
999 | prologue, to point to the new block instead. The new prologue block |
1000 | is put at the end of the insn chain. */ |
1001 | |
1002 | basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); |
1003 | BB_COPY_PARTITION (new_bb, pro); |
1004 | new_bb->count = profile_count::zero (); |
1005 | if (dump_file) |
1006 | fprintf (stream: dump_file, format: "Made prologue block %d\n" , new_bb->index); |
1007 | |
1008 | for (ei = ei_start (pro->preds); (e = ei_safe_edge (i: ei)); ) |
1009 | { |
1010 | if (bitmap_bit_p (bb_with, e->src->index) |
1011 | || dominated_by_p (CDI_DOMINATORS, e->src, pro)) |
1012 | { |
1013 | ei_next (i: &ei); |
1014 | continue; |
1015 | } |
1016 | |
1017 | new_bb->count += e->count (); |
1018 | |
1019 | redirect_edge_and_branch_force (e, new_bb); |
1020 | if (dump_file) |
1021 | fprintf (stream: dump_file, format: "Redirected edge from %d\n" , e->src->index); |
1022 | } |
1023 | |
1024 | *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU); |
1025 | force_nonfallthru (*entry_edge); |
1026 | |
1027 | free_dominance_info (CDI_DOMINATORS); |
1028 | } |
1029 | |
1030 | /* Separate shrink-wrapping |
1031 | |
1032 | Instead of putting all of the prologue and epilogue in one spot, we |
1033 | can put parts of it in places where those components are executed less |
1034 | frequently. The following code does this, for prologue and epilogue |
1035 | components that can be put in more than one location, and where those |
1036 | components can be executed more than once (the epilogue component will |
1037 | always be executed before the prologue component is executed a second |
1038 | time). |
1039 | |
1040 | What exactly is a component is target-dependent. The more usual |
1041 | components are simple saves/restores to/from the frame of callee-saved |
1042 | registers. This code treats components abstractly (as an sbitmap), |
1043 | letting the target handle all details. |
1044 | |
1045 | Prologue components are placed in such a way that for every component |
1046 | the prologue is executed as infrequently as possible. We do this by |
1047 | walking the dominator tree, comparing the cost of placing a prologue |
1048 | component before a block to the sum of costs determined for all subtrees |
1049 | of that block. |
1050 | |
1051 | From this placement, we then determine for each component all blocks |
1052 | where at least one of this block's dominators (including itself) will |
1053 | get a prologue inserted. That then is how the components are placed. |
1054 | We could place the epilogue components a bit smarter (we can save a |
1055 | bit of code size sometimes); this is a possible future improvement. |
1056 | |
1057 | Prologues and epilogues are preferably placed into a block, either at |
1058 | the beginning or end of it, if it is needed for all predecessor resp. |
1059 | successor edges; or placed on the edge otherwise. |
1060 | |
1061 | If the placement of any prologue/epilogue leads to a situation we cannot |
1062 | handle (for example, an abnormal edge would need to be split, or some |
1063 | targets want to use some specific registers that may not be available |
1064 | where we want to put them), separate shrink-wrapping for the components |
1065 | in that prologue/epilogue is aborted. */ |
1066 | |
1067 | |
1068 | /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the |
1069 | label LABEL. */ |
1070 | static void |
1071 | dump_components (const char *label, sbitmap components) |
1072 | { |
1073 | if (bitmap_empty_p (components)) |
1074 | return; |
1075 | |
1076 | fprintf (stream: dump_file, format: " [%s" , label); |
1077 | |
1078 | for (unsigned int j = 0; j < components->n_bits; j++) |
1079 | if (bitmap_bit_p (map: components, bitno: j)) |
1080 | fprintf (stream: dump_file, format: " %u" , j); |
1081 | |
1082 | fprintf (stream: dump_file, format: "]" ); |
1083 | } |
1084 | |
1085 | /* The data we collect for each bb. */ |
1086 | struct sw { |
1087 | /* What components does this BB need? */ |
1088 | sbitmap needs_components; |
1089 | |
1090 | /* What components does this BB have? This is the main decision this |
1091 | pass makes. */ |
1092 | sbitmap has_components; |
1093 | |
1094 | /* The components for which we placed code at the start of the BB (instead |
1095 | of on all incoming edges). */ |
1096 | sbitmap head_components; |
1097 | |
1098 | /* The components for which we placed code at the end of the BB (instead |
1099 | of on all outgoing edges). */ |
1100 | sbitmap tail_components; |
1101 | |
1102 | /* The frequency of executing the prologue for this BB, if a prologue is |
1103 | placed on this BB. This is a pessimistic estimate (no prologue is |
1104 | needed for edges from blocks that have the component under consideration |
1105 | active already). */ |
1106 | gcov_type own_cost; |
1107 | |
1108 | /* The frequency of executing the prologue for this BB and all BBs |
1109 | dominated by it. */ |
1110 | gcov_type total_cost; |
1111 | }; |
1112 | |
1113 | /* A helper function for accessing the pass-specific info. */ |
1114 | static inline struct sw * |
1115 | SW (basic_block bb) |
1116 | { |
1117 | gcc_assert (bb->aux); |
1118 | return (struct sw *) bb->aux; |
1119 | } |
1120 | |
1121 | /* Create the pass-specific data structures for separately shrink-wrapping |
1122 | with components COMPONENTS. */ |
1123 | static void |
1124 | init_separate_shrink_wrap (sbitmap components) |
1125 | { |
1126 | basic_block bb; |
1127 | FOR_ALL_BB_FN (bb, cfun) |
1128 | { |
1129 | bb->aux = xcalloc (1, sizeof (struct sw)); |
1130 | |
1131 | SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb); |
1132 | |
1133 | /* Mark all basic blocks without successor as needing all components. |
1134 | This avoids problems in at least cfgcleanup, sel-sched, and |
1135 | regrename (largely to do with all paths to such a block still |
1136 | needing the same dwarf CFI info). */ |
1137 | if (EDGE_COUNT (bb->succs) == 0) |
1138 | bitmap_copy (SW (bb)->needs_components, components); |
1139 | |
1140 | if (dump_file) |
1141 | { |
1142 | fprintf (stream: dump_file, format: "bb %d components:" , bb->index); |
1143 | dump_components (label: "has" , components: SW (bb)->needs_components); |
1144 | fprintf (stream: dump_file, format: "\n" ); |
1145 | } |
1146 | |
1147 | SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components)); |
1148 | SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components)); |
1149 | SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components)); |
1150 | bitmap_clear (SW (bb)->has_components); |
1151 | } |
1152 | } |
1153 | |
1154 | /* Destroy the pass-specific data. */ |
1155 | static void |
1156 | fini_separate_shrink_wrap (void) |
1157 | { |
1158 | basic_block bb; |
1159 | FOR_ALL_BB_FN (bb, cfun) |
1160 | if (bb->aux) |
1161 | { |
1162 | sbitmap_free (map: SW (bb)->needs_components); |
1163 | sbitmap_free (map: SW (bb)->has_components); |
1164 | sbitmap_free (map: SW (bb)->head_components); |
1165 | sbitmap_free (map: SW (bb)->tail_components); |
1166 | free (ptr: bb->aux); |
1167 | bb->aux = 0; |
1168 | } |
1169 | } |
1170 | |
1171 | /* Place the prologue for component WHICH, in the basic blocks dominated |
1172 | by HEAD. Do a DFS over the dominator tree, and set bit WHICH in the |
1173 | HAS_COMPONENTS of a block if either the block has that bit set in |
1174 | NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all |
1175 | dominator subtrees separately. */ |
1176 | static void |
1177 | place_prologue_for_one_component (unsigned int which, basic_block head) |
1178 | { |
1179 | /* The block we are currently dealing with. */ |
1180 | basic_block bb = head; |
1181 | /* Is this the first time we visit this block, i.e. have we just gone |
1182 | down the tree. */ |
1183 | bool first_visit = true; |
1184 | |
1185 | /* Walk the dominator tree, visit one block per iteration of this loop. |
1186 | Each basic block is visited twice: once before visiting any children |
1187 | of the block, and once after visiting all of them (leaf nodes are |
1188 | visited only once). As an optimization, we do not visit subtrees |
1189 | that can no longer influence the prologue placement. */ |
1190 | for (;;) |
1191 | { |
1192 | /* First visit of a block: set the (children) cost accumulator to zero; |
1193 | if the block does not have the component itself, walk down. */ |
1194 | if (first_visit) |
1195 | { |
1196 | /* Initialize the cost. The cost is the block execution frequency |
1197 | that does not come from backedges. Calculating this by simply |
1198 | adding the cost of all edges that aren't backedges does not |
1199 | work: this does not always add up to the block frequency at |
1200 | all, and even if it does, rounding error makes for bad |
1201 | decisions. */ |
1202 | SW (bb)->own_cost = bb->count.to_frequency (cfun); |
1203 | |
1204 | edge e; |
1205 | edge_iterator ei; |
1206 | FOR_EACH_EDGE (e, ei, bb->preds) |
1207 | if (dominated_by_p (CDI_DOMINATORS, e->src, bb)) |
1208 | { |
1209 | if (SW (bb)->own_cost > EDGE_FREQUENCY (e)) |
1210 | SW (bb)->own_cost -= EDGE_FREQUENCY (e); |
1211 | else |
1212 | SW (bb)->own_cost = 0; |
1213 | } |
1214 | |
1215 | SW (bb)->total_cost = 0; |
1216 | |
1217 | if (!bitmap_bit_p (map: SW (bb)->needs_components, bitno: which) |
1218 | && first_dom_son (CDI_DOMINATORS, bb)) |
1219 | { |
1220 | bb = first_dom_son (CDI_DOMINATORS, bb); |
1221 | continue; |
1222 | } |
1223 | } |
1224 | |
1225 | /* If this block does need the component itself, or it is cheaper to |
1226 | put the prologue here than in all the descendants that need it, |
1227 | mark it so. If this block's immediate post-dominator is dominated |
1228 | by this block, and that needs the prologue, we can put it on this |
1229 | block as well (earlier is better). */ |
1230 | if (bitmap_bit_p (map: SW (bb)->needs_components, bitno: which) |
1231 | || SW (bb)->total_cost > SW (bb)->own_cost) |
1232 | { |
1233 | SW (bb)->total_cost = SW (bb)->own_cost; |
1234 | bitmap_set_bit (map: SW (bb)->has_components, bitno: which); |
1235 | } |
1236 | else |
1237 | { |
1238 | basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb); |
1239 | if (dominated_by_p (CDI_DOMINATORS, kid, bb) |
1240 | && bitmap_bit_p (map: SW (bb: kid)->has_components, bitno: which)) |
1241 | { |
1242 | SW (bb)->total_cost = SW (bb)->own_cost; |
1243 | bitmap_set_bit (map: SW (bb)->has_components, bitno: which); |
1244 | } |
1245 | } |
1246 | |
1247 | /* We are back where we started, so we are done now. */ |
1248 | if (bb == head) |
1249 | return; |
1250 | |
1251 | /* We now know the cost of the subtree rooted at the current block. |
1252 | Accumulate this cost in the parent. */ |
1253 | basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb); |
1254 | SW (bb: parent)->total_cost += SW (bb)->total_cost; |
1255 | |
1256 | /* Don't walk the tree down unless necessary. */ |
1257 | if (next_dom_son (CDI_DOMINATORS, bb) |
1258 | && SW (bb: parent)->total_cost <= SW (bb: parent)->own_cost) |
1259 | { |
1260 | bb = next_dom_son (CDI_DOMINATORS, bb); |
1261 | first_visit = true; |
1262 | } |
1263 | else |
1264 | { |
1265 | bb = parent; |
1266 | first_visit = false; |
1267 | } |
1268 | } |
1269 | } |
1270 | |
1271 | /* Set HAS_COMPONENTS in every block to the maximum it can be set to without |
1272 | setting it on any path from entry to exit where it was not already set |
1273 | somewhere (or, for blocks that have no path to the exit, consider only |
1274 | paths from the entry to the block itself). Return whether any changes |
1275 | were made to some HAS_COMPONENTS. */ |
1276 | static bool |
1277 | spread_components (sbitmap components) |
1278 | { |
1279 | basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
1280 | basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun); |
1281 | |
1282 | /* A stack of all blocks left to consider, and a bitmap of all blocks |
1283 | on that stack. */ |
1284 | vec<basic_block> todo; |
1285 | todo.create (n_basic_blocks_for_fn (cfun)); |
1286 | auto_bitmap seen; |
1287 | |
1288 | auto_sbitmap old (SBITMAP_SIZE (components)); |
1289 | |
1290 | /* Find for every block the components that are *not* needed on some path |
1291 | from the entry to that block. Do this with a flood fill from the entry |
1292 | block. Every block can be visited at most as often as the number of |
1293 | components (plus one), and usually much less often. */ |
1294 | |
1295 | if (dump_file) |
1296 | fprintf (stream: dump_file, format: "Spreading down...\n" ); |
1297 | |
1298 | basic_block bb; |
1299 | FOR_ALL_BB_FN (bb, cfun) |
1300 | bitmap_clear (SW (bb)->head_components); |
1301 | |
1302 | bitmap_copy (SW (bb: entry_block)->head_components, components); |
1303 | |
1304 | edge e; |
1305 | edge_iterator ei; |
1306 | |
1307 | todo.quick_push (obj: single_succ (bb: entry_block)); |
1308 | bitmap_set_bit (seen, single_succ (bb: entry_block)->index); |
1309 | while (!todo.is_empty ()) |
1310 | { |
1311 | bb = todo.pop (); |
1312 | |
1313 | bitmap_copy (old, SW (bb)->head_components); |
1314 | |
1315 | FOR_EACH_EDGE (e, ei, bb->preds) |
1316 | bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, |
1317 | SW (bb: e->src)->head_components); |
1318 | |
1319 | bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components, |
1320 | SW (bb)->has_components); |
1321 | |
1322 | if (!bitmap_equal_p (old, SW (bb)->head_components)) |
1323 | FOR_EACH_EDGE (e, ei, bb->succs) |
1324 | if (bitmap_set_bit (seen, e->dest->index)) |
1325 | todo.quick_push (obj: e->dest); |
1326 | |
1327 | bitmap_clear_bit (seen, bb->index); |
1328 | } |
1329 | |
1330 | /* Find for every block the components that are *not* needed on some reverse |
1331 | path from the exit to that block. */ |
1332 | |
1333 | if (dump_file) |
1334 | fprintf (stream: dump_file, format: "Spreading up...\n" ); |
1335 | |
1336 | /* First, mark all blocks not reachable from the exit block as not needing |
1337 | any component on any path to the exit. Mark everything, and then clear |
1338 | again by a flood fill. */ |
1339 | |
1340 | FOR_ALL_BB_FN (bb, cfun) |
1341 | bitmap_copy (SW (bb)->tail_components, components); |
1342 | |
1343 | FOR_EACH_EDGE (e, ei, exit_block->preds) |
1344 | { |
1345 | todo.quick_push (obj: e->src); |
1346 | bitmap_set_bit (seen, e->src->index); |
1347 | } |
1348 | |
1349 | while (!todo.is_empty ()) |
1350 | { |
1351 | bb = todo.pop (); |
1352 | |
1353 | if (!bitmap_empty_p (SW (bb)->tail_components)) |
1354 | FOR_EACH_EDGE (e, ei, bb->preds) |
1355 | if (bitmap_set_bit (seen, e->src->index)) |
1356 | todo.quick_push (obj: e->src); |
1357 | |
1358 | bitmap_clear (SW (bb)->tail_components); |
1359 | |
1360 | bitmap_clear_bit (seen, bb->index); |
1361 | } |
1362 | |
1363 | /* And then, flood fill backwards to find for every block the components |
1364 | not needed on some path to the exit. */ |
1365 | |
1366 | bitmap_copy (SW (bb: exit_block)->tail_components, components); |
1367 | |
1368 | FOR_EACH_EDGE (e, ei, exit_block->preds) |
1369 | { |
1370 | todo.quick_push (obj: e->src); |
1371 | bitmap_set_bit (seen, e->src->index); |
1372 | } |
1373 | |
1374 | while (!todo.is_empty ()) |
1375 | { |
1376 | bb = todo.pop (); |
1377 | |
1378 | bitmap_copy (old, SW (bb)->tail_components); |
1379 | |
1380 | FOR_EACH_EDGE (e, ei, bb->succs) |
1381 | bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, |
1382 | SW (bb: e->dest)->tail_components); |
1383 | |
1384 | bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components, |
1385 | SW (bb)->has_components); |
1386 | |
1387 | if (!bitmap_equal_p (old, SW (bb)->tail_components)) |
1388 | FOR_EACH_EDGE (e, ei, bb->preds) |
1389 | if (bitmap_set_bit (seen, e->src->index)) |
1390 | todo.quick_push (obj: e->src); |
1391 | |
1392 | bitmap_clear_bit (seen, bb->index); |
1393 | } |
1394 | |
1395 | todo.release (); |
1396 | |
1397 | /* Finally, mark everything not needed both forwards and backwards. */ |
1398 | |
1399 | bool did_changes = false; |
1400 | |
1401 | FOR_EACH_BB_FN (bb, cfun) |
1402 | { |
1403 | bitmap_copy (old, SW (bb)->has_components); |
1404 | |
1405 | bitmap_and (SW (bb)->head_components, SW (bb)->head_components, |
1406 | SW (bb)->tail_components); |
1407 | bitmap_and_compl (SW (bb)->has_components, components, |
1408 | SW (bb)->head_components); |
1409 | |
1410 | if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components)) |
1411 | did_changes = true; |
1412 | } |
1413 | |
1414 | FOR_ALL_BB_FN (bb, cfun) |
1415 | { |
1416 | if (dump_file) |
1417 | { |
1418 | fprintf (stream: dump_file, format: "bb %d components:" , bb->index); |
1419 | dump_components (label: "has" , components: SW (bb)->has_components); |
1420 | fprintf (stream: dump_file, format: "\n" ); |
1421 | } |
1422 | } |
1423 | |
1424 | return did_changes; |
1425 | } |
1426 | |
1427 | /* If we cannot handle placing some component's prologues or epilogues where |
1428 | we decided we should place them, unmark that component in COMPONENTS so |
1429 | that it is not wrapped separately. */ |
1430 | static void |
1431 | disqualify_problematic_components (sbitmap components) |
1432 | { |
1433 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1434 | auto_sbitmap epi (SBITMAP_SIZE (components)); |
1435 | |
1436 | basic_block bb; |
1437 | FOR_EACH_BB_FN (bb, cfun) |
1438 | { |
1439 | edge e; |
1440 | edge_iterator ei; |
1441 | FOR_EACH_EDGE (e, ei, bb->succs) |
1442 | { |
1443 | /* Find which components we want pro/epilogues for here. */ |
1444 | bitmap_and_compl (epi, SW (bb: e->src)->has_components, |
1445 | SW (bb: e->dest)->has_components); |
1446 | bitmap_and_compl (pro, SW (bb: e->dest)->has_components, |
1447 | SW (bb: e->src)->has_components); |
1448 | |
1449 | /* Ask the target what it thinks about things. */ |
1450 | if (!bitmap_empty_p (epi)) |
1451 | targetm.shrink_wrap.disqualify_components (components, e, epi, |
1452 | false); |
1453 | if (!bitmap_empty_p (pro)) |
1454 | targetm.shrink_wrap.disqualify_components (components, e, pro, |
1455 | true); |
1456 | |
1457 | /* If this edge doesn't need splitting, we're fine. */ |
1458 | if (single_pred_p (bb: e->dest) |
1459 | && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
1460 | continue; |
1461 | |
1462 | /* If the edge can be split, that is fine too. */ |
1463 | if ((e->flags & EDGE_ABNORMAL) == 0) |
1464 | continue; |
1465 | |
1466 | /* We also can handle sibcalls. */ |
1467 | if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
1468 | { |
1469 | gcc_assert (e->flags & EDGE_SIBCALL); |
1470 | continue; |
1471 | } |
1472 | |
1473 | /* Remove from consideration those components we would need |
1474 | pro/epilogues for on edges where we cannot insert them. */ |
1475 | bitmap_and_compl (components, components, epi); |
1476 | bitmap_and_compl (components, components, pro); |
1477 | |
1478 | if (dump_file && !bitmap_subset_p (epi, components)) |
1479 | { |
1480 | fprintf (stream: dump_file, format: " BAD epi %d->%d" , e->src->index, |
1481 | e->dest->index); |
1482 | if (e->flags & EDGE_EH) |
1483 | fprintf (stream: dump_file, format: " for EH" ); |
1484 | dump_components (label: "epi" , components: epi); |
1485 | fprintf (stream: dump_file, format: "\n" ); |
1486 | } |
1487 | |
1488 | if (dump_file && !bitmap_subset_p (pro, components)) |
1489 | { |
1490 | fprintf (stream: dump_file, format: " BAD pro %d->%d" , e->src->index, |
1491 | e->dest->index); |
1492 | if (e->flags & EDGE_EH) |
1493 | fprintf (stream: dump_file, format: " for EH" ); |
1494 | dump_components (label: "pro" , components: pro); |
1495 | fprintf (stream: dump_file, format: "\n" ); |
1496 | } |
1497 | } |
1498 | } |
1499 | } |
1500 | |
1501 | /* Place code for prologues and epilogues for COMPONENTS where we can put |
1502 | that code at the start of basic blocks. */ |
1503 | static void |
1504 | emit_common_heads_for_components (sbitmap components) |
1505 | { |
1506 | auto_sbitmap pro (SBITMAP_SIZE (components)); |
1507 | auto_sbitmap epi (SBITMAP_SIZE (components)); |
1508 | auto_sbitmap tmp (SBITMAP_SIZE (components)); |
1509 | |
1510 | basic_block bb; |
1511 | FOR_ALL_BB_FN (bb, cfun) |
1512 | bitmap_clear (SW (bb)->head_components); |
1513 | |
1514 | FOR_EACH_BB_FN (bb, cfun) |
1515 | { |
1516 | /* Find which prologue resp. epilogue components are needed for all |
1517 | predecessor edges to this block. */ |
1518 | |
1519 | /* First, select all possible components. */ |
1520 | bitmap_copy (epi, components); |
1521 | bitmap_copy (pro, components); |
1522 | |
1523 | edge e; |
1524 | edge_iterator ei; |
1525 | FOR_EACH_EDGE (e, ei, bb->preds) |
1526 | { |
1527 | if (e->flags & EDGE_ABNORMAL) |
1528 | { |
1529 | bitmap_clear (epi); |
1530 | bitmap_clear (pro); |
1531 | break; |
1532 | } |
1533 | |
1534 | /* Deselect those epilogue components that should not be inserted |
1535 | for this edge. */ |
1536 | bitmap_and_compl (tmp, SW (bb: e->src)->has_components, |
1537 | SW (bb: e->dest)->has_components); |
1538 | bitmap_and (epi, epi, tmp); |
1539 | |
1540 | /* Similar, for the prologue. */ |
1541 | bitmap_and_compl (tmp, SW (bb: e->dest)->has_components, |
1542 | SW (bb: e->src)->has_components); |
1543 | bitmap_and (pro, pro, tmp); |
1544 | } |

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
        }

      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
        }
    }
}

/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
        continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge, and also those that are already put at the head
             of the successor block.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (pro, pro, tmp);
        }
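      /* EPI and PRO now hold the components wanted on every outgoing edge
         that are not already covered by code placed at the head of the
         corresponding successor block.  */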

      /* If the last insn of this block is a control flow insn we cannot
         put anything after it.  We can put our code before it instead,
         but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
        {
          bitmap_clear (epi);
          bitmap_clear (pro);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
        }

      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
        }
    }
}

/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (!bb->aux)
        continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which pro/epilogue components are needed on this edge.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (epi, epi, components);
          bitmap_and (pro, pro, components);
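          /* For illustration, with hypothetical component numbers: if
             has_components (e->src) is {1, 2} and has_components (e->dest)
             is {1}, this edge wants the epilogue for component 2 and no
             prologue at all.  */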

          /* Deselect those we have already put at the head or tail of the
             edge's dest resp. src.  */
          bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
          bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
          bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
          bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
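          /* Whatever is still selected could not be merged into a block head
             or tail, so it has to be placed on the edge itself.  */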

          if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
            {
              if (dump_file)
                {
                  fprintf (dump_file, " %d->%d", e->src->index,
                           e->dest->index);
                  dump_components ("epi", epi);
                  dump_components ("pro", pro);
                  if (e->flags & EDGE_SIBCALL)
                    fprintf (dump_file, " (SIBCALL)");
                  else if (e->flags & EDGE_ABNORMAL)
                    fprintf (dump_file, " (ABNORMAL)");
                  fprintf (dump_file, "\n");
                }

              /* Put the epilogue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_epilogue_components (epi);
              rtx_insn *seq = get_insns ();
              end_sequence ();
              record_epilogue_seq (seq);

              if (e->flags & EDGE_SIBCALL)
                {
                  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

                  rtx_insn *insn = BB_END (e->src);
                  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
                  emit_insn_before (seq, insn);
                }
              else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
                {
                  gcc_assert (e->flags & EDGE_FALLTHRU);
                  basic_block new_bb = split_edge (e);
                  emit_insn_after (seq, BB_END (new_bb));
                }
              else
                insert_insn_on_edge (seq, e);

              /* Put the prologue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_prologue_components (pro);
              seq = get_insns ();
              end_sequence ();
              record_prologue_seq (seq);

              insert_insn_on_edge (seq, e);
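              /* Note that insert_insn_on_edge only queues the sequence;
                 all queued edge insertions are materialized (splitting
                 edges into new blocks where needed) by the call to
                 commit_edge_insertions below.  */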
            }
        }
    }

  commit_edge_insertions ();
}

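/* Return true if it is worth trying separate shrink-wrapping for the
   current function: the optimization is enabled, we optimize this
   function for speed, the target implements the necessary hook, and the
   function is not one of the "strange" cases we do not handle.  */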
bool
use_shrink_wrapping_separate (void)
{
  if (!(SHRINK_WRAPPING_ENABLED && flag_shrink_wrap_separate
        && optimize_function_for_speed_p (cfun)
        && targetm.shrink_wrap.get_separate_components))
    return false;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return false;

  return true;
}

/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would normally be put.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  if (!use_shrink_wrapping_separate ())
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;
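  /* What the components represent is entirely up to the target; a typical
     choice (made, for example, by the aarch64 and rs6000 ports) is one
     component per callee-saved register, but the code here does not
     care.  */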

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);
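  /* place_prologue_for_one_component (defined earlier in this file)
     decides, for one component at a time, which blocks that component
     should be active in, and records the decision in each block's
     has_components bitmap.  */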

  /* Try to minimize the number of saves and restores.  Do this as long as
     it changes anything.  This does not iterate more than a few times.  */
  int spread_times = 0;
  while (spread_components (components))
    {
      spread_times++;

      if (dump_file)
        fprintf (dump_file, "Now spread %d times.\n", spread_times);
    }

  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
        fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
        {
          fprintf (dump_file, "The components we wrap separately are");
          dump_components ("sep", components);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "... Inserting common heads...\n");
        }

      emit_common_heads_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Done.\n");

      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}