1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. |
3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. |
4 | Copyright (C) 2001-2023 Free Software Foundation, Inc. |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it |
9 | under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation; either version 3, or (at your option) |
11 | any later version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
16 | License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | /* This pass performs the tail duplication needed for superblock formation. |
23 | For more information see: |
24 | |
25 | Design and Analysis of Profile-Based Optimization in Compaq's |
26 | Compilation Tools for Alpha; Journal of Instruction-Level |
27 | Parallelism 3 (2000) 1-25 |
28 | |
29 | Unlike Compaq's implementation we don't do the loop peeling as most |
30 | probably a better job can be done by a special pass and we don't |
31 | need to worry too much about the code size implications as the tail |
32 | duplicates are crossjumped again if optimizations are not |
33 | performed. */ |
34 | |
35 | |
36 | #include "config.h" |
37 | #include "system.h" |
38 | #include "coretypes.h" |
39 | #include "backend.h" |
40 | #include "rtl.h" |
41 | #include "tree.h" |
42 | #include "gimple.h" |
43 | #include "cfghooks.h" |
44 | #include "tree-pass.h" |
45 | #include "profile.h" |
46 | #include "cfganal.h" |
47 | #include "gimple-iterator.h" |
48 | #include "tree-cfg.h" |
49 | #include "tree-ssa.h" |
50 | #include "tree-inline.h" |
51 | #include "cfgloop.h" |
52 | #include "alloc-pool.h" |
53 | #include "fibonacci_heap.h" |
54 | #include "tracer.h" |
55 | |
/* Forward declarations of the local trace-formation helpers below.  */
static void analyze_bb (basic_block, int *);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
static int find_trace (basic_block, basic_block *);

/* Minimal outgoing edge probability considered for superblock formation,
   scaled by REG_BR_PROB_BASE (set up in tail_duplicate).  */
static int probability_cutoff;
/* Minimal ratio of a predecessor edge's count to its destination block's
   count, scaled by REG_BR_PROB_BASE (see find_best_predecessor).  */
static int branch_ratio_cutoff;

/* A bit BB->index is set if BB has already been seen, i.e. it is
   connected to some trace already.  */
static sbitmap bb_seen;
69 | |
70 | static inline void |
71 | mark_bb_seen (basic_block bb) |
72 | { |
73 | unsigned int size = SBITMAP_SIZE (bb_seen); |
74 | |
75 | if ((unsigned int)bb->index >= size) |
76 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); |
77 | |
78 | bitmap_set_bit (map: bb_seen, bitno: bb->index); |
79 | } |
80 | |
81 | static inline bool |
82 | bb_seen_p (basic_block bb) |
83 | { |
84 | return bitmap_bit_p (map: bb_seen, bitno: bb->index); |
85 | } |
86 | |
/* Per-block cache of can_duplicate_block_p answers; bits are set by
   analyze_bb via cache_can_duplicate_bb_p, and a clear bit means "not
   duplicatable".  NULL when no cache is active.  */
static sbitmap can_duplicate_bb;
88 | |
89 | /* Cache VAL as value of can_duplicate_bb_p for BB. */ |
90 | static inline void |
91 | cache_can_duplicate_bb_p (const_basic_block bb, bool val) |
92 | { |
93 | if (val) |
94 | bitmap_set_bit (map: can_duplicate_bb, bitno: bb->index); |
95 | } |
96 | |
97 | /* Return cached value of can_duplicate_bb_p for BB. */ |
98 | static bool |
99 | cached_can_duplicate_bb_p (const_basic_block bb) |
100 | { |
101 | if (can_duplicate_bb) |
102 | { |
103 | unsigned int size = SBITMAP_SIZE (can_duplicate_bb); |
104 | if ((unsigned int)bb->index < size) |
105 | return bitmap_bit_p (map: can_duplicate_bb, bitno: bb->index); |
106 | |
107 | /* Assume added bb's should not be duplicated. */ |
108 | return false; |
109 | } |
110 | |
111 | return can_duplicate_block_p (bb); |
112 | } |
113 | |
114 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
115 | bool |
116 | ignore_bb_p (const_basic_block bb) |
117 | { |
118 | if (bb->index < NUM_FIXED_BLOCKS) |
119 | return true; |
120 | if (optimize_bb_for_size_p (bb)) |
121 | return true; |
122 | |
123 | return !cached_can_duplicate_bb_p (bb); |
124 | } |
125 | |
126 | /* Return number of instructions in the block. */ |
127 | |
128 | static void |
129 | analyze_bb (basic_block bb, int *count) |
130 | { |
131 | gimple_stmt_iterator gsi; |
132 | gimple *stmt; |
133 | int n = 0; |
134 | |
135 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
136 | { |
137 | stmt = gsi_stmt (i: gsi); |
138 | n += estimate_num_insns (stmt, &eni_size_weights); |
139 | } |
140 | *count = n; |
141 | |
142 | cache_can_duplicate_bb_p (bb, val: can_duplicate_block_p (CONST_CAST_BB (bb))); |
143 | } |
144 | |
145 | /* Return true if E1 is more frequent than E2. */ |
146 | static bool |
147 | better_p (const_edge e1, const_edge e2) |
148 | { |
149 | if ((e1->count () > e2->count ()) || (e1->count () < e2->count ())) |
150 | return e1->count () > e2->count (); |
151 | /* This is needed to avoid changes in the decision after |
152 | CFG is modified. */ |
153 | if (e1->src != e2->src) |
154 | return e1->src->index > e2->src->index; |
155 | return e1->dest->index > e2->dest->index; |
156 | } |
157 | |
158 | /* Return most frequent successor of basic block BB. */ |
159 | |
160 | static edge |
161 | find_best_successor (basic_block bb) |
162 | { |
163 | edge e; |
164 | edge best = NULL; |
165 | edge_iterator ei; |
166 | |
167 | FOR_EACH_EDGE (e, ei, bb->succs) |
168 | { |
169 | if (!e->count ().initialized_p ()) |
170 | return NULL; |
171 | if (!best || better_p (e1: e, e2: best)) |
172 | best = e; |
173 | } |
174 | if (!best || ignore_bb_p (bb: best->dest)) |
175 | return NULL; |
176 | if (!best->probability.initialized_p () |
177 | || best->probability.to_reg_br_prob_base () <= probability_cutoff) |
178 | return NULL; |
179 | return best; |
180 | } |
181 | |
182 | /* Return most frequent predecessor of basic block BB. */ |
183 | |
184 | static edge |
185 | find_best_predecessor (basic_block bb) |
186 | { |
187 | edge e; |
188 | edge best = NULL; |
189 | edge_iterator ei; |
190 | |
191 | FOR_EACH_EDGE (e, ei, bb->preds) |
192 | { |
193 | if (!e->count ().initialized_p ()) |
194 | return NULL; |
195 | if (!best || better_p (e1: e, e2: best)) |
196 | best = e; |
197 | } |
198 | if (!best || ignore_bb_p (bb: best->src)) |
199 | return NULL; |
200 | if (bb->count.initialized_p () |
201 | && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE |
202 | < bb->count.to_frequency (cfun) * branch_ratio_cutoff)) |
203 | return NULL; |
204 | return best; |
205 | } |
206 | |
207 | /* Find the trace using bb and record it in the TRACE array. |
208 | Return number of basic blocks recorded. */ |
209 | |
210 | static int |
211 | find_trace (basic_block bb, basic_block *trace) |
212 | { |
213 | int i = 0; |
214 | edge e; |
215 | |
216 | if (dump_file) |
217 | fprintf (stream: dump_file, format: "Trace seed %i [%i]" , bb->index, bb->count.to_frequency (cfun)); |
218 | |
219 | while ((e = find_best_predecessor (bb)) != NULL) |
220 | { |
221 | basic_block bb2 = e->src; |
222 | if (bb_seen_p (bb: bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
223 | || find_best_successor (bb: bb2) != e) |
224 | break; |
225 | if (dump_file) |
226 | fprintf (stream: dump_file, format: ",%i [%i]" , bb->index, bb->count.to_frequency (cfun)); |
227 | bb = bb2; |
228 | } |
229 | if (dump_file) |
230 | fprintf (stream: dump_file, format: " forward %i [%i]" , bb->index, bb->count.to_frequency (cfun)); |
231 | trace[i++] = bb; |
232 | |
233 | /* Follow the trace in forward direction. */ |
234 | while ((e = find_best_successor (bb)) != NULL) |
235 | { |
236 | bb = e->dest; |
237 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
238 | || find_best_predecessor (bb) != e) |
239 | break; |
240 | if (dump_file) |
241 | fprintf (stream: dump_file, format: ",%i [%i]" , bb->index, bb->count.to_frequency (cfun)); |
242 | trace[i++] = bb; |
243 | } |
244 | if (dump_file) |
245 | fprintf (stream: dump_file, format: "\n" ); |
246 | return i; |
247 | } |
248 | |
249 | /* Duplicate block BB2, placing it after BB in the CFG. Return the |
250 | newly created block. */ |
251 | basic_block |
252 | transform_duplicate (basic_block bb, basic_block bb2) |
253 | { |
254 | edge e; |
255 | basic_block copy; |
256 | |
257 | e = find_edge (bb, bb2); |
258 | |
259 | copy = duplicate_block (bb2, e, bb); |
260 | flush_pending_stmts (e); |
261 | |
262 | add_phi_args_after_copy (©, 1, NULL); |
263 | |
264 | return (copy); |
265 | } |
266 | |
/* Look for basic blocks in frequency order, construct traces and tail duplicate
   if profitable.  Returns true when the CFG was changed.  Duplication
   stops once the traced blocks cover enough weighted instructions
   (cover_insns) or the code growth budget (max_dup_insns) is spent.  */

static bool
tail_duplicate (void)
{
  /* Heap node for each live block, indexed by block index; NULL once a
     block has been removed from the heap.  */
  auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
  blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), exact: true);

  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  /* Per-block instruction-count estimates, filled by analyze_bb.  */
  int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int ninsns = 0, nduplicated = 0;
  gcov_type weighted_insns = 0, traced_insns = 0;
  /* Keys are negated frequencies, so extract_min yields the most
     frequent block first.  */
  fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
  gcov_type cover_insns;
  int max_dup_insns;
  basic_block bb;
  bool changed = false;

  /* Create an oversized sbitmap to reduce the chance that we need to
     resize it.  */
  bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
  bitmap_clear (bb_seen);
  can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (can_duplicate_bb);
  initialize_original_copy_tables ();

  /* Use the stricter feedback parameters when a real profile was read.  */
  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    probability_cutoff = param_tracer_min_branch_probability_feedback;
  else
    probability_cutoff = param_tracer_min_branch_probability;
  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

  branch_ratio_cutoff =
    (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);

  /* Size every block, seed the heap with traceable blocks, and total
     up the raw and frequency-weighted instruction counts.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      int n;
      analyze_bb (bb, count: &n);
      if (!ignore_bb_p (bb))
	blocks[bb->index] = heap.insert (key: -bb->count.to_frequency (cfun), data: bb);

      counts [bb->index] = n;
      ninsns += n;
      weighted_insns += n * bb->count.to_frequency (cfun);
    }

  if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
    cover_insns = param_tracer_dynamic_coverage_feedback;
  else
    cover_insns = param_tracer_dynamic_coverage;
  /* Both budgets are percentages; +50 rounds to nearest.  */
  cover_insns = (weighted_insns * cover_insns + 50) / 100;
  max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;

  /* Repeatedly grow a trace from the most frequent unseen block.  */
  while (traced_insns < cover_insns && nduplicated < max_dup_insns
	 && !heap.empty ())
    {
      basic_block bb = heap.extract_min ();
      int n, pos;

      if (!bb)
	break;

      blocks[bb->index] = NULL;

      /* ignore_bb_p may have changed since the block was queued (its
	 duplicability answer was cached in the meantime).  */
      if (ignore_bb_p (bb))
	continue;
      gcc_assert (!bb_seen_p (bb));

      n = find_trace (bb, trace);

      bb = trace[0];
      traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
      /* The trace head no longer needs to seed a trace of its own.  */
      if (blocks[bb->index])
	{
	  heap.delete_node (node: blocks[bb->index]);
	  blocks[bb->index] = NULL;
	}

      /* Walk the rest of the trace, tail-duplicating join blocks so
	 the trace becomes a single-entry superblock.  */
      for (pos = 1; pos < n; pos++)
	{
	  basic_block bb2 = trace[pos];

	  if (blocks[bb2->index])
	    {
	      heap.delete_node (node: blocks[bb2->index]);
	      blocks[bb2->index] = NULL;
	    }
	  traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
	  if (EDGE_COUNT (bb2->preds) > 1
	      && can_duplicate_block_p (bb2)
	      /* We have the tendency to duplicate the loop header
		 of all do { } while loops.  Do not do that - it is
		 not profitable and it might create a loop with multiple
		 entries or at least rotate the loop.  */
	      && bb2->loop_father->header != bb2)
	    {
	      nduplicated += counts [bb2->index];
	      basic_block copy = transform_duplicate (bb, bb2);

	      /* Reconsider the original copy of block we've duplicated.
		 Removing the most common predecessor may make it to be
		 head.  */
	      blocks[bb2->index] = heap.insert (key: -bb2->count.to_frequency (cfun), data: bb2);

	      if (dump_file)
		fprintf (stream: dump_file, format: "Duplicated %i as %i [%i]\n" ,
			 bb2->index, copy->index, copy->count.to_frequency (cfun));

	      bb2 = copy;
	      changed = true;
	    }
	  mark_bb_seen (bb: bb2);
	  bb = bb2;
	  /* In case the trace became infrequent, stop duplicating.  */
	  if (ignore_bb_p (bb))
	    break;
	}
      if (dump_file)
	fprintf (stream: dump_file, format: " covered now %.1f\n\n" ,
		 traced_insns * 100.0 / weighted_insns);
    }
  if (dump_file)
    fprintf (stream: dump_file, format: "Duplicated %i insns (%i%%)\n" , nduplicated,
	     nduplicated * 100 / ninsns);

  free_original_copy_tables ();
  sbitmap_free (map: bb_seen);
  sbitmap_free (map: can_duplicate_bb);
  can_duplicate_bb = NULL;
  free (ptr: trace);
  free (ptr: counts);

  return changed;
}
403 | |
404 | namespace { |
405 | |
/* Static pass metadata handed to the pass manager: a GIMPLE pass named
   "tracer" that requests an SSA update once it finishes.  */
const pass_data pass_data_tracer =
{
  .type: GIMPLE_PASS, /* type */
  .name: "tracer" , /* name */
  .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
  .tv_id: TV_TRACER, /* tv_id */
  .properties_required: 0, /* properties_required */
  .properties_provided: 0, /* properties_provided */
  .properties_destroyed: 0, /* properties_destroyed */
  .todo_flags_start: 0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
418 | |
/* Pass wrapper for the tracer; gated on -ftracer together with block
   reordering being enabled at -O1 or above.  */
class pass_tracer : public gimple_opt_pass
{
public:
  pass_tracer (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tracer, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
    {
      return (optimize > 0 && flag_tracer && flag_reorder_blocks);
    }

  unsigned int execute (function *) final override;

}; // class pass_tracer
435 | |
436 | unsigned int |
437 | pass_tracer::execute (function *fun) |
438 | { |
439 | bool changed; |
440 | |
441 | if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1) |
442 | return 0; |
443 | |
444 | mark_dfs_back_edges (); |
445 | if (dump_file) |
446 | brief_dump_cfg (dump_file, dump_flags); |
447 | |
448 | /* Trace formation is done on the fly inside tail_duplicate */ |
449 | changed = tail_duplicate (); |
450 | if (changed) |
451 | { |
452 | free_dominance_info (CDI_DOMINATORS); |
453 | /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ |
454 | loops_state_set (flags: LOOPS_NEED_FIXUP); |
455 | } |
456 | |
457 | if (dump_file) |
458 | brief_dump_cfg (dump_file, dump_flags); |
459 | |
460 | return changed ? TODO_cleanup_cfg : 0; |
461 | } |
462 | } // anon namespace |
463 | |
/* Factory for the tracer pass; called by the pass manager with the
   compiler context CTXT.  Caller owns the returned pass object.  */
gimple_opt_pass *
make_pass_tracer (gcc::context *ctxt)
{
  return new pass_tracer (ctxt);
}
469 | |