1 | /* SSA Jump Threading |
2 | Copyright (C) 2005-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "predict.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "fold-const.h" |
28 | #include "cfgloop.h" |
29 | #include "gimple-iterator.h" |
30 | #include "tree-cfg.h" |
31 | #include "tree-ssa-threadupdate.h" |
32 | #include "tree-ssa-loop.h" |
33 | #include "cfganal.h" |
34 | #include "tree-pass.h" |
35 | #include "gimple-ssa.h" |
36 | #include "tree-phinodes.h" |
37 | #include "tree-inline.h" |
38 | #include "tree-vectorizer.h" |
39 | #include "value-range.h" |
40 | #include "gimple-range.h" |
41 | #include "tree-ssa-threadedge.h" |
42 | #include "gimple-range-path.h" |
43 | #include "ssa.h" |
44 | #include "tree-cfgcleanup.h" |
45 | #include "tree-pretty-print.h" |
46 | #include "cfghooks.h" |
47 | #include "dbgcnt.h" |
48 | |
49 | // Path registry for the backwards threader. After all paths have been |
50 | // registered with register_path(), thread_through_all_blocks() is called |
51 | // to modify the CFG. |
52 | |
class back_threader_registry : public back_jt_path_registry
{
public:
  // Queue PATH with outgoing edge TAKEN for later CFG modification by
  // thread_through_all_blocks ().  Returns true if the path was
  // accepted for registration.
  bool register_path (const vec<basic_block> &, edge taken);
};
58 | |
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  // STMT is the control statement ending the candidate paths; SPEED_P
  // selects optimizing for speed rather than size.
  back_threader_profitability (bool speed_p, gimple *stmt);
  // Quick check usable while a path is still being grown; also
  // computes the per-path statistics below.
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  // Final check once the taken edge out of the path is known.
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  // True when optimizing for speed, false for size.
  const bool m_speed_p;
  // Size estimate of the control statement that threading removes.
  int m_exit_jump_benefit;
  // True if the final branch of the path is a switch or computed goto.
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p:
  // True if the path contains the loop latch block.
  bool m_threaded_through_latch;
  // True if an intermediate block on the path ends in a switch or
  // computed goto.
  bool m_multiway_branch_in_path;
  // True if any block to be copied is optimized for speed (hot).
  bool m_contains_hot_bb;
  // Estimated number of instructions the thread would duplicate.
  int m_n_insns;
};
78 | |
79 | back_threader_profitability::back_threader_profitability (bool speed_p, |
80 | gimple *last) |
81 | : m_speed_p (speed_p) |
82 | { |
83 | m_threaded_multiway_branch = (gimple_code (g: last) == GIMPLE_SWITCH |
84 | || gimple_code (g: last) == GIMPLE_GOTO); |
85 | // The forward threader has estimate_threading_killed_stmts, in |
86 | // particular it estimates further DCE from eliminating the exit |
87 | // control stmt. |
88 | m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights); |
89 | } |
90 | |
// Back threader flags.  These are OR'd together into the FLAGS
// argument of the back_threader constructor.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
98 | |
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  // Thread all suitable blocks in the function; returns a nonzero
  // value if the CFG changed.
  unsigned thread_blocks ();
private:
  // Search for and register threadable paths ending in BB.
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  // Registry collecting the candidate paths.
  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // Bitmask of BT_* flags.  BT_RESOLVE means unknown SSA names along
  // a path are resolved with the ranger; otherwise they are assumed
  // to be VARYING.  Resolving is more precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};
143 | |
// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147 | |
148 | back_threader::back_threader (function *fun, unsigned flags, bool first) |
149 | : m_first (first) |
150 | { |
151 | if (flags & BT_SPEED) |
152 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES); |
153 | else |
154 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
155 | |
156 | m_fun = fun; |
157 | m_flags = flags; |
158 | m_last_stmt = NULL; |
159 | |
160 | // The path solver needs EDGE_DFS_BACK in resolving mode. |
161 | if (flags & BT_RESOLVE) |
162 | mark_dfs_back_edges (); |
163 | |
164 | m_ranger = new gimple_ranger; |
165 | } |
166 | |
// Release the ranger and tear down the loop structures set up by the
// constructor.

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}
172 | |
173 | // A wrapper for the various debug counters for the threading passes. |
174 | // Returns TRUE if it's OK to register the current threading |
175 | // candidate. |
176 | |
177 | bool |
178 | back_threader::debug_counter () |
179 | { |
180 | // The ethread pass is mostly harmless ;-). |
181 | if ((m_flags & BT_SPEED) == 0) |
182 | return true; |
183 | |
184 | if (m_flags & BT_RESOLVE) |
185 | { |
186 | if (m_first && !dbg_cnt (index: back_threadfull1)) |
187 | return false; |
188 | |
189 | if (!m_first && !dbg_cnt (index: back_threadfull2)) |
190 | return false; |
191 | } |
192 | else |
193 | { |
194 | if (m_first && !dbg_cnt (index: back_thread1)) |
195 | return false; |
196 | |
197 | if (!m_first && !dbg_cnt (index: back_thread2)) |
198 | return false; |
199 | } |
200 | return true; |
201 | } |
202 | |
203 | static void |
204 | dump_path (FILE *dump_file, const vec<basic_block> &path) |
205 | { |
206 | for (unsigned i = path.length (); i > 0; --i) |
207 | { |
208 | basic_block bb = path[i - 1]; |
209 | fprintf (stream: dump_file, format: "%d" , bb->index); |
210 | if (i > 1) |
211 | fprintf (stream: dump_file, format: "->" ); |
212 | } |
213 | } |
214 | |
215 | // Dump details of an attempt to register a path. |
216 | |
217 | void |
218 | back_threader::maybe_register_path_dump (edge taken) |
219 | { |
220 | if (m_path.is_empty ()) |
221 | return; |
222 | |
223 | fprintf (stream: dump_file, format: "path: " ); |
224 | dump_path (dump_file, path: m_path); |
225 | fprintf (stream: dump_file, format: "->" ); |
226 | |
227 | if (taken == UNREACHABLE_EDGE) |
228 | fprintf (stream: dump_file, format: "xx REJECTED (unreachable)\n" ); |
229 | else if (taken) |
230 | fprintf (stream: dump_file, format: "%d SUCCESS\n" , taken->dest->index); |
231 | else |
232 | fprintf (stream: dump_file, format: "xx REJECTED\n" ); |
233 | } |
234 | |
// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (path: m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      // Register only if the path is profitable, the debug counter
      // permits it, and the registry accepts it; note the && chain
      // relies on this evaluation order.
      if (profit.profitable_path_p (m_path, taken: taken_edge, irreducible_loop: &irreducible)
	  && debug_counter ()
	  && m_registry.register_path (m_path, taken: taken_edge))
	{
	  // Threading that creates an irreducible loop drops the
	  // loop-info assumptions recorded for the containing loop.
	  if (irreducible)
	    vect_free_loop_info_assumptions (m_path[0]->loop_father);
	}
      else
	taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken: taken_edge);

  return taken_edge;
}
266 | |
267 | // Return the known taken edge out of a path. If the path can be |
268 | // determined to be unreachable, return UNREACHABLE_EDGE. If no |
269 | // outgoing edge can be calculated, return NULL. |
270 | |
271 | edge |
272 | back_threader::find_taken_edge (const vec<basic_block> &path) |
273 | { |
274 | gcc_checking_assert (path.length () > 1); |
275 | switch (gimple_code (g: m_last_stmt)) |
276 | { |
277 | case GIMPLE_COND: |
278 | return find_taken_edge_cond (path, as_a<gcond *> (p: m_last_stmt)); |
279 | |
280 | case GIMPLE_SWITCH: |
281 | return find_taken_edge_switch (path, as_a<gswitch *> (p: m_last_stmt)); |
282 | |
283 | default: |
284 | return NULL; |
285 | } |
286 | } |
287 | |
288 | // Same as find_taken_edge, but for paths ending in a switch. |
289 | |
290 | edge |
291 | back_threader::find_taken_edge_switch (const vec<basic_block> &path, |
292 | gswitch *sw) |
293 | { |
294 | tree name = gimple_switch_index (gs: sw); |
295 | int_range_max r; |
296 | |
297 | path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE); |
298 | solver.range_of_expr (r, name, sw); |
299 | |
300 | if (r.undefined_p ()) |
301 | return UNREACHABLE_EDGE; |
302 | |
303 | if (r.varying_p ()) |
304 | return NULL; |
305 | |
306 | tree label = find_case_label_range (sw, vr: &r); |
307 | if (!label) |
308 | return NULL; |
309 | |
310 | return find_edge (gimple_bb (g: sw), label_to_block (cfun, CASE_LABEL (label))); |
311 | } |
312 | |
313 | // Same as find_taken_edge, but for paths ending in a GIMPLE_COND. |
314 | |
315 | edge |
316 | back_threader::find_taken_edge_cond (const vec<basic_block> &path, |
317 | gcond *cond) |
318 | { |
319 | int_range_max r; |
320 | |
321 | path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE); |
322 | solver.range_of_stmt (r, cond); |
323 | |
324 | if (solver.unreachable_path_p ()) |
325 | return UNREACHABLE_EDGE; |
326 | |
327 | int_range<2> true_range = range_true (); |
328 | int_range<2> false_range = range_false (); |
329 | |
330 | if (r == true_range || r == false_range) |
331 | { |
332 | edge e_true, e_false; |
333 | basic_block bb = gimple_bb (g: cond); |
334 | extract_true_false_edges_from_block (bb, &e_true, &e_false); |
335 | return r == true_range ? e_true : e_false; |
336 | } |
337 | return NULL; |
338 | } |
339 | |
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the block currently being appended to the path being
// processed.
//
// OVERALL_PATHS is an estimate of the search space explored up to
// this block (the product of predecessor counts along the way).

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  // Don't revisit a block already on the current path.
  if (m_visited_bbs.add (k: bb))
    return;

  m_path.safe_push (obj: bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ??? Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned)param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (g: def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  // NAME is defined in BB: walk its defining stmts to find
	  // the names feeding it, transitively within BB.
	  worklist.quick_push (obj: name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (g: def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (p: def_stmt))
		{
		  tree res = gimple_phi_result (gs: phi);
		  // Abnormal PHIs cannot take part in the renaming.
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (obj: phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (vec: ssa, vec_size: 3, stmt: def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (obj: rhs);
			}
		    }
		}
	    }
	}
      if (!bitmap_empty_p (map: new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ??? Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      // Make the PHI arguments on this edge interesting (and
	      // imports) for the recursion, remembering what to undo.
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (obj: ver);
			  unwind.quick_push (obj: ver);
			}
		    }
		}
	      find_paths_to_names (bb: e->src, interesting: new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (size: 0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (size: 0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (k: bb);
}
501 | |
// Search backwards from BB looking for paths where the final
// conditional maybe threaded to a successor block.  Record such paths
// for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  // Only blocks with more than one successor can be threaded.
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  // Only conditionals and switches are supported path endings.
  enum gimple_code code = gimple_code (g: stmt);
  if (code != GIMPLE_SWITCH
      && code != GIMPLE_COND)
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (size: 0);

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  tree name;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      // Give up if an operand is unsuitable for range analysis.
      if (!gimple_range_ssa_p (exp: name))
	return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }

  // Interesting is the set of imports we have not yet seen
  // the definition of.  So while imports only grow, the
  // set of interesting defs dwindles and once empty we can
  // stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, overall_paths: 1, profit);
}
546 | |
// Print PATH to stderr followed by a newline (debugger convenience).

DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc (c: '\n', stderr);
}
553 | |
554 | void |
555 | back_threader::dump (FILE *out) |
556 | { |
557 | fprintf (stream: out, format: "\nCandidates for pre-computation:\n" ); |
558 | fprintf (stream: out, format: "===================================\n" ); |
559 | |
560 | bitmap_iterator bi; |
561 | unsigned i; |
562 | |
563 | EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi) |
564 | { |
565 | tree name = ssa_name (i); |
566 | print_generic_expr (out, name, TDF_NONE); |
567 | fprintf (stream: out, format: "\n" ); |
568 | } |
569 | } |
570 | |
// Dump the candidate imports to stderr (debugger convenience).

void
back_threader::debug ()
{
  dump (stderr);
}
576 | |
/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   As a side effect this recomputes the per-path statistics
   (m_n_insns, m_threaded_through_latch, m_multiway_branch_in_path,
   m_contains_hot_bb) used by profitable_path_p.

   ?? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can see an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following, when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on backtracking
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  // Record whether any copied block is hot.
	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (i: gsi);
	       gsi_next_nondebug (i: &gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (i: gsi);
	      if (gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc (c: '\n', stream: dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (g: stmt) != GIMPLE_NOP
		  && !is_gimple_debug (gs: stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: " (%i insns)", m_n_insns-orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block were we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (g: last) == GIMPLE_SWITCH
		      || gimple_code (g: last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: " (latch)");
	}
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some testcases,
     as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      // The path entry edge is the edge into the last block of the
      // path array (remember blocks are stored in reverse).
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc (c: '\n', stream: dump_file);
  return true;
}
761 | |
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   Assumes possibly_profitable_path_p has already been called for the
   path, so the per-path statistics are up to date.

   ?? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "  FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statement is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "  FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file,
		 format: "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc (c: '\n', stream: dump_file);
  return true;
}
885 | |
886 | |
887 | /* The current path PATH is a vector of blocks forming a jump threading |
888 | path in reverse order. TAKEN_EDGE is the edge taken from path[0]. |
889 | |
890 | Convert the current path into the form used by register_jump_thread and |
891 | register it. |
892 | |
893 | Return TRUE if successful or FALSE otherwise. */ |
894 | |
895 | bool |
896 | back_threader_registry::register_path (const vec<basic_block> &m_path, |
897 | edge taken_edge) |
898 | { |
899 | vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path (); |
900 | |
901 | // The generic copier ignores the edge type. We can build the |
902 | // thread edges with any type. |
903 | for (unsigned int j = 0; j + 1 < m_path.length (); j++) |
904 | { |
905 | basic_block bb1 = m_path[m_path.length () - j - 1]; |
906 | basic_block bb2 = m_path[m_path.length () - j - 2]; |
907 | |
908 | edge e = find_edge (bb1, bb2); |
909 | gcc_assert (e); |
910 | push_edge (path: jump_thread_path, e, EDGE_COPY_SRC_BLOCK); |
911 | } |
912 | |
913 | push_edge (path: jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK); |
914 | return register_jump_thread (jump_thread_path); |
915 | } |
916 | |
917 | // Thread all suitable paths in the current function. |
918 | // |
919 | // Return TODO_flags. |
920 | |
921 | unsigned int |
922 | back_threader::thread_blocks () |
923 | { |
924 | basic_block bb; |
925 | FOR_EACH_BB_FN (bb, m_fun) |
926 | if (EDGE_COUNT (bb->succs) > 1) |
927 | maybe_thread_block (bb); |
928 | |
929 | bool changed = m_registry.thread_through_all_blocks (peel_loop_headers: true); |
930 | |
931 | if (m_flags & BT_SPEED) |
932 | return changed ? TODO_cleanup_cfg : 0; |
933 | |
934 | return false; |
935 | } |
936 | |
937 | namespace { |
938 | |
939 | const pass_data pass_data_early_thread_jumps = |
940 | { |
941 | .type: GIMPLE_PASS, |
942 | .name: "ethread" , |
943 | .optinfo_flags: OPTGROUP_NONE, |
944 | .tv_id: TV_TREE_SSA_THREAD_JUMPS, |
945 | .properties_required: ( PROP_cfg | PROP_ssa ), |
946 | .properties_provided: 0, |
947 | .properties_destroyed: 0, |
948 | .todo_flags_start: 0, |
949 | .todo_flags_finish: ( TODO_cleanup_cfg | TODO_update_ssa ), |
950 | }; |
951 | |
952 | const pass_data pass_data_thread_jumps = |
953 | { |
954 | .type: GIMPLE_PASS, |
955 | .name: "thread" , |
956 | .optinfo_flags: OPTGROUP_NONE, |
957 | .tv_id: TV_TREE_SSA_THREAD_JUMPS, |
958 | .properties_required: ( PROP_cfg | PROP_ssa ), |
959 | .properties_provided: 0, |
960 | .properties_destroyed: 0, |
961 | .todo_flags_start: 0, |
962 | TODO_update_ssa, |
963 | }; |
964 | |
965 | const pass_data pass_data_thread_jumps_full = |
966 | { |
967 | .type: GIMPLE_PASS, |
968 | .name: "threadfull" , |
969 | .optinfo_flags: OPTGROUP_NONE, |
970 | .tv_id: TV_TREE_SSA_THREAD_JUMPS, |
971 | .properties_required: ( PROP_cfg | PROP_ssa ), |
972 | .properties_provided: 0, |
973 | .properties_destroyed: 0, |
974 | .todo_flags_start: 0, |
975 | TODO_update_ssa, |
976 | }; |
977 | |
978 | // Early jump threading pass optimizing for size. |
979 | class pass_early_thread_jumps : public gimple_opt_pass |
980 | { |
981 | public: |
982 | pass_early_thread_jumps (gcc::context *ctxt) |
983 | : gimple_opt_pass (pass_data_early_thread_jumps, ctxt) |
984 | {} |
985 | |
986 | opt_pass * clone () override |
987 | { |
988 | return new pass_early_thread_jumps (m_ctxt); |
989 | } |
990 | void set_pass_param (unsigned int, bool param) override |
991 | { |
992 | m_first = param; |
993 | } |
994 | bool gate (function *) override |
995 | { |
996 | return flag_thread_jumps; |
997 | } |
998 | unsigned int execute (function *fun) override |
999 | { |
1000 | back_threader threader (fun, BT_NONE, m_first); |
1001 | return threader.thread_blocks (); |
1002 | } |
1003 | private: |
1004 | bool m_first; |
1005 | }; |
1006 | |
1007 | // Jump threading pass without resolving of unknown SSAs. |
1008 | class pass_thread_jumps : public gimple_opt_pass |
1009 | { |
1010 | public: |
1011 | pass_thread_jumps (gcc::context *ctxt) |
1012 | : gimple_opt_pass (pass_data_thread_jumps, ctxt) |
1013 | {} |
1014 | opt_pass * clone (void) override |
1015 | { |
1016 | return new pass_thread_jumps (m_ctxt); |
1017 | } |
1018 | void set_pass_param (unsigned int, bool param) override |
1019 | { |
1020 | m_first = param; |
1021 | } |
1022 | bool gate (function *) override |
1023 | { |
1024 | return flag_thread_jumps && flag_expensive_optimizations; |
1025 | } |
1026 | unsigned int execute (function *fun) override |
1027 | { |
1028 | back_threader threader (fun, BT_SPEED, m_first); |
1029 | return threader.thread_blocks (); |
1030 | } |
1031 | private: |
1032 | bool m_first; |
1033 | }; |
1034 | |
1035 | // Jump threading pass that fully resolves unknown SSAs. |
1036 | class pass_thread_jumps_full : public gimple_opt_pass |
1037 | { |
1038 | public: |
1039 | pass_thread_jumps_full (gcc::context *ctxt) |
1040 | : gimple_opt_pass (pass_data_thread_jumps_full, ctxt) |
1041 | {} |
1042 | opt_pass * clone (void) override |
1043 | { |
1044 | return new pass_thread_jumps_full (m_ctxt); |
1045 | } |
1046 | void set_pass_param (unsigned int, bool param) override |
1047 | { |
1048 | m_first = param; |
1049 | } |
1050 | bool gate (function *) override |
1051 | { |
1052 | return flag_thread_jumps && flag_expensive_optimizations; |
1053 | } |
1054 | unsigned int execute (function *fun) override |
1055 | { |
1056 | back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first); |
1057 | return threader.thread_blocks (); |
1058 | } |
1059 | private: |
1060 | bool m_first; |
1061 | }; |
1062 | |
1063 | } // namespace { |
1064 | |
/* Factory for the "thread" jump threading pass (speed, no full SSA
   resolution).  Called by the pass manager; it owns the result.  */

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}
1070 | |
/* Factory for the "threadfull" jump threading pass (speed, fully
   resolving unknown SSAs).  Called by the pass manager; it owns the
   result.  */

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}
1076 | |
/* Factory for the early "ethread" jump threading pass (size-biased).
   Called by the pass manager; it owns the result.  */

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}
1082 | |