1/* SSA Jump Threading
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "predict.h"
25#include "tree.h"
26#include "gimple.h"
27#include "fold-const.h"
28#include "cfgloop.h"
29#include "gimple-iterator.h"
30#include "tree-cfg.h"
31#include "tree-ssa-threadupdate.h"
32#include "tree-ssa-loop.h"
33#include "cfganal.h"
34#include "tree-pass.h"
35#include "gimple-ssa.h"
36#include "tree-phinodes.h"
37#include "tree-inline.h"
38#include "tree-vectorizer.h"
39#include "value-range.h"
40#include "gimple-range.h"
41#include "tree-ssa-threadedge.h"
42#include "gimple-range-path.h"
43#include "ssa.h"
44#include "tree-cfgcleanup.h"
45#include "tree-pretty-print.h"
46#include "cfghooks.h"
47#include "dbgcnt.h"
48
49// Path registry for the backwards threader. After all paths have been
50// registered with register_path(), thread_through_all_blocks() is called
51// to modify the CFG.
52
53class back_threader_registry : public back_jt_path_registry
54{
55public:
56 bool register_path (const vec<basic_block> &, edge taken);
57};
58
59// Class to abstract the profitability code for the backwards threader.
60
61class back_threader_profitability
62{
63public:
64 back_threader_profitability (bool speed_p, gimple *stmt);
65 bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
66 bool profitable_path_p (const vec<basic_block> &,
67 edge taken, bool *irreducible_loop);
68private:
69 const bool m_speed_p;
70 int m_exit_jump_benefit;
71 bool m_threaded_multiway_branch;
72 // The following are computed by possibly_profitable_path_p
73 bool m_threaded_through_latch;
74 bool m_multiway_branch_in_path;
75 bool m_contains_hot_bb;
76 int m_n_insns;
77};
78
79back_threader_profitability::back_threader_profitability (bool speed_p,
80 gimple *last)
81 : m_speed_p (speed_p)
82{
83 m_threaded_multiway_branch = (gimple_code (g: last) == GIMPLE_SWITCH
84 || gimple_code (g: last) == GIMPLE_GOTO);
85 // The forward threader has estimate_threading_killed_stmts, in
86 // particular it estimates further DCE from eliminating the exit
87 // control stmt.
88 m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
89}
90
91// Back threader flags.
92#define BT_NONE 0
93// Generate fast code at the expense of code size.
94#define BT_SPEED 1
95// Resolve unknown SSAs on entry to a threading path. If set, use the
96// ranger. If not, assume all ranges on entry to a path are VARYING.
97#define BT_RESOLVE 2
98
99class back_threader
100{
101public:
102 back_threader (function *fun, unsigned flags, bool first);
103 ~back_threader ();
104 unsigned thread_blocks ();
105private:
106 void maybe_thread_block (basic_block bb);
107 bool debug_counter ();
108 edge maybe_register_path (back_threader_profitability &);
109 void maybe_register_path_dump (edge taken_edge);
110 void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
111 back_threader_profitability &);
112 edge find_taken_edge (const vec<basic_block> &path);
113 edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
114 edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
115 virtual void debug ();
116 virtual void dump (FILE *out);
117
118 back_threader_registry m_registry;
119
120 // Current path being analyzed.
121 auto_vec<basic_block> m_path;
122 // Hash to mark visited BBs while analyzing a path.
123 hash_set<basic_block> m_visited_bbs;
124 // The set of SSA names, any of which could potentially change the
125 // value of the final conditional in a path.
126 auto_bitmap m_imports;
127 // The last statement in the path.
128 gimple *m_last_stmt;
129 // Marker to differentiate unreachable edges.
130 static const edge UNREACHABLE_EDGE;
131 // Set to TRUE if unknown SSA names along a path should be resolved
132 // with the ranger. Otherwise, unknown SSA names are assumed to be
133 // VARYING. Setting to true is more precise but slower.
134 function *m_fun;
135 // Ranger for the path solver.
136 gimple_ranger *m_ranger;
137 unsigned m_flags;
138 // Set to TRUE for the first of each thread[12] pass or the first of
139 // each threadfull[12] pass. This is used to differentiate between
140 // the different threading passes so we can set up debug counters.
141 bool m_first;
142};
143
144// Used to differentiate unreachable edges, so we may stop the search
145// in a the given direction.
146const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147
148back_threader::back_threader (function *fun, unsigned flags, bool first)
149 : m_first (first)
150{
151 if (flags & BT_SPEED)
152 loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
153 else
154 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
155
156 m_fun = fun;
157 m_flags = flags;
158 m_last_stmt = NULL;
159
160 // The path solver needs EDGE_DFS_BACK in resolving mode.
161 if (flags & BT_RESOLVE)
162 mark_dfs_back_edges ();
163
164 m_ranger = new gimple_ranger;
165}
166
167back_threader::~back_threader ()
168{
169 delete m_ranger;
170 loop_optimizer_finalize ();
171}
172
173// A wrapper for the various debug counters for the threading passes.
174// Returns TRUE if it's OK to register the current threading
175// candidate.
176
177bool
178back_threader::debug_counter ()
179{
180 // The ethread pass is mostly harmless ;-).
181 if ((m_flags & BT_SPEED) == 0)
182 return true;
183
184 if (m_flags & BT_RESOLVE)
185 {
186 if (m_first && !dbg_cnt (index: back_threadfull1))
187 return false;
188
189 if (!m_first && !dbg_cnt (index: back_threadfull2))
190 return false;
191 }
192 else
193 {
194 if (m_first && !dbg_cnt (index: back_thread1))
195 return false;
196
197 if (!m_first && !dbg_cnt (index: back_thread2))
198 return false;
199 }
200 return true;
201}
202
203static void
204dump_path (FILE *dump_file, const vec<basic_block> &path)
205{
206 for (unsigned i = path.length (); i > 0; --i)
207 {
208 basic_block bb = path[i - 1];
209 fprintf (stream: dump_file, format: "%d", bb->index);
210 if (i > 1)
211 fprintf (stream: dump_file, format: "->");
212 }
213}
214
215// Dump details of an attempt to register a path.
216
217void
218back_threader::maybe_register_path_dump (edge taken)
219{
220 if (m_path.is_empty ())
221 return;
222
223 fprintf (stream: dump_file, format: "path: ");
224 dump_path (dump_file, path: m_path);
225 fprintf (stream: dump_file, format: "->");
226
227 if (taken == UNREACHABLE_EDGE)
228 fprintf (stream: dump_file, format: "xx REJECTED (unreachable)\n");
229 else if (taken)
230 fprintf (stream: dump_file, format: "%d SUCCESS\n", taken->dest->index);
231 else
232 fprintf (stream: dump_file, format: "xx REJECTED\n");
233}
234
235// If an outgoing edge can be determined out of the current path,
236// register it for jump threading and return the taken edge.
237//
238// Return NULL if it is unprofitable to thread this path, or the
239// outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240// unreachable.
241
242edge
243back_threader::maybe_register_path (back_threader_profitability &profit)
244{
245 edge taken_edge = find_taken_edge (path: m_path);
246
247 if (taken_edge && taken_edge != UNREACHABLE_EDGE)
248 {
249 bool irreducible = false;
250 if (profit.profitable_path_p (m_path, taken: taken_edge, irreducible_loop: &irreducible)
251 && debug_counter ()
252 && m_registry.register_path (m_path, taken: taken_edge))
253 {
254 if (irreducible)
255 vect_free_loop_info_assumptions (m_path[0]->loop_father);
256 }
257 else
258 taken_edge = NULL;
259 }
260
261 if (dump_file && (dump_flags & TDF_DETAILS))
262 maybe_register_path_dump (taken: taken_edge);
263
264 return taken_edge;
265}
266
267// Return the known taken edge out of a path. If the path can be
268// determined to be unreachable, return UNREACHABLE_EDGE. If no
269// outgoing edge can be calculated, return NULL.
270
271edge
272back_threader::find_taken_edge (const vec<basic_block> &path)
273{
274 gcc_checking_assert (path.length () > 1);
275 switch (gimple_code (g: m_last_stmt))
276 {
277 case GIMPLE_COND:
278 return find_taken_edge_cond (path, as_a<gcond *> (p: m_last_stmt));
279
280 case GIMPLE_SWITCH:
281 return find_taken_edge_switch (path, as_a<gswitch *> (p: m_last_stmt));
282
283 default:
284 return NULL;
285 }
286}
287
288// Same as find_taken_edge, but for paths ending in a switch.
289
290edge
291back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 gswitch *sw)
293{
294 tree name = gimple_switch_index (gs: sw);
295 int_range_max r;
296
297 path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 solver.range_of_expr (r, name, sw);
299
300 if (r.undefined_p ())
301 return UNREACHABLE_EDGE;
302
303 if (r.varying_p ())
304 return NULL;
305
306 tree label = find_case_label_range (sw, vr: &r);
307 if (!label)
308 return NULL;
309
310 return find_edge (gimple_bb (g: sw), label_to_block (cfun, CASE_LABEL (label)));
311}
312
313// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
314
315edge
316back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 gcond *cond)
318{
319 int_range_max r;
320
321 path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 solver.range_of_stmt (r, cond);
323
324 if (solver.unreachable_path_p ())
325 return UNREACHABLE_EDGE;
326
327 int_range<2> true_range = range_true ();
328 int_range<2> false_range = range_false ();
329
330 if (r == true_range || r == false_range)
331 {
332 edge e_true, e_false;
333 basic_block bb = gimple_bb (g: cond);
334 extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 return r == true_range ? e_true : e_false;
336 }
337 return NULL;
338}
339
340// Find jump threading paths to any of the SSA names in the
341// INTERESTING bitmap, and register any such paths.
342//
343// BB is the current path being processed.
344//
345// OVERALL_PATHS is the search space up to this block
346
347void
348back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
349 unsigned overall_paths,
350 back_threader_profitability &profit)
351{
352 if (m_visited_bbs.add (k: bb))
353 return;
354
355 m_path.safe_push (obj: bb);
356
357 // Try to resolve the path without looking back. Avoid resolving paths
358 // we know are large but are not (yet) recognized as Finite State Machine.
359 // ??? Ideally we'd explore the cheapest path to the loop backedge here,
360 // avoiding the exponential greedy search and only start that from there.
361 // Precomputing a path-size-to-immediate-dominator-of-successor for each
362 // edge might help here. Alternatively copying divergent control flow
363 // on the way to the backedge could be worthwhile.
364 bool large_non_fsm;
365 if (m_path.length () > 1
366 && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
367 || (!large_non_fsm
368 && maybe_register_path (profit))))
369 ;
370
371 // The backwards thread copier cannot copy blocks that do not belong
372 // to the same loop, so when the new source of the path entry no
373 // longer belongs to it we don't need to search further.
374 else if (m_path[0]->loop_father != bb->loop_father)
375 ;
376
377 // Continue looking for ways to extend the path but limit the
378 // search space along a branch
379 else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
380 <= (unsigned)param_max_jump_thread_paths)
381 {
382 // For further greedy searching we want to remove interesting
383 // names defined in BB but add ones on the PHI edges for the
384 // respective edges and adding imports from those stmts.
385 // We do this by starting with all names
386 // not defined in BB as interesting, collecting a list of
387 // interesting PHIs in BB on the fly. Then we iterate over
388 // predecessor edges, adding interesting PHI edge defs to
389 // the set of interesting names to consider when processing it.
390 auto_bitmap new_interesting;
391 auto_vec<int, 16> new_imports;
392 auto_vec<gphi *, 4> interesting_phis;
393 bitmap_iterator bi;
394 unsigned i;
395 auto_vec<tree, 16> worklist;
396 EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
397 {
398 tree name = ssa_name (i);
399 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
400 /* Imports remain interesting. */
401 if (gimple_bb (g: def_stmt) != bb)
402 {
403 bitmap_set_bit (new_interesting, i);
404 continue;
405 }
406 worklist.quick_push (obj: name);
407 while (!worklist.is_empty ())
408 {
409 tree name = worklist.pop ();
410 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
411 /* Newly discovered imports are interesting. */
412 if (gimple_bb (g: def_stmt) != bb)
413 {
414 bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
415 continue;
416 }
417 /* Local PHIs participate in renaming below. */
418 if (gphi *phi = dyn_cast<gphi *> (p: def_stmt))
419 {
420 tree res = gimple_phi_result (gs: phi);
421 if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
422 interesting_phis.safe_push (obj: phi);
423 }
424 /* For other local defs process their uses, amending
425 imports on the way. */
426 else
427 {
428 tree ssa[3];
429 unsigned lim = gimple_range_ssa_names (vec: ssa, vec_size: 3, stmt: def_stmt);
430 for (unsigned j = 0; j < lim; ++j)
431 {
432 tree rhs = ssa[j];
433 if (rhs
434 && bitmap_set_bit (m_imports,
435 SSA_NAME_VERSION (rhs)))
436 {
437 new_imports.safe_push (SSA_NAME_VERSION (rhs));
438 worklist.safe_push (obj: rhs);
439 }
440 }
441 }
442 }
443 }
444 if (!bitmap_empty_p (map: new_interesting)
445 || !interesting_phis.is_empty ())
446 {
447 auto_vec<int, 4> unwind (interesting_phis.length ());
448 auto_vec<int, 4> imports_unwind (interesting_phis.length ());
449 edge_iterator iter;
450 edge e;
451 FOR_EACH_EDGE (e, iter, bb->preds)
452 {
453 if (e->flags & EDGE_ABNORMAL
454 // This is like path_crosses_loops in profitable_path_p but
455 // more restrictive to avoid peeling off loop iterations (see
456 // tree-ssa/pr14341.c for an example).
457 // ??? Note this restriction only applied when visiting an
458 // interesting PHI with the former resolve_phi.
459 || (!interesting_phis.is_empty ()
460 && m_path[0]->loop_father != e->src->loop_father))
461 continue;
462 for (gphi *phi : interesting_phis)
463 {
464 tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
465 if (TREE_CODE (def) == SSA_NAME)
466 {
467 int ver = SSA_NAME_VERSION (def);
468 if (bitmap_set_bit (new_interesting, ver))
469 {
470 if (bitmap_set_bit (m_imports, ver))
471 imports_unwind.quick_push (obj: ver);
472 unwind.quick_push (obj: ver);
473 }
474 }
475 }
476 find_paths_to_names (bb: e->src, interesting: new_interesting, overall_paths,
477 profit);
478 // Restore new_interesting.
479 for (int def : unwind)
480 bitmap_clear_bit (new_interesting, def);
481 unwind.truncate (size: 0);
482 // Restore and m_imports.
483 for (int def : imports_unwind)
484 bitmap_clear_bit (m_imports, def);
485 imports_unwind.truncate (size: 0);
486 }
487 }
488 /* m_imports tracks all interesting names on the path, so when
489 backtracking we have to restore it. */
490 for (int j : new_imports)
491 bitmap_clear_bit (m_imports, j);
492 }
493 else if (dump_file && (dump_flags & TDF_DETAILS))
494 fprintf (stream: dump_file, format: " FAIL: Search space limit %d reached.\n",
495 param_max_jump_thread_paths);
496
497 // Reset things to their original state.
498 m_path.pop ();
499 m_visited_bbs.remove (k: bb);
500}
501
502// Search backwards from BB looking for paths where the final
503// conditional maybe threaded to a successor block. Record such paths
504// for jump threading.
505
506void
507back_threader::maybe_thread_block (basic_block bb)
508{
509 if (EDGE_COUNT (bb->succs) <= 1)
510 return;
511
512 gimple *stmt = *gsi_last_bb (bb);
513 if (!stmt)
514 return;
515
516 enum gimple_code code = gimple_code (g: stmt);
517 if (code != GIMPLE_SWITCH
518 && code != GIMPLE_COND)
519 return;
520
521 m_last_stmt = stmt;
522 m_visited_bbs.empty ();
523 m_path.truncate (size: 0);
524
525 // We compute imports of the path during discovery starting
526 // just with names used in the conditional.
527 bitmap_clear (m_imports);
528 ssa_op_iter iter;
529 tree name;
530 FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
531 {
532 if (!gimple_range_ssa_p (exp: name))
533 return;
534 bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
535 }
536
537 // Interesting is the set of imports we still not have see
538 // the definition of. So while imports only grow, the
539 // set of interesting defs dwindles and once empty we can
540 // stop searching.
541 auto_bitmap interesting;
542 bitmap_copy (interesting, m_imports);
543 back_threader_profitability profit (m_flags & BT_SPEED, stmt);
544 find_paths_to_names (bb, interesting, overall_paths: 1, profit);
545}
546
547DEBUG_FUNCTION void
548debug (const vec <basic_block> &path)
549{
550 dump_path (stderr, path);
551 fputc (c: '\n', stderr);
552}
553
554void
555back_threader::dump (FILE *out)
556{
557 fprintf (stream: out, format: "\nCandidates for pre-computation:\n");
558 fprintf (stream: out, format: "===================================\n");
559
560 bitmap_iterator bi;
561 unsigned i;
562
563 EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
564 {
565 tree name = ssa_name (i);
566 print_generic_expr (out, name, TDF_NONE);
567 fprintf (stream: out, format: "\n");
568 }
569}
570
571void
572back_threader::debug ()
573{
574 dump (stderr);
575}
576
577/* Examine jump threading path PATH and return TRUE if it is possibly
578 profitable to thread it, otherwise return FALSE. If this function
579 returns TRUE profitable_path_p might not be satisfied but when
580 the path is extended it might be. In particular indicate in
581 *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
582 but would be OK if we extend the path to cover the loop backedge.
583
584 ?? It seems we should be able to loosen some of the restrictions in
585 this function after loop optimizations have run. */
586
587bool
588back_threader_profitability::possibly_profitable_path_p
589 (const vec<basic_block> &m_path,
590 bool *large_non_fsm)
591{
592 gcc_checking_assert (!m_path.is_empty ());
593
594 /* We can an empty path here (excluding the DEF block) when the
595 statement that makes a conditional generate a compile-time
596 constant result is in the same block as the conditional.
597
598 That's not really a jump threading opportunity, but instead is
599 simple cprop & simplification. We could handle it here if we
600 wanted by wiring up all the incoming edges. If we run this
601 early in IPA, that might be worth doing. For now we just
602 reject that case. */
603 if (m_path.length () <= 1)
604 return false;
605
606 gimple_stmt_iterator gsi;
607 loop_p loop = m_path[0]->loop_father;
608
609 // We recompute the following, when we rewrite possibly_profitable_path_p
610 // to work incrementally on added BBs we have to unwind them on backtracking
611 m_n_insns = 0;
612 m_threaded_through_latch = false;
613 m_multiway_branch_in_path = false;
614 m_contains_hot_bb = false;
615
616 if (dump_file && (dump_flags & TDF_DETAILS))
617 fprintf (stream: dump_file, format: "Checking profitability of path (backwards): ");
618
619 /* Count the number of instructions on the path: as these instructions
620 will have to be duplicated, we will not record the path if there
621 are too many instructions on the path. Also check that all the
622 blocks in the path belong to a single loop. */
623 for (unsigned j = 0; j < m_path.length (); j++)
624 {
625 basic_block bb = m_path[j];
626
627 if (dump_file && (dump_flags & TDF_DETAILS))
628 fprintf (stream: dump_file, format: " bb:%i", bb->index);
629 /* Remember, blocks in the path are stored in opposite order in
630 the PATH array. The last entry in the array represents the
631 block with an outgoing edge that we will redirect to the jump
632 threading path. Thus we don't care how many statements are
633 in that block because it will not be copied or whether or not
634 it ends in a multiway branch. */
635 if (j < m_path.length () - 1)
636 {
637 int orig_n_insns = m_n_insns;
638 if (!m_contains_hot_bb && m_speed_p)
639 m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
640 for (gsi = gsi_after_labels (bb);
641 !gsi_end_p (i: gsi);
642 gsi_next_nondebug (i: &gsi))
643 {
644 /* Do not allow OpenACC loop markers and __builtin_constant_p on
645 threading paths. The latter is disallowed, because an
646 expression might be constant on two threading paths, and
647 become non-constant (i.e.: phi) when they merge. */
648 gimple *stmt = gsi_stmt (i: gsi);
649 if (gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE)
650 || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
651 {
652 if (dump_file && (dump_flags & TDF_DETAILS))
653 fputc (c: '\n', stream: dump_file);
654 return false;
655 }
656 /* Do not count empty statements and labels. */
657 if (gimple_code (g: stmt) != GIMPLE_NOP
658 && !is_gimple_debug (gs: stmt))
659 m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
660 }
661 if (dump_file && (dump_flags & TDF_DETAILS))
662 fprintf (stream: dump_file, format: " (%i insns)", m_n_insns-orig_n_insns);
663
664 /* We do not look at the block with the threaded branch
665 in this loop. So if any block with a last statement that
666 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
667 multiway branch on our path.
668
669 The block in PATH[0] is special, it's the block were we're
670 going to be able to eliminate its branch. */
671 if (j > 0)
672 {
673 gimple *last = *gsi_last_bb (bb);
674 if (last
675 && (gimple_code (g: last) == GIMPLE_SWITCH
676 || gimple_code (g: last) == GIMPLE_GOTO))
677 m_multiway_branch_in_path = true;
678 }
679 }
680
681 /* Note if we thread through the latch, we will want to include
682 the last entry in the array when determining if we thread
683 through the loop latch. */
684 if (loop->latch == bb)
685 {
686 m_threaded_through_latch = true;
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 fprintf (stream: dump_file, format: " (latch)");
689 }
690 }
691
692 /* We are going to remove the control statement at the end of the
693 last block in the threading path. So don't count it against our
694 statement count. */
695 m_n_insns -= m_exit_jump_benefit;
696
697 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (stream: dump_file, format: "\n Control statement insns: %i\n"
699 " Overall: %i insns\n",
700 m_exit_jump_benefit, m_n_insns);
701
702 /* Threading is profitable if the path duplicated is hot but also
703 in a case we separate cold path from hot path and permit optimization
704 of the hot path later. Be on the agressive side here. In some testcases,
705 as in PR 78407 this leads to noticeable improvements. */
706 if (m_speed_p)
707 {
708 if (m_n_insns >= param_max_fsm_thread_path_insns)
709 {
710 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (stream: dump_file, format: " FAIL: Jump-thread path not considered: "
712 "the number of instructions on the path "
713 "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
714 return false;
715 }
716 edge entry = find_edge (m_path[m_path.length () - 1],
717 m_path[m_path.length () - 2]);
718 if (probably_never_executed_edge_p (cfun, entry))
719 {
720 if (dump_file && (dump_flags & TDF_DETAILS))
721 fprintf (stream: dump_file, format: " FAIL: Jump-thread path not considered: "
722 "path entry is probably never executed.\n");
723 return false;
724 }
725 }
726 else if (m_n_insns > 1)
727 {
728 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (stream: dump_file, format: " FAIL: Jump-thread path not considered: "
730 "duplication of %i insns is needed and optimizing for size.\n",
731 m_n_insns);
732 return false;
733 }
734
735 /* The generic copier used by the backthreader does not re-use an
736 existing threading path to reduce code duplication. So for that
737 case, drastically reduce the number of statements we are allowed
738 to copy. We don't know yet whether we will thread through the latch
739 so we have to be permissive and continue threading, but indicate
740 to the caller the thread, if final, wouldn't be profitable. */
741 if ((!m_threaded_multiway_branch
742 || !loop->latch
743 || loop->latch->index == EXIT_BLOCK)
744 && (m_n_insns * param_fsm_scale_path_stmts
745 >= param_max_jump_thread_duplication_stmts))
746 {
747 if (dump_file && (dump_flags & TDF_DETAILS))
748 fprintf (stream: dump_file,
749 format: " FAIL: Did not thread around loop and would copy too "
750 "many statements.\n");
751 return false;
752 }
753 *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
754 && (m_n_insns * param_fsm_scale_path_stmts
755 >= param_max_jump_thread_duplication_stmts));
756
757 if (dump_file && (dump_flags & TDF_DETAILS))
758 fputc (c: '\n', stream: dump_file);
759 return true;
760}
761
762/* Examine jump threading path PATH and return TRUE if it is profitable to
763 thread it, otherwise return FALSE.
764
765 The taken edge out of the path is TAKEN_EDGE.
766
767 CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
768 would create an irreducible loop.
769
770 ?? It seems we should be able to loosen some of the restrictions in
771 this function after loop optimizations have run. */
772
773bool
774back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
775 edge taken_edge,
776 bool *creates_irreducible_loop)
777{
778 // We can assume that possibly_profitable_path_p holds here
779
780 loop_p loop = m_path[0]->loop_father;
781
782 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (stream: dump_file, format: "Checking profitability of path (backwards): ");
784
785 /* If this path threaded through the loop latch back into the
786 same loop and the destination does not dominate the loop
787 latch, then this thread would create an irreducible loop. */
788 *creates_irreducible_loop = false;
789 if (m_threaded_through_latch
790 && loop == taken_edge->dest->loop_father
791 && (determine_bb_domination_status (loop, taken_edge->dest)
792 == DOMST_NONDOMINATING))
793 *creates_irreducible_loop = true;
794
795 /* Threading is profitable if the path duplicated is hot but also
796 in a case we separate cold path from hot path and permit optimization
797 of the hot path later. Be on the agressive side here. In some testcases,
798 as in PR 78407 this leads to noticeable improvements. */
799 if (m_speed_p
800 && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
801 {
802 if (probably_never_executed_edge_p (cfun, taken_edge))
803 {
804 if (dump_file && (dump_flags & TDF_DETAILS))
805 fprintf (stream: dump_file, format: " FAIL: Jump-thread path not considered: "
806 "path leads to probably never executed edge.\n");
807 return false;
808 }
809 }
810 else if (m_n_insns > 1)
811 {
812 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (stream: dump_file, format: " FAIL: Jump-thread path not considered: "
814 "duplication of %i insns is needed and optimizing for size.\n",
815 m_n_insns);
816 return false;
817 }
818
819 /* We avoid creating irreducible inner loops unless we thread through
820 a multiway branch, in which case we have deemed it worth losing
821 other loop optimizations later.
822
823 We also consider it worth creating an irreducible inner loop after
824 loop optimizations if the number of copied statement is low. */
825 if (!m_threaded_multiway_branch
826 && *creates_irreducible_loop
827 && (!(cfun->curr_properties & PROP_loop_opts_done)
828 || (m_n_insns * param_fsm_scale_path_stmts
829 >= param_max_jump_thread_duplication_stmts)))
830 {
831 if (dump_file && (dump_flags & TDF_DETAILS))
832 fprintf (stream: dump_file,
833 format: " FAIL: Would create irreducible loop early without "
834 "threading multiway branch.\n");
835 /* We compute creates_irreducible_loop only late. */
836 return false;
837 }
838
839 /* The generic copier used by the backthreader does not re-use an
840 existing threading path to reduce code duplication. So for that
841 case, drastically reduce the number of statements we are allowed
842 to copy. */
843 if (!(m_threaded_through_latch && m_threaded_multiway_branch)
844 && (m_n_insns * param_fsm_scale_path_stmts
845 >= param_max_jump_thread_duplication_stmts))
846 {
847 if (dump_file && (dump_flags & TDF_DETAILS))
848 fprintf (stream: dump_file,
849 format: " FAIL: Did not thread around loop and would copy too "
850 "many statements.\n");
851 return false;
852 }
853
854 /* When there is a multi-way branch on the path, then threading can
855 explode the CFG due to duplicating the edges for that multi-way
856 branch. So like above, only allow a multi-way branch on the path
857 if we actually thread a multi-way branch. */
858 if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
859 {
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (stream: dump_file,
862 format: " FAIL: Thread through multiway branch without threading "
863 "a multiway branch.\n");
864 return false;
865 }
866
867 /* Threading through an empty latch would cause code to be added to
868 the latch. This could alter the loop form sufficiently to cause
869 loop optimizations to fail. Disable these threads until after
870 loop optimizations have run. */
871 if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
872 && !(cfun->curr_properties & PROP_loop_opts_done)
873 && empty_block_p (loop->latch))
874 {
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (stream: dump_file,
877 format: " FAIL: Thread through latch before loop opts would create "
878 "non-empty latch\n");
879 return false;
880 }
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fputc (c: '\n', stream: dump_file);
883 return true;
884}
885
886
887/* The current path PATH is a vector of blocks forming a jump threading
888 path in reverse order. TAKEN_EDGE is the edge taken from path[0].
889
890 Convert the current path into the form used by register_jump_thread and
891 register it.
892
893 Return TRUE if successful or FALSE otherwise. */
894
895bool
896back_threader_registry::register_path (const vec<basic_block> &m_path,
897 edge taken_edge)
898{
899 vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
900
901 // The generic copier ignores the edge type. We can build the
902 // thread edges with any type.
903 for (unsigned int j = 0; j + 1 < m_path.length (); j++)
904 {
905 basic_block bb1 = m_path[m_path.length () - j - 1];
906 basic_block bb2 = m_path[m_path.length () - j - 2];
907
908 edge e = find_edge (bb1, bb2);
909 gcc_assert (e);
910 push_edge (path: jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
911 }
912
913 push_edge (path: jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
914 return register_jump_thread (jump_thread_path);
915}
916
917// Thread all suitable paths in the current function.
918//
919// Return TODO_flags.
920
921unsigned int
922back_threader::thread_blocks ()
923{
924 basic_block bb;
925 FOR_EACH_BB_FN (bb, m_fun)
926 if (EDGE_COUNT (bb->succs) > 1)
927 maybe_thread_block (bb);
928
929 bool changed = m_registry.thread_through_all_blocks (peel_loop_headers: true);
930
931 if (m_flags & BT_SPEED)
932 return changed ? TODO_cleanup_cfg : 0;
933
934 return false;
935}
936
937namespace {
938
939const pass_data pass_data_early_thread_jumps =
940{
941 .type: GIMPLE_PASS,
942 .name: "ethread",
943 .optinfo_flags: OPTGROUP_NONE,
944 .tv_id: TV_TREE_SSA_THREAD_JUMPS,
945 .properties_required: ( PROP_cfg | PROP_ssa ),
946 .properties_provided: 0,
947 .properties_destroyed: 0,
948 .todo_flags_start: 0,
949 .todo_flags_finish: ( TODO_cleanup_cfg | TODO_update_ssa ),
950};
951
952const pass_data pass_data_thread_jumps =
953{
954 .type: GIMPLE_PASS,
955 .name: "thread",
956 .optinfo_flags: OPTGROUP_NONE,
957 .tv_id: TV_TREE_SSA_THREAD_JUMPS,
958 .properties_required: ( PROP_cfg | PROP_ssa ),
959 .properties_provided: 0,
960 .properties_destroyed: 0,
961 .todo_flags_start: 0,
962 TODO_update_ssa,
963};
964
965const pass_data pass_data_thread_jumps_full =
966{
967 .type: GIMPLE_PASS,
968 .name: "threadfull",
969 .optinfo_flags: OPTGROUP_NONE,
970 .tv_id: TV_TREE_SSA_THREAD_JUMPS,
971 .properties_required: ( PROP_cfg | PROP_ssa ),
972 .properties_provided: 0,
973 .properties_destroyed: 0,
974 .todo_flags_start: 0,
975 TODO_update_ssa,
976};
977
978// Early jump threading pass optimizing for size.
979class pass_early_thread_jumps : public gimple_opt_pass
980{
981public:
982 pass_early_thread_jumps (gcc::context *ctxt)
983 : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
984 {}
985
986 opt_pass * clone () override
987 {
988 return new pass_early_thread_jumps (m_ctxt);
989 }
990 void set_pass_param (unsigned int, bool param) override
991 {
992 m_first = param;
993 }
994 bool gate (function *) override
995 {
996 return flag_thread_jumps;
997 }
998 unsigned int execute (function *fun) override
999 {
1000 back_threader threader (fun, BT_NONE, m_first);
1001 return threader.thread_blocks ();
1002 }
1003private:
1004 bool m_first;
1005};
1006
1007// Jump threading pass without resolving of unknown SSAs.
1008class pass_thread_jumps : public gimple_opt_pass
1009{
1010public:
1011 pass_thread_jumps (gcc::context *ctxt)
1012 : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1013 {}
1014 opt_pass * clone (void) override
1015 {
1016 return new pass_thread_jumps (m_ctxt);
1017 }
1018 void set_pass_param (unsigned int, bool param) override
1019 {
1020 m_first = param;
1021 }
1022 bool gate (function *) override
1023 {
1024 return flag_thread_jumps && flag_expensive_optimizations;
1025 }
1026 unsigned int execute (function *fun) override
1027 {
1028 back_threader threader (fun, BT_SPEED, m_first);
1029 return threader.thread_blocks ();
1030 }
1031private:
1032 bool m_first;
1033};
1034
1035// Jump threading pass that fully resolves unknown SSAs.
1036class pass_thread_jumps_full : public gimple_opt_pass
1037{
1038public:
1039 pass_thread_jumps_full (gcc::context *ctxt)
1040 : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1041 {}
1042 opt_pass * clone (void) override
1043 {
1044 return new pass_thread_jumps_full (m_ctxt);
1045 }
1046 void set_pass_param (unsigned int, bool param) override
1047 {
1048 m_first = param;
1049 }
1050 bool gate (function *) override
1051 {
1052 return flag_thread_jumps && flag_expensive_optimizations;
1053 }
1054 unsigned int execute (function *fun) override
1055 {
1056 back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1057 return threader.thread_blocks ();
1058 }
1059private:
1060 bool m_first;
1061};
1062
1063} // namespace {
1064
1065gimple_opt_pass *
1066make_pass_thread_jumps (gcc::context *ctxt)
1067{
1068 return new pass_thread_jumps (ctxt);
1069}
1070
1071gimple_opt_pass *
1072make_pass_thread_jumps_full (gcc::context *ctxt)
1073{
1074 return new pass_thread_jumps_full (ctxt);
1075}
1076
1077gimple_opt_pass *
1078make_pass_early_thread_jumps (gcc::context *ctxt)
1079{
1080 return new pass_early_thread_jumps (ctxt);
1081}
1082

source code of gcc/tree-ssa-threadbackward.cc