/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "tree.h"
25#include "gimple.h"
26#include "cfghooks.h"
27#include "tree-pass.h"
28#include "ssa.h"
29#include "fold-const.h"
30#include "cfganal.h"
31#include "gimple-iterator.h"
32#include "tree-ssa.h"
33#include "tree-ssa-threadupdate.h"
34#include "cfgloop.h"
35#include "dbgcnt.h"
36#include "tree-cfg.h"
37#include "tree-vectorizer.h"
38#include "tree-pass.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1.  We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block.  In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path.  This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path.  */

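/* As a purely illustrative sketch (not taken from any particular test
   case), consider a source fragment such as:

     // hypothetical input
     if (a > 0)          // block B ends in this GIMPLE_COND
       x = 1;
     else
       x = 2;

   If an incoming edge A->B already guarantees that a > 0, the machinery
   below effectively rewrites the CFG so that A reaches a copy B' of B
   whose only successor is the "then" destination, removing the dynamic
   test on that path while keeping B intact for the remaining
   predecessors.  The names A, B and B' and the snippet above are for
   illustration only.  */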

/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};

jump_thread_path_allocator::jump_thread_path_allocator ()
{
  obstack_init (&m_obstack);
}

jump_thread_path_allocator::~jump_thread_path_allocator ()
{
  obstack_free (&m_obstack, NULL);
}

jump_thread_edge *
jump_thread_path_allocator::allocate_thread_edge (edge e,
						  jump_thread_edge_type type)
{
  void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
  return new (r) jump_thread_edge (e, type);
}

vec<jump_thread_edge *> *
jump_thread_path_allocator::allocate_thread_path ()
{
  // ?? Since the paths live in an obstack, we should be able to remove all
  // references to path->release() throughout the code.
  void *r = obstack_alloc (&m_obstack, sizeof (vec<jump_thread_edge *>));
  return new (r) vec<jump_thread_edge *> ();
}

jt_path_registry::jt_path_registry (bool backedge_threads)
{
  m_paths.create (5);
  m_num_threaded_edges = 0;
  m_backedge_threads = backedge_threads;
}

jt_path_registry::~jt_path_registry ()
{
  m_paths.release ();
}

fwd_jt_path_registry::fwd_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/false)
{
  m_removed_edges = new hash_table<struct removed_edges> (17);
  m_redirection_data = NULL;
}

fwd_jt_path_registry::~fwd_jt_path_registry ()
{
  delete m_removed_edges;
}

back_jt_path_registry::back_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/true)
{
}

void
jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
			     edge e, jump_thread_edge_type type)
{
  jump_thread_edge *x = m_allocator.allocate_thread_edge (e, type);
  path->safe_push (x);
}

vec<jump_thread_edge *> *
jt_path_registry::allocate_thread_path ()
{
  return m_allocator.allocate_thread_path ();
}

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file,
		       const vec<jump_thread_edge *> &path,
		       bool registering)
{
  if (registering)
    fprintf (dump_file,
	     "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
	     dbg_cnt_counter (registered_jump_thread),
	     path[0]->e->src->index, path[0]->e->dest->index);
  else
    fprintf (dump_file,
	     "  Cancelling jump thread: (%d, %d) incoming edge; ",
	     path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      fprintf (dump_file, " (%d, %d) ",
	       path[i]->e->src->index, path[i]->e->dest->index);
      switch (path[i]->type)
	{
	case EDGE_COPY_SRC_JOINER_BLOCK:
	  fprintf (dump_file, "joiner");
	  break;
	case EDGE_COPY_SRC_BLOCK:
	  fprintf (dump_file, "normal");
	  break;
	case EDGE_NO_COPY_SRC_BLOCK:
	  fprintf (dump_file, "nocopy");
	  break;
	default:
	  gcc_unreachable ();
	}

      if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
	fprintf (dump_file, " (back)");
    }
  fprintf (dump_file, "; \n");
}

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> &path)
{
  dump_jump_thread_path (stderr, path, true);
}

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> *path)
{
  debug (*path);
}

/* Release the memory associated with PATH, and if dumping is enabled,
   dump out the reason why the thread was canceled.  */

static void
cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (reason)
	fprintf (dump_file, "%s: ", reason);

      dump_jump_thread_path (dump_file, *path, false);
      fprintf (dump_file, "\n");
    }
  path->release ();
}

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the block index of the final edge in the path.  */

inline hashval_t
redirection_data::hash (const redirection_data *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
	  || (*path1)[i]->e != (*path2)[i]->e)
	return false;
    }

  return true;
}
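
/* A purely illustrative example of the hash/equal contract above: two
   registered paths such as

     A -> B -> C -> Z   and   D -> B -> E -> Z     (hypothetical blocks)

   hash to the same bucket because both end in Z, but equal () still
   distinguishes them since their interior edges differ.  Only paths
   that agree on every edge and duplication type from index 1 onward
   share a redirection_data entry, and thus a set of duplicate
   blocks.  */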

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* If we append debug stmts to the template block after creating it,
     this iterator won't be the last one in the block, and further
     copies of the template block shouldn't get debug stmts after
     it.  */
  gimple_stmt_iterator template_last_to_copy;

  /* Blocks duplicated for the thread.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;

  // Jump threading statistics.
  unsigned long num_threaded_edges;
};

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
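
/* For illustration only, a registering client conceptually does
   something like the following (a simplified sketch, not a verbatim
   excerpt from any caller; the real consumers live in the threaders
   and in thread_block_1 below):

     vec<jump_thread_edge *> *path = registry->allocate_thread_path ();
     registry->push_edge (path, incoming_edge, EDGE_START_JUMP_THREAD);
     ...
     incoming_edge->aux = path;
     // later read back via
     vec<jump_thread_edge *> *p = THREAD_PATH (incoming_edge);  */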

/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	{
	  free_dom_edge_info (e);
	  remove_edge (e);
	}
      else
	{
	  e->probability = profile_probability::always ();
	  ei_next (&ei);
	}
    }

  /* If the remaining edge is a loop exit, there must have been a removed
     edge that was not a loop exit.

     In that case BB and possibly other blocks were previously
     in the loop, but are now outside the loop.  Thus, we need
     to update the loop structures.  */
  if (single_succ_p (bb)
      && loop_outer (bb->loop_father)
      && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
    loops_state_set (LOOPS_NEED_FIXUP);
}

/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  */

static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count,
			    bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    {
      e->aux = NULL;

      /* If we duplicate a block with an outgoing edge marked as
	 EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
	 leak out of the current pass.

	 It would be better to simplify switch statements and remove
	 the edges before we get here, but the sequencing is nontrivial.  */
      e->flags &= ~EDGE_IGNORE;
    }

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->count = profile_count::uninitialized ();
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}

/* Given an edge E, look up and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  E is added to the list of incoming
   edges associated with the entry.  */

redirection_data *
fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = m_redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add E
	 to the list of incoming edges associated with the entry.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}

/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
   to see if it has a constant value in a flow sensitive manner.  Set
   LOCUS to the location of the constant PHI arg and return the value.
   Return DEF directly if either PATH or IDX is zero.  */

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, location_t *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if DEF has a constant
     value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack jump threading PATH from node IDX to see if the arg
   has a constant value; if so, copy the constant value instead of the
   argument itself.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
	       vec<jump_thread_edge *> *path, int idx)
{
  gphi_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = gimple_phi_arg_def (phi, src_indx);
      location_t locus = gimple_phi_arg_location (phi, src_indx);

      if (TREE_CODE (def) == SSA_NAME
	  && !virtual_operand_p (gimple_phi_result (phi)))
	def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}
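
/* To illustrate (using made-up SSA names rather than output from a real
   dump), if the destination block contains

     # x_3 = PHI <x_1(B), x_2(D)>

   and we duplicate B into B', copy_phi_args adds a third argument taken
   from the B slot, giving

     # x_3 = PHI <x_1(B), x_2(D), x_1(B')>

   or, when the backtracking above proves x_1 is a constant such as 42
   along the threaded path, that constant is used for the new B' slot
   instead.  */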

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.
   PATH and IDX are used to check if the new PHI argument has a constant
   value in a flow sensitive manner.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb,
			 vec<jump_thread_edge *> *path, int idx)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2, path, idx);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the start node in the jump threading path
   where we start to check whether the new PHI argument has a constant
   value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb, int idx)
{
  edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematic due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated.  Otherwise,
   return FALSE.  */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
				 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	return true;
    }
  return false;
}


/* Compute the amount of profile count coming into the jump threading
   path stored in RD that we are duplicating, returned in
   PATH_IN_COUNT_PTR, as well as the amount of counts flowing out of the
   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
   identify blocks duplicated for jump threading, which have duplicated
   edges that need to be ignored in the analysis.  Return true if the path
   contains a joiner, false otherwise.

   In the non-joiner case, this is straightforward - all the counts
   flowing into the jump threading path should flow through the duplicated
   block and out of the duplicated path.

   In the joiner case, it is very tricky.  Some of the counts flowing into
   the original path go offpath at the joiner.  The problem is that while
   we know how much total count goes off-path in the original control flow,
   we don't know how many of the counts corresponding to just the jump
   threading path go offpath at the joiner.

   For example, assume we have the following control flow and identified
   jump threading paths:

              A          B          C
               \         |         /
              Ea\        |Eb      / Ec
                 \       |       /
                  v      v      v
                        J        <-- Joiner
                       / \
                  Eoff/   \Eon
                     /     \
                    v       v
                  Soff     Son   <--- Normal
                           / \
                        Ed/   \ Ee
                         /     \
                        v       v
                        D       E

            Jump threading paths: A -> J -> Son -> D (path 1)
                                  C -> J -> Son -> E (path 2)

   Note that the control flow could be more complicated:
   - Each jump threading path may have more than one incoming edge.  I.e. A and
     Ea could represent multiple incoming blocks/edges that are included in
     path 1.
   - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     before or after the "normal" copy block).  These are not duplicated onto
     the jump threading path, as they are single-successor.
   - Any of the blocks along the path may have other incoming edges that
     are not part of any jump threading path, but add profile counts along
     the path.

   In the above example, after all jump threading is complete, we will
   end up with the following control flow:

            A           B            C
            |           |            |
          Ea|           |Eb          |Ec
            |           |            |
            v           v            v
           Ja           J            Jc
           / \         / \Eon'      / \
      Eona/   \    ---/---\--------/   \Eonc
         /     \  /  /     \           \
        v       vv  v       v           v
      Sona      Soff       Son         Sonc
         \                 / \          /
          \_____________  /   \   _____/
                        \ /     \ /
                         vv      v
                         D       E

   The main issue to notice here is that when we are processing path 1
   (A->J->Son->D) we need to figure out the outgoing edge weights to
   the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
   sum of the incoming weights to D remains Ed.  The problem with simply
   assuming that Ja (and Jc when processing path 2) has the same outgoing
   probabilities to its successors as the original block J, is that after
   all paths are processed and other edges/counts removed (e.g. none
   of Ec will reach D after processing path 2), we may end up with not
   enough count flowing along duplicated edge Sona->D.

   Therefore, in the case of a joiner, we keep track of all counts
   coming in along the current path, as well as from predecessors not
   on any jump threading path (Eb in the above example).  While we
   first assume that the duplicated Eona for Ja->Sona has the same
   probability as the original, we later compensate for other jump
   threading paths that may eliminate edges.  We do that by keeping track
   of all counts coming into the original path that are not in a jump
   thread (Eb in the above example, but as noted earlier, there could
   be other predecessors incoming to the path at various points, such
   as at Son).  Call this cumulative non-path count coming into the path
   before D as Enonpath.  We then ensure that the count from Sona->D is at
   least as big as (Ed - Enonpath), but no bigger than the minimum
   weight along the jump threading path.  The probabilities of both the
   original and duplicated joiner block J and Ja will be adjusted
   accordingly after the updates.  */
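
/* A small worked example of the estimate computed below, with made-up
   numbers: suppose the path's first block has a total incoming count of
   100, of which 60 arrives along edges registered for this path, so
   onpath_scale = 60/100.  If the joiner's on-path successor edge has a
   count of 80, this path is initially assumed to contribute
   80 * 60/100 = 48 to it (path_out_count), later clamped so that it is
   at least elast->count () minus the non-path count and at most the
   minimum count seen along the path.  */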

static bool
compute_path_counts (struct redirection_data *rd,
		     ssa_local_info_t *local_info,
		     profile_count *path_in_count_ptr,
		     profile_count *path_out_count_ptr)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count nonpath_count = profile_count::zero ();
  bool has_joiner = false;
  profile_count path_in_count = profile_count::zero ();

  /* Start by accumulating incoming edge counts to the path's first bb
     into a couple of buckets:
	path_in_count: total count of incoming edges that flow into the
		  current path.
	nonpath_count: total count of incoming edges that are not
		  flowing along *any* path.  These are the counts
		  that will still flow along the original path after
		  all path duplication is done by potentially multiple
		  calls to this routine.
     (any other incoming edge counts are for a different jump threading
      path that will be handled by a later call to this routine.)
     To make this easier, start by recording all incoming edges that flow into
     the current path in a bitmap.  We could add up the path's incoming edge
     counts here, but we still need to walk all the first bb's incoming edges
     below to add up the counts of the other edges not included in this jump
     threading path.  */
  struct el *next, *el;
  auto_bitmap in_edge_srcs;
  for (el = rd->incoming_edges; el; el = next)
    {
      next = el->next;
      bitmap_set_bit (in_edge_srcs, el->e->src->index);
    }
  edge ein;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
      /* Simply check the incoming edge src against the set captured above.  */
      if (ein_path
	  && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
	{
	  /* It is necessary but not sufficient that the last path edges
	     are identical.  There may be different paths that share the
	     same last path edge in the case where the last edge has a nocopy
	     source block.  */
	  gcc_assert (ein_path->last ()->e == elast);
	  path_in_count += ein->count ();
	}
      else if (!ein_path)
	{
	  /* Keep track of the incoming edges that are not on any jump-threading
	     path.  These counts will still flow out of the original path after
	     all jump threading is complete.  */
	  nonpath_count += ein->count ();
	}
    }

  /* Now compute the fraction of the total count coming into the first
     path bb that is from the current threading path.  */
  profile_count total_count = e->dest->count;
  /* Handle incoming profile insanities.  */
  if (total_count < path_in_count)
    path_in_count = total_count;
  profile_probability onpath_scale = path_in_count.probability_in (total_count);

  /* Walk the entire path to do some more computation in order to estimate
     how much of the path_in_count will flow out of the duplicated threading
     path.  In the non-joiner case this is straightforward (it should be
     the same as path_in_count, although we will handle incoming profile
     insanities by setting it equal to the minimum count along the path).

     In the joiner case, we need to estimate how much of the path_in_count
     will stay on the threading path after the joiner's conditional branch.
     We don't really know for sure how much of the counts
     associated with this path go to each successor of the joiner, but we'll
     estimate based on the fraction of the total count coming into the path
     bb that was from the threading paths (computed above in onpath_scale).
     Afterwards, we will need to do some fixup to account for other threading
     paths and possible profile insanities.

     In order to estimate the joiner case's counts we also need to update
     nonpath_count with any additional counts coming into the path.  Other
     blocks along the path may have additional predecessors from outside
     the path.  */
  profile_count path_out_count = path_in_count;
  profile_count min_path_count = path_in_count;
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      profile_count cur_count = epath->count ();
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  has_joiner = true;
	  cur_count = cur_count.apply_probability (onpath_scale);
	}
      /* In the joiner case we need to update nonpath_count for any edges
	 coming into the path that will contribute to the count flowing
	 into the path successor.  */
      if (has_joiner && epath != elast)
	{
	  /* Look for other incoming edges after joiner.  */
	  FOR_EACH_EDGE (ein, ei, epath->dest->preds)
	    {
	      if (ein != epath
		  /* Ignore in edges from blocks we have duplicated for a
		     threading path, which have duplicated edge counts until
		     they are redirected by an invocation of this routine.  */
		  && !bitmap_bit_p (local_info->duplicate_blocks,
				    ein->src->index))
		nonpath_count += ein->count ();
	    }
	}
      if (cur_count < path_out_count)
	path_out_count = cur_count;
      if (epath->count () < min_path_count)
	min_path_count = epath->count ();
    }

  /* We computed path_out_count above assuming that this path targeted
     the joiner's on-path successor with the same likelihood as it
     reached the joiner.  However, other thread paths through the joiner
     may take a different path through the normal copy source block
     (i.e. they have a different elast), meaning that they do not
     contribute any counts to this path's elast.  As a result, it may
     turn out that this path must have more count flowing to the on-path
     successor of the joiner.  Essentially, all of this path's elast
     count must be contributed by this path and any nonpath counts
     (since any path through the joiner with a different elast will not
     include a copy of this elast in its duplicated path).
     So ensure that this path's path_out_count is at least the
     difference between elast->count () and nonpath_count.  Otherwise the edge
     counts after threading will not be sane.  */
  if (local_info->need_profile_correction
      && has_joiner && path_out_count < elast->count () - nonpath_count)
    {
      path_out_count = elast->count () - nonpath_count;
      /* But neither can we go above the minimum count along the path
	 we are duplicating.  This can be an issue due to profile
	 insanities coming in to this pass.  */
      if (path_out_count > min_path_count)
	path_out_count = min_path_count;
    }

  *path_in_count_ptr = path_in_count;
  *path_out_count_ptr = path_out_count;
  return has_joiner;
}


/* Update the counts for both an original path edge EPATH and its
   duplicate EDUP.  The duplicate source block will get a count of
   PATH_IN_COUNT, and the duplicate edge EDUP will have a count of
   PATH_OUT_COUNT.  */

static void
update_profile (edge epath, edge edup, profile_count path_in_count,
		profile_count path_out_count)
{
  /* First update the duplicated block's count.  */
  if (edup)
    {
      basic_block dup_block = edup->src;

      /* Edup's count is reduced by path_out_count.  We need to redistribute
	 probabilities to the remaining edges.  */

      edge esucc;
      edge_iterator ei;
      profile_probability edup_prob
	= path_out_count.probability_in (path_in_count);

      /* Either scale up or down the remaining edges.
	 Probabilities are always in the range <0,1> and thus we can't do
	 both in the same loop.  */
      if (edup->probability > edup_prob)
	{
	  profile_probability rev_scale
	    = (profile_probability::always () - edup->probability)
	      / (profile_probability::always () - edup_prob);
	  FOR_EACH_EDGE (esucc, ei, dup_block->succs)
	    if (esucc != edup)
	      esucc->probability /= rev_scale;
	}
      else if (edup->probability < edup_prob)
	{
	  profile_probability scale
	    = (profile_probability::always () - edup_prob)
	      / (profile_probability::always () - edup->probability);
	  FOR_EACH_EDGE (esucc, ei, dup_block->succs)
	    if (esucc != edup)
	      esucc->probability *= scale;
	}
      if (edup_prob.initialized_p ())
	edup->probability = edup_prob;

      gcc_assert (!dup_block->count.initialized_p ());
      dup_block->count = path_in_count;
    }

  if (path_in_count == profile_count::zero ())
    return;

  profile_count final_count = epath->count () - path_out_count;

  /* Now update the original block's count in the
     opposite manner - remove the count that will flow
     into the duplicated block.  Handle underflow due to precision/
     rounding issues.  */
  epath->src->count -= path_in_count;

  /* Next update this path edge's original and duplicated counts.  We know
     that the duplicated path will have path_out_count flowing
     out of it (in the joiner case this is the count along the duplicated path
     out of the duplicated joiner).  This count can then be removed from the
     original path edge.  */

  edge esucc;
  edge_iterator ei;
  profile_probability epath_prob = final_count.probability_in (epath->src->count);

  if (epath->probability > epath_prob)
    {
      profile_probability rev_scale
	= (profile_probability::always () - epath->probability)
	  / (profile_probability::always () - epath_prob);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	if (esucc != epath)
	  esucc->probability /= rev_scale;
    }
  else if (epath->probability < epath_prob)
    {
      profile_probability scale
	= (profile_probability::always () - epath_prob)
	  / (profile_probability::always () - epath->probability);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	if (esucc != epath)
	  esucc->probability *= scale;
    }
  if (epath_prob.initialized_p ())
    epath->probability = epath_prob;
}
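
/* For intuition, a made-up example of the rescaling above: if the
   duplicate joiner's on-path edge originally had probability 0.5 but
   path_out_count / path_in_count works out to 0.8, the remaining
   outgoing probabilities (which summed to 0.5) are scaled by
   (1 - 0.8) / (1 - 0.5) = 0.4 so that they now sum to 0.2, and the
   on-path edge is then set to 0.8, keeping the distribution
   normalized.  The numbers are illustrative only.  */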

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count path_in_count = profile_count::zero ();
  profile_count path_out_count = profile_count::zero ();

  /* First determine how much profile count to move from the original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
					 &path_in_count, &path_out_count);

  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through a joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  gcc_assert (has_joiner);

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      if (victim->dest != elast->dest)
		{
		  e2 = redirect_edge_and_branch (victim, elast->dest);
		  /* If we redirected the edge, then we need to copy PHI arguments
		     at the target.  If the edge already existed (e2 != victim
		     case), then the PHIs in the target already have the correct
		     arguments.  */
		  if (e2 == victim)
		    copy_phi_args (e2->dest, elast, e2,
				   path, multi_incomings ? 0 : i);
		}
	      else
		e2 = victim;
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  The path duplicate's
	     incoming count is the total for all edges
	     incoming to this jump threading path computed earlier.
	     And we know that the duplicated path will have path_out_count
	     flowing out of it (i.e. along the duplicated path out of the
	     duplicated joiner).  */
	  update_profile (epath, e2, path_in_count, path_out_count);
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
			  path_out_count, path_out_count);
	}
      else
	{
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the counts/freqs that were moved
	     to the duplicated path from the counts/freqs flowing through
	     this block on the original path.  Since all the no-copy edges
	     are after any joiner, the removed count is the same as
	     path_out_count.

	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, NULL, path_out_count, path_out_count);
	}

      /* Increment the index into the duplicated path when we processed
	 a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  count++;
	}
    }
}

/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
				      &local_info->duplicate_blocks);
	  break;
	}
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0,
				  &local_info->duplicate_blocks);
      local_info->template_block = rd->dup_blocks[0];
      local_info->template_last_to_copy
	= gsi_last_bb (local_info->template_block);

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      gimple_seq seq = NULL;
      if (gsi_stmt (local_info->template_last_to_copy)
	  != gsi_stmt (gsi_last_bb (local_info->template_block)))
	{
	  if (gsi_end_p (local_info->template_last_to_copy))
	    {
	      seq = bb_seq (local_info->template_block);
	      set_bb_seq (local_info->template_block, NULL);
	    }
	  else
	    seq = gsi_split_seq_after (local_info->template_last_to_copy);
	}
      create_block_for_threading (local_info->template_block, rd, 0,
				  &local_info->duplicate_blocks);
      if (seq)
	{
	  if (gsi_end_p (local_info->template_last_to_copy))
	    set_bb_seq (local_info->template_block, seq);
	  else
	    gsi_insert_seq_after (&local_info->template_last_to_copy,
				  seq, GSI_SAME_STMT);
	}

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  if (MAY_HAVE_DEBUG_STMTS)
    {
      /* Copy debug stmts from each NO_COPY src block to the block
	 that would have been its predecessor, if we can append to it
	 (we can't add stmts after a block-ending stmt), or prepending
	 to the duplicate of the successor, if there is one.  If
	 there's no duplicate successor, we'll mostly drop the blocks
	 on the floor; propagate_threaded_block_debug_into, called
	 elsewhere, will consolidate and preserve the effects of the
	 binds, but none of the markers.  */
      gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
      if (!gsi_end_p (copy_to))
	{
	  if (stmt_ends_bb_p (gsi_stmt (copy_to)))
	    {
	      if (rd->dup_blocks[1])
		copy_to = gsi_after_labels (rd->dup_blocks[1]);
	      else
		copy_to = gsi_none ();
	    }
	  else
	    gsi_next (&copy_to);
	}
      for (unsigned int i = 2, j = 0; i < path->length (); i++)
	if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
	    && gsi_bb (copy_to))
	  {
	    for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
		 !gsi_end_p (gsi); gsi_next (&gsi))
	      {
		if (!is_gimple_debug (gsi_stmt (gsi)))
		  continue;
		gimple *stmt = gsi_stmt (gsi);
		gimple *copy = gimple_copy (stmt);
		gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
	      }
	  }
	else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
		 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	  {
	    j++;
	    gcc_assert (j < 2);
	    copy_to = gsi_last_bb (rd->dup_blocks[j]);
	    if (!gsi_end_p (copy_to))
	      {
		if (stmt_ends_bb_p (gsi_stmt (copy_to)))
		  copy_to = gsi_none ();
		else
		  gsi_next (&copy_to);
	      }
	  }
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
			  ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

static int
ssa_redirect_edges (struct redirection_data **slot,
		    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this hash table
     entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      local_info->num_threaded_edges++;

      if (rd->dup_blocks[0])
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

	  /* Redirect the incoming edge (possibly to the joiner block) to the
	     appropriate duplicate block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      path->release ();
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
	     || is_gimple_debug (gsi_stmt (gsi))
	     || gimple_nop_p (gsi_stmt (gsi))
	     || gimple_clobber_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}
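
/* As a hypothetical illustration, a block whose body in a GIMPLE dump
   looks like

     <bb 7> :
     # DEBUG x => x_5
     if (x_5 > 10)
       goto <bb 8>;
     else
       goto <bb 9>;

   satisfies redirection_block_p: labels, debug binds, nops and clobbers
   are skipped, and the only executable statement left is the final
   control statement.  A block with a real assignment before the
   condition would not qualify.  */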

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update the dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.  */

bool
fwd_jt_path_registry::thread_block_1 (basic_block bb,
				      bool noloop_only,
				      bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
  local_info.need_profile_correction = false;
  local_info.num_threaded_edges = 0;

  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  m_redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  edge last = NULL;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
	continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
	continue;

      /* When a NO_COPY_SRC block becomes non-empty, cancel the path.  */
      if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
	{
	  auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
	  if (!gsi_end_p (gsi)
	      && !is_ctrl_stmt (gsi_stmt (gsi)))
	    {
	      cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
	      e->aux = NULL;
	      continue;
	    }
	}

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */

	  /* One case occurs when there was a loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     and thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
	  if (bb->loop_father != e2->src->loop_father
	      && (!loop_exit_edge_p (e2->src->loop_father, e2)
		  || flow_loop_nested_p (bb->loop_father,
					 e2->dest->loop_father)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      cancel_thread (path, "Threading through unhandled loop header");
	      e->aux = NULL;
	      continue;
	    }

	  /* Another case occurs when trying to thread through our
	     own loop header, possibly from inside the loop.  We will
	     thread these later.  */
	  unsigned int i;
	  for (i = 1; i < path->length (); i++)
	    {
	      if ((*path)[i]->e->src == bb->loop_father->header
		  && (!loop_exit_edge_p (bb->loop_father, e2)
		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
		break;
	    }

	  if (i != path->length ())
	    continue;

	  /* Loop parallelization can be confused by the result of
	     threading through the loop exit test back into the loop.
	     However, threading those jumps seems to help other code.

	     I have been unable to find anything related to the shape of
	     the CFG, the contents of the affected blocks, etc which would
	     allow a more sensible test than what we're using below which
	     merely avoids the optimization when parallelizing loops.  */
	  if (flag_tree_parallelize_loops > 1)
	    {
	      for (i = 1; i < path->length (); i++)
		if (bb->loop_father == e2->src->loop_father
		    && loop_exits_from_bb_p (bb->loop_father,
					     (*path)[i]->e->src)
		    && !loop_exit_edge_p (bb->loop_father, e2))
		  break;

	      if (i != path->length ())
		{
		  cancel_thread (path, "Threading through loop exit");
		  e->aux = NULL;
		  continue;
		}
	    }
	}

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);

      /* When we have thread paths through a common joiner with different
	 final destinations, then we may need corrections to deal with
	 profile insanities.  See the big comment before compute_path_counts.  */
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  if (!last)
	    last = e2;
	  else if (e2 != last)
	    local_info.need_profile_correction = true;
	}
    }
1549
1550 /* We do not update dominance info. */
1551 free_dominance_info (CDI_DOMINATORS);
1552
1553 /* We know we only thread through the loop header to loop exits.
1554 Let the basic block duplication hook know we are not creating
1555 a multiple entry loop. */
1556 if (noloop_only
1557 && bb == bb->loop_father->header)
1558 set_loop_copy (bb->loop_father, loop_outer (loop: bb->loop_father));
1559
1560 /* Now create duplicates of BB.
1561
1562 Note that for a block with a high outgoing degree we can waste
1563 a lot of time and memory creating and destroying useless edges.
1564
1565 So we first duplicate BB and remove the control structure at the
1566 tail of the duplicate as well as all outgoing edges from the
1567 duplicate. We then use that duplicate block as a template for
1568 the rest of the duplicates. */
1569 local_info.template_block = NULL;
1570 local_info.bb = bb;
1571 local_info.jumps_threaded = false;
1572 m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1573 (&local_info);
1574
1575 /* The template does not have an outgoing edge.  Create that outgoing
1576 edge and update PHI nodes at the edge's target as necessary.
1577
1578 We do this after creating all the duplicates to avoid creating
1579 unnecessary edges. */
1580 m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1581 (&local_info);
1582
1583 /* The hash table traversals above created the duplicate blocks (and the
1584 statements within the duplicate blocks). This loop creates PHI nodes for
1585 the duplicated blocks and redirects the incoming edges into BB to reach
1586 the duplicates of BB. */
1587 m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1588 (&local_info);
1589
1590 /* Done with this block. Clear REDIRECTION_DATA. */
1591 delete m_redirection_data;
1592 m_redirection_data = NULL;
1593
1594 if (noloop_only
1595 && bb == bb->loop_father->header)
1596 set_loop_copy (bb->loop_father, NULL);
1597
1598 BITMAP_FREE (local_info.duplicate_blocks);
1599 local_info.duplicate_blocks = NULL;
1600
1601 m_num_threaded_edges += local_info.num_threaded_edges;
1602
1603 /* Indicate to our caller whether or not any jumps were threaded. */
1604 return local_info.jumps_threaded;
1605}
1606
1607/* Wrapper for thread_block_1 so that we can first handle jump
1608 thread paths which do not involve copying joiner blocks, then
1609 handle jump thread paths which have joiner blocks.
1610
1611 By doing things this way we can be as aggressive as possible and
1612 not worry that copying a joiner block will create a jump threading
1613 opportunity. */
1614
1615bool
1616fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
1617{
1618 bool retval;
1619 retval = thread_block_1 (bb, noloop_only, false);
1620 retval |= thread_block_1 (bb, noloop_only, true);
1621 return retval;
1622}
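
/* A purely illustrative, hypothetical source-level example of the simpler
   (non-joiner) kind of request handled above: in

       if (a)
	 x = 1;
       else
	 x = 2;
       if (x == 1)
	 foo ();

   the value of X is known on each edge entering the second conditional,
   so each incoming edge can be threaded directly to the corresponding
   destination of that conditional without copying a joiner block.  */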
1623
1624/* Callback for dfs_enumerate_from. Returns true if BB is different
1625 from STOP and DBDS_CE_STOP. */
1626
1627static basic_block dbds_ce_stop;
1628static bool
1629dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1630{
1631 return (bb != (const_basic_block) stop
1632 && bb != dbds_ce_stop);
1633}
1634
1635 /* Evaluates the dominance relationship between the latch of LOOP and BB,
1636 and returns the state.  */
1637
1638enum bb_dom_status
1639determine_bb_domination_status (class loop *loop, basic_block bb)
1640{
1641 basic_block *bblocks;
1642 unsigned nblocks, i;
1643 bool bb_reachable = false;
1644 edge_iterator ei;
1645 edge e;
1646
1647 /* This function assumes BB is a successor of LOOP->header.
1648 If that is not the case return DOMST_NONDOMINATING which
1649 is always safe. */
1650 {
1651 bool ok = false;
1652
1653 FOR_EACH_EDGE (e, ei, bb->preds)
1654 {
1655 if (e->src == loop->header)
1656 {
1657 ok = true;
1658 break;
1659 }
1660 }
1661
1662 if (!ok)
1663 return DOMST_NONDOMINATING;
1664 }
1665
1666 if (bb == loop->latch)
1667 return DOMST_DOMINATING;
1668
1669 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1670 from it. */
1671
1672 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1673 dbds_ce_stop = loop->header;
1674 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1675 bblocks, loop->num_nodes, bb);
1676 for (i = 0; i < nblocks; i++)
1677 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1678 {
1679 if (e->src == loop->header)
1680 {
1681 free (bblocks);
1682 return DOMST_NONDOMINATING;
1683 }
1684 if (e->src == bb)
1685 bb_reachable = true;
1686 }
1687
1688 free (bblocks);
1689 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1690}
1691
1692/* Thread jumps through the header of LOOP. Returns true if cfg changes.
1693 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1694 to the inside of the loop. */
1695
1696bool
1697fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
1698 bool may_peel_loop_headers)
1699{
1700 basic_block header = loop->header;
1701 edge e, tgt_edge, latch = loop_latch_edge (loop);
1702 edge_iterator ei;
1703 basic_block tgt_bb, atgt_bb;
1704 enum bb_dom_status domst;
1705
1706 /* We have already threaded through headers to exits, so all the threading
1707 requests now are to the inside of the loop. We need to avoid creating
1708 irreducible regions (i.e., loops with more than one entry block), and
1709 also loops with several latch edges, or new subloops of the loop (although
1710 there are cases where it might be appropriate, it is difficult to decide,
1711 and doing it wrongly may confuse other optimizers).
1712
1713 We could handle more general cases here. However, the intention is to
1714 preserve some information about the loop, which is impossible if its
1715 structure changes significantly, in a way that is not well understood.
1716 Thus we handle only a few important special cases, in which updating
1717 the loop-carried information should also be feasible:
1718
1719 1) Propagation of the latch edge to a block that dominates the latch
1720 block of the loop.  This aims to handle the following idiom:
1721
1722 first = 1;
1723 while (1)
1724 {
1725 if (first)
1726 initialize;
1727 first = 0;
1728 body;
1729 }
1730
1731 After threading the latch edge, this becomes
1732
1733 first = 1;
1734 if (first)
1735 initialize;
1736 while (1)
1737 {
1738 first = 0;
1739 body;
1740 }
1741
1742 The original header of the loop is moved out of it, and we may thread
1743 the remaining edges through it without further constraints.
1744
1745 2) All entry edges are propagated to a single basic block that dominates
1746 the latch block of the loop. This aims to handle the following idiom
1747 (normally created for "for" loops):
1748
1749 i = 0;
1750 while (1)
1751 {
1752 if (i >= 100)
1753 break;
1754 body;
1755 i++;
1756 }
1757
1758 This becomes
1759
1760 i = 0;
1761 while (1)
1762 {
1763 body;
1764 i++;
1765 if (i >= 100)
1766 break;
1767 }
1768 */
1769
1770 /* Threading through the header won't improve the code if the header has just
1771 one successor. */
1772 if (single_succ_p (header))
1773 goto fail;
1774
1775 if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1776 goto fail;
1777 else
1778 {
1779 tgt_bb = NULL;
1780 tgt_edge = NULL;
1781 FOR_EACH_EDGE (e, ei, header->preds)
1782 {
1783 if (!e->aux)
1784 {
1785 if (e == latch)
1786 continue;
1787
1788 /* If the latch is not threaded, and there is a header
1789 edge that is not threaded, we would create a loop
1790 with multiple entries.  */
1791 goto fail;
1792 }
1793
1794 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1795
1796 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1797 goto fail;
1798 tgt_edge = (*path)[1]->e;
1799 atgt_bb = tgt_edge->dest;
1800 if (!tgt_bb)
1801 tgt_bb = atgt_bb;
1802 /* Two targets of threading would make us create a loop
1803 with multiple entries.  */
1804 else if (tgt_bb != atgt_bb)
1805 goto fail;
1806 }
1807
1808 if (!tgt_bb)
1809 {
1810 /* There are no threading requests. */
1811 return false;
1812 }
1813
1814 /* Redirecting to empty loop latch is useless. */
1815 if (tgt_bb == loop->latch
1816 && empty_block_p (loop->latch))
1817 goto fail;
1818 }
1819
1820 /* The target block must dominate the loop latch, otherwise we would be
1821 creating a subloop. */
1822 domst = determine_bb_domination_status (loop, tgt_bb);
1823 if (domst == DOMST_NONDOMINATING)
1824 goto fail;
1825 if (domst == DOMST_LOOP_BROKEN)
1826 {
1827 /* If the loop ceased to exist, mark it as such, and thread through its
1828 original header. */
1829 mark_loop_for_removal (loop);
1830 return thread_block (header, false);
1831 }
1832
1833 if (tgt_bb->loop_father->header == tgt_bb)
1834 {
1835 /* If the target of the threading is a header of a subloop, we need
1836 to create a preheader for it, so that the headers of the two loops
1837 do not merge. */
1838 if (EDGE_COUNT (tgt_bb->preds) > 2)
1839 {
1840 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1841 gcc_assert (tgt_bb != NULL);
1842 }
1843 else
1844 tgt_bb = split_edge (tgt_edge);
1845 }
1846
1847 basic_block new_preheader;
1848
1849 /* Now consider the case where entry edges are redirected to the new
1850 entry block.  Remember one entry edge, so that we can find the new
1851 preheader (its destination after threading).  */
1852 FOR_EACH_EDGE (e, ei, header->preds)
1853 {
1854 if (e->aux)
1855 break;
1856 }
1857
1858 /* The duplicate of the header is the new preheader of the loop. Ensure
1859 that it is placed correctly in the loop hierarchy. */
1860 set_loop_copy (loop, loop_outer (loop));
1861
1862 thread_block (header, false);
1863 set_loop_copy (loop, NULL);
1864 new_preheader = e->dest;
1865
1866 /* Create the new latch block. This is always necessary, as the latch
1867 must have only a single successor, but the original header had at
1868 least two successors. */
1869 loop->latch = NULL;
1870 mfb_kj_edge = single_succ_edge (new_preheader);
1871 loop->header = mfb_kj_edge->dest;
1872 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1873 loop->header = latch->dest;
1874 loop->latch = latch->src;
1875 return true;
1876
1877fail:
1878 /* We failed to thread anything. Cancel the requests. */
1879 FOR_EACH_EDGE (e, ei, header->preds)
1880 {
1881 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1882
1883 if (path)
1884 {
1885 cancel_thread (path, "Failure in thread_through_loop_header");
1886 e->aux = NULL;
1887 }
1888 }
1889 return false;
1890}
1891
1892/* E1 and E2 are edges into the same basic block. Return TRUE if the
1893 PHI arguments associated with those edges are equal or there are no
1894 PHI arguments, otherwise return FALSE. */
1895
1896static bool
1897phi_args_equal_on_edges (edge e1, edge e2)
1898{
1899 gphi_iterator gsi;
1900 int indx1 = e1->dest_idx;
1901 int indx2 = e2->dest_idx;
1902
1903 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1904 {
1905 gphi *phi = gsi.phi ();
1906
1907 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1908 gimple_phi_arg_def (phi, indx2), 0))
1909 return false;
1910 }
1911 return true;
1912}
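
/* A small, hypothetical GIMPLE illustration of the check above: if
   E1->dest contains

       # x_1 = PHI <5(E1->src), 5(E2->src)>

   the arguments on the two edges are equal and the function returns true,
   whereas

       # x_1 = PHI <5(E1->src), 7(E2->src)>

   makes it return false.  */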
1913
1914/* Return the number of non-debug statements and non-virtual PHIs in a
1915 block. */
1916
1917static unsigned int
1918count_stmts_and_phis_in_block (basic_block bb)
1919{
1920 unsigned int num_stmts = 0;
1921
1922 gphi_iterator gpi;
1923 for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
1924 if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
1925 num_stmts++;
1926
1927 gimple_stmt_iterator gsi;
1928 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1929 {
1930 gimple *stmt = gsi_stmt (gsi);
1931 if (!is_gimple_debug (stmt))
1932 num_stmts++;
1933 }
1934
1935 return num_stmts;
1936}
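
/* For instance (hypothetical GIMPLE), a block containing

       # x_1 = PHI <0(2), 1(3)>
       # .MEM_2 = PHI <.MEM_4(2), .MEM_5(3)>
       # DEBUG x => x_1
       y_3 = x_1 + 1;
       if (y_3 > 10)

   counts as 3: the first PHI, the addition and the conditional; the
   virtual PHI and the debug statement are ignored.  */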
1937
1938
1939/* Walk through the registered jump threads and convert them into a
1940 form convenient for this pass.
1941
1942 Any block which has incoming edges threaded to outgoing edges
1943 will have its entry in THREADED_BLOCK set.
1944
1945 Any threaded edge will have its new outgoing edge stored in the
1946 original edge's AUX field.
1947
1948 This form avoids the need to walk all the edges in the CFG to
1949 discover blocks which need processing and avoids unnecessary
1950 hash table lookups to map from threaded edge to new target. */
1951
1952void
1953fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
1954{
1955 unsigned int i;
1956 bitmap_iterator bi;
1957 auto_bitmap tmp;
1958 basic_block bb;
1959 edge e;
1960 edge_iterator ei;
1961
1962 /* It is possible to have jump threads in which one is a subpath
1963 of the other.  I.e., (A, B), (B, C), (C, D) where B is a joiner
1964 block and (B, C), (C, D) where no joiner block exists.
1965
1966 When this occurs ignore the jump thread request with the joiner
1967 block. It's totally subsumed by the simpler jump thread request.
1968
1969 This results in less block copying and simpler CFGs.  More importantly,
1970 when we duplicate the joiner block, B, in this case we will create
1971 a new threading opportunity that we wouldn't be able to optimize
1972 until the next jump threading iteration.
1973
1974 So first convert the jump thread requests which do not require a
1975 joiner block. */
1976 for (i = 0; i < m_paths.length (); i++)
1977 {
1978 vec<jump_thread_edge *> *path = m_paths[i];
1979
1980 if (path->length () > 1
1981 && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1982 {
1983 edge e = (*path)[0]->e;
1984 e->aux = (void *)path;
1985 bitmap_set_bit (tmp, e->dest->index);
1986 }
1987 }
1988
1989 /* Now iterate again, converting cases where we want to thread
1990 through a joiner block, but only if no other edge on the path
1991 already has a jump thread attached to it. We do this in two passes,
1992 to avoid situations where the order in the paths vec can hide overlapping
1993 threads (the path is recorded on the incoming edge, so we would miss
1994 cases where the second path starts at a downstream edge on the same
1995 path). First record all joiner paths, deleting any in the unexpected
1996 case where there is already a path for that incoming edge. */
1997 for (i = 0; i < m_paths.length ();)
1998 {
1999 vec<jump_thread_edge *> *path = m_paths[i];
2000
2001 if (path->length () > 1
2002 && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2003 {
2004 /* Attach the path to the starting edge if none is yet recorded. */
2005 if ((*path)[0]->e->aux == NULL)
2006 {
2007 (*path)[0]->e->aux = path;
2008 i++;
2009 }
2010 else
2011 {
2012 m_paths.unordered_remove (i);
2013 cancel_thread (path);
2014 }
2015 }
2016 else
2017 {
2018 i++;
2019 }
2020 }
2021
2022 /* Second, look for paths that have any other jump thread attached to
2023 them, and either finish converting them or cancel them. */
2024 for (i = 0; i < m_paths.length ();)
2025 {
2026 vec<jump_thread_edge *> *path = m_paths[i];
2027 edge e = (*path)[0]->e;
2028
2029 if (path->length () > 1
2030 && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2031 {
2032 unsigned int j;
2033 for (j = 1; j < path->length (); j++)
2034 if ((*path)[j]->e->aux != NULL)
2035 break;
2036
2037 /* If we iterated through the entire path without exiting the loop,
2038 then we are good to go, record it. */
2039 if (j == path->length ())
2040 {
2041 bitmap_set_bit (tmp, e->dest->index);
2042 i++;
2043 }
2044 else
2045 {
2046 e->aux = NULL;
2047 m_paths.unordered_remove (i);
2048 cancel_thread (path);
2049 }
2050 }
2051 else
2052 {
2053 i++;
2054 }
2055 }
2056
2057 /* When optimizing for size, prune all thread paths where statement
2058 duplication is necessary.
2059
2060 We walk the jump thread path looking for copied blocks.  There are
2061 two types of copied blocks.
2062
2063 EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
2064 cancel the jump threading request when optimizing for size.
2065
2066 EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
2067 will be killed by threading. If threading does not kill all of
2068 its statements, then we should cancel the jump threading request
2069 when optimizing for size. */
2070 if (optimize_function_for_size_p (cfun))
2071 {
2072 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2073 {
2074 FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
2075 if (e->aux)
2076 {
2077 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2078
2079 unsigned int j;
2080 for (j = 1; j < path->length (); j++)
2081 {
2082 bb = (*path)[j]->e->src;
2083 if (redirection_block_p (bb))
2084 ;
2085 else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
2086 || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2087 && (count_stmts_and_phis_in_block (bb)
2088 != estimate_threading_killed_stmts (bb))))
2089 break;
2090 }
2091
2092 if (j != path->length ())
2093 {
2094 cancel_thread (path);
2095 e->aux = NULL;
2096 }
2097 else
2098 bitmap_set_bit (threaded_blocks, i);
2099 }
2100 }
2101 }
2102 else
2103 bitmap_copy (threaded_blocks, tmp);
2104
2105 /* If we have a joiner block (J) which has two successors S1 and S2 and
2106 we are threading through S1 and the final destination of the thread
2107 is S2, then we must verify that any PHI nodes in S2 have the same
2108 PHI arguments for the edge J->S2 and J->S1->...->S2.
2109
2110 We used to detect this prior to registering the jump thread, but
2111 that prohibits propagation of edge equivalences into non-dominated
2112 PHI nodes as the equivalency test might occur before propagation.
2113
2114 This must also occur after we truncate any jump threading paths
2115 as this scenario may only show up after truncation.
2116
2117 This works for now, but will need improvement as part of the FSA
2118 optimization.
2119
2120 Note since we've moved the thread request data to the edges,
2121 we have to iterate on those rather than the threaded_edges vector. */
2122 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2123 {
2124 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2125 FOR_EACH_EDGE (e, ei, bb->preds)
2126 {
2127 if (e->aux)
2128 {
2129 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2130 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2131
2132 if (have_joiner)
2133 {
2134 basic_block joiner = e->dest;
2135 edge final_edge = path->last ()->e;
2136 basic_block final_dest = final_edge->dest;
2137 edge e2 = find_edge (joiner, final_dest);
2138
2139 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2140 {
2141 cancel_thread (path);
2142 e->aux = NULL;
2143 }
2144 }
2145 }
2146 }
2147 }
2148
2149 /* Look for jump threading paths which cross multiple loop headers.
2150
2151 The code to thread through loop headers will change the CFG in ways
2152 that invalidate the cached loop iteration information. So we must
2153 detect that case and wipe the cached information. */
2154 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2155 {
2156 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2157 FOR_EACH_EDGE (e, ei, bb->preds)
2158 {
2159 if (e->aux)
2160 {
2161 gcc_assert (loops_state_satisfies_p
2162 (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
2163 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2164
2165 for (unsigned int i = 0, crossed_headers = 0;
2166 i < path->length ();
2167 i++)
2168 {
2169 basic_block dest = (*path)[i]->e->dest;
2170 basic_block src = (*path)[i]->e->src;
2171 /* If we enter a loop. */
2172 if (flow_loop_nested_p (src->loop_father, dest->loop_father))
2173 ++crossed_headers;
2174 /* If we step from a block outside an irreducible region
2175 to a block inside an irreducible region, then we have
2176 crossed into a loop. */
2177 else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
2178 && (dest->flags & BB_IRREDUCIBLE_LOOP))
2179 ++crossed_headers;
2180 if (crossed_headers > 1)
2181 {
2182 vect_free_loop_info_assumptions
2183 ((*path)[path->length () - 1]->e->dest->loop_father);
2184 break;
2185 }
2186 }
2187 }
2188 }
2189 }
2190}
2191
2192
2193 /* Verify that the REGION is a valid jump thread.  A jump thread is a special
2194 case of a SEME (Single Entry Multiple Exits) region in which all nodes in
2195 the REGION have exactly one incoming edge.  The only exception is the first
2196 block, which may not have been connected to the rest of the cfg yet.  */
2197
2198DEBUG_FUNCTION void
2199verify_jump_thread (basic_block *region, unsigned n_region)
2200{
2201 for (unsigned i = 0; i < n_region; i++)
2202 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2203}
2204
2205/* Return true when BB is one of the first N items in BBS. */
2206
2207static inline bool
2208bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2209{
2210 for (int i = 0; i < n; i++)
2211 if (bb == bbs[i])
2212 return true;
2213
2214 return false;
2215}
2216
2217void
2218jt_path_registry::debug_path (FILE *dump_file, int pathno)
2219{
2220 vec<jump_thread_edge *> *p = m_paths[pathno];
2221 fprintf (dump_file, "path: ");
2222 for (unsigned i = 0; i < p->length (); ++i)
2223 fprintf (dump_file, "%d -> %d, ",
2224 (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
2225 fprintf (dump_file, "\n");
2226}
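
/* For example, a registered path consisting of the edges 5 -> 8 and
   8 -> 13 is printed by the routine above as

       path: 5 -> 8, 8 -> 13,
*/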
2227
2228void
2229jt_path_registry::debug ()
2230{
2231 for (unsigned i = 0; i < m_paths.length (); ++i)
2232 debug_path (stderr, i);
2233}
2234
2235/* Rewire a jump_thread_edge so that the source block is now a
2236 threaded source block.
2237
2238 PATH_NUM is an index into the global path table PATHS.
2239 EDGE_NUM is the jump thread edge number into said path.
2240
2241 Returns TRUE if we were able to successfully rewire the edge. */
2242
2243bool
2244back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
2245 unsigned edge_num)
2246{
2247 vec<jump_thread_edge *> *path = m_paths[path_num];
2248 edge &e = (*path)[edge_num]->e;
2249 if (dump_file && (dump_flags & TDF_DETAILS))
2250 fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
2251 e->src->index, e->dest->index);
2252 basic_block src_copy = get_bb_copy (e->src);
2253 if (src_copy == NULL)
2254 {
2255 if (dump_file && (dump_flags & TDF_DETAILS))
2256 fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
2257 return false;
2258 }
2259 edge new_edge = find_edge (src_copy, e->dest);
2260 /* If the previously threaded paths created a flow graph where we
2261 can no longer figure out where to go, give up. */
2262 if (new_edge == NULL)
2263 {
2264 if (dump_file && (dump_flags & TDF_DETAILS))
2265 fprintf (dump_file, "ignoring candidate: we lost our way\n");
2266 return false;
2267 }
2268 e = new_edge;
2269 return true;
2270}
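
/* As a hypothetical illustration of the rewiring above: if the candidate
   edge is 6 -> 7 and block 6 was duplicated as 6' by a previously realized
   thread, and an edge 6' -> 7 exists, the path entry is replaced by
   6' -> 7; if no such edge exists, the candidate cannot be adjusted and is
   rejected.  */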
2271
2272/* After a path has been jump threaded, adjust the remaining paths
2273 that are subsets of this path, so these paths can be safely
2274 threaded within the context of the new threaded path.
2275
2276 For example, suppose we have just threaded:
2277
2278 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12'
2279
2280 And we have an upcoming threading candidate:
2281 5 -> 6 -> 7 -> 8 -> 15 -> 20
2282
2283 This function adjusts the upcoming path into:
2284 8' -> 15 -> 20
2285
2286 CURR_PATH_NUM is an index into the global paths table. It
2287 specifies the path that was just threaded. */
2288
2289void
2290back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
2291{
2292 vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];
2293
2294 /* Iterate through all the other paths and adjust them. */
2295 for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
2296 {
2297 if (cand_path_num == curr_path_num)
2298 {
2299 ++cand_path_num;
2300 continue;
2301 }
2302 /* Make sure the candidate to adjust starts with the same path
2303 as the recently threaded path. */
2304 vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
2305 if ((*cand_path)[0]->e != (*curr_path)[0]->e)
2306 {
2307 ++cand_path_num;
2308 continue;
2309 }
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 {
2312 fprintf (dump_file, "adjusting candidate: ");
2313 debug_path (dump_file, cand_path_num);
2314 }
2315
2316 /* Chop off from the candidate path any prefix it shares with
2317 the recently threaded path. */
2318 unsigned minlength = MIN (curr_path->length (), cand_path->length ());
2319 unsigned j;
2320 for (j = 0; j < minlength; ++j)
2321 {
2322 edge cand_edge = (*cand_path)[j]->e;
2323 edge curr_edge = (*curr_path)[j]->e;
2324
2325 /* Once the prefix no longer matches, adjust the first
2326 non-matching edge to point from an adjusted edge to
2327 wherever it was going. */
2328 if (cand_edge != curr_edge)
2329 {
2330 gcc_assert (cand_edge->src == curr_edge->src);
2331 if (!rewire_first_differing_edge (cand_path_num, j))
2332 goto remove_candidate_from_list;
2333 break;
2334 }
2335 }
2336 if (j == minlength)
2337 {
2338 /* If we consumed the max subgraph we could look at, and
2339 still didn't find any different edges, it's the
2340 last edge after MINLENGTH. */
2341 if (cand_path->length () > minlength)
2342 {
2343 if (!rewire_first_differing_edge (cand_path_num, j))
2344 goto remove_candidate_from_list;
2345 }
2346 else if (dump_file && (dump_flags & TDF_DETAILS))
2347 fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
2348 }
2349 if (j > 0)
2350 {
2351 /* If we are removing everything, delete the entire candidate. */
2352 if (j == cand_path->length ())
2353 {
2354 remove_candidate_from_list:
2355 cancel_thread (cand_path, "Adjusted candidate is EMPTY");
2356 m_paths.unordered_remove (cand_path_num);
2357 continue;
2358 }
2359 /* Otherwise, just remove the redundant sub-path. */
2360 if (cand_path->length () - j > 1)
2361 cand_path->block_remove (0, j);
2362 else if (dump_file && (dump_flags & TDF_DETAILS))
2363 fprintf (dump_file, "Dropping illformed candidate.\n");
2364 }
2365 if (dump_file && (dump_flags & TDF_DETAILS))
2366 {
2367 fprintf (dump_file, "adjusted candidate: ");
2368 debug_path (dump_file, cand_path_num);
2369 }
2370 ++cand_path_num;
2371 }
2372}
2373
2374/* Duplicates a jump-thread path of N_REGION basic blocks.
2375 The ENTRY edge is redirected to the duplicate of the region.
2376
2377 Remove the last conditional statement in the last basic block in the REGION,
2378 and create a single fallthru edge pointing to the same destination as the
2379 EXIT edge.
2380
2381 CURRENT_PATH_NO is an index into the global paths[] table
2382 specifying the jump-thread path.
2383
2384 Returns false if it is unable to copy the region, true otherwise. */
2385
2386bool
2387back_jt_path_registry::duplicate_thread_path (edge entry,
2388 edge exit,
2389 basic_block *region,
2390 unsigned n_region,
2391 unsigned current_path_no)
2392{
2393 unsigned i;
2394 class loop *loop = entry->dest->loop_father;
2395 edge exit_copy;
2396 edge redirected;
2397 profile_count curr_count;
2398
2399 if (!can_copy_bbs_p (region, n_region))
2400 return false;
2401
2402 /* Some sanity checking.  Note that we do not check for all possible
2403 misuses of the function.  I.e. if you ask to copy something weird,
2404 it will work, but the state of structures probably will not be
2405 correct. */
2406 for (i = 0; i < n_region; i++)
2407 {
2408 /* We do not handle subloops, i.e. all the blocks must belong to the
2409 same loop. */
2410 if (region[i]->loop_father != loop)
2411 return false;
2412 }
2413
2414 initialize_original_copy_tables ();
2415
2416 set_loop_copy (loop, loop);
2417
2418 basic_block *region_copy = XNEWVEC (basic_block, n_region);
2419 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2420 split_edge_bb_loc (entry), false);
2421
2422 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2423 following code ensures that all the edges exiting the jump-thread path are
2424 redirected back to the original code: these edges are exceptions
2425 invalidating the property that is propagated by executing all the blocks of
2426 the jump-thread path in order. */
2427
2428 curr_count = entry->count ();
2429
2430 for (i = 0; i < n_region; i++)
2431 {
2432 edge e;
2433 edge_iterator ei;
2434 basic_block bb = region_copy[i];
2435
2436 /* Watch inconsistent profile. */
2437 if (curr_count > region[i]->count)
2438 curr_count = region[i]->count;
2439 /* Scale current BB. */
2440 if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
2441 {
2442 /* In the middle of the path we only scale the frequencies.
2443 In the last BB we need to update the probabilities of outgoing edges
2444 because we know which one is taken along the threaded path.  */
2445 if (i + 1 != n_region)
2446 scale_bbs_frequencies_profile_count (region + i, 1,
2447 region[i]->count - curr_count,
2448 region[i]->count);
2449 else
2450 update_bb_profile_for_threading (region[i],
2451 curr_count,
2452 exit);
2453 scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2454 region_copy[i]->count);
2455 }
2456
2457 if (single_succ_p (bb))
2458 {
2459 /* Make sure the successor is the next node in the path. */
2460 gcc_assert (i + 1 == n_region
2461 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2462 if (i + 1 != n_region)
2463 {
2464 curr_count = single_succ_edge (bb)->count ();
2465 }
2466 continue;
2467 }
2468
2469 /* Special case the last block on the path: make sure that it does not
2470 jump back on the copied path, including back to itself. */
2471 if (i + 1 == n_region)
2472 {
2473 FOR_EACH_EDGE (e, ei, bb->succs)
2474 if (bb_in_bbs (e->dest, region_copy, n_region))
2475 {
2476 basic_block orig = get_bb_original (e->dest);
2477 if (orig)
2478 redirect_edge_and_branch_force (e, orig);
2479 }
2480 continue;
2481 }
2482
2483 /* Redirect all other edges jumping to non-adjacent blocks back to the
2484 original code. */
2485 FOR_EACH_EDGE (e, ei, bb->succs)
2486 if (region_copy[i + 1] != e->dest)
2487 {
2488 basic_block orig = get_bb_original (e->dest);
2489 if (orig)
2490 redirect_edge_and_branch_force (e, orig);
2491 }
2492 else
2493 {
2494 curr_count = e->count ();
2495 }
2496 }
2497
2498
2499 if (flag_checking)
2500 verify_jump_thread (region_copy, n_region);
2501
2502 /* Remove the last branch in the jump thread path. */
2503 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2504
2505 /* And fixup the flags on the single remaining edge. */
2506 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2507 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2508 fix_e->flags |= EDGE_FALLTHRU;
2509
2510 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2511
2512 if (e)
2513 {
2514 rescan_loop_exit (e, true, false);
2515 e->probability = profile_probability::always ();
2516 }
2517
2518 /* Redirect the entry and add the phi node arguments. */
2519 if (entry->dest == loop->header)
2520 mark_loop_for_removal (loop);
2521 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2522 gcc_assert (redirected != NULL);
2523 flush_pending_stmts (entry);
2524
2525 /* Add the other PHI node arguments. */
2526 add_phi_args_after_copy (region_copy, n_region, NULL);
2527
2528 free (region_copy);
2529
2530 adjust_paths_after_duplication (current_path_no);
2531
2532 free_original_copy_tables ();
2533 return true;
2534}
2535
2536/* Return true when PATH is a valid jump-thread path. */
2537
2538static bool
2539valid_jump_thread_path (vec<jump_thread_edge *> *path)
2540{
2541 unsigned len = path->length ();
2542
2543 /* Check that the path is connected. */
2544 for (unsigned int j = 0; j < len - 1; j++)
2545 {
2546 edge e = (*path)[j]->e;
2547 if (e->dest != (*path)[j+1]->e->src)
2548 return false;
2549 }
2550 return true;
2551}
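
/* For example, the (hypothetical) edge sequence 2->3, 3->5, 5->9 forms a
   connected path and is valid, whereas 2->3, 4->5 is not, because the
   destination of the first edge is not the source of the second.  */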
2552
2553/* Remove any queued jump threads that include edge E.
2554
2555 We don't actually remove them here, just record the edges in a
2556 hash table.  That way we can do the search once per iteration of
2557 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2558
2559void
2560fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
2561{
2562 if (!m_paths.exists () || !flag_thread_jumps)
2563 return;
2564
2565 edge *slot = m_removed_edges->find_slot (e, INSERT);
2566 *slot = e;
2567}
2568
2569/* Thread all paths that have been queued for jump threading, and
2570 update the CFG accordingly.
2571
2572 It is the caller's responsibility to fix the dominance information
2573 and rewrite duplicated SSA_NAMEs back into SSA form.
2574
2575 If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2576 headers if it does not simplify the loop.
2577
2578 Returns true if one or more edges were threaded. */
2579
2580bool
2581jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
2582{
2583 if (m_paths.length () == 0)
2584 return false;
2585
2586 m_num_threaded_edges = 0;
2587
2588 bool retval = update_cfg (peel_loop_headers);
2589
2590 statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
2591
2592 if (retval)
2593 {
2594 loops_state_set (LOOPS_NEED_FIXUP);
2595 return true;
2596 }
2597 return false;
2598}
2599
2600/* This is the backward threader version of thread_through_all_blocks
2601 using a generic BB copier. */
2602
2603bool
2604back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2605{
2606 bool retval = false;
2607 hash_set<edge> visited_starting_edges;
2608
2609 while (m_paths.length ())
2610 {
2611 vec<jump_thread_edge *> *path = m_paths[0];
2612 edge entry = (*path)[0]->e;
2613
2614 /* Do not jump-thread twice from the same starting edge.
2615
2616 Previously we only checked that we weren't threading twice
2617 from the same BB, but that was too restrictive. Imagine a
2618 path that starts from GIMPLE_COND(x_123 == 0,...), where both
2619 edges out of this conditional yield paths that can be
2620 threaded (for example, both lead to an x_123==0 or x_123!=0
2621 conditional further down the line).  */
2622 if (visited_starting_edges.contains (entry)
2623 /* We may not want to realize this jump thread path for
2624 various reasons. So check it first. */
2625 || !valid_jump_thread_path (path))
2626 {
2627 /* Remove invalid jump-thread paths. */
2628 cancel_thread (path, "Avoiding threading twice from same edge");
2629 m_paths.unordered_remove (0);
2630 continue;
2631 }
2632
2633 unsigned len = path->length ();
2634 edge exit = (*path)[len - 1]->e;
2635 basic_block *region = XNEWVEC (basic_block, len - 1);
2636
2637 for (unsigned int j = 0; j < len - 1; j++)
2638 region[j] = (*path)[j]->e->dest;
2639
2640 if (duplicate_thread_path (entry, exit, region, len - 1, 0))
2641 {
2642 /* We do not update dominance info. */
2643 free_dominance_info (CDI_DOMINATORS);
2644 visited_starting_edges.add (entry);
2645 retval = true;
2646 m_num_threaded_edges++;
2647 }
2648
2649 path->release ();
2650 m_paths.unordered_remove (0);
2651 free (region);
2652 }
2653 return retval;
2654}
2655
2656/* This is the forward threader version of thread_through_all_blocks,
2657 using a custom BB copier. */
2658
2659bool
2660fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
2661{
2662 bool retval = false;
2663
2664 /* Remove any paths that referenced removed edges. */
2665 if (m_removed_edges)
2666 for (unsigned i = 0; i < m_paths.length (); )
2667 {
2668 unsigned int j;
2669 vec<jump_thread_edge *> *path = m_paths[i];
2670
2671 for (j = 0; j < path->length (); j++)
2672 {
2673 edge e = (*path)[j]->e;
2674 if (m_removed_edges->find_slot (e, NO_INSERT)
2675 || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2676 || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2677 && !can_duplicate_block_p (e->src)))
2678 break;
2679 }
2680
2681 if (j != path->length ())
2682 {
2683 cancel_thread (path, "Thread references removed edge");
2684 m_paths.unordered_remove (i);
2685 continue;
2686 }
2687 i++;
2688 }
2689
2690 auto_bitmap threaded_blocks;
2691 mark_threaded_blocks (threaded_blocks);
2692
2693 initialize_original_copy_tables ();
2694
2695 /* The order in which we process jump threads can be important.
2696
2697 Consider if we have two jump threading paths A and B. If the
2698 target edge of A is the starting edge of B and we thread path A
2699 first, then we create an additional incoming edge into B->dest that
2700 we cannot discover as a jump threading path on this iteration.
2701
2702 If we instead thread B first, then the edge into B->dest will have
2703 already been redirected before we process path A and path A will
2704 naturally, with no further work, target the redirected path for B.
2705
2706 A post-order is sufficient here.  Compute the ordering first, then
2707 process the blocks. */
2708 if (!bitmap_empty_p (threaded_blocks))
2709 {
2710 int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2711 unsigned int postorder_num = post_order_compute (postorder, false, false);
2712 for (unsigned int i = 0; i < postorder_num; i++)
2713 {
2714 unsigned int indx = postorder[i];
2715 if (bitmap_bit_p (threaded_blocks, indx))
2716 {
2717 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
2718 retval |= thread_block (bb, true);
2719 }
2720 }
2721 free (postorder);
2722 }
2723
2724 /* Then perform the threading through loop headers. We start with the
2725 innermost loop, so that the changes in cfg we perform won't affect
2726 further threading. */
2727 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
2728 {
2729 if (!loop->header
2730 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2731 continue;
2732
2733 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2734 }
2735
2736 /* All jump threading paths should have been resolved at this
2737 point. Verify that is the case. */
2738 basic_block bb;
2739 FOR_EACH_BB_FN (bb, cfun)
2740 {
2741 edge_iterator ei;
2742 edge e;
2743 FOR_EACH_EDGE (e, ei, bb->preds)
2744 gcc_assert (e->aux == NULL);
2745 }
2746
2747 free_original_copy_tables ();
2748
2749 return retval;
2750}
2751
2752bool
2753jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
2754{
2755 gcc_checking_assert (!path.is_empty ());
2756 edge entry = path[0]->e;
2757 edge exit = path[path.length () - 1]->e;
2758 bool seen_latch = false;
2759 int loops_crossed = 0;
2760 bool crossed_latch = false;
2761 bool crossed_loop_header = false;
2762 // Use ->dest here instead of ->src to ignore the first block. The
2763 // first block is allowed to be in a different loop, since it'll be
2764 // redirected. See similar comment in profitable_path_p: "we don't
2765 // care about that block...".
2766 loop_p loop = entry->dest->loop_father;
2767 loop_p curr_loop = loop;
2768
2769 for (unsigned int i = 0; i < path.length (); i++)
2770 {
2771 edge e = path[i]->e;
2772
2773 if (e == NULL)
2774 {
2775 // NULL outgoing edges on a path can happen for jumping to a
2776 // constant address.
2777 cancel_thread (&path, "Found NULL edge in jump threading path");
2778 return true;
2779 }
2780
2781 if (loop->latch == e->src || loop->latch == e->dest)
2782 {
2783 seen_latch = true;
2784 // Like seen_latch, but excludes the first block.
2785 if (e->src != entry->src)
2786 crossed_latch = true;
2787 }
2788
2789 if (e->dest->loop_father != curr_loop)
2790 {
2791 curr_loop = e->dest->loop_father;
2792 ++loops_crossed;
2793 }
2794
2795 // ?? Avoid threading through loop headers that remain in the
2796 // loop, as such threadings tend to create sub-loops which
2797 // _might_ be OK ??.
2798 if (e->dest->loop_father->header == e->dest
2799 && !flow_loop_nested_p (exit->dest->loop_father,
2800 e->dest->loop_father))
2801 crossed_loop_header = true;
2802
2803 if (flag_checking && !m_backedge_threads)
2804 gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
2805 }
2806
2807 // If we crossed a loop into an outer loop without crossing the
2808 // latch, this is just an early exit from the loop.
2809 if (loops_crossed == 1
2810 && !crossed_latch
2811 && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
2812 return false;
2813
2814 if (cfun->curr_properties & PROP_loop_opts_done)
2815 return false;
2816
2817 if (seen_latch && empty_block_p (loop->latch))
2818 {
2819 cancel_thread (&path, "Threading through latch before loop opts "
2820 "would create non-empty latch");
2821 return true;
2822 }
2823 if (loops_crossed)
2824 {
2825 cancel_thread (&path, "Path crosses loops");
2826 return true;
2827 }
2828 // The path should either start and end in the same loop or exit the
2829 // loop it starts in but never enter a loop. This also catches
2830 // creating irreducible loops, not only rotation.
2831 if (entry->src->loop_father != exit->dest->loop_father
2832 && !flow_loop_nested_p (exit->src->loop_father,
2833 entry->dest->loop_father))
2834 {
2835 cancel_thread (&path, "Path rotates loop");
2836 return true;
2837 }
2838 if (crossed_loop_header)
2839 {
2840 cancel_thread (&path, "Path crosses loop header but does not exit it");
2841 return true;
2842 }
2843 return false;
2844}
2845
2846/* Register a jump threading opportunity. We queue up all the jump
2847 threading opportunities discovered by a pass and update the CFG
2848 and SSA form all at once.
2849
2850 E is the edge we can thread, E2 is the new target edge, i.e., we
2851 are effectively recording that E->dest can be changed to E2->dest
2852 after fixing the SSA graph.
2853
2854 Return TRUE if PATH was successfully threaded. */
2855
2856bool
2857jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
2858{
2859 gcc_checking_assert (flag_thread_jumps);
2860
2861 if (!dbg_cnt (registered_jump_thread))
2862 {
2863 path->release ();
2864 return false;
2865 }
2866
2867 if (cancel_invalid_paths (*path))
2868 return false;
2869
2870 if (dump_file && (dump_flags & TDF_DETAILS))
2871 dump_jump_thread_path (dump_file, *path, true);
2872
2873 m_paths.safe_push (path);
2874 return true;
2875}
2876
2877/* Return how many uses of T there are within BB, as long as there
2878 aren't any uses outside BB. If there are any uses outside BB,
2879 return -1 if there's at most one use within BB, or -2 if there is
2880 more than one use within BB. */
2881
2882static int
2883uses_in_bb (tree t, basic_block bb)
2884{
2885 int uses = 0;
2886 bool outside_bb = false;
2887
2888 imm_use_iterator iter;
2889 use_operand_p use_p;
2890 FOR_EACH_IMM_USE_FAST (use_p, iter, t)
2891 {
2892 if (is_gimple_debug (USE_STMT (use_p)))
2893 continue;
2894
2895 if (gimple_bb (USE_STMT (use_p)) != bb)
2896 outside_bb = true;
2897 else
2898 uses++;
2899
2900 if (outside_bb && uses > 1)
2901 return -2;
2902 }
2903
2904 if (outside_bb)
2905 return -1;
2906
2907 return uses;
2908}
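
/* For instance (hypothetical GIMPLE), if T is x_1 and BB contains its only
   two uses,

       y_2 = x_1 + 1;
       if (x_1 > 5)

   the function returns 2.  If some other block also used x_1, it would
   instead return -1 (at most one use within BB) or -2 (more than one use
   within BB).  */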
2909
2910/* Starting from the final control flow stmt in BB, assuming it will
2911 be removed, follow uses in to-be-removed stmts back to their defs
2912 and count how many defs are to become dead and be removed as
2913 well. */
2914
2915unsigned int
2916estimate_threading_killed_stmts (basic_block bb)
2917{
2918 int killed_stmts = 0;
2919 hash_map<tree, int> ssa_remaining_uses;
2920 auto_vec<gimple *, 4> dead_worklist;
2921
2922 /* If the block has only two predecessors, threading will turn each PHI
2923 dst into one of its srcs, so count the PHIs as dead stmts.  */
2924 bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
2925
2926 if (drop_all_phis)
2927 for (gphi_iterator gsi = gsi_start_phis (bb);
2928 !gsi_end_p (gsi); gsi_next (&gsi))
2929 {
2930 gphi *phi = gsi.phi ();
2931 tree dst = gimple_phi_result (phi);
2932
2933 /* We don't count virtual PHIs as stmts in
2934 record_temporary_equivalences_from_phis. */
2935 if (virtual_operand_p (dst))
2936 continue;
2937
2938 killed_stmts++;
2939 }
2940
2941 if (gsi_end_p (gsi_last_bb (bb)))
2942 return killed_stmts;
2943
2944 gimple *stmt = gsi_stmt (gsi_last_bb (bb));
2945 if (gimple_code (stmt) != GIMPLE_COND
2946 && gimple_code (stmt) != GIMPLE_GOTO
2947 && gimple_code (stmt) != GIMPLE_SWITCH)
2948 return killed_stmts;
2949
2950 /* The control statement is always dead. */
2951 killed_stmts++;
2952 dead_worklist.quick_push (stmt);
2953 while (!dead_worklist.is_empty ())
2954 {
2955 stmt = dead_worklist.pop ();
2956
2957 ssa_op_iter iter;
2958 use_operand_p use_p;
2959 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2960 {
2961 tree t = USE_FROM_PTR (use_p);
2962 gimple *def = SSA_NAME_DEF_STMT (t);
2963
2964 if (gimple_bb (def) == bb
2965 && (gimple_code (def) != GIMPLE_PHI
2966 || !drop_all_phis)
2967 && !gimple_has_side_effects (def))
2968 {
2969 int *usesp = ssa_remaining_uses.get (t);
2970 int uses;
2971
2972 if (usesp)
2973 uses = *usesp;
2974 else
2975 uses = uses_in_bb (t, bb);
2976
2977 gcc_assert (uses);
2978
2979 /* Don't bother recording the expected use count if we
2980 won't find any further uses within BB. */
2981 if (!usesp && (uses < -1 || uses > 1))
2982 {
2983 usesp = &ssa_remaining_uses.get_or_insert (t);
2984 *usesp = uses;
2985 }
2986
2987 if (uses < 0)
2988 continue;
2989
2990 --uses;
2991 if (usesp)
2992 *usesp = uses;
2993
2994 if (!uses)
2995 {
2996 killed_stmts++;
2997 if (usesp)
2998 ssa_remaining_uses.remove (t);
2999 if (gimple_code (def) != GIMPLE_PHI)
3000 dead_worklist.safe_push (def);
3001 }
3002 }
3003 }
3004 }
3005
3006 if (dump_file)
3007 fprintf (dump_file, "threading bb %i kills %i stmts\n",
3008 bb->index, killed_stmts);
3009
3010 return killed_stmts;
3011}
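
/* A short, hypothetical GIMPLE illustration of the estimate above: if BB
   ends with

       t_1 = a_2 == 0;
       if (t_1 != 0)

   and t_1 has no uses outside BB, then threading kills both the conditional
   and the definition of t_1, so the function returns 2 (plus one for each
   non-virtual PHI when BB has exactly two predecessors).  */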
3012
