/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "cfgloop.h"
#include "dbgcnt.h"
#include "tree-cfg.h"
#include "tree-vectorizer.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1.  We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block.  In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path.  This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path.  */

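/* As a small concrete illustration (a hypothetical fragment, not from
   any particular testcase): suppose A assigns x_1 = 0 and falls into B,
   and B computes y_2 = x_1 + 1 and then branches on x_1 == 0 to either
   C or D.  When B is entered from A, the condition is known true, so we
   create B' containing only the side effect y_2' = x_1 + 1 with a
   single fallthru edge to C, and redirect A->B to A->B'.  The branch in
   B is thereby bypassed on the threaded path, while other predecessors
   of B still reach the original block.  */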

/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};

jump_thread_path_allocator::jump_thread_path_allocator ()
{
  obstack_init (&m_obstack);
}

jump_thread_path_allocator::~jump_thread_path_allocator ()
{
  obstack_free (&m_obstack, NULL);
}

jump_thread_edge *
jump_thread_path_allocator::allocate_thread_edge (edge e,
                                                  jump_thread_edge_type type)
{
  void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
  return new (r) jump_thread_edge (e, type);
}

vec<jump_thread_edge *> *
jump_thread_path_allocator::allocate_thread_path ()
{
  // ?? Since the paths live in an obstack, we should be able to remove all
  // references to path->release() throughout the code.
  void *r = obstack_alloc (&m_obstack, sizeof (vec<jump_thread_edge *>));
  return new (r) vec<jump_thread_edge *> ();
}

jt_path_registry::jt_path_registry (bool backedge_threads)
{
  m_paths.create (5);
  m_num_threaded_edges = 0;
  m_backedge_threads = backedge_threads;
}

jt_path_registry::~jt_path_registry ()
{
  m_paths.release ();
}

fwd_jt_path_registry::fwd_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/false)
{
  m_removed_edges = new hash_table<struct removed_edges> (17);
  m_redirection_data = NULL;
}

fwd_jt_path_registry::~fwd_jt_path_registry ()
{
  delete m_removed_edges;
}

back_jt_path_registry::back_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/true)
{
}

void
jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
                             edge e, jump_thread_edge_type type)
{
  jump_thread_edge *x = m_allocator.allocate_thread_edge (e, type);
  path->safe_push (x);
}

vec<jump_thread_edge *> *
jt_path_registry::allocate_thread_path ()
{
  return m_allocator.allocate_thread_path ();
}
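
/* A typical client builds and registers a path roughly as follows (an
   illustrative sketch; the actual callers live in the forward and
   backward threaders, and register_jump_thread is the registry entry
   point that consumes the path):

     vec<jump_thread_edge *> *path = registry->allocate_thread_path ();
     registry->push_edge (path, entry_edge, EDGE_START_JUMP_THREAD);
     registry->push_edge (path, thru_edge, EDGE_COPY_SRC_BLOCK);
     registry->register_jump_thread (path);

   Here ENTRY_EDGE and THRU_EDGE are placeholder names for the incoming
   edge and the edge whose source block gets duplicated.  */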

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file,
                       const vec<jump_thread_edge *> &path,
                       bool registering)
{
  if (registering)
    fprintf (dump_file,
             "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
             dbg_cnt_counter (registered_jump_thread),
             path[0]->e->src->index, path[0]->e->dest->index);
  else
    fprintf (dump_file,
             "  Cancelling jump thread: (%d, %d) incoming edge; ",
             path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
         of a jump thread turns out to be a constant address.  We dump
         those paths when debugging, so we have to be prepared for that
         possibility here.  */
      if (path[i]->e == NULL)
        continue;

      fprintf (dump_file, " (%d, %d) ",
               path[i]->e->src->index, path[i]->e->dest->index);
      switch (path[i]->type)
        {
        case EDGE_COPY_SRC_JOINER_BLOCK:
          fprintf (dump_file, "joiner");
          break;
        case EDGE_COPY_SRC_BLOCK:
          fprintf (dump_file, "normal");
          break;
        case EDGE_NO_COPY_SRC_BLOCK:
          fprintf (dump_file, "nocopy");
          break;
        default:
          gcc_unreachable ();
        }

      if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
        fprintf (dump_file, " (back)");
    }
  fprintf (dump_file, "; \n");
}

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> &path)
{
  dump_jump_thread_path (stderr, path, true);
}

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> *path)
{
  debug (*path);
}

/* Release the memory associated with PATH, and if dumping is enabled,
   dump out the reason why the thread was canceled.  */

static void
cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (reason)
        fprintf (dump_file, "%s: ", reason);

      dump_jump_thread_path (dump_file, *path, false);
      fprintf (dump_file, "\n");
    }
  path->release ();
}

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the block index of the final edge in the path.  */

inline hashval_t
redirection_data::hash (const redirection_data *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
          || (*path1)[i]->e != (*path2)[i]->e)
        return false;
    }

  return true;
}
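
/* Note that element 0 of a path (the incoming edge) is deliberately
   ignored by both hash and equal above: two paths that differ only in
   their incoming edge are treated as the same thread destination, and
   the additional incoming edges are instead chained onto the entry's
   incoming_edges list by lookup_redirection_data.  */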

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* If we append debug stmts to the template block after creating it,
     this iterator won't be the last one in the block, and further
     copies of the template block shouldn't get debug stmts after
     it.  */
  gimple_stmt_iterator template_last_to_copy;

  /* Blocks duplicated for the thread.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;

  // Jump threading statistics.
  unsigned long num_threaded_edges;
};

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)

/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
        {
          free_dom_edge_info (e);
          remove_edge (e);
        }
      else
        {
          e->probability = profile_probability::always ();
          ei_next (&ei);
        }
    }

  /* If the remaining edge is a loop exit, there must have been a
     removed edge that was not a loop exit.

     In that case BB and possibly other blocks were previously
     in the loop, but are now outside the loop.  Thus, we need
     to update the loop structures.  */
  if (single_succ_p (bb)
      && loop_outer (bb->loop_father)
      && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
    loops_state_set (LOOPS_NEED_FIXUP);
}

/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  */

static void
create_block_for_threading (basic_block bb,
                            struct redirection_data *rd,
                            unsigned int count,
                            bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    {
      e->aux = NULL;

      /* If we duplicate a block with an outgoing edge marked as
         EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
         leak out of the current pass.

         It would be better to simplify switch statements and remove
         the edges before we get here, but the sequencing is nontrivial.  */
      e->flags &= ~EDGE_IGNORE;
    }

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->count = profile_count::uninitialized ();
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}

/* Given an outgoing edge E, lookup and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

redirection_data *
fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = m_redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
         relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
         to the list of incoming edges associated with E.  */
      if (insert)
        {
          struct el *el = XNEW (struct el);
          el->next = elt->incoming_edges;
          el->e = e;
          elt->incoming_edges = el;
        }

      return elt;
    }
}

/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
   to see if it has a constant value in a flow sensitive manner.  Set
   LOCUS to the location of the constant phi arg and return the value.
   Return DEF directly if PATH is NULL or IDX is zero.
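
   For example (a hypothetical fragment): if DEF is x_1 defined by
   x_1 = PHI <0(A), y_2(B)> in some block J, and the path being threaded
   reaches J via the edge from A, then backtracking finds the constant 0
   and returns it along with the location of that PHI argument.  */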

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
                         basic_block bb, int idx, location_t *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if def has constant
     value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
        {
          arg = gimple_phi_arg_def (def_phi, e->dest_idx);
          if (is_gimple_min_invariant (arg))
            {
              *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
              return arg;
            }
          break;
        }
    }

  return def;
}

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack jump threading PATH from node IDX to see if the arg
   has a constant value; if so, copy the constant value instead of the
   argument itself.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
               vec<jump_thread_edge *> *path, int idx)
{
  gphi_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = gimple_phi_arg_def (phi, src_indx);
      location_t locus = gimple_phi_arg_location (phi, src_indx);

      if (TREE_CODE (def) == SSA_NAME
          && !virtual_operand_p (gimple_phi_result (phi)))
        def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.
   PATH and IDX are used to check if the new PHI argument has a constant
   value in a flow sensitive manner.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb,
                         vec<jump_thread_edge *> *path, int idx)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2, path, idx);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the node in the jump threading path at
   which we start checking whether the new PHI argument has a
   constant value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
                                         basic_block bb, int idx)
{
  edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematical due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated.  Otherwise,
   return FALSE.  */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
                                 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
          || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
        return true;
    }
  return false;
}


/* Compute the amount of profile count coming into the jump threading
   path stored in RD that we are duplicating, returned in
   PATH_IN_COUNT_PTR, as well as the amount of count flowing out of the
   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
   identify blocks duplicated for jump threading, which have duplicated
   edges that need to be ignored in the analysis.  Return true if the path
   contains a joiner, false otherwise.

   In the non-joiner case, this is straightforward - all the counts
   flowing into the jump threading path should flow through the duplicated
   block and out of the duplicated path.

   In the joiner case, it is very tricky.  Some of the counts flowing into
   the original path go offpath at the joiner.  The problem is that while
   we know how much total count goes off-path in the original control flow,
   we don't know how many of the counts corresponding to just the jump
   threading path go offpath at the joiner.

   For example, assume we have the following control flow and identified
   jump threading paths:

                A       B       C
                 \      |      /
               Ea \     |Eb   / Ec
                   \    |    /
                    v   v   v
                        J       <-- Joiner
                       / \
                  Eoff/   \Eon
                     /     \
                    v       v
                  Soff     Son  <--- Normal
                           / \
                        Ed/   \ Ee
                         /     \
                        v       v
                        D       E

            Jump threading paths: A -> J -> Son -> D (path 1)
                                  C -> J -> Son -> E (path 2)

   Note that the control flow could be more complicated:
   - Each jump threading path may have more than one incoming edge.  I.e. A and
     Ea could represent multiple incoming blocks/edges that are included in
     path 1.
   - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     before or after the "normal" copy block).  These are not duplicated onto
     the jump threading path, as they are single-successor.
   - Any of the blocks along the path may have other incoming edges that
     are not part of any jump threading path, but add profile counts along
     the path.

   In the above example, after all jump threading is complete, we will
   end up with the following control flow:

              A          B            C
              |          |            |
            Ea|          |Eb          |Ec
              |          |            |
              v          v            v
             Ja          J           Jc
             / \        / \Eon'     / \
        Eona/   \  ---/---\--------    \Eonc
           /     \/  /     \            \
          v      vv v       v            v
         Sona    Soff      Son          Sonc
           \                /\           /
            \___________   /  \    ____/
                        \ /    \  /
                         vv     vv
                         D       E

   The main issue to notice here is that when we are processing path 1
   (A->J->Son->D) we need to figure out the outgoing edge weights to
   the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
   sum of the incoming weights to D remain Ed.  The problem with simply
   assuming that Ja (and Jc when processing path 2) has the same outgoing
   probabilities to its successors as the original block J, is that after
   all paths are processed and other edges/counts removed (e.g. none
   of Ec will reach D after processing path 2), we may end up with not
   enough count flowing along duplicated edge Sona->D.

   Therefore, in the case of a joiner, we keep track of all counts
   coming in along the current path, as well as from predecessors not
   on any jump threading path (Eb in the above example).  While we
   first assume that the duplicated Eona for Ja->Sona has the same
   probability as the original, we later compensate for other jump
   threading paths that may eliminate edges.  We do that by keeping
   track of all counts coming into the original path that are not in a
   jump thread (Eb in the above example, but as noted earlier, there
   could be other predecessors incoming to the path at various points,
   such as at Son).  Call this cumulative non-path count coming into the
   path before D Enonpath.  We then ensure that the count from Sona->D
   is at least as big as (Ed - Enonpath), but no bigger than the minimum
   weight along the jump threading path.  The probabilities of both the
   original and duplicated joiner block J and Ja will be adjusted
   accordingly after the updates.  */
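
/* In short: for a joiner path, the final amount flowing out of the
   duplicated path is clamped below and above, roughly (a summary of the
   logic implemented below, where estimated_out is the scaled estimate
   computed while walking the path):

     path_out_count = MIN (min_path_count,
                           MAX (estimated_out, elast_count - nonpath_count))

   with the MAX step applied only when profile correction is needed.  */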

static bool
compute_path_counts (struct redirection_data *rd,
                     ssa_local_info_t *local_info,
                     profile_count *path_in_count_ptr,
                     profile_count *path_out_count_ptr)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count nonpath_count = profile_count::zero ();
  bool has_joiner = false;
  profile_count path_in_count = profile_count::zero ();

  /* Start by accumulating incoming edge counts to the path's first bb
     into a couple buckets:
       path_in_count: total count of incoming edges that flow into the
                      current path.
       nonpath_count: total count of incoming edges that are not
                      flowing along *any* path.  These are the counts
                      that will still flow along the original path after
                      all path duplication is done by potentially multiple
                      calls to this routine.
     (any other incoming edge counts are for a different jump threading
     path that will be handled by a later call to this routine.)
     To make this easier, start by recording all incoming edges that flow into
     the current path in a bitmap.  We could add up the path's incoming edge
     counts here, but we still need to walk all the first bb's incoming edges
     below to add up the counts of the other edges not included in this jump
     threading path.  */
  struct el *next, *el;
  auto_bitmap in_edge_srcs;
  for (el = rd->incoming_edges; el; el = next)
    {
      next = el->next;
      bitmap_set_bit (in_edge_srcs, el->e->src->index);
    }
  edge ein;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
      /* Simply check the incoming edge src against the set captured above.  */
      if (ein_path
          && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
        {
          /* It is necessary but not sufficient that the last path edges
             are identical.  There may be different paths that share the
             same last path edge in the case where the last edge has a nocopy
             source block.  */
          gcc_assert (ein_path->last ()->e == elast);
          path_in_count += ein->count ();
        }
      else if (!ein_path)
        {
          /* Keep track of the incoming edges that are not on any jump-threading
             path.  These counts will still flow out of original path after all
             jump threading is complete.  */
          nonpath_count += ein->count ();
        }
    }

  /* Now compute the fraction of the total count coming into the first
     path bb that is from the current threading path.  */
  profile_count total_count = e->dest->count;
  /* Handle incoming profile insanities.  */
  if (total_count < path_in_count)
    path_in_count = total_count;
  profile_probability onpath_scale = path_in_count.probability_in (total_count);

  /* Walk the entire path to do some more computation in order to estimate
     how much of the path_in_count will flow out of the duplicated threading
     path.  In the non-joiner case this is straightforward (it should be
     the same as path_in_count, although we will handle incoming profile
     insanities by setting it equal to the minimum count along the path).

     In the joiner case, we need to estimate how much of the path_in_count
     will stay on the threading path after the joiner's conditional branch.
     We don't really know for sure how much of the counts
     associated with this path go to each successor of the joiner, but we'll
     estimate based on the fraction of the total count coming into the path's
     first bb that was from the threading paths (computed above in
     onpath_scale).  Afterwards, we will need to do some fixup to account for
     other threading paths and possible profile insanities.

     In order to estimate the joiner case's counts we also need to update
     nonpath_count with any additional counts coming into the path.  Other
     blocks along the path may have additional predecessors from outside
     the path.  */
  profile_count path_out_count = path_in_count;
  profile_count min_path_count = path_in_count;
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      profile_count cur_count = epath->count ();
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          has_joiner = true;
          cur_count = cur_count.apply_probability (onpath_scale);
        }
      /* In the joiner case we need to update nonpath_count for any edges
         coming into the path that will contribute to the count flowing
         into the path successor.  */
      if (has_joiner && epath != elast)
        {
          /* Look for other incoming edges after joiner.  */
          FOR_EACH_EDGE (ein, ei, epath->dest->preds)
            {
              if (ein != epath
                  /* Ignore in edges from blocks we have duplicated for a
                     threading path, which have duplicated edge counts until
                     they are redirected by an invocation of this routine.  */
                  && !bitmap_bit_p (local_info->duplicate_blocks,
                                    ein->src->index))
                nonpath_count += ein->count ();
            }
        }
      if (cur_count < path_out_count)
        path_out_count = cur_count;
      if (epath->count () < min_path_count)
        min_path_count = epath->count ();
    }

  /* We computed path_out_count above assuming that this path targeted
     the joiner's on-path successor with the same likelihood as it
     reached the joiner.  However, other thread paths through the joiner
     may take a different path through the normal copy source block
     (i.e. they have a different elast), meaning that they do not
     contribute any counts to this path's elast.  As a result, it may
     turn out that this path must have more count flowing to the on-path
     successor of the joiner.  Essentially, all of this path's elast
     count must be contributed by this path and any nonpath counts
     (since any path through the joiner with a different elast will not
     include a copy of this elast in its duplicated path).
     So ensure that this path's path_out_count is at least the
     difference between elast->count () and nonpath_count.  Otherwise the edge
     counts after threading will not be sane.  */
  if (local_info->need_profile_correction
      && has_joiner && path_out_count < elast->count () - nonpath_count)
    {
      path_out_count = elast->count () - nonpath_count;
      /* But neither can we go above the minimum count along the path
         we are duplicating.  This can be an issue due to profile
         insanities coming in to this pass.  */
      if (path_out_count > min_path_count)
        path_out_count = min_path_count;
    }

  *path_in_count_ptr = path_in_count;
  *path_out_count_ptr = path_out_count;
  return has_joiner;
}


/* Update the counts for both an original path edge EPATH and its
   duplicate EDUP.  The duplicate source block will get a count of
   PATH_IN_COUNT, and the duplicate edge EDUP will have a count of
   PATH_OUT_COUNT.  */
static void
update_profile (edge epath, edge edup, profile_count path_in_count,
                profile_count path_out_count)
{
  /* First update the duplicated block's count.  */
  if (edup)
    {
      basic_block dup_block = edup->src;

      /* Edup's count is reduced by path_out_count.  We need to redistribute
         probabilities to the remaining edges.  */

      edge esucc;
      edge_iterator ei;
      profile_probability edup_prob
        = path_out_count.probability_in (path_in_count);

      /* Either scale up or down the remaining edges.  Probabilities
         are always in the range <0,1> and thus we can't do both in the
         same loop.  */
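      /* For example (illustrative numbers only): if EDUP currently has
         probability 0.8 and edup_prob is 0.5, the remaining successors
         jointly held 0.2 and must now hold 0.5, so each is divided by
         rev_scale = 0.2 / 0.5, i.e. multiplied by 2.5.  */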
      if (edup->probability > edup_prob)
        {
          profile_probability rev_scale
            = (profile_probability::always () - edup->probability)
              / (profile_probability::always () - edup_prob);
          FOR_EACH_EDGE (esucc, ei, dup_block->succs)
            if (esucc != edup)
              esucc->probability /= rev_scale;
        }
      else if (edup->probability < edup_prob)
        {
          profile_probability scale
            = (profile_probability::always () - edup_prob)
              / (profile_probability::always () - edup->probability);
          FOR_EACH_EDGE (esucc, ei, dup_block->succs)
            if (esucc != edup)
              esucc->probability *= scale;
        }
      if (edup_prob.initialized_p ())
        edup->probability = edup_prob;

      gcc_assert (!dup_block->count.initialized_p ());
      dup_block->count = path_in_count;
    }

  if (path_in_count == profile_count::zero ())
    return;

  profile_count final_count = epath->count () - path_out_count;

  /* Now update the original block's count in the
     opposite manner - remove the counts/freq that will flow
     into the duplicated block.  Handle underflow due to precision/
     rounding issues.  */
  epath->src->count -= path_in_count;

  /* Next update this path edge's original and duplicated counts.  We know
     that the duplicated path will have path_out_count flowing
     out of it (in the joiner case this is the count along the duplicated path
     out of the duplicated joiner).  This count can then be removed from the
     original path edge.  */

  edge esucc;
  edge_iterator ei;
  profile_probability epath_prob = final_count.probability_in (epath->src->count);

  if (epath->probability > epath_prob)
    {
      profile_probability rev_scale
        = (profile_probability::always () - epath->probability)
          / (profile_probability::always () - epath_prob);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
        if (esucc != epath)
          esucc->probability /= rev_scale;
    }
  else if (epath->probability < epath_prob)
    {
      profile_probability scale
        = (profile_probability::always () - epath_prob)
          / (profile_probability::always () - epath->probability);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
        if (esucc != epath)
          esucc->probability *= scale;
    }
  if (epath_prob.initialized_p ())
    epath->probability = epath_prob;
}

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
                               ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count path_in_count = profile_count::zero ();
  profile_count path_out_count = profile_count::zero ();

  /* First determine how much profile count to move from original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
                                         &path_in_count, &path_out_count);

  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through a joiner block, then we want
         to keep its control statement and redirect an outgoing edge.
         Else we want to remove the control statement & edges, then create
         a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          edge victim;
          edge e2;

          gcc_assert (has_joiner);

          /* This updates the PHIs at the destination of the duplicate
             block.  Pass 0 instead of i if we are threading a path which
             has multiple incoming edges.  */
          update_destination_phis (local_info->bb, rd->dup_blocks[count],
                                   path, multi_incomings ? 0 : i);

          /* Find the edge from the duplicate block to the block we're
             threading through.  That's the edge we want to redirect.  */
          victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

          /* If there are no remaining blocks on the path to duplicate,
             then redirect VICTIM to the final destination of the jump
             threading path.  */
          if (!any_remaining_duplicated_blocks (path, i))
            {
              if (victim->dest != elast->dest)
                {
                  e2 = redirect_edge_and_branch (victim, elast->dest);
                  /* If we redirected the edge, then we need to copy PHI
                     arguments at the target.  If the edge already existed
                     (e2 != victim case), then the PHIs in the target
                     already have the correct arguments.  */
                  if (e2 == victim)
                    copy_phi_args (e2->dest, elast, e2,
                                   path, multi_incomings ? 0 : i);
                }
              else
                e2 = victim;
            }
          else
            {
              /* Redirect VICTIM to the next duplicated block in the path.  */
              e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

              /* We need to update the PHIs in the next duplicated block.  We
                 want the new PHI args to have the same value as they had
                 in the source of the next duplicate block.

                 Thus, we need to know which edge we traversed into the
                 source of the duplicate.  Furthermore, we may have
                 traversed many edges to reach the source of the duplicate.

                 Walk through the path starting at element I until we
                 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
                 the edge from the prior element.  */
              for (unsigned int j = i + 1; j < path->length (); j++)
                {
                  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
                    {
                      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
                      break;
                    }
                }
            }

          /* Update the counts of both the original block
             and path edge, and the duplicates.  The path duplicate's
             incoming counts are the totals for all edges
             incoming to this jump threading path computed earlier.
             And we know that the duplicated path will have path_out_count
             flowing out of it (i.e. along the duplicated path out of the
             duplicated joiner).  */
          update_profile (epath, e2, path_in_count, path_out_count);
        }
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
        {
          remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
          create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
                                                   multi_incomings ? 0 : i);
          if (count == 1)
            single_succ_edge (rd->dup_blocks[1])->aux = NULL;

          /* Update the counts of both the original block
             and path edge, and the duplicates.  Since we are now after
             any joiner that may have existed on the path, the count
             flowing along the duplicated threaded path is path_out_count.
             If we didn't have a joiner, then path_out_count is the same
             as path_in_count, the sum of the counts along all incoming
             edges to the thread path.  If we had a joiner, it was reduced
             to the count along the duplicated path edge out of the
             duplicated joiner.  */
          update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
                          path_out_count, path_out_count);
        }
      else
        {
          /* No copy case.  In this case we don't have an equivalent block
             on the duplicated thread path to update, but we do need
             to remove the portion of the counts that were moved
             to the duplicated path from the counts flowing through
             this block on the original path.  Since all the no-copy edges
             are after any joiner, the removed count is the same as
             path_out_count.  */
          update_profile (epath, NULL, path_out_count, path_out_count);
        }

      /* Increment the index into the duplicated path when we processed
         a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
          || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
        {
          count++;
        }
    }
}

/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
                       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
          || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          create_block_for_threading ((*path)[i]->e->src, rd, 1,
                                      &local_info->duplicate_blocks);
          break;
        }
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0,
                                  &local_info->duplicate_blocks);
      local_info->template_block = rd->dup_blocks[0];
      local_info->template_last_to_copy
        = gsi_last_bb (local_info->template_block);

      /* We do not create any outgoing edges for the template.  We will
         take care of that in a later traversal.  That way we do not
         create edges that are going to just be deleted.  */
    }
  else
    {
      gimple_seq seq = NULL;
      if (gsi_stmt (local_info->template_last_to_copy)
          != gsi_stmt (gsi_last_bb (local_info->template_block)))
        {
          if (gsi_end_p (local_info->template_last_to_copy))
            {
              seq = bb_seq (local_info->template_block);
              set_bb_seq (local_info->template_block, NULL);
            }
          else
            seq = gsi_split_seq_after (local_info->template_last_to_copy);
        }
      create_block_for_threading (local_info->template_block, rd, 0,
                                  &local_info->duplicate_blocks);
      if (seq)
        {
          if (gsi_end_p (local_info->template_last_to_copy))
            set_bb_seq (local_info->template_block, seq);
          else
            gsi_insert_seq_after (&local_info->template_last_to_copy,
                                  seq, GSI_SAME_STMT);
        }

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
         block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  if (MAY_HAVE_DEBUG_STMTS)
    {
      /* Copy debug stmts from each NO_COPY src block to the block
         that would have been its predecessor, if we can append to it
         (we can't add stmts after a block-ending stmt), or prepending
         to the duplicate of the successor, if there is one.  If
         there's no duplicate successor, we'll mostly drop the blocks
         on the floor; propagate_threaded_block_debug_into, called
         elsewhere, will consolidate and preserve the effects of the
         binds, but none of the markers.  */
      gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
      if (!gsi_end_p (copy_to))
        {
          if (stmt_ends_bb_p (gsi_stmt (copy_to)))
            {
              if (rd->dup_blocks[1])
                copy_to = gsi_after_labels (rd->dup_blocks[1]);
              else
                copy_to = gsi_none ();
            }
          else
            gsi_next (&copy_to);
        }
      for (unsigned int i = 2, j = 0; i < path->length (); i++)
        if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
            && gsi_bb (copy_to))
          {
            for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
                 !gsi_end_p (gsi); gsi_next (&gsi))
              {
                if (!is_gimple_debug (gsi_stmt (gsi)))
                  continue;
                gimple *stmt = gsi_stmt (gsi);
                gimple *copy = gimple_copy (stmt);
                gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
              }
          }
        else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
                 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
          {
            j++;
            gcc_assert (j < 2);
            copy_to = gsi_last_bb (rd->dup_blocks[j]);
            if (!gsi_end_p (copy_to))
              {
                if (stmt_ends_bb_p (gsi_stmt (copy_to)))
                  copy_to = gsi_none ();
                else
                  gsi_next (&copy_to);
              }
          }
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
                          ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

static int
ssa_redirect_edges (struct redirection_data **slot,
                    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this hash table
     entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
         avoids the need for another list walk when we destroy the hash
         table.  */
      next = el->next;
      free (el);

      local_info->num_threaded_edges++;

      if (rd->dup_blocks[0])
        {
          edge e2;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

          /* Redirect the incoming edge (possibly to the joiner block) to the
             appropriate duplicate block.  */
          e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
          gcc_assert (e == e2);
          flush_pending_stmts (e2);
        }

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
         to clear it will cause all kinds of unpleasant problems later.  */
      path->release ();
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
             || is_gimple_debug (gsi_stmt (gsi))
             || gimple_nop_p (gsi_stmt (gsi))
             || gimple_clobber_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}
1397 | |
1398 | /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB |
1399 | is reached via one or more specific incoming edges, we know which |
1400 | outgoing edge from BB will be traversed. |
1401 | |
1402 | We want to redirect those incoming edges to the target of the |
1403 | appropriate outgoing edge. Doing so avoids a conditional branch |
1404 | and may expose new optimization opportunities. Note that we have |
1405 | to update dominator tree and SSA graph after such changes. |
1406 | |
1407 | The key to keeping the SSA graph update manageable is to duplicate |
1408 | the side effects occurring in BB so that those side effects still |
1409 | occur on the paths which bypass BB after redirecting edges. |
1410 | |
1411 | We accomplish this by creating duplicates of BB and arranging for |
1412 | the duplicates to unconditionally pass control to one specific |
1413 | successor of BB. We then revector the incoming edges into BB to |
1414 | the appropriate duplicate of BB. |
1415 | |
1416 | If NOLOOP_ONLY is true, we only perform the threading as long as it |
1417 | does not affect the structure of the loops in a nontrivial way. |
1418 | |
1419 | If JOINERS is true, then thread through joiner blocks as well. */ |
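
/* As an illustrative source-level sketch (hypothetical, not from any
   particular testcase), the canonical situation this machinery targets
   looks like

     if (a_1 != 0)
       x_2 = 1;
     else
       x_3 = 2;
     # x_4 = PHI <x_2, x_3>
     if (x_4 == 1)	<-- BB; outcome known per incoming edge
       foo ();

   The incoming edge from the "then" arm can be redirected to a
   duplicate of BB whose single successor is the edge leading to
   foo (), removing the second conditional on that path.  */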

bool
fwd_jt_path_registry::thread_block_1 (basic_block bb,
                                      bool noloop_only,
                                      bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
  local_info.need_profile_correction = false;
  local_info.num_threaded_edges = 0;

  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges, such linear searches can get expensive.  */
  m_redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  edge last = NULL;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
        continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
          || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
        continue;

      /* When a NO_COPY_SRC block becomes non-empty, cancel the path.  */
      if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
        {
          auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
          if (!gsi_end_p (gsi)
              && !is_ctrl_stmt (gsi_stmt (gsi)))
            {
              cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
              e->aux = NULL;
              continue;
            }
        }

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
        {
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges.  */

          /* One case occurs when there was a loop header buried in a jump
             threading path that crosses loop boundaries.  We do not try
             to thread this elsewhere, so just cancel the jump threading
             request by clearing the AUX field now.  */
          if (bb->loop_father != e2->src->loop_father
              && (!loop_exit_edge_p (e2->src->loop_father, e2)
                  || flow_loop_nested_p (bb->loop_father,
                                         e2->dest->loop_father)))
            {
              /* Since this case is not handled by our special code
                 to thread through a loop header, we must explicitly
                 cancel the threading request here.  */
              cancel_thread (path, "Threading through unhandled loop header");
              e->aux = NULL;
              continue;
            }

          /* Another case occurs when trying to thread through our
             own loop header, possibly from inside the loop.  We will
             thread these later.  */
          unsigned int i;
          for (i = 1; i < path->length (); i++)
            {
              if ((*path)[i]->e->src == bb->loop_father->header
                  && (!loop_exit_edge_p (bb->loop_father, e2)
                      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
                break;
            }

          if (i != path->length ())
            continue;

          /* Loop parallelization can be confused by the result of
             threading through the loop exit test back into the loop.
             However, threading those jumps seems to help other code.

             I have been unable to find anything related to the shape of
             the CFG, the contents of the affected blocks, etc. which would
             allow a more sensible test than the one below, which merely
             avoids the optimization when parallelizing loops.  */
          if (flag_tree_parallelize_loops > 1)
            {
              for (i = 1; i < path->length (); i++)
                if (bb->loop_father == e2->src->loop_father
                    && loop_exits_from_bb_p (bb->loop_father,
                                             (*path)[i]->e->src)
                    && !loop_exit_edge_p (bb->loop_father, e2))
                  break;

              if (i != path->length ())
                {
                  cancel_thread (path, "Threading through loop exit");
                  e->aux = NULL;
                  continue;
                }
            }
        }

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table.  */
      lookup_redirection_data (e, INSERT);

      /* When we have thread paths through a common joiner with different
         final destinations, then we may need corrections to deal with
         profile insanities.  See the big comment before
         compute_path_counts.  */
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          if (!last)
            last = e2;
          else if (e2 != last)
            local_info.need_profile_correction = true;
        }
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  delete m_redirection_data;
  m_redirection_data = NULL;

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  BITMAP_FREE (local_info.duplicate_blocks);
  local_info.duplicate_blocks = NULL;

  m_num_threaded_edges += local_info.num_threaded_edges;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}

/* Wrapper for thread_block_1 so that we can first handle jump
   thread paths which do not involve copying joiner blocks, then
   handle jump thread paths which have joiner blocks.

   By doing things this way we can be as aggressive as possible and
   not worry that copying a joiner block will create a jump threading
   opportunity.  */

bool
fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
{
  bool retval;
  retval = thread_block_1 (bb, noloop_only, false);
  retval |= thread_block_1 (bb, noloop_only, true);
  return retval;
}

/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
          && bb != dbds_ce_stop);
}

/* Evaluate the dominance relationship between the latch of LOOP and BB;
   return the resulting state.  */
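
/* For instance, in a simple loop

     header -> body -> latch -> header

   with BB == body, BB dominates the latch and the latch is reachable
   from BB, so the expected result is DOMST_DOMINATING.  (This assumes
   the enum values follow their names: DOMST_NONDOMINATING when BB does
   not dominate the latch, DOMST_LOOP_BROKEN when the latch is no
   longer reachable from the header at all.)  */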

enum bb_dom_status
determine_bb_domination_status (class loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
  {
    bool ok = false;

    FOR_EACH_EDGE (e, ei, bb->preds)
      {
        if (e->src == loop->header)
          {
            ok = true;
            break;
          }
      }

    if (!ok)
      return DOMST_NONDOMINATING;
  }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
                                bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
        if (e->src == loop->header)
          {
            free (bblocks);
            return DOMST_NONDOMINATING;
          }
        if (e->src == bb)
          bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

bool
fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
                                                  bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop
     (although there are cases where it might be appropriate, it is difficult
     to decide, and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
  */

  /* Threading through the header won't improve the code if the header has
     just one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (!may_peel_loop_headers && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If the latch is not threaded, and there is a header
                 edge that is not threaded, we would create a loop
                 with multiple entries.  */
              goto fail;
            }

          vec<jump_thread_edge *> *path = THREAD_PATH (e);

          if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
            goto fail;
          tgt_edge = (*path)[1]->e;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create a loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to an empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      mark_loop_for_removal (loop);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  basic_block new_preheader;

  /* Now consider the case where entry edges are redirected to the new
     entry block.  Remember one entry edge, so that we can find the new
     preheader (its destination after threading).  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      if (e->aux)
        break;
    }

  /* The duplicate of the header is the new preheader of the loop.  Ensure
     that it is placed correctly in the loop hierarchy.  */
  set_loop_copy (loop, loop_outer (loop));

  thread_block (header, false);
  set_loop_copy (loop, NULL);
  new_preheader = e->dest;

  /* Create the new latch block.  This is always necessary, as the latch
     must have only a single successor, but the original header had at
     least two successors.  */
  loop->latch = NULL;
  mfb_kj_edge = single_succ_edge (new_preheader);
  loop->header = mfb_kj_edge->dest;
  latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
  loop->header = latch->dest;
  loop->latch = latch->src;
  return true;

 fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          cancel_thread (path, "Failure in thread_through_loop_header");
          e->aux = NULL;
        }
    }
  return false;
}

/* E1 and E2 are edges into the same basic block.  Return TRUE if the
   PHI arguments associated with those edges are equal or there are no
   PHI arguments, otherwise return FALSE.  */
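
/* For instance (a made-up example), given the PHI

     x_5 = PHI <1(E1), 1(E2), 7(E3)>

   the arguments associated with E1 and E2 are equal, so this function
   returns true for that pair, while the pair E1/E3 yields false.  */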

static bool
phi_args_equal_on_edges (edge e1, edge e2)
{
  gphi_iterator gsi;
  int indx1 = e1->dest_idx;
  int indx2 = e2->dest_idx;

  for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
                            gimple_phi_arg_def (phi, indx2), 0))
        return false;
    }
  return true;
}

/* Return the number of non-debug statements and non-virtual PHIs in a
   block.  */

static unsigned int
count_stmts_and_phis_in_block (basic_block bb)
{
  unsigned int num_stmts = 0;

  gphi_iterator gpi;
  for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
    if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
      num_stmts++;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt))
        num_stmts++;
    }

  return num_stmts;
}

/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCK set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

void
fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  auto_bitmap tmp;
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* It is possible to have jump threads in which one is a subpath
     of the other.  I.e., (A, B), (B, C), (C, D) where B is a joiner
     block, and (B, C), (C, D) where no joiner block exists.

     When this occurs, ignore the jump thread request with the joiner
     block.  It is totally subsumed by the simpler jump thread request.

     This results in less block copying and simpler CFGs.  More importantly,
     when we duplicate the joiner block, B, in this case we will create
     a new threading opportunity that we wouldn't be able to optimize
     until the next jump threading iteration.

     So first convert the jump thread requests which do not require a
     joiner block.  */
  for (i = 0; i < m_paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
          && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
        {
          edge e = (*path)[0]->e;
          e->aux = (void *)path;
          bitmap_set_bit (tmp, e->dest->index);
        }
    }

  /* Now iterate again, converting cases where we want to thread
     through a joiner block, but only if no other edge on the path
     already has a jump thread attached to it.  We do this in two passes,
     to avoid situations where the order in the paths vec can hide overlapping
     threads (the path is recorded on the incoming edge, so we would miss
     cases where the second path starts at a downstream edge on the same
     path).  First record all joiner paths, deleting any in the unexpected
     case where there is already a path for that incoming edge.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
          && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          /* Attach the path to the starting edge if none is yet recorded.  */
          if ((*path)[0]->e->aux == NULL)
            {
              (*path)[0]->e->aux = path;
              i++;
            }
          else
            {
              m_paths.unordered_remove (i);
              cancel_thread (path);
            }
        }
      else
        {
          i++;
        }
    }

  /* Second, look for paths that have any other jump thread attached to
     them, and either finish converting them or cancel them.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];
      edge e = (*path)[0]->e;

      if (path->length () > 1
          && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
        {
          unsigned int j;
          for (j = 1; j < path->length (); j++)
            if ((*path)[j]->e->aux != NULL)
              break;

          /* If we iterated through the entire path without exiting the loop,
             then we are good to go; record it.  */
          if (j == path->length ())
            {
              bitmap_set_bit (tmp, e->dest->index);
              i++;
            }
          else
            {
              e->aux = NULL;
              m_paths.unordered_remove (i);
              cancel_thread (path);
            }
        }
      else
        {
          i++;
        }
    }

  /* When optimizing for size, prune all thread paths where statement
     duplication is necessary.

     We walk the jump thread path looking for copied blocks.  There are
     two types of copied blocks.

     EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
     cancel the jump threading request when optimizing for size.

     EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
     will be killed by threading.  If threading does not kill all of
     its statements, then we should cancel the jump threading request
     when optimizing for size.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
        {
          FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
            if (e->aux)
              {
                vec<jump_thread_edge *> *path = THREAD_PATH (e);

                unsigned int j;
                for (j = 1; j < path->length (); j++)
                  {
                    bb = (*path)[j]->e->src;
                    if (redirection_block_p (bb))
                      ;
                    else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
                             || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
                                 && (count_stmts_and_phis_in_block (bb)
                                     != estimate_threading_killed_stmts (bb))))
                      break;
                  }

                if (j != path->length ())
                  {
                    cancel_thread (path);
                    e->aux = NULL;
                  }
                else
                  bitmap_set_bit (threaded_blocks, i);
              }
        }
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading through S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This must also occur after we truncate any jump threading paths
     as this scenario may only show up after truncation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              vec<jump_thread_edge *> *path = THREAD_PATH (e);
              bool have_joiner
                = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);

              if (have_joiner)
                {
                  basic_block joiner = e->dest;
                  edge final_edge = path->last ()->e;
                  basic_block final_dest = final_edge->dest;
                  edge e2 = find_edge (joiner, final_dest);

                  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
                    {
                      cancel_thread (path);
                      e->aux = NULL;
                    }
                }
            }
        }
    }

  /* Look for jump threading paths which cross multiple loop headers.

     The code to thread through loop headers will change the CFG in ways
     that invalidate the cached loop iteration information.  So we must
     detect that case and wipe the cached information.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->aux)
            {
              gcc_assert (loops_state_satisfies_p
                          (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
              vec<jump_thread_edge *> *path = THREAD_PATH (e);

              for (unsigned int i = 0, crossed_headers = 0;
                   i < path->length ();
                   i++)
                {
                  basic_block dest = (*path)[i]->e->dest;
                  basic_block src = (*path)[i]->e->src;
                  /* If we enter a loop.  */
                  if (flow_loop_nested_p (src->loop_father,
                                          dest->loop_father))
                    ++crossed_headers;
                  /* If we step from a block outside an irreducible region
                     to a block inside an irreducible region, then we have
                     crossed into a loop.  */
                  else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
                           && (dest->flags & BB_IRREDUCIBLE_LOOP))
                    ++crossed_headers;
                  if (crossed_headers > 1)
                    {
                      vect_free_loop_info_assumptions
                        ((*path)[path->length () - 1]->e->dest->loop_father);
                      break;
                    }
                }
            }
        }
    }
}

/* Verify that the REGION is a valid jump thread.  A jump thread is a special
   case of a SEME (Single Entry Multiple Exits) region in which all nodes in
   the REGION have exactly one incoming edge.  The only exception is the first
   block, which may not have been connected to the rest of the cfg yet.  */

DEBUG_FUNCTION void
verify_jump_thread (basic_block *region, unsigned n_region)
{
  for (unsigned i = 0; i < n_region; i++)
    gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
}

/* Return true when BB is one of the first N items in BBS.  */

static inline bool
bb_in_bbs (basic_block bb, basic_block *bbs, int n)
{
  for (int i = 0; i < n; i++)
    if (bb == bbs[i])
      return true;

  return false;
}

void
jt_path_registry::debug_path (FILE *dump_file, int pathno)
{
  vec<jump_thread_edge *> *p = m_paths[pathno];
  fprintf (dump_file, "path: ");
  for (unsigned i = 0; i < p->length (); ++i)
    fprintf (dump_file, "%d -> %d, ",
             (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
  fprintf (dump_file, "\n");
}

void
jt_path_registry::debug ()
{
  for (unsigned i = 0; i < m_paths.length (); ++i)
    debug_path (stderr, i);
}

/* Rewire a jump_thread_edge so that the source block is now a
   threaded source block.

   PATH_NUM is an index into the global path table PATHS.
   EDGE_NUM is the jump thread edge number into said path.

   Returns TRUE if we were able to successfully rewire the edge.  */

bool
back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
                                                    unsigned edge_num)
{
  vec<jump_thread_edge *> *path = m_paths[path_num];
  edge &e = (*path)[edge_num]->e;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
             e->src->index, e->dest->index);
  basic_block src_copy = get_bb_copy (e->src);
  if (src_copy == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
      return false;
    }
  edge new_edge = find_edge (src_copy, e->dest);
  /* If the previously threaded paths created a flow graph where we
     can no longer figure out where to go, give up.  */
  if (new_edge == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "ignoring candidate: we lost our way\n");
      return false;
    }
  e = new_edge;
  return true;
}

/* After a path has been jump threaded, adjust the remaining paths
   that are subsets of this path, so these paths can be safely
   threaded within the context of the new threaded path.

   For example, suppose we have just threaded:

   5 -> 6 -> 7 -> 8 -> 12   =>   5 -> 6' -> 7' -> 8' -> 12'

   And we have an upcoming threading candidate:
   5 -> 6 -> 7 -> 8 -> 15 -> 20

   This function adjusts the upcoming path into:
   8' -> 15 -> 20

   CURR_PATH_NUM is an index into the global paths table.  It
   specifies the path that was just threaded.  */

void
back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
{
  vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];

  /* Iterate through all the other paths and adjust them.  */
  for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
    {
      if (cand_path_num == curr_path_num)
        {
          ++cand_path_num;
          continue;
        }
      /* Make sure the candidate to adjust starts with the same path
         as the recently threaded path.  */
      vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
      if ((*cand_path)[0]->e != (*curr_path)[0]->e)
        {
          ++cand_path_num;
          continue;
        }
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "adjusting candidate: ");
          debug_path (dump_file, cand_path_num);
        }

      /* Chop off from the candidate path any prefix it shares with
         the recently threaded path.  */
      unsigned minlength = MIN (curr_path->length (), cand_path->length ());
      unsigned j;
      for (j = 0; j < minlength; ++j)
        {
          edge cand_edge = (*cand_path)[j]->e;
          edge curr_edge = (*curr_path)[j]->e;

          /* Once the prefix no longer matches, adjust the first
             non-matching edge to point from an adjusted edge to
             wherever it was going.  */
          if (cand_edge != curr_edge)
            {
              gcc_assert (cand_edge->src == curr_edge->src);
              if (!rewire_first_differing_edge (cand_path_num, j))
                goto remove_candidate_from_list;
              break;
            }
        }
      if (j == minlength)
        {
          /* If we consumed the max subgraph we could look at, and
             still didn't find any different edges, it's the
             last edge after MINLENGTH.  */
          if (cand_path->length () > minlength)
            {
              if (!rewire_first_differing_edge (cand_path_num, j))
                goto remove_candidate_from_list;
            }
          else if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
        }
      if (j > 0)
        {
          /* If we are removing everything, delete the entire candidate.  */
          if (j == cand_path->length ())
            {
            remove_candidate_from_list:
              cancel_thread (cand_path, "Adjusted candidate is EMPTY");
              m_paths.unordered_remove (cand_path_num);
              continue;
            }
          /* Otherwise, just remove the redundant sub-path.  */
          if (cand_path->length () - j > 1)
            cand_path->block_remove (0, j);
          else if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Dropping illformed candidate.\n");
        }
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "adjusted candidate: ");
          debug_path (dump_file, cand_path_num);
        }
      ++cand_path_num;
    }
}

/* Duplicates a jump-thread path of N_REGION basic blocks.
   The ENTRY edge is redirected to the duplicate of the region.

   Remove the last conditional statement in the last basic block in the REGION,
   and create a single fallthru edge pointing to the same destination as the
   EXIT edge.

   CURRENT_PATH_NO is an index into the global paths[] table
   specifying the jump-thread path.

   Returns false if it is unable to copy the region, true otherwise.  */
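
/* A hypothetical sketch of the transformation: for a path of blocks
   5 -> 8 -> 10 with ENTRY = 4->5 and EXIT = 10->12, the region
   {5, 8, 10} is copied to {5', 8', 10'}, the edge 4->5 is redirected
   to 5', the conditional terminating 10' is removed, and a fallthru
   edge 10'->12 is created in its place.  */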

bool
back_jt_path_registry::duplicate_thread_path (edge entry,
                                              edge exit,
                                              basic_block *region,
                                              unsigned n_region,
                                              unsigned current_path_no)
{
  unsigned i;
  class loop *loop = entry->dest->loop_father;
  edge exit_copy;
  edge redirected;
  profile_count curr_count;

  if (!can_copy_bbs_p (region, n_region))
    return false;

  /* Some sanity checking.  Note that we do not check for all possible
     misuses of the functions.  I.e., if you ask to copy something weird,
     it will work, but the state of structures probably will not be
     correct.  */
  for (i = 0; i < n_region; i++)
    {
      /* We do not handle subloops, i.e. all the blocks must belong to the
         same loop.  */
      if (region[i]->loop_father != loop)
        return false;
    }

  initialize_original_copy_tables ();

  set_loop_copy (loop, loop);

  basic_block *region_copy = XNEWVEC (basic_block, n_region);
  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
            split_edge_bb_loc (entry), false);

  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
     following code ensures that all the edges exiting the jump-thread path
     are redirected back to the original code: these edges are exceptions
     invalidating the property that is propagated by executing all the blocks
     of the jump-thread path in order.  */

  curr_count = entry->count ();

  for (i = 0; i < n_region; i++)
    {
      edge e;
      edge_iterator ei;
      basic_block bb = region_copy[i];

      /* Watch inconsistent profile.  */
      if (curr_count > region[i]->count)
        curr_count = region[i]->count;
      /* Scale current BB.  */
      if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
        {
          /* In the middle of the path we only scale the frequencies.
             In the last BB we need to update probabilities of outgoing edges
             because we know which one is taken on the threaded path.  */
          if (i + 1 != n_region)
            scale_bbs_frequencies_profile_count (region + i, 1,
                                                 region[i]->count - curr_count,
                                                 region[i]->count);
          else
            update_bb_profile_for_threading (region[i],
                                             curr_count,
                                             exit);
          scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
                                               region_copy[i]->count);
        }

      if (single_succ_p (bb))
        {
          /* Make sure the successor is the next node in the path.  */
          gcc_assert (i + 1 == n_region
                      || region_copy[i + 1] == single_succ_edge (bb)->dest);
          if (i + 1 != n_region)
            {
              curr_count = single_succ_edge (bb)->count ();
            }
          continue;
        }

      /* Special case the last block on the path: make sure that it does not
         jump back on the copied path, including back to itself.  */
      if (i + 1 == n_region)
        {
          FOR_EACH_EDGE (e, ei, bb->succs)
            if (bb_in_bbs (e->dest, region_copy, n_region))
              {
                basic_block orig = get_bb_original (e->dest);
                if (orig)
                  redirect_edge_and_branch_force (e, orig);
              }
          continue;
        }

      /* Redirect all other edges jumping to non-adjacent blocks back to the
         original code.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (region_copy[i + 1] != e->dest)
          {
            basic_block orig = get_bb_original (e->dest);
            if (orig)
              redirect_edge_and_branch_force (e, orig);
          }
        else
          {
            curr_count = e->count ();
          }
    }

  if (flag_checking)
    verify_jump_thread (region_copy, n_region);

  /* Remove the last branch in the jump thread path.  */
  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);

  /* And fixup the flags on the single remaining edge.  */
  edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
  fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
  fix_e->flags |= EDGE_FALLTHRU;

  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);

  if (e)
    {
      rescan_loop_exit (e, true, false);
      e->probability = profile_probability::always ();
    }

  /* Redirect the entry and add the phi node arguments.  */
  if (entry->dest == loop->header)
    mark_loop_for_removal (loop);
  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
  gcc_assert (redirected != NULL);
  flush_pending_stmts (entry);

  /* Add the other PHI node arguments.  */
  add_phi_args_after_copy (region_copy, n_region, NULL);

  free (region_copy);

  adjust_paths_after_duplication (current_path_no);

  free_original_copy_tables ();
  return true;
}

/* Return true when PATH is a valid jump-thread path.  */

static bool
valid_jump_thread_path (vec<jump_thread_edge *> *path)
{
  unsigned len = path->length ();

  /* Check that the path is connected.  */
  for (unsigned int j = 0; j < len - 1; j++)
    {
      edge e = (*path)[j]->e;
      if (e->dest != (*path)[j+1]->e->src)
        return false;
    }
  return true;
}

/* Remove any queued jump threads that include edge E.

   We don't actually remove them here, just record the edges into a
   hash table.  That way we can do the search once per iteration of
   DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */

void
fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
{
  if (!m_paths.exists () || !flag_thread_jumps)
    return;

  edge *slot = m_removed_edges->find_slot (e, INSERT);
  *slot = e;
}

/* Thread all paths that have been queued for jump threading, and
   update the CFG accordingly.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
   headers if it does not simplify the loop.

   Returns true if one or more edges were threaded.  */

bool
jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
{
  if (m_paths.length () == 0)
    return false;

  m_num_threaded_edges = 0;

  bool retval = update_cfg (peel_loop_headers);

  statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);

  if (retval)
    {
      loops_state_set (LOOPS_NEED_FIXUP);
      return true;
    }
  return false;
}

/* This is the backward threader version of thread_through_all_blocks
   using a generic BB copier.  */

bool
back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
{
  bool retval = false;
  hash_set<edge> visited_starting_edges;

  while (m_paths.length ())
    {
      vec<jump_thread_edge *> *path = m_paths[0];
      edge entry = (*path)[0]->e;

      /* Do not jump-thread twice from the same starting edge.

         Previously we only checked that we weren't threading twice
         from the same BB, but that was too restrictive.  Imagine a
         path that starts from GIMPLE_COND(x_123 == 0,...), where both
         edges out of this conditional yield paths that can be
         threaded (for example, both lead to an x_123==0 or x_123!=0
         conditional further down the line).  */
      if (visited_starting_edges.contains (entry)
          /* We may not want to realize this jump thread path for
             various reasons.  So check it first.  */
          || !valid_jump_thread_path (path))
        {
          /* Remove invalid jump-thread paths.  */
          cancel_thread (path, "Avoiding threading twice from same edge");
          m_paths.unordered_remove (0);
          continue;
        }

      unsigned len = path->length ();
      edge exit = (*path)[len - 1]->e;
      basic_block *region = XNEWVEC (basic_block, len - 1);

      for (unsigned int j = 0; j < len - 1; j++)
        region[j] = (*path)[j]->e->dest;

      if (duplicate_thread_path (entry, exit, region, len - 1, 0))
        {
          /* We do not update dominance info.  */
          free_dominance_info (CDI_DOMINATORS);
          visited_starting_edges.add (entry);
          retval = true;
          m_num_threaded_edges++;
        }

      path->release ();
      m_paths.unordered_remove (0);
      free (region);
    }
  return retval;
}

/* This is the forward threader version of thread_through_all_blocks,
   using a custom BB copier.  */

bool
fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
{
  bool retval = false;

  /* Remove any paths that referenced removed edges.  */
  if (m_removed_edges)
    for (unsigned i = 0; i < m_paths.length (); )
      {
        unsigned int j;
        vec<jump_thread_edge *> *path = m_paths[i];

        for (j = 0; j < path->length (); j++)
          {
            edge e = (*path)[j]->e;
            if (m_removed_edges->find_slot (e, NO_INSERT)
                || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
                     || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
                    && !can_duplicate_block_p (e->src)))
              break;
          }

        if (j != path->length ())
          {
            cancel_thread (path, "Thread references removed edge");
            m_paths.unordered_remove (i);
            continue;
          }
        i++;
      }

  auto_bitmap threaded_blocks;
  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* The order in which we process jump threads can be important.

     Consider if we have two jump threading paths A and B.  If the
     target edge of A is the starting edge of B and we thread path A
     first, then we create an additional incoming edge into B->dest that
     we cannot discover as a jump threading path on this iteration.

     If we instead thread B first, then the edge into B->dest will have
     already been redirected before we process path A and path A will
     naturally, with no further work, target the redirected path for B.

     A post-order is sufficient here.  Compute the ordering first, then
     process the blocks.  */
  if (!bitmap_empty_p (threaded_blocks))
    {
      int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
      unsigned int postorder_num = post_order_compute (postorder, false, false);
      for (unsigned int i = 0; i < postorder_num; i++)
        {
          unsigned int indx = postorder[i];
          if (bitmap_bit_p (threaded_blocks, indx))
            {
              basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
              retval |= thread_block (bb, true);
            }
        }
      free (postorder);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* All jump threading paths should have been resolved at this
     point.  Verify that is the case.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->preds)
        gcc_assert (e->aux == NULL);
    }

  free_original_copy_tables ();

  return retval;
}

bool
jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
{
  gcc_checking_assert (!path.is_empty ());
  edge entry = path[0]->e;
  edge exit = path[path.length () - 1]->e;
  bool seen_latch = false;
  int loops_crossed = 0;
  bool crossed_latch = false;
  bool crossed_loop_header = false;
  // Use ->dest here instead of ->src to ignore the first block.  The
  // first block is allowed to be in a different loop, since it'll be
  // redirected.  See similar comment in profitable_path_p: "we don't
  // care about that block...".
  loop_p loop = entry->dest->loop_father;
  loop_p curr_loop = loop;

  for (unsigned int i = 0; i < path.length (); i++)
    {
      edge e = path[i]->e;

      if (e == NULL)
        {
          // NULL outgoing edges on a path can happen for jumping to a
          // constant address.
          cancel_thread (&path, "Found NULL edge in jump threading path");
          return true;
        }

      if (loop->latch == e->src || loop->latch == e->dest)
        {
          seen_latch = true;
          // Like seen_latch, but excludes the first block.
          if (e->src != entry->src)
            crossed_latch = true;
        }

      if (e->dest->loop_father != curr_loop)
        {
          curr_loop = e->dest->loop_father;
          ++loops_crossed;
        }

      // ?? Avoid threading through loop headers that remain in the
      // loop, as such threadings tend to create sub-loops which
      // _might_ be OK ??.
      if (e->dest->loop_father->header == e->dest
          && !flow_loop_nested_p (exit->dest->loop_father,
                                  e->dest->loop_father))
        crossed_loop_header = true;

      if (flag_checking && !m_backedge_threads)
        gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
    }

  // If we crossed a loop into an outer loop without crossing the
  // latch, this is just an early exit from the loop.
  if (loops_crossed == 1
      && !crossed_latch
      && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
    return false;

  if (cfun->curr_properties & PROP_loop_opts_done)
    return false;

  if (seen_latch && empty_block_p (loop->latch))
    {
      cancel_thread (&path, "Threading through latch before loop opts "
                     "would create non-empty latch");
      return true;
    }
  if (loops_crossed)
    {
      cancel_thread (&path, "Path crosses loops");
      return true;
    }
  // The path should either start and end in the same loop or exit the
  // loop it starts in but never enter a loop.  This also catches
  // creating irreducible loops, not only rotation.
  if (entry->src->loop_father != exit->dest->loop_father
      && !flow_loop_nested_p (exit->src->loop_father,
                              entry->dest->loop_father))
    {
      cancel_thread (&path, "Path rotates loop");
      return true;
    }
  if (crossed_loop_header)
    {
      cancel_thread (&path, "Path crosses loop header but does not exit it");
      return true;
    }
  return false;
}

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   E is the edge we can thread, E2 is the new target edge, i.e., we
   are effectively recording that E->dest can be changed to E2->dest
   after fixing the SSA graph.

   Return TRUE if PATH was successfully threaded.  */

bool
jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
{
  gcc_checking_assert (flag_thread_jumps);

  if (!dbg_cnt (registered_jump_thread))
    {
      path->release ();
      return false;
    }

  if (cancel_invalid_paths (*path))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_jump_thread_path (dump_file, *path, true);

  m_paths.safe_push (path);
  return true;
}

/* Return how many uses of T there are within BB, as long as there
   aren't any uses outside BB.  If there are any uses outside BB,
   return -1 if there's at most one use within BB, or -2 if there is
   more than one use within BB.  */
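
/* E.g., two uses of T inside BB and none elsewhere yield 2; one use
   inside BB plus any use outside BB yields -1; two or more uses inside
   BB plus a use outside BB yield -2.  */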

static int
uses_in_bb (tree t, basic_block bb)
{
  int uses = 0;
  bool outside_bb = false;

  imm_use_iterator iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
        continue;

      if (gimple_bb (USE_STMT (use_p)) != bb)
        outside_bb = true;
      else
        uses++;

      if (outside_bb && uses > 1)
        return -2;
    }

  if (outside_bb)
    return -1;

  return uses;
}

/* Starting from the final control flow stmt in BB, assuming it will
   be removed, follow uses in to-be-removed stmts back to their defs
   and count how many defs are to become dead and be removed as
   well.  */
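
/* A hedged example: if BB ends in

     t_3 = a_1 < b_2;
     if (t_3 != 0) goto <then>; else goto <else>;

   and t_3 has no uses other than the GIMPLE_COND, threading kills both
   the control statement and the now-dead definition of t_3, so the
   estimate is 2 (plus one per non-virtual PHI when BB has exactly two
   predecessors).  */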

unsigned int
estimate_threading_killed_stmts (basic_block bb)
{
  int killed_stmts = 0;
  hash_map<tree, int> ssa_remaining_uses;
  auto_vec<gimple *, 4> dead_worklist;

  /* If the block has only two predecessors, threading will turn phi
     dsts into either src, so count them as dead stmts.  */
  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;

  if (drop_all_phis)
    for (gphi_iterator gsi = gsi_start_phis (bb);
         !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gphi *phi = gsi.phi ();
        tree dst = gimple_phi_result (phi);

        /* We don't count virtual PHIs as stmts in
           record_temporary_equivalences_from_phis.  */
        if (virtual_operand_p (dst))
          continue;

        killed_stmts++;
      }

  if (gsi_end_p (gsi_last_bb (bb)))
    return killed_stmts;

  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return killed_stmts;

  /* The control statement is always dead.  */
  killed_stmts++;
  dead_worklist.quick_push (stmt);
  while (!dead_worklist.is_empty ())
    {
      stmt = dead_worklist.pop ();

      ssa_op_iter iter;
      use_operand_p use_p;
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
        {
          tree t = USE_FROM_PTR (use_p);
          gimple *def = SSA_NAME_DEF_STMT (t);

          if (gimple_bb (def) == bb
              && (gimple_code (def) != GIMPLE_PHI
                  || !drop_all_phis)
              && !gimple_has_side_effects (def))
            {
              int *usesp = ssa_remaining_uses.get (t);
              int uses;

              if (usesp)
                uses = *usesp;
              else
                uses = uses_in_bb (t, bb);

              gcc_assert (uses);

              /* Don't bother recording the expected use count if we
                 won't find any further uses within BB.  */
              if (!usesp && (uses < -1 || uses > 1))
                {
                  usesp = &ssa_remaining_uses.get_or_insert (t);
                  *usesp = uses;
                }

              if (uses < 0)
                continue;

              --uses;
              if (usesp)
                *usesp = uses;

              if (!uses)
                {
                  killed_stmts++;
                  if (usesp)
                    ssa_remaining_uses.remove (t);
                  if (gimple_code (def) != GIMPLE_PHI)
                    dead_worklist.safe_push (def);
                }
            }
        }
    }

  if (dump_file)
    fprintf (dump_file, "threading bb %i kills %i stmts\n",
             bb->index, killed_stmts);

  return killed_stmts;
}