1/* Support routines for Splitting Paths to loop backedges
2 Copyright (C) 2015-2023 Free Software Foundation, Inc.
3 Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "tree.h"
26#include "gimple.h"
27#include "tree-pass.h"
28#include "tree-cfg.h"
29#include "cfganal.h"
30#include "cfgloop.h"
31#include "gimple-iterator.h"
32#include "tracer.h"
33#include "predict.h"
34#include "gimple-ssa.h"
35#include "tree-phinodes.h"
36#include "ssa-iterators.h"
37#include "fold-const.h"
38
39/* Given LATCH, the latch block in a loop, see if the shape of the
40 path reaching LATCH is suitable for being split by duplication.
41 If so, return the block that will be duplicated into its predecessor
42 paths. Else return NULL. */
43
44static basic_block
45find_block_to_duplicate_for_splitting_paths (basic_block latch)
46{
47 /* We should have simple latches at this point. So the latch should
48 have a single successor. This implies the predecessor of the latch
49 likely has the loop exit. And it's that predecessor we're most
50 interested in. To keep things simple, we're going to require that
51 the latch have a single predecessor too. */
52 if (single_succ_p (bb: latch) && single_pred_p (bb: latch))
53 {
54 basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
55 gcc_assert (single_pred_edge (latch)->src == bb);
56
57 /* If BB has been marked as not to be duplicated, then honor that
58 request. */
59 if (ignore_bb_p (bb))
60 return NULL;
61
62 gimple *last = gsi_stmt (i: gsi_last_nondebug_bb (bb));
63 /* The immediate dominator of the latch must end in a conditional. */
64 if (!last || gimple_code (g: last) != GIMPLE_COND)
65 return NULL;
66
67 /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
68 region. Verify that it is.
69
70 First, verify that BB has two predecessors (each arm of the
71 IF-THEN-ELSE) and two successors (the latch and exit) and that
72 all edges are normal. */
73 if (EDGE_COUNT (bb->preds) == 2
74 && !(EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
75 && !(EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX)
76 && EDGE_COUNT (bb->succs) == 2
77 && !(EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
78 && !(EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
79 {
80 /* Now verify that BB's immediate dominator ends in a
81 conditional as well. */
82 basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
83 gimple *last = gsi_stmt (i: gsi_last_nondebug_bb (bb: bb_idom));
84 if (!last || gimple_code (g: last) != GIMPLE_COND)
85 return NULL;
86
87 /* And that BB's immediate dominator's successors are the
88 predecessors of BB or BB itself. */
89 if (!(EDGE_PRED (bb, 0)->src == bb_idom
90 || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
91 || !(EDGE_PRED (bb, 1)->src == bb_idom
92 || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
93 return NULL;
94
95 /* And that the predecessors of BB each have a single successor
96 or are BB's immediate domiator itself. */
97 if (!(EDGE_PRED (bb, 0)->src == bb_idom
98 || single_succ_p (EDGE_PRED (bb, 0)->src))
99 || !(EDGE_PRED (bb, 1)->src == bb_idom
100 || single_succ_p (EDGE_PRED (bb, 1)->src)))
101 return NULL;
102
103 /* So at this point we have a simple diamond for an IF-THEN-ELSE
104 construct starting at BB_IDOM, with a join point at BB. BB
105 pass control outside the loop or to the loop latch.
106
107 We're going to want to create two duplicates of BB, one for
108 each successor of BB_IDOM. */
109 return bb;
110 }
111 }
112 return NULL;
113}
114
115/* Return the number of non-debug statements in a block. */
116static unsigned int
117count_stmts_in_block (basic_block bb)
118{
119 gimple_stmt_iterator gsi;
120 unsigned int num_stmts = 0;
121
122 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
123 {
124 gimple *stmt = gsi_stmt (i: gsi);
125 if (!is_gimple_debug (gs: stmt))
126 num_stmts++;
127 }
128 return num_stmts;
129}
130
131/* Return TRUE if CODE represents a tree code that is not likely to
132 be easily if-convertable because it likely expands into multiple
133 insns, FALSE otherwise. */
134static bool
135poor_ifcvt_candidate_code (enum tree_code code)
136{
137 return (code == MIN_EXPR
138 || code == MAX_EXPR
139 || code == ABS_EXPR
140 || code == COND_EXPR
141 || code == CALL_EXPR);
142}
143
144/* Return TRUE if BB is a reasonable block to duplicate by examining
145 its size, false otherwise. BB will always be a loop latch block.
146
147 Things to consider:
148
149 We do not want to spoil if-conversion if at all possible.
150
151 Most of the benefit seems to be from eliminating the unconditional
152 jump rather than CSE/DCE opportunities. So favor duplicating
153 small latches. A latch with just a conditional branch is ideal.
154
155 CSE/DCE opportunties crop up when statements from the predecessors
156 feed statements in the latch and allow statements in the latch to
157 simplify. */
158
159static bool
160is_feasible_trace (basic_block bb)
161{
162 basic_block pred1 = EDGE_PRED (bb, 0)->src;
163 basic_block pred2 = EDGE_PRED (bb, 1)->src;
164 int num_stmts_in_join = count_stmts_in_block (bb);
165 int num_stmts_in_pred1
166 = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (bb: pred1) : 0;
167 int num_stmts_in_pred2
168 = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (bb: pred2) : 0;
169
170 /* This is meant to catch cases that are likely opportunities for
171 if-conversion. Essentially we look for the case where
172 BB's predecessors are both single statement blocks where
173 the output of that statement feed the same PHI in BB. */
174 if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 1)
175 {
176 gimple *stmt1 = last_and_only_stmt (pred1);
177 gimple *stmt2 = last_and_only_stmt (pred2);
178
179 if (stmt1 && stmt2
180 && gimple_code (g: stmt1) == GIMPLE_ASSIGN
181 && gimple_code (g: stmt2) == GIMPLE_ASSIGN)
182 {
183 enum tree_code code1 = gimple_assign_rhs_code (gs: stmt1);
184 enum tree_code code2 = gimple_assign_rhs_code (gs: stmt2);
185
186 if (!poor_ifcvt_candidate_code (code: code1)
187 && !poor_ifcvt_candidate_code (code: code2))
188 {
189 tree lhs1 = gimple_assign_lhs (gs: stmt1);
190 tree lhs2 = gimple_assign_lhs (gs: stmt2);
191 gimple_stmt_iterator gsi;
192 for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
193 {
194 gimple *phi = gsi_stmt (i: gsi);
195 if ((gimple_phi_arg_def (gs: phi, index: 0) == lhs1
196 && gimple_phi_arg_def (gs: phi, index: 1) == lhs2)
197 || (gimple_phi_arg_def (gs: phi, index: 1) == lhs1
198 && gimple_phi_arg_def (gs: phi, index: 0) == lhs2))
199 {
200 if (dump_file && (dump_flags & TDF_DETAILS))
201 fprintf (stream: dump_file,
202 format: "Block %d appears to be a join point for "
203 "if-convertable diamond.\n",
204 bb->index);
205 return false;
206 }
207 }
208 }
209 }
210 }
211
212 /* Canonicalize the form. */
213 if (num_stmts_in_pred1 == 0 && num_stmts_in_pred2 == 1)
214 {
215 std::swap (a&: pred1, b&: pred2);
216 std::swap (a&: num_stmts_in_pred1, b&: num_stmts_in_pred2);
217 }
218
219 /* Another variant. This one is half-diamond. */
220 if (num_stmts_in_pred1 == 1 && num_stmts_in_pred2 == 0
221 && dominated_by_p (CDI_DOMINATORS, pred1, pred2))
222 {
223 gimple *stmt1 = last_and_only_stmt (pred1);
224
225 /* The only statement in PRED1 must be an assignment that is
226 not a good candidate for if-conversion. This may need some
227 generalization. */
228 if (stmt1 && gimple_code (g: stmt1) == GIMPLE_ASSIGN)
229 {
230 enum tree_code code1 = gimple_assign_rhs_code (gs: stmt1);
231
232 if (!poor_ifcvt_candidate_code (code: code1))
233 {
234 tree lhs1 = gimple_assign_lhs (gs: stmt1);
235 tree rhs1 = gimple_assign_rhs1 (gs: stmt1);
236
237 gimple_stmt_iterator gsi;
238 for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
239 {
240 gimple *phi = gsi_stmt (i: gsi);
241 if ((gimple_phi_arg_def (gs: phi, index: 0) == lhs1
242 && gimple_phi_arg_def (gs: phi, index: 1) == rhs1)
243 || (gimple_phi_arg_def (gs: phi, index: 1) == lhs1
244 && gimple_phi_arg_def (gs: phi, index: 0) == rhs1))
245 {
246 if (dump_file && (dump_flags & TDF_DETAILS))
247 fprintf (stream: dump_file,
248 format: "Block %d appears to be a join point for "
249 "if-convertable half-diamond.\n",
250 bb->index);
251 return false;
252 }
253 }
254 }
255 }
256 }
257
258 /* Canonicalize the form. */
259 if (single_pred_p (bb: pred1) && single_pred (bb: pred1) == pred2
260 && num_stmts_in_pred1 == 0)
261 std::swap (a&: pred1, b&: pred2);
262
263 /* This is meant to catch another kind of cases that are likely opportunities
264 for if-conversion. After canonicalizing, PRED2 must be an empty block and
265 PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed
266 to end with a cond_stmt which has the same args with the PHI in BB. */
267 if (single_pred_p (bb: pred2) && single_pred (bb: pred2) == pred1
268 && num_stmts_in_pred2 == 0)
269 {
270 if (gcond *cond_stmt = dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred1)))
271 {
272 tree lhs = gimple_cond_lhs (gs: cond_stmt);
273 tree rhs = gimple_cond_rhs (gs: cond_stmt);
274
275 gimple_stmt_iterator gsi;
276 for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
277 {
278 gimple *phi = gsi_stmt (i: gsi);
279 if ((operand_equal_p (gimple_phi_arg_def (gs: phi, index: 0), lhs)
280 && operand_equal_p (gimple_phi_arg_def (gs: phi, index: 1), rhs))
281 || (operand_equal_p (gimple_phi_arg_def (gs: phi, index: 0), rhs)
282 && (operand_equal_p (gimple_phi_arg_def (gs: phi, index: 1), lhs))))
283 {
284 if (dump_file && (dump_flags & TDF_DETAILS))
285 fprintf (stream: dump_file,
286 format: "Block %d appears to be optimized to a join "
287 "point for if-convertable half-diamond.\n",
288 bb->index);
289 return false;
290 }
291 }
292 }
293 }
294
295 /* If the joiner has no PHIs with useful uses there is zero chance
296 of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
297 bool found_useful_phi = false;
298 for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (i: si);
299 gsi_next (i: &si))
300 {
301 gphi *phi = si.phi ();
302 use_operand_p use_p;
303 imm_use_iterator iter;
304 FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
305 {
306 gimple *stmt = USE_STMT (use_p);
307 if (is_gimple_debug (gs: stmt))
308 continue;
309 /* If there's a use in the joiner this might be a CSE/DCE
310 opportunity, but not if the use is in a conditional
311 which makes this a likely if-conversion candidate. */
312 if (gimple_bb (g: stmt) == bb
313 && (!is_gimple_assign (gs: stmt)
314 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
315 != tcc_comparison)))
316 {
317 found_useful_phi = true;
318 break;
319 }
320 /* If the use is on a loop header PHI and on one path the
321 value is unchanged this might expose a jump threading
322 opportunity. */
323 if (gimple_code (g: stmt) == GIMPLE_PHI
324 && gimple_bb (g: stmt) == bb->loop_father->header
325 /* But for memory the PHI alone isn't good enough. */
326 && ! virtual_operand_p (op: gimple_phi_result (gs: stmt)))
327 {
328 bool found_unchanged_path = false;
329 for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i)
330 if (gimple_phi_arg_def (gs: phi, index: i) == gimple_phi_result (gs: stmt))
331 {
332 found_unchanged_path = true;
333 break;
334 }
335 /* If we found an unchanged path this can only be a threading
336 opportunity if we have uses of the loop header PHI result
337 in a stmt dominating the merge block. Otherwise the
338 splitting may prevent if-conversion. */
339 if (found_unchanged_path)
340 {
341 use_operand_p use2_p;
342 imm_use_iterator iter2;
343 FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
344 {
345 gimple *use_stmt = USE_STMT (use2_p);
346 if (is_gimple_debug (gs: use_stmt))
347 continue;
348 basic_block use_bb = gimple_bb (g: use_stmt);
349 if (use_bb != bb
350 && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
351 {
352 if (gcond *cond = dyn_cast <gcond *> (p: use_stmt))
353 if (gimple_cond_code (gs: cond) == EQ_EXPR
354 || gimple_cond_code (gs: cond) == NE_EXPR)
355 found_useful_phi = true;
356 break;
357 }
358 }
359 }
360 if (found_useful_phi)
361 break;
362 }
363 }
364 if (found_useful_phi)
365 break;
366 }
367 /* There is one exception namely a controlling condition we can propagate
368 an equivalence from to the joiner. */
369 bool found_cprop_opportunity = false;
370 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
371 gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: dom));
372 if (gimple_cond_code (gs: cond) == EQ_EXPR
373 || gimple_cond_code (gs: cond) == NE_EXPR)
374 for (unsigned i = 0; i < 2; ++i)
375 {
376 tree op = gimple_op (gs: cond, i);
377 if (TREE_CODE (op) == SSA_NAME)
378 {
379 use_operand_p use_p;
380 imm_use_iterator iter;
381 FOR_EACH_IMM_USE_FAST (use_p, iter, op)
382 {
383 if (is_gimple_debug (USE_STMT (use_p)))
384 continue;
385 if (gimple_bb (USE_STMT (use_p)) == bb)
386 {
387 found_cprop_opportunity = true;
388 break;
389 }
390 }
391 }
392 if (found_cprop_opportunity)
393 break;
394 }
395
396 if (! found_useful_phi && ! found_cprop_opportunity)
397 {
398 if (dump_file && (dump_flags & TDF_DETAILS))
399 fprintf (stream: dump_file,
400 format: "Block %d is a join that does not expose CSE/DCE/jump-thread "
401 "opportunities when duplicated.\n",
402 bb->index);
403 return false;
404 }
405
406 /* We may want something here which looks at dataflow and tries
407 to guess if duplication of BB is likely to result in simplification
408 of instructions in BB in either the original or the duplicate. */
409
410 /* Upper Hard limit on the number statements to copy. */
411 if (num_stmts_in_join
412 >= param_max_jump_thread_duplication_stmts)
413 return false;
414
415 return true;
416}
417
418/* If the immediate dominator of the latch of the loop is
419 block with conditional branch, then the loop latch is
420 duplicated to its predecessors path preserving the SSA
421 semantics.
422
423 CFG before transformation.
424
425 2
426 |
427 |
428 +---->3
429 | / \
430 | / \
431 | 4 5
432 | \ /
433 | \ /
434 | 6
435 | / \
436 | / \
437 | 8 7
438 | | |
439 ---+ E
440
441
442
443 Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
444 and wire things up so they look like this:
445
446 2
447 |
448 |
449 +---->3
450 | / \
451 | / \
452 | 4 5
453 | | |
454 | | |
455 | 9 10
456 | |\ /|
457 | | \ / |
458 | | 7 |
459 | | | |
460 | | E |
461 | | |
462 | \ /
463 | \ /
464 +-----8
465
466
467 Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
468 enables CSE, DCE and other optimizations to occur on a larger block
469 of code. */
470
471static bool
472split_paths ()
473{
474 bool changed = false;
475
476 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
477 initialize_original_copy_tables ();
478 calculate_dominance_info (CDI_DOMINATORS);
479
480 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
481 {
482 /* Only split paths if we are optimizing this loop for speed. */
483 if (!optimize_loop_for_speed_p (loop))
484 continue;
485
486 /* See if there is a block that we can duplicate to split the
487 path to the loop latch. */
488 basic_block bb
489 = find_block_to_duplicate_for_splitting_paths (latch: loop->latch);
490
491 /* BB is the merge point for an IF-THEN-ELSE we want to transform.
492
493 Essentially we want to create a duplicate of bb and redirect the
494 first predecessor of BB to the duplicate (leaving the second
495 predecessor as is. This will split the path leading to the latch
496 re-using BB to avoid useless copying. */
497 if (bb && is_feasible_trace (bb))
498 {
499 if (dump_file && (dump_flags & TDF_DETAILS))
500 fprintf (stream: dump_file,
501 format: "Duplicating join block %d into predecessor paths\n",
502 bb->index);
503 basic_block pred0 = EDGE_PRED (bb, 0)->src;
504 if (EDGE_COUNT (pred0->succs) != 1)
505 pred0 = EDGE_PRED (bb, 1)->src;
506 transform_duplicate (bb: pred0, bb2: bb);
507 changed = true;
508
509 /* If BB has an outgoing edge marked as IRREDUCIBLE, then
510 duplicating BB may result in an irreducible region turning
511 into a natural loop.
512
513 Long term we might want to hook this into the block
514 duplication code, but as we've seen with similar changes
515 for edge removal, that can be somewhat risky. */
516 if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
517 || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
518 {
519 if (dump_file && (dump_flags & TDF_DETAILS))
520 fprintf (stream: dump_file,
521 format: "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
522 "Scheduling loop fixups.\n",
523 bb->index);
524 loops_state_set (flags: LOOPS_NEED_FIXUP);
525 }
526 }
527 }
528
529 loop_optimizer_finalize ();
530 free_original_copy_tables ();
531 return changed;
532}
533
534/* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
535 paths where split, otherwise return zero. */
536
537static unsigned int
538execute_split_paths ()
539{
540 /* If we don't have at least 2 real blocks and backedges in the
541 CFG, then there's no point in trying to perform path splitting. */
542 if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
543 || !mark_dfs_back_edges ())
544 return 0;
545
546 bool changed = split_paths();
547 if (changed)
548 free_dominance_info (CDI_DOMINATORS);
549
550 return changed ? TODO_cleanup_cfg : 0;
551}
552
553static bool
554gate_split_paths ()
555{
556 return flag_split_paths;
557}
558
559namespace {
560
561const pass_data pass_data_split_paths =
562{
563 .type: GIMPLE_PASS, /* type */
564 .name: "split-paths", /* name */
565 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
566 .tv_id: TV_SPLIT_PATHS, /* tv_id */
567 PROP_ssa, /* properties_required */
568 .properties_provided: 0, /* properties_provided */
569 .properties_destroyed: 0, /* properties_destroyed */
570 .todo_flags_start: 0, /* todo_flags_start */
571 TODO_update_ssa, /* todo_flags_finish */
572};
573
/* Pass wrapper exposing the path-splitting transform to the pass
   manager; the real work is done in execute_split_paths.  */

class pass_split_paths : public gimple_opt_pass
{
 public:
  pass_split_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_split_paths, ctxt)
  {}
  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_split_paths (m_ctxt); }
  bool gate (function *) final override { return gate_split_paths (); }
  unsigned int execute (function *) final override
  {
    return execute_split_paths ();
  }

}; // class pass_split_paths
589
590} // anon namespace
591
/* Factory function used by the pass manager (see passes.def) to
   instantiate the split-paths pass.  */

gimple_opt_pass *
make_pass_split_paths (gcc::context *ctxt)
{
  return new pass_split_paths (ctxt);
}
597

source code of gcc/gimple-ssa-split-paths.cc