tree-ssa-loop-ch.cc source code [gcc/tree-ssa-loop-ch.cc]

1	/ Loop header copying on trees.*
2	Copyright (C) 2004-2023 Free Software Foundation, Inc.
3
4	This file is part of GCC.
5
6	GCC is free software; you can redistribute it and/or modify it
7	under the terms of the GNU General Public License as published by the
8	Free Software Foundation; either version 3, or (at your option) any
9	later version.
10
11	GCC is distributed in the hope that it will be useful, but WITHOUT
12	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14	for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with GCC; see the file COPYING3. If not see
18	<http://www.gnu.org/licenses/>. /*
19
20	#include "config.h"
21	#include "system.h"
22	#include "coretypes.h"
23	#include "backend.h"
24	#include "tree.h"
25	#include "gimple.h"
26	#include "cfghooks.h"
27	#include "tree-pass.h"
28	#include "gimple-ssa.h"
29	#include "gimple-iterator.h"
30	#include "tree-cfg.h"
31	#include "tree-into-ssa.h"
32	#include "cfgloop.h"
33	#include "tree-inline.h"
34	#include "tree-ssa-threadedge.h"
35	#include "tree-ssa-sccvn.h"
36	#include "tree-phinodes.h"
37	#include "ssa-iterators.h"
38	#include "value-range.h"
39	#include "gimple-range.h"
40	#include "gimple-range-path.h"
41	#include "gimple-pretty-print.h"
42	#include "cfganal.h"
43
44	/ Return path query insteance for testing ranges of statements*
45	in headers of LOOP contained in basic block BB.
46	Use RANGER instance. /*
47
48	static path_range_query *
49	get_range_query (class loop *loop,
50	basic_block bb,
51	gimple_ranger &ranger)
52	{
53	auto_vec<basic_block, `8`> path;
54	for (; bb != loop->header; bb = single_pred_edge (bb)->src)
55	path.safe_push (obj: bb);
56	path.safe_push (obj: loop->header);
57	path.safe_push (obj: loop_preheader_edge (loop)->src);
58	return new path_range_query (ranger, path);
59	}
60
61	/ Return edge that is true in the first iteration of the loop*
62	and NULL otherwise.
63	Formulate corrent ranger query to RANGER. /*
64
65	static edge
66	static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
67	path_range_query *&query)
68	{
69	gcond last = safe_dyn_cast <gcond > (p: *gsi_last_bb (bb));
70	edge ret_e;
71
72	if (!last)
73	return NULL;
74
75	edge true_e, false_e;
76	extract_true_false_edges_from_block (bb, &true_e, &false_e);
77
78	/ If neither edge is the exit edge, this is not a case we'd like to*
79	special-case. /*
80	if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
81	return NULL;
82
83	int_range<`1`> desired_static_range;
84	if (loop_exit_edge_p (l, true_e))
85	{
86	desired_static_range = range_false ();
87	ret_e = true_e;
88	}
89	else
90	{
91	desired_static_range = range_true ();
92	ret_e = false_e;
93	}
94
95	if (!query)
96	query = get_range_query (loop: l, bb: gimple_bb (g: last), ranger);
97
98	int_range<`2`> r;
99	if (!query->range_of_stmt (r, last))
100	return NULL;
101	return r == desired_static_range ? ret_e : NULL;
102	}
103
104	/ Return true if STMT is static in LOOP. This means that its value*
105	is constant in the first iteration.
106	Use RANGER and formulate query cached in QUERY. /*
107
108	static bool
109	loop_static_stmt_p (class loop *loop,
110	gimple_ranger &ranger,
111	path_range_query *&query,
112	gimple *stmt)
113	{
114	tree type = gimple_range_type (s: stmt);
115	if (!type \|\| !Value_Range::supports_type_p (type))
116	return false;
117
118	if (!query)
119	query = get_range_query (loop, bb: gimple_bb (g: stmt), ranger);
120
121	Value_Range r (gimple_range_type (s: stmt));
122	if (!query->range_of_stmt (r, stmt))
123	return false;
124	return r.singleton_p ();
125	}
126
127	/ Return true if OP is invariant. /
128
129	static bool
130	loop_invariant_op_p (class loop *loop,
131	tree op)
132	{
133	if (is_gimple_min_invariant (op))
134	return true;
135	if (SSA_NAME_IS_DEFAULT_DEF (op)
136	\|\| !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
137	return true;
138	return gimple_uid (SSA_NAME_DEF_STMT (op)) & `1`;
139	}
140
141	/ Return true if OP combines outcome of static and*
142	loop invariant conditional. /*
143
144	static bool
145	loop_static_op_p (class loop *loop, tree op)
146	{
147	/ Always check for invariant first. /
148	gcc_checking_assert (!is_gimple_min_invariant (op)
149	&& !SSA_NAME_IS_DEFAULT_DEF (op)
150	&& flow_bb_inside_loop_p (loop,
151	gimple_bb (SSA_NAME_DEF_STMT (op))));
152	return gimple_uid (SSA_NAME_DEF_STMT (op)) & `2`;
153	}
154
155
156	/ Return true if OP combines outcome of static and*
157	loop invariant conditional. /*
158
159	static bool
160	loop_combined_static_and_iv_p (class loop *loop,
161	tree op)
162	{
163	/ Always check for invariant first. /
164	gcc_checking_assert (!is_gimple_min_invariant (op)
165	&& !SSA_NAME_IS_DEFAULT_DEF (op)
166	&& flow_bb_inside_loop_p (loop,
167	gimple_bb (SSA_NAME_DEF_STMT (op))));
168	return gimple_uid (SSA_NAME_DEF_STMT (op)) & `4`;
169	}
170
171	/ Decision about posibility of copying a given header. /
172
173	enum ch_decision
174	{
175	/ We do not want to copy this header. /
176	ch_impossible,
177	/ We can copy it if it enables wins. /
178	ch_possible,
179	/ We can "copy" it if it enables wins and doing*
180	so will introduce no new code. /*
181	ch_possible_zero_cost,
182	/ We want to copy. /
183	ch_win,
184	/ We want to copy and we will eliminate loop exit. /
185	ch_win_invariant_exit
186	};
187
188	/ Check whether we should duplicate HEADER of LOOP. At most LIMIT
189	instructions should be duplicated, limit is decreased by the actual
190	amount. /*
191
192	static ch_decision
193	should_duplicate_loop_header_p (basic_block header, class loop *loop,
194	gimple_ranger *ranger,
195	int *limit,
196	hash_set <edge> *invariant_exits,
197	hash_set <edge> *static_exits)
198	{
199	gimple_stmt_iterator bsi;
200
201	gcc_assert (!header->aux);
202
203	gcc_assert (EDGE_COUNT (header->succs) > `0`);
204	if (single_succ_p (bb: header))
205	{
206	if (dump_file && (dump_flags & TDF_DETAILS))
207	fprintf (stream: dump_file,
208	format: " Not duplicating bb %i: it is single succ.\n",
209	header->index);
210	return ch_impossible;
211	}
212
213	if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, `0`)->dest)
214	&& flow_bb_inside_loop_p (loop, EDGE_SUCC (header, `1`)->dest))
215	{
216	if (dump_file && (dump_flags & TDF_DETAILS))
217	fprintf (stream: dump_file,
218	format: " Not duplicating bb %i: both successors are in loop.\n",
219	loop->num);
220	return ch_impossible;
221	}
222
223	/ If this is not the original loop header, we want it to have just*
224	one predecessor in order to match the && pattern. /*
225	if (header != loop->header && !single_pred_p (bb: header))
226	{
227	if (dump_file && (dump_flags & TDF_DETAILS))
228	fprintf (stream: dump_file,
229	format: " Not duplicating bb %i: it has mutiple predecestors.\n",
230	header->index);
231	return ch_impossible;
232	}
233
234	gcond last = safe_dyn_cast <gcond > (p: *gsi_last_bb (bb: header));
235	if (!last)
236	{
237	if (dump_file && (dump_flags & TDF_DETAILS))
238	fprintf (stream: dump_file,
239	format: " Not duplicating bb %i: it does not end by conditional.\n",
240	header->index);
241	return ch_impossible;
242	}
243
244	path_range_query *query = NULL;
245	for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (i: psi);
246	gsi_next (i: &psi))
247	if (!virtual_operand_p (op: gimple_phi_result (gs: psi.phi ())))
248	{
249	bool static_p = loop_static_stmt_p (loop, ranger&: *ranger,
250	query, stmt: psi.phi ());
251	gimple_set_uid (g: psi.phi (), uid: static_p ? `2` : `0`);
252	}
253	bool code_size_cost = false;
254
255	/ Count number of instructions and punt on calls.*
256	Populate stmts INV flag to later apply heuristics to the
257	kind of conditions we want to copy. /*
258	for (bsi = gsi_start_bb (bb: header); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
259	{
260	gimple *last = gsi_stmt (i: bsi);
261
262	if (gimple_code (g: last) == GIMPLE_LABEL)
263	continue;
264
265	if (is_gimple_debug (gs: last))
266	continue;
267
268	if (gimple_code (g: last) == GIMPLE_COND)
269	break;
270
271	if (dump_file && (dump_flags & TDF_DETAILS))
272	{
273	fprintf (stream: dump_file, format: " Analyzing: ");
274	print_gimple_stmt (dump_file, last, `0`, TDF_SLIM);
275	}
276
277	if (gimple_code (g: last) == GIMPLE_CALL
278	&& (!gimple_inexpensive_call_p (as_a <gcall *> (p: last))
279	/ IFN_LOOP_DIST_ALIAS means that inner loop is distributed*
280	at current loop's header. Don't copy in this case. /*
281	\|\| gimple_call_internal_p (gs: last, fn: IFN_LOOP_DIST_ALIAS)))
282	{
283	if (dump_file && (dump_flags & TDF_DETAILS))
284	fprintf (stream: dump_file,
285	format: " Not duplicating bb %i: it contains call.\n",
286	header->index);
287	if (query)
288	delete query;
289	return ch_impossible;
290	}
291
292	/ Classify the stmt is invariant in the loop. /
293	gimple_set_uid (g: last, uid: `0`);
294	if (!gimple_vuse (g: last)
295	&& gimple_code (g: last) != GIMPLE_ASM
296	&& (gimple_code (g: last) != GIMPLE_CALL
297	\|\| gimple_call_flags (last) & ECF_CONST))
298	{
299	bool inv = true, static_p = false;
300	ssa_op_iter i;
301	tree op;
302	FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
303	if (!loop_invariant_op_p (loop, op))
304	inv = false;
305	/ Assume that code is reasonably optimized and invariant*
306	constants are already identified. /*
307	if (!inv)
308	static_p = loop_static_stmt_p (loop, ranger&: *ranger, query, stmt: last);
309	gimple_set_uid (g: last, uid: (inv ? `1` : `0`) \| (static_p ? `2` : `0`));
310	if (dump_file && (dump_flags & TDF_DETAILS))
311	{
312	if (inv)
313	fprintf (stream: dump_file, format: " Stmt is loop invariant\n");
314	if (static_p)
315	fprintf (stream: dump_file, format: " Stmt is static "
316	"(constant in the first iteration)\n");
317	}
318	/ Loop invariants will be optimized out in loop body after*
319	duplication; do not account invariant computation in code
320	size costs.
321
322	Similarly static computations will be optimized out in the
323	duplicatd header. /*
324	if (inv \|\| static_p)
325	continue;
326
327	/ Match the following:*
328	_1 = i_1 < 10 <- static condtion
329	_2 = n != 0 <- loop invariant condition
330	_3 = _1 & _2 <- combined static and iv statement. /*
331	tree_code code;
332	if (gimple_code (g: last) == GIMPLE_ASSIGN
333	&& ((code = gimple_assign_rhs_code (gs: last)) == BIT_AND_EXPR
334	\|\| code == BIT_IOR_EXPR \|\| code == BIT_XOR_EXPR))
335	{
336	tree op1 = gimple_assign_rhs1 (gs: last);
337	tree op2 = gimple_assign_rhs2 (gs: last);
338
339	if ((loop_invariant_op_p (loop, op: op1)
340	\|\| loop_combined_static_and_iv_p (loop, op: op1)
341	\|\| loop_static_op_p (loop, op: op1))
342	&& (loop_invariant_op_p (loop, op: op2)
343	\|\| loop_combined_static_and_iv_p (loop, op: op2)
344	\|\| loop_static_op_p (loop, op: op2)))
345	{
346	/ Duplicating loop header with combned conditional will*
347	remove this statement in each copy. But we account for
348	that later when seeing that condition.
349
350	Note that this may be overly optimistic for bit operations
351	where the static parameter may still result in non-trivial
352	bit operation. /*
353	gimple_set_uid (g: last, uid: `4`);
354	if (dump_file && (dump_flags & TDF_DETAILS))
355	fprintf (stream: dump_file,
356	format: " Stmt combines static and invariant op\n");
357	continue;
358	}
359	}
360	}
361
362	int insns = estimate_num_insns (last, &eni_size_weights);
363	*limit -= insns;
364	if (insns)
365	code_size_cost = true;
366	if (dump_file && (dump_flags & TDF_DETAILS))
367	fprintf (stream: dump_file,
368	format: " Acconting stmt as %i insns\n", insns);
369	if (*limit < `0`)
370	{
371	if (dump_file && (dump_flags & TDF_DETAILS))
372	fprintf (stream: dump_file,
373	format: " Not duplicating bb %i contains too many insns.\n",
374	header->index);
375	if (query)
376	delete query;
377	return ch_impossible;
378	}
379	}
380
381	if (dump_file && (dump_flags & TDF_DETAILS))
382	{
383	fprintf (stream: dump_file, format: " Analyzing: ");
384	print_gimple_stmt (dump_file, last, `0`, TDF_SLIM);
385	}
386
387	/ If the condition tests a non-IV loop variant we do not want to rotate*
388	the loop further. Unless this is the original loop header. /*
389	tree lhs = gimple_cond_lhs (gs: last);
390	tree rhs = gimple_cond_rhs (gs: last);
391	bool lhs_invariant = loop_invariant_op_p (loop, op: lhs);
392	bool rhs_invariant = loop_invariant_op_p (loop, op: rhs);
393
394	/ Combined conditional is a result of if combining:*
395
396	_1 = i_1 < 10 <- static condtion
397	_2 = n != 0 <- loop invariant condition
398	_3 = _1 & _2 <- combined static and iv statement
399	if (_3 != 0) <- combined conditional
400
401	Combined conditionals will not be optimized out in either copy.
402	However duplicaed header simplifies to:
403
404	if (n < 10)
405
406	while loop body to
407
408	if (i_1 < 10)
409
410	So effectively the resulting code sequence will be of same length as
411	the original code.
412
413	Combined conditionals are never static or invariant, so save some work
414	below. /*
415	if (lhs_invariant != rhs_invariant
416	&& (lhs_invariant
417	\|\| loop_combined_static_and_iv_p (loop, op: lhs))
418	&& (rhs_invariant
419	\|\| loop_combined_static_and_iv_p (loop, op: rhs)))
420	{
421	if (query)
422	delete query;
423	if (dump_file && (dump_flags & TDF_DETAILS))
424	fprintf (stream: dump_file,
425	format: " Conditional combines static and invariant op.\n");
426	return ch_win;
427	}
428
429	edge static_exit = static_loop_exit (l: loop, bb: header, ranger&: *ranger, query);
430	if (query)
431	delete query;
432
433	if (static_exit && static_exits)
434	{
435	static_exits->add (k: static_exit);
436	if (dump_file && (dump_flags & TDF_DETAILS))
437	fprintf (stream: dump_file,
438	format: " Will eliminate peeled conditional in bb %d.\n",
439	static_exit->src->index);
440	/ Still look for invariant exits; exit may be both. /
441	}
442	if (lhs_invariant && rhs_invariant)
443	{
444	if (invariant_exits)
445	{
446	edge e;
447	edge_iterator ei;
448	FOR_EACH_EDGE (e, ei, header->succs)
449	if (loop_exit_edge_p (loop, e))
450	{
451	if (dump_file && (dump_flags & TDF_DETAILS))
452	fprintf (stream: dump_file,
453	format: " Will elliminate invariant exit %i->%i\n",
454	e->src->index, e->dest->index);
455	invariant_exits->add (k: e);
456	}
457	}
458	return ch_win_invariant_exit;
459	}
460
461	/ If the static exit fully optimize out, it is win to "duplicate"*
462	it.
463
464	TODO: Even if duplication costs some size we may opt to do so in case
465	exit probability is significant enough (do partial peeling). /*
466	if (static_exit)
467	return !code_size_cost ? ch_possible_zero_cost : ch_possible;
468
469	/ We was not able to prove that conditional will be eliminated. /
470	int insns = estimate_num_insns (last, &eni_size_weights);
471	*limit -= insns;
472	if (dump_file && (dump_flags & TDF_DETAILS))
473	fprintf (stream: dump_file,
474	format: " Acconting stmt as %i insns\n", insns);
475	if (*limit < `0`)
476	{
477	if (dump_file && (dump_flags & TDF_DETAILS))
478	fprintf (stream: dump_file,
479	format: " Not duplicating bb %i contains too many insns.\n",
480	header->index);
481	return ch_impossible;
482	}
483
484	return ch_possible;
485	}
486
487	/ Checks whether LOOP is a do-while style loop. /
488
489	static bool
490	do_while_loop_p (class loop *loop)
491	{
492	gimple *stmt = last_nondebug_stmt (loop->latch);
493
494	/ If the latch of the loop is not empty, it is not a do-while loop. /
495	if (stmt
496	&& gimple_code (g: stmt) != GIMPLE_LABEL)
497	{
498	if (dump_file && (dump_flags & TDF_DETAILS))
499	fprintf (stream: dump_file,
500	format: "Loop %i is not do-while loop: latch is not empty.\n",
501	loop->num);
502	return false;
503	}
504
505	/ If the latch does not have a single predecessor, it is not a*
506	do-while loop. /*
507	if (!single_pred_p (bb: loop->latch))
508	{
509	if (dump_file && (dump_flags & TDF_DETAILS))
510	fprintf (stream: dump_file,
511	format: "Loop %i is not do-while loop: latch has multiple "
512	"predecessors.\n", loop->num);
513	return false;
514	}
515	basic_block pred = single_pred (bb: loop->latch);
516
517	/ If the latch predecessor doesn't exit the loop, it is not a*
518	do-while loop. /*
519	if (!loop_exits_from_bb_p (loop, pred))
520	{
521	if (dump_file && (dump_flags & TDF_DETAILS))
522	fprintf (stream: dump_file,
523	format: "Loop %i is not do-while loop: latch predecessor "
524	"does not exit loop.\n", loop->num);
525	return false;
526	}
527	gcond last = safe_dyn_cast <gcond > (p: *gsi_last_bb (bb: pred));
528	if (last
529	&& (gimple_cond_lhs (gs: last) == boolean_false_node
530	\|\| gimple_cond_lhs (gs: last) == boolean_true_node)
531	&& gimple_cond_rhs (gs: last) == boolean_false_node)
532	{
533	if (dump_file && (dump_flags & TDF_DETAILS))
534	fprintf (stream: dump_file,
535	format: "Loop %i is not do-while loop: latch predecessor "
536	"contains exit we optimized out.\n", loop->num);
537	return false;
538	}
539
540	if (dump_file && (dump_flags & TDF_DETAILS))
541	fprintf (stream: dump_file, format: "Loop %i is do-while loop\n", loop->num);
542
543	return true;
544	}
545
546	/ Update profile after header copying of LOOP.*
547	REGION is the original (in loop) sequence, REGION_COPY is the
548	duplicated header (now outside of loop). N_REGION is number of
549	bbs duplicated.
550	ELIMINATED_EDGE is edge to be removed from duplicated sequence.
551	INVARIANT_EXITS are edges in the loop body to be elimianted
552	since they are loop invariants
553
554	So We expect the following:
555
556	// region_copy_start entry will be scaled to entry_count
557	if (cond1) <- this condition will become false
558	and we update probabilities
559	goto loop_exit;
560	if (cond2) <- this condition is loop invariant
561	goto loop_exit;
562	goto loop_header <- this will be redirected to loop.
563	// region_copy_end
564	loop:
565	<body>
566	// region start
567	loop_header:
568	if (cond1) <- we need to update probabbility here
569	goto loop_exit;
570	if (cond2) <- and determine scaling factor here.
571	moreover cond2 is now always true
572	goto loop_exit;
573	else
574	goto loop;
575	// region end
576
577	Adding support for more exits can be done similarly,
578	but only consumer so far is tree-ssa-loop-ch and it uses only this
579	to handle the common case of peeling headers which have
580	conditionals known to be always true upon entry. /*
581
582	static void
583	update_profile_after_ch (class loop *loop,
584	basic_block region, basic_block region_copy,
585	unsigned n_region,
586	hash_set <edge> *invariant_exits,
587	hash_set <edge> *static_exits,
588	profile_count entry_count)
589	{
590	for (unsigned int i = `0`; i < n_region; i++)
591	{
592	edge exit_e, exit_e_copy, e, e_copy;
593	if (EDGE_COUNT (region[i]->succs) == `1`)
594	{
595	region_copy[i]->count = entry_count;
596	region[i]->count -= entry_count;
597	continue;
598	}
599
600	gcc_checking_assert (EDGE_COUNT (region[i]->succs) == `2`);
601	if (loop_exit_edge_p (loop,
602	EDGE_SUCC (region[i], `0`)))
603	{
604	exit_e = EDGE_SUCC (region[i], `0`);
605	exit_e_copy = EDGE_SUCC (region_copy[i], `0`);
606	e = EDGE_SUCC (region[i], `1`);
607	e_copy = EDGE_SUCC (region_copy[i], `1`);
608	}
609	else
610	{
611	exit_e = EDGE_SUCC (region[i], `1`);
612	exit_e_copy = EDGE_SUCC (region_copy[i], `1`);
613	e = EDGE_SUCC (region[i], `0`);
614	e_copy = EDGE_SUCC (region_copy[i], `0`);
615	}
616	gcc_assert (i == n_region - `1`
617	\|\| (e->dest == region[i + `1`]
618	&& e_copy->dest == region_copy[i + `1`]));
619	region_copy[i]->count = entry_count;
620	profile_count exit_e_count = exit_e->count ();
621	bool was_static = false;
622	if (static_exits->contains (k: exit_e))
623	{
624	/ Update profile and the conditional.*
625	CFG update is done by caller. /*
626	static_exits->remove (k: exit_e);
627	was_static = true;
628	e_copy->probability = profile_probability::always ();
629	exit_e_copy->probability = profile_probability::never ();
630	gcond *cond_stmt
631	= as_a <gcond >(p: gsi_last_bb (bb: region_copy[i]));
632	if (e_copy->flags & EDGE_TRUE_VALUE)
633	gimple_cond_make_true (gs: cond_stmt);
634	else
635	gimple_cond_make_false (gs: cond_stmt);
636	update_stmt (s: cond_stmt);
637	/ Header copying is a special case of jump threading, so use*
638	common code to update loop body exit condition. /*
639	update_bb_profile_for_threading (region[i], entry_count, e);
640	}
641	else
642	region[i]->count -= region_copy[i]->count;
643	if (invariant_exits->contains (k: exit_e))
644	{
645	invariant_exits->remove (k: exit_e);
646	/ All exits will happen in exit_e_copy which is out of the*
647	loop, so increase probability accordingly.
648	If the edge is eliminated_edge we already corrected
649	profile above. /*
650	if (entry_count.nonzero_p () && !was_static)
651	set_edge_probability_and_rescale_others
652	(exit_e_copy, exit_e_count.probability_in
653	(overall: entry_count));
654	/ Eliminate in-loop conditional. /
655	e->probability = profile_probability::always ();
656	exit_e->probability = profile_probability::never ();
657	gcond cond_stmt = as_a <gcond >(p: *gsi_last_bb (bb: region[i]));
658	if (e->flags & EDGE_TRUE_VALUE)
659	gimple_cond_make_true (gs: cond_stmt);
660	else
661	gimple_cond_make_false (gs: cond_stmt);
662	update_stmt (s: cond_stmt);
663	}
664	entry_count = e_copy->count ();
665	}
666	/ Be sure that we seen all invariant exit edges we are supposed to update.*
667	We may have recorded some static exists we decided to not duplicate. /*
668	gcc_checking_assert (invariant_exits->is_empty ());
669	}
670
671	namespace {
672
673	/ Common superclass for both header-copying phases. /
674	class ch_base : public gimple_opt_pass
675	{
676	protected:
677	ch_base (pass_data data, gcc::context *ctxt)
678	: gimple_opt_pass (data, ctxt)
679	{}
680
681	/ Copies headers of all loops in FUN for which process_loop_p is true. /
682	unsigned int copy_headers (function *fun);
683
684	/ Return true to copy headers of LOOP or false to skip. /
685	virtual bool process_loop_p (class loop *loop) = `0`;
686	};
687
688	const pass_data pass_data_ch =
689	{
690	.type: GIMPLE_PASS, / type /
691	.name: "ch", / name /
692	.optinfo_flags: OPTGROUP_LOOP, / optinfo_flags /
693	.tv_id: TV_TREE_CH, / tv_id /
694	.properties_required: ( PROP_cfg \| PROP_ssa ), / properties_required /
695	.properties_provided: `0`, / properties_provided /
696	.properties_destroyed: `0`, / properties_destroyed /
697	.todo_flags_start: `0`, / todo_flags_start /
698	.todo_flags_finish: `0`, / todo_flags_finish /
699	};
700
701	class pass_ch : public ch_base
702	{
703	public:
704	pass_ch (gcc::context *ctxt)
705	: ch_base (pass_data_ch, ctxt)
706	{}
707
708	/ opt_pass methods: /
709	bool gate (function ) final override { return* flag_tree_ch != `0`; }
710
711	/ Initialize and finalize loop structures, copying headers inbetween. /
712	unsigned int execute (function *) final override;
713
714	opt_pass * clone () final override { return new pass_ch (m_ctxt); }
715
716	protected:
717	/ ch_base method: /
718	bool process_loop_p (class loop *loop) final override;
719	}; // class pass_ch
720
721	const pass_data pass_data_ch_vect =
722	{
723	.type: GIMPLE_PASS, / type /
724	.name: "ch_vect", / name /
725	.optinfo_flags: OPTGROUP_LOOP, / optinfo_flags /
726	.tv_id: TV_TREE_CH, / tv_id /
727	.properties_required: ( PROP_cfg \| PROP_ssa ), / properties_required /
728	.properties_provided: `0`, / properties_provided /
729	.properties_destroyed: `0`, / properties_destroyed /
730	.todo_flags_start: `0`, / todo_flags_start /
731	.todo_flags_finish: `0`, / todo_flags_finish /
732	};
733
734	/ This is a more aggressive version of the same pass, designed to run just*
735	before if-conversion and vectorization, to put more loops into the form
736	required for those phases. /*
737	class pass_ch_vect : public ch_base
738	{
739	public:
740	pass_ch_vect (gcc::context *ctxt)
741	: ch_base (pass_data_ch_vect, ctxt)
742	{}
743
744	/ opt_pass methods: /
745	bool gate (function *fun) final override
746	{
747	return flag_tree_ch != `0`
748	&& (flag_tree_loop_vectorize != `0` \|\| fun->has_force_vectorize_loops);
749	}
750
751	/ Just copy headers, no initialization/finalization of loop structures. /
752	unsigned int execute (function *) final override;
753
754	protected:
755	/ ch_base method: /
756	bool process_loop_p (class loop *loop) final override;
757	}; // class pass_ch_vect
758
759	/ For all loops, copy the condition at the end of the loop body in front*
760	of the loop. This is beneficial since it increases efficiency of
761	code motion optimizations. It also saves one jump on entry to the loop. /*
762
763	unsigned int
764	ch_base::copy_headers (function *fun)
765	{
766	basic_block bbs, copied_bbs;
767	unsigned bbs_size;
768	bool changed = false;
769
770	if (number_of_loops (fn: fun) <= `1`)
771	return `0`;
772
773	bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
774	copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
775	bbs_size = n_basic_blocks_for_fn (fun);
776
777	struct candidate
778	{
779	class loop *loop;
780	unsigned int nheaders;
781	hash_set <edge> invariant_exits, static_exits;
782	};
783	auto_vec<struct candidate> candidates;
784	auto_vec<std::pair<edge, loop_p> > copied;
785	auto_vec<class loop *> loops_to_unloop;
786	auto_vec<int> loops_to_unloop_nunroll;
787
788	mark_dfs_back_edges ();
789	gimple_ranger ranger = new* gimple_ranger;
790	for (auto loop : loops_list (cfun, `0`))
791	{
792	int initial_limit = optimize_loop_for_speed_p (loop)
793	? param_max_loop_header_insns : `0`;
794	int remaining_limit = initial_limit;
795	if (dump_file && (dump_flags & TDF_DETAILS))
796	fprintf (stream: dump_file,
797	format: "Analyzing loop %i\n", loop->num);
798
799	basic_block header = loop->header;
800	if (!get_max_loop_iterations_int (loop))
801	{
802	if (dump_file && (dump_flags & TDF_DETAILS))
803	fprintf (stream: dump_file, format: "Loop %d never loops.\n", loop->num);
804	scale_loop_profile (loop, profile_probability::always (), `0`);
805	loops_to_unloop.safe_push (obj: loop);
806	loops_to_unloop_nunroll.safe_push (obj: `0`);
807	continue;
808	}
809
810	/ If the loop is already a do-while style one (either because it was*
811	written as such, or because jump threading transformed it into one),
812	we might be in fact peeling the first iteration of the loop. This
813	in general is not a good idea. Also avoid touching infinite loops. /*
814	if (!loop_has_exit_edges (loop)
815	\|\| !process_loop_p (loop))
816	continue;
817
818	/ Iterate the header copying up to limit; this takes care of the cases*
819	like while (a && b) {...}, where we want to have both of the conditions
820	copied. TODO -- handle while (a \|\| b) - like cases, by not requiring
821	the header to have just a single successor and copying up to
822	postdominator. /*
823	int nheaders = `0`;
824	int last_win_nheaders = `0`;
825	bool last_win_invariant_exit = false;
826	ch_decision ret;
827	auto_vec <ch_decision, `32`> decision;
828	hash_set <edge> invariant_exits = new* hash_set <edge>;
829	hash_set <edge> static_exits = new* hash_set <edge>;
830	while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
831	limit: &remaining_limit,
832	invariant_exits,
833	static_exits))
834	!= ch_impossible)
835	{
836	nheaders++;
837	decision.safe_push (obj: ret);
838	if (ret >= ch_win)
839	{
840	last_win_nheaders = nheaders;
841	last_win_invariant_exit = (ret == ch_win_invariant_exit);
842	if (dump_file && (dump_flags & TDF_DETAILS))
843	fprintf (stream: dump_file, format: " Duplicating bb %i is a win\n",
844	header->index);
845	}
846	else
847	if (dump_file && (dump_flags & TDF_DETAILS))
848	fprintf (stream: dump_file, format: " May duplicate bb %i\n", header->index);
849
850	/ Find a successor of header that is inside a loop; i.e. the new*
851	header after the condition is copied. /*
852	if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, `0`)->dest))
853	header = EDGE_SUCC (header, `0`)->dest;
854	else
855	header = EDGE_SUCC (header, `1`)->dest;
856	}
857
858	/ Try to turn loop into do-while. This means ensuring that*
859	last duplicated header will not have loop invariant exit. /*
860	if (last_win_nheaders && last_win_invariant_exit
861	&& nheaders > last_win_nheaders)
862	{
863	last_win_nheaders++;
864	if (dump_file && (dump_flags & TDF_DETAILS))
865	fprintf (stream: dump_file,
866	format: " Duplicating additional BB to obtain do-while loop\n");
867	}
868	else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
869	{
870	last_win_nheaders = `1`;
871	if (dump_file && (dump_flags & TDF_DETAILS))
872	fprintf (stream: dump_file,
873	format: " Duplicating header BB to obtain do-while loop\n");
874	}
875	/ "Duplicate" all BBs with zero cost following last basic blocks we*
876	decided to copy. /*
877	while (last_win_nheaders < (int)decision.length ()
878	&& decision [last_win_nheaders] == ch_possible_zero_cost)
879	{
880	if (dump_file && (dump_flags & TDF_DETAILS))
881	fprintf (stream: dump_file,
882	format: " Duplicating extra bb is a win; it has zero cost\n");
883	last_win_nheaders++;
884	}
885
886	if (last_win_nheaders)
887	candidates.safe_push (obj: {.loop: loop, .nheaders: last_win_nheaders,
888	.invariant_exits: invariant_exits, .static_exits: static_exits});
889	else
890	{
891	delete invariant_exits;
892	delete static_exits;
893	}
894	}
895	/ Do not use ranger after we change the IL and not have updated SSA. /
896	delete ranger;
897
898	for (auto candidate : candidates)
899	{
900	class loop *loop = candidate.loop;
901	if (dump_file && (dump_flags & TDF_DETAILS))
902	fprintf (stream: dump_file,
903	format: "Copying headers of loop %i\n", loop->num);
904
905	basic_block header = loop->header;
906	edge nonexit = NULL;
907	edge exit = NULL;
908	unsigned int n_bbs = `0`;
909	int nexits = `0`;
910	profile_count exit_count = profile_count::zero ();
911	profile_count entry_count = profile_count::zero ();
912	edge e;
913	edge_iterator ei;
914
915	FOR_EACH_EDGE (e, ei, loop->header->preds)
916	if (e->src != loop->latch)
917	entry_count += e->count ();
918	while (n_bbs < candidate.nheaders)
919	{
920	if (dump_file && (dump_flags & TDF_DETAILS))
921	fprintf (stream: dump_file, format: " Will duplicate bb %i\n", header->index);
922	/ Find a successor of header that is inside a loop; i.e. the new*
923	header after the condition is copied. /*
924	if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, `0`)->dest))
925	{
926	nonexit = EDGE_SUCC (header, `0`);
927	exit = EDGE_SUCC (header, `1`);
928	}
929	else
930	{
931	nonexit = EDGE_SUCC (header, `1`);
932	exit = EDGE_SUCC (header, `0`);
933	}
934	exit_count += exit->count ();
935	nexits++;
936	bbs[n_bbs++] = header;
937	gcc_assert (bbs_size > n_bbs);
938	header = nonexit->dest;
939	}
940
941	if (dump_file && (dump_flags & TDF_DETAILS))
942	fprintf (stream: dump_file,
943	format: "Duplicating header of the loop %d up to edge %d->%d\n",
944	loop->num, exit->src->index, exit->dest->index);
945
946	/ Ensure that the header will have just the latch as a predecessor*
947	inside the loop. /*
948	if (!single_pred_p (bb: nonexit->dest))
949	{
950	header = split_edge (nonexit);
951	exit = single_pred_edge (bb: header);
952	}
953
954	edge entry = loop_preheader_edge (loop);
955
956	propagate_threaded_block_debug_into (exit->dest, entry->dest);
957	if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
958	true))
959	{
960	delete candidate.static_exits;
961	delete candidate.invariant_exits;
962	if (dump_file && (dump_flags & TDF_DETAILS))
963	fprintf (stream: dump_file, format: "Duplication failed.\n");
964	continue;
965	}
966	if (loop->header->count.initialized_p ())
967	update_profile_after_ch (loop, region: bbs, region_copy: copied_bbs, n_region: n_bbs,
968	invariant_exits: candidate.invariant_exits,
969	static_exits: candidate.static_exits,
970	entry_count);
971	delete candidate.static_exits;
972	delete candidate.invariant_exits;
973	copied.safe_push (obj: std::make_pair (x&: entry, y&: loop));
974
975	/ If the loop has the form "for (i = j; i < j + 10; i++)" then*
976	this copying can introduce a case where we rely on undefined
977	signed overflow to eliminate the preheader condition, because
978	we assume that "j < j + 10" is true. We don't want to warn
979	about that case for -Wstrict-overflow, because in general we
980	don't warn about overflow involving loops. Prevent the
981	warning by setting the no_warning flag in the condition. /*
982	if (warn_strict_overflow > `0`)
983	{
984	unsigned int i;
985
986	for (i = `0`; i < n_bbs; ++i)
987	{
988	gimple_stmt_iterator bsi;
989
990	for (bsi = gsi_start_bb (bb: copied_bbs[i]);
991	!gsi_end_p (i: bsi);
992	gsi_next (i: &bsi))
993	{
994	gimple *stmt = gsi_stmt (i: bsi);
995	if (gimple_code (g: stmt) == GIMPLE_COND)
996	{
997	tree lhs = gimple_cond_lhs (gs: stmt);
998	if (gimple_cond_code (gs: stmt) != EQ_EXPR
999	&& gimple_cond_code (gs: stmt) != NE_EXPR
1000	&& INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1001	&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
1002	suppress_warning (stmt, OPT_Wstrict_overflow_);
1003	}
1004	else if (is_gimple_assign (gs: stmt))
1005	{
1006	enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt);
1007	tree rhs1 = gimple_assign_rhs1 (gs: stmt);
1008	if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
1009	&& rhs_code != EQ_EXPR
1010	&& rhs_code != NE_EXPR
1011	&& INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1012	&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
1013	suppress_warning (stmt, OPT_Wstrict_overflow_);
1014	}
1015	}
1016	}
1017	}
1018
1019	/ Update header of the loop. /
1020	loop->header = header;
1021	/ Find correct latch. We only duplicate chain of conditionals so*
1022	there should be precisely two edges to the new header. One entry
1023	edge and one to latch. /*
1024	FOR_EACH_EDGE (e, ei, loop->header->preds)
1025	if (header != e->src)
1026	{
1027	loop->latch = e->src;
1028	break;
1029	}
1030	/ Ensure that the latch is simple. /
1031	if (!single_succ_p (bb: loop_latch_edge (loop)->src))
1032	split_edge (loop_latch_edge (loop));
1033
1034	if (dump_file && (dump_flags & TDF_DETAILS))
1035	{
1036	if (do_while_loop_p (loop))
1037	fprintf (stream: dump_file, format: "Loop %d is now do-while loop.\n", loop->num);
1038	else
1039	fprintf (stream: dump_file, format: "Loop %d is still not do-while loop.\n",
1040	loop->num);
1041	fprintf (stream: dump_file, format: "Exit count: ");
1042	exit_count.dump (f: dump_file);
1043	fprintf (stream: dump_file, format: "\nEntry count: ");
1044	entry_count.dump (f: dump_file);
1045	fprintf (stream: dump_file, format: "\n");
1046	}
1047
1048	/ We possibly decreased number of itrations by 1. /
1049	auto_vec<edge> exits = get_loop_exit_edges (loop);
1050	bool precise = (nexits == (int) exits.length ());
1051	/ Check that loop may not terminate in other way than via*
1052	basic blocks we duplicated. /*
1053	if (precise)
1054	{
1055	basic_block *bbs = get_loop_body (loop);
1056	for (unsigned i = `0`; i < loop->num_nodes && precise; ++i)
1057	{
1058	basic_block bb = bbs[i];
1059	bool found_exit = false;
1060	FOR_EACH_EDGE (e, ei, bb->succs)
1061	if (!flow_bb_inside_loop_p (loop, e->dest))
1062	{
1063	found_exit = true;
1064	break;
1065	}
1066	/ If BB has exit, it was duplicated. /
1067	if (found_exit)
1068	continue;
1069	/ Give up on irreducible loops. /
1070	if (bb->flags & BB_IRREDUCIBLE_LOOP)
1071	{
1072	precise = false;
1073	break;
1074	}
1075	/ Check that inner loops are finite. /
1076	for (class loop *l = bb->loop_father; l != loop && precise;
1077	l = loop_outer (loop: l))
1078	if (!l->finite_p)
1079	{
1080	precise = false;
1081	break;
1082	}
1083	/ Verify that there is no statement that may be terminate*
1084	execution in a way not visible to CFG. /*
1085	for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
1086	!gsi_end_p (i: bsi); gsi_next (i: &bsi))
1087	if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi)))
1088	precise = false;
1089	}
1090	free (ptr: bbs);
1091	}
1092	if (precise
1093	&& get_max_loop_iterations_int (loop) == `1`)
1094	{
1095	if (dump_file && (dump_flags & TDF_DETAILS))
1096	fprintf (stream: dump_file, format: "Loop %d no longer loops.\n", loop->num);
1097	scale_loop_profile (loop, profile_probability::always (), `0`);
1098	loops_to_unloop.safe_push (obj: loop);
1099	loops_to_unloop_nunroll.safe_push (obj: `0`);
1100	}
1101	else if (precise)
1102	{
1103	if (dump_file && (dump_flags & TDF_DETAILS))
1104	fprintf (stream: dump_file,
1105	format: "Peeled all exits:"
1106	" decreased number of iterations of loop %d by 1.\n",
1107	loop->num);
1108	adjust_loop_info_after_peeling (loop, npeel: `1`, precise: true);
1109	}
1110	else if (exit_count >= entry_count.apply_scale (num: `9`, den: `10`))
1111	{
1112	if (dump_file && (dump_flags & TDF_DETAILS))
1113	fprintf (stream: dump_file,
1114	format: "Peeled likely exits: likely decreased number "
1115	"of iterations of loop %d by 1.\n", loop->num);
1116	adjust_loop_info_after_peeling (loop, npeel: `1`, precise: false);
1117	}
1118	else if (dump_file && (dump_flags & TDF_DETAILS))
1119	fprintf (stream: dump_file,
1120	format: "Not decreased number"
1121	" of iterations of loop %d; likely exits remains.\n",
1122	loop->num);
1123
1124	changed = true;
1125	}
1126
1127	if (changed)
1128	{
1129	update_ssa (TODO_update_ssa);
1130	/ After updating SSA form perform CSE on the loop header*
1131	copies. This is esp. required for the pass before
1132	vectorization since nothing cleans up copied exit tests
1133	that can now be simplified. CSE from the entry of the
1134	region we copied till all loop exit blocks but not
1135	entering the loop itself. /*
1136	for (unsigned i = `0`; i < copied.length (); ++i)
1137	{
1138	edge entry = copied [i].first;
1139	loop_p loop = copied [i].second;
1140	auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
1141	bitmap exit_bbs = BITMAP_ALLOC (NULL);
1142	for (unsigned j = `0`; j < exit_edges.length (); ++j)
1143	bitmap_set_bit (exit_bbs, exit_edges [j]->dest->index);
1144	bitmap_set_bit (exit_bbs, loop->header->index);
1145	do_rpo_vn (cfun, entry, exit_bbs);
1146	BITMAP_FREE (exit_bbs);
1147	}
1148	}
1149	if (!loops_to_unloop.is_empty ())
1150	{
1151	bool irred_invalidated;
1152	unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, NULL, irred_invalidated: &irred_invalidated);
1153	changed = true;
1154	}
1155	free (ptr: bbs);
1156	free (ptr: copied_bbs);
1157
1158	return changed ? TODO_cleanup_cfg : `0`;
1159	}
1160
1161	/ Initialize the loop structures we need, and finalize after. /
1162
1163	unsigned int
1164	pass_ch::execute (function *fun)
1165	{
1166	loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1167	\| LOOPS_HAVE_SIMPLE_LATCHES
1168	\| LOOPS_HAVE_RECORDED_EXITS);
1169
1170	unsigned int res = copy_headers (fun);
1171
1172	loop_optimizer_finalize ();
1173	return res;
1174	}
1175
1176	/ Assume an earlier phase has already initialized all the loop structures that*
1177	we need here (and perhaps others too), and that these will be finalized by
1178	a later phase. /*
1179
1180	unsigned int
1181	pass_ch_vect::execute (function *fun)
1182	{
1183	return copy_headers (fun);
1184	}
1185
1186	/ Apply header copying according to a very simple test of do-while shape. /
1187
1188	bool
1189	pass_ch::process_loop_p (class loop *)
1190	{
1191	return true;
1192	}
1193
1194	/ Apply header-copying to loops where we might enable vectorization. /
1195
1196	bool
1197	pass_ch_vect::process_loop_p (class loop *loop)
1198	{
1199	if (!flag_tree_loop_vectorize && !loop->force_vectorize)
1200	return false;
1201
1202	if (loop->dont_vectorize)
1203	return false;
1204
1205	/ The vectorizer won't handle anything with multiple exits, so skip. /
1206	edge exit = single_exit (loop);
1207	if (!exit)
1208	return false;
1209
1210	if (!do_while_loop_p (loop))
1211	return true;
1212
1213	return false;
1214	}
1215
1216	} // anon namespace
1217
1218	gimple_opt_pass *
1219	make_pass_ch_vect (gcc::context *ctxt)
1220	{
1221	return new pass_ch_vect (ctxt);
1222	}
1223
1224	gimple_opt_pass *
1225	make_pass_ch (gcc::context *ctxt)
1226	{
1227	return new pass_ch (ctxt);
1228	}
1229

source code of gcc/tree-ssa-loop-ch.cc