/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cfgloop.h"
#include "explow.h"
#include "expr.h"
#include "graphds.h"
#include "sreal.h"
#include "regs.h"
#include "function-abi.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}
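
/* For example (added commentary, illustrative): in

     for (;;) { if (cond) foo (); bar (); }

   the block containing bar () dominates the latch and so runs exactly once
   per iteration, while the block containing foo () does not dominate the
   latch and just_once_each_iteration_p returns false for it.  */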

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to do
   this for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to the innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)

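/* A note on the representation (added commentary, implied by the macros
   above): the auxiliary graph has one vertex per basic block (BB_REPR) and
   one extra vertex per loop (LOOP_REPR).  Edges entering a recognized loop
   header are redirected to the loop's representative, and latch edges are
   dropped entirely, so cycles fully explained by recognized natural loops
   disappear; any cycle that survives indicates an irreducible region.  */
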
bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  class loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
        e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
        /* Ignore edges to exit.  */
        if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
          continue;

        src = BB_REPR (act);
        dest = BB_REPR (e->dest);

        /* Ignore latch edges.  */
        if (e->dest->loop_father->header == e->dest
            && dominated_by_p (CDI_DOMINATORS, act, e->dest))
          continue;

        /* Edges inside a single loop should be left where they are.  Edges
           to subloop headers should lead to the representative of the
           subloop, but from the same place.

           Edges exiting loops should lead from the representative of the
           son of the nearest common ancestor of the loops in which ACT
           lies.  */

        if (e->dest->loop_father->header == e->dest)
          dest = LOOP_REPR (e->dest->loop_father);

        if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
          {
            depth = 1 + loop_depth (find_common_loop (act->loop_father,
                                                      e->dest->loop_father));
            if (depth == loop_depth (act->loop_father))
              cloop = act->loop_father;
            else
              cloop = (*act->loop_father->superloops)[depth];

            src = LOOP_REPR (cloop);
          }

        add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
        edge real = (edge) ge->data;
        /* edge E in graph G is irreducible if it connects two vertices in
           the same scc.  */

        /* All edges should lead from a component with higher number to the
           one with lower one.  */
        gcc_assert (g->vertices[ge->src].component
                    >= g->vertices[ge->dest].component);

        if (g->vertices[ge->src].component
            != g->vertices[ge->dest].component)
          continue;

        real->flags |= EDGE_IRREDUCIBLE_LOOP;
        irred_loop_found = true;
        if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
          real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
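
/* An illustrative example (added commentary, not from the original
   sources): in a CFG

     A -> B;  A -> C;  B -> C;  C -> B

   the cycle B <-> C has two entries, so neither B nor C heads a natural
   loop and no latch edge is removed.  The SCC pass above then puts B and C
   into one component, both edges between them receive
   EDGE_IRREDUCIBLE_LOOP, and B and C themselves get BB_IRREDUCIBLE_LOOP.  */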

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          binsns++;

      ninsns += (sreal) binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
        {
          free (bbs);
          return 1000000;
        }
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}
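
/* Illustration of the weighting above (added commentary): if the loop
   header has profile count 1000 and a conditionally executed block in the
   body has count 250, to_sreal_scale yields 0.25 and that block contributes
   a quarter of its insns to the per-iteration average.  */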

/* Compute how many times loop is entered.  */

profile_count
loop_count_in (const class loop *loop)
{
  /* Compute number of invocations of the loop.  */
  profile_count count_in = profile_count::zero ();
  edge e;
  edge_iterator ei;
  bool found_latch = false;

  if (loops_state_satisfies_p (LOOPS_MAY_HAVE_MULTIPLE_LATCHES))
    FOR_EACH_EDGE (e, ei, loop->header->preds)
      if (!flow_bb_inside_loop_p (loop, e->src))
        count_in += e->count ();
      else
        found_latch = true;
  else
    FOR_EACH_EDGE (e, ei, loop->header->preds)
      if (e->src != loop->latch)
        count_in += e->count ();
      else
        found_latch = true;
  gcc_checking_assert (found_latch);
  return count_in;
}

/* Return true if BB profile can be used to determine the expected number of
   iterations (that is, the number of executions of latch edge(s) for each
   entry of the loop).  If this is the case, initialize RET with the number
   of iterations.

   RELIABLE is set if the profile indicates that the returned value should
   be a realistic estimate.  (This is the case if we read the profile and
   have not messed it up yet, and not the case for guessed profiles.)

   This function uses only the CFG profile.  We track more reliable info in
   the loop_info structure, and for loop optimization heuristics the
   get_estimated_loop_iterations API is more relevant.  */

bool
expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
                                     bool *reliable)
{
  profile_count header_count = loop->header->count;
  if (reliable)
    *reliable = false;

  /* TODO: For single exit loops we can use the loop exit edge probability.
     It may also remain reliable after the loop itself was adjusted.  */
  if (!header_count.initialized_p ()
      || !header_count.nonzero_p ())
    return false;

  profile_count count_in = loop_count_in (loop);

  bool known;
  /* Number of iterations is number of executions of latch edge.  */
  *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
  if (!known)
    return false;
  if (reliable)
    {
      /* Header should have at least count_in many executions.
         Give up on clearly inconsistent profile.  */
      if (header_count < count_in && header_count.differs_from_p (count_in))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
                     loop->num);
          *reliable = false;
        }
      else
        *reliable = count_in.reliable_p () && header_count.reliable_p ();
    }
  return true;
}
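
/* Worked example (added commentary): with a header count of 1000 and an
   entry count of 100, the latch edge(s) execute 1000 - 100 = 900 times,
   i.e. on average 900 / 100 = 9 iterations per entry of the loop.  */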

/* Return true if loop CFG profile may be unrealistically flat.
   This is a common case, since average loops iterate only about 5 times.
   In case we do not have profile feedback or do not know the real number of
   iterations during profile estimation, we are likely going to predict it
   with a similarly low iteration count.  For static loop profiles we also
   artificially cap profiles of loops with known large iteration counts so
   they do not appear significantly more hot than other loops with unknown
   iteration counts.

   For loop optimization heuristics we ignore the CFG profile and instead
   use the get_estimated_loop_iterations API, which returns an estimate
   only when it is realistic.  For unknown counts some optimizations,
   like the vectorizer or unroller, guess that the iteration count will
   be large.  In this case we need to avoid scaling down the profile
   after the loop transform.  */

bool
maybe_flat_loop_profile (const class loop *loop)
{
  bool reliable;
  sreal ret;

  if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
    return true;

  /* Reliable CFG estimates ought never be flat.  Sanity check with
     nb_iterations_estimate.  If those differ, it is a bug in the profile
     updating code.  */
  if (reliable)
    {
      int64_t intret = ret.to_nearest_int ();
      if (loop->any_estimate
          && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
              || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Loop %i has inconsistent iterations estimates: "
                     "reliable CFG based iteration estimate is %f "
                     "while nb_iterations_estimate is %i\n",
                     loop->num,
                     ret.to_double (),
                     (int) loop->nb_iterations_estimate.to_shwi ());
          return true;
        }
      return false;
    }

  /* Allow some margin of error and see if we are close to known bounds.
     sreal (9, -3) is 9/8.  */
  int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
  if (loop->any_upper_bound
      && wi::geu_p (intret, loop->nb_iterations_upper_bound))
    return false;
  if (loop->any_likely_upper_bound
      && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
    return false;
  if (loop->any_estimate
      && wi::geu_p (intret, loop->nb_iterations_estimate))
    return false;
  return true;
}

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.

   This function attempts to return a "sane" value even if the profile
   information is not good enough to derive something.  */

gcov_type
expected_loop_iterations_unbounded (const class loop *loop,
                                    bool *read_profile_p)
{
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  sreal sreal_expected;
  if (expected_loop_iterations_by_profile (loop, &sreal_expected,
                                           read_profile_p))
    expected = sreal_expected.to_nearest_int ();
  else
    expected = param_avg_loop_niter;

  HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
  if (max != -1 && max < expected)
    return max;

  return expected;
}
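
/* Note (added commentary): when the CFG profile cannot be used, the
   fallback above is the --param=avg-loop-niter value, and in either case
   the result is capped by the maximum iteration bound when one is
   known.  */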

/* Returns expected number of LOOP iterations.  The returned value is
   bounded by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (class loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const class loop *loop)
{
  const class loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
        mx = l + 1;
    }
  return mx;
}

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
        && !fixed_regs[i])
      {
        target_avail_regs++;
        /* ??? This is only a rough heuristic.  It doesn't cope well
           with alternative ABIs, but that's an optimization rather than
           correctness issue.  */
        if (default_function_abi.clobbers_full_reg_p (i))
          target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

         1) If not many free registers remain, we should prefer having an
            additional move to decreasing the number of available registers.
            (TARGET_REG_COST).
         2) If no registers are available, we need to spill, which may
            require storing the old value to memory and loading it back
            (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
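
/* Note (added commentary): the two sequences generated above -- a
   register-to-register move, and a store followed by a reload -- are
   costed once with speed == 0 (size oriented) and once with speed == 1
   (speed oriented), yielding the target_reg_cost and target_spill_cost
   constants consumed by estimate_reg_pressure_cost below.  */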

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
                            bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
                   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation more
       accurate for IRA, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}
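
/* Worked example (added commentary, numbers hypothetical): with
   target_avail_regs == 14, target_res_regs == 3, n_old == 10 and
   n_new == 3, regs_needed is 13.  Since 13 + 3 > 14 the function does not
   return 0, but 13 <= 14, so the cheaper per-register cost applies and
   the result is target_reg_cost[speed] * 3 (possibly halved when IRA
   regional allocation is in effect).  */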

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (loop_outer (bb->loop_father)
              && loop_exit_edge_p (bb->loop_father, e))
            e->flags |= EDGE_LOOP_EXIT;
          else
            e->flags &= ~EDGE_LOOP_EXIT;
        }
    }
}

/* Return the exit edge if LOOP has only one exit that is likely
   to be executed at runtime (i.e. it is not an EH edge or an edge
   leading to a noreturn call).  */

edge
single_likely_exit (class loop *loop, const vec<edge> &exits)
{
  edge found = single_exit (loop);
  unsigned i;
  edge ex;

  if (found)
    return found;
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
          /* We want to rule out paths to noreturns but not low
             probabilities resulting from adjustments or combining.
             FIXME: once we have better quality tracking, make this more
             robust.  */
          || ex->probability <= profile_probability::very_unlikely ())
        continue;
      if (!found)
        found = ex;
      else
        return NULL;
    }
  return found;
}
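
/* For example (added commentary): in a loop with two exits, one of which
   only leads to a call to abort (), the edge toward abort () is predicted
   never executed and skipped by the filter above, so the remaining exit is
   returned as the single likely one.  */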

/* Returns the likely-hot path through LOOP: starting from the header, it
   repeatedly follows the most probable successor edge that stays within
   the loop and leads to a block not visited yet, stopping when no such
   edge exists or when the best edge returns to the header.  */

auto_vec<basic_block>
get_loop_hot_path (const class loop *loop)
{
  basic_block bb = loop->header;
  auto_vec<basic_block> path;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
        if ((!best || e->probability > best->probability)
            && !loop_exit_edge_p (loop, e)
            && !bitmap_bit_p (visited, e->dest->index))
          best = e;
      if (!best || best->dest == loop->header)
        break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}