loop-doloop.cc source code [gcc/loop-doloop.cc]

1	/ Perform doloop optimizations*
2	Copyright (C) 2004-2025 Free Software Foundation, Inc.
3	Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
5	This file is part of GCC.
6
7	GCC is free software; you can redistribute it and/or modify it under
8	the terms of the GNU General Public License as published by the Free
9	Software Foundation; either version 3, or (at your option) any later
10	version.
11
12	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13	WARRANTY; without even the implied warranty of MERCHANTABILITY or
14	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15	for more details.
16
17	You should have received a copy of the GNU General Public License
18	along with GCC; see the file COPYING3. If not see
19	<http://www.gnu.org/licenses/>. /*
20
21	#include "config.h"
22	#include "system.h"
23	#include "coretypes.h"
24	#include "backend.h"
25	#include "target.h"
26	#include "rtl.h"
27	#include "tree.h"
28	#include "cfghooks.h"
29	#include "memmodel.h"
30	#include "emit-rtl.h"
31	#include "dojump.h"
32	#include "expr.h"
33	#include "cfgloop.h"
34	#include "cfgrtl.h"
35	#include "dumpfile.h"
36	#include "loop-unroll.h"
37	#include "regs.h"
38	#include "df.h"
39	#include "targhooks.h"
40
41	/ This module is used to modify loops with a determinable number of*
42	iterations to use special low-overhead looping instructions.
43
44	It first validates whether the loop is well behaved and has a
45	determinable number of iterations (either at compile or run-time).
46	It then modifies the loop to use a low-overhead looping pattern as
47	follows:
48
49	1. A pseudo register is allocated as the loop iteration counter.
50
51	2. The number of loop iterations is calculated and is stored
52	in the loop counter.
53
54	3. At the end of the loop, the jump insn is replaced by the
55	doloop_end pattern. The compare must remain because it might be
56	used elsewhere. If the loop-variable or condition register are
57	used elsewhere, they will be eliminated by flow.
58
59	4. An optional doloop_begin pattern is inserted at the top of the
60	loop.
61
62	TODO The optimization should only performed when either the biv used for exit
63	condition is unused at all except for the exit test, or if we do not have to
64	change its value, since otherwise we have to add a new induction variable,
65	which usually will not pay up (unless the cost of the doloop pattern is
66	somehow extremely lower than the cost of compare & jump, or unless the bct
67	register cannot be used for anything else but doloop -- ??? detect these
68	cases). /*
69
70	/ Return the loop termination condition for PATTERN or zero*
71	if it is not a decrement and branch jump insn. /*
72
73	rtx
74	doloop_condition_get (rtx_insn *doloop_pat)
75	{
76	rtx cmp;
77	rtx inc;
78	rtx reg;
79	rtx inc_src;
80	rtx condition;
81	rtx pattern;
82	rtx cc_reg = NULL_RTX;
83	rtx reg_orig = NULL_RTX;
84
85	/ The canonical doloop pattern we expect has one of the following*
86	forms:
87
88	1) (parallel [(set (pc) (if_then_else (condition)
89	(label_ref (label))
90	(pc)))
91	(set (reg) (plus (reg) (const_int -1)))
92	(additional clobbers and uses)])
93
94	The branch must be the first entry of the parallel (also required
95	by jump.cc), and the second entry of the parallel must be a set of
96	the loop counter register. Some targets (IA-64) wrap the set of
97	the loop counter in an if_then_else too.
98
99	2) (set (reg) (plus (reg) (const_int -1))
100	(set (pc) (if_then_else (reg != 0)
101	(label_ref (label))
102	(pc))).
103
104	3) Some targets (Arm) do the comparison before the branch, as in the
105	following form:
106
107	(parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
108	(set (reg) (plus (reg) (const_int -1)))])
109	(set (pc) (if_then_else (cc == NE)
110	(label_ref (label))
111	(pc)))
112
113	4) This form supports a construct that is used to represent a vectorized
114	do loop with predication, however we do not need to care about the
115	details of the predication here.
116	Arm uses this construct to support MVE tail predication.
117
118	(parallel
119	[(set (pc)
120	(if_then_else (gtu (plus (reg) (const_int -n))
121	(const_int n-1))
122	(label_ref)
123	(pc)))
124	(set (reg) (plus (reg) (const_int -n)))
125	(additional clobbers and uses)])
126	*/
127	pattern = PATTERN (insn: doloop_pat);
128
129	if (GET_CODE (pattern) != PARALLEL)
130	{
131	rtx cond;
132	rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
133	rtx cmp_arg1, cmp_arg2;
134	rtx cmp_orig;
135
136	/ In case the pattern is not PARALLEL we expect two forms*
137	of doloop which are cases 2) and 3) above: in case 2) the
138	decrement immediately precedes the branch, while in case 3)
139	the compare and decrement instructions immediately precede
140	the branch. /*
141
142	if (prev_insn == NULL_RTX \|\| !INSN_P (prev_insn))
143	return `0`;
144
145	cmp = pattern;
146	if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
147	{
148	/ The third case: the compare and decrement instructions*
149	immediately precede the branch. /*
150	cmp_orig = XVECEXP (PATTERN (prev_insn), `0`, `0`);
151	if (GET_CODE (cmp_orig) != SET)
152	return `0`;
153	if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
154	return `0`;
155	cmp_arg1 = XEXP (SET_SRC (cmp_orig), `0`);
156	cmp_arg2 = XEXP (SET_SRC (cmp_orig), `1`);
157	if (cmp_arg2 != const0_rtx
158	\|\| GET_CODE (cmp_arg1) != PLUS)
159	return `0`;
160	reg_orig = XEXP (cmp_arg1, `0`);
161	if (XEXP (cmp_arg1, `1`) != GEN_INT (-`1`)
162	\|\| !REG_P (reg_orig))
163	return `0`;
164	cc_reg = SET_DEST (cmp_orig);
165
166	inc = XVECEXP (PATTERN (prev_insn), `0`, `1`);
167	}
168	else
169	inc = PATTERN (insn: prev_insn);
170	if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
171	{
172	/ We expect the condition to be of the form (reg != 0) /
173	cond = XEXP (SET_SRC (cmp), `0`);
174	if (GET_CODE (cond) != NE \|\| XEXP (cond, `1`) != const0_rtx)
175	return `0`;
176	}
177	}
178	else
179	{
180	cmp = XVECEXP (pattern, `0`, `0`);
181	inc = XVECEXP (pattern, `0`, `1`);
182	}
183
184	/ Check for (set (reg) (something)). /
185	if (GET_CODE (inc) != SET)
186	return `0`;
187	reg = SET_DEST (inc);
188	if (! REG_P (reg))
189	return `0`;
190
191	/ Check if something = (plus (reg) (const_int -n)).*
192	On IA-64, this decrement is wrapped in an if_then_else. /*
193	inc_src = SET_SRC (inc);
194	if (GET_CODE (inc_src) == IF_THEN_ELSE)
195	inc_src = XEXP (inc_src, `1`);
196	if (GET_CODE (inc_src) != PLUS
197	\|\| !rtx_equal_p (XEXP (inc_src, `0`), reg)
198	\|\| !CONST_INT_P (XEXP (inc_src, `1`))
199	\|\| INTVAL (XEXP (inc_src, `1`)) >= `0`)
200	return `0`;
201	int dec_num = -INTVAL (XEXP (inc_src, `1`));
202
203	/ Check for (set (pc) (if_then_else (condition)*
204	(label_ref (label))
205	(pc))). /*
206	if (GET_CODE (cmp) != SET
207	\|\| SET_DEST (cmp) != pc_rtx
208	\|\| GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
209	\|\| GET_CODE (XEXP (SET_SRC (cmp), `1`)) != LABEL_REF
210	\|\| XEXP (SET_SRC (cmp), `2`) != pc_rtx)
211	return `0`;
212
213	/ Extract loop termination condition. /
214	condition = XEXP (SET_SRC (cmp), `0`);
215
216	/ We expect a GE or NE comparison with 0 or 1, or a GTU comparison with*
217	dec_num - 1. /*
218	if (!((GET_CODE (condition) == GE
219	\|\| GET_CODE (condition) == NE)
220	&& (XEXP (condition, `1`) == const0_rtx
221	\|\| XEXP (condition, `1`) == const1_rtx ))
222	&&!(GET_CODE (condition) == GTU
223	&& ((INTVAL (XEXP (condition, `1`))) == (dec_num - `1`))))
224	return `0`;
225
226	if (rtx_equal_p (XEXP (condition, `0`), reg)
227	/ For the third case: /
228	\|\| ((cc_reg != NULL_RTX)
229	&& (XEXP (condition, `0`) == cc_reg)
230	&& (rtx_equal_p (reg_orig, reg)))
231	\|\| (GET_CODE (XEXP (condition, `0`)) == PLUS
232	&& rtx_equal_p (XEXP (XEXP (condition, `0`), `0`), reg, NULL)))
233	{
234	if (GET_CODE (pattern) != PARALLEL)
235	/ For the second form we expect:*
236
237	(set (reg) (plus (reg) (const_int -1))
238	(set (pc) (if_then_else (reg != 0)
239	(label_ref (label))
240	(pc))).
241
242	is equivalent to the following:
243
244	(parallel [(set (pc) (if_then_else (reg != 1)
245	(label_ref (label))
246	(pc)))
247	(set (reg) (plus (reg) (const_int -1)))
248	(additional clobbers and uses)])
249
250	For the third form we expect:
251
252	(parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
253	(set (reg) (plus (reg) (const_int -1)))])
254	(set (pc) (if_then_else (cc == NE)
255	(label_ref (label))
256	(pc)))
257
258	which is equivalent to the following:
259
260	(parallel [(set (cc) (compare (reg, 1))
261	(set (reg) (plus (reg) (const_int -1)))
262	(set (pc) (if_then_else (NE == cc)
263	(label_ref (label))
264	(pc))))])
265
266	So we return the second form instead for the two cases.
267
268	*/
269	condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
270
271	return condition;
272	}
273
274	/ ??? If a machine uses a funny comparison, we could return a*
275	canonicalized form here. /*
276
277	return `0`;
278	}
279
280	/ Return nonzero if the loop specified by LOOP is suitable for*
281	the use of special low-overhead looping instructions. DESC
282	describes the number of iterations of the loop. /*
283
284	static bool
285	doloop_valid_p (class loop loop, class* niter_desc *desc)
286	{
287	basic_block *body = get_loop_body (loop), bb;
288	rtx_insn *insn;
289	unsigned i;
290	bool result = true;
291
292	/ Check for loops that may not terminate under special conditions. /
293	if (!desc->simple_p
294	\|\| desc->assumptions
295	\|\| desc->infinite)
296	{
297	/ There are some cases that would require a special attention.*
298	For example if the comparison is LEU and the comparison value
299	is UINT_MAX then the loop will not terminate. Similarly, if the
300	comparison code is GEU and the comparison value is 0, the
301	loop will not terminate.
302
303	If the absolute increment is not 1, the loop can be infinite
304	even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
305
306	??? We could compute these conditions at run-time and have a
307	additional jump around the loop to ensure an infinite loop.
308	However, it is very unlikely that this is the intended
309	behavior of the loop and checking for these rare boundary
310	conditions would pessimize all other code.
311
312	If the loop is executed only a few times an extra check to
313	restart the loop could use up most of the benefits of using a
314	count register loop. Note however, that normally, this
315	restart branch would never execute, so it could be predicted
316	well by the CPU. We should generate the pessimistic code by
317	default, and have an option, e.g. -funsafe-loops that would
318	enable count-register loops in this case. /*
319	if (dump_file)
320	fprintf (stream: dump_file, format: "Doloop: Possible infinite iteration case.\n");
321	result = false;
322	goto cleanup;
323	}
324
325	for (i = `0`; i < loop->num_nodes; i++)
326	{
327	bb = body[i];
328
329	for (insn = BB_HEAD (bb);
330	insn != NEXT_INSN (BB_END (bb));
331	insn = NEXT_INSN (insn))
332	{
333	/ Different targets have different necessities for low-overhead*
334	looping. Call the back end for each instruction within the loop
335	to let it decide whether the insn prohibits a low-overhead loop.
336	It will then return the cause for it to emit to the dump file. /*
337	const char * invalid = targetm.invalid_within_doloop (insn);
338	if (invalid)
339	{
340	if (dump_file)
341	fprintf (stream: dump_file, format: "Doloop: %s\n", invalid);
342	result = false;
343	goto cleanup;
344	}
345	}
346	}
347	result = true;
348
349	cleanup:
350	free (ptr: body);
351
352	return result;
353	}
354
355	/ Adds test of COND jumping to DEST on edge E and set E to the new fallthru*
356	edge. If the condition is always false, do not do anything. If it is always
357	true, redirect E to DEST and return false. In all other cases, true is
358	returned. /*
359
360	static bool
361	add_test (rtx cond, edge *e, basic_block dest)
362	{
363	rtx_insn seq, jump;
364	rtx_code_label *label;
365	machine_mode mode;
366	rtx op0 = XEXP (cond, `0`), op1 = XEXP (cond, `1`);
367	enum rtx_code code = GET_CODE (cond);
368	basic_block bb;
369	/ The jump is supposed to handle an unlikely special case. /
370	profile_probability prob = profile_probability::guessed_never ();
371
372	mode = GET_MODE (XEXP (cond, `0`));
373	if (mode == VOIDmode)
374	mode = GET_MODE (XEXP (cond, `1`));
375
376	start_sequence ();
377	op0 = force_operand (op0, NULL_RTX);
378	op1 = force_operand (op1, NULL_RTX);
379	label = block_label (dest);
380	do_compare_rtx_and_jump (op0, op1, code, `0`, mode, NULL_RTX, NULL, label,
381	prob);
382
383	jump = get_last_insn ();
384	if (!jump \|\| !JUMP_P (jump))
385	{
386	/ The condition is always false and the jump was optimized out. /
387	end_sequence ();
388	return true;
389	}
390
391	seq = get_insns ();
392	unshare_all_rtl_in_chain (seq);
393	end_sequence ();
394
395	/ There always is at least the jump insn in the sequence. /
396	gcc_assert (seq != NULL_RTX);
397
398	bb = split_edge_and_insert (*e, seq);
399	*e = single_succ_edge (bb);
400
401	if (any_uncondjump_p (jump) && onlyjump_p (jump))
402	{
403	/ The condition is always true. /
404	delete_insn (jump);
405	redirect_edge_and_branch_force (*e, dest);
406	return false;
407	}
408
409	JUMP_LABEL (jump) = label;
410
411	LABEL_NUSES (label)++;
412
413	edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
414	e2->probability = prob;
415	(*e)->probability = prob.invert ();
416	update_br_prob_note (e2->src);
417	return true;
418	}
419
420	/ Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping. i.e:*
421
422	73: r145:SI=r123:DI#0-0x1
423	74: r144:DI=zero_extend (r145:SI)
424	75: r143:DI=r144:DI+0x1
425	...
426	31: r135:CC=cmp (r123:DI,0)
427	72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...}
428
429	r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of
430	loop iterations, if loop iterations expression doesn't overflow, then
431	(zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend. /*
432
433	static rtx
434	doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count)
435	{
436	widest_int iterations;
437	if (GET_CODE (count) == ZERO_EXTEND)
438	{
439	rtx extop0 = XEXP (count, `0`);
440	if (GET_CODE (extop0) == PLUS)
441	{
442	rtx addop0 = XEXP (extop0, `0`);
443	rtx addop1 = XEXP (extop0, `1`);
444
445	if (get_max_loop_iterations (loop, nit: &iterations)
446	&& wi::ltu_p (x: iterations, GET_MODE_MASK (GET_MODE (addop0)))
447	&& addop1 == constm1_rtx)
448	return simplify_gen_unary (code: ZERO_EXTEND, mode, op: addop0,
449	GET_MODE (addop0));
450	}
451	}
452
453	return simplify_gen_binary (code: PLUS, mode, op0: count, const1_rtx);
454	}
455
456	/ Modify the loop to use the low-overhead looping insn where LOOP*
457	describes the loop, DESC describes the number of iterations of the
458	loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
459	end of the loop. CONDITION is the condition separated from the
460	DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. /*
461
462	static void
463	doloop_modify (class loop loop, class* niter_desc *desc,
464	rtx_insn *doloop_seq, rtx condition, rtx count)
465	{
466	rtx counter_reg;
467	rtx tmp, noloop = NULL_RTX;
468	rtx_insn *sequence;
469	rtx_insn *jump_insn;
470	rtx_code_label *jump_label;
471	int nonneg = `0`;
472	bool increment_count;
473	basic_block loop_end = desc->out_edge->src;
474	scalar_int_mode mode;
475	widest_int iterations;
476
477	jump_insn = BB_END (loop_end);
478
479	if (dump_file)
480	{
481	fprintf (stream: dump_file, format: "Doloop: Inserting doloop pattern (");
482	if (desc->const_iter)
483	fprintf (stream: dump_file, format: "%" PRId64, desc->niter);
484	else
485	fputs (s: "runtime", stream: dump_file);
486	fputs (s: " iterations).\n", stream: dump_file);
487	}
488
489	/ Discard original jump to continue loop. The original compare*
490	result may still be live, so it cannot be discarded explicitly. /*
491	delete_insn (jump_insn);
492
493	counter_reg = XEXP (condition, `0`);
494	if (GET_CODE (counter_reg) == PLUS)
495	counter_reg = XEXP (counter_reg, `0`);
496	/ These patterns must operate on integer counters. /
497	mode = as_a <scalar_int_mode> (GET_MODE (counter_reg));
498
499	increment_count = false;
500	switch (GET_CODE (condition))
501	{
502	case NE:
503	/ Currently only NE tests against zero and one are supported. /
504	noloop = XEXP (condition, `1`);
505	if (noloop != const0_rtx)
506	{
507	gcc_assert (noloop == const1_rtx);
508	increment_count = true;
509	}
510	break;
511
512	case GE:
513	/ Currently only GE tests against zero are supported. /
514	gcc_assert (XEXP (condition, `1`) == const0_rtx);
515
516	noloop = constm1_rtx;
517
518	/ The iteration count does not need incrementing for a GE test. /
519	increment_count = false;
520
521	/ Determine if the iteration counter will be non-negative.*
522	Note that the maximum value loaded is iterations_max - 1. /*
523	if (get_max_loop_iterations (loop, nit: &iterations)
524	&& wi::leu_p (x: iterations,
525	y: wi::set_bit_in_zero <widest_int>
526	(GET_MODE_PRECISION (mode) - `1`)))
527	nonneg = `1`;
528	break;
529
530	case GTU:
531	/ The iteration count does not need incrementing for a GTU test. /
532	increment_count = false;
533	break;
534
535	/ Abort if an invalid doloop pattern has been generated. /
536	default:
537	gcc_unreachable ();
538	}
539
540	if (increment_count)
541	count = doloop_simplify_count (loop, mode, count);
542
543	/ Insert initialization of the count register into the loop header. /
544	start_sequence ();
545	/ count has been already copied through copy_rtx. /
546	reset_used_flags (count);
547	set_used_flags (condition);
548	tmp = force_operand (count, counter_reg);
549	convert_move (counter_reg, tmp, `1`);
550	sequence = get_insns ();
551	unshare_all_rtl_in_chain (sequence);
552	end_sequence ();
553	emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
554
555	if (desc->noloop_assumptions)
556	{
557	/ The GTU case has only been implemented for Arm, where*
558	noloop_assumptions gets explicitly set to NULL for that case, so
559	assert here for safety. /*
560	gcc_assert (GET_CODE (condition) != GTU);
561	rtx ass = copy_rtx (desc->noloop_assumptions);
562	basic_block preheader = loop_preheader_edge (loop)->src;
563	basic_block set_zero = split_edge (loop_preheader_edge (loop));
564	basic_block new_preheader = split_edge (loop_preheader_edge (loop));
565	edge te;
566
567	/ Expand the condition testing the assumptions and if it does not pass,*
568	reset the count register to 0. /*
569	redirect_edge_and_branch_force (single_succ_edge (bb: preheader), new_preheader);
570	set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
571
572	set_zero->count = profile_count::uninitialized ();
573
574	te = single_succ_edge (bb: preheader);
575	for (; ass; ass = XEXP (ass, `1`))
576	if (!add_test (XEXP (ass, `0`), e: &te, dest: set_zero))
577	break;
578
579	if (ass)
580	{
581	/ We reached a condition that is always true. This is very hard to*
582	reproduce (such a loop does not roll, and thus it would most
583	likely get optimized out by some of the preceding optimizations).
584	In fact, I do not have any testcase for it. However, it would
585	also be very hard to show that it is impossible, so we must
586	handle this case. /*
587	set_zero->count = preheader->count;
588	}
589
590	if (EDGE_COUNT (set_zero->preds) == `0`)
591	{
592	/ All the conditions were simplified to false, remove the*
593	unreachable set_zero block. /*
594	delete_basic_block (set_zero);
595	}
596	else
597	{
598	/ Reset the counter to zero in the set_zero block. /
599	start_sequence ();
600	convert_move (counter_reg, noloop, `0`);
601	sequence = end_sequence ();
602	emit_insn_after (sequence, BB_END (set_zero));
603
604	set_immediate_dominator (CDI_DOMINATORS, set_zero,
605	recompute_dominator (CDI_DOMINATORS,
606	set_zero));
607	}
608
609	set_immediate_dominator (CDI_DOMINATORS, new_preheader,
610	recompute_dominator (CDI_DOMINATORS,
611	new_preheader));
612	}
613
614	/ Some targets (eg, C4x) need to initialize special looping*
615	registers. /*
616	if (targetm.have_doloop_begin ())
617	if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
618	emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));
619
620	/ Insert the new low-overhead looping insn. /
621	emit_jump_insn_after (doloop_seq, BB_END (loop_end));
622	jump_insn = BB_END (loop_end);
623	jump_label = block_label (desc->in_edge->dest);
624	JUMP_LABEL (jump_insn) = jump_label;
625	LABEL_NUSES (jump_label)++;
626
627	/ Ensure the right fallthru edge is marked, for case we have reversed*
628	the condition. /*
629	desc->in_edge->flags &= ~EDGE_FALLTHRU;
630	desc->out_edge->flags \|= EDGE_FALLTHRU;
631
632	/ Add a REG_NONNEG note if the actual or estimated maximum number*
633	of iterations is non-negative. /*
634	if (nonneg)
635	add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
636
637	/ Update the REG_BR_PROB note. /
638	if (desc->in_edge->probability.initialized_p ())
639	add_reg_br_prob_note (jump_insn, desc->in_edge->probability);
640	}
641
642	/ Called through note_stores. /
643
644	static void
645	record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
646	{
647	bitmap mod = (bitmap)data;
648	if (REG_P (x))
649	{
650	unsigned int regno = REGNO (x);
651	if (HARD_REGISTER_P (x))
652	{
653	unsigned int end_regno = end_hard_regno (GET_MODE (x), regno);
654	do
655	bitmap_set_bit (mod, regno);
656	while (++regno < end_regno);
657	}
658	else
659	bitmap_set_bit (mod, regno);
660	}
661	}
662
663	/ Process loop described by LOOP validating that the loop is suitable for*
664	conversion to use a low overhead looping instruction, replacing the jump
665	insn where suitable. Returns true if the loop was successfully
666	modified. /*
667
668	static bool
669	doloop_optimize (class loop *loop)
670	{
671	scalar_int_mode mode;
672	rtx doloop_reg;
673	rtx count = NULL_RTX;
674	widest_int iterations, iterations_max;
675	rtx_code_label *start_label;
676	rtx condition;
677	unsigned level;
678	HOST_WIDE_INT est_niter;
679	int max_cost;
680	class niter_desc *desc;
681	unsigned word_mode_size;
682	unsigned HOST_WIDE_INT word_mode_max;
683	int entered_at_top;
684
685	if (dump_file)
686	fprintf (stream: dump_file, format: "Doloop: Processing loop %d.\n", loop->num);
687
688	iv_analysis_loop_init (loop);
689
690	/ Find the simple exit of a LOOP. /
691	desc = get_simple_loop_desc (loop);
692
693	/ Check that loop is a candidate for a low-overhead looping insn. /
694	if (!doloop_valid_p (loop, desc))
695	{
696	if (dump_file)
697	fprintf (stream: dump_file,
698	format: "Doloop: The loop is not suitable.\n");
699	return false;
700	}
701	mode = desc->mode;
702
703	est_niter = get_estimated_loop_iterations_int (loop);
704	if (est_niter == -`1`)
705	est_niter = get_likely_max_loop_iterations_int (loop);
706
707	if (est_niter >= `0` && est_niter < `3`)
708	{
709	if (dump_file)
710	fprintf (stream: dump_file,
711	format: "Doloop: Too few iterations (%u) to be profitable.\n",
712	(unsigned int)est_niter);
713	return false;
714	}
715
716	if (desc->const_iter)
717	iterations = widest_int::from (x: rtx_mode_t (desc->niter_expr, mode),
718	sgn: UNSIGNED);
719	else
720	iterations = `0`;
721	if (!get_max_loop_iterations (loop, nit: &iterations_max))
722	iterations_max = `0`;
723	level = get_loop_level (loop) + `1`;
724	entered_at_top = (loop->latch == desc->in_edge->dest
725	&& contains_no_active_insn_p (loop->latch));
726	if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
727	entered_at_top))
728	{
729	if (dump_file)
730	fprintf (stream: dump_file, format: "Loop rejected by can_use_doloop_p.\n");
731	return false;
732	}
733
734	/ Generate looping insn. If the pattern FAILs then give up trying*
735	to modify the loop since there is some aspect the back-end does
736	not like. If this succeeds, there is a chance that the loop
737	desc->niter_expr has been altered by the backend, so only extract
738	that data after the gen_doloop_end. /*
739	start_label = block_label (desc->in_edge->dest);
740	doloop_reg = gen_reg_rtx (mode);
741	rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
742
743	max_cost
744	= COSTS_N_INSNS (param_max_iterations_computation_cost);
745	if (set_src_cost (x: desc->niter_expr, mode, speed_p: optimize_loop_for_speed_p (loop))
746	> max_cost)
747	{
748	if (dump_file)
749	fprintf (stream: dump_file,
750	format: "Doloop: number of iterations too costly to compute.\n");
751	return false;
752	}
753
754	count = copy_rtx (desc->niter_expr);
755	word_mode_size = GET_MODE_PRECISION (mode: word_mode);
756	word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - `1`) << `1`) - `1`;
757	if (! doloop_seq
758	&& mode != word_mode
759	/ Before trying mode different from the one in that # of iterations is*
760	computed, we must be sure that the number of iterations fits into
761	the new mode. /*
762	&& (word_mode_size >= GET_MODE_PRECISION (mode)
763	\|\| wi::leu_p (x: iterations_max, y: word_mode_max)))
764	{
765	if (word_mode_size > GET_MODE_PRECISION (mode))
766	count = simplify_gen_unary (code: ZERO_EXTEND, mode: word_mode, op: count, op_mode: mode);
767	else
768	count = lowpart_subreg (outermode: word_mode, op: count, innermode: mode);
769	PUT_MODE (x: doloop_reg, mode: word_mode);
770	doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
771	}
772	if (! doloop_seq)
773	{
774	if (dump_file)
775	fprintf (stream: dump_file,
776	format: "Doloop: Target unwilling to use doloop pattern!\n");
777	return false;
778	}
779
780	/ If multiple instructions were created, the last must be the*
781	jump instruction. /*
782	rtx_insn *doloop_insn = doloop_seq;
783	while (NEXT_INSN (insn: doloop_insn) != NULL_RTX)
784	doloop_insn = NEXT_INSN (insn: doloop_insn);
785	if (!JUMP_P (doloop_insn)
786	\|\| !(condition = doloop_condition_get (doloop_pat: doloop_insn)))
787	{
788	if (dump_file)
789	fprintf (stream: dump_file, format: "Doloop: Unrecognizable doloop pattern!\n");
790	return false;
791	}
792
793	/ Ensure that the new sequence doesn't clobber a register that*
794	is live at the end of the block. /*
795	{
796	bitmap modified = BITMAP_ALLOC (NULL);
797
798	for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (insn: i))
799	note_stores (i, record_reg_sets, modified);
800
801	basic_block loop_end = desc->out_edge->src;
802	bool fail = bitmap_intersect_p (df_get_live_out (bb: loop_end), modified);
803	/ iv_analysis_loop_init calls df_analyze_loop, which computes just*
804	partial df for blocks of the loop only. The above will catch if
805	any of the modified registers are use inside of the loop body, but
806	it will most likely not have accurate info on registers used
807	at the destination of the out_edge. We call df_analyze on the
808	whole function at the start of the pass though and iterate only
809	on innermost loops or from innermost loops, so
810	live in on desc->out_edge->dest should be still unmodified from
811	the initial df_analyze. /*
812	if (!fail)
813	fail = bitmap_intersect_p (df_get_live_in (bb: desc->out_edge->dest),
814	modified);
815	BITMAP_FREE (modified);
816
817	if (fail)
818	{
819	if (dump_file)
820	fprintf (stream: dump_file, format: "Doloop: doloop pattern clobbers live out\n");
821	return false;
822	}
823	}
824
825	doloop_modify (loop, desc, doloop_seq, condition, count);
826	return true;
827	}
828
829	/ This is the main entry point. Process all loops using doloop_optimize. /
830
831	void
832	doloop_optimize_loops (void)
833	{
834	if (optimize == `1`)
835	{
836	df_live_add_problem ();
837	df_live_set_all_dirty ();
838	}
839
840	df_analyze ();
841
842	for (auto loop : loops_list (cfun,
843	targetm.can_use_doloop_p
844	== can_use_doloop_if_innermost
845	? LI_ONLY_INNERMOST : LI_FROM_INNERMOST))
846	doloop_optimize (loop);
847
848	if (optimize == `1`)
849	df_remove_problem (df_live);
850
851	iv_analysis_done ();
852
853	checking_verify_loop_structure ();
854	}
855

source code of gcc/loop-doloop.cc