tree-ssa-loop-ivopts.cc source code [gcc/tree-ssa-loop-ivopts.cc]

/* Induction variable optimizations.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.

   For the targets supporting low-overhead loops, IVOPTs has to take care of
   the loops which will probably be transformed in RTL doloop optimization,
   to try to make selected IV candidate set optimal.  The process of doloop
   support includes:

   1) Analyze whether the current loop will be transformed to doloop or not,
      find and mark its compare type IV use as doloop use (iv_group field
      doloop_p), and set flag doloop_use_p of ivopts_data to notify subsequent
      processings on doloop.  See analyze_and_mark_doloop_use and its callees
      for the details.  The target hook predict_doloop_p can be used for target
      specific checks.

   2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
      set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
      like biv.  For cost determination between doloop IV cand and IV use, the
      target hooks doloop_cost_for_generic and doloop_cost_for_address are
      provided to add on extra costs for generic type and address type IV use.
      Zero cost is assigned to the pair between doloop IV cand and doloop IV
      use, and bound zero is set for IV elimination.

   3) With the cost setting in step 2), the current cost model based IV
      selection algorithm will process as usual, pick up doloop dedicated IV if
      profitable.  */
91
92	#include "config.h"
93	#include "system.h"
94	#include "coretypes.h"
95	#include "backend.h"
96	#include "rtl.h"
97	#include "tree.h"
98	#include "gimple.h"
99	#include "cfghooks.h"
100	#include "tree-pass.h"
101	#include "memmodel.h"
102	#include "tm_p.h"
103	#include "ssa.h"
104	#include "expmed.h"
105	#include "insn-config.h"
106	#include "emit-rtl.h"
107	#include "recog.h"
108	#include "cgraph.h"
109	#include "gimple-pretty-print.h"
110	#include "alias.h"
111	#include "fold-const.h"
112	#include "stor-layout.h"
113	#include "tree-eh.h"
114	#include "gimplify.h"
115	#include "gimple-iterator.h"
116	#include "gimplify-me.h"
117	#include "tree-cfg.h"
118	#include "tree-ssa-loop-ivopts.h"
119	#include "tree-ssa-loop-manip.h"
120	#include "tree-ssa-loop-niter.h"
121	#include "tree-ssa-loop.h"
122	#include "explow.h"
123	#include "expr.h"
124	#include "tree-dfa.h"
125	#include "tree-ssa.h"
126	#include "cfgloop.h"
127	#include "tree-scalar-evolution.h"
128	#include "tree-affine.h"
129	#include "tree-ssa-propagate.h"
130	#include "tree-ssa-address.h"
131	#include "builtins.h"
132	#include "tree-vectorizer.h"
133	#include "dbgcnt.h"
134	#include "cfganal.h"
135
/* For lang_hooks.types.type_for_mode.  */
137	#include "langhooks.h"
138
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
142
/* The infinite cost.  Used as a sentinel value for costs that cannot be
   realized.  */
#define INFTY 1000000000
145
146	/ Returns the expected number of loop iterations for LOOP.*
147	The average trip count is computed from profile data if it
148	exists. /*
149
150	static inline HOST_WIDE_INT
151	avg_loop_niter (class loop *loop)
152	{
153	HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154	if (niter == -`1`)
155	{
156	niter = likely_max_stmt_executions_int (loop);
157
158	if (niter == -`1` \|\| niter > param_avg_loop_niter)
159	return param_avg_loop_niter;
160	}
161
162	return niter;
163	}
164
165	struct iv_use;
166
167	/ Representation of the induction variable. /
168	struct iv
169	{
170	tree base; / Initial value of the iv. /
171	tree base_object; / A memory object to that the induction variable points. /
172	tree step; / Step of the iv (constant only). /
173	tree ssa_name; / The ssa name with the value. /
174	struct iv_use nonlin_use; /* The identifier in the use if it is the case. /
175	bool biv_p; / Is it a biv? /
176	bool no_overflow; / True if the iv doesn't overflow. /
177	bool have_address_use;/ For biv, indicate if it's used in any address*
178	type use. /*
179	};
180
181	/ Per-ssa version information (induction variable descriptions, etc.). /
182	struct version_info
183	{
184	tree name; / The ssa name. /
185	struct iv iv; /* Induction variable description. /
186	bool has_nonlin_use; / For a loop-level invariant, whether it is used in*
187	an expression that is not an induction variable. /*
188	bool preserve_biv; / For the original biv, whether to preserve it. /
189	unsigned inv_id; / Id of an invariant. /
190	};
191
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
203
204	/ Cost of a computation. /
205	class comp_cost
206	{
207	public:
208	comp_cost (): cost (`0`), complexity (`0`), scratch (`0`)
209	{}
210
211	comp_cost (int64_t cost, unsigned complexity, int64_t scratch = `0`)
212	: cost (cost), complexity (complexity), scratch (scratch)
213	{}
214
215	/ Returns true if COST is infinite. /
216	bool infinite_cost_p ();
217
218	/ Adds costs COST1 and COST2. /
219	friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
220
221	/ Adds COST to the comp_cost. /
222	comp_cost operator+= (comp_cost cost);
223
224	/ Adds constant C to this comp_cost. /
225	comp_cost operator+= (HOST_WIDE_INT c);
226
227	/ Subtracts constant C to this comp_cost. /
228	comp_cost operator-= (HOST_WIDE_INT c);
229
230	/ Divide the comp_cost by constant C. /
231	comp_cost operator/= (HOST_WIDE_INT c);
232
233	/ Multiply the comp_cost by constant C. /
234	comp_cost operator*= (HOST_WIDE_INT c);
235
236	/ Subtracts costs COST1 and COST2. /
237	friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
238
239	/ Subtracts COST from this comp_cost. /
240	comp_cost operator-= (comp_cost cost);
241
242	/ Returns true if COST1 is smaller than COST2. /
243	friend bool operator< (comp_cost cost1, comp_cost cost2);
244
245	/ Returns true if COST1 and COST2 are equal. /
246	friend bool operator== (comp_cost cost1, comp_cost cost2);
247
248	/ Returns true if COST1 is smaller or equal than COST2. /
249	friend bool operator<= (comp_cost cost1, comp_cost cost2);
250
251	int64_t cost; / The runtime cost. /
252	unsigned complexity; / The estimate of the complexity of the code for*
253	the computation (in no concrete units --
254	complexity field should be larger for more
255	complex expressions and addressing modes). /*
256	int64_t scratch; / Scratch used during cost computation. /
257	};
258
259	static const comp_cost no_cost;
260	static const comp_cost infinite_cost (INFTY, `0`, INFTY);
261
262	bool
263	comp_cost::infinite_cost_p ()
264	{
265	return cost == INFTY;
266	}
267
268	comp_cost
269	operator+ (comp_cost cost1, comp_cost cost2)
270	{
271	if (cost1.infinite_cost_p () \|\| cost2.infinite_cost_p ())
272	return infinite_cost;
273
274	gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275	cost1.cost += cost2.cost;
276	cost1.complexity += cost2.complexity;
277
278	return cost1;
279	}
280
281	comp_cost
282	operator- (comp_cost cost1, comp_cost cost2)
283	{
284	if (cost1.infinite_cost_p ())
285	return infinite_cost;
286
287	gcc_assert (!cost2.infinite_cost_p ());
288	gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
289
290	cost1.cost -= cost2.cost;
291	cost1.complexity -= cost2.complexity;
292
293	return cost1;
294	}
295
296	comp_cost
297	comp_cost::operator+= (comp_cost cost)
298	{
299	*this = *this + cost;
300	return *this;
301	}
302
303	comp_cost
304	comp_cost::operator+= (HOST_WIDE_INT c)
305	{
306	if (c >= INFTY)
307	this->cost = INFTY;
308
309	if (infinite_cost_p ())
310	return *this;
311
312	gcc_assert (this->cost + c < infinite_cost.cost);
313	this->cost += c;
314
315	return *this;
316	}
317
318	comp_cost
319	comp_cost::operator-= (HOST_WIDE_INT c)
320	{
321	if (infinite_cost_p ())
322	return *this;
323
324	gcc_assert (this->cost - c < infinite_cost.cost);
325	this->cost -= c;
326
327	return *this;
328	}
329
330	comp_cost
331	comp_cost::operator/= (HOST_WIDE_INT c)
332	{
333	gcc_assert (c != `0`);
334	if (infinite_cost_p ())
335	return *this;
336
337	this->cost /= c;
338
339	return *this;
340	}
341
342	comp_cost
343	comp_cost::operator*= (HOST_WIDE_INT c)
344	{
345	if (infinite_cost_p ())
346	return *this;
347
348	gcc_assert (this->cost * c < infinite_cost.cost);
349	this->cost *= c;
350
351	return *this;
352	}
353
354	comp_cost
355	comp_cost::operator-= (comp_cost cost)
356	{
357	*this = *this - cost;
358	return *this;
359	}
360
361	bool
362	operator< (comp_cost cost1, comp_cost cost2)
363	{
364	if (cost1.cost == cost2.cost)
365	return cost1.complexity < cost2.complexity;
366
367	return cost1.cost < cost2.cost;
368	}
369
370	bool
371	operator== (comp_cost cost1, comp_cost cost2)
372	{
373	return cost1.cost == cost2.cost
374	&& cost1.complexity == cost2.complexity;
375	}
376
377	bool
378	operator<= (comp_cost cost1, comp_cost cost2)
379	{
380	return cost1 < cost2 \|\| cost1 == cost2;
381	}
382
383	struct iv_inv_expr_ent;
384
385	/ The candidate - cost pair. /
386	class cost_pair
387	{
388	public:
389	struct iv_cand cand; /* The candidate. /
390	comp_cost cost; / The cost. /
391	enum tree_code comp; / For iv elimination, the comparison. /
392	bitmap inv_vars; / The list of invariant ssa_vars that have to be*
393	preserved when representing iv_use with iv_cand. /*
394	bitmap inv_exprs; / The list of newly created invariant expressions*
395	when representing iv_use with iv_cand. /*
396	tree value; / For final value elimination, the expression for*
397	the final value of the iv. For iv elimination,
398	the new bound to compare with. /*
399	};
400
401	/ Use. /
402	struct iv_use
403	{
404	unsigned id; / The id of the use. /
405	unsigned group_id; / The group id the use belongs to. /
406	enum use_type type; / Type of the use. /
407	tree mem_type; / The memory type to use when testing whether an*
408	address is legitimate, and what the address's
409	cost is. /*
410	struct iv iv; /* The induction variable it is based on. /
411	gimple stmt; /* Statement in that it occurs. /
412	tree op_p; /* The place where it occurs. /
413
414	tree addr_base; / Base address with const offset stripped. /
415	poly_uint64 addr_offset;
416	/ Const offset stripped from base address. /
417	};
418
419	/ Group of uses. /
420	struct iv_group
421	{
422	/ The id of the group. /
423	unsigned id;
424	/ Uses of the group are of the same type. /
425	enum use_type type;
426	/ The set of "related" IV candidates, plus the important ones. /
427	bitmap related_cands;
428	/ Number of IV candidates in the cost_map. /
429	unsigned n_map_members;
430	/ The costs wrto the iv candidates. /
431	class cost_pair *cost_map;
432	/ The selected candidate for the group. /
433	struct iv_cand *selected;
434	/ To indicate this is a doloop use group. /
435	bool doloop_p;
436	/ Uses in the group. /
437	vec<struct iv_use *> vuses;
438	};
439
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
449
450	/ The induction variable candidate. /
451	struct iv_cand
452	{
453	unsigned id; / The number of the candidate. /
454	bool important; / Whether this is an "important" candidate, i.e. such*
455	that it should be considered by all uses. /*
456	bool involves_undefs; / Whether the IV involves undefined values. /
457	ENUM_BITFIELD(iv_position) pos : `8`; / Where it is computed. /
458	gimple incremented_at;/* For original biv, the statement where it is*
459	incremented. /*
460	tree var_before; / The variable used for it before increment. /
461	tree var_after; / The variable used for it after increment. /
462	struct iv iv; /* The value of the candidate. NULL for*
463	"pseudocandidate" used to indicate the possibility
464	to replace the final value of an iv by direct
465	computation of the value. /*
466	unsigned cost; / Cost of the candidate. /
467	unsigned cost_step; / Cost of the candidate's increment operation. /
468	struct iv_use ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place*
469	where it is incremented. /*
470	bitmap inv_vars; / The list of invariant ssa_vars used in step of the*
471	iv_cand. /*
472	bitmap inv_exprs; / If step is more complicated than a single ssa_var,*
473	handle it as a new invariant expression which will
474	be hoisted out of loop. /*
475	struct iv orig_iv; /* The original iv if this cand is added from biv with*
476	smaller type. /*
477	bool doloop_p; / Whether this is a doloop candidate. /
478	};
479
480	/ Hashtable entry for common candidate derived from iv uses. /
481	class iv_common_cand
482	{
483	public:
484	tree base;
485	tree step;
486	/ IV uses from which this common candidate is derived. /
487	auto_vec<struct iv_use *> uses;
488	hashval_t hash;
489	};
490
491	/ Hashtable helpers. /
492
493	struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
494	{
495	static inline hashval_t hash (const iv_common_cand *);
496	static inline bool equal (const iv_common_cand , const* iv_common_cand *);
497	};
498
499	/ Hash function for possible common candidates. /
500
501	inline hashval_t
502	iv_common_cand_hasher::hash (const iv_common_cand *ccand)
503	{
504	return ccand->hash;
505	}
506
507	/ Hash table equality function for common candidates. /
508
509	inline bool
510	iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511	const iv_common_cand *ccand2)
512	{
513	return (ccand1->hash == ccand2->hash
514	&& operand_equal_p (ccand1->base, ccand2->base, flags: `0`)
515	&& operand_equal_p (ccand1->step, ccand2->step, flags: `0`)
516	&& (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517	== TYPE_PRECISION (TREE_TYPE (ccand2->base))));
518	}
519
520	/ Loop invariant expression hashtable entry. /
521
522	struct iv_inv_expr_ent
523	{
524	/ Tree expression of the entry. /
525	tree expr;
526	/ Unique indentifier. /
527	int id;
528	/ Hash value. /
529	hashval_t hash;
530	};
531
532	/ Sort iv_inv_expr_ent pair A and B by id field. /
533
534	static int
535	sort_iv_inv_expr_ent (const void a, const* void *b)
536	{
537	const iv_inv_expr_ent * const e1 = (const* iv_inv_expr_ent * const *) (a);
538	const iv_inv_expr_ent * const e2 = (const* iv_inv_expr_ent * const *) (b);
539
540	unsigned id1 = (*e1)->id;
541	unsigned id2 = (*e2)->id;
542
543	if (id1 < id2)
544	return -`1`;
545	else if (id1 > id2)
546	return `1`;
547	else
548	return `0`;
549	}
550
551	/ Hashtable helpers. /
552
553	struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
554	{
555	static inline hashval_t hash (const iv_inv_expr_ent *);
556	static inline bool equal (const iv_inv_expr_ent , const* iv_inv_expr_ent *);
557	};
558
559	/ Return true if uses of type TYPE represent some form of address. /
560
561	inline bool
562	address_p (use_type type)
563	{
564	return type == USE_REF_ADDRESS \|\| type == USE_PTR_ADDRESS;
565	}
566
567	/ Hash function for loop invariant expressions. /
568
569	inline hashval_t
570	iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
571	{
572	return expr->hash;
573	}
574
575	/ Hash table equality function for expressions. /
576
577	inline bool
578	iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579	const iv_inv_expr_ent *expr2)
580	{
581	return expr1->hash == expr2->hash
582	&& operand_equal_p (expr1->expr, expr2->expr, flags: `0`);
583	}
584
585	struct ivopts_data
586	{
587	/ The currently optimized loop. /
588	class loop *current_loop;
589	location_t loop_loc;
590
591	/ Numbers of iterations for all exits of the current loop. /
592	hash_map<edge, tree_niter_desc > niters;
593
594	/ Number of registers used in it. /
595	unsigned regs_used;
596
597	/ The size of version_info array allocated. /
598	unsigned version_info_size;
599
600	/ The array of information for the ssa names. /
601	struct version_info *version_info;
602
603	/ The hashtable of loop invariant expressions created*
604	by ivopt. /*
605	hash_table<iv_inv_expr_hasher> *inv_expr_tab;
606
607	/ The bitmap of indices in version_info whose value was changed. /
608	bitmap relevant;
609
610	/ The uses of induction variables. /
611	vec<iv_group *> vgroups;
612
613	/ The candidates. /
614	vec<iv_cand *> vcands;
615
616	/ A bitmap of important candidates. /
617	bitmap important_candidates;
618
619	/ Cache used by tree_to_aff_combination_expand. /
620	hash_map<tree, name_expansion > name_expansion_cache;
621
622	/ The hashtable of common candidates derived from iv uses. /
623	hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
624
625	/ The common candidates. /
626	vec<iv_common_cand *> iv_common_cands;
627
628	/ Hash map recording base object information of tree exp. /
629	hash_map<tree, tree> *base_object_map;
630
631	/ The maximum invariant variable id. /
632	unsigned max_inv_var_id;
633
634	/ The maximum invariant expression id. /
635	unsigned max_inv_expr_id;
636
637	/ Number of no_overflow BIVs which are not used in memory address. /
638	unsigned bivs_not_used_in_addr;
639
640	/ Obstack for iv structure. /
641	struct obstack iv_obstack;
642
643	/ Whether to consider just related and important candidates when replacing a*
644	use. /*
645	bool consider_all_candidates;
646
647	/ Are we optimizing for speed? /
648	bool speed;
649
650	/ Whether the loop body includes any function calls. /
651	bool body_includes_call;
652
653	/ Whether the loop body can only be exited via single exit. /
654	bool loop_single_exit_p;
655
656	/ Whether the loop has doloop comparison use. /
657	bool doloop_use_p;
658	};
659
660	/ An assignment of iv candidates to uses. /
661
662	class iv_ca
663	{
664	public:
665	/ The number of uses covered by the assignment. /
666	unsigned upto;
667
668	/ Number of uses that cannot be expressed by the candidates in the set. /
669	unsigned bad_groups;
670
671	/ Candidate assigned to a use, together with the related costs. /
672	class cost_pair **cand_for_group;
673
674	/ Number of times each candidate is used. /
675	unsigned *n_cand_uses;
676
677	/ The candidates used. /
678	bitmap cands;
679
680	/ The number of candidates in the set. /
681	unsigned n_cands;
682
683	/ The number of invariants needed, including both invariant variants and*
684	invariant expressions. /*
685	unsigned n_invs;
686
687	/ Total cost of expressing uses. /
688	comp_cost cand_use_cost;
689
690	/ Total cost of candidates. /
691	int64_t cand_cost;
692
693	/ Number of times each invariant variable is used. /
694	unsigned *n_inv_var_uses;
695
696	/ Number of times each invariant expression is used. /
697	unsigned *n_inv_expr_uses;
698
699	/ Total cost of the assignment. /
700	comp_cost cost;
701	};
702
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  class cost_pair *old_cp;

  /* A new assignment.  */
  class cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
719
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
736
737	/ The list of trees for that the decl_rtl field must be reset is stored*
738	here. /*
739
740	static vec<tree> decl_rtl_to_reset;
741
742	static comp_cost force_expr_to_var_cost (tree, bool);
743
744	/ The single loop exit if it dominates the latch, NULL otherwise. /
745
746	edge
747	single_dom_exit (class loop *loop)
748	{
749	edge exit = single_exit (loop);
750
751	if (!exit)
752	return NULL;
753
754	if (!just_once_each_iteration_p (loop, exit->src))
755	return NULL;
756
757	return exit;
758	}
759
760	/ Dumps information about the induction variable IV to FILE. Don't dump*
761	variable's name if DUMP_NAME is FALSE. The information is dumped with
762	preceding spaces indicated by INDENT_LEVEL. /*
763
764	void
765	dump_iv (FILE file, struct* iv iv, bool* dump_name, unsigned indent_level)
766	{
767	const char *p;
768	const char spaces[`9`] = {`' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `'\0'`};
769
770	if (indent_level > `4`)
771	indent_level = `4`;
772	p = spaces + `8` - (indent_level << `1`);
773
774	fprintf (stream: file, format: "%sIV struct:\n", p);
775	if (iv->ssa_name && dump_name)
776	{
777	fprintf (stream: file, format: "%s SSA_NAME:\t", p);
778	print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779	fprintf (stream: file, format: "\n");
780	}
781
782	fprintf (stream: file, format: "%s Type:\t", p);
783	print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784	fprintf (stream: file, format: "\n");
785
786	fprintf (stream: file, format: "%s Base:\t", p);
787	print_generic_expr (file, iv->base, TDF_SLIM);
788	fprintf (stream: file, format: "\n");
789
790	fprintf (stream: file, format: "%s Step:\t", p);
791	print_generic_expr (file, iv->step, TDF_SLIM);
792	fprintf (stream: file, format: "\n");
793
794	if (iv->base_object)
795	{
796	fprintf (stream: file, format: "%s Object:\t", p);
797	print_generic_expr (file, iv->base_object, TDF_SLIM);
798	fprintf (stream: file, format: "\n");
799	}
800
801	fprintf (stream: file, format: "%s Biv:\t%c\n", p, iv->biv_p ? `'Y'` : `'N'`);
802
803	fprintf (stream: file, format: "%s Overflowness wrto loop niter:\t%s\n",
804	p, iv->no_overflow ? "No-overflow" : "Overflow");
805	}
806
807	/ Dumps information about the USE to FILE. /
808
809	void
810	dump_use (FILE file, struct* iv_use *use)
811	{
812	fprintf (stream: file, format: " Use %d.%d:\n", use->group_id, use->id);
813	fprintf (stream: file, format: " At stmt:\t");
814	print_gimple_stmt (file, use->stmt, `0`);
815	fprintf (stream: file, format: " At pos:\t");
816	if (use->op_p)
817	print_generic_expr (file, *use->op_p, TDF_SLIM);
818	fprintf (stream: file, format: "\n");
819	dump_iv (file, iv: use->iv, dump_name: false, indent_level: `2`);
820	}
821
822	/ Dumps information about the uses to FILE. /
823
824	void
825	dump_groups (FILE file, struct* ivopts_data *data)
826	{
827	unsigned i, j;
828	struct iv_group *group;
829
830	for (i = `0`; i < data->vgroups.length (); i++)
831	{
832	group = data->vgroups [i];
833	fprintf (stream: file, format: "Group %d:\n", group->id);
834	if (group->type == USE_NONLINEAR_EXPR)
835	fprintf (stream: file, format: " Type:\tGENERIC\n");
836	else if (group->type == USE_REF_ADDRESS)
837	fprintf (stream: file, format: " Type:\tREFERENCE ADDRESS\n");
838	else if (group->type == USE_PTR_ADDRESS)
839	fprintf (stream: file, format: " Type:\tPOINTER ARGUMENT ADDRESS\n");
840	else
841	{
842	gcc_assert (group->type == USE_COMPARE);
843	fprintf (stream: file, format: " Type:\tCOMPARE\n");
844	}
845	for (j = `0`; j < group->vuses.length (); j++)
846	dump_use (file, use: group->vuses [j]);
847	}
848	}
849
850	/ Dumps information about induction variable candidate CAND to FILE. /
851
852	void
853	dump_cand (FILE file, struct* iv_cand *cand)
854	{
855	struct iv *iv = cand->iv;
856
857	fprintf (stream: file, format: "Candidate %d:\n", cand->id);
858	if (cand->inv_vars)
859	{
860	fprintf (stream: file, format: " Depend on inv.vars: ");
861	dump_bitmap (file, map: cand->inv_vars);
862	}
863	if (cand->inv_exprs)
864	{
865	fprintf (stream: file, format: " Depend on inv.exprs: ");
866	dump_bitmap (file, map: cand->inv_exprs);
867	}
868
869	if (cand->var_before)
870	{
871	fprintf (stream: file, format: " Var befor: ");
872	print_generic_expr (file, cand->var_before, TDF_SLIM);
873	fprintf (stream: file, format: "\n");
874	}
875	if (cand->var_after)
876	{
877	fprintf (stream: file, format: " Var after: ");
878	print_generic_expr (file, cand->var_after, TDF_SLIM);
879	fprintf (stream: file, format: "\n");
880	}
881
882	switch (cand->pos)
883	{
884	case IP_NORMAL:
885	fprintf (stream: file, format: " Incr POS: before exit test\n");
886	break;
887
888	case IP_BEFORE_USE:
889	fprintf (stream: file, format: " Incr POS: before use %d\n", cand->ainc_use->id);
890	break;
891
892	case IP_AFTER_USE:
893	fprintf (stream: file, format: " Incr POS: after use %d\n", cand->ainc_use->id);
894	break;
895
896	case IP_END:
897	fprintf (stream: file, format: " Incr POS: at end\n");
898	break;
899
900	case IP_ORIGINAL:
901	fprintf (stream: file, format: " Incr POS: orig biv\n");
902	break;
903	}
904
905	dump_iv (file, iv, dump_name: false, indent_level: `1`);
906	}
907
908	/ Returns the info for ssa version VER. /
909
910	static inline struct version_info *
911	ver_info (struct ivopts_data data, unsigned* ver)
912	{
913	return data->version_info + ver;
914	}
915
916	/ Returns the info for ssa name NAME. /
917
918	static inline struct version_info *
919	name_info (struct ivopts_data *data, tree name)
920	{
921	return ver_info (data, SSA_NAME_VERSION (name));
922	}
923
924	/ Returns true if STMT is after the place where the IP_NORMAL ivs will be*
925	emitted in LOOP. /*
926
927	static bool
928	stmt_after_ip_normal_pos (class loop loop, gimple stmt)
929	{
930	basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (g: stmt);
931
932	gcc_assert (bb);
933
934	if (sbb == loop->latch)
935	return true;
936
937	if (sbb != bb)
938	return false;
939
940	return stmt == last_nondebug_stmt (bb);
941	}
942
943	/ Returns true if STMT if after the place where the original induction*
944	variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945	if the positions are identical. /*
946
947	static bool
948	stmt_after_inc_pos (struct iv_cand cand, gimple stmt, bool true_if_equal)
949	{
950	basic_block cand_bb = gimple_bb (g: cand->incremented_at);
951	basic_block stmt_bb = gimple_bb (g: stmt);
952
953	if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954	return false;
955
956	if (stmt_bb != cand_bb)
957	return true;
958
959	if (true_if_equal
960	&& gimple_uid (g: stmt) == gimple_uid (g: cand->incremented_at))
961	return true;
962	return gimple_uid (g: stmt) > gimple_uid (g: cand->incremented_at);
963	}
964
965	/ Returns true if STMT if after the place where the induction variable*
966	CAND is incremented in LOOP. /*
967
968	static bool
969	stmt_after_increment (class loop loop, struct* iv_cand cand, gimple stmt)
970	{
971	switch (cand->pos)
972	{
973	case IP_END:
974	return false;
975
976	case IP_NORMAL:
977	return stmt_after_ip_normal_pos (loop, stmt);
978
979	case IP_ORIGINAL:
980	case IP_AFTER_USE:
981	return stmt_after_inc_pos (cand, stmt, true_if_equal: false);
982
983	case IP_BEFORE_USE:
984	return stmt_after_inc_pos (cand, stmt, true_if_equal: true);
985
986	default:
987	gcc_unreachable ();
988	}
989	}
990
991	/ walk_tree callback for contains_abnormal_ssa_name_p. /
992
993	static tree
994	contains_abnormal_ssa_name_p_1 (tree tp, int* walk_subtrees, void* *)
995	{
996	if (TREE_CODE (*tp) == SSA_NAME
997	&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998	return *tp;
999
1000	if (!EXPR_P (*tp))
1001	*walk_subtrees = `0`;
1002
1003	return NULL_TREE;
1004	}
1005
1006	/ Returns true if EXPR contains a ssa name that occurs in an*
1007	abnormal phi node. /*
1008
1009	bool
1010	contains_abnormal_ssa_name_p (tree expr)
1011	{
1012	return walk_tree_without_duplicates
1013	(&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014	}
1015
1016	/ Returns the structure describing number of iterations determined from*
1017	EXIT of DATA->current_loop, or NULL if something goes wrong. /*
1018
1019	static class tree_niter_desc *
1020	niter_for_exit (struct ivopts_data *data, edge exit)
1021	{
1022	class tree_niter_desc *desc;
1023	tree_niter_desc **slot;
1024
1025	if (!data->niters)
1026	{
1027	data->niters = new hash_map<edge, tree_niter_desc *>;
1028	slot = NULL;
1029	}
1030	else
1031	slot = data->niters->get (k: exit);
1032
1033	if (!slot)
1034	{
1035	/ Try to determine number of iterations. We cannot safely work with ssa*
1036	names that appear in phi nodes on abnormal edges, so that we do not
1037	create overlapping life ranges for them (PR 27283). /*
1038	desc = XNEW (class tree_niter_desc);
1039	::new (static_cast<void*> (desc)) tree_niter_desc ();
1040	if (!number_of_iterations_exit (data->current_loop,
1041	exit, niter: desc, true)
1042	\|\| contains_abnormal_ssa_name_p (expr: desc->niter))
1043	{
1044	desc->~tree_niter_desc ();
1045	XDELETE (desc);
1046	desc = NULL;
1047	}
1048	data->niters->put (k: exit, v: desc);
1049	}
1050	else
1051	desc = *slot;
1052
1053	return desc;
1054	}
1055
1056	/ Returns the structure describing number of iterations determined from*
1057	single dominating exit of DATA->current_loop, or NULL if something
1058	goes wrong. /*
1059
1060	static class tree_niter_desc *
1061	niter_for_single_dom_exit (struct ivopts_data *data)
1062	{
1063	edge exit = single_dom_exit (loop: data->current_loop);
1064
1065	if (!exit)
1066	return NULL;
1067
1068	return niter_for_exit (data, exit);
1069	}
1070
1071	/ Initializes data structures used by the iv optimization pass, stored*
1072	in DATA. /*
1073
1074	static void
1075	tree_ssa_iv_optimize_init (struct ivopts_data *data)
1076	{
1077	data->version_info_size = `2` * num_ssa_names;
1078	data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1079	data->relevant = BITMAP_ALLOC (NULL);
1080	data->important_candidates = BITMAP_ALLOC (NULL);
1081	data->max_inv_var_id = `0`;
1082	data->max_inv_expr_id = `0`;
1083	data->niters = NULL;
1084	data->vgroups.create (nelems: `20`);
1085	data->vcands.create (nelems: `20`);
1086	data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (`10`);
1087	data->name_expansion_cache = NULL;
1088	data->base_object_map = NULL;
1089	data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (`10`);
1090	data->iv_common_cands.create (nelems: `20`);
1091	decl_rtl_to_reset.create (nelems: `20`);
1092	gcc_obstack_init (&data->iv_obstack);
1093	}
1094
1095	/ walk_tree callback for determine_base_object. /
1096
1097	static tree
1098	determine_base_object_1 (tree tp, int* walk_subtrees, void* *wdata)
1099	{
1100	tree_code code = TREE_CODE (*tp);
1101	tree obj = NULL_TREE;
1102	if (code == ADDR_EXPR)
1103	{
1104	tree base = get_base_address (TREE_OPERAND (*tp, `0`));
1105	if (!base)
1106	obj = *tp;
1107	else if (TREE_CODE (base) != MEM_REF)
1108	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1109	}
1110	else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1111	obj = fold_convert (ptr_type_node, *tp);
1112
1113	if (!obj)
1114	{
1115	if (!EXPR_P (*tp))
1116	*walk_subtrees = `0`;
1117
1118	return NULL_TREE;
1119	}
1120	/ Record special node for multiple base objects and stop. /
1121	if (*static_cast<tree *> (wdata))
1122	{
1123	*static_cast<tree *> (wdata) = integer_zero_node;
1124	return integer_zero_node;
1125	}
1126	/ Record the base object and continue looking. /
1127	*static_cast<tree *> (wdata) = obj;
1128	return NULL_TREE;
1129	}
1130
1131	/ Returns a memory object to that EXPR points with caching. Return NULL if we*
1132	are able to determine that it does not point to any such object; specially
1133	return integer_zero_node if EXPR contains multiple base objects. /*
1134
1135	static tree
1136	determine_base_object (struct ivopts_data *data, tree expr)
1137	{
1138	tree *slot, obj = NULL_TREE;
1139	if (data->base_object_map)
1140	{
1141	if ((slot = data->base_object_map->get(k: expr)) != NULL)
1142	return *slot;
1143	}
1144	else
1145	data->base_object_map = new hash_map<tree, tree>;
1146
1147	(void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1148	data->base_object_map->put (k: expr, v: obj);
1149	return obj;
1150	}
1151
1152	/ Return true if address expression with non-DECL_P operand appears*
1153	in EXPR. /*
1154
1155	static bool
1156	contain_complex_addr_expr (tree expr)
1157	{
1158	bool res = false;
1159
1160	STRIP_NOPS (expr);
1161	switch (TREE_CODE (expr))
1162	{
1163	case POINTER_PLUS_EXPR:
1164	case PLUS_EXPR:
1165	case MINUS_EXPR:
1166	res \|= contain_complex_addr_expr (TREE_OPERAND (expr, `0`));
1167	res \|= contain_complex_addr_expr (TREE_OPERAND (expr, `1`));
1168	break;
1169
1170	case ADDR_EXPR:
1171	return (!DECL_P (TREE_OPERAND (expr, `0`)));
1172
1173	default:
1174	return false;
1175	}
1176
1177	return res;
1178	}
1179
1180	/ Allocates an induction variable with given initial value BASE and step STEP*
1181	for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. /*
1182
1183	static struct iv *
1184	alloc_iv (struct ivopts_data *data, tree base, tree step,
1185	bool no_overflow = false)
1186	{
1187	tree expr = base;
1188	struct iv iv = (struct* iv*) obstack_alloc (&data->iv_obstack,
1189	sizeof (struct iv));
1190	gcc_assert (step != NULL_TREE);
1191
1192	/ Lower address expression in base except ones with DECL_P as operand.*
1193	By doing this:
1194	1) More accurate cost can be computed for address expressions;
1195	2) Duplicate candidates won't be created for bases in different
1196	forms, like &a[0] and &a. /*
1197	STRIP_NOPS (expr);
1198	if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, `0`)))
1199	\|\| contain_complex_addr_expr (expr))
1200	{
1201	aff_tree comb;
1202	tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1203	base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204	}
1205
1206	iv->base = base;
1207	iv->base_object = determine_base_object (data, expr: base);
1208	iv->step = step;
1209	iv->biv_p = false;
1210	iv->nonlin_use = NULL;
1211	iv->ssa_name = NULL_TREE;
1212	if (!no_overflow
1213	&& !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1214	base, step))
1215	no_overflow = true;
1216	iv->no_overflow = no_overflow;
1217	iv->have_address_use = false;
1218
1219	return iv;
1220	}
1221
1222	/ Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV*
1223	doesn't overflow. /*
1224
1225	static void
1226	set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1227	bool no_overflow)
1228	{
1229	struct version_info *info = name_info (data, name: iv);
1230
1231	gcc_assert (!info->iv);
1232
1233	bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1234	info->iv = alloc_iv (data, base, step, no_overflow);
1235	info->iv->ssa_name = iv;
1236	}
1237
1238	/ Finds induction variable declaration for VAR. /
1239
1240	static struct iv *
1241	get_iv (struct ivopts_data *data, tree var)
1242	{
1243	basic_block bb;
1244	tree type = TREE_TYPE (var);
1245
1246	if (!POINTER_TYPE_P (type)
1247	&& !INTEGRAL_TYPE_P (type))
1248	return NULL;
1249
1250	if (!name_info (data, name: var)->iv)
1251	{
1252	bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1253
1254	if (!bb
1255	\|\| !flow_bb_inside_loop_p (data->current_loop, bb))
1256	{
1257	if (POINTER_TYPE_P (type))
1258	type = sizetype;
1259	set_iv (data, iv: var, base: var, step: build_int_cst (type, `0`), no_overflow: true);
1260	}
1261	}
1262
1263	return name_info (data, name: var)->iv;
1264	}
1265
1266	/ Return the first non-invariant ssa var found in EXPR. /
1267
1268	static tree
1269	extract_single_var_from_expr (tree expr)
1270	{
1271	int i, n;
1272	tree tmp;
1273	enum tree_code code;
1274
1275	if (!expr \|\| is_gimple_min_invariant (expr))
1276	return NULL;
1277
1278	code = TREE_CODE (expr);
1279	if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1280	{
1281	n = TREE_OPERAND_LENGTH (expr);
1282	for (i = `0`; i < n; i++)
1283	{
1284	tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1285
1286	if (tmp)
1287	return tmp;
1288	}
1289	}
1290	return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291	}
1292
1293	/ Finds basic ivs. /
1294
1295	static bool
1296	find_bivs (struct ivopts_data *data)
1297	{
1298	gphi *phi;
1299	affine_iv iv;
1300	tree step, type, base, stop;
1301	bool found = false;
1302	class loop *loop = data->current_loop;
1303	gphi_iterator psi;
1304
1305	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1306	{
1307	phi = psi.phi ();
1308
1309	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1310	continue;
1311
1312	if (virtual_operand_p (PHI_RESULT (phi)))
1313	continue;
1314
1315	if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1316	continue;
1317
1318	if (integer_zerop (iv.step))
1319	continue;
1320
1321	step = iv.step;
1322	base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1323	/ Stop expanding iv base at the first ssa var referred by iv step.*
1324	Ideally we should stop at any ssa var, because that's expensive
1325	and unusual to happen, we just do it on the first one.
1326
1327	See PR64705 for the rationale. /*
1328	stop = extract_single_var_from_expr (expr: step);
1329	base = expand_simple_operations (base, stop);
1330	if (contains_abnormal_ssa_name_p (expr: base)
1331	\|\| contains_abnormal_ssa_name_p (expr: step))
1332	continue;
1333
1334	type = TREE_TYPE (PHI_RESULT (phi));
1335	base = fold_convert (type, base);
1336	if (step)
1337	{
1338	if (POINTER_TYPE_P (type))
1339	step = convert_to_ptrofftype (step);
1340	else
1341	step = fold_convert (type, step);
1342	}
1343
1344	set_iv (data, PHI_RESULT (phi), base, step, no_overflow: iv.no_overflow);
1345	found = true;
1346	}
1347
1348	return found;
1349	}
1350
1351	/ Marks basic ivs. /
1352
1353	static void
1354	mark_bivs (struct ivopts_data *data)
1355	{
1356	gphi *phi;
1357	gimple *def;
1358	tree var;
1359	struct iv iv, incr_iv;
1360	class loop *loop = data->current_loop;
1361	basic_block incr_bb;
1362	gphi_iterator psi;
1363
1364	data->bivs_not_used_in_addr = `0`;
1365	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1366	{
1367	phi = psi.phi ();
1368
1369	iv = get_iv (data, PHI_RESULT (phi));
1370	if (!iv)
1371	continue;
1372
1373	var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1374	def = SSA_NAME_DEF_STMT (var);
1375	/ Don't mark iv peeled from other one as biv. /
1376	if (def
1377	&& gimple_code (g: def) == GIMPLE_PHI
1378	&& gimple_bb (g: def) == loop->header)
1379	continue;
1380
1381	incr_iv = get_iv (data, var);
1382	if (!incr_iv)
1383	continue;
1384
1385	/ If the increment is in the subloop, ignore it. /
1386	incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1387	if (incr_bb->loop_father != data->current_loop
1388	\|\| (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1389	continue;
1390
1391	iv->biv_p = true;
1392	incr_iv->biv_p = true;
1393	if (iv->no_overflow)
1394	data->bivs_not_used_in_addr++;
1395	if (incr_iv->no_overflow)
1396	data->bivs_not_used_in_addr++;
1397	}
1398	}
1399
1400	/ Checks whether STMT defines a linear induction variable and stores its*
1401	parameters to IV. /*
1402
1403	static bool
1404	find_givs_in_stmt_scev (struct ivopts_data data, gimple stmt, affine_iv *iv)
1405	{
1406	tree lhs, stop;
1407	class loop *loop = data->current_loop;
1408
1409	iv->base = NULL_TREE;
1410	iv->step = NULL_TREE;
1411
1412	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1413	return false;
1414
1415	lhs = gimple_assign_lhs (gs: stmt);
1416	if (TREE_CODE (lhs) != SSA_NAME)
1417	return false;
1418
1419	if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1420	return false;
1421
1422	/ Stop expanding iv base at the first ssa var referred by iv step.*
1423	Ideally we should stop at any ssa var, because that's expensive
1424	and unusual to happen, we just do it on the first one.
1425
1426	See PR64705 for the rationale. /*
1427	stop = extract_single_var_from_expr (expr: iv->step);
1428	iv->base = expand_simple_operations (iv->base, stop);
1429	if (contains_abnormal_ssa_name_p (expr: iv->base)
1430	\|\| contains_abnormal_ssa_name_p (expr: iv->step))
1431	return false;
1432
1433	/ If STMT could throw, then do not consider STMT as defining a GIV.*
1434	While this will suppress optimizations, we cannot safely delete this
1435	GIV and associated statements, even if it appears it is not used. /*
1436	if (stmt_could_throw_p (cfun, stmt))
1437	return false;
1438
1439	return true;
1440	}
1441
1442	/ Finds general ivs in statement STMT. /
1443
1444	static void
1445	find_givs_in_stmt (struct ivopts_data data, gimple stmt)
1446	{
1447	affine_iv iv;
1448
1449	if (!find_givs_in_stmt_scev (data, stmt, iv: &iv))
1450	return;
1451
1452	set_iv (data, iv: gimple_assign_lhs (gs: stmt), base: iv.base, step: iv.step, no_overflow: iv.no_overflow);
1453	}
1454
1455	/ Finds general ivs in basic block BB. /
1456
1457	static void
1458	find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1459	{
1460	gimple_stmt_iterator bsi;
1461
1462	for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1463	find_givs_in_stmt (data, stmt: gsi_stmt (i: bsi));
1464	}
1465
1466	/ Finds general ivs. /
1467
1468	static void
1469	find_givs (struct ivopts_data data, basic_block body)
1470	{
1471	class loop *loop = data->current_loop;
1472	unsigned i;
1473
1474	for (i = `0`; i < loop->num_nodes; i++)
1475	find_givs_in_bb (data, bb: body[i]);
1476	}
1477
1478	/ For each ssa name defined in LOOP determines whether it is an induction*
1479	variable and if so, its initial value and step. /*
1480
1481	static bool
1482	find_induction_variables (struct ivopts_data data, basic_block body)
1483	{
1484	unsigned i;
1485	bitmap_iterator bi;
1486
1487	if (!find_bivs (data))
1488	return false;
1489
1490	find_givs (data, body);
1491	mark_bivs (data);
1492
1493	if (dump_file && (dump_flags & TDF_DETAILS))
1494	{
1495	class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1496
1497	if (niter)
1498	{
1499	fprintf (stream: dump_file, format: " number of iterations ");
1500	print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1501	if (!integer_zerop (niter->may_be_zero))
1502	{
1503	fprintf (stream: dump_file, format: "; zero if ");
1504	print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1505	}
1506	fprintf (stream: dump_file, format: "\n");
1507	};
1508
1509	fprintf (stream: dump_file, format: "\n<Induction Vars>:\n");
1510	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
1511	{
1512	struct version_info *info = ver_info (data, ver: i);
1513	if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1514	dump_iv (file: dump_file, iv: ver_info (data, ver: i)->iv, dump_name: true, indent_level: `0`);
1515	}
1516	}
1517
1518	return true;
1519	}
1520
1521	/ Records a use of TYPE at USE_P in STMT whose value is IV in GROUP.
1522	For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1523	is the const offset stripped from IV base and MEM_TYPE is the type
1524	of the memory being addressed. For uses of other types, ADDR_BASE
1525	and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. /*
1526
1527	static struct iv_use *
1528	record_use (struct iv_group group, tree use_p, struct iv *iv,
1529	gimple stmt, enum* use_type type, tree mem_type,
1530	tree addr_base, poly_uint64 addr_offset)
1531	{
1532	struct iv_use use = XCNEW (struct* iv_use);
1533
1534	use->id = group->vuses.length ();
1535	use->group_id = group->id;
1536	use->type = type;
1537	use->mem_type = mem_type;
1538	use->iv = iv;
1539	use->stmt = stmt;
1540	use->op_p = use_p;
1541	use->addr_base = addr_base;
1542	use->addr_offset = addr_offset;
1543
1544	group->vuses.safe_push (obj: use);
1545	return use;
1546	}
1547
1548	/ Checks whether OP is a loop-level invariant and if so, records it.*
1549	NONLINEAR_USE is true if the invariant is used in a way we do not
1550	handle specially. /*
1551
1552	static void
1553	record_invariant (struct ivopts_data data, tree op, bool* nonlinear_use)
1554	{
1555	basic_block bb;
1556	struct version_info *info;
1557
1558	if (TREE_CODE (op) != SSA_NAME
1559	\|\| virtual_operand_p (op))
1560	return;
1561
1562	bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1563	if (bb
1564	&& flow_bb_inside_loop_p (data->current_loop, bb))
1565	return;
1566
1567	info = name_info (data, name: op);
1568	info->name = op;
1569	info->has_nonlin_use \|= nonlinear_use;
1570	if (!info->inv_id)
1571	info->inv_id = ++data->max_inv_var_id;
1572	bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573	}
1574
1575	/ Record a group of TYPE. /
1576
1577	static struct iv_group *
1578	record_group (struct ivopts_data data, enum* use_type type)
1579	{
1580	struct iv_group group = XCNEW (struct* iv_group);
1581
1582	group->id = data->vgroups.length ();
1583	group->type = type;
1584	group->related_cands = BITMAP_ALLOC (NULL);
1585	group->vuses.create (nelems: `1`);
1586	group->doloop_p = false;
1587
1588	data->vgroups.safe_push (obj: group);
1589	return group;
1590	}
1591
1592	/ Record a use of TYPE at USE_P in STMT whose value is IV in a group.
1593	New group will be created if there is no existing group for the use.
1594	MEM_TYPE is the type of memory being addressed, or NULL if this
1595	isn't an address reference. /*
1596
1597	static struct iv_use *
1598	record_group_use (struct ivopts_data data, tree use_p,
1599	struct iv iv, gimple stmt, enum use_type type,
1600	tree mem_type)
1601	{
1602	tree addr_base = NULL;
1603	struct iv_group *group = NULL;
1604	poly_uint64 addr_offset = `0`;
1605
1606	/ Record non address type use in a new group. /
1607	if (address_p (type))
1608	{
1609	unsigned int i;
1610
1611	gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1612	tree addr_toffset;
1613	split_constant_offset (iv->base, &addr_base, &addr_toffset);
1614	addr_offset = int_cst_value (addr_toffset);
1615	for (i = `0`; i < data->vgroups.length (); i++)
1616	{
1617	struct iv_use *use;
1618
1619	group = data->vgroups [i];
1620	use = group->vuses [`0`];
1621	if (!address_p (type: use->type))
1622	continue;
1623
1624	/ Check if it has the same stripped base and step. /
1625	if (operand_equal_p (iv->base_object, use->iv->base_object, flags: `0`)
1626	&& operand_equal_p (iv->step, use->iv->step, flags: `0`)
1627	&& operand_equal_p (addr_base, use->addr_base, flags: `0`))
1628	break;
1629	}
1630	if (i == data->vgroups.length ())
1631	group = NULL;
1632	}
1633
1634	if (!group)
1635	group = record_group (data, type);
1636
1637	return record_use (group, use_p, iv, stmt, type, mem_type,
1638	addr_base, addr_offset);
1639	}
1640
1641	/ Checks whether the use OP is interesting and if so, records it. /
1642
1643	static struct iv_use *
1644	find_interesting_uses_op (struct ivopts_data *data, tree op)
1645	{
1646	struct iv *iv;
1647	gimple *stmt;
1648	struct iv_use *use;
1649
1650	if (TREE_CODE (op) != SSA_NAME)
1651	return NULL;
1652
1653	iv = get_iv (data, var: op);
1654	if (!iv)
1655	return NULL;
1656
1657	if (iv->nonlin_use)
1658	{
1659	gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1660	return iv->nonlin_use;
1661	}
1662
1663	if (integer_zerop (iv->step))
1664	{
1665	record_invariant (data, op, nonlinear_use: true);
1666	return NULL;
1667	}
1668
1669	stmt = SSA_NAME_DEF_STMT (op);
1670	gcc_assert (gimple_code (stmt) == GIMPLE_PHI \|\| is_gimple_assign (stmt));
1671
1672	use = record_group_use (data, NULL, iv, stmt, type: USE_NONLINEAR_EXPR, NULL_TREE);
1673	iv->nonlin_use = use;
1674	return use;
1675	}
1676
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* The comparison cannot be handled as a compare type iv_use.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1690
1691	/ Given a condition in statement STMT, checks whether it is a compare*
1692	of an induction variable and an invariant. If this is the case,
1693	CONTROL_VAR is set to location of the iv, BOUND to the location of
1694	the invariant, IV_VAR and IV_BOUND are set to the corresponding
1695	induction variable descriptions, and true is returned. If this is not
1696	the case, CONTROL_VAR and BOUND are set to the arguments of the
1697	condition and false is returned. /*
1698
1699	static enum comp_iv_rewrite
1700	extract_cond_operands (struct ivopts_data data, gimple stmt,
1701	tree control_var, tree bound,
1702	struct iv iv_var, struct iv iv_bound)
1703	{
1704	/ The objects returned when COND has constant operands. /
1705	static struct iv const_iv;
1706	static tree zero;
1707	tree op0 = &zero, op1 = &zero;
1708	struct iv iv0 = &const_iv, iv1 = &const_iv;
1709	enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1710
1711	if (gimple_code (g: stmt) == GIMPLE_COND)
1712	{
1713	gcond cond_stmt = as_a <gcond > (p: stmt);
1714	op0 = gimple_cond_lhs_ptr (gs: cond_stmt);
1715	op1 = gimple_cond_rhs_ptr (gs: cond_stmt);
1716	}
1717	else
1718	{
1719	op0 = gimple_assign_rhs1_ptr (gs: stmt);
1720	op1 = gimple_assign_rhs2_ptr (gs: stmt);
1721	}
1722
1723	zero = integer_zero_node;
1724	const_iv.step = integer_zero_node;
1725
1726	if (TREE_CODE (*op0) == SSA_NAME)
1727	iv0 = get_iv (data, var: *op0);
1728	if (TREE_CODE (*op1) == SSA_NAME)
1729	iv1 = get_iv (data, var: *op1);
1730
1731	/ If both sides of comparison are IVs. We can express ivs on both end. /
1732	if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1733	{
1734	rewrite_type = COMP_IV_EXPR_2;
1735	goto end;
1736	}
1737
1738	/ If none side of comparison is IV. /
1739	if ((!iv0 \|\| integer_zerop (iv0->step))
1740	&& (!iv1 \|\| integer_zerop (iv1->step)))
1741	goto end;
1742
1743	/ Control variable may be on the other side. /
1744	if (!iv0 \|\| integer_zerop (iv0->step))
1745	{
1746	std::swap (a&: op0, b&: op1);
1747	std::swap (a&: iv0, b&: iv1);
1748	}
1749	/ If one side is IV and the other side isn't loop invariant. /
1750	if (!iv1)
1751	rewrite_type = COMP_IV_EXPR;
1752	/ If one side is IV and the other side is loop invariant. /
1753	else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1754	rewrite_type = COMP_IV_ELIM;
1755
1756	end:
1757	if (control_var)
1758	*control_var = op0;
1759	if (iv_var)
1760	*iv_var = iv0;
1761	if (bound)
1762	*bound = op1;
1763	if (iv_bound)
1764	*iv_bound = iv1;
1765
1766	return rewrite_type;
1767	}
1768
1769	/ Checks whether the condition in STMT is interesting and if so,*
1770	records it. /*
1771
1772	static void
1773	find_interesting_uses_cond (struct ivopts_data data, gimple stmt)
1774	{
1775	tree var_p, bound_p;
1776	struct iv var_iv, bound_iv;
1777	enum comp_iv_rewrite ret;
1778
1779	ret = extract_cond_operands (data, stmt,
1780	control_var: &var_p, bound: &bound_p, iv_var: &var_iv, iv_bound: &bound_iv);
1781	if (ret == COMP_IV_NA)
1782	{
1783	find_interesting_uses_op (data, op: *var_p);
1784	find_interesting_uses_op (data, op: *bound_p);
1785	return;
1786	}
1787
1788	record_group_use (data, use_p: var_p, iv: var_iv, stmt, type: USE_COMPARE, NULL_TREE);
1789	/ Record compare type iv_use for iv on the other side of comparison. /
1790	if (ret == COMP_IV_EXPR_2)
1791	record_group_use (data, use_p: bound_p, iv: bound_iv, stmt, type: USE_COMPARE, NULL_TREE);
1792	}
1793
1794	/ Returns the outermost loop EXPR is obviously invariant in*
1795	relative to the loop LOOP, i.e. if all its operands are defined
1796	outside of the returned loop. Returns NULL if EXPR is not
1797	even obviously invariant in LOOP. /*
1798
1799	class loop *
1800	outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1801	{
1802	basic_block def_bb;
1803	unsigned i, len;
1804
1805	if (is_gimple_min_invariant (expr))
1806	return current_loops->tree_root;
1807
1808	if (TREE_CODE (expr) == SSA_NAME)
1809	{
1810	def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1811	if (def_bb)
1812	{
1813	if (flow_bb_inside_loop_p (loop, def_bb))
1814	return NULL;
1815	return superloop_at_depth (loop,
1816	loop_depth (loop: def_bb->loop_father) + `1`);
1817	}
1818
1819	return current_loops->tree_root;
1820	}
1821
1822	if (!EXPR_P (expr))
1823	return NULL;
1824
1825	unsigned maxdepth = `0`;
1826	len = TREE_OPERAND_LENGTH (expr);
1827	for (i = `0`; i < len; i++)
1828	{
1829	class loop *ivloop;
1830	if (!TREE_OPERAND (expr, i))
1831	continue;
1832
1833	ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1834	if (!ivloop)
1835	return NULL;
1836	maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837	}
1838
1839	return superloop_at_depth (loop, maxdepth);
1840	}
1841
1842	/ Returns true if expression EXPR is obviously invariant in LOOP,*
1843	i.e. if all its operands are defined outside of the LOOP. LOOP
1844	should not be the function body. /*
1845
1846	bool
1847	expr_invariant_in_loop_p (class loop *loop, tree expr)
1848	{
1849	basic_block def_bb;
1850	unsigned i, len;
1851
1852	gcc_assert (loop_depth (loop) > `0`);
1853
1854	if (is_gimple_min_invariant (expr))
1855	return true;
1856
1857	if (TREE_CODE (expr) == SSA_NAME)
1858	{
1859	def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1860	if (def_bb
1861	&& flow_bb_inside_loop_p (loop, def_bb))
1862	return false;
1863
1864	return true;
1865	}
1866
1867	if (!EXPR_P (expr))
1868	return false;
1869
1870	len = TREE_OPERAND_LENGTH (expr);
1871	for (i = `0`; i < len; i++)
1872	if (TREE_OPERAND (expr, i)
1873	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1874	return false;
1875
1876	return true;
1877	}
1878
1879	/ Given expression EXPR which computes inductive values with respect*
1880	to loop recorded in DATA, this function returns biv from which EXPR
1881	is derived by tracing definition chains of ssa variables in EXPR. /*
1882
1883	static struct iv*
1884	find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1885	{
1886	struct iv *iv;
1887	unsigned i, n;
1888	tree e2, e1;
1889	enum tree_code code;
1890	gimple *stmt;
1891
1892	if (expr == NULL_TREE)
1893	return NULL;
1894
1895	if (is_gimple_min_invariant (expr))
1896	return NULL;
1897
1898	code = TREE_CODE (expr);
1899	if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1900	{
1901	n = TREE_OPERAND_LENGTH (expr);
1902	for (i = `0`; i < n; i++)
1903	{
1904	iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1905	if (iv)
1906	return iv;
1907	}
1908	}
1909
1910	/ Stop if it's not ssa name. /
1911	if (code != SSA_NAME)
1912	return NULL;
1913
1914	iv = get_iv (data, var: expr);
1915	if (!iv \|\| integer_zerop (iv->step))
1916	return NULL;
1917	else if (iv->biv_p)
1918	return iv;
1919
1920	stmt = SSA_NAME_DEF_STMT (expr);
1921	if (gphi phi = dyn_cast <gphi > (p: stmt))
1922	{
1923	ssa_op_iter iter;
1924	use_operand_p use_p;
1925	basic_block phi_bb = gimple_bb (g: phi);
1926
1927	/ Skip loop header PHI that doesn't define biv. /
1928	if (phi_bb->loop_father == data->current_loop)
1929	return NULL;
1930
1931	if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
1932	return NULL;
1933
1934	FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1935	{
1936	tree use = USE_FROM_PTR (use_p);
1937	iv = find_deriving_biv_for_expr (data, expr: use);
1938	if (iv)
1939	return iv;
1940	}
1941	return NULL;
1942	}
1943	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1944	return NULL;
1945
1946	e1 = gimple_assign_rhs1 (gs: stmt);
1947	code = gimple_assign_rhs_code (gs: stmt);
1948	if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1949	return find_deriving_biv_for_expr (data, expr: e1);
1950
1951	switch (code)
1952	{
1953	case MULT_EXPR:
1954	case PLUS_EXPR:
1955	case MINUS_EXPR:
1956	case POINTER_PLUS_EXPR:
1957	/ Increments, decrements and multiplications by a constant*
1958	are simple. /*
1959	e2 = gimple_assign_rhs2 (gs: stmt);
1960	iv = find_deriving_biv_for_expr (data, expr: e2);
1961	if (iv)
1962	return iv;
1963	gcc_fallthrough ();
1964
1965	CASE_CONVERT:
1966	/ Casts are simple. /
1967	return find_deriving_biv_for_expr (data, expr: e1);
1968
1969	default:
1970	break;
1971	}
1972
1973	return NULL;
1974	}
1975
1976	/ Record BIV, its predecessor and successor that they are used in*
1977	address type uses. /*
1978
1979	static void
1980	record_biv_for_address_use (struct ivopts_data data, struct* iv *biv)
1981	{
1982	unsigned i;
1983	tree type, base_1, base_2;
1984	bitmap_iterator bi;
1985
1986	if (!biv \|\| !biv->biv_p \|\| integer_zerop (biv->step)
1987	\|\| biv->have_address_use \|\| !biv->no_overflow)
1988	return;
1989
1990	type = TREE_TYPE (biv->base);
1991	if (!INTEGRAL_TYPE_P (type))
1992	return;
1993
1994	biv->have_address_use = true;
1995	data->bivs_not_used_in_addr--;
1996	base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1997	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
1998	{
1999	struct iv *iv = ver_info (data, ver: i)->iv;
2000
2001	if (!iv \|\| !iv->biv_p \|\| integer_zerop (iv->step)
2002	\|\| iv->have_address_use \|\| !iv->no_overflow)
2003	continue;
2004
2005	if (type != TREE_TYPE (iv->base)
2006	\|\| !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2007	continue;
2008
2009	if (!operand_equal_p (biv->step, iv->step, flags: `0`))
2010	continue;
2011
2012	base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2013	if (operand_equal_p (base_1, iv->base, flags: `0`)
2014	\|\| operand_equal_p (base_2, biv->base, flags: `0`))
2015	{
2016	iv->have_address_use = true;
2017	data->bivs_not_used_in_addr--;
2018	}
2019	}
2020	}
2021
2022	/ Cumulates the steps of indices into DATA and replaces their values with the*
2023	initial ones. Returns false when the value of the index cannot be determined.
2024	Callback for for_each_index. /*
2025
2026	struct ifs_ivopts_data
2027	{
2028	struct ivopts_data *ivopts_data;
2029	gimple *stmt;
2030	tree step;
2031	};
2032
2033	static bool
2034	idx_find_step (tree base, tree idx, void* *data)
2035	{
2036	struct ifs_ivopts_data dta = (struct* ifs_ivopts_data *) data;
2037	struct iv *iv;
2038	bool use_overflow_semantics = false;
2039	tree step, iv_base, iv_step, lbound, off;
2040	class loop *loop = dta->ivopts_data->current_loop;
2041
2042	/ If base is a component ref, require that the offset of the reference*
2043	be invariant. /*
2044	if (TREE_CODE (base) == COMPONENT_REF)
2045	{
2046	off = component_ref_field_offset (base);
2047	return expr_invariant_in_loop_p (loop, expr: off);
2048	}
2049
2050	/ If base is array, first check whether we will be able to move the*
2051	reference out of the loop (in order to take its address in strength
2052	reduction). In order for this to work we need both lower bound
2053	and step to be loop invariants. /*
2054	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2055	{
2056	/ Moreover, for a range, the size needs to be invariant as well. /
2057	if (TREE_CODE (base) == ARRAY_RANGE_REF
2058	&& !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2059	return false;
2060
2061	step = array_ref_element_size (base);
2062	lbound = array_ref_low_bound (base);
2063
2064	if (!expr_invariant_in_loop_p (loop, expr: step)
2065	\|\| !expr_invariant_in_loop_p (loop, expr: lbound))
2066	return false;
2067	}
2068
2069	if (TREE_CODE (*idx) != SSA_NAME)
2070	return true;
2071
2072	iv = get_iv (data: dta->ivopts_data, var: *idx);
2073	if (!iv)
2074	return false;
2075
2076	/ XXX We produce for a base of D42 with iv->base being &x[0]
2077	*&x[0], which is not folded and does not trigger the
2078	ARRAY_REF path below. /*
2079	*idx = iv->base;
2080
2081	if (integer_zerop (iv->step))
2082	return true;
2083
2084	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2085	{
2086	step = array_ref_element_size (base);
2087
2088	/ We only handle addresses whose step is an integer constant. /
2089	if (TREE_CODE (step) != INTEGER_CST)
2090	return false;
2091	}
2092	else
2093	/ The step for pointer arithmetics already is 1 byte. /
2094	step = size_one_node;
2095
2096	iv_base = iv->base;
2097	iv_step = iv->step;
2098	if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2099	use_overflow_semantics = true;
2100
2101	if (!convert_affine_scev (dta->ivopts_data->current_loop,
2102	sizetype, &iv_base, &iv_step, dta->stmt,
2103	use_overflow_semantics))
2104	{
2105	/ The index might wrap. /
2106	return false;
2107	}
2108
2109	step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2110	dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2111
2112	if (dta->ivopts_data->bivs_not_used_in_addr)
2113	{
2114	if (!iv->biv_p)
2115	iv = find_deriving_biv_for_expr (data: dta->ivopts_data, expr: iv->ssa_name);
2116
2117	record_biv_for_address_use (data: dta->ivopts_data, biv: iv);
2118	}
2119	return true;
2120	}
2121
2122	/ Records use in index IDX. Callback for for_each_index. Ivopts data*
2123	object is passed to it in DATA. /*
2124
2125	static bool
2126	idx_record_use (tree base, tree *idx,
2127	void *vdata)
2128	{
2129	struct ivopts_data data = (struct* ivopts_data *) vdata;
2130	find_interesting_uses_op (data, op: *idx);
2131	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2132	{
2133	if (TREE_OPERAND (base, `2`))
2134	find_interesting_uses_op (data, TREE_OPERAND (base, `2`));
2135	if (TREE_OPERAND (base, `3`))
2136	find_interesting_uses_op (data, TREE_OPERAND (base, `3`));
2137	}
2138	return true;
2139	}
2140
2141	/ If we can prove that TOP = cst * BOT for some constant cst,*
2142	store cst to MUL and return true. Otherwise return false.
2143	The returned value is always sign-extended, regardless of the
2144	signedness of TOP and BOT. /*
2145
2146	static bool
2147	constant_multiple_of (tree top, tree bot, widest_int *mul)
2148	{
2149	tree mby;
2150	enum tree_code code;
2151	unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2152	widest_int res, p0, p1;
2153
2154	STRIP_NOPS (top);
2155	STRIP_NOPS (bot);
2156
2157	if (operand_equal_p (top, bot, flags: `0`))
2158	{
2159	*mul = `1`;
2160	return true;
2161	}
2162
2163	code = TREE_CODE (top);
2164	switch (code)
2165	{
2166	case MULT_EXPR:
2167	mby = TREE_OPERAND (top, `1`);
2168	if (TREE_CODE (mby) != INTEGER_CST)
2169	return false;
2170
2171	if (!constant_multiple_of (TREE_OPERAND (top, `0`), bot, mul: &res))
2172	return false;
2173
2174	mul = wi::sext (x: res wi::to_widest (t: mby), offset: precision);
2175	return true;
2176
2177	case PLUS_EXPR:
2178	case MINUS_EXPR:
2179	if (!constant_multiple_of (TREE_OPERAND (top, `0`), bot, mul: &p0)
2180	\|\| !constant_multiple_of (TREE_OPERAND (top, `1`), bot, mul: &p1))
2181	return false;
2182
2183	if (code == MINUS_EXPR)
2184	p1 = -p1;
2185	*mul = wi::sext (x: p0 + p1, offset: precision);
2186	return true;
2187
2188	case INTEGER_CST:
2189	if (TREE_CODE (bot) != INTEGER_CST)
2190	return false;
2191
2192	p0 = widest_int::from (x: wi::to_wide (t: top), sgn: SIGNED);
2193	p1 = widest_int::from (x: wi::to_wide (t: bot), sgn: SIGNED);
2194	if (p1 == `0`)
2195	return false;
2196	*mul = wi::sext (x: wi::divmod_trunc (x: p0, y: p1, sgn: SIGNED, remainder_ptr: &res), offset: precision);
2197	return res == `0`;
2198
2199	default:
2200	if (POLY_INT_CST_P (top)
2201	&& POLY_INT_CST_P (bot)
2202	&& constant_multiple_p (a: wi::to_poly_widest (t: top),
2203	b: wi::to_poly_widest (t: bot), multiple: mul))
2204	return true;
2205
2206	return false;
2207	}
2208	}
2209
2210	/ Return true if memory reference REF with step STEP may be unaligned. /
2211
2212	static bool
2213	may_be_unaligned_p (tree ref, tree step)
2214	{
2215	/ TARGET_MEM_REFs are translated directly to valid MEMs on the target,*
2216	thus they are not misaligned. /*
2217	if (TREE_CODE (ref) == TARGET_MEM_REF)
2218	return false;
2219
2220	unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2221	if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2222	align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2223
2224	unsigned HOST_WIDE_INT bitpos;
2225	unsigned int ref_align;
2226	get_object_alignment_1 (ref, &ref_align, &bitpos);
2227	if (ref_align < align
2228	\|\| (bitpos % align) != `0`
2229	\|\| (bitpos % BITS_PER_UNIT) != `0`)
2230	return true;
2231
2232	unsigned int trailing_zeros = tree_ctz (step);
2233	if (trailing_zeros < HOST_BITS_PER_INT
2234	&& (`1U` << trailing_zeros) * BITS_PER_UNIT < align)
2235	return true;
2236
2237	return false;
2238	}
2239
2240	/ Return true if EXPR may be non-addressable. /
2241
2242	bool
2243	may_be_nonaddressable_p (tree expr)
2244	{
2245	switch (TREE_CODE (expr))
2246	{
2247	case VAR_DECL:
2248	/ Check if it's a register variable. /
2249	return DECL_HARD_REGISTER (expr);
2250
2251	case TARGET_MEM_REF:
2252	/ TARGET_MEM_REFs are translated directly to valid MEMs on the*
2253	target, thus they are always addressable. /*
2254	return false;
2255
2256	case MEM_REF:
2257	/ Likewise for MEM_REFs, modulo the storage order. /
2258	return REF_REVERSE_STORAGE_ORDER (expr);
2259
2260	case BIT_FIELD_REF:
2261	if (REF_REVERSE_STORAGE_ORDER (expr))
2262	return true;
2263	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2264
2265	case COMPONENT_REF:
2266	if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, `0`))))
2267	return true;
2268	return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, `1`))
2269	\|\| may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2270
2271	case ARRAY_REF:
2272	case ARRAY_RANGE_REF:
2273	if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, `0`))))
2274	return true;
2275	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2276
2277	case VIEW_CONVERT_EXPR:
2278	/ This kind of view-conversions may wrap non-addressable objects*
2279	and make them look addressable. After some processing the
2280	non-addressability may be uncovered again, causing ADDR_EXPRs
2281	of inappropriate objects to be built. /*
2282	if (is_gimple_reg (TREE_OPERAND (expr, `0`))
2283	\|\| !is_gimple_addressable (TREE_OPERAND (expr, `0`)))
2284	return true;
2285	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2286
2287	CASE_CONVERT:
2288	return true;
2289
2290	default:
2291	break;
2292	}
2293
2294	return false;
2295	}
2296
2297	/ Finds addresses in OP_P inside STMT. /*
2298
2299	static void
2300	find_interesting_uses_address (struct ivopts_data data, gimple stmt,
2301	tree *op_p)
2302	{
2303	tree base = *op_p, step = size_zero_node;
2304	struct iv *civ;
2305	struct ifs_ivopts_data ifs_ivopts_data;
2306
2307	/ Do not play with volatile memory references. A bit too conservative,*
2308	perhaps, but safe. /*
2309	if (gimple_has_volatile_ops (stmt))
2310	goto fail;
2311
2312	/ Ignore bitfields for now. Not really something terribly complicated*
2313	to handle. TODO. /*
2314	if (TREE_CODE (base) == BIT_FIELD_REF)
2315	goto fail;
2316
2317	base = unshare_expr (base);
2318
2319	if (TREE_CODE (base) == TARGET_MEM_REF)
2320	{
2321	tree type = build_pointer_type (TREE_TYPE (base));
2322	tree astep;
2323
2324	if (TMR_BASE (base)
2325	&& TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2326	{
2327	civ = get_iv (data, TMR_BASE (base));
2328	if (!civ)
2329	goto fail;
2330
2331	TMR_BASE (base) = civ->base;
2332	step = civ->step;
2333	}
2334	if (TMR_INDEX2 (base)
2335	&& TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2336	{
2337	civ = get_iv (data, TMR_INDEX2 (base));
2338	if (!civ)
2339	goto fail;
2340
2341	TMR_INDEX2 (base) = civ->base;
2342	step = civ->step;
2343	}
2344	if (TMR_INDEX (base)
2345	&& TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2346	{
2347	civ = get_iv (data, TMR_INDEX (base));
2348	if (!civ)
2349	goto fail;
2350
2351	TMR_INDEX (base) = civ->base;
2352	astep = civ->step;
2353
2354	if (astep)
2355	{
2356	if (TMR_STEP (base))
2357	astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2358
2359	step = fold_build2 (PLUS_EXPR, type, step, astep);
2360	}
2361	}
2362
2363	if (integer_zerop (step))
2364	goto fail;
2365	base = tree_mem_ref_addr (type, base);
2366	}
2367	else
2368	{
2369	ifs_ivopts_data.ivopts_data = data;
2370	ifs_ivopts_data.stmt = stmt;
2371	ifs_ivopts_data.step = size_zero_node;
2372	if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2373	\|\| integer_zerop (ifs_ivopts_data.step))
2374	goto fail;
2375	step = ifs_ivopts_data.step;
2376
2377	/ Check that the base expression is addressable. This needs*
2378	to be done after substituting bases of IVs into it. /*
2379	if (may_be_nonaddressable_p (expr: base))
2380	goto fail;
2381
2382	/ Moreover, on strict alignment platforms, check that it is*
2383	sufficiently aligned. /*
2384	if (STRICT_ALIGNMENT && may_be_unaligned_p (ref: base, step))
2385	goto fail;
2386
2387	base = build_fold_addr_expr (base);
2388
2389	/ Substituting bases of IVs into the base expression might*
2390	have caused folding opportunities. /*
2391	if (TREE_CODE (base) == ADDR_EXPR)
2392	{
2393	tree *ref = &TREE_OPERAND (base, `0`);
2394	while (handled_component_p (t: *ref))
2395	ref = &TREE_OPERAND (*ref, `0`);
2396	if (TREE_CODE (*ref) == MEM_REF)
2397	{
2398	tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2399	TREE_OPERAND (*ref, `0`),
2400	TREE_OPERAND (*ref, `1`));
2401	if (tem)
2402	*ref = tem;
2403	}
2404	}
2405	}
2406
2407	civ = alloc_iv (data, base, step);
2408	/ Fail if base object of this memory reference is unknown. /
2409	if (civ->base_object == NULL_TREE)
2410	goto fail;
2411
2412	record_group_use (data, use_p: op_p, iv: civ, stmt, type: USE_REF_ADDRESS, TREE_TYPE (*op_p));
2413	return;
2414
2415	fail:
2416	for_each_index (op_p, idx_record_use, data);
2417	}
2418
2419	/ Finds and records invariants used in STMT. /
2420
2421	static void
2422	find_invariants_stmt (struct ivopts_data data, gimple stmt)
2423	{
2424	ssa_op_iter iter;
2425	use_operand_p use_p;
2426	tree op;
2427
2428	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2429	{
2430	op = USE_FROM_PTR (use_p);
2431	record_invariant (data, op, nonlinear_use: false);
2432	}
2433	}
2434
2435	/ CALL calls an internal function. If operand OP_P will become an
2436	address when the call is expanded, return the type of the memory
2437	being addressed, otherwise return null. /*
2438
2439	static tree
2440	get_mem_type_for_internal_fn (gcall call, tree op_p)
2441	{
2442	switch (gimple_call_internal_fn (gs: call))
2443	{
2444	case IFN_MASK_LOAD:
2445	case IFN_MASK_LOAD_LANES:
2446	case IFN_MASK_LEN_LOAD_LANES:
2447	case IFN_LEN_LOAD:
2448	case IFN_MASK_LEN_LOAD:
2449	if (op_p == gimple_call_arg_ptr (gs: call, index: `0`))
2450	return TREE_TYPE (gimple_call_lhs (call));
2451	return NULL_TREE;
2452
2453	case IFN_MASK_STORE:
2454	case IFN_MASK_STORE_LANES:
2455	case IFN_MASK_LEN_STORE_LANES:
2456	case IFN_LEN_STORE:
2457	case IFN_MASK_LEN_STORE:
2458	{
2459	if (op_p == gimple_call_arg_ptr (gs: call, index: `0`))
2460	{
2461	internal_fn ifn = gimple_call_internal_fn (gs: call);
2462	int index = internal_fn_stored_value_index (ifn);
2463	return TREE_TYPE (gimple_call_arg (call, index));
2464	}
2465	return NULL_TREE;
2466	}
2467
2468	default:
2469	return NULL_TREE;
2470	}
2471	}
2472
2473	/ IV is a (non-address) iv that describes operand OP_P of STMT.
2474	Return true if the operand will become an address when STMT
2475	is expanded and record the associated address use if so. /*
2476
2477	static bool
2478	find_address_like_use (struct ivopts_data data, gimple stmt, tree *op_p,
2479	struct iv *iv)
2480	{
2481	/ Fail if base object of this memory reference is unknown. /
2482	if (iv->base_object == NULL_TREE)
2483	return false;
2484
2485	tree mem_type = NULL_TREE;
2486	if (gcall call = dyn_cast <gcall > (p: stmt))
2487	if (gimple_call_internal_p (gs: call))
2488	mem_type = get_mem_type_for_internal_fn (call, op_p);
2489	if (mem_type)
2490	{
2491	iv = alloc_iv (data, base: iv->base, step: iv->step);
2492	record_group_use (data, use_p: op_p, iv, stmt, type: USE_PTR_ADDRESS, mem_type);
2493	return true;
2494	}
2495	return false;
2496	}
2497
2498	/ Finds interesting uses of induction variables in the statement STMT. /
2499
2500	static void
2501	find_interesting_uses_stmt (struct ivopts_data data, gimple stmt)
2502	{
2503	struct iv *iv;
2504	tree op, lhs, rhs;
2505	ssa_op_iter iter;
2506	use_operand_p use_p;
2507	enum tree_code code;
2508
2509	find_invariants_stmt (data, stmt);
2510
2511	if (gimple_code (g: stmt) == GIMPLE_COND)
2512	{
2513	find_interesting_uses_cond (data, stmt);
2514	return;
2515	}
2516
2517	if (is_gimple_assign (gs: stmt))
2518	{
2519	lhs = gimple_assign_lhs_ptr (gs: stmt);
2520	rhs = gimple_assign_rhs1_ptr (gs: stmt);
2521
2522	if (TREE_CODE (*lhs) == SSA_NAME)
2523	{
2524	/ If the statement defines an induction variable, the uses are not*
2525	interesting by themselves. /*
2526
2527	iv = get_iv (data, var: *lhs);
2528
2529	if (iv && !integer_zerop (iv->step))
2530	return;
2531	}
2532
2533	code = gimple_assign_rhs_code (gs: stmt);
2534	if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2535	&& (REFERENCE_CLASS_P (*rhs)
2536	\|\| is_gimple_val (*rhs)))
2537	{
2538	if (REFERENCE_CLASS_P (*rhs))
2539	find_interesting_uses_address (data, stmt, op_p: rhs);
2540	else
2541	find_interesting_uses_op (data, op: *rhs);
2542
2543	if (REFERENCE_CLASS_P (*lhs))
2544	find_interesting_uses_address (data, stmt, op_p: lhs);
2545	return;
2546	}
2547	else if (TREE_CODE_CLASS (code) == tcc_comparison)
2548	{
2549	find_interesting_uses_cond (data, stmt);
2550	return;
2551	}
2552
2553	/ TODO -- we should also handle address uses of type*
2554
2555	memory = call (whatever);
2556
2557	and
2558
2559	call (memory). /*
2560	}
2561
2562	if (gimple_code (g: stmt) == GIMPLE_PHI
2563	&& gimple_bb (g: stmt) == data->current_loop->header)
2564	{
2565	iv = get_iv (data, PHI_RESULT (stmt));
2566
2567	if (iv && !integer_zerop (iv->step))
2568	return;
2569	}
2570
2571	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2572	{
2573	op = USE_FROM_PTR (use_p);
2574
2575	if (TREE_CODE (op) != SSA_NAME)
2576	continue;
2577
2578	iv = get_iv (data, var: op);
2579	if (!iv)
2580	continue;
2581
2582	if (!find_address_like_use (data, stmt, op_p: use_p->use, iv))
2583	find_interesting_uses_op (data, op);
2584	}
2585	}
2586
2587	/ Finds interesting uses of induction variables outside of loops*
2588	on loop exit edge EXIT. /*
2589
2590	static void
2591	find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2592	{
2593	gphi *phi;
2594	gphi_iterator psi;
2595	tree def;
2596
2597	for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); gsi_next (i: &psi))
2598	{
2599	phi = psi.phi ();
2600	def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2601	if (!virtual_operand_p (op: def))
2602	find_interesting_uses_op (data, op: def);
2603	}
2604	}
2605
2606	/ Return TRUE if OFFSET is within the range of [base + offset] addressing*
2607	mode for memory reference represented by USE. /*
2608
2609	static GTY (()) vec<rtx, va_gc> *addr_list;
2610
2611	static bool
2612	addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2613	{
2614	rtx reg, addr;
2615	unsigned list_index;
2616	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2617	machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2618
2619	list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2620	if (list_index >= vec_safe_length (v: addr_list))
2621	vec_safe_grow_cleared (v&: addr_list, len: list_index + MAX_MACHINE_MODE, exact: true);
2622
2623	addr = (*addr_list)[list_index];
2624	if (!addr)
2625	{
2626	addr_mode = targetm.addr_space.address_mode (as);
2627	reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + `1`);
2628	addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2629	(*addr_list)[list_index] = addr;
2630	}
2631	else
2632	addr_mode = GET_MODE (addr);
2633
2634	XEXP (addr, `1`) = gen_int_mode (offset, addr_mode);
2635	return (memory_address_addr_space_p (mem_mode, addr, as));
2636	}
2637
2638	/ Comparison function to sort group in ascending order of addr_offset. /
2639
2640	static int
2641	group_compare_offset (const void a, const* void *b)
2642	{
2643	const struct iv_use *const u1 = (const* struct iv_use *const *) a;
2644	const struct iv_use *const u2 = (const* struct iv_use *const *) b;
2645
2646	return compare_sizes_for_sort (a: (u1)->addr_offset, b: (u2)->addr_offset);
2647	}
2648
2649	/ Check if small groups should be split. Return true if no group*
2650	contains more than two uses with distinct addr_offsets. Return
2651	false otherwise. We want to split such groups because:
2652
2653	1) Small groups don't have much benefit and may interfer with
2654	general candidate selection.
2655	2) Size for problem with only small groups is usually small and
2656	general algorithm can handle it well.
2657
2658	TODO -- Above claim may not hold when we want to merge memory
2659	accesses with conseuctive addresses. /*
2660
2661	static bool
2662	split_small_address_groups_p (struct ivopts_data *data)
2663	{
2664	unsigned int i, j, distinct = `1`;
2665	struct iv_use *pre;
2666	struct iv_group *group;
2667
2668	for (i = `0`; i < data->vgroups.length (); i++)
2669	{
2670	group = data->vgroups [i];
2671	if (group->vuses.length () == `1`)
2672	continue;
2673
2674	gcc_assert (address_p (group->type));
2675	if (group->vuses.length () == `2`)
2676	{
2677	if (compare_sizes_for_sort (a: group->vuses [`0`]->addr_offset,
2678	b: group->vuses [`1`]->addr_offset) > `0`)
2679	std::swap (a&: group->vuses [`0`], b&: group->vuses [`1`]);
2680	}
2681	else
2682	group->vuses.qsort (group_compare_offset);
2683
2684	if (distinct > `2`)
2685	continue;
2686
2687	distinct = `1`;
2688	for (pre = group->vuses [`0`], j = `1`; j < group->vuses.length (); j++)
2689	{
2690	if (maybe_ne (a: group->vuses [j]->addr_offset, b: pre->addr_offset))
2691	{
2692	pre = group->vuses [j];
2693	distinct++;
2694	}
2695
2696	if (distinct > `2`)
2697	break;
2698	}
2699	}
2700
2701	return (distinct <= `2`);
2702	}
2703
2704	/ For each group of address type uses, this function further groups*
2705	these uses according to the maximum offset supported by target's
2706	[base + offset] addressing mode. /*
2707
2708	static void
2709	split_address_groups (struct ivopts_data *data)
2710	{
2711	unsigned int i, j;
2712	/ Always split group. /
2713	bool split_p = split_small_address_groups_p (data);
2714
2715	for (i = `0`; i < data->vgroups.length (); i++)
2716	{
2717	struct iv_group *new_group = NULL;
2718	struct iv_group *group = data->vgroups [i];
2719	struct iv_use *use = group->vuses [`0`];
2720
2721	use->id = `0`;
2722	use->group_id = group->id;
2723	if (group->vuses.length () == `1`)
2724	continue;
2725
2726	gcc_assert (address_p (use->type));
2727
2728	for (j = `1`; j < group->vuses.length ();)
2729	{
2730	struct iv_use *next = group->vuses [j];
2731	poly_int64 offset = next->addr_offset - use->addr_offset;
2732
2733	/ Split group if aksed to, or the offset against the first*
2734	use can't fit in offset part of addressing mode. IV uses
2735	having the same offset are still kept in one group. /*
2736	if (maybe_ne (a: offset, b: `0`)
2737	&& (split_p \|\| !addr_offset_valid_p (use, offset)))
2738	{
2739	if (!new_group)
2740	new_group = record_group (data, type: group->type);
2741	group->vuses.ordered_remove (ix: j);
2742	new_group->vuses.safe_push (obj: next);
2743	continue;
2744	}
2745
2746	next->id = j;
2747	next->group_id = group->id;
2748	j++;
2749	}
2750	}
2751	}
2752
2753	/ Finds uses of the induction variables that are interesting. /
2754
2755	static void
2756	find_interesting_uses (struct ivopts_data data, basic_block body)
2757	{
2758	basic_block bb;
2759	gimple_stmt_iterator bsi;
2760	unsigned i;
2761	edge e;
2762
2763	for (i = `0`; i < data->current_loop->num_nodes; i++)
2764	{
2765	edge_iterator ei;
2766	bb = body[i];
2767
2768	FOR_EACH_EDGE (e, ei, bb->succs)
2769	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2770	&& !flow_bb_inside_loop_p (data->current_loop, e->dest))
2771	find_interesting_uses_outside (data, exit: e);
2772
2773	for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2774	find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2775	for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2776	if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
2777	find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2778	}
2779
2780	split_address_groups (data);
2781
2782	if (dump_file && (dump_flags & TDF_DETAILS))
2783	{
2784	fprintf (stream: dump_file, format: "\n<IV Groups>:\n");
2785	dump_groups (file: dump_file, data);
2786	fprintf (stream: dump_file, format: "\n");
2787	}
2788	}
2789
2790	/ Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR*
2791	is true, assume we are inside an address. If TOP_COMPREF is true, assume
2792	we are at the top-level of the processed address. /*
2793
2794	static tree
2795	strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2796	poly_int64 *offset)
2797	{
2798	tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2799	enum tree_code code;
2800	tree type, orig_type = TREE_TYPE (expr);
2801	poly_int64 off0, off1;
2802	HOST_WIDE_INT st;
2803	tree orig_expr = expr;
2804
2805	STRIP_NOPS (expr);
2806
2807	type = TREE_TYPE (expr);
2808	code = TREE_CODE (expr);
2809	*offset = `0`;
2810
2811	switch (code)
2812	{
2813	case POINTER_PLUS_EXPR:
2814	case PLUS_EXPR:
2815	case MINUS_EXPR:
2816	op0 = TREE_OPERAND (expr, `0`);
2817	op1 = TREE_OPERAND (expr, `1`);
2818
2819	op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2820	op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2821
2822	*offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2823	if (op0 == TREE_OPERAND (expr, `0`)
2824	&& op1 == TREE_OPERAND (expr, `1`))
2825	return orig_expr;
2826
2827	if (integer_zerop (op1))
2828	expr = op0;
2829	else if (integer_zerop (op0))
2830	{
2831	if (code == MINUS_EXPR)
2832	{
2833	if (TYPE_OVERFLOW_UNDEFINED (type))
2834	{
2835	type = unsigned_type_for (type);
2836	op1 = fold_convert (type, op1);
2837	}
2838	expr = fold_build1 (NEGATE_EXPR, type, op1);
2839	}
2840	else
2841	expr = op1;
2842	}
2843	else
2844	{
2845	if (TYPE_OVERFLOW_UNDEFINED (type))
2846	{
2847	type = unsigned_type_for (type);
2848	if (code == POINTER_PLUS_EXPR)
2849	code = PLUS_EXPR;
2850	op0 = fold_convert (type, op0);
2851	op1 = fold_convert (type, op1);
2852	}
2853	expr = fold_build2 (code, type, op0, op1);
2854	}
2855
2856	return fold_convert (orig_type, expr);
2857
2858	case MULT_EXPR:
2859	op1 = TREE_OPERAND (expr, `1`);
2860	if (!cst_and_fits_in_hwi (op1))
2861	return orig_expr;
2862
2863	op0 = TREE_OPERAND (expr, `0`);
2864	op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2865	if (op0 == TREE_OPERAND (expr, `0`))
2866	return orig_expr;
2867
2868	offset = off0 int_cst_value (op1);
2869	if (integer_zerop (op0))
2870	expr = op0;
2871	else
2872	{
2873	if (TYPE_OVERFLOW_UNDEFINED (type))
2874	{
2875	type = unsigned_type_for (type);
2876	op0 = fold_convert (type, op0);
2877	op1 = fold_convert (type, op1);
2878	}
2879	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2880	}
2881
2882	return fold_convert (orig_type, expr);
2883
2884	case ARRAY_REF:
2885	case ARRAY_RANGE_REF:
2886	if (!inside_addr)
2887	return orig_expr;
2888
2889	step = array_ref_element_size (expr);
2890	if (!cst_and_fits_in_hwi (step))
2891	break;
2892
2893	st = int_cst_value (step);
2894	op1 = TREE_OPERAND (expr, `1`);
2895	op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2896	offset = off1 st;
2897
2898	if (top_compref
2899	&& integer_zerop (op1))
2900	{
2901	/ Strip the component reference completely. /
2902	op0 = TREE_OPERAND (expr, `0`);
2903	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2904	*offset += off0;
2905	return op0;
2906	}
2907	break;
2908
2909	case COMPONENT_REF:
2910	{
2911	tree field;
2912
2913	if (!inside_addr)
2914	return orig_expr;
2915
2916	tmp = component_ref_field_offset (expr);
2917	field = TREE_OPERAND (expr, `1`);
2918	if (top_compref
2919	&& cst_and_fits_in_hwi (tmp)
2920	&& cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2921	{
2922	HOST_WIDE_INT boffset, abs_off;
2923
2924	/ Strip the component reference completely. /
2925	op0 = TREE_OPERAND (expr, `0`);
2926	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2927	boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2928	abs_off = abs_hwi (x: boffset) / BITS_PER_UNIT;
2929	if (boffset < `0`)
2930	abs_off = -abs_off;
2931
2932	*offset = off0 + int_cst_value (tmp) + abs_off;
2933	return op0;
2934	}
2935	}
2936	break;
2937
2938	case ADDR_EXPR:
2939	op0 = TREE_OPERAND (expr, `0`);
2940	op0 = strip_offset_1 (expr: op0, inside_addr: true, top_compref: true, offset: &off0);
2941	*offset += off0;
2942
2943	if (op0 == TREE_OPERAND (expr, `0`))
2944	return orig_expr;
2945
2946	expr = build_fold_addr_expr (op0);
2947	return fold_convert (orig_type, expr);
2948
2949	case MEM_REF:
2950	/ ??? Offset operand? /
2951	inside_addr = false;
2952	break;
2953
2954	default:
2955	if (ptrdiff_tree_p (expr, offset) && maybe_ne (a: *offset, b: `0`))
2956	return build_int_cst (orig_type, `0`);
2957	return orig_expr;
2958	}
2959
2960	/ Default handling of expressions for that we want to recurse into*
2961	the first operand. /*
2962	op0 = TREE_OPERAND (expr, `0`);
2963	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref: false, offset: &off0);
2964	*offset += off0;
2965
2966	if (op0 == TREE_OPERAND (expr, `0`)
2967	&& (!op1 \|\| op1 == TREE_OPERAND (expr, `1`)))
2968	return orig_expr;
2969
2970	expr = copy_node (expr);
2971	TREE_OPERAND (expr, `0`) = op0;
2972	if (op1)
2973	TREE_OPERAND (expr, `1`) = op1;
2974
2975	/ Inside address, we might strip the top level component references,*
2976	thus changing type of the expression. Handling of ADDR_EXPR
2977	will fix that. /*
2978	expr = fold_convert (orig_type, expr);
2979
2980	return expr;
2981	}
2982
2983	/ Strips constant offsets from EXPR and stores them to OFFSET. /
2984
2985	static tree
2986	strip_offset (tree expr, poly_uint64 *offset)
2987	{
2988	poly_int64 off;
2989	tree core = strip_offset_1 (expr, inside_addr: false, top_compref: false, offset: &off);
2990	*offset = off;
2991	return core;
2992	}
2993
2994	/ Returns variant of TYPE that can be used as base for different uses.*
2995	We return unsigned type with the same precision, which avoids problems
2996	with overflows. /*
2997
2998	static tree
2999	generic_type_for (tree type)
3000	{
3001	if (POINTER_TYPE_P (type))
3002	return unsigned_type_for (type);
3003
3004	if (TYPE_UNSIGNED (type))
3005	return type;
3006
3007	return unsigned_type_for (type);
3008	}
3009
3010	/ Private data for walk_tree. /
3011
3012	struct walk_tree_data
3013	{
3014	bitmap *inv_vars;
3015	struct ivopts_data *idata;
3016	};
3017
3018	/ Callback function for walk_tree, it records invariants and symbol*
3019	reference in EXPR_P. DATA is the structure storing result info. /
3020
3021	static tree
3022	find_inv_vars_cb (tree expr_p, int* ws ATTRIBUTE_UNUSED, void* *data)
3023	{
3024	tree op = *expr_p;
3025	struct version_info *info;
3026	struct walk_tree_data wdata = (struct* walk_tree_data*) data;
3027
3028	if (TREE_CODE (op) != SSA_NAME)
3029	return NULL_TREE;
3030
3031	info = name_info (data: wdata->idata, name: op);
3032	/ Because we expand simple operations when finding IVs, loop invariant*
3033	variable that isn't referred by the original loop could be used now.
3034	Record such invariant variables here. /*
3035	if (!info->iv)
3036	{
3037	struct ivopts_data *idata = wdata->idata;
3038	basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3039
3040	if (!bb \|\| !flow_bb_inside_loop_p (idata->current_loop, bb))
3041	{
3042	tree steptype = TREE_TYPE (op);
3043	if (POINTER_TYPE_P (steptype))
3044	steptype = sizetype;
3045	set_iv (data: idata, iv: op, base: op, step: build_int_cst (steptype, `0`), no_overflow: true);
3046	record_invariant (data: idata, op, nonlinear_use: false);
3047	}
3048	}
3049	if (!info->inv_id \|\| info->has_nonlin_use)
3050	return NULL_TREE;
3051
3052	if (!*wdata->inv_vars)
3053	*wdata->inv_vars = BITMAP_ALLOC (NULL);
3054	bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3055
3056	return NULL_TREE;
3057	}
3058
3059	/ Records invariants in EXPR_P. INV_VARS is the bitmap to that we should
3060	store it. /*
3061
3062	static inline void
3063	find_inv_vars (struct ivopts_data data, tree expr_p, bitmap *inv_vars)
3064	{
3065	struct walk_tree_data wdata;
3066
3067	if (!inv_vars)
3068	return;
3069
3070	wdata.idata = data;
3071	wdata.inv_vars = inv_vars;
3072	walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3073	}
3074
3075	/ Get entry from invariant expr hash table for INV_EXPR. New entry*
3076	will be recorded if it doesn't exist yet. Given below two exprs:
3077	inv_expr + cst1, inv_expr + cst2
3078	It's hard to make decision whether constant part should be stripped
3079	or not. We choose to not strip based on below facts:
3080	1) We need to count ADD cost for constant part if it's stripped,
3081	which isn't always trivial where this functions is called.
3082	2) Stripping constant away may be conflict with following loop
3083	invariant hoisting pass.
3084	3) Not stripping constant away results in more invariant exprs,
3085	which usually leads to decision preferring lower reg pressure. /*
3086
3087	static iv_inv_expr_ent *
3088	get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3089	{
3090	STRIP_NOPS (inv_expr);
3091
3092	if (poly_int_tree_p (t: inv_expr)
3093	\|\| TREE_CODE (inv_expr) == SSA_NAME)
3094	return NULL;
3095
3096	/ Don't strip constant part away as we used to. /
3097
3098	/ Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. /
3099	struct iv_inv_expr_ent ent;
3100	ent.expr = inv_expr;
3101	ent.hash = iterative_hash_expr (tree: inv_expr, seed: `0`);
3102	struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (value: &ent, insert: INSERT);
3103
3104	if (!*slot)
3105	{
3106	slot = XNEW (struct* iv_inv_expr_ent);
3107	(*slot)->expr = inv_expr;
3108	(*slot)->hash = ent.hash;
3109	(*slot)->id = ++data->max_inv_expr_id;
3110	}
3111
3112	return *slot;
3113	}
3114
3115
3116	/ Return TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3117	unsuitable as ivopts candidates for potentially involving undefined
3118	behavior. /*
3119
3120	static tree
3121	find_ssa_undef (tree tp, int* walk_subtrees, void* *bb_)
3122	{
3123	basic_block bb = (basic_block) bb_;
3124	if (TREE_CODE (*tp) == SSA_NAME
3125	&& ssa_name_maybe_undef_p (var: *tp)
3126	&& !ssa_name_any_use_dominates_bb_p (var: *tp, bb))
3127	return *tp;
3128	if (!EXPR_P (*tp))
3129	*walk_subtrees = `0`;
3130	return NULL;
3131	}
3132
3133	/ Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and*
3134	position to POS. If USE is not NULL, the candidate is set as related to
3135	it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3136	replacement of the final value of the iv by a direct computation. /*
3137
3138	static struct iv_cand *
3139	add_candidate_1 (struct ivopts_data data, tree base, tree step, bool* important,
3140	enum iv_position pos, struct iv_use *use,
3141	gimple incremented_at, struct* iv *orig_iv = NULL,
3142	bool doloop = false)
3143	{
3144	unsigned i;
3145	struct iv_cand *cand = NULL;
3146	tree type, orig_type;
3147
3148	gcc_assert (base && step);
3149
3150	/ -fkeep-gc-roots-live means that we have to keep a real pointer*
3151	live, but the ivopts code may replace a real pointer with one
3152	pointing before or after the memory block that is then adjusted
3153	into the memory block during the loop. FIXME: It would likely be
3154	better to actually force the pointer live and still use ivopts;
3155	for example, it would be enough to write the pointer into memory
3156	and keep it there until after the loop. /*
3157	if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3158	return NULL;
3159
3160	/ If BASE contains undefined SSA names make sure we only record*
3161	the original IV. /*
3162	bool involves_undefs = false;
3163	if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3164	{
3165	if (pos != IP_ORIGINAL)
3166	return NULL;
3167	important = false;
3168	involves_undefs = true;
3169	}
3170
3171	/ For non-original variables, make sure their values are computed in a type*
3172	that does not invoke undefined behavior on overflows (since in general,
3173	we cannot prove that these induction variables are non-wrapping). /*
3174	if (pos != IP_ORIGINAL)
3175	{
3176	orig_type = TREE_TYPE (base);
3177	type = generic_type_for (type: orig_type);
3178	if (type != orig_type)
3179	{
3180	base = fold_convert (type, base);
3181	step = fold_convert (type, step);
3182	}
3183	}
3184
3185	for (i = `0`; i < data->vcands.length (); i++)
3186	{
3187	cand = data->vcands [i];
3188
3189	if (cand->pos != pos)
3190	continue;
3191
3192	if (cand->incremented_at != incremented_at
3193	\|\| ((pos == IP_AFTER_USE \|\| pos == IP_BEFORE_USE)
3194	&& cand->ainc_use != use))
3195	continue;
3196
3197	if (operand_equal_p (base, cand->iv->base, flags: `0`)
3198	&& operand_equal_p (step, cand->iv->step, flags: `0`)
3199	&& (TYPE_PRECISION (TREE_TYPE (base))
3200	== TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3201	break;
3202	}
3203
3204	if (i == data->vcands.length ())
3205	{
3206	cand = XCNEW (struct iv_cand);
3207	cand->id = i;
3208	cand->iv = alloc_iv (data, base, step);
3209	cand->pos = pos;
3210	if (pos != IP_ORIGINAL)
3211	{
3212	if (doloop)
3213	cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3214	else
3215	cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3216	cand->var_after = cand->var_before;
3217	}
3218	cand->important = important;
3219	cand->involves_undefs = involves_undefs;
3220	cand->incremented_at = incremented_at;
3221	cand->doloop_p = doloop;
3222	data->vcands.safe_push (obj: cand);
3223
3224	if (!poly_int_tree_p (t: step))
3225	{
3226	find_inv_vars (data, expr_p: &step, inv_vars: &cand->inv_vars);
3227
3228	iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, inv_expr: step);
3229	/ Share bitmap between inv_vars and inv_exprs for cand. /
3230	if (inv_expr != NULL)
3231	{
3232	cand->inv_exprs = cand->inv_vars;
3233	cand->inv_vars = NULL;
3234	if (cand->inv_exprs)
3235	bitmap_clear (cand->inv_exprs);
3236	else
3237	cand->inv_exprs = BITMAP_ALLOC (NULL);
3238
3239	bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3240	}
3241	}
3242
3243	if (pos == IP_AFTER_USE \|\| pos == IP_BEFORE_USE)
3244	cand->ainc_use = use;
3245	else
3246	cand->ainc_use = NULL;
3247
3248	cand->orig_iv = orig_iv;
3249	if (dump_file && (dump_flags & TDF_DETAILS))
3250	dump_cand (file: dump_file, cand);
3251	}
3252
3253	cand->important \|= important;
3254	cand->doloop_p \|= doloop;
3255
3256	/ Relate candidate to the group for which it is added. /
3257	if (use)
3258	bitmap_set_bit (data->vgroups [use->group_id]->related_cands, i);
3259
3260	return cand;
3261	}
3262
3263	/ Returns true if incrementing the induction variable at the end of the LOOP*
3264	is allowed.
3265
3266	The purpose is to avoid splitting latch edge with a biv increment, thus
3267	creating a jump, possibly confusing other optimization passes and leaving
3268	less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3269	available (so we do not have a better alternative), or if the latch edge
3270	is already nonempty. /*
3271
3272	static bool
3273	allow_ip_end_pos_p (class loop *loop)
3274	{
3275	if (!ip_normal_pos (loop))
3276	return true;
3277
3278	if (!empty_block_p (ip_end_pos (loop)))
3279	return true;
3280
3281	return false;
3282	}
3283
3284	/ If possible, adds autoincrement candidates BASE + STEP * i based on use USE.*
3285	Important field is set to IMPORTANT. /*
3286
3287	static void
3288	add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3289	bool important, struct iv_use *use)
3290	{
3291	basic_block use_bb = gimple_bb (g: use->stmt);
3292	machine_mode mem_mode;
3293	unsigned HOST_WIDE_INT cstepi;
3294
3295	/ If we insert the increment in any position other than the standard*
3296	ones, we must ensure that it is incremented once per iteration.
3297	It must not be in an inner nested loop, or one side of an if
3298	statement. /*
3299	if (use_bb->loop_father != data->current_loop
3300	\|\| !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3301	\|\| stmt_can_throw_internal (cfun, use->stmt)
3302	\|\| !cst_and_fits_in_hwi (step))
3303	return;
3304
3305	cstepi = int_cst_value (step);
3306
3307	mem_mode = TYPE_MODE (use->mem_type);
3308	if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3309	\|\| USE_STORE_PRE_INCREMENT (mem_mode))
3310	&& known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3311	\|\| ((USE_LOAD_PRE_DECREMENT (mem_mode)
3312	\|\| USE_STORE_PRE_DECREMENT (mem_mode))
3313	&& known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3314	{
3315	enum tree_code code = MINUS_EXPR;
3316	tree new_base;
3317	tree new_step = step;
3318
3319	if (POINTER_TYPE_P (TREE_TYPE (base)))
3320	{
3321	new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3322	code = POINTER_PLUS_EXPR;
3323	}
3324	else
3325	new_step = fold_convert (TREE_TYPE (base), new_step);
3326	new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3327	add_candidate_1 (data, base: new_base, step, important, pos: IP_BEFORE_USE, use,
3328	incremented_at: use->stmt);
3329	}
3330	if (((USE_LOAD_POST_INCREMENT (mem_mode)
3331	\|\| USE_STORE_POST_INCREMENT (mem_mode))
3332	&& known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3333	\|\| ((USE_LOAD_POST_DECREMENT (mem_mode)
3334	\|\| USE_STORE_POST_DECREMENT (mem_mode))
3335	&& known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3336	{
3337	add_candidate_1 (data, base, step, important, pos: IP_AFTER_USE, use,
3338	incremented_at: use->stmt);
3339	}
3340	}
3341
3342	/ Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and*
3343	position to POS. If USE is not NULL, the candidate is set as related to
3344	it. The candidate computation is scheduled before exit condition and at
3345	the end of loop. /*
3346
3347	static void
3348	add_candidate (struct ivopts_data data, tree base, tree step, bool* important,
3349	struct iv_use use, struct* iv *orig_iv = NULL,
3350	bool doloop = false)
3351	{
3352	if (ip_normal_pos (data->current_loop))
3353	add_candidate_1 (data, base, step, important, pos: IP_NORMAL, use, NULL, orig_iv,
3354	doloop);
3355	/ Exclude doloop candidate here since it requires decrement then comparison*
3356	and jump, the IP_END position doesn't match. /*
3357	if (!doloop && ip_end_pos (data->current_loop)
3358	&& allow_ip_end_pos_p (loop: data->current_loop))
3359	add_candidate_1 (data, base, step, important, pos: IP_END, use, NULL, orig_iv);
3360	}
3361
3362	/ Adds standard iv candidates. /
3363
3364	static void
3365	add_standard_iv_candidates (struct ivopts_data *data)
3366	{
3367	add_candidate (data, integer_zero_node, integer_one_node, important: true, NULL);
3368
3369	/ The same for a double-integer type if it is still fast enough. /
3370	if (TYPE_PRECISION
3371	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3372	&& TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3373	add_candidate (data, base: build_int_cst (long_integer_type_node, `0`),
3374	step: build_int_cst (long_integer_type_node, `1`), important: true, NULL);
3375
3376	/ The same for a double-integer type if it is still fast enough. /
3377	if (TYPE_PRECISION
3378	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3379	&& TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3380	add_candidate (data, base: build_int_cst (long_long_integer_type_node, `0`),
3381	step: build_int_cst (long_long_integer_type_node, `1`), important: true, NULL);
3382	}
3383
3384
3385	/ Adds candidates bases on the old induction variable IV. /
3386
3387	static void
3388	add_iv_candidate_for_biv (struct ivopts_data data, struct* iv *iv)
3389	{
3390	gimple *phi;
3391	tree def;
3392	struct iv_cand *cand;
3393
3394	/ Check if this biv is used in address type use. /
3395	if (iv->no_overflow && iv->have_address_use
3396	&& INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3397	&& TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3398	{
3399	tree base = fold_convert (sizetype, iv->base);
3400	tree step = fold_convert (sizetype, iv->step);
3401
3402	/ Add iv cand of same precision as index part in TARGET_MEM_REF. /
3403	add_candidate (data, base, step, important: true, NULL, orig_iv: iv);
3404	/ Add iv cand of the original type only if it has nonlinear use. /
3405	if (iv->nonlin_use)
3406	add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3407	}
3408	else
3409	add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3410
3411	/ The same, but with initial value zero. /
3412	if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3413	add_candidate (data, size_int (`0`), step: iv->step, important: true, NULL);
3414	else
3415	add_candidate (data, base: build_int_cst (TREE_TYPE (iv->base), `0`),
3416	step: iv->step, important: true, NULL);
3417
3418	phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3419	if (gimple_code (g: phi) == GIMPLE_PHI)
3420	{
3421	/ Additionally record the possibility of leaving the original iv*
3422	untouched. /*
3423	def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3424	/ Don't add candidate if it's from another PHI node because*
3425	it's an affine iv appearing in the form of PEELED_CHREC. /*
3426	phi = SSA_NAME_DEF_STMT (def);
3427	if (gimple_code (g: phi) != GIMPLE_PHI)
3428	{
3429	cand = add_candidate_1 (data,
3430	base: iv->base, step: iv->step, important: true, pos: IP_ORIGINAL, NULL,
3431	SSA_NAME_DEF_STMT (def));
3432	if (cand)
3433	{
3434	cand->var_before = iv->ssa_name;
3435	cand->var_after = def;
3436	}
3437	}
3438	else
3439	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3440	}
3441	}
3442
3443	/ Adds candidates based on the old induction variables. /
3444
3445	static void
3446	add_iv_candidate_for_bivs (struct ivopts_data *data)
3447	{
3448	unsigned i;
3449	struct iv *iv;
3450	bitmap_iterator bi;
3451
3452	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
3453	{
3454	iv = ver_info (data, ver: i)->iv;
3455	if (iv && iv->biv_p && !integer_zerop (iv->step))
3456	add_iv_candidate_for_biv (data, iv);
3457	}
3458	}
3459
3460	/ Record common candidate {BASE, STEP} derived from USE in hashtable. /
3461
3462	static void
3463	record_common_cand (struct ivopts_data *data, tree base,
3464	tree step, struct iv_use *use)
3465	{
3466	class iv_common_cand ent;
3467	class iv_common_cand **slot;
3468
3469	ent.base = base;
3470	ent.step = step;
3471	ent.hash = iterative_hash_expr (tree: base, seed: `0`);
3472	ent.hash = iterative_hash_expr (tree: step, seed: ent.hash);
3473
3474	slot = data->iv_common_cand_tab->find_slot (value: &ent, insert: INSERT);
3475	if (*slot == NULL)
3476	{
3477	slot = new* iv_common_cand ();
3478	(*slot)->base = base;
3479	(*slot)->step = step;
3480	(*slot)->uses.create (nelems: `8`);
3481	(*slot)->hash = ent.hash;
3482	data->iv_common_cands.safe_push (obj: (*slot));
3483	}
3484
3485	gcc_assert (use != NULL);
3486	(*slot)->uses.safe_push (obj: use);
3487	return;
3488	}
3489
3490	/ Comparison function used to sort common candidates. /
3491
3492	static int
3493	common_cand_cmp (const void p1, const* void *p2)
3494	{
3495	unsigned n1, n2;
3496	const class iv_common_cand *const *const ccand1
3497	= (const class iv_common_cand *const *)p1;
3498	const class iv_common_cand *const *const ccand2
3499	= (const class iv_common_cand *const *)p2;
3500
3501	n1 = (*ccand1)->uses.length ();
3502	n2 = (*ccand2)->uses.length ();
3503	return n2 - n1;
3504	}
3505
3506	/ Adds IV candidates based on common candidated recorded. /
3507
3508	static void
3509	add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3510	{
3511	unsigned i, j;
3512	struct iv_cand cand_1, cand_2;
3513
3514	data->iv_common_cands.qsort (common_cand_cmp);
3515	for (i = `0`; i < data->iv_common_cands.length (); i++)
3516	{
3517	class iv_common_cand *ptr = data->iv_common_cands [i];
3518
3519	/ Only add IV candidate if it's derived from multiple uses. /
3520	if (ptr->uses.length () <= `1`)
3521	break;
3522
3523	cand_1 = NULL;
3524	cand_2 = NULL;
3525	if (ip_normal_pos (data->current_loop))
3526	cand_1 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3527	important: false, pos: IP_NORMAL, NULL, NULL);
3528
3529	if (ip_end_pos (data->current_loop)
3530	&& allow_ip_end_pos_p (loop: data->current_loop))
3531	cand_2 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3532	important: false, pos: IP_END, NULL, NULL);
3533
3534	/ Bind deriving uses and the new candidates. /
3535	for (j = `0`; j < ptr->uses.length (); j++)
3536	{
3537	struct iv_group *group = data->vgroups [ptr->uses [j]->group_id];
3538	if (cand_1)
3539	bitmap_set_bit (group->related_cands, cand_1->id);
3540	if (cand_2)
3541	bitmap_set_bit (group->related_cands, cand_2->id);
3542	}
3543	}
3544
3545	/ Release data since it is useless from this point. /
3546	data->iv_common_cand_tab->empty ();
3547	data->iv_common_cands.truncate (size: `0`);
3548	}
3549
3550	/ Adds candidates based on the value of USE's iv. /
3551
3552	static void
3553	add_iv_candidate_for_use (struct ivopts_data data, struct* iv_use *use)
3554	{
3555	poly_uint64 offset;
3556	tree base;
3557	struct iv *iv = use->iv;
3558	tree basetype = TREE_TYPE (iv->base);
3559
3560	/ Don't add candidate for iv_use with non integer, pointer or non-mode*
3561	precision types, instead, add candidate for the corresponding scev in
3562	unsigned type with the same precision. See PR93674 for more info. /*
3563	if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3564	\|\| !type_has_mode_precision_p (t: basetype))
3565	{
3566	basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3567	TYPE_UNSIGNED (basetype));
3568	add_candidate (data, fold_convert (basetype, iv->base),
3569	fold_convert (basetype, iv->step), important: false, NULL);
3570	return;
3571	}
3572
3573	add_candidate (data, base: iv->base, step: iv->step, important: false, use);
3574
3575	/ Record common candidate for use in case it can be shared by others. /
3576	record_common_cand (data, base: iv->base, step: iv->step, use);
3577
3578	/ Record common candidate with initial value zero. /
3579	basetype = TREE_TYPE (iv->base);
3580	if (POINTER_TYPE_P (basetype))
3581	basetype = sizetype;
3582	record_common_cand (data, base: build_int_cst (basetype, `0`), step: iv->step, use);
3583
3584	/ Compare the cost of an address with an unscaled index with the cost of*
3585	an address with a scaled index and add candidate if useful. /*
3586	poly_int64 step;
3587	if (use != NULL
3588	&& poly_int_tree_p (t: iv->step, value: &step)
3589	&& address_p (type: use->type))
3590	{
3591	poly_int64 new_step;
3592	unsigned int fact = preferred_mem_scale_factor
3593	(base: use->iv->base,
3594	TYPE_MODE (use->mem_type),
3595	speed: optimize_loop_for_speed_p (data->current_loop));
3596
3597	if (fact != `1`
3598	&& multiple_p (a: step, b: fact, multiple: &new_step))
3599	add_candidate (data, size_int (`0`),
3600	step: wide_int_to_tree (sizetype, cst: new_step),
3601	important: true, NULL);
3602	}
3603
3604	/ Record common candidate with constant offset stripped in base.*
3605	Like the use itself, we also add candidate directly for it. /*
3606	base = strip_offset (expr: iv->base, offset: &offset);
3607	if (maybe_ne (a: offset, b: `0U`) \|\| base != iv->base)
3608	{
3609	record_common_cand (data, base, step: iv->step, use);
3610	add_candidate (data, base, step: iv->step, important: false, use);
3611	}
3612
3613	/ Record common candidate with base_object removed in base. /
3614	base = iv->base;
3615	STRIP_NOPS (base);
3616	if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3617	{
3618	tree step = iv->step;
3619
3620	STRIP_NOPS (step);
3621	base = TREE_OPERAND (base, `1`);
3622	step = fold_convert (sizetype, step);
3623	record_common_cand (data, base, step, use);
3624	/ Also record common candidate with offset stripped. /
3625	tree alt_base, alt_offset;
3626	split_constant_offset (base, &alt_base, &alt_offset);
3627	if (!integer_zerop (alt_offset))
3628	record_common_cand (data, base: alt_base, step, use);
3629	}
3630
3631	/ At last, add auto-incremental candidates. Make such variables*
3632	important since other iv uses with same base object may be based
3633	on it. /*
3634	if (use != NULL && address_p (type: use->type))
3635	add_autoinc_candidates (data, base: iv->base, step: iv->step, important: true, use);
3636	}
3637
3638	/ Adds candidates based on the uses. /
3639
3640	static void
3641	add_iv_candidate_for_groups (struct ivopts_data *data)
3642	{
3643	unsigned i;
3644
3645	/ Only add candidate for the first use in group. /
3646	for (i = `0`; i < data->vgroups.length (); i++)
3647	{
3648	struct iv_group *group = data->vgroups [i];
3649
3650	gcc_assert (group->vuses[`0`] != NULL);
3651	add_iv_candidate_for_use (data, use: group->vuses [`0`]);
3652	}
3653	add_iv_candidate_derived_from_uses (data);
3654	}
3655
3656	/ Record important candidates and add them to related_cands bitmaps. /
3657
3658	static void
3659	record_important_candidates (struct ivopts_data *data)
3660	{
3661	unsigned i;
3662	struct iv_group *group;
3663
3664	for (i = `0`; i < data->vcands.length (); i++)
3665	{
3666	struct iv_cand *cand = data->vcands [i];
3667
3668	if (cand->important)
3669	bitmap_set_bit (data->important_candidates, i);
3670	}
3671
3672	data->consider_all_candidates = (data->vcands.length ()
3673	<= CONSIDER_ALL_CANDIDATES_BOUND);
3674
3675	/ Add important candidates to groups' related_cands bitmaps. /
3676	for (i = `0`; i < data->vgroups.length (); i++)
3677	{
3678	group = data->vgroups [i];
3679	bitmap_ior_into (group->related_cands, data->important_candidates);
3680	}
3681	}
3682
3683	/ Allocates the data structure mapping the (use, candidate) pairs to costs.*
3684	If consider_all_candidates is true, we use a two-dimensional array, otherwise
3685	we allocate a simple list to every use. /*
3686
3687	static void
3688	alloc_use_cost_map (struct ivopts_data *data)
3689	{
3690	unsigned i, size, s;
3691
3692	for (i = `0`; i < data->vgroups.length (); i++)
3693	{
3694	struct iv_group *group = data->vgroups [i];
3695
3696	if (data->consider_all_candidates)
3697	size = data->vcands.length ();
3698	else
3699	{
3700	s = bitmap_count_bits (group->related_cands);
3701
3702	/ Round up to the power of two, so that moduling by it is fast. /
3703	size = s ? (`1` << ceil_log2 (x: s)) : `1`;
3704	}
3705
3706	group->n_map_members = size;
3707	group->cost_map = XCNEWVEC (class cost_pair, size);
3708	}
3709	}
3710
3711	/ Sets cost of (GROUP, CAND) pair to COST and record that it depends*
3712	on invariants INV_VARS and that the value used in expressing it is
3713	VALUE, and in case of iv elimination the comparison operator is COMP. /*
3714
3715	static void
3716	set_group_iv_cost (struct ivopts_data *data,
3717	struct iv_group group, struct* iv_cand *cand,
3718	comp_cost cost, bitmap inv_vars, tree value,
3719	enum tree_code comp, bitmap inv_exprs)
3720	{
3721	unsigned i, s;
3722
3723	if (cost.infinite_cost_p ())
3724	{
3725	BITMAP_FREE (inv_vars);
3726	BITMAP_FREE (inv_exprs);
3727	return;
3728	}
3729
3730	if (data->consider_all_candidates)
3731	{
3732	group->cost_map[cand->id].cand = cand;
3733	group->cost_map[cand->id].cost = cost;
3734	group->cost_map[cand->id].inv_vars = inv_vars;
3735	group->cost_map[cand->id].inv_exprs = inv_exprs;
3736	group->cost_map[cand->id].value = value;
3737	group->cost_map[cand->id].comp = comp;
3738	return;
3739	}
3740
3741	/ n_map_members is a power of two, so this computes modulo. /
3742	s = cand->id & (group->n_map_members - `1`);
3743	for (i = s; i < group->n_map_members; i++)
3744	if (!group->cost_map[i].cand)
3745	goto found;
3746	for (i = `0`; i < s; i++)
3747	if (!group->cost_map[i].cand)
3748	goto found;
3749
3750	gcc_unreachable ();
3751
3752	found:
3753	group->cost_map[i].cand = cand;
3754	group->cost_map[i].cost = cost;
3755	group->cost_map[i].inv_vars = inv_vars;
3756	group->cost_map[i].inv_exprs = inv_exprs;
3757	group->cost_map[i].value = value;
3758	group->cost_map[i].comp = comp;
3759	}
3760
3761	/ Gets cost of (GROUP, CAND) pair. /
3762
3763	static class cost_pair *
3764	get_group_iv_cost (struct ivopts_data data, struct* iv_group *group,
3765	struct iv_cand *cand)
3766	{
3767	unsigned i, s;
3768	class cost_pair *ret;
3769
3770	if (!cand)
3771	return NULL;
3772
3773	if (data->consider_all_candidates)
3774	{
3775	ret = group->cost_map + cand->id;
3776	if (!ret->cand)
3777	return NULL;
3778
3779	return ret;
3780	}
3781
3782	/ n_map_members is a power of two, so this computes modulo. /
3783	s = cand->id & (group->n_map_members - `1`);
3784	for (i = s; i < group->n_map_members; i++)
3785	if (group->cost_map[i].cand == cand)
3786	return group->cost_map + i;
3787	else if (group->cost_map[i].cand == NULL)
3788	return NULL;
3789	for (i = `0`; i < s; i++)
3790	if (group->cost_map[i].cand == cand)
3791	return group->cost_map + i;
3792	else if (group->cost_map[i].cand == NULL)
3793	return NULL;
3794
3795	return NULL;
3796	}
3797
3798	/ Produce DECL_RTL for object obj so it looks like it is stored in memory. /
3799	static rtx
3800	produce_memory_decl_rtl (tree obj, int *regno)
3801	{
3802	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3803	machine_mode address_mode = targetm.addr_space.address_mode (as);
3804	rtx x;
3805
3806	gcc_assert (obj);
3807	if (TREE_STATIC (obj) \|\| DECL_EXTERNAL (obj))
3808	{
3809	const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3810	x = gen_rtx_SYMBOL_REF (address_mode, name);
3811	SET_SYMBOL_REF_DECL (x, obj);
3812	x = gen_rtx_MEM (DECL_MODE (obj), x);
3813	set_mem_addr_space (x, as);
3814	targetm.encode_section_info (obj, x, true);
3815	}
3816	else
3817	{
3818	x = gen_raw_REG (address_mode, (*regno)++);
3819	x = gen_rtx_MEM (DECL_MODE (obj), x);
3820	set_mem_addr_space (x, as);
3821	}
3822
3823	return x;
3824	}
3825
3826	/ Prepares decl_rtl for variables referred in EXPR_P. Callback for
3827	walk_tree. DATA contains the actual fake register number. /*
3828
3829	static tree
3830	prepare_decl_rtl (tree expr_p, int* ws, void* *data)
3831	{
3832	tree obj = NULL_TREE;
3833	rtx x = NULL_RTX;
3834	int regno = (int* *) data;
3835
3836	switch (TREE_CODE (*expr_p))
3837	{
3838	case ADDR_EXPR:
3839	for (expr_p = &TREE_OPERAND (*expr_p, `0`);
3840	handled_component_p (t: *expr_p);
3841	expr_p = &TREE_OPERAND (*expr_p, `0`))
3842	continue;
3843	obj = *expr_p;
3844	if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3845	x = produce_memory_decl_rtl (obj, regno);
3846	break;
3847
3848	case SSA_NAME:
3849	*ws = `0`;
3850	obj = SSA_NAME_VAR (*expr_p);
3851	/ Defer handling of anonymous SSA_NAMEs to the expander. /
3852	if (!obj)
3853	return NULL_TREE;
3854	if (!DECL_RTL_SET_P (obj))
3855	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3856	break;
3857
3858	case VAR_DECL:
3859	case PARM_DECL:
3860	case RESULT_DECL:
3861	*ws = `0`;
3862	obj = *expr_p;
3863
3864	if (DECL_RTL_SET_P (obj))
3865	break;
3866
3867	if (DECL_MODE (obj) == BLKmode)
3868	x = produce_memory_decl_rtl (obj, regno);
3869	else
3870	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3871
3872	break;
3873
3874	default:
3875	break;
3876	}
3877
3878	if (x)
3879	{
3880	decl_rtl_to_reset.safe_push (obj);
3881	SET_DECL_RTL (obj, x);
3882	}
3883
3884	return NULL_TREE;
3885	}
3886
3887	/ Predict whether the given loop will be transformed in the RTL*
3888	doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3889	This is only for target independent checks, see targetm.predict_doloop_p
3890	for the target dependent ones.
3891
3892	Note that according to some initial investigation, some checks like costly
3893	niter check and invalid stmt scanning don't have much gains among general
3894	cases, so keep this as simple as possible first.
3895
3896	Some RTL specific checks seems unable to be checked in gimple, if any new
3897	checks or easy checks _are_ missing here, please add them. /*
3898
3899	static bool
3900	generic_predict_doloop_p (struct ivopts_data *data)
3901	{
3902	class loop *loop = data->current_loop;
3903
3904	/ Call target hook for target dependent checks. /
3905	if (!targetm.predict_doloop_p (loop))
3906	{
3907	if (dump_file && (dump_flags & TDF_DETAILS))
3908	fprintf (stream: dump_file, format: "Predict doloop failure due to"
3909	" target specific checks.\n");
3910	return false;
3911	}
3912
3913	/ Similar to doloop_optimize, check iteration description to know it's*
3914	suitable or not. Keep it as simple as possible, feel free to extend it
3915	if you find any multiple exits cases matter. /*
3916	edge exit = single_dom_exit (loop);
3917	class tree_niter_desc *niter_desc;
3918	if (!exit \|\| !(niter_desc = niter_for_exit (data, exit)))
3919	{
3920	if (dump_file && (dump_flags & TDF_DETAILS))
3921	fprintf (stream: dump_file, format: "Predict doloop failure due to"
3922	" unexpected niters.\n");
3923	return false;
3924	}
3925
3926	/ Similar to doloop_optimize, check whether iteration count too small*
3927	and not profitable. /*
3928	HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3929	if (est_niter == -`1`)
3930	est_niter = get_likely_max_loop_iterations_int (loop);
3931	if (est_niter >= `0` && est_niter < `3`)
3932	{
3933	if (dump_file && (dump_flags & TDF_DETAILS))
3934	fprintf (stream: dump_file,
3935	format: "Predict doloop failure due to"
3936	" too few iterations (%u).\n",
3937	(unsigned int) est_niter);
3938	return false;
3939	}
3940
3941	return true;
3942	}
3943
3944	/ Determines cost of the computation of EXPR. /
3945
3946	static unsigned
3947	computation_cost (tree expr, bool speed)
3948	{
3949	rtx_insn *seq;
3950	rtx rslt;
3951	tree type = TREE_TYPE (expr);
3952	unsigned cost;
3953	/ Avoid using hard regs in ways which may be unsupported. /
3954	int regno = LAST_VIRTUAL_REGISTER + `1`;
3955	struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);
3956	enum node_frequency real_frequency = node->frequency;
3957
3958	node->frequency = NODE_FREQUENCY_NORMAL;
3959	crtl->maybe_hot_insn_p = speed;
3960	walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3961	start_sequence ();
3962	rslt = expand_expr (exp: expr, NULL_RTX, TYPE_MODE (type), modifier: EXPAND_NORMAL);
3963	seq = get_insns ();
3964	end_sequence ();
3965	default_rtl_profile ();
3966	node->frequency = real_frequency;
3967
3968	cost = seq_cost (seq, speed);
3969	if (MEM_P (rslt))
3970	cost += address_cost (XEXP (rslt, `0`), TYPE_MODE (type),
3971	TYPE_ADDR_SPACE (type), speed);
3972	else if (!REG_P (rslt))
3973	cost += set_src_cost (x: rslt, TYPE_MODE (type), speed_p: speed);
3974
3975	return cost;
3976	}
3977
3978	/ Returns variable containing the value of candidate CAND at statement AT. /
3979
3980	static tree
3981	var_at_stmt (class loop loop, struct* iv_cand cand, gimple stmt)
3982	{
3983	if (stmt_after_increment (loop, cand, stmt))
3984	return cand->var_after;
3985	else
3986	return cand->var_before;
3987	}
3988
3989	/ If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the*
3990	same precision that is at least as wide as the precision of TYPE, stores
3991	BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3992	type of A and B. /*
3993
3994	static tree
3995	determine_common_wider_type (tree a, tree b)
3996	{
3997	tree wider_type = NULL;
3998	tree suba, subb;
3999	tree atype = TREE_TYPE (*a);
4000
4001	if (CONVERT_EXPR_P (*a))
4002	{
4003	suba = TREE_OPERAND (*a, `0`);
4004	wider_type = TREE_TYPE (suba);
4005	if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
4006	return atype;
4007	}
4008	else
4009	return atype;
4010
4011	if (CONVERT_EXPR_P (*b))
4012	{
4013	subb = TREE_OPERAND (*b, `0`);
4014	if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
4015	return atype;
4016	}
4017	else
4018	return atype;
4019
4020	*a = suba;
4021	*b = subb;
4022	return wider_type;
4023	}
4024
4025	/ Determines the expression by that USE is expressed from induction variable*
4026	CAND at statement AT in LOOP. The expression is stored in two parts in a
4027	decomposed form. The invariant part is stored in AFF_INV; while variant
4028	part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
4029	non-null. Returns false if USE cannot be expressed using CAND. /*
4030
4031	static bool
4032	get_computation_aff_1 (class loop loop, gimple at, struct iv_use *use,
4033	struct iv_cand cand, class* aff_tree *aff_inv,
4034	class aff_tree aff_var, widest_int prat = NULL)
4035	{
4036	tree ubase = use->iv->base, ustep = use->iv->step;
4037	tree cbase = cand->iv->base, cstep = cand->iv->step;
4038	tree common_type, uutype, var, cstep_common;
4039	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4040	aff_tree aff_cbase;
4041	widest_int rat;
4042
4043	/ We must have a precision to express the values of use. /
4044	if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4045	return false;
4046
4047	var = var_at_stmt (loop, cand, stmt: at);
4048	uutype = unsigned_type_for (utype);
4049
4050	/ If the conversion is not noop, perform it. /
4051	if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4052	{
4053	if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4054	&& (CONVERT_EXPR_P (cstep) \|\| poly_int_tree_p (t: cstep)))
4055	{
4056	tree inner_base, inner_step, inner_type;
4057	inner_base = TREE_OPERAND (cbase, `0`);
4058	if (CONVERT_EXPR_P (cstep))
4059	inner_step = TREE_OPERAND (cstep, `0`);
4060	else
4061	inner_step = cstep;
4062
4063	inner_type = TREE_TYPE (inner_base);
4064	/ If candidate is added from a biv whose type is smaller than*
4065	ctype, we know both candidate and the biv won't overflow.
4066	In this case, it's safe to skip the convertion in candidate.
4067	As an example, (unsigned short)((unsigned long)A) equals to
4068	(unsigned short)A, if A has a type no larger than short. /*
4069	if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4070	{
4071	cbase = inner_base;
4072	cstep = inner_step;
4073	}
4074	}
4075	cbase = fold_convert (uutype, cbase);
4076	cstep = fold_convert (uutype, cstep);
4077	var = fold_convert (uutype, var);
4078	}
4079
4080	/ Ratio is 1 when computing the value of biv cand by itself.*
4081	We can't rely on constant_multiple_of in this case because the
4082	use is created after the original biv is selected. The call
4083	could fail because of inconsistent fold behavior. See PR68021
4084	for more information. /*
4085	if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4086	{
4087	gcc_assert (is_gimple_assign (use->stmt));
4088	gcc_assert (use->iv->ssa_name == cand->var_after);
4089	gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4090	rat = `1`;
4091	}
4092	else if (!constant_multiple_of (top: ustep, bot: cstep, mul: &rat))
4093	return false;
4094
4095	if (prat)
4096	*prat = rat;
4097
4098	/ In case both UBASE and CBASE are shortened to UUTYPE from some common*
4099	type, we achieve better folding by computing their difference in this
4100	wider type, and cast the result to UUTYPE. We do not need to worry about
4101	overflows, as all the arithmetics will in the end be performed in UUTYPE
4102	anyway. /*
4103	common_type = determine_common_wider_type (a: &ubase, b: &cbase);
4104
4105	/ use = ubase - ratio * cbase + ratio * var. /
4106	tree_to_aff_combination (ubase, common_type, aff_inv);
4107	tree_to_aff_combination (cbase, common_type, &aff_cbase);
4108	tree_to_aff_combination (var, uutype, aff_var);
4109
4110	/ We need to shift the value if we are after the increment. /
4111	if (stmt_after_increment (loop, cand, stmt: at))
4112	{
4113	aff_tree cstep_aff;
4114
4115	if (common_type != uutype)
4116	cstep_common = fold_convert (common_type, cstep);
4117	else
4118	cstep_common = cstep;
4119
4120	tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4121	aff_combination_add (&aff_cbase, &cstep_aff);
4122	}
4123
4124	aff_combination_scale (&aff_cbase, -rat);
4125	aff_combination_add (aff_inv, &aff_cbase);
4126	if (common_type != uutype)
4127	aff_combination_convert (aff_inv, uutype);
4128
4129	aff_combination_scale (aff_var, rat);
4130	return true;
4131	}
4132
4133	/ Determines the expression by that USE is expressed from induction variable*
4134	CAND at statement AT in LOOP. The expression is stored in a decomposed
4135	form into AFF. Returns false if USE cannot be expressed using CAND. /*
4136
4137	static bool
4138	get_computation_aff (class loop loop, gimple at, struct iv_use *use,
4139	struct iv_cand cand, class* aff_tree *aff)
4140	{
4141	aff_tree aff_var;
4142
4143	if (!get_computation_aff_1 (loop, at, use, cand, aff_inv: aff, aff_var: &aff_var))
4144	return false;
4145
4146	aff_combination_add (aff, &aff_var);
4147	return true;
4148	}
4149
4150	/ Return the type of USE. /
4151
4152	static tree
4153	get_use_type (struct iv_use *use)
4154	{
4155	tree base_type = TREE_TYPE (use->iv->base);
4156	tree type;
4157
4158	if (use->type == USE_REF_ADDRESS)
4159	{
4160	/ The base_type may be a void pointer. Create a pointer type based on*
4161	the mem_ref instead. /*
4162	type = build_pointer_type (TREE_TYPE (*use->op_p));
4163	gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4164	== TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4165	}
4166	else
4167	type = base_type;
4168
4169	return type;
4170	}
4171
4172	/ Determines the expression by that USE is expressed from induction variable*
4173	CAND at statement AT in LOOP. The computation is unshared. /*
4174
4175	static tree
4176	get_computation_at (class loop loop, gimple at,
4177	struct iv_use use, struct* iv_cand *cand)
4178	{
4179	aff_tree aff;
4180	tree type = get_use_type (use);
4181
4182	if (!get_computation_aff (loop, at, use, cand, aff: &aff))
4183	return NULL_TREE;
4184	unshare_aff_combination (&aff);
4185	return fold_convert (type, aff_combination_to_tree (&aff));
4186	}
4187
4188	/ Like get_computation_at, but try harder, even if the computation*
4189	is more expensive. Intended for debug stmts. /*
4190
4191	static tree
4192	get_debug_computation_at (class loop loop, gimple at,
4193	struct iv_use use, struct* iv_cand *cand)
4194	{
4195	if (tree ret = get_computation_at (loop, at, use, cand))
4196	return ret;
4197
4198	tree ubase = use->iv->base, ustep = use->iv->step;
4199	tree cbase = cand->iv->base, cstep = cand->iv->step;
4200	tree var;
4201	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4202	widest_int rat;
4203
4204	/ We must have a precision to express the values of use. /
4205	if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4206	return NULL_TREE;
4207
4208	/ Try to handle the case that get_computation_at doesn't,*
4209	try to express
4210	use = ubase + (var - cbase) / ratio. /*
4211	if (!constant_multiple_of (top: cstep, fold_convert (TREE_TYPE (cstep), ustep),
4212	mul: &rat))
4213	return NULL_TREE;
4214
4215	bool neg_p = false;
4216	if (wi::neg_p (x: rat))
4217	{
4218	if (TYPE_UNSIGNED (ctype))
4219	return NULL_TREE;
4220	neg_p = true;
4221	rat = wi::neg (x: rat);
4222	}
4223
4224	/ If both IVs can wrap around and CAND doesn't have a power of two step,*
4225	it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4226	the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4227	uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4228	... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. /*
4229	if (!use->iv->no_overflow
4230	&& !cand->iv->no_overflow
4231	&& !integer_pow2p (cstep))
4232	return NULL_TREE;
4233
4234	int bits = wi::exact_log2 (rat);
4235	if (bits == -`1`)
4236	bits = wi::floor_log2 (rat) + `1`;
4237	if (!cand->iv->no_overflow
4238	&& TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4239	return NULL_TREE;
4240
4241	var = var_at_stmt (loop, cand, stmt: at);
4242
4243	if (POINTER_TYPE_P (ctype))
4244	{
4245	ctype = unsigned_type_for (ctype);
4246	cbase = fold_convert (ctype, cbase);
4247	cstep = fold_convert (ctype, cstep);
4248	var = fold_convert (ctype, var);
4249	}
4250
4251	if (stmt_after_increment (loop, cand, stmt: at))
4252	var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4253	unshare_expr (cstep));
4254
4255	var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4256	var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4257	wide_int_to_tree (TREE_TYPE (var), rat));
4258	if (POINTER_TYPE_P (utype))
4259	{
4260	var = fold_convert (sizetype, var);
4261	if (neg_p)
4262	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4263	var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4264	}
4265	else
4266	{
4267	var = fold_convert (utype, var);
4268	var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4269	ubase, var);
4270	}
4271	return var;
4272	}
4273
4274	/ Adjust the cost COST for being in loop setup rather than loop body.*
4275	If we're optimizing for space, the loop setup overhead is constant;
4276	if we're optimizing for speed, amortize it over the per-iteration cost.
4277	If ROUND_UP_P is true, the result is round up rather than to zero when
4278	optimizing for speed. /*
4279	static int64_t
4280	adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4281	bool round_up_p = false)
4282	{
4283	if (cost == INFTY)
4284	return cost;
4285	else if (optimize_loop_for_speed_p (data->current_loop))
4286	{
4287	int64_t niters = (int64_t) avg_loop_niter (loop: data->current_loop);
4288	return (cost + (round_up_p ? niters - `1` : `0`)) / niters;
4289	}
4290	else
4291	return cost;
4292	}
4293
4294	/ Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the*
4295	EXPR operand holding the shift. COST0 and COST1 are the costs for
4296	calculating the operands of EXPR. Returns true if successful, and returns
4297	the cost in COST. /*
4298
4299	static bool
4300	get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4301	comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4302	{
4303	comp_cost res;
4304	tree op1 = TREE_OPERAND (expr, `1`);
4305	tree cst = TREE_OPERAND (mult, `1`);
4306	tree multop = TREE_OPERAND (mult, `0`);
4307	int m = exact_log2 (x: int_cst_value (cst));
4308	int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4309	int as_cost, sa_cost;
4310	bool mult_in_op1;
4311
4312	if (!(m >= `0` && m < maxm))
4313	return false;
4314
4315	STRIP_NOPS (op1);
4316	mult_in_op1 = operand_equal_p (op1, mult, flags: `0`);
4317
4318	as_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
4319
4320	/ If the target has a cheap shift-and-add or shift-and-sub instruction,*
4321	use that in preference to a shift insn followed by an add insn. /*
4322	sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4323	? shiftadd_cost (speed, mode, bits: m)
4324	: (mult_in_op1
4325	? shiftsub1_cost (speed, mode, bits: m)
4326	: shiftsub0_cost (speed, mode, bits: m)));
4327
4328	res = comp_cost (MIN (as_cost, sa_cost), `0`);
4329	res += (mult_in_op1 ? cost0 : cost1);
4330
4331	STRIP_NOPS (multop);
4332	if (!is_gimple_val (multop))
4333	res += force_expr_to_var_cost (multop, speed);
4334
4335	*cost = res;
4336	return true;
4337	}
4338
4339	/ Estimates cost of forcing expression EXPR into a variable. /
4340
4341	static comp_cost
4342	force_expr_to_var_cost (tree expr, bool speed)
4343	{
4344	static bool costs_initialized = false;
4345	static unsigned integer_cost [`2`];
4346	static unsigned symbol_cost [`2`];
4347	static unsigned address_cost [`2`];
4348	tree op0, op1;
4349	comp_cost cost0, cost1, cost;
4350	machine_mode mode;
4351	scalar_int_mode int_mode;
4352
4353	if (!costs_initialized)
4354	{
4355	tree type = build_pointer_type (integer_type_node);
4356	tree var, addr;
4357	rtx x;
4358	int i;
4359
4360	var = create_tmp_var_raw (integer_type_node, "test_var");
4361	TREE_STATIC (var) = `1`;
4362	x = produce_memory_decl_rtl (obj: var, NULL);
4363	SET_DECL_RTL (var, x);
4364
4365	addr = build1 (ADDR_EXPR, type, var);
4366
4367
4368	for (i = `0`; i < `2`; i++)
4369	{
4370	integer_cost[i] = computation_cost (expr: build_int_cst (integer_type_node,
4371	`2000`), speed: i);
4372
4373	symbol_cost[i] = computation_cost (expr: addr, speed: i) + `1`;
4374
4375	address_cost[i]
4376	= computation_cost (fold_build_pointer_plus_hwi (addr, `2000`), speed: i) + `1`;
4377	if (dump_file && (dump_flags & TDF_DETAILS))
4378	{
4379	fprintf (stream: dump_file, format: "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4380	fprintf (stream: dump_file, format: " integer %d\n", (int) integer_cost[i]);
4381	fprintf (stream: dump_file, format: " symbol %d\n", (int) symbol_cost[i]);
4382	fprintf (stream: dump_file, format: " address %d\n", (int) address_cost[i]);
4383	fprintf (stream: dump_file, format: " other %d\n", (int) target_spill_cost[i]);
4384	fprintf (stream: dump_file, format: "\n");
4385	}
4386	}
4387
4388	costs_initialized = true;
4389	}
4390
4391	STRIP_NOPS (expr);
4392
4393	if (SSA_VAR_P (expr))
4394	return no_cost;
4395
4396	if (is_gimple_min_invariant (expr))
4397	{
4398	if (poly_int_tree_p (t: expr))
4399	return comp_cost (integer_cost [speed], `0`);
4400
4401	if (TREE_CODE (expr) == ADDR_EXPR)
4402	{
4403	tree obj = TREE_OPERAND (expr, `0`);
4404
4405	if (VAR_P (obj)
4406	\|\| TREE_CODE (obj) == PARM_DECL
4407	\|\| TREE_CODE (obj) == RESULT_DECL)
4408	return comp_cost (symbol_cost [speed], `0`);
4409	}
4410
4411	return comp_cost (address_cost [speed], `0`);
4412	}
4413
4414	switch (TREE_CODE (expr))
4415	{
4416	case POINTER_PLUS_EXPR:
4417	case PLUS_EXPR:
4418	case MINUS_EXPR:
4419	case MULT_EXPR:
4420	case TRUNC_DIV_EXPR:
4421	case BIT_AND_EXPR:
4422	case BIT_IOR_EXPR:
4423	case LSHIFT_EXPR:
4424	case RSHIFT_EXPR:
4425	op0 = TREE_OPERAND (expr, `0`);
4426	op1 = TREE_OPERAND (expr, `1`);
4427	STRIP_NOPS (op0);
4428	STRIP_NOPS (op1);
4429	break;
4430
4431	CASE_CONVERT:
4432	case NEGATE_EXPR:
4433	case BIT_NOT_EXPR:
4434	op0 = TREE_OPERAND (expr, `0`);
4435	STRIP_NOPS (op0);
4436	op1 = NULL_TREE;
4437	break;
4438	/ See add_iv_candidate_for_doloop, for doloop may_be_zero case, we*
4439	introduce COND_EXPR for IV base, need to support better cost estimation
4440	for this COND_EXPR and tcc_comparison. /*
4441	case COND_EXPR:
4442	op0 = TREE_OPERAND (expr, `1`);
4443	STRIP_NOPS (op0);
4444	op1 = TREE_OPERAND (expr, `2`);
4445	STRIP_NOPS (op1);
4446	break;
4447	case LT_EXPR:
4448	case LE_EXPR:
4449	case GT_EXPR:
4450	case GE_EXPR:
4451	case EQ_EXPR:
4452	case NE_EXPR:
4453	case UNORDERED_EXPR:
4454	case ORDERED_EXPR:
4455	case UNLT_EXPR:
4456	case UNLE_EXPR:
4457	case UNGT_EXPR:
4458	case UNGE_EXPR:
4459	case UNEQ_EXPR:
4460	case LTGT_EXPR:
4461	case MAX_EXPR:
4462	case MIN_EXPR:
4463	op0 = TREE_OPERAND (expr, `0`);
4464	STRIP_NOPS (op0);
4465	op1 = TREE_OPERAND (expr, `1`);
4466	STRIP_NOPS (op1);
4467	break;
4468
4469	default:
4470	/ Just an arbitrary value, FIXME. /
4471	return comp_cost (target_spill_cost[speed], `0`);
4472	}
4473
4474	if (op0 == NULL_TREE
4475	\|\| TREE_CODE (op0) == SSA_NAME \|\| CONSTANT_CLASS_P (op0))
4476	cost0 = no_cost;
4477	else
4478	cost0 = force_expr_to_var_cost (expr: op0, speed);
4479
4480	if (op1 == NULL_TREE
4481	\|\| TREE_CODE (op1) == SSA_NAME \|\| CONSTANT_CLASS_P (op1))
4482	cost1 = no_cost;
4483	else
4484	cost1 = force_expr_to_var_cost (expr: op1, speed);
4485
4486	mode = TYPE_MODE (TREE_TYPE (expr));
4487	switch (TREE_CODE (expr))
4488	{
4489	case POINTER_PLUS_EXPR:
4490	case PLUS_EXPR:
4491	case MINUS_EXPR:
4492	case NEGATE_EXPR:
4493	cost = comp_cost (add_cost (speed, mode), `0`);
4494	if (TREE_CODE (expr) != NEGATE_EXPR)
4495	{
4496	tree mult = NULL_TREE;
4497	comp_cost sa_cost;
4498	if (TREE_CODE (op1) == MULT_EXPR)
4499	mult = op1;
4500	else if (TREE_CODE (op0) == MULT_EXPR)
4501	mult = op0;
4502
4503	if (mult != NULL_TREE
4504	&& is_a <scalar_int_mode> (m: mode, result: &int_mode)
4505	&& cst_and_fits_in_hwi (TREE_OPERAND (mult, `1`))
4506	&& get_shiftadd_cost (expr, mode: int_mode, cost0, cost1, mult,
4507	speed, cost: &sa_cost))
4508	return sa_cost;
4509	}
4510	break;
4511
4512	CASE_CONVERT:
4513	{
4514	tree inner_mode, outer_mode;
4515	outer_mode = TREE_TYPE (expr);
4516	inner_mode = TREE_TYPE (op0);
4517	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4518	TYPE_MODE (inner_mode), speed), `0`);
4519	}
4520	break;
4521
4522	case MULT_EXPR:
4523	if (cst_and_fits_in_hwi (op0))
4524	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4525	mode, speed), `0`);
4526	else if (cst_and_fits_in_hwi (op1))
4527	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4528	mode, speed), `0`);
4529	else
4530	return comp_cost (target_spill_cost [speed], `0`);
4531	break;
4532
4533	case TRUNC_DIV_EXPR:
4534	/ Division by power of two is usually cheap, so we allow it. Forbid*
4535	anything else. /*
4536	if (integer_pow2p (TREE_OPERAND (expr, `1`)))
4537	cost = comp_cost (add_cost (speed, mode), `0`);
4538	else
4539	cost = comp_cost (target_spill_cost[speed], `0`);
4540	break;
4541
4542	case BIT_AND_EXPR:
4543	case BIT_IOR_EXPR:
4544	case BIT_NOT_EXPR:
4545	case LSHIFT_EXPR:
4546	case RSHIFT_EXPR:
4547	cost = comp_cost (add_cost (speed, mode), `0`);
4548	break;
4549	case COND_EXPR:
4550	op0 = TREE_OPERAND (expr, `0`);
4551	STRIP_NOPS (op0);
4552	if (op0 == NULL_TREE \|\| TREE_CODE (op0) == SSA_NAME
4553	\|\| CONSTANT_CLASS_P (op0))
4554	cost = no_cost;
4555	else
4556	cost = force_expr_to_var_cost (expr: op0, speed);
4557	break;
4558	case LT_EXPR:
4559	case LE_EXPR:
4560	case GT_EXPR:
4561	case GE_EXPR:
4562	case EQ_EXPR:
4563	case NE_EXPR:
4564	case UNORDERED_EXPR:
4565	case ORDERED_EXPR:
4566	case UNLT_EXPR:
4567	case UNLE_EXPR:
4568	case UNGT_EXPR:
4569	case UNGE_EXPR:
4570	case UNEQ_EXPR:
4571	case LTGT_EXPR:
4572	case MAX_EXPR:
4573	case MIN_EXPR:
4574	/ Simply use add cost for now, FIXME if there is some more accurate cost*
4575	evaluation way. /*
4576	cost = comp_cost (add_cost (speed, mode), `0`);
4577	break;
4578
4579	default:
4580	gcc_unreachable ();
4581	}
4582
4583	cost += cost0;
4584	cost += cost1;
4585	return cost;
4586	}
4587
4588	/ Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the*
4589	invariants the computation depends on. /*
4590
4591	static comp_cost
4592	force_var_cost (struct ivopts_data data, tree expr, bitmap inv_vars)
4593	{
4594	if (!expr)
4595	return no_cost;
4596
4597	find_inv_vars (data, expr_p: &expr, inv_vars);
4598	return force_expr_to_var_cost (expr, speed: data->speed);
4599	}
4600
/* Returns cost of auto-modifying address expression in shape base + offset.
   AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
   address expression.  The address expression has ADDR_MODE in addr space
   AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
   speed or size.  */

enum ainc_type
{
  AINC_PRE_INC,         /* Pre increment.  */
  AINC_PRE_DEC,         /* Pre decrement.  */
  AINC_POST_INC,        /* Post increment.  */
  AINC_POST_DEC,        /* Post decrement.  */
  AINC_NONE             /* Also the number of auto increment types.  */
};

/* Costs of the auto-increment address forms, indexed by enum ainc_type.  */
struct ainc_cost_data
{
  int64_t costs[AINC_NONE];
};
4620
4621	static comp_cost
4622	get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4623	machine_mode addr_mode, machine_mode mem_mode,
4624	addr_space_t as, bool speed)
4625	{
4626	if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4627	&& !USE_STORE_PRE_DECREMENT (mem_mode)
4628	&& !USE_LOAD_POST_DECREMENT (mem_mode)
4629	&& !USE_STORE_POST_DECREMENT (mem_mode)
4630	&& !USE_LOAD_PRE_INCREMENT (mem_mode)
4631	&& !USE_STORE_PRE_INCREMENT (mem_mode)
4632	&& !USE_LOAD_POST_INCREMENT (mem_mode)
4633	&& !USE_STORE_POST_INCREMENT (mem_mode))
4634	return infinite_cost;
4635
4636	static vec<ainc_cost_data *> ainc_cost_data_list;
4637	unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4638	if (idx >= ainc_cost_data_list.length ())
4639	{
4640	unsigned nsize = ((unsigned) as + `1`) *MAX_MACHINE_MODE;
4641
4642	gcc_assert (nsize > idx);
4643	ainc_cost_data_list.safe_grow_cleared (len: nsize, exact: true);
4644	}
4645
4646	ainc_cost_data *data = ainc_cost_data_list [idx];
4647	if (data == NULL)
4648	{
4649	rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + `1`);
4650
4651	data = (ainc_cost_data ) xcalloc (`1`, sizeof* (*data));
4652	data->costs[AINC_PRE_DEC] = INFTY;
4653	data->costs[AINC_POST_DEC] = INFTY;
4654	data->costs[AINC_PRE_INC] = INFTY;
4655	data->costs[AINC_POST_INC] = INFTY;
4656	if (USE_LOAD_PRE_DECREMENT (mem_mode)
4657	\|\| USE_STORE_PRE_DECREMENT (mem_mode))
4658	{
4659	rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4660
4661	if (memory_address_addr_space_p (mem_mode, addr, as))
4662	data->costs[AINC_PRE_DEC]
4663	= address_cost (addr, mem_mode, as, speed);
4664	}
4665	if (USE_LOAD_POST_DECREMENT (mem_mode)
4666	\|\| USE_STORE_POST_DECREMENT (mem_mode))
4667	{
4668	rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4669
4670	if (memory_address_addr_space_p (mem_mode, addr, as))
4671	data->costs[AINC_POST_DEC]
4672	= address_cost (addr, mem_mode, as, speed);
4673	}
4674	if (USE_LOAD_PRE_INCREMENT (mem_mode)
4675	\|\| USE_STORE_PRE_INCREMENT (mem_mode))
4676	{
4677	rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4678
4679	if (memory_address_addr_space_p (mem_mode, addr, as))
4680	data->costs[AINC_PRE_INC]
4681	= address_cost (addr, mem_mode, as, speed);
4682	}
4683	if (USE_LOAD_POST_INCREMENT (mem_mode)
4684	\|\| USE_STORE_POST_INCREMENT (mem_mode))
4685	{
4686	rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4687
4688	if (memory_address_addr_space_p (mem_mode, addr, as))
4689	data->costs[AINC_POST_INC]
4690	= address_cost (addr, mem_mode, as, speed);
4691	}
4692	ainc_cost_data_list [idx] = data;
4693	}
4694
4695	poly_int64 msize = GET_MODE_SIZE (mode: mem_mode);
4696	if (known_eq (ainc_offset, `0`) && known_eq (msize, ainc_step))
4697	return comp_cost (data->costs[AINC_POST_INC], `0`);
4698	if (known_eq (ainc_offset, `0`) && known_eq (msize, -ainc_step))
4699	return comp_cost (data->costs[AINC_POST_DEC], `0`);
4700	if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4701	return comp_cost (data->costs[AINC_PRE_INC], `0`);
4702	if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4703	return comp_cost (data->costs[AINC_PRE_DEC], `0`);
4704
4705	return infinite_cost;
4706	}
4707
4708	/ Return cost of computing USE's address expression by using CAND.*
4709	AFF_INV and AFF_VAR represent invariant and variant parts of the
4710	address expression, respectively. If AFF_INV is simple, store
4711	the loop invariant variables which are depended by it in INV_VARS;
4712	if AFF_INV is complicated, handle it as a new invariant expression
4713	and record it in INV_EXPR. RATIO indicates multiple times between
4714	steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4715	value to it indicating if this is an auto-increment address. /*
4716
4717	static comp_cost
4718	get_address_cost (struct ivopts_data data, struct* iv_use *use,
4719	struct iv_cand cand, aff_tree aff_inv,
4720	aff_tree *aff_var, HOST_WIDE_INT ratio,
4721	bitmap inv_vars, iv_inv_expr_ent *inv_expr,
4722	bool can_autoinc, bool* speed)
4723	{
4724	rtx addr;
4725	bool simple_inv = true;
4726	tree comp_inv = NULL_TREE, type = aff_var->type;
4727	comp_cost var_cost = no_cost, cost = no_cost;
4728	struct mem_address parts = {NULL_TREE, integer_one_node,
4729	NULL_TREE, NULL_TREE, NULL_TREE};
4730	machine_mode addr_mode = TYPE_MODE (type);
4731	machine_mode mem_mode = TYPE_MODE (use->mem_type);
4732	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4733	/ Only true if ratio != 1. /
4734	bool ok_with_ratio_p = false;
4735	bool ok_without_ratio_p = false;
4736	code_helper code = ERROR_MARK;
4737
4738	if (use->type == USE_PTR_ADDRESS)
4739	{
4740	gcall call = as_a<gcall > (p: use->stmt);
4741	gcc_assert (gimple_call_internal_p (call));
4742	code = gimple_call_internal_fn (gs: call);
4743	}
4744
4745	if (!aff_combination_const_p (aff: aff_inv))
4746	{
4747	parts.index = integer_one_node;
4748	/ Addressing mode "base + index". /
4749	ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4750	if (ratio != `1`)
4751	{
4752	parts.step = wide_int_to_tree (type, cst: ratio);
4753	/ Addressing mode "base + index << scale". /
4754	ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4755	if (!ok_with_ratio_p)
4756	parts.step = NULL_TREE;
4757	}
4758	if (ok_with_ratio_p \|\| ok_without_ratio_p)
4759	{
4760	if (maybe_ne (a: aff_inv->offset, b: `0`))
4761	{
4762	parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4763	/ Addressing mode "base + index [<< scale] + offset". /
4764	if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4765	parts.offset = NULL_TREE;
4766	else
4767	aff_inv->offset = `0`;
4768	}
4769
4770	move_fixed_address_to_symbol (&parts, aff_inv);
4771	/ Base is fixed address and is moved to symbol part. /
4772	if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff: aff_inv))
4773	parts.base = NULL_TREE;
4774
4775	/ Addressing mode "symbol + base + index [<< scale] [+ offset]". /
4776	if (parts.symbol != NULL_TREE
4777	&& !valid_mem_ref_p (mem_mode, as, &parts, code))
4778	{
4779	aff_combination_add_elt (aff_inv, parts.symbol, `1`);
4780	parts.symbol = NULL_TREE;
4781	/ Reset SIMPLE_INV since symbol address needs to be computed*
4782	outside of address expression in this case. /*
4783	simple_inv = false;
4784	/ Symbol part is moved back to base part, it can't be NULL. /
4785	parts.base = integer_one_node;
4786	}
4787	}
4788	else
4789	parts.index = NULL_TREE;
4790	}
4791	else
4792	{
4793	poly_int64 ainc_step;
4794	if (can_autoinc
4795	&& ratio == `1`
4796	&& ptrdiff_tree_p (cand->iv->step, &ainc_step))
4797	{
4798	poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4799
4800	if (stmt_after_increment (loop: data->current_loop, cand, stmt: use->stmt))
4801	ainc_offset += ainc_step;
4802	cost = get_address_cost_ainc (ainc_step, ainc_offset,
4803	addr_mode, mem_mode, as, speed);
4804	if (!cost.infinite_cost_p ())
4805	{
4806	can_autoinc = true*;
4807	return cost;
4808	}
4809	cost = no_cost;
4810	}
4811	if (!aff_combination_zero_p (aff: aff_inv))
4812	{
4813	parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4814	/ Addressing mode "base + offset". /
4815	if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4816	parts.offset = NULL_TREE;
4817	else
4818	aff_inv->offset = `0`;
4819	}
4820	}
4821
4822	if (simple_inv)
4823	simple_inv = (aff_inv == NULL
4824	\|\| aff_combination_const_p (aff: aff_inv)
4825	\|\| aff_combination_singleton_var_p (aff: aff_inv));
4826	if (!aff_combination_zero_p (aff: aff_inv))
4827	comp_inv = aff_combination_to_tree (aff_inv);
4828	if (comp_inv != NULL_TREE)
4829	cost = force_var_cost (data, expr: comp_inv, inv_vars);
4830	if (ratio != `1` && parts.step == NULL_TREE)
4831	var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4832	if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4833	var_cost += add_cost (speed, mode: addr_mode);
4834
4835	if (comp_inv && inv_expr && !simple_inv)
4836	{
4837	*inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4838	/ Clear depends on. /
4839	if (inv_expr != NULL && inv_vars && inv_vars)
4840	bitmap_clear (*inv_vars);
4841
4842	/ Cost of small invariant expression adjusted against loop niters*
4843	is usually zero, which makes it difficult to be differentiated
4844	from candidate based on loop invariant variables. Secondly, the
4845	generated invariant expression may not be hoisted out of loop by
4846	following pass. We penalize the cost by rounding up in order to
4847	neutralize such effects. /*
4848	cost.cost = adjust_setup_cost (data, cost: cost.cost, round_up_p: true);
4849	cost.scratch = cost.cost;
4850	}
4851
4852	cost += var_cost;
4853	addr = addr_for_mem_ref (&parts, as, false);
4854	gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4855	cost += address_cost (addr, mem_mode, as, speed);
4856
4857	if (parts.symbol != NULL_TREE)
4858	cost.complexity += `1`;
4859	/ Don't increase the complexity of adding a scaled index if it's*
4860	the only kind of index that the target allows. /*
4861	if (parts.step != NULL_TREE && ok_without_ratio_p)
4862	cost.complexity += `1`;
4863	if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4864	cost.complexity += `1`;
4865	if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4866	cost.complexity += `1`;
4867
4868	return cost;
4869	}
4870
4871	/ Scale (multiply) the computed COST (except scratch part that should be*
4872	hoisted out a loop) by header->frequency / AT->frequency, which makes
4873	expected cost more accurate. /*
4874
4875	static comp_cost
4876	get_scaled_computation_cost_at (ivopts_data data, gimple at, comp_cost cost)
4877	{
4878	if (data->speed
4879	&& data->current_loop->header->count.to_frequency (cfun) > `0`)
4880	{
4881	basic_block bb = gimple_bb (g: at);
4882	gcc_assert (cost.scratch <= cost.cost);
4883	int scale_factor = (int)(intptr_t) bb->aux;
4884	if (scale_factor == `1`)
4885	return cost;
4886
4887	int64_t scaled_cost
4888	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4889
4890	if (dump_file && (dump_flags & TDF_DETAILS))
4891	fprintf (stream: dump_file, format: "Scaling cost based on bb prob by %2.2f: "
4892	"%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4893	`1.0f` * scale_factor, cost.cost, cost.scratch, scaled_cost);
4894
4895	cost.cost = scaled_cost;
4896	}
4897
4898	return cost;
4899	}
4900
4901	/ Determines the cost of the computation by that USE is expressed*
4902	from induction variable CAND. If ADDRESS_P is true, we just need
4903	to create an address from it, otherwise we want to get it into
4904	register. A set of invariants we depend on is stored in INV_VARS.
4905	If CAN_AUTOINC is nonnull, use it to record whether autoinc
4906	addressing is likely. If INV_EXPR is nonnull, record invariant
4907	expr entry in it. /*
4908
4909	static comp_cost
4910	get_computation_cost (struct ivopts_data data, struct* iv_use *use,
4911	struct iv_cand cand, bool* address_p, bitmap *inv_vars,
4912	bool can_autoinc, iv_inv_expr_ent *inv_expr)
4913	{
4914	gimple *at = use->stmt;
4915	tree ubase = use->iv->base, cbase = cand->iv->base;
4916	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4917	tree comp_inv = NULL_TREE;
4918	HOST_WIDE_INT ratio, aratio;
4919	comp_cost cost;
4920	widest_int rat;
4921	aff_tree aff_inv, aff_var;
4922	bool speed = optimize_bb_for_speed_p (gimple_bb (g: at));
4923
4924	if (inv_vars)
4925	*inv_vars = NULL;
4926	if (can_autoinc)
4927	can_autoinc = false*;
4928	if (inv_expr)
4929	*inv_expr = NULL;
4930
4931	/ Check if we have enough precision to express the values of use. /
4932	if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4933	return infinite_cost;
4934
4935	if (address_p
4936	\|\| (use->iv->base_object
4937	&& cand->iv->base_object
4938	&& POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4939	&& POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4940	{
4941	/ Do not try to express address of an object with computation based*
4942	on address of a different object. This may cause problems in rtl
4943	level alias analysis (that does not expect this to be happening,
4944	as this is illegal in C), and would be unlikely to be useful
4945	anyway. /*
4946	if (use->iv->base_object
4947	&& cand->iv->base_object
4948	&& !operand_equal_p (use->iv->base_object, cand->iv->base_object, flags: `0`))
4949	return infinite_cost;
4950	}
4951
4952	if (!get_computation_aff_1 (loop: data->current_loop, at, use,
4953	cand, aff_inv: &aff_inv, aff_var: &aff_var, prat: &rat)
4954	\|\| !wi::fits_shwi_p (x: rat))
4955	return infinite_cost;
4956
4957	ratio = rat.to_shwi ();
4958	if (address_p)
4959	{
4960	cost = get_address_cost (data, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, ratio,
4961	inv_vars, inv_expr, can_autoinc, speed);
4962	cost = get_scaled_computation_cost_at (data, at, cost);
4963	/ For doloop IV cand, add on the extra cost. /
4964	cost += cand->doloop_p ? targetm.doloop_cost_for_address : `0`;
4965	return cost;
4966	}
4967
4968	bool simple_inv = (aff_combination_const_p (aff: &aff_inv)
4969	\|\| aff_combination_singleton_var_p (aff: &aff_inv));
4970	tree signed_type = signed_type_for (aff_combination_type (aff: &aff_inv));
4971	aff_combination_convert (&aff_inv, signed_type);
4972	if (!aff_combination_zero_p (aff: &aff_inv))
4973	comp_inv = aff_combination_to_tree (&aff_inv);
4974
4975	cost = force_var_cost (data, expr: comp_inv, inv_vars);
4976	if (comp_inv && inv_expr && !simple_inv)
4977	{
4978	*inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4979	/ Clear depends on. /
4980	if (inv_expr != NULL && inv_vars && inv_vars)
4981	bitmap_clear (*inv_vars);
4982
4983	cost.cost = adjust_setup_cost (data, cost: cost.cost);
4984	/ Record setup cost in scratch field. /
4985	cost.scratch = cost.cost;
4986	}
4987	/ Cost of constant integer can be covered when adding invariant part to*
4988	variant part. /*
4989	else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4990	cost = no_cost;
4991
4992	/ Need type narrowing to represent use with cand. /
4993	if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4994	{
4995	machine_mode outer_mode = TYPE_MODE (utype);
4996	machine_mode inner_mode = TYPE_MODE (ctype);
4997	cost += comp_cost (convert_cost (to_mode: outer_mode, from_mode: inner_mode, speed), `0`);
4998	}
4999
5000	/ Turn a + i * (-c) into a - i * c. /
5001	if (ratio < `0` && comp_inv && !integer_zerop (comp_inv))
5002	aratio = -ratio;
5003	else
5004	aratio = ratio;
5005
5006	if (ratio != `1`)
5007	cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
5008
5009	/ TODO: We may also need to check if we can compute a + i * 4 in one*
5010	instruction. /*
5011	/ Need to add up the invariant and variant parts. /
5012	if (comp_inv && !integer_zerop (comp_inv))
5013	cost += add_cost (speed, TYPE_MODE (utype));
5014
5015	cost = get_scaled_computation_cost_at (data, at, cost);
5016
5017	/ For doloop IV cand, add on the extra cost. /
5018	if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
5019	cost += targetm.doloop_cost_for_generic;
5020
5021	return cost;
5022	}
5023
5024	/ Determines cost of computing the use in GROUP with CAND in a generic*
5025	expression. /*
5026
5027	static bool
5028	determine_group_iv_cost_generic (struct ivopts_data *data,
5029	struct iv_group group, struct* iv_cand *cand)
5030	{
5031	comp_cost cost;
5032	iv_inv_expr_ent *inv_expr = NULL;
5033	bitmap inv_vars = NULL, inv_exprs = NULL;
5034	struct iv_use *use = group->vuses [`0`];
5035
5036	/ The simple case first -- if we need to express value of the preserved*
5037	original biv, the cost is 0. This also prevents us from counting the
5038	cost of increment twice -- once at this use and once in the cost of
5039	the candidate. /*
5040	if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5041	cost = no_cost;
5042	/ If the IV candidate involves undefined SSA values and is not the*
5043	same IV as on the USE avoid using that candidate here. /*
5044	else if (cand->involves_undefs
5045	&& (!use->iv \|\| !operand_equal_p (cand->iv->base, use->iv->base, flags: `0`)))
5046	return false;
5047	else
5048	cost = get_computation_cost (data, use, cand, address_p: false,
5049	inv_vars: &inv_vars, NULL, inv_expr: &inv_expr);
5050
5051	if (inv_expr)
5052	{
5053	inv_exprs = BITMAP_ALLOC (NULL);
5054	bitmap_set_bit (inv_exprs, inv_expr->id);
5055	}
5056	set_group_iv_cost (data, group, cand, cost, inv_vars,
5057	NULL_TREE, comp: ERROR_MARK, inv_exprs);
5058	return !cost.infinite_cost_p ();
5059	}
5060
5061	/ Determines cost of computing uses in GROUP with CAND in addresses. /
5062
5063	static bool
5064	determine_group_iv_cost_address (struct ivopts_data *data,
5065	struct iv_group group, struct* iv_cand *cand)
5066	{
5067	unsigned i;
5068	bitmap inv_vars = NULL, inv_exprs = NULL;
5069	bool can_autoinc;
5070	iv_inv_expr_ent *inv_expr = NULL;
5071	struct iv_use *use = group->vuses [`0`];
5072	comp_cost sum_cost = no_cost, cost;
5073
5074	cost = get_computation_cost (data, use, cand, address_p: true,
5075	inv_vars: &inv_vars, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5076
5077	if (inv_expr)
5078	{
5079	inv_exprs = BITMAP_ALLOC (NULL);
5080	bitmap_set_bit (inv_exprs, inv_expr->id);
5081	}
5082	sum_cost = cost;
5083	if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5084	{
5085	if (can_autoinc)
5086	sum_cost -= cand->cost_step;
5087	/ If we generated the candidate solely for exploiting autoincrement*
5088	opportunities, and it turns out it can't be used, set the cost to
5089	infinity to make sure we ignore it. /*
5090	else if (cand->pos == IP_AFTER_USE \|\| cand->pos == IP_BEFORE_USE)
5091	sum_cost = infinite_cost;
5092	}
5093
5094	/ Uses in a group can share setup code, so only add setup cost once. /
5095	cost -= cost.scratch;
5096	/ Compute and add costs for rest uses of this group. /
5097	for (i = `1`; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5098	{
5099	struct iv_use *next = group->vuses [i];
5100
5101	/ TODO: We could skip computing cost for sub iv_use when it has the*
5102	same cost as the first iv_use, but the cost really depends on the
5103	offset and where the iv_use is. /*
5104	cost = get_computation_cost (data, use: next, cand, address_p: true,
5105	NULL, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5106	if (inv_expr)
5107	{
5108	if (!inv_exprs)
5109	inv_exprs = BITMAP_ALLOC (NULL);
5110
5111	bitmap_set_bit (inv_exprs, inv_expr->id);
5112	}
5113	sum_cost += cost;
5114	}
5115	set_group_iv_cost (data, group, cand, cost: sum_cost, inv_vars,
5116	NULL_TREE, comp: ERROR_MARK, inv_exprs);
5117
5118	return !sum_cost.infinite_cost_p ();
5119	}
5120
5121	/ Computes value of candidate CAND at position AT in iteration DESC->NITER,*
5122	and stores it to VAL. /*
5123
5124	static void
5125	cand_value_at (class loop loop, struct* iv_cand cand, gimple at,
5126	class tree_niter_desc desc, aff_tree val)
5127	{
5128	aff_tree step, delta, nit;
5129	struct iv *iv = cand->iv;
5130	tree type = TREE_TYPE (iv->base);
5131	tree niter = desc->niter;
5132	bool after_adjust = stmt_after_increment (loop, cand, stmt: at);
5133	tree steptype;
5134
5135	if (POINTER_TYPE_P (type))
5136	steptype = sizetype;
5137	else
5138	steptype = unsigned_type_for (type);
5139
5140	/ If AFTER_ADJUST is required, the code below generates the equivalent*
5141	of BASE + NITER STEP + STEP, when ideally we'd prefer the expression*
5142	BASE + (NITER + 1) STEP, especially when NITER is often of the form*
5143	SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5144	doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5145	class for common idioms that we know are safe. /*
5146	if (after_adjust
5147	&& desc->control.no_overflow
5148	&& integer_onep (desc->control.step)
5149	&& (desc->cmp == LT_EXPR
5150	\|\| desc->cmp == NE_EXPR)
5151	&& TREE_CODE (desc->bound) == SSA_NAME)
5152	{
5153	if (integer_onep (desc->control.base))
5154	{
5155	niter = desc->bound;
5156	after_adjust = false;
5157	}
5158	else if (TREE_CODE (niter) == MINUS_EXPR
5159	&& integer_onep (TREE_OPERAND (niter, `1`)))
5160	{
5161	niter = TREE_OPERAND (niter, `0`);
5162	after_adjust = false;
5163	}
5164	}
5165
5166	tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5167	aff_combination_convert (&step, steptype);
5168	tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5169	aff_combination_convert (&nit, steptype);
5170	aff_combination_mult (&nit, &step, &delta);
5171	if (after_adjust)
5172	aff_combination_add (&delta, &step);
5173
5174	tree_to_aff_combination (iv->base, type, val);
5175	if (!POINTER_TYPE_P (type))
5176	aff_combination_convert (val, steptype);
5177	aff_combination_add (val, &delta);
5178	}
5179
5180	/ Returns period of induction variable iv. /
5181
5182	static tree
5183	iv_period (struct iv *iv)
5184	{
5185	tree step = iv->step, period, type;
5186	tree pow2div;
5187
5188	gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5189
5190	type = unsigned_type_for (TREE_TYPE (step));
5191	/ Period of the iv is lcm (step, type_range)/step -1,*
5192	i.e., Ntype_range/step - 1. Since type range is power*
5193	of two, N == (step >> num_of_ending_zeros_binary (step),
5194	so the final result is
5195
5196	(type_range >> num_of_ending_zeros_binary (step)) - 1
5197
5198	*/
5199	pow2div = num_ending_zeros (step);
5200
5201	period = build_low_bits_mask (type,
5202	(TYPE_PRECISION (type)
5203	- tree_to_uhwi (pow2div)));
5204
5205	return period;
5206	}
5207
5208	/ Returns the comparison operator used when eliminating the iv USE. /
5209
5210	static enum tree_code
5211	iv_elimination_compare (struct ivopts_data data, struct* iv_use *use)
5212	{
5213	class loop *loop = data->current_loop;
5214	basic_block ex_bb;
5215	edge exit;
5216
5217	ex_bb = gimple_bb (g: use->stmt);
5218	exit = EDGE_SUCC (ex_bb, `0`);
5219	if (flow_bb_inside_loop_p (loop, exit->dest))
5220	exit = EDGE_SUCC (ex_bb, `1`);
5221
5222	return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5223	}
5224
5225	/ Returns true if we can prove that BASE - OFFSET does not overflow. For now,*
5226	we only detect the situation that BASE = SOMETHING + OFFSET, where the
5227	calculation is performed in non-wrapping type.
5228
5229	TODO: More generally, we could test for the situation that
5230	BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5231	This would require knowing the sign of OFFSET. /*
5232
5233	static bool
5234	difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5235	{
5236	enum tree_code code;
5237	tree e1, e2;
5238	aff_tree aff_e1, aff_e2, aff_offset;
5239
5240	if (!nowrap_type_p (TREE_TYPE (base)))
5241	return false;
5242
5243	base = expand_simple_operations (base);
5244
5245	if (TREE_CODE (base) == SSA_NAME)
5246	{
5247	gimple *stmt = SSA_NAME_DEF_STMT (base);
5248
5249	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
5250	return false;
5251
5252	code = gimple_assign_rhs_code (gs: stmt);
5253	if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5254	return false;
5255
5256	e1 = gimple_assign_rhs1 (gs: stmt);
5257	e2 = gimple_assign_rhs2 (gs: stmt);
5258	}
5259	else
5260	{
5261	code = TREE_CODE (base);
5262	if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5263	return false;
5264	e1 = TREE_OPERAND (base, `0`);
5265	e2 = TREE_OPERAND (base, `1`);
5266	}
5267
5268	/ Use affine expansion as deeper inspection to prove the equality. /
5269	tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5270	&aff_e2, &data->name_expansion_cache);
5271	tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5272	&aff_offset, &data->name_expansion_cache);
5273	aff_combination_scale (&aff_offset, -`1`);
5274	switch (code)
5275	{
5276	case PLUS_EXPR:
5277	aff_combination_add (&aff_e2, &aff_offset);
5278	if (aff_combination_zero_p (aff: &aff_e2))
5279	return true;
5280
5281	tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5282	&aff_e1, &data->name_expansion_cache);
5283	aff_combination_add (&aff_e1, &aff_offset);
5284	return aff_combination_zero_p (aff: &aff_e1);
5285
5286	case POINTER_PLUS_EXPR:
5287	aff_combination_add (&aff_e2, &aff_offset);
5288	return aff_combination_zero_p (aff: &aff_e2);
5289
5290	default:
5291	return false;
5292	}
5293	}
5294
5295	/ Tries to replace loop exit by one formulated in terms of a LT_EXPR*
5296	comparison with CAND. NITER describes the number of iterations of
5297	the loops. If successful, the comparison in COMP_P is altered accordingly.
5298
5299	We aim to handle the following situation:
5300
5301	sometype base, p;
5302	int a, b, i;
5303
5304	i = a;
5305	p = p_0 = base + a;
5306
5307	do
5308	{
5309	bla (p);*
5310	p++;
5311	i++;
5312	}
5313	while (i < b);
5314
5315	Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5316	We aim to optimize this to
5317
5318	p = p_0 = base + a;
5319	do
5320	{
5321	bla (p);*
5322	p++;
5323	}
5324	while (p < p_0 - a + b);
5325
5326	This preserves the correctness, since the pointer arithmetics does not
5327	overflow. More precisely:
5328
5329	1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5330	overflow in computing it or the values of p.
5331	2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5332	overflow. To prove this, we use the fact that p_0 = base + a. /*
5333
5334	static bool
5335	iv_elimination_compare_lt (struct ivopts_data *data,
5336	struct iv_cand cand, enum* tree_code *comp_p,
5337	class tree_niter_desc *niter)
5338	{
5339	tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5340	class aff_tree nit, tmpa, tmpb;
5341	enum tree_code comp;
5342	HOST_WIDE_INT step;
5343
5344	/ We need to know that the candidate induction variable does not overflow.*
5345	While more complex analysis may be used to prove this, for now just
5346	check that the variable appears in the original program and that it
5347	is computed in a type that guarantees no overflows. /*
5348	cand_type = TREE_TYPE (cand->iv->base);
5349	if (cand->pos != IP_ORIGINAL \|\| !nowrap_type_p (cand_type))
5350	return false;
5351
5352	/ Make sure that the loop iterates till the loop bound is hit, as otherwise*
5353	the calculation of the BOUND could overflow, making the comparison
5354	invalid. /*
5355	if (!data->loop_single_exit_p)
5356	return false;
5357
5358	/ We need to be able to decide whether candidate is increasing or decreasing*
5359	in order to choose the right comparison operator. /*
5360	if (!cst_and_fits_in_hwi (cand->iv->step))
5361	return false;
5362	step = int_cst_value (cand->iv->step);
5363
5364	/ Check that the number of iterations matches the expected pattern:*
5365	a + 1 > b ? 0 : b - a - 1. /*
5366	mbz = niter->may_be_zero;
5367	if (TREE_CODE (mbz) == GT_EXPR)
5368	{
5369	/ Handle a + 1 > b. /
5370	tree op0 = TREE_OPERAND (mbz, `0`);
5371	if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, `1`)))
5372	{
5373	a = TREE_OPERAND (op0, `0`);
5374	b = TREE_OPERAND (mbz, `1`);
5375	}
5376	else
5377	return false;
5378	}
5379	else if (TREE_CODE (mbz) == LT_EXPR)
5380	{
5381	tree op1 = TREE_OPERAND (mbz, `1`);
5382
5383	/ Handle b < a + 1. /
5384	if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, `1`)))
5385	{
5386	a = TREE_OPERAND (op1, `0`);
5387	b = TREE_OPERAND (mbz, `0`);
5388	}
5389	else
5390	return false;
5391	}
5392	else
5393	return false;
5394
5395	/ Expected number of iterations is B - A - 1. Check that it matches*
5396	the actual number, i.e., that B - A - NITER = 1. /*
5397	tree_to_aff_combination (niter->niter, nit_type, &nit);
5398	tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5399	tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5400	aff_combination_scale (&nit, -`1`);
5401	aff_combination_scale (&tmpa, -`1`);
5402	aff_combination_add (&tmpb, &tmpa);
5403	aff_combination_add (&tmpb, &nit);
5404	if (tmpb.n != `0` \|\| maybe_ne (a: tmpb.offset, b: `1`))
5405	return false;
5406
5407	/ Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not*
5408	overflow. /*
5409	offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5410	cand->iv->step,
5411	fold_convert (TREE_TYPE (cand->iv->step), a));
5412	if (!difference_cannot_overflow_p (data, base: cand->iv->base, offset))
5413	return false;
5414
5415	/ Determine the new comparison operator. /
5416	comp = step < `0` ? GT_EXPR : LT_EXPR;
5417	if (*comp_p == NE_EXPR)
5418	*comp_p = comp;
5419	else if (*comp_p == EQ_EXPR)
5420	comp_p = invert_tree_comparison (comp, false*);
5421	else
5422	gcc_unreachable ();
5423
5424	return true;
5425	}
5426
5427	/ Check whether it is possible to express the condition in USE by comparison*
5428	of candidate CAND. If so, store the value compared with to BOUND, and the
5429	comparison operator to COMP. /*
5430
5431	static bool
5432	may_eliminate_iv (struct ivopts_data *data,
5433	struct iv_use use, struct* iv_cand cand, tree bound,
5434	enum tree_code *comp)
5435	{
5436	basic_block ex_bb;
5437	edge exit;
5438	tree period;
5439	class loop *loop = data->current_loop;
5440	aff_tree bnd;
5441	class tree_niter_desc *desc = NULL;
5442
5443	if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5444	return false;
5445
5446	/ For now works only for exits that dominate the loop latch.*
5447	TODO: extend to other conditions inside loop body. /*
5448	ex_bb = gimple_bb (g: use->stmt);
5449	if (use->stmt != last_nondebug_stmt (ex_bb)
5450	\|\| gimple_code (g: use->stmt) != GIMPLE_COND
5451	\|\| !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5452	return false;
5453
5454	exit = EDGE_SUCC (ex_bb, `0`);
5455	if (flow_bb_inside_loop_p (loop, exit->dest))
5456	exit = EDGE_SUCC (ex_bb, `1`);
5457	if (flow_bb_inside_loop_p (loop, exit->dest))
5458	return false;
5459
5460	desc = niter_for_exit (data, exit);
5461	if (!desc)
5462	return false;
5463
5464	/ Determine whether we can use the variable to test the exit condition.*
5465	This is the case iff the period of the induction variable is greater
5466	than the number of iterations for which the exit condition is true. /*
5467	period = iv_period (iv: cand->iv);
5468
5469	/ If the number of iterations is constant, compare against it directly. /
5470	if (TREE_CODE (desc->niter) == INTEGER_CST)
5471	{
5472	/ See cand_value_at. /
5473	if (stmt_after_increment (loop, cand, stmt: use->stmt))
5474	{
5475	if (!tree_int_cst_lt (t1: desc->niter, t2: period))
5476	return false;
5477	}
5478	else
5479	{
5480	if (tree_int_cst_lt (t1: period, t2: desc->niter))
5481	return false;
5482	}
5483	}
5484
5485	/ If not, and if this is the only possible exit of the loop, see whether*
5486	we can get a conservative estimate on the number of iterations of the
5487	entire loop and compare against that instead. /*
5488	else
5489	{
5490	widest_int period_value, max_niter;
5491
5492	max_niter = desc->max;
5493	if (stmt_after_increment (loop, cand, stmt: use->stmt))
5494	max_niter += `1`;
5495	period_value = wi::to_widest (t: period);
5496	if (wi::gtu_p (x: max_niter, y: period_value))
5497	{
5498	/ See if we can take advantage of inferred loop bound*
5499	information. /*
5500	if (data->loop_single_exit_p)
5501	{
5502	if (!max_loop_iterations (loop, &max_niter))
5503	return false;
5504	/ The loop bound is already adjusted by adding 1. /
5505	if (wi::gtu_p (x: max_niter, y: period_value))
5506	return false;
5507	}
5508	else
5509	return false;
5510	}
5511	}
5512
5513	/ For doloop IV cand, the bound would be zero. It's safe whether*
5514	may_be_zero set or not. /*
5515	if (cand->doloop_p)
5516	{
5517	*bound = build_int_cst (TREE_TYPE (cand->iv->base), `0`);
5518	*comp = iv_elimination_compare (data, use);
5519	return true;
5520	}
5521
5522	cand_value_at (loop, cand, at: use->stmt, desc, val: &bnd);
5523
5524	*bound = fold_convert (TREE_TYPE (cand->iv->base),
5525	aff_combination_to_tree (&bnd));
5526	*comp = iv_elimination_compare (data, use);
5527
5528	/ It is unlikely that computing the number of iterations using division*
5529	would be more profitable than keeping the original induction variable. /*
5530	bool cond_overflow_p;
5531	if (expression_expensive_p (*bound, &cond_overflow_p))
5532	return false;
5533
5534	/ Sometimes, it is possible to handle the situation that the number of*
5535	iterations may be zero unless additional assumptions by using <
5536	instead of != in the exit condition.
5537
5538	TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5539	base the exit condition on it. However, that is often too
5540	expensive. /*
5541	if (!integer_zerop (desc->may_be_zero))
5542	return iv_elimination_compare_lt (data, cand, comp_p: comp, niter: desc);
5543
5544	return true;
5545	}
5546
5547	/ Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must*
5548	be copied, if it is used in the loop body and DATA->body_includes_call. /*
5549
5550	static int
5551	parm_decl_cost (struct ivopts_data *data, tree bound)
5552	{
5553	tree sbound = bound;
5554	STRIP_NOPS (sbound);
5555
5556	if (TREE_CODE (sbound) == SSA_NAME
5557	&& SSA_NAME_IS_DEFAULT_DEF (sbound)
5558	&& TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5559	&& data->body_includes_call)
5560	return COSTS_N_INSNS (`1`);
5561
5562	return `0`;
5563	}
5564
5565	/ Determines cost of computing the use in GROUP with CAND in a condition. /
5566
5567	static bool
5568	determine_group_iv_cost_cond (struct ivopts_data *data,
5569	struct iv_group group, struct* iv_cand *cand)
5570	{
5571	tree bound = NULL_TREE;
5572	struct iv *cmp_iv;
5573	bitmap inv_exprs = NULL;
5574	bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5575	comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5576	enum comp_iv_rewrite rewrite_type;
5577	iv_inv_expr_ent inv_expr_elim = NULL, inv_expr_express = NULL, *inv_expr;
5578	tree control_var, bound_cst;
5579	enum tree_code comp = ERROR_MARK;
5580	struct iv_use *use = group->vuses [`0`];
5581
5582	/ Extract condition operands. /
5583	rewrite_type = extract_cond_operands (data, stmt: use->stmt, control_var: &control_var,
5584	bound: &bound_cst, NULL, iv_bound: &cmp_iv);
5585	gcc_assert (rewrite_type != COMP_IV_NA);
5586
5587	/ Try iv elimination. /
5588	if (rewrite_type == COMP_IV_ELIM
5589	&& may_eliminate_iv (data, use, cand, bound: &bound, comp: &comp))
5590	{
5591	elim_cost = force_var_cost (data, expr: bound, inv_vars: &inv_vars_elim);
5592	if (elim_cost.cost == `0`)
5593	elim_cost.cost = parm_decl_cost (data, bound);
5594	else if (TREE_CODE (bound) == INTEGER_CST)
5595	elim_cost.cost = `0`;
5596	/ If we replace a loop condition 'i < n' with 'p < base + n',*
5597	inv_vars_elim will have 'base' and 'n' set, which implies that both
5598	'base' and 'n' will be live during the loop. More likely,
5599	'base + n' will be loop invariant, resulting in only one live value
5600	during the loop. So in that case we clear inv_vars_elim and set
5601	inv_expr_elim instead. /*
5602	if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > `1`)
5603	{
5604	inv_expr_elim = get_loop_invariant_expr (data, inv_expr: bound);
5605	bitmap_clear (inv_vars_elim);
5606	}
5607	/ The bound is a loop invariant, so it will be only computed*
5608	once. /*
5609	elim_cost.cost = adjust_setup_cost (data, cost: elim_cost.cost);
5610	}
5611
5612	/ When the condition is a comparison of the candidate IV against*
5613	zero, prefer this IV.
5614
5615	TODO: The constant that we're subtracting from the cost should
5616	be target-dependent. This information should be added to the
5617	target costs for each backend. /*
5618	if (!elim_cost.infinite_cost_p () / Do not try to decrease infinite! /
5619	&& integer_zerop (*bound_cst)
5620	&& (operand_equal_p (*control_var, cand->var_after, flags: `0`)
5621	\|\| operand_equal_p (*control_var, cand->var_before, flags: `0`)))
5622	elim_cost -= `1`;
5623
5624	express_cost = get_computation_cost (data, use, cand, address_p: false,
5625	inv_vars: &inv_vars_express, NULL,
5626	inv_expr: &inv_expr_express);
5627	if (cmp_iv != NULL)
5628	find_inv_vars (data, expr_p: &cmp_iv->base, inv_vars: &inv_vars_express);
5629
5630	/ Count the cost of the original bound as well. /
5631	bound_cost = force_var_cost (data, expr: *bound_cst, NULL);
5632	if (bound_cost.cost == `0`)
5633	bound_cost.cost = parm_decl_cost (data, bound: *bound_cst);
5634	else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5635	bound_cost.cost = `0`;
5636	express_cost += bound_cost;
5637
5638	/ Choose the better approach, preferring the eliminated IV. /
5639	if (elim_cost <= express_cost)
5640	{
5641	cost = elim_cost;
5642	inv_vars = inv_vars_elim;
5643	inv_vars_elim = NULL;
5644	inv_expr = inv_expr_elim;
5645	/ For doloop candidate/use pair, adjust to zero cost. /
5646	if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5647	cost = no_cost;
5648	}
5649	else
5650	{
5651	cost = express_cost;
5652	inv_vars = inv_vars_express;
5653	inv_vars_express = NULL;
5654	bound = NULL_TREE;
5655	comp = ERROR_MARK;
5656	inv_expr = inv_expr_express;
5657	}
5658
5659	if (inv_expr)
5660	{
5661	inv_exprs = BITMAP_ALLOC (NULL);
5662	bitmap_set_bit (inv_exprs, inv_expr->id);
5663	}
5664	set_group_iv_cost (data, group, cand, cost,
5665	inv_vars, value: bound, comp, inv_exprs);
5666
5667	if (inv_vars_elim)
5668	BITMAP_FREE (inv_vars_elim);
5669	if (inv_vars_express)
5670	BITMAP_FREE (inv_vars_express);
5671
5672	return !cost.infinite_cost_p ();
5673	}
5674
5675	/ Determines cost of computing uses in GROUP with CAND. Returns false*
5676	if USE cannot be represented with CAND. /*
5677
5678	static bool
5679	determine_group_iv_cost (struct ivopts_data *data,
5680	struct iv_group group, struct* iv_cand *cand)
5681	{
5682	switch (group->type)
5683	{
5684	case USE_NONLINEAR_EXPR:
5685	return determine_group_iv_cost_generic (data, group, cand);
5686
5687	case USE_REF_ADDRESS:
5688	case USE_PTR_ADDRESS:
5689	return determine_group_iv_cost_address (data, group, cand);
5690
5691	case USE_COMPARE:
5692	return determine_group_iv_cost_cond (data, group, cand);
5693
5694	default:
5695	gcc_unreachable ();
5696	}
5697	}
5698
5699	/ Return true if get_computation_cost indicates that autoincrement is*
5700	a possibility for the pair of USE and CAND, false otherwise. /*
5701
5702	static bool
5703	autoinc_possible_for_pair (struct ivopts_data data, struct* iv_use *use,
5704	struct iv_cand *cand)
5705	{
5706	if (!address_p (type: use->type))
5707	return false;
5708
5709	bool can_autoinc = false;
5710	get_computation_cost (data, use, cand, address_p: true, NULL, can_autoinc: &can_autoinc, NULL);
5711	return can_autoinc;
5712	}
5713
5714	/ Examine IP_ORIGINAL candidates to see if they are incremented next to a*
5715	use that allows autoincrement, and set their AINC_USE if possible. /*
5716
5717	static void
5718	set_autoinc_for_original_candidates (struct ivopts_data *data)
5719	{
5720	unsigned i, j;
5721
5722	for (i = `0`; i < data->vcands.length (); i++)
5723	{
5724	struct iv_cand *cand = data->vcands [i];
5725	struct iv_use *closest_before = NULL;
5726	struct iv_use *closest_after = NULL;
5727	if (cand->pos != IP_ORIGINAL)
5728	continue;
5729
5730	for (j = `0`; j < data->vgroups.length (); j++)
5731	{
5732	struct iv_group *group = data->vgroups [j];
5733	struct iv_use *use = group->vuses [`0`];
5734	unsigned uid = gimple_uid (g: use->stmt);
5735
5736	if (gimple_bb (g: use->stmt) != gimple_bb (g: cand->incremented_at))
5737	continue;
5738
5739	if (uid < gimple_uid (g: cand->incremented_at)
5740	&& (closest_before == NULL
5741	\|\| uid > gimple_uid (g: closest_before->stmt)))
5742	closest_before = use;
5743
5744	if (uid > gimple_uid (g: cand->incremented_at)
5745	&& (closest_after == NULL
5746	\|\| uid < gimple_uid (g: closest_after->stmt)))
5747	closest_after = use;
5748	}
5749
5750	if (closest_before != NULL
5751	&& autoinc_possible_for_pair (data, use: closest_before, cand))
5752	cand->ainc_use = closest_before;
5753	else if (closest_after != NULL
5754	&& autoinc_possible_for_pair (data, use: closest_after, cand))
5755	cand->ainc_use = closest_after;
5756	}
5757	}
5758
5759	/ Relate compare use with all candidates. /
5760
5761	static void
5762	relate_compare_use_with_all_cands (struct ivopts_data *data)
5763	{
5764	unsigned i, count = data->vcands.length ();
5765	for (i = `0`; i < data->vgroups.length (); i++)
5766	{
5767	struct iv_group *group = data->vgroups [i];
5768
5769	if (group->type == USE_COMPARE)
5770	bitmap_set_range (group->related_cands, `0`, count);
5771	}
5772	}
5773
5774	/ If PREFERRED_MODE is suitable and profitable, use the preferred*
5775	PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. /*
5776
5777	static tree
5778	compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5779	const widest_int &iterations_max)
5780	{
5781	tree ntype = TREE_TYPE (niter);
5782	tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, `1`);
5783	if (!pref_type)
5784	return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5785	build_int_cst (ntype, `1`));
5786
5787	gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5788
5789	int prec = TYPE_PRECISION (ntype);
5790	int pref_prec = TYPE_PRECISION (pref_type);
5791
5792	tree base;
5793
5794	/ Check if the PREFERRED_MODED is able to present niter. /
5795	if (pref_prec > prec
5796	\|\| wi::ltu_p (x: iterations_max,
5797	y: widest_int::from (x: wi::max_value (pref_prec, UNSIGNED),
5798	sgn: UNSIGNED)))
5799	{
5800	/ No wrap, it is safe to use preferred type after niter + 1. /
5801	if (wi::ltu_p (x: iterations_max,
5802	y: widest_int::from (x: wi::max_value (prec, UNSIGNED),
5803	sgn: UNSIGNED)))
5804	{
5805	/ This could help to optimize "-1 +1" pair when niter looks*
5806	like "n-1": n is in original mode. "base = (n - 1) + 1"
5807	in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. /*
5808	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5809	build_int_cst (ntype, `1`));
5810	base = fold_convert (pref_type, base);
5811	}
5812
5813	/ To avoid wrap, convert niter to preferred type before plus 1. /
5814	else
5815	{
5816	niter = fold_convert (pref_type, niter);
5817	base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5818	build_int_cst (pref_type, `1`));
5819	}
5820	}
5821	else
5822	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5823	build_int_cst (ntype, `1`));
5824	return base;
5825	}
5826
5827	/ Add one doloop dedicated IV candidate:*
5828	- Base is (may_be_zero ? 1 : (niter + 1)).
5829	- Step is -1. /*
5830
5831	static void
5832	add_iv_candidate_for_doloop (struct ivopts_data *data)
5833	{
5834	tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5835	gcc_assert (niter_desc && niter_desc->assumptions);
5836
5837	tree niter = niter_desc->niter;
5838	tree ntype = TREE_TYPE (niter);
5839	gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5840
5841	tree may_be_zero = niter_desc->may_be_zero;
5842	if (may_be_zero && integer_zerop (may_be_zero))
5843	may_be_zero = NULL_TREE;
5844	if (may_be_zero)
5845	{
5846	if (COMPARISON_CLASS_P (may_be_zero))
5847	{
5848	niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5849	build_int_cst (ntype, `0`),
5850	rewrite_to_non_trapping_overflow (niter));
5851	}
5852	/ Don't try to obtain the iteration count expression when may_be_zero is*
5853	integer_nonzerop (actually iteration count is one) or else. /*
5854	else
5855	return;
5856	}
5857
5858	machine_mode mode = TYPE_MODE (ntype);
5859	machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5860
5861	tree base;
5862	if (mode != pref_mode)
5863	{
5864	base = compute_doloop_base_on_mode (preferred_mode: pref_mode, niter, iterations_max: niter_desc->max);
5865	ntype = TREE_TYPE (base);
5866	}
5867	else
5868	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5869	build_int_cst (ntype, `1`));
5870
5871
5872	add_candidate (data, base, step: build_int_cst (ntype, -`1`), important: true, NULL, NULL, doloop: true);
5873	}
5874
5875	/ Finds the candidates for the induction variables. /
5876
5877	static void
5878	find_iv_candidates (struct ivopts_data *data)
5879	{
5880	/ Add commonly used ivs. /
5881	add_standard_iv_candidates (data);
5882
5883	/ Add doloop dedicated ivs. /
5884	if (data->doloop_use_p)
5885	add_iv_candidate_for_doloop (data);
5886
5887	/ Add old induction variables. /
5888	add_iv_candidate_for_bivs (data);
5889
5890	/ Add induction variables derived from uses. /
5891	add_iv_candidate_for_groups (data);
5892
5893	set_autoinc_for_original_candidates (data);
5894
5895	/ Record the important candidates. /
5896	record_important_candidates (data);
5897
5898	/ Relate compare iv_use with all candidates. /
5899	if (!data->consider_all_candidates)
5900	relate_compare_use_with_all_cands (data);
5901
5902	if (dump_file && (dump_flags & TDF_DETAILS))
5903	{
5904	unsigned i;
5905
5906	fprintf (stream: dump_file, format: "\n<Important Candidates>:\t");
5907	for (i = `0`; i < data->vcands.length (); i++)
5908	if (data->vcands [i]->important)
5909	fprintf (stream: dump_file, format: " %d,", data->vcands [i]->id);
5910	fprintf (stream: dump_file, format: "\n");
5911
5912	fprintf (stream: dump_file, format: "\n<Group, Cand> Related:\n");
5913	for (i = `0`; i < data->vgroups.length (); i++)
5914	{
5915	struct iv_group *group = data->vgroups [i];
5916
5917	if (group->related_cands)
5918	{
5919	fprintf (stream: dump_file, format: " Group %d:\t", group->id);
5920	dump_bitmap (file: dump_file, map: group->related_cands);
5921	}
5922	}
5923	fprintf (stream: dump_file, format: "\n");
5924	}
5925	}
5926
5927	/ Determines costs of computing use of iv with an iv candidate. /
5928
5929	static void
5930	determine_group_iv_costs (struct ivopts_data *data)
5931	{
5932	unsigned i, j;
5933	struct iv_cand *cand;
5934	struct iv_group *group;
5935	bitmap to_clear = BITMAP_ALLOC (NULL);
5936
5937	alloc_use_cost_map (data);
5938
5939	for (i = `0`; i < data->vgroups.length (); i++)
5940	{
5941	group = data->vgroups [i];
5942
5943	if (data->consider_all_candidates)
5944	{
5945	for (j = `0`; j < data->vcands.length (); j++)
5946	{
5947	cand = data->vcands [j];
5948	determine_group_iv_cost (data, group, cand);
5949	}
5950	}
5951	else
5952	{
5953	bitmap_iterator bi;
5954
5955	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, j, bi)
5956	{
5957	cand = data->vcands [j];
5958	if (!determine_group_iv_cost (data, group, cand))
5959	bitmap_set_bit (to_clear, j);
5960	}
5961
5962	/ Remove the candidates for that the cost is infinite from*
5963	the list of related candidates. /*
5964	bitmap_and_compl_into (group->related_cands, to_clear);
5965	bitmap_clear (to_clear);
5966	}
5967	}
5968
5969	BITMAP_FREE (to_clear);
5970
5971	if (dump_file && (dump_flags & TDF_DETAILS))
5972	{
5973	bitmap_iterator bi;
5974
5975	/ Dump invariant variables. /
5976	fprintf (stream: dump_file, format: "\n<Invariant Vars>:\n");
5977	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
5978	{
5979	struct version_info *info = ver_info (data, ver: i);
5980	if (info->inv_id)
5981	{
5982	fprintf (stream: dump_file, format: "Inv %d:\t", info->inv_id);
5983	print_generic_expr (dump_file, info->name, TDF_SLIM);
5984	fprintf (stream: dump_file, format: "%s\n",
5985	info->has_nonlin_use ? "" : "\t(eliminable)");
5986	}
5987	}
5988
5989	/ Dump invariant expressions. /
5990	fprintf (stream: dump_file, format: "\n<Invariant Expressions>:\n");
5991	auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5992
5993	for (hash_table<iv_inv_expr_hasher>::iterator it
5994	= data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5995	++it)
5996	list.safe_push (obj: *it);
5997
5998	list.qsort (sort_iv_inv_expr_ent);
5999
6000	for (i = `0`; i < list.length (); ++i)
6001	{
6002	fprintf (stream: dump_file, format: "inv_expr %d: \t", list [i]->id);
6003	print_generic_expr (dump_file, list [i]->expr, TDF_SLIM);
6004	fprintf (stream: dump_file, format: "\n");
6005	}
6006
6007	fprintf (stream: dump_file, format: "\n<Group-candidate Costs>:\n");
6008
6009	for (i = `0`; i < data->vgroups.length (); i++)
6010	{
6011	group = data->vgroups [i];
6012
6013	fprintf (stream: dump_file, format: "Group %d:\n", i);
6014	fprintf (stream: dump_file, format: " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
6015	for (j = `0`; j < group->n_map_members; j++)
6016	{
6017	if (!group->cost_map[j].cand
6018	\|\| group->cost_map[j].cost.infinite_cost_p ())
6019	continue;
6020
6021	fprintf (stream: dump_file, format: " %d\t%" PRId64 "\t%d\t",
6022	group->cost_map[j].cand->id,
6023	group->cost_map[j].cost.cost,
6024	group->cost_map[j].cost.complexity);
6025	if (!group->cost_map[j].inv_exprs
6026	\|\| bitmap_empty_p (map: group->cost_map[j].inv_exprs))
6027	fprintf (stream: dump_file, format: "NIL;\t");
6028	else
6029	bitmap_print (dump_file,
6030	group->cost_map[j].inv_exprs, "", ";\t");
6031	if (!group->cost_map[j].inv_vars
6032	\|\| bitmap_empty_p (map: group->cost_map[j].inv_vars))
6033	fprintf (stream: dump_file, format: "NIL;\n");
6034	else
6035	bitmap_print (dump_file,
6036	group->cost_map[j].inv_vars, "", "\n");
6037	}
6038
6039	fprintf (stream: dump_file, format: "\n");
6040	}
6041	fprintf (stream: dump_file, format: "\n");
6042	}
6043	}
6044
6045	/ Determines cost of the candidate CAND. /
6046
6047	static void
6048	determine_iv_cost (struct ivopts_data data, struct* iv_cand *cand)
6049	{
6050	comp_cost cost_base;
6051	int64_t cost, cost_step;
6052	tree base;
6053
6054	gcc_assert (cand->iv != NULL);
6055
6056	/ There are two costs associated with the candidate -- its increment*
6057	and its initialization. The second is almost negligible for any loop
6058	that rolls enough, so we take it just very little into account. /*
6059
6060	base = cand->iv->base;
6061	cost_base = force_var_cost (data, expr: base, NULL);
6062	/ It will be exceptional that the iv register happens to be initialized with*
6063	the proper value at no cost. In general, there will at least be a regcopy
6064	or a const set. /*
6065	if (cost_base.cost == `0`)
6066	cost_base.cost = COSTS_N_INSNS (`1`);
6067	/ Doloop decrement should be considered as zero cost. /
6068	if (cand->doloop_p)
6069	cost_step = `0`;
6070	else
6071	cost_step = add_cost (speed: data->speed, TYPE_MODE (TREE_TYPE (base)));
6072	cost = cost_step + adjust_setup_cost (data, cost: cost_base.cost);
6073
6074	/ Prefer the original ivs unless we may gain something by replacing it.*
6075	The reason is to make debugging simpler; so this is not relevant for
6076	artificial ivs created by other optimization passes. /*
6077	if ((cand->pos != IP_ORIGINAL
6078	\|\| !SSA_NAME_VAR (cand->var_before)
6079	\|\| DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6080	/ Prefer doloop as well. /
6081	&& !cand->doloop_p)
6082	cost++;
6083
6084	/ Prefer not to insert statements into latch unless there are some*
6085	already (so that we do not create unnecessary jumps). /*
6086	if (cand->pos == IP_END
6087	&& empty_block_p (ip_end_pos (data->current_loop)))
6088	cost++;
6089
6090	cand->cost = cost;
6091	cand->cost_step = cost_step;
6092	}
6093
6094	/ Determines costs of computation of the candidates. /
6095
6096	static void
6097	determine_iv_costs (struct ivopts_data *data)
6098	{
6099	unsigned i;
6100
6101	if (dump_file && (dump_flags & TDF_DETAILS))
6102	{
6103	fprintf (stream: dump_file, format: "<Candidate Costs>:\n");
6104	fprintf (stream: dump_file, format: " cand\tcost\n");
6105	}
6106
6107	for (i = `0`; i < data->vcands.length (); i++)
6108	{
6109	struct iv_cand *cand = data->vcands [i];
6110
6111	determine_iv_cost (data, cand);
6112
6113	if (dump_file && (dump_flags & TDF_DETAILS))
6114	fprintf (stream: dump_file, format: " %d\t%d\n", i, cand->cost);
6115	}
6116
6117	if (dump_file && (dump_flags & TDF_DETAILS))
6118	fprintf (stream: dump_file, format: "\n");
6119	}
6120
6121	/ Estimate register pressure for loop having N_INVS invariants and N_CANDS*
6122	induction variables. Note N_INVS includes both invariant variables and
6123	invariant expressions. /*
6124
6125	static unsigned
6126	ivopts_estimate_reg_pressure (struct ivopts_data data, unsigned* n_invs,
6127	unsigned n_cands)
6128	{
6129	unsigned cost;
6130	unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6131	unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6132	bool speed = data->speed;
6133
6134	/ If there is a call in the loop body, the call-clobbered registers*
6135	are not available for loop invariants. /*
6136	if (data->body_includes_call)
6137	available_regs = available_regs - target_clobbered_regs;
6138
6139	/ If we have enough registers. /
6140	if (regs_needed + target_res_regs < available_regs)
6141	cost = n_new;
6142	/ If close to running out of registers, try to preserve them. /
6143	else if (regs_needed <= available_regs)
6144	cost = target_reg_cost [speed] * regs_needed;
6145	/ If we run out of available registers but the number of candidates*
6146	does not, we penalize extra registers using target_spill_cost. /*
6147	else if (n_cands <= available_regs)
6148	cost = target_reg_cost [speed] * available_regs
6149	+ target_spill_cost [speed] * (regs_needed - available_regs);
6150	/ If the number of candidates runs out available registers, we penalize*
6151	extra candidate registers using target_spill_cost 2. Because it is*
6152	more expensive to spill induction variable than invariant. /*
6153	else
6154	cost = target_reg_cost [speed] * available_regs
6155	+ target_spill_cost [speed] * (n_cands - available_regs) * `2`
6156	+ target_spill_cost [speed] * (regs_needed - n_cands);
6157
6158	/ Finally, add the number of candidates, so that we prefer eliminating*
6159	induction variables if possible. /*
6160	return cost + n_cands;
6161	}
6162
6163	/ For each size of the induction variable set determine the penalty. /
6164
6165	static void
6166	determine_set_costs (struct ivopts_data *data)
6167	{
6168	unsigned j, n;
6169	gphi *phi;
6170	gphi_iterator psi;
6171	tree op;
6172	class loop *loop = data->current_loop;
6173	bitmap_iterator bi;
6174
6175	if (dump_file && (dump_flags & TDF_DETAILS))
6176	{
6177	fprintf (stream: dump_file, format: "<Global Costs>:\n");
6178	fprintf (stream: dump_file, format: " target_avail_regs %d\n", target_avail_regs);
6179	fprintf (stream: dump_file, format: " target_clobbered_regs %d\n", target_clobbered_regs);
6180	fprintf (stream: dump_file, format: " target_reg_cost %d\n", target_reg_cost[data->speed]);
6181	fprintf (stream: dump_file, format: " target_spill_cost %d\n", target_spill_cost[data->speed]);
6182	}
6183
6184	n = `0`;
6185	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
6186	{
6187	phi = psi.phi ();
6188	op = PHI_RESULT (phi);
6189
6190	if (virtual_operand_p (op))
6191	continue;
6192
6193	if (get_iv (data, var: op))
6194	continue;
6195
6196	if (!POINTER_TYPE_P (TREE_TYPE (op))
6197	&& !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6198	continue;
6199
6200	n++;
6201	}
6202
6203	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, j, bi)
6204	{
6205	struct version_info *info = ver_info (data, ver: j);
6206
6207	if (info->inv_id && info->has_nonlin_use)
6208	n++;
6209	}
6210
6211	data->regs_used = n;
6212	if (dump_file && (dump_flags & TDF_DETAILS))
6213	fprintf (stream: dump_file, format: " regs_used %d\n", n);
6214
6215	if (dump_file && (dump_flags & TDF_DETAILS))
6216	{
6217	fprintf (stream: dump_file, format: " cost for size:\n");
6218	fprintf (stream: dump_file, format: " ivs\tcost\n");
6219	for (j = `0`; j <= `2` * target_avail_regs; j++)
6220	fprintf (stream: dump_file, format: " %d\t%d\n", j,
6221	ivopts_estimate_reg_pressure (data, n_invs: `0`, n_cands: j));
6222	fprintf (stream: dump_file, format: "\n");
6223	}
6224	}
6225
6226	/ Returns true if A is a cheaper cost pair than B. /
6227
6228	static bool
6229	cheaper_cost_pair (class cost_pair a, class* cost_pair *b)
6230	{
6231	if (!a)
6232	return false;
6233
6234	if (!b)
6235	return true;
6236
6237	if (a->cost < b->cost)
6238	return true;
6239
6240	if (b->cost < a->cost)
6241	return false;
6242
6243	/ In case the costs are the same, prefer the cheaper candidate. /
6244	if (a->cand->cost < b->cand->cost)
6245	return true;
6246
6247	return false;
6248	}
6249
6250	/ Compare if A is a more expensive cost pair than B. Return 1, 0 and -1*
6251	for more expensive, equal and cheaper respectively. /*
6252
6253	static int
6254	compare_cost_pair (class cost_pair a, class* cost_pair *b)
6255	{
6256	if (cheaper_cost_pair (a, b))
6257	return -`1`;
6258	if (cheaper_cost_pair (a: b, b: a))
6259	return `1`;
6260
6261	return `0`;
6262	}
6263
6264	/ Returns candidate by that USE is expressed in IVS. /
6265
6266	static class cost_pair *
6267	iv_ca_cand_for_group (class iv_ca ivs, struct* iv_group *group)
6268	{
6269	return ivs->cand_for_group[group->id];
6270	}
6271
6272	/ Computes the cost field of IVS structure. /
6273
6274	static void
6275	iv_ca_recount_cost (struct ivopts_data data, class* iv_ca *ivs)
6276	{
6277	comp_cost cost = ivs->cand_use_cost;
6278
6279	cost += ivs->cand_cost;
6280	cost += ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands);
6281	ivs->cost = cost;
6282	}
6283
6284	/ Remove use of invariants in set INVS by decreasing counter in N_INV_USES*
6285	and IVS. /*
6286
6287	static void
6288	iv_ca_set_remove_invs (class iv_ca ivs, bitmap invs, unsigned* *n_inv_uses)
6289	{
6290	bitmap_iterator bi;
6291	unsigned iid;
6292
6293	if (!invs)
6294	return;
6295
6296	gcc_assert (n_inv_uses != NULL);
6297	EXECUTE_IF_SET_IN_BITMAP (invs, `0`, iid, bi)
6298	{
6299	n_inv_uses[iid]--;
6300	if (n_inv_uses[iid] == `0`)
6301	ivs->n_invs--;
6302	}
6303	}
6304
6305	/ Set USE not to be expressed by any candidate in IVS. /
6306
6307	static void
6308	iv_ca_set_no_cp (struct ivopts_data data, class* iv_ca *ivs,
6309	struct iv_group *group)
6310	{
6311	unsigned gid = group->id, cid;
6312	class cost_pair *cp;
6313
6314	cp = ivs->cand_for_group[gid];
6315	if (!cp)
6316	return;
6317	cid = cp->cand->id;
6318
6319	ivs->bad_groups++;
6320	ivs->cand_for_group[gid] = NULL;
6321	ivs->n_cand_uses[cid]--;
6322
6323	if (ivs->n_cand_uses[cid] == `0`)
6324	{
6325	bitmap_clear_bit (ivs->cands, cid);
6326	if (!cp->cand->doloop_p \|\| !targetm.have_count_reg_decr_p)
6327	ivs->n_cands--;
6328	ivs->cand_cost -= cp->cand->cost;
6329	iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6330	iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6331	}
6332
6333	ivs->cand_use_cost -= cp->cost;
6334	iv_ca_set_remove_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6335	iv_ca_set_remove_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6336	iv_ca_recount_cost (data, ivs);
6337	}
6338
6339	/ Add use of invariants in set INVS by increasing counter in N_INV_USES and*
6340	IVS. /*
6341
6342	static void
6343	iv_ca_set_add_invs (class iv_ca ivs, bitmap invs, unsigned* *n_inv_uses)
6344	{
6345	bitmap_iterator bi;
6346	unsigned iid;
6347
6348	if (!invs)
6349	return;
6350
6351	gcc_assert (n_inv_uses != NULL);
6352	EXECUTE_IF_SET_IN_BITMAP (invs, `0`, iid, bi)
6353	{
6354	n_inv_uses[iid]++;
6355	if (n_inv_uses[iid] == `1`)
6356	ivs->n_invs++;
6357	}
6358	}
6359
6360	/ Set cost pair for GROUP in set IVS to CP. /
6361
6362	static void
6363	iv_ca_set_cp (struct ivopts_data data, class* iv_ca *ivs,
6364	struct iv_group group, class* cost_pair *cp)
6365	{
6366	unsigned gid = group->id, cid;
6367
6368	if (ivs->cand_for_group[gid] == cp)
6369	return;
6370
6371	if (ivs->cand_for_group[gid])
6372	iv_ca_set_no_cp (data, ivs, group);
6373
6374	if (cp)
6375	{
6376	cid = cp->cand->id;
6377
6378	ivs->bad_groups--;
6379	ivs->cand_for_group[gid] = cp;
6380	ivs->n_cand_uses[cid]++;
6381	if (ivs->n_cand_uses[cid] == `1`)
6382	{
6383	bitmap_set_bit (ivs->cands, cid);
6384	if (!cp->cand->doloop_p \|\| !targetm.have_count_reg_decr_p)
6385	ivs->n_cands++;
6386	ivs->cand_cost += cp->cand->cost;
6387	iv_ca_set_add_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6388	iv_ca_set_add_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6389	}
6390
6391	ivs->cand_use_cost += cp->cost;
6392	iv_ca_set_add_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6393	iv_ca_set_add_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6394	iv_ca_recount_cost (data, ivs);
6395	}
6396	}
6397
6398	/ Extend set IVS by expressing USE by some of the candidates in it*
6399	if possible. Consider all important candidates if candidates in
6400	set IVS don't give any result. /*
6401
6402	static void
6403	iv_ca_add_group (struct ivopts_data data, class* iv_ca *ivs,
6404	struct iv_group *group)
6405	{
6406	class cost_pair best_cp = NULL, cp;
6407	bitmap_iterator bi;
6408	unsigned i;
6409	struct iv_cand *cand;
6410
6411	gcc_assert (ivs->upto >= group->id);
6412	ivs->upto++;
6413	ivs->bad_groups++;
6414
6415	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6416	{
6417	cand = data->vcands [i];
6418	cp = get_group_iv_cost (data, group, cand);
6419	if (cheaper_cost_pair (a: cp, b: best_cp))
6420	best_cp = cp;
6421	}
6422
6423	if (best_cp == NULL)
6424	{
6425	EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, `0`, i, bi)
6426	{
6427	cand = data->vcands [i];
6428	cp = get_group_iv_cost (data, group, cand);
6429	if (cheaper_cost_pair (a: cp, b: best_cp))
6430	best_cp = cp;
6431	}
6432	}
6433
6434	iv_ca_set_cp (data, ivs, group, cp: best_cp);
6435	}
6436
6437	/ Get cost for assignment IVS. /
6438
6439	static comp_cost
6440	iv_ca_cost (class iv_ca *ivs)
6441	{
6442	/ This was a conditional expression but it triggered a bug in*
6443	Sun C 5.5. /*
6444	if (ivs->bad_groups)
6445	return infinite_cost;
6446	else
6447	return ivs->cost;
6448	}
6449
6450	/ Compare if applying NEW_CP to GROUP for IVS introduces more invariants*
6451	than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6452	respectively. /*
6453
6454	static int
6455	iv_ca_compare_deps (struct ivopts_data data, class* iv_ca *ivs,
6456	struct iv_group group, class* cost_pair *old_cp,
6457	class cost_pair *new_cp)
6458	{
6459	gcc_assert (old_cp && new_cp && old_cp != new_cp);
6460	unsigned old_n_invs = ivs->n_invs;
6461	iv_ca_set_cp (data, ivs, group, cp: new_cp);
6462	unsigned new_n_invs = ivs->n_invs;
6463	iv_ca_set_cp (data, ivs, group, cp: old_cp);
6464
6465	return new_n_invs > old_n_invs ? `1` : (new_n_invs < old_n_invs ? -`1` : `0`);
6466	}
6467
6468	/ Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains*
6469	it before NEXT. /*
6470
6471	static struct iv_ca_delta *
6472	iv_ca_delta_add (struct iv_group group, class* cost_pair *old_cp,
6473	class cost_pair new_cp, struct* iv_ca_delta *next)
6474	{
6475	struct iv_ca_delta change = XNEW (struct* iv_ca_delta);
6476
6477	change->group = group;
6478	change->old_cp = old_cp;
6479	change->new_cp = new_cp;
6480	change->next = next;
6481
6482	return change;
6483	}
6484
6485	/ Joins two lists of changes L1 and L2. Destructive -- old lists*
6486	are rewritten. /*
6487
6488	static struct iv_ca_delta *
6489	iv_ca_delta_join (struct iv_ca_delta l1, struct* iv_ca_delta *l2)
6490	{
6491	struct iv_ca_delta *last;
6492
6493	if (!l2)
6494	return l1;
6495
6496	if (!l1)
6497	return l2;
6498
6499	for (last = l1; last->next; last = last->next)
6500	continue;
6501	last->next = l2;
6502
6503	return l1;
6504	}
6505
6506	/ Reverse the list of changes DELTA, forming the inverse to it. /
6507
6508	static struct iv_ca_delta *
6509	iv_ca_delta_reverse (struct iv_ca_delta *delta)
6510	{
6511	struct iv_ca_delta act, next, *prev = NULL;
6512
6513	for (act = delta; act; act = next)
6514	{
6515	next = act->next;
6516	act->next = prev;
6517	prev = act;
6518
6519	std::swap (a&: act->old_cp, b&: act->new_cp);
6520	}
6521
6522	return prev;
6523	}
6524
6525	/ Commit changes in DELTA to IVS. If FORWARD is false, the changes are*
6526	reverted instead. /*
6527
6528	static void
6529	iv_ca_delta_commit (struct ivopts_data data, class* iv_ca *ivs,
6530	struct iv_ca_delta delta, bool* forward)
6531	{
6532	class cost_pair from, to;
6533	struct iv_ca_delta *act;
6534
6535	if (!forward)
6536	delta = iv_ca_delta_reverse (delta);
6537
6538	for (act = delta; act; act = act->next)
6539	{
6540	from = act->old_cp;
6541	to = act->new_cp;
6542	gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6543	iv_ca_set_cp (data, ivs, group: act->group, cp: to);
6544	}
6545
6546	if (!forward)
6547	iv_ca_delta_reverse (delta);
6548	}
6549
6550	/ Returns true if CAND is used in IVS. /
6551
6552	static bool
6553	iv_ca_cand_used_p (class iv_ca ivs, struct* iv_cand *cand)
6554	{
6555	return ivs->n_cand_uses[cand->id] > `0`;
6556	}
6557
6558	/ Returns number of induction variable candidates in the set IVS. /
6559
6560	static unsigned
6561	iv_ca_n_cands (class iv_ca *ivs)
6562	{
6563	return ivs->n_cands;
6564	}
6565
6566	/ Free the list of changes DELTA. /
6567
6568	static void
6569	iv_ca_delta_free (struct iv_ca_delta **delta)
6570	{
6571	struct iv_ca_delta act, next;
6572
6573	for (act = *delta; act; act = next)
6574	{
6575	next = act->next;
6576	free (ptr: act);
6577	}
6578
6579	*delta = NULL;
6580	}
6581
6582	/ Allocates new iv candidates assignment. /
6583
6584	static class iv_ca *
6585	iv_ca_new (struct ivopts_data *data)
6586	{
6587	class iv_ca nw = XNEW (class* iv_ca);
6588
6589	nw->upto = `0`;
6590	nw->bad_groups = `0`;
6591	nw->cand_for_group = XCNEWVEC (class cost_pair *,
6592	data->vgroups.length ());
6593	nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6594	nw->cands = BITMAP_ALLOC (NULL);
6595	nw->n_cands = `0`;
6596	nw->n_invs = `0`;
6597	nw->cand_use_cost = no_cost;
6598	nw->cand_cost = `0`;
6599	nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + `1`);
6600	nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + `1`);
6601	nw->cost = no_cost;
6602
6603	return nw;
6604	}
6605
6606	/ Free memory occupied by the set IVS. /
6607
6608	static void
6609	iv_ca_free (class iv_ca **ivs)
6610	{
6611	free (ptr: (*ivs)->cand_for_group);
6612	free (ptr: (*ivs)->n_cand_uses);
6613	BITMAP_FREE ((*ivs)->cands);
6614	free (ptr: (*ivs)->n_inv_var_uses);
6615	free (ptr: (*ivs)->n_inv_expr_uses);
6616	free (ptr: *ivs);
6617	*ivs = NULL;
6618	}
6619
6620	/ Dumps IVS to FILE. /
6621
6622	static void
6623	iv_ca_dump (struct ivopts_data data, FILE file, class iv_ca *ivs)
6624	{
6625	unsigned i;
6626	comp_cost cost = iv_ca_cost (ivs);
6627
6628	fprintf (stream: file, format: " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6629	cost.complexity);
6630	fprintf (stream: file, format: " reg_cost: %d\n",
6631	ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands));
6632	fprintf (stream: file, format: " cand_cost: %" PRId64 "\n cand_group_cost: "
6633	"%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6634	ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6635	bitmap_print (file, ivs->cands, " candidates: ","\n");
6636
6637	for (i = `0`; i < ivs->upto; i++)
6638	{
6639	struct iv_group *group = data->vgroups [i];
6640	class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6641	if (cp)
6642	fprintf (stream: file, format: " group:%d --> iv_cand:%d, cost=("
6643	"%" PRId64 ",%d)\n", group->id, cp->cand->id,
6644	cp->cost.cost, cp->cost.complexity);
6645	else
6646	fprintf (stream: file, format: " group:%d --> ??\n", group->id);
6647	}
6648
6649	const char *pref = "";
6650	fprintf (stream: file, format: " invariant variables: ");
6651	for (i = `1`; i <= data->max_inv_var_id; i++)
6652	if (ivs->n_inv_var_uses[i])
6653	{
6654	fprintf (stream: file, format: "%s%d", pref, i);
6655	pref = ", ";
6656	}
6657
6658	pref = "";
6659	fprintf (stream: file, format: "\n invariant expressions: ");
6660	for (i = `1`; i <= data->max_inv_expr_id; i++)
6661	if (ivs->n_inv_expr_uses[i])
6662	{
6663	fprintf (stream: file, format: "%s%d", pref, i);
6664	pref = ", ";
6665	}
6666
6667	fprintf (stream: file, format: "\n\n");
6668	}
6669
6670	/ Try changing candidate in IVS to CAND for each use. Return cost of the*
6671	new set, and store differences in DELTA. Number of induction variables
6672	in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6673	the function will try to find a solution with mimimal iv candidates. /*
6674
6675	static comp_cost
6676	iv_ca_extend (struct ivopts_data data, class* iv_ca *ivs,
6677	struct iv_cand cand, struct* iv_ca_delta **delta,
6678	unsigned n_ivs, bool* min_ncand)
6679	{
6680	unsigned i;
6681	comp_cost cost;
6682	struct iv_group *group;
6683	class cost_pair old_cp, new_cp;
6684
6685	*delta = NULL;
6686	for (i = `0`; i < ivs->upto; i++)
6687	{
6688	group = data->vgroups [i];
6689	old_cp = iv_ca_cand_for_group (ivs, group);
6690
6691	if (old_cp
6692	&& old_cp->cand == cand)
6693	continue;
6694
6695	new_cp = get_group_iv_cost (data, group, cand);
6696	if (!new_cp)
6697	continue;
6698
6699	if (!min_ncand)
6700	{
6701	int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6702	/ Skip if new_cp depends on more invariants. /
6703	if (cmp_invs > `0`)
6704	continue;
6705
6706	int cmp_cost = compare_cost_pair (a: new_cp, b: old_cp);
6707	/ Skip if new_cp is not cheaper. /
6708	if (cmp_cost > `0` \|\| (cmp_cost == `0` && cmp_invs == `0`))
6709	continue;
6710	}
6711
6712	delta = iv_ca_delta_add (group, old_cp, new_cp, next: delta);
6713	}
6714
6715	iv_ca_delta_commit (data, ivs, delta: delta, forward: true*);
6716	cost = iv_ca_cost (ivs);
6717	if (n_ivs)
6718	*n_ivs = iv_ca_n_cands (ivs);
6719	iv_ca_delta_commit (data, ivs, delta: delta, forward: false*);
6720
6721	return cost;
6722	}
6723
6724	/ Try narrowing set IVS by removing CAND. Return the cost of*
6725	the new set and store the differences in DELTA. START is
6726	the candidate with which we start narrowing. /*
6727
6728	static comp_cost
6729	iv_ca_narrow (struct ivopts_data data, class* iv_ca *ivs,
6730	struct iv_cand cand, struct* iv_cand *start,
6731	struct iv_ca_delta **delta)
6732	{
6733	unsigned i, ci;
6734	struct iv_group *group;
6735	class cost_pair old_cp, new_cp, *cp;
6736	bitmap_iterator bi;
6737	struct iv_cand *cnd;
6738	comp_cost cost, best_cost, acost;
6739
6740	*delta = NULL;
6741	for (i = `0`; i < data->vgroups.length (); i++)
6742	{
6743	group = data->vgroups [i];
6744
6745	old_cp = iv_ca_cand_for_group (ivs, group);
6746	if (old_cp->cand != cand)
6747	continue;
6748
6749	best_cost = iv_ca_cost (ivs);
6750	/ Start narrowing with START. /
6751	new_cp = get_group_iv_cost (data, group, cand: start);
6752
6753	if (data->consider_all_candidates)
6754	{
6755	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, ci, bi)
6756	{
6757	if (ci == cand->id \|\| (start && ci == start->id))
6758	continue;
6759
6760	cnd = data->vcands [ci];
6761
6762	cp = get_group_iv_cost (data, group, cand: cnd);
6763	if (!cp)
6764	continue;
6765
6766	iv_ca_set_cp (data, ivs, group, cp);
6767	acost = iv_ca_cost (ivs);
6768
6769	if (acost < best_cost)
6770	{
6771	best_cost = acost;
6772	new_cp = cp;
6773	}
6774	}
6775	}
6776	else
6777	{
6778	EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, `0`, ci, bi)
6779	{
6780	if (ci == cand->id \|\| (start && ci == start->id))
6781	continue;
6782
6783	cnd = data->vcands [ci];
6784
6785	cp = get_group_iv_cost (data, group, cand: cnd);
6786	if (!cp)
6787	continue;
6788
6789	iv_ca_set_cp (data, ivs, group, cp);
6790	acost = iv_ca_cost (ivs);
6791
6792	if (acost < best_cost)
6793	{
6794	best_cost = acost;
6795	new_cp = cp;
6796	}
6797	}
6798	}
6799	/ Restore to old cp for use. /
6800	iv_ca_set_cp (data, ivs, group, cp: old_cp);
6801
6802	if (!new_cp)
6803	{
6804	iv_ca_delta_free (delta);
6805	return infinite_cost;
6806	}
6807
6808	delta = iv_ca_delta_add (group, old_cp, new_cp, next: delta);
6809	}
6810
6811	iv_ca_delta_commit (data, ivs, delta: delta, forward: true*);
6812	cost = iv_ca_cost (ivs);
6813	iv_ca_delta_commit (data, ivs, delta: delta, forward: false*);
6814
6815	return cost;
6816	}
6817
6818	/ Try optimizing the set of candidates IVS by removing candidates different*
6819	from to EXCEPT_CAND from it. Return cost of the new set, and store
6820	differences in DELTA. /*
6821
6822	static comp_cost
6823	iv_ca_prune (struct ivopts_data data, class* iv_ca *ivs,
6824	struct iv_cand except_cand, struct* iv_ca_delta **delta)
6825	{
6826	bitmap_iterator bi;
6827	struct iv_ca_delta act_delta, best_delta;
6828	unsigned i;
6829	comp_cost best_cost, acost;
6830	struct iv_cand *cand;
6831
6832	best_delta = NULL;
6833	best_cost = iv_ca_cost (ivs);
6834
6835	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6836	{
6837	cand = data->vcands [i];
6838
6839	if (cand == except_cand)
6840	continue;
6841
6842	acost = iv_ca_narrow (data, ivs, cand, start: except_cand, delta: &act_delta);
6843
6844	if (acost < best_cost)
6845	{
6846	best_cost = acost;
6847	iv_ca_delta_free (delta: &best_delta);
6848	best_delta = act_delta;
6849	}
6850	else
6851	iv_ca_delta_free (delta: &act_delta);
6852	}
6853
6854	if (!best_delta)
6855	{
6856	*delta = NULL;
6857	return best_cost;
6858	}
6859
6860	/ Recurse to possibly remove other unnecessary ivs. /
6861	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
6862	best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6863	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: false);
6864	delta = iv_ca_delta_join (l1: best_delta, l2: delta);
6865	return best_cost;
6866	}
6867
6868	/ Check if CAND_IDX is a candidate other than OLD_CAND and has*
6869	cheaper local cost for GROUP than BEST_CP. Return pointer to
6870	the corresponding cost_pair, otherwise just return BEST_CP. /*
6871
6872	static class cost_pair*
6873	cheaper_cost_with_cand (struct ivopts_data data, struct* iv_group *group,
6874	unsigned int cand_idx, struct iv_cand *old_cand,
6875	class cost_pair *best_cp)
6876	{
6877	struct iv_cand *cand;
6878	class cost_pair *cp;
6879
6880	gcc_assert (old_cand != NULL && best_cp != NULL);
6881	if (cand_idx == old_cand->id)
6882	return best_cp;
6883
6884	cand = data->vcands [cand_idx];
6885	cp = get_group_iv_cost (data, group, cand);
6886	if (cp != NULL && cheaper_cost_pair (a: cp, b: best_cp))
6887	return cp;
6888
6889	return best_cp;
6890	}
6891
6892	/ Try breaking local optimal fixed-point for IVS by replacing candidates*
6893	which are used by more than one iv uses. For each of those candidates,
6894	this function tries to represent iv uses under that candidate using
6895	other ones with lower local cost, then tries to prune the new set.
6896	If the new set has lower cost, It returns the new cost after recording
6897	candidate replacement in list DELTA. /*
6898
6899	static comp_cost
6900	iv_ca_replace (struct ivopts_data data, class* iv_ca *ivs,
6901	struct iv_ca_delta **delta)
6902	{
6903	bitmap_iterator bi, bj;
6904	unsigned int i, j, k;
6905	struct iv_cand *cand;
6906	comp_cost orig_cost, acost;
6907	struct iv_ca_delta act_delta, tmp_delta;
6908	class cost_pair old_cp, best_cp = NULL;
6909
6910	*delta = NULL;
6911	orig_cost = iv_ca_cost (ivs);
6912
6913	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6914	{
6915	if (ivs->n_cand_uses[i] == `1`
6916	\|\| ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6917	continue;
6918
6919	cand = data->vcands [i];
6920
6921	act_delta = NULL;
6922	/ Represent uses under current candidate using other ones with*
6923	lower local cost. /*
6924	for (j = `0`; j < ivs->upto; j++)
6925	{
6926	struct iv_group *group = data->vgroups [j];
6927	old_cp = iv_ca_cand_for_group (ivs, group);
6928
6929	if (old_cp->cand != cand)
6930	continue;
6931
6932	best_cp = old_cp;
6933	if (data->consider_all_candidates)
6934	for (k = `0`; k < data->vcands.length (); k++)
6935	best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6936	old_cand: old_cp->cand, best_cp);
6937	else
6938	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, k, bj)
6939	best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6940	old_cand: old_cp->cand, best_cp);
6941
6942	if (best_cp == old_cp)
6943	continue;
6944
6945	act_delta = iv_ca_delta_add (group, old_cp, new_cp: best_cp, next: act_delta);
6946	}
6947	/ No need for further prune. /
6948	if (!act_delta)
6949	continue;
6950
6951	/ Prune the new candidate set. /
6952	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
6953	acost = iv_ca_prune (data, ivs, NULL, delta: &tmp_delta);
6954	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
6955	act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
6956
6957	if (acost < orig_cost)
6958	{
6959	*delta = act_delta;
6960	return acost;
6961	}
6962	else
6963	iv_ca_delta_free (delta: &act_delta);
6964	}
6965
6966	return orig_cost;
6967	}
6968
6969	/ Tries to extend the sets IVS in the best possible way in order to*
6970	express the GROUP. If ORIGINALP is true, prefer candidates from
6971	the original set of IVs, otherwise favor important candidates not
6972	based on any memory object. /*
6973
6974	static bool
6975	try_add_cand_for (struct ivopts_data data, class* iv_ca *ivs,
6976	struct iv_group group, bool* originalp)
6977	{
6978	comp_cost best_cost, act_cost;
6979	unsigned i;
6980	bitmap_iterator bi;
6981	struct iv_cand *cand;
6982	struct iv_ca_delta best_delta = NULL, act_delta;
6983	class cost_pair *cp;
6984
6985	iv_ca_add_group (data, ivs, group);
6986	best_cost = iv_ca_cost (ivs);
6987	cp = iv_ca_cand_for_group (ivs, group);
6988	if (cp)
6989	{
6990	best_delta = iv_ca_delta_add (group, NULL, new_cp: cp, NULL);
6991	iv_ca_set_no_cp (data, ivs, group);
6992	}
6993
6994	/ If ORIGINALP is true, try to find the original IV for the use. Otherwise*
6995	first try important candidates not based on any memory object. Only if
6996	this fails, try the specific ones. Rationale -- in loops with many
6997	variables the best choice often is to use just one generic biv. If we
6998	added here many ivs specific to the uses, the optimization algorithm later
6999	would be likely to get stuck in a local minimum, thus causing us to create
7000	too many ivs. The approach from few ivs to more seems more likely to be
7001	successful -- starting from few ivs, replacing an expensive use by a
7002	specific iv should always be a win. /*
7003	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, i, bi)
7004	{
7005	cand = data->vcands [i];
7006
7007	if (originalp && cand->pos !=IP_ORIGINAL)
7008	continue;
7009
7010	if (!originalp && cand->iv->base_object != NULL_TREE)
7011	continue;
7012
7013	if (iv_ca_cand_used_p (ivs, cand))
7014	continue;
7015
7016	cp = get_group_iv_cost (data, group, cand);
7017	if (!cp)
7018	continue;
7019
7020	iv_ca_set_cp (data, ivs, group, cp);
7021	act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL,
7022	min_ncand: true);
7023	iv_ca_set_no_cp (data, ivs, group);
7024	act_delta = iv_ca_delta_add (group, NULL, new_cp: cp, next: act_delta);
7025
7026	if (act_cost < best_cost)
7027	{
7028	best_cost = act_cost;
7029
7030	iv_ca_delta_free (delta: &best_delta);
7031	best_delta = act_delta;
7032	}
7033	else
7034	iv_ca_delta_free (delta: &act_delta);
7035	}
7036
7037	if (best_cost.infinite_cost_p ())
7038	{
7039	for (i = `0`; i < group->n_map_members; i++)
7040	{
7041	cp = group->cost_map + i;
7042	cand = cp->cand;
7043	if (!cand)
7044	continue;
7045
7046	/ Already tried this. /
7047	if (cand->important)
7048	{
7049	if (originalp && cand->pos == IP_ORIGINAL)
7050	continue;
7051	if (!originalp && cand->iv->base_object == NULL_TREE)
7052	continue;
7053	}
7054
7055	if (iv_ca_cand_used_p (ivs, cand))
7056	continue;
7057
7058	act_delta = NULL;
7059	iv_ca_set_cp (data, ivs, group, cp);
7060	act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL, min_ncand: true);
7061	iv_ca_set_no_cp (data, ivs, group);
7062	act_delta = iv_ca_delta_add (group,
7063	old_cp: iv_ca_cand_for_group (ivs, group),
7064	new_cp: cp, next: act_delta);
7065
7066	if (act_cost < best_cost)
7067	{
7068	best_cost = act_cost;
7069
7070	if (best_delta)
7071	iv_ca_delta_free (delta: &best_delta);
7072	best_delta = act_delta;
7073	}
7074	else
7075	iv_ca_delta_free (delta: &act_delta);
7076	}
7077	}
7078
7079	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7080	iv_ca_delta_free (delta: &best_delta);
7081
7082	return !best_cost.infinite_cost_p ();
7083	}
7084
7085	/ Finds an initial assignment of candidates to uses. /
7086
7087	static class iv_ca *
7088	get_initial_solution (struct ivopts_data data, bool* originalp)
7089	{
7090	unsigned i;
7091	class iv_ca *ivs = iv_ca_new (data);
7092
7093	for (i = `0`; i < data->vgroups.length (); i++)
7094	if (!try_add_cand_for (data, ivs, group: data->vgroups [i], originalp))
7095	{
7096	iv_ca_free (ivs: &ivs);
7097	return NULL;
7098	}
7099
7100	return ivs;
7101	}
7102
7103	/ Tries to improve set of induction variables IVS. TRY_REPLACE_P*
7104	points to a bool variable, this function tries to break local
7105	optimal fixed-point by replacing candidates in IVS if it's true. /*
7106
7107	static bool
7108	try_improve_iv_set (struct ivopts_data *data,
7109	class iv_ca ivs, bool* *try_replace_p)
7110	{
7111	unsigned i, n_ivs;
7112	comp_cost acost, best_cost = iv_ca_cost (ivs);
7113	struct iv_ca_delta best_delta = NULL, act_delta, *tmp_delta;
7114	struct iv_cand *cand;
7115
7116	/ Try extending the set of induction variables by one. /
7117	for (i = `0`; i < data->vcands.length (); i++)
7118	{
7119	cand = data->vcands [i];
7120
7121	if (iv_ca_cand_used_p (ivs, cand))
7122	continue;
7123
7124	acost = iv_ca_extend (data, ivs, cand, delta: &act_delta, n_ivs: &n_ivs, min_ncand: false);
7125	if (!act_delta)
7126	continue;
7127
7128	/ If we successfully added the candidate and the set is small enough,*
7129	try optimizing it by removing other candidates. /*
7130	if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7131	{
7132	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
7133	acost = iv_ca_prune (data, ivs, except_cand: cand, delta: &tmp_delta);
7134	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
7135	act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
7136	}
7137
7138	if (acost < best_cost)
7139	{
7140	best_cost = acost;
7141	iv_ca_delta_free (delta: &best_delta);
7142	best_delta = act_delta;
7143	}
7144	else
7145	iv_ca_delta_free (delta: &act_delta);
7146	}
7147
7148	if (!best_delta)
7149	{
7150	/ Try removing the candidates from the set instead. /
7151	best_cost = iv_ca_prune (data, ivs, NULL, delta: &best_delta);
7152
7153	if (!best_delta && *try_replace_p)
7154	{
7155	try_replace_p = false*;
7156	/ So far candidate selecting algorithm tends to choose fewer IVs*
7157	so that it can handle cases in which loops have many variables
7158	but the best choice is often to use only one general biv. One
7159	weakness is it can't handle opposite cases, in which different
7160	candidates should be chosen with respect to each use. To solve
7161	the problem, we replace candidates in a manner described by the
7162	comments of iv_ca_replace, thus give general algorithm a chance
7163	to break local optimal fixed-point in these cases. /*
7164	best_cost = iv_ca_replace (data, ivs, delta: &best_delta);
7165	}
7166
7167	if (!best_delta)
7168	return false;
7169	}
7170
7171	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7172	iv_ca_delta_free (delta: &best_delta);
7173	return best_cost == iv_ca_cost (ivs);
7174	}
7175
7176	/ Attempts to find the optimal set of induction variables. We do simple*
7177	greedy heuristic -- we try to replace at most one candidate in the selected
7178	solution and remove the unused ivs while this improves the cost. /*
7179
7180	static class iv_ca *
7181	find_optimal_iv_set_1 (struct ivopts_data data, bool* originalp)
7182	{
7183	class iv_ca *set;
7184	bool try_replace_p = true;
7185
7186	/ Get the initial solution. /
7187	set = get_initial_solution (data, originalp);
7188	if (!set)
7189	{
7190	if (dump_file && (dump_flags & TDF_DETAILS))
7191	fprintf (stream: dump_file, format: "Unable to substitute for ivs, failed.\n");
7192	return NULL;
7193	}
7194
7195	if (dump_file && (dump_flags & TDF_DETAILS))
7196	{
7197	fprintf (stream: dump_file, format: "Initial set of candidates:\n");
7198	iv_ca_dump (data, file: dump_file, ivs: set);
7199	}
7200
7201	while (try_improve_iv_set (data, ivs: set, try_replace_p: &try_replace_p))
7202	{
7203	if (dump_file && (dump_flags & TDF_DETAILS))
7204	{
7205	fprintf (stream: dump_file, format: "Improved to:\n");
7206	iv_ca_dump (data, file: dump_file, ivs: set);
7207	}
7208	}
7209
7210	/ If the set has infinite_cost, it can't be optimal. /
7211	if (iv_ca_cost (ivs: set).infinite_cost_p ())
7212	{
7213	if (dump_file && (dump_flags & TDF_DETAILS))
7214	fprintf (stream: dump_file,
7215	format: "Overflow to infinite cost in try_improve_iv_set.\n");
7216	iv_ca_free (ivs: &set);
7217	}
7218	return set;
7219	}
7220
7221	static class iv_ca *
7222	find_optimal_iv_set (struct ivopts_data *data)
7223	{
7224	unsigned i;
7225	comp_cost cost, origcost;
7226	class iv_ca set, origset;
7227
7228	/ Determine the cost based on a strategy that starts with original IVs,*
7229	and try again using a strategy that prefers candidates not based
7230	on any IVs. /*
7231	origset = find_optimal_iv_set_1 (data, originalp: true);
7232	set = find_optimal_iv_set_1 (data, originalp: false);
7233
7234	if (!origset && !set)
7235	return NULL;
7236
7237	origcost = origset ? iv_ca_cost (ivs: origset) : infinite_cost;
7238	cost = set ? iv_ca_cost (ivs: set) : infinite_cost;
7239
7240	if (dump_file && (dump_flags & TDF_DETAILS))
7241	{
7242	fprintf (stream: dump_file, format: "Original cost %" PRId64 " (complexity %d)\n\n",
7243	origcost.cost, origcost.complexity);
7244	fprintf (stream: dump_file, format: "Final cost %" PRId64 " (complexity %d)\n\n",
7245	cost.cost, cost.complexity);
7246	}
7247
7248	/ Choose the one with the best cost. /
7249	if (origcost <= cost)
7250	{
7251	if (set)
7252	iv_ca_free (ivs: &set);
7253	set = origset;
7254	}
7255	else if (origset)
7256	iv_ca_free (ivs: &origset);
7257
7258	for (i = `0`; i < data->vgroups.length (); i++)
7259	{
7260	struct iv_group *group = data->vgroups [i];
7261	group->selected = iv_ca_cand_for_group (ivs: set, group)->cand;
7262	}
7263
7264	return set;
7265	}
7266
7267	/ Creates a new induction variable corresponding to CAND. /
7268
7269	static void
7270	create_new_iv (struct ivopts_data data, struct* iv_cand *cand)
7271	{
7272	gimple_stmt_iterator incr_pos;
7273	tree base;
7274	struct iv_use *use;
7275	struct iv_group *group;
7276	bool after = false;
7277
7278	gcc_assert (cand->iv != NULL);
7279
7280	switch (cand->pos)
7281	{
7282	case IP_NORMAL:
7283	incr_pos = gsi_last_bb (bb: ip_normal_pos (data->current_loop));
7284	break;
7285
7286	case IP_END:
7287	incr_pos = gsi_last_bb (bb: ip_end_pos (data->current_loop));
7288	after = true;
7289	if (!gsi_end_p (i: incr_pos) && stmt_ends_bb_p (gsi_stmt (i: incr_pos)))
7290	{
7291	edge e = find_edge (gsi_bb (i: incr_pos), data->current_loop->header);
7292	incr_pos = gsi_after_labels (bb: split_edge (e));
7293	after = false;
7294	}
7295	break;
7296
7297	case IP_AFTER_USE:
7298	after = true;
7299	/ fall through /
7300	case IP_BEFORE_USE:
7301	incr_pos = gsi_for_stmt (cand->incremented_at);
7302	break;
7303
7304	case IP_ORIGINAL:
7305	/ Mark that the iv is preserved. /
7306	name_info (data, name: cand->var_before)->preserve_biv = true;
7307	name_info (data, name: cand->var_after)->preserve_biv = true;
7308
7309	/ Rewrite the increment so that it uses var_before directly. /
7310	use = find_interesting_uses_op (data, op: cand->var_after);
7311	group = data->vgroups [use->group_id];
7312	group->selected = cand;
7313	return;
7314	}
7315
7316	gimple_add_tmp_var (cand->var_before);
7317
7318	base = unshare_expr (cand->iv->base);
7319
7320	create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7321	cand->var_before, data->current_loop,
7322	&incr_pos, after, &cand->var_before, &cand->var_after);
7323	}
7324
7325	/ Creates new induction variables described in SET. /
7326
7327	static void
7328	create_new_ivs (struct ivopts_data data, class* iv_ca *set)
7329	{
7330	unsigned i;
7331	struct iv_cand *cand;
7332	bitmap_iterator bi;
7333
7334	EXECUTE_IF_SET_IN_BITMAP (set->cands, `0`, i, bi)
7335	{
7336	cand = data->vcands [i];
7337	create_new_iv (data, cand);
7338	}
7339
7340	if (dump_file && (dump_flags & TDF_DETAILS))
7341	{
7342	fprintf (stream: dump_file, format: "Selected IV set for loop %d",
7343	data->current_loop->num);
7344	if (data->loop_loc != UNKNOWN_LOCATION)
7345	fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
7346	LOCATION_LINE (data->loop_loc));
7347	fprintf (stream: dump_file, format: ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7348	avg_loop_niter (loop: data->current_loop));
7349	fprintf (stream: dump_file, format: ", %lu IVs:\n", bitmap_count_bits (set->cands));
7350	EXECUTE_IF_SET_IN_BITMAP (set->cands, `0`, i, bi)
7351	{
7352	cand = data->vcands [i];
7353	dump_cand (file: dump_file, cand);
7354	}
7355	fprintf (stream: dump_file, format: "\n");
7356	}
7357	}
7358
7359	/ Rewrites USE (definition of iv used in a nonlinear expression)*
7360	using candidate CAND. /*
7361
7362	static void
7363	rewrite_use_nonlinear_expr (struct ivopts_data *data,
7364	struct iv_use use, struct* iv_cand *cand)
7365	{
7366	gassign *ass;
7367	gimple_stmt_iterator bsi;
7368	tree comp, type = get_use_type (use), tgt;
7369
7370	/ An important special case -- if we are asked to express value of*
7371	the original iv by itself, just exit; there is no need to
7372	introduce a new computation (that might also need casting the
7373	variable to unsigned and back). /*
7374	if (cand->pos == IP_ORIGINAL
7375	&& cand->incremented_at == use->stmt)
7376	{
7377	tree op = NULL_TREE;
7378	enum tree_code stmt_code;
7379
7380	gcc_assert (is_gimple_assign (use->stmt));
7381	gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7382
7383	/ Check whether we may leave the computation unchanged.*
7384	This is the case only if it does not rely on other
7385	computations in the loop -- otherwise, the computation
7386	we rely upon may be removed in remove_unused_ivs,
7387	thus leading to ICE. /*
7388	stmt_code = gimple_assign_rhs_code (gs: use->stmt);
7389	if (stmt_code == PLUS_EXPR
7390	\|\| stmt_code == MINUS_EXPR
7391	\|\| stmt_code == POINTER_PLUS_EXPR)
7392	{
7393	if (gimple_assign_rhs1 (gs: use->stmt) == cand->var_before)
7394	op = gimple_assign_rhs2 (gs: use->stmt);
7395	else if (gimple_assign_rhs2 (gs: use->stmt) == cand->var_before)
7396	op = gimple_assign_rhs1 (gs: use->stmt);
7397	}
7398
7399	if (op != NULL_TREE)
7400	{
7401	if (expr_invariant_in_loop_p (loop: data->current_loop, expr: op))
7402	return;
7403	if (TREE_CODE (op) == SSA_NAME)
7404	{
7405	struct iv *iv = get_iv (data, var: op);
7406	if (iv != NULL && integer_zerop (iv->step))
7407	return;
7408	}
7409	}
7410	}
7411
7412	switch (gimple_code (g: use->stmt))
7413	{
7414	case GIMPLE_PHI:
7415	tgt = PHI_RESULT (use->stmt);
7416
7417	/ If we should keep the biv, do not replace it. /
7418	if (name_info (data, name: tgt)->preserve_biv)
7419	return;
7420
7421	bsi = gsi_after_labels (bb: gimple_bb (g: use->stmt));
7422	break;
7423
7424	case GIMPLE_ASSIGN:
7425	tgt = gimple_assign_lhs (gs: use->stmt);
7426	bsi = gsi_for_stmt (use->stmt);
7427	break;
7428
7429	default:
7430	gcc_unreachable ();
7431	}
7432
7433	aff_tree aff_inv, aff_var;
7434	if (!get_computation_aff_1 (loop: data->current_loop, at: use->stmt,
7435	use, cand, aff_inv: &aff_inv, aff_var: &aff_var))
7436	gcc_unreachable ();
7437
7438	unshare_aff_combination (&aff_inv);
7439	unshare_aff_combination (&aff_var);
7440	/ Prefer CSE opportunity than loop invariant by adding offset at last*
7441	so that iv_uses have different offsets can be CSEed. /*
7442	poly_widest_int offset = aff_inv.offset;
7443	aff_inv.offset = `0`;
7444
7445	gimple_seq stmt_list = NULL, seq = NULL;
7446	tree comp_op1 = aff_combination_to_tree (&aff_inv);
7447	tree comp_op2 = aff_combination_to_tree (&aff_var);
7448	gcc_assert (comp_op1 && comp_op2);
7449
7450	comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7451	gimple_seq_add_seq (&stmt_list, seq);
7452	comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7453	gimple_seq_add_seq (&stmt_list, seq);
7454
7455	if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7456	std::swap (a&: comp_op1, b&: comp_op2);
7457
7458	if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7459	{
7460	comp = fold_build_pointer_plus (comp_op1,
7461	fold_convert (sizetype, comp_op2));
7462	comp = fold_build_pointer_plus (comp,
7463	wide_int_to_tree (sizetype, offset));
7464	}
7465	else
7466	{
7467	comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7468	fold_convert (TREE_TYPE (comp_op1), comp_op2));
7469	comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7470	wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7471	}
7472
7473	comp = fold_convert (type, comp);
7474	comp = force_gimple_operand (comp, &seq, false, NULL);
7475	gimple_seq_add_seq (&stmt_list, seq);
7476	if (gimple_code (g: use->stmt) != GIMPLE_PHI
7477	/ We can't allow re-allocating the stmt as it might be pointed*
7478	to still. /*
7479	&& (get_gimple_rhs_num_ops (TREE_CODE (comp))
7480	>= gimple_num_ops (gs: gsi_stmt (i: bsi))))
7481	{
7482	comp = force_gimple_operand (comp, &seq, true, NULL);
7483	gimple_seq_add_seq (&stmt_list, seq);
7484	if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7485	{
7486	duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7487	/ As this isn't a plain copy we have to reset alignment*
7488	information. /*
7489	if (SSA_NAME_PTR_INFO (comp))
7490	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7491	}
7492	}
7493
7494	gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7495	if (gimple_code (g: use->stmt) == GIMPLE_PHI)
7496	{
7497	ass = gimple_build_assign (tgt, comp);
7498	gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7499
7500	bsi = gsi_for_stmt (use->stmt);
7501	remove_phi_node (&bsi, false);
7502	}
7503	else
7504	{
7505	gimple_assign_set_rhs_from_tree (&bsi, comp);
7506	use->stmt = gsi_stmt (i: bsi);
7507	}
7508	}
7509
7510	/ Performs a peephole optimization to reorder the iv update statement with*
7511	a mem ref to enable instruction combining in later phases. The mem ref uses
7512	the iv value before the update, so the reordering transformation requires
7513	adjustment of the offset. CAND is the selected IV_CAND.
7514
7515	Example:
7516
7517	t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7518	iv2 = iv1 + 1;
7519
7520	if (t < val) (1)
7521	goto L;
7522	goto Head;
7523
7524
7525	directly propagating t over to (1) will introduce overlapping live range
7526	thus increase register pressure. This peephole transform it into:
7527
7528
7529	iv2 = iv1 + 1;
7530	t = MEM_REF (base, iv2, 8, 8);
7531	if (t < val)
7532	goto L;
7533	goto Head;
7534	*/
7535
7536	static void
7537	adjust_iv_update_pos (struct iv_cand cand, struct* iv_use *use)
7538	{
7539	tree var_after;
7540	gimple iv_update, stmt;
7541	basic_block bb;
7542	gimple_stmt_iterator gsi, gsi_iv;
7543
7544	if (cand->pos != IP_NORMAL)
7545	return;
7546
7547	var_after = cand->var_after;
7548	iv_update = SSA_NAME_DEF_STMT (var_after);
7549
7550	bb = gimple_bb (g: iv_update);
7551	gsi = gsi_last_nondebug_bb (bb);
7552	stmt = gsi_stmt (i: gsi);
7553
7554	/ Only handle conditional statement for now. /
7555	if (gimple_code (g: stmt) != GIMPLE_COND)
7556	return;
7557
7558	gsi_prev_nondebug (i: &gsi);
7559	stmt = gsi_stmt (i: gsi);
7560	if (stmt != iv_update)
7561	return;
7562
7563	gsi_prev_nondebug (i: &gsi);
7564	if (gsi_end_p (i: gsi))
7565	return;
7566
7567	stmt = gsi_stmt (i: gsi);
7568	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
7569	return;
7570
7571	if (stmt != use->stmt)
7572	return;
7573
7574	if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7575	return;
7576
7577	if (dump_file && (dump_flags & TDF_DETAILS))
7578	{
7579	fprintf (stream: dump_file, format: "Reordering \n");
7580	print_gimple_stmt (dump_file, iv_update, `0`);
7581	print_gimple_stmt (dump_file, use->stmt, `0`);
7582	fprintf (stream: dump_file, format: "\n");
7583	}
7584
7585	gsi = gsi_for_stmt (use->stmt);
7586	gsi_iv = gsi_for_stmt (iv_update);
7587	gsi_move_before (&gsi_iv, &gsi);
7588
7589	cand->pos = IP_BEFORE_USE;
7590	cand->incremented_at = use->stmt;
7591	}
7592
7593	/ Return the alias pointer type that should be used for a MEM_REF*
7594	associated with USE, which has type USE_PTR_ADDRESS. /*
7595
7596	static tree
7597	get_alias_ptr_type_for_ptr_address (iv_use *use)
7598	{
7599	gcall call = as_a <gcall > (p: use->stmt);
7600	switch (gimple_call_internal_fn (gs: call))
7601	{
7602	case IFN_MASK_LOAD:
7603	case IFN_MASK_STORE:
7604	case IFN_MASK_LOAD_LANES:
7605	case IFN_MASK_STORE_LANES:
7606	case IFN_MASK_LEN_LOAD_LANES:
7607	case IFN_MASK_LEN_STORE_LANES:
7608	case IFN_LEN_LOAD:
7609	case IFN_LEN_STORE:
7610	case IFN_MASK_LEN_LOAD:
7611	case IFN_MASK_LEN_STORE:
7612	/ The second argument contains the correct alias type. /
7613	gcc_assert (use->op_p = gimple_call_arg_ptr (call, `0`));
7614	return TREE_TYPE (gimple_call_arg (call, `1`));
7615
7616	default:
7617	gcc_unreachable ();
7618	}
7619	}
7620
7621
7622	/ Rewrites USE (address that is an iv) using candidate CAND. /
7623
7624	static void
7625	rewrite_use_address (struct ivopts_data *data,
7626	struct iv_use use, struct* iv_cand *cand)
7627	{
7628	aff_tree aff;
7629	bool ok;
7630
7631	adjust_iv_update_pos (cand, use);
7632	ok = get_computation_aff (loop: data->current_loop, at: use->stmt, use, cand, aff: &aff);
7633	gcc_assert (ok);
7634	unshare_aff_combination (&aff);
7635
7636	/ To avoid undefined overflow problems, all IV candidates use unsigned*
7637	integer types. The drawback is that this makes it impossible for
7638	create_mem_ref to distinguish an IV that is based on a memory object
7639	from one that represents simply an offset.
7640
7641	To work around this problem, we pass a hint to create_mem_ref that
7642	indicates which variable (if any) in aff is an IV based on a memory
7643	object. Note that we only consider the candidate. If this is not
7644	based on an object, the base of the reference is in some subexpression
7645	of the use -- but these will use pointer types, so they are recognized
7646	by the create_mem_ref heuristics anyway. /*
7647	tree iv = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7648	tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7649	gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7650	tree type = use->mem_type;
7651	tree alias_ptr_type;
7652	if (use->type == USE_PTR_ADDRESS)
7653	alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7654	else
7655	{
7656	gcc_assert (type == TREE_TYPE (*use->op_p));
7657	unsigned int align = get_object_alignment (*use->op_p);
7658	if (align != TYPE_ALIGN (type))
7659	type = build_aligned_type (type, align);
7660	alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7661	}
7662	tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7663	iv, base_hint, data->speed);
7664
7665	if (use->type == USE_PTR_ADDRESS)
7666	{
7667	ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7668	ref = fold_convert (get_use_type (use), ref);
7669	ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7670	true, GSI_SAME_STMT);
7671	}
7672	else
7673	{
7674	/ When we end up confused enough and have no suitable base but*
7675	stuffed everything to index2 use a LEA for the address and
7676	create a plain MEM_REF to avoid basing a memory reference
7677	on address zero which create_mem_ref_raw does as fallback. /*
7678	if (TREE_CODE (ref) == TARGET_MEM_REF
7679	&& TMR_INDEX2 (ref) != NULL_TREE
7680	&& integer_zerop (TREE_OPERAND (ref, `0`)))
7681	{
7682	ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, `0`)), ref);
7683	ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7684	true, GSI_SAME_STMT);
7685	ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7686	}
7687	copy_ref_info (ref, *use->op_p);
7688	}
7689
7690	*use->op_p = ref;
7691	}
7692
7693	/ Rewrites USE (the condition such that one of the arguments is an iv) using*
7694	candidate CAND. /*
7695
7696	static void
7697	rewrite_use_compare (struct ivopts_data *data,
7698	struct iv_use use, struct* iv_cand *cand)
7699	{
7700	tree comp, op, bound;
7701	gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7702	enum tree_code compare;
7703	struct iv_group *group = data->vgroups [use->group_id];
7704	class cost_pair *cp = get_group_iv_cost (data, group, cand);
7705
7706	bound = cp->value;
7707	if (bound)
7708	{
7709	tree var = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7710	tree var_type = TREE_TYPE (var);
7711	gimple_seq stmts;
7712
7713	if (dump_file && (dump_flags & TDF_DETAILS))
7714	{
7715	fprintf (stream: dump_file, format: "Replacing exit test: ");
7716	print_gimple_stmt (dump_file, use->stmt, `0`, TDF_SLIM);
7717	}
7718	compare = cp->comp;
7719	bound = unshare_expr (fold_convert (var_type, bound));
7720	op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7721	if (stmts)
7722	gsi_insert_seq_on_edge_immediate (
7723	loop_preheader_edge (data->current_loop),
7724	stmts);
7725
7726	gcond cond_stmt = as_a <gcond > (p: use->stmt);
7727	gimple_cond_set_lhs (gs: cond_stmt, lhs: var);
7728	gimple_cond_set_code (gs: cond_stmt, code: compare);
7729	gimple_cond_set_rhs (gs: cond_stmt, rhs: op);
7730	return;
7731	}
7732
7733	/ The induction variable elimination failed; just express the original*
7734	giv. /*
7735	comp = get_computation_at (loop: data->current_loop, at: use->stmt, use, cand);
7736	gcc_assert (comp != NULL_TREE);
7737	gcc_assert (use->op_p != NULL);
7738	use->op_p = force_gimple_operand_gsi (&bsi, comp, true*,
7739	SSA_NAME_VAR (*use->op_p),
7740	true, GSI_SAME_STMT);
7741	}
7742
7743	/ Rewrite the groups using the selected induction variables. /
7744
7745	static void
7746	rewrite_groups (struct ivopts_data *data)
7747	{
7748	unsigned i, j;
7749
7750	for (i = `0`; i < data->vgroups.length (); i++)
7751	{
7752	struct iv_group *group = data->vgroups [i];
7753	struct iv_cand *cand = group->selected;
7754
7755	gcc_assert (cand);
7756
7757	if (group->type == USE_NONLINEAR_EXPR)
7758	{
7759	for (j = `0`; j < group->vuses.length (); j++)
7760	{
7761	rewrite_use_nonlinear_expr (data, use: group->vuses [j], cand);
7762	update_stmt (s: group->vuses [j]->stmt);
7763	}
7764	}
7765	else if (address_p (type: group->type))
7766	{
7767	for (j = `0`; j < group->vuses.length (); j++)
7768	{
7769	rewrite_use_address (data, use: group->vuses [j], cand);
7770	update_stmt (s: group->vuses [j]->stmt);
7771	}
7772	}
7773	else
7774	{
7775	gcc_assert (group->type == USE_COMPARE);
7776
7777	for (j = `0`; j < group->vuses.length (); j++)
7778	{
7779	rewrite_use_compare (data, use: group->vuses [j], cand);
7780	update_stmt (s: group->vuses [j]->stmt);
7781	}
7782	}
7783	}
7784	}
7785
7786	/ Removes the ivs that are not used after rewriting. /
7787
7788	static void
7789	remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7790	{
7791	unsigned j;
7792	bitmap_iterator bi;
7793
7794	/ Figure out an order in which to release SSA DEFs so that we don't*
7795	release something that we'd have to propagate into a debug stmt
7796	afterwards. /*
7797	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, j, bi)
7798	{
7799	struct version_info *info;
7800
7801	info = ver_info (data, ver: j);
7802	if (info->iv
7803	&& !integer_zerop (info->iv->step)
7804	&& !info->inv_id
7805	&& !info->iv->nonlin_use
7806	&& !info->preserve_biv)
7807	{
7808	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7809
7810	tree def = info->iv->ssa_name;
7811
7812	if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7813	{
7814	imm_use_iterator imm_iter;
7815	use_operand_p use_p;
7816	gimple *stmt;
7817	int count = `0`;
7818
7819	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7820	{
7821	if (!gimple_debug_bind_p (s: stmt))
7822	continue;
7823
7824	/ We just want to determine whether to do nothing*
7825	(count == 0), to substitute the computed
7826	expression into a single use of the SSA DEF by
7827	itself (count == 1), or to use a debug temp
7828	because the SSA DEF is used multiple times or as
7829	part of a larger expression (count > 1). /*
7830	count++;
7831	if (gimple_debug_bind_get_value (dbg: stmt) != def)
7832	count++;
7833
7834	if (count > `1`)
7835	break;
7836	}
7837
7838	if (!count)
7839	continue;
7840
7841	struct iv_use dummy_use;
7842	struct iv_cand best_cand = NULL, cand;
7843	unsigned i, best_pref = `0`, cand_pref;
7844	tree comp = NULL_TREE;
7845
7846	memset (s: &dummy_use, c: `0`, n: sizeof (dummy_use));
7847	dummy_use.iv = info->iv;
7848	for (i = `0`; i < data->vgroups.length () && i < `64`; i++)
7849	{
7850	cand = data->vgroups [i]->selected;
7851	if (cand == best_cand)
7852	continue;
7853	cand_pref = operand_equal_p (cand->iv->step,
7854	info->iv->step, flags: `0`)
7855	? `4` : `0`;
7856	cand_pref
7857	+= TYPE_MODE (TREE_TYPE (cand->iv->base))
7858	== TYPE_MODE (TREE_TYPE (info->iv->base))
7859	? `2` : `0`;
7860	cand_pref
7861	+= TREE_CODE (cand->iv->base) == INTEGER_CST
7862	? `1` : `0`;
7863	if (best_cand == NULL \|\| best_pref < cand_pref)
7864	{
7865	tree this_comp
7866	= get_debug_computation_at (loop: data->current_loop,
7867	SSA_NAME_DEF_STMT (def),
7868	use: &dummy_use, cand);
7869	if (this_comp)
7870	{
7871	best_cand = cand;
7872	best_pref = cand_pref;
7873	comp = this_comp;
7874	}
7875	}
7876	}
7877
7878	if (!best_cand)
7879	continue;
7880
7881	comp = unshare_expr (comp);
7882	if (count > `1`)
7883	{
7884	tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7885	/ FIXME: Is setting the mode really necessary? /
7886	if (SSA_NAME_VAR (def))
7887	SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7888	else
7889	SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7890	gdebug *def_temp
7891	= gimple_build_debug_bind (vexpr, comp, NULL);
7892	gimple_stmt_iterator gsi;
7893
7894	if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7895	gsi = gsi_after_labels (bb: gimple_bb
7896	(SSA_NAME_DEF_STMT (def)));
7897	else
7898	gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7899
7900	gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7901	comp = vexpr;
7902	}
7903
7904	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7905	{
7906	if (!gimple_debug_bind_p (s: stmt))
7907	continue;
7908
7909	FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7910	SET_USE (use_p, comp);
7911
7912	update_stmt (s: stmt);
7913	}
7914	}
7915	}
7916	}
7917	}
7918
7919	/ Frees memory occupied by class tree_niter_desc in VALUE. Callback
7920	for hash_map::traverse. /*
7921
7922	bool
7923	free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7924	{
7925	if (value)
7926	{
7927	value->~tree_niter_desc ();
7928	free (ptr: value);
7929	}
7930	return true;
7931	}
7932
7933	/ Frees data allocated by the optimization of a single loop. /
7934
7935	static void
7936	free_loop_data (struct ivopts_data *data)
7937	{
7938	unsigned i, j;
7939	bitmap_iterator bi;
7940	tree obj;
7941
7942	if (data->niters)
7943	{
7944	data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7945	delete data->niters;
7946	data->niters = NULL;
7947	}
7948
7949	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
7950	{
7951	struct version_info *info;
7952
7953	info = ver_info (data, ver: i);
7954	info->iv = NULL;
7955	info->has_nonlin_use = false;
7956	info->preserve_biv = false;
7957	info->inv_id = `0`;
7958	}
7959	bitmap_clear (data->relevant);
7960	bitmap_clear (data->important_candidates);
7961
7962	for (i = `0`; i < data->vgroups.length (); i++)
7963	{
7964	struct iv_group *group = data->vgroups [i];
7965
7966	for (j = `0`; j < group->vuses.length (); j++)
7967	free (ptr: group->vuses [j]);
7968	group->vuses.release ();
7969
7970	BITMAP_FREE (group->related_cands);
7971	for (j = `0`; j < group->n_map_members; j++)
7972	{
7973	if (group->cost_map[j].inv_vars)
7974	BITMAP_FREE (group->cost_map[j].inv_vars);
7975	if (group->cost_map[j].inv_exprs)
7976	BITMAP_FREE (group->cost_map[j].inv_exprs);
7977	}
7978
7979	free (ptr: group->cost_map);
7980	free (ptr: group);
7981	}
7982	data->vgroups.truncate (size: `0`);
7983
7984	for (i = `0`; i < data->vcands.length (); i++)
7985	{
7986	struct iv_cand *cand = data->vcands [i];
7987
7988	if (cand->inv_vars)
7989	BITMAP_FREE (cand->inv_vars);
7990	if (cand->inv_exprs)
7991	BITMAP_FREE (cand->inv_exprs);
7992	free (ptr: cand);
7993	}
7994	data->vcands.truncate (size: `0`);
7995
7996	if (data->version_info_size < num_ssa_names)
7997	{
7998	data->version_info_size = `2` * num_ssa_names;
7999	free (ptr: data->version_info);
8000	data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8001	}
8002
8003	data->max_inv_var_id = `0`;
8004	data->max_inv_expr_id = `0`;
8005
8006	FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
8007	SET_DECL_RTL (obj, NULL_RTX);
8008
8009	decl_rtl_to_reset.truncate (size: `0`);
8010
8011	data->inv_expr_tab->empty ();
8012
8013	data->iv_common_cand_tab->empty ();
8014	data->iv_common_cands.truncate (size: `0`);
8015	}
8016
8017	/ Finalizes data structures used by the iv optimization pass. LOOPS is the*
8018	loop tree. /*
8019
8020	static void
8021	tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8022	{
8023	free_loop_data (data);
8024	free (ptr: data->version_info);
8025	BITMAP_FREE (data->relevant);
8026	BITMAP_FREE (data->important_candidates);
8027
8028	decl_rtl_to_reset.release ();
8029	data->vgroups.release ();
8030	data->vcands.release ();
8031	delete data->inv_expr_tab;
8032	data->inv_expr_tab = NULL;
8033	free_affine_expand_cache (&data->name_expansion_cache);
8034	if (data->base_object_map)
8035	delete data->base_object_map;
8036	delete data->iv_common_cand_tab;
8037	data->iv_common_cand_tab = NULL;
8038	data->iv_common_cands.release ();
8039	obstack_free (&data->iv_obstack, NULL);
8040	}
8041
8042	/ Returns true if the loop body BODY includes any function calls. /
8043
8044	static bool
8045	loop_body_includes_call (basic_block body, unsigned* num_nodes)
8046	{
8047	gimple_stmt_iterator gsi;
8048	unsigned i;
8049
8050	for (i = `0`; i < num_nodes; i++)
8051	for (gsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
8052	{
8053	gimple *stmt = gsi_stmt (i: gsi);
8054	if (is_gimple_call (gs: stmt)
8055	&& !gimple_call_internal_p (gs: stmt)
8056	&& !is_inexpensive_builtin (gimple_call_fndecl (gs: stmt)))
8057	return true;
8058	}
8059	return false;
8060	}
8061
8062	/ Determine cost scaling factor for basic blocks in loop. /
8063	#define COST_SCALING_FACTOR_BOUND (20)
8064
8065	static void
8066	determine_scaling_factor (struct ivopts_data data, basic_block body)
8067	{
8068	int lfreq = data->current_loop->header->count.to_frequency (cfun);
8069	if (!data->speed \|\| lfreq <= `0`)
8070	return;
8071
8072	int max_freq = lfreq;
8073	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8074	{
8075	body[i]->aux = (void *)(intptr_t) `1`;
8076	if (max_freq < body[i]->count.to_frequency (cfun))
8077	max_freq = body[i]->count.to_frequency (cfun);
8078	}
8079	if (max_freq > lfreq)
8080	{
8081	int divisor, factor;
8082	/ Check if scaling factor itself needs to be scaled by the bound. This*
8083	is to avoid overflow when scaling cost according to profile info. /*
8084	if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8085	{
8086	divisor = max_freq;
8087	factor = COST_SCALING_FACTOR_BOUND;
8088	}
8089	else
8090	{
8091	divisor = lfreq;
8092	factor = `1`;
8093	}
8094	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8095	{
8096	int bfreq = body[i]->count.to_frequency (cfun);
8097	if (bfreq <= lfreq)
8098	continue;
8099
8100	body[i]->aux = (void)(intptr_t) (factor bfreq / divisor);
8101	}
8102	}
8103	}
8104
8105	/ Find doloop comparison use and set its doloop_p on if found. /
8106
8107	static bool
8108	find_doloop_use (struct ivopts_data *data)
8109	{
8110	struct loop *loop = data->current_loop;
8111
8112	for (unsigned i = `0`; i < data->vgroups.length (); i++)
8113	{
8114	struct iv_group *group = data->vgroups [i];
8115	if (group->type == USE_COMPARE)
8116	{
8117	gcc_assert (group->vuses.length () == `1`);
8118	struct iv_use *use = group->vuses [`0`];
8119	gimple *stmt = use->stmt;
8120	if (gimple_code (g: stmt) == GIMPLE_COND)
8121	{
8122	basic_block bb = gimple_bb (g: stmt);
8123	edge true_edge, false_edge;
8124	extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8125	/ This comparison is used for loop latch. Require latch is empty*
8126	for now. /*
8127	if ((loop->latch == true_edge->dest
8128	\|\| loop->latch == false_edge->dest)
8129	&& empty_block_p (loop->latch))
8130	{
8131	group->doloop_p = true;
8132	if (dump_file && (dump_flags & TDF_DETAILS))
8133	{
8134	fprintf (stream: dump_file, format: "Doloop cmp iv use: ");
8135	print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8136	}
8137	return true;
8138	}
8139	}
8140	}
8141	}
8142
8143	return false;
8144	}
8145
8146	/ For the targets which support doloop, to predict whether later RTL doloop*
8147	transformation will perform on this loop, further detect the doloop use and
8148	mark the flag doloop_use_p if predicted. /*
8149
8150	void
8151	analyze_and_mark_doloop_use (struct ivopts_data *data)
8152	{
8153	data->doloop_use_p = false;
8154
8155	if (!flag_branch_on_count_reg)
8156	return;
8157
8158	if (data->current_loop->unroll == USHRT_MAX)
8159	return;
8160
8161	if (!generic_predict_doloop_p (data))
8162	return;
8163
8164	if (find_doloop_use (data))
8165	{
8166	data->doloop_use_p = true;
8167	if (dump_file && (dump_flags & TDF_DETAILS))
8168	{
8169	struct loop *loop = data->current_loop;
8170	fprintf (stream: dump_file,
8171	format: "Predict loop %d can perform"
8172	" doloop optimization later.\n",
8173	loop->num);
8174	flow_loop_dump (loop, dump_file, NULL, `1`);
8175	}
8176	}
8177	}
8178
8179	/ Optimizes the LOOP. Returns true if anything changed. /
8180
8181	static bool
8182	tree_ssa_iv_optimize_loop (struct ivopts_data data, class* loop *loop,
8183	bitmap toremove)
8184	{
8185	bool changed = false;
8186	class iv_ca *iv_ca;
8187	edge exit = single_dom_exit (loop);
8188	basic_block *body;
8189
8190	gcc_assert (!data->niters);
8191	data->current_loop = loop;
8192	data->loop_loc = find_loop_location (loop).get_location_t ();
8193	data->speed = optimize_loop_for_speed_p (loop);
8194
8195	if (dump_file && (dump_flags & TDF_DETAILS))
8196	{
8197	fprintf (stream: dump_file, format: "Processing loop %d", loop->num);
8198	if (data->loop_loc != UNKNOWN_LOCATION)
8199	fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
8200	LOCATION_LINE (data->loop_loc));
8201	fprintf (stream: dump_file, format: "\n");
8202
8203	if (exit)
8204	{
8205	fprintf (stream: dump_file, format: " single exit %d -> %d, exit condition ",
8206	exit->src->index, exit->dest->index);
8207	print_gimple_stmt (dump_file, *gsi_last_bb (bb: exit->src),
8208	`0`, TDF_SLIM);
8209	fprintf (stream: dump_file, format: "\n");
8210	}
8211
8212	fprintf (stream: dump_file, format: "\n");
8213	}
8214
8215	body = get_loop_body (loop);
8216	data->body_includes_call = loop_body_includes_call (body, num_nodes: loop->num_nodes);
8217	renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8218
8219	data->loop_single_exit_p
8220	= exit != NULL && loop_only_exit_p (loop, body, exit);
8221
8222	/ For each ssa name determines whether it behaves as an induction variable*
8223	in some loop. /*
8224	if (!find_induction_variables (data, body))
8225	goto finish;
8226
8227	/ Finds interesting uses (item 1). /
8228	find_interesting_uses (data, body);
8229	if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8230	goto finish;
8231
8232	/ Determine cost scaling factor for basic blocks in loop. /
8233	determine_scaling_factor (data, body);
8234
8235	/ Analyze doloop possibility and mark the doloop use if predicted. /
8236	analyze_and_mark_doloop_use (data);
8237
8238	/ Finds candidates for the induction variables (item 2). /
8239	find_iv_candidates (data);
8240
8241	/ Calculates the costs (item 3, part 1). /
8242	determine_iv_costs (data);
8243	determine_group_iv_costs (data);
8244	determine_set_costs (data);
8245
8246	/ Find the optimal set of induction variables (item 3, part 2). /
8247	iv_ca = find_optimal_iv_set (data);
8248	/ Cleanup basic block aux field. /
8249	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8250	body[i]->aux = NULL;
8251	if (!iv_ca)
8252	goto finish;
8253	changed = true;
8254
8255	/ Create the new induction variables (item 4, part 1). /
8256	create_new_ivs (data, set: iv_ca);
8257	iv_ca_free (ivs: &iv_ca);
8258
8259	/ Rewrite the uses (item 4, part 2). /
8260	rewrite_groups (data);
8261
8262	/ Remove the ivs that are unused after rewriting. /
8263	remove_unused_ivs (data, toremove);
8264
8265	finish:
8266	free (ptr: body);
8267	free_loop_data (data);
8268
8269	return changed;
8270	}
8271
8272	/ Main entry point. Optimizes induction variables in loops. /
8273
8274	void
8275	tree_ssa_iv_optimize (void)
8276	{
8277	struct ivopts_data data;
8278	auto_bitmap toremove;
8279
8280	tree_ssa_iv_optimize_init (data: &data);
8281	mark_ssa_maybe_undefs ();
8282
8283	/ Optimize the loops starting with the innermost ones. /
8284	for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8285	{
8286	if (!dbg_cnt (index: ivopts_loop))
8287	continue;
8288
8289	if (dump_file && (dump_flags & TDF_DETAILS))
8290	flow_loop_dump (loop, dump_file, NULL, `1`);
8291
8292	tree_ssa_iv_optimize_loop (data: &data, loop, toremove);
8293	}
8294
8295	/ Remove eliminated IV defs. /
8296	release_defs_bitset (toremove);
8297
8298	/ We have changed the structure of induction variables; it might happen*
8299	that definitions in the scev database refer to some of them that were
8300	eliminated. /*
8301	scev_reset_htab ();
8302	/ Likewise niter and control-IV information. /
8303	free_numbers_of_iterations_estimates (cfun);
8304
8305	tree_ssa_iv_optimize_finalize (data: &data);
8306	}
8307
8308	#include "gt-tree-ssa-loop-ivopts.h"
8309

source code of gcc/tree-ssa-loop-ivopts.cc