1/* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20/* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction
         variable's value to be usable for the given purpose (adding base
         and offset for arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  (A small illustrative example is sketched in the comment
   following this one.)
68
   For targets supporting low-overhead loops, IVOPTs has to take care of
   the loops which will probably be transformed by the RTL doloop
   optimization, and try to make the selected IV candidate set optimal.
   The doloop support involves the following steps:

   1) Analyze whether the current loop will be transformed into a doloop
      or not; find and mark its compare type IV use as a doloop use
      (iv_group field doloop_p), and set the flag doloop_use_p of
      ivopts_data to notify subsequent doloop processing.  See
      analyze_and_mark_doloop_use and its callees for the details.  The
      target hook predict_doloop_p can be used for target specific checks.
79
   2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
      and set the flag doloop_p of this iv_cand; its step cost is set to
      zero and there is no extra cost as for a biv.  For cost determination
      between the doloop IV cand and an IV use, the target hooks
      doloop_cost_for_generic and doloop_cost_for_address are provided to
      add extra costs for generic type and address type IV uses.  Zero cost
      is assigned to the pair of doloop IV cand and doloop IV use, and
      bound zero is set for IV elimination.

   3) With the cost setting in step 2), the current cost model based IV
      selection algorithm proceeds as usual, picking up the doloop
      dedicated IV if profitable.  */
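
/* As a small, purely illustrative sketch (not from any particular
   testcase, and assuming 4-byte array elements), consider a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   The address computation for a[i] is an interesting address type use of
   the iv I, and I also appears in the exit compare i < n.  Besides the
   original iv, a candidate such as a pointer P starting at &a[0] and
   stepping by 4 may be created; if the cost model selects it, the access
   can be rewritten as *P (strength reduction) and the exit test as
   P != &a[n] (induction variable elimination), leaving the original
   counter dead.  */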
91
92#include "config.h"
93#include "system.h"
94#include "coretypes.h"
95#include "backend.h"
96#include "rtl.h"
97#include "tree.h"
98#include "gimple.h"
99#include "cfghooks.h"
100#include "tree-pass.h"
101#include "memmodel.h"
102#include "tm_p.h"
103#include "ssa.h"
104#include "expmed.h"
105#include "insn-config.h"
106#include "emit-rtl.h"
107#include "recog.h"
108#include "cgraph.h"
109#include "gimple-pretty-print.h"
110#include "alias.h"
111#include "fold-const.h"
112#include "stor-layout.h"
113#include "tree-eh.h"
114#include "gimplify.h"
115#include "gimple-iterator.h"
116#include "gimplify-me.h"
117#include "tree-cfg.h"
118#include "tree-ssa-loop-ivopts.h"
119#include "tree-ssa-loop-manip.h"
120#include "tree-ssa-loop-niter.h"
121#include "tree-ssa-loop.h"
122#include "explow.h"
123#include "expr.h"
124#include "tree-dfa.h"
125#include "tree-ssa.h"
126#include "cfgloop.h"
127#include "tree-scalar-evolution.h"
128#include "tree-affine.h"
129#include "tree-ssa-propagate.h"
130#include "tree-ssa-address.h"
131#include "builtins.h"
132#include "tree-vectorizer.h"
133#include "dbgcnt.h"
134#include "cfganal.h"
135
136/* For lang_hooks.types.type_for_mode. */
137#include "langhooks.h"
138
139/* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
142
143/* The infinite cost. */
144#define INFTY 1000000000
145
146/* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
149
150static inline HOST_WIDE_INT
151avg_loop_niter (class loop *loop)
152{
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
155 {
156 niter = likely_max_stmt_executions_int (loop);
157
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
160 }
161
162 return niter;
163}
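
/* For instance, when a loop has no profile-derived estimate and its
   likely-max bound is unknown or larger than param_avg_loop_niter,
   avg_loop_niter above falls back to param_avg_loop_niter, so all such
   loops are costed with the same assumed average trip count.  */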
164
165struct iv_use;
166
167/* Representation of the induction variable. */
168struct iv
169{
170 tree base; /* Initial value of the iv. */
  tree base_object;	/* A memory object to which the induction variable points.  */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
  struct iv_use *nonlin_use;	/* The use recorded for this iv in a nonlinear expression, if any.  */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
179};
180
181/* Per-ssa version information (induction variable descriptions, etc.). */
182struct version_info
183{
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
190};
191
192/* Types of uses. */
193enum use_type
194{
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
202};
203
204/* Cost of a computation. */
205class comp_cost
206{
207public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
209 {}
210
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
213 {}
214
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
217
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
220
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
223
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
226
  /* Subtracts constant C from this comp_cost.  */
228 comp_cost operator-= (HOST_WIDE_INT c);
229
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
232
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
235
236 /* Subtracts costs COST1 and COST2. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
238
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
241
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
244
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
247
  /* Returns true if COST1 is smaller than or equal to COST2.  */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
250
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
257};
258
259static const comp_cost no_cost;
260static const comp_cost infinite_cost (INFTY, 0, INFTY);
261
262bool
263comp_cost::infinite_cost_p ()
264{
265 return cost == INFTY;
266}
267
268comp_cost
269operator+ (comp_cost cost1, comp_cost cost2)
270{
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
273
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
277
278 return cost1;
279}
280
281comp_cost
282operator- (comp_cost cost1, comp_cost cost2)
283{
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
286
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
289
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
292
293 return cost1;
294}
295
296comp_cost
297comp_cost::operator+= (comp_cost cost)
298{
299 *this = *this + cost;
300 return *this;
301}
302
303comp_cost
304comp_cost::operator+= (HOST_WIDE_INT c)
305{
306 if (c >= INFTY)
307 this->cost = INFTY;
308
309 if (infinite_cost_p ())
310 return *this;
311
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
314
315 return *this;
316}
317
318comp_cost
319comp_cost::operator-= (HOST_WIDE_INT c)
320{
321 if (infinite_cost_p ())
322 return *this;
323
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
326
327 return *this;
328}
329
330comp_cost
331comp_cost::operator/= (HOST_WIDE_INT c)
332{
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
336
337 this->cost /= c;
338
339 return *this;
340}
341
342comp_cost
343comp_cost::operator*= (HOST_WIDE_INT c)
344{
345 if (infinite_cost_p ())
346 return *this;
347
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
350
351 return *this;
352}
353
354comp_cost
355comp_cost::operator-= (comp_cost cost)
356{
357 *this = *this - cost;
358 return *this;
359}
360
361bool
362operator< (comp_cost cost1, comp_cost cost2)
363{
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
366
367 return cost1.cost < cost2.cost;
368}
369
370bool
371operator== (comp_cost cost1, comp_cost cost2)
372{
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
375}
376
377bool
378operator<= (comp_cost cost1, comp_cost cost2)
379{
380 return cost1 < cost2 || cost1 == cost2;
381}
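
/* For example, under the ordering defined above a cost of
   {cost = 4, complexity = 1} compares strictly smaller than
   {cost = 4, complexity = 2}: the complexity field only breaks ties
   between equal runtime costs.  */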
382
383struct iv_inv_expr_ent;
384
385/* The candidate - cost pair. */
386class cost_pair
387{
388public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
399};
400
401/* Use. */
402struct iv_use
403{
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
413
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64 addr_offset;
416 /* Const offset stripped from base address. */
417};
418
419/* Group of uses. */
420struct iv_group
421{
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
438};
439
440/* The position where the iv is computed. */
441enum iv_position
442{
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
448};
449
450/* The induction variable candidate. */
451struct iv_cand
452{
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
478};
479
480/* Hashtable entry for common candidate derived from iv uses. */
481class iv_common_cand
482{
483public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
489};
490
491/* Hashtable helpers. */
492
493struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
494{
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
497};
498
499/* Hash function for possible common candidates. */
500
501inline hashval_t
502iv_common_cand_hasher::hash (const iv_common_cand *ccand)
503{
504 return ccand->hash;
505}
506
507/* Hash table equality function for common candidates. */
508
509inline bool
510iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
512{
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, flags: 0)
515 && operand_equal_p (ccand1->step, ccand2->step, flags: 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
518}
519
520/* Loop invariant expression hashtable entry. */
521
522struct iv_inv_expr_ent
523{
524 /* Tree expression of the entry. */
525 tree expr;
  /* Unique identifier.  */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
530};
531
532/* Sort iv_inv_expr_ent pair A and B by id field. */
533
534static int
535sort_iv_inv_expr_ent (const void *a, const void *b)
536{
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
539
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
542
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
549}
550
551/* Hashtable helpers. */
552
553struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
554{
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
557};
558
559/* Return true if uses of type TYPE represent some form of address. */
560
561inline bool
562address_p (use_type type)
563{
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
565}
566
567/* Hash function for loop invariant expressions. */
568
569inline hashval_t
570iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
571{
572 return expr->hash;
573}
574
575/* Hash table equality function for expressions. */
576
577inline bool
578iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
580{
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, flags: 0);
583}
584
585struct ivopts_data
586{
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
590
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
593
594 /* Number of registers used in it. */
595 unsigned regs_used;
596
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
599
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
602
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
606
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
609
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
612
613 /* The candidates. */
614 vec<iv_cand *> vcands;
615
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
618
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
621
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
624
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
627
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
630
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
633
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
636
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
639
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
642
  /* Whether to consider all candidates when replacing a use, as opposed to
     just the related and important ones.  */
645 bool consider_all_candidates;
646
647 /* Are we optimizing for speed? */
648 bool speed;
649
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
652
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
655
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
658};
659
660/* An assignment of iv candidates to uses. */
661
662class iv_ca
663{
664public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
667
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
670
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
673
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
676
677 /* The candidates used. */
678 bitmap cands;
679
680 /* The number of candidates in the set. */
681 unsigned n_cands;
682
683 /* The number of invariants needed, including both invariant variants and
684 invariant expressions. */
685 unsigned n_invs;
686
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
689
690 /* Total cost of candidates. */
691 int64_t cand_cost;
692
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
695
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
698
699 /* Total cost of the assignment. */
700 comp_cost cost;
701};
702
703/* Difference of two iv candidate assignments. */
704
705struct iv_ca_delta
706{
707 /* Changed group. */
708 struct iv_group *group;
709
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
712
713 /* A new assignment. */
714 class cost_pair *new_cp;
715
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
718};
719
720/* Bound on number of candidates below that all candidates are considered. */
721
722#define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
724
725/* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
727
728#define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
730
731/* If there are at most this number of ivs in the set, try removing unnecessary
732 ivs from the set always. */
733
734#define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
736
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */
739
740static vec<tree> decl_rtl_to_reset;
741
742static comp_cost force_expr_to_var_cost (tree, bool);
743
744/* The single loop exit if it dominates the latch, NULL otherwise. */
745
746edge
747single_dom_exit (class loop *loop)
748{
749 edge exit = single_exit (loop);
750
751 if (!exit)
752 return NULL;
753
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
756
757 return exit;
758}
759
/* Dumps information about the induction variable IV to FILE.  Don't dump
   the variable's name if DUMP_NAME is FALSE.  The information is dumped
   with preceding spaces indicated by INDENT_LEVEL.  */
763
764void
765dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
766{
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
769
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
773
774 fprintf (stream: file, format: "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
776 {
777 fprintf (stream: file, format: "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (stream: file, format: "\n");
780 }
781
782 fprintf (stream: file, format: "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (stream: file, format: "\n");
785
786 fprintf (stream: file, format: "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (stream: file, format: "\n");
789
790 fprintf (stream: file, format: "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (stream: file, format: "\n");
793
794 if (iv->base_object)
795 {
796 fprintf (stream: file, format: "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (stream: file, format: "\n");
799 }
800
801 fprintf (stream: file, format: "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
802
  fprintf (file, "%s  Overflowness wrt loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
805}
806
807/* Dumps information about the USE to FILE. */
808
809void
810dump_use (FILE *file, struct iv_use *use)
811{
812 fprintf (stream: file, format: " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (stream: file, format: " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (stream: file, format: " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (stream: file, format: "\n");
819 dump_iv (file, iv: use->iv, dump_name: false, indent_level: 2);
820}
821
/* Dumps information about the groups of uses to FILE.  */
823
824void
825dump_groups (FILE *file, struct ivopts_data *data)
826{
827 unsigned i, j;
828 struct iv_group *group;
829
830 for (i = 0; i < data->vgroups.length (); i++)
831 {
832 group = data->vgroups[i];
833 fprintf (stream: file, format: "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (stream: file, format: " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (stream: file, format: " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (stream: file, format: " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
841 {
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (stream: file, format: " Type:\tCOMPARE\n");
844 }
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, use: group->vuses[j]);
847 }
848}
849
850/* Dumps information about induction variable candidate CAND to FILE. */
851
852void
853dump_cand (FILE *file, struct iv_cand *cand)
854{
855 struct iv *iv = cand->iv;
856
857 fprintf (stream: file, format: "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
859 {
860 fprintf (stream: file, format: " Depend on inv.vars: ");
861 dump_bitmap (file, map: cand->inv_vars);
862 }
863 if (cand->inv_exprs)
864 {
865 fprintf (stream: file, format: " Depend on inv.exprs: ");
866 dump_bitmap (file, map: cand->inv_exprs);
867 }
868
869 if (cand->var_before)
870 {
      fprintf (file, "  Var before: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (stream: file, format: "\n");
874 }
875 if (cand->var_after)
876 {
877 fprintf (stream: file, format: " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (stream: file, format: "\n");
880 }
881
882 switch (cand->pos)
883 {
884 case IP_NORMAL:
885 fprintf (stream: file, format: " Incr POS: before exit test\n");
886 break;
887
888 case IP_BEFORE_USE:
889 fprintf (stream: file, format: " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
891
892 case IP_AFTER_USE:
893 fprintf (stream: file, format: " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
895
896 case IP_END:
897 fprintf (stream: file, format: " Incr POS: at end\n");
898 break;
899
900 case IP_ORIGINAL:
901 fprintf (stream: file, format: " Incr POS: orig biv\n");
902 break;
903 }
904
905 dump_iv (file, iv, dump_name: false, indent_level: 1);
906}
907
908/* Returns the info for ssa version VER. */
909
910static inline struct version_info *
911ver_info (struct ivopts_data *data, unsigned ver)
912{
913 return data->version_info + ver;
914}
915
916/* Returns the info for ssa name NAME. */
917
918static inline struct version_info *
919name_info (struct ivopts_data *data, tree name)
920{
921 return ver_info (data, SSA_NAME_VERSION (name));
922}
923
924/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
926
927static bool
928stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
929{
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (g: stmt);
931
932 gcc_assert (bb);
933
934 if (sbb == loop->latch)
935 return true;
936
937 if (sbb != bb)
938 return false;
939
940 return stmt == last_nondebug_stmt (bb);
941}
942
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
946
947static bool
948stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
949{
950 basic_block cand_bb = gimple_bb (g: cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (g: stmt);
952
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
955
956 if (stmt_bb != cand_bb)
957 return true;
958
959 if (true_if_equal
960 && gimple_uid (g: stmt) == gimple_uid (g: cand->incremented_at))
961 return true;
962 return gimple_uid (g: stmt) > gimple_uid (g: cand->incremented_at);
963}
964
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
967
968static bool
969stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
970{
971 switch (cand->pos)
972 {
973 case IP_END:
974 return false;
975
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
978
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, true_if_equal: false);
982
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true_if_equal: true);
985
986 default:
987 gcc_unreachable ();
988 }
989}
990
991/* walk_tree callback for contains_abnormal_ssa_name_p. */
992
993static tree
994contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
995{
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
999
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1002
1003 return NULL_TREE;
1004}
1005
1006/* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1008
1009bool
1010contains_abnormal_ssa_name_p (tree expr)
1011{
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014}
1015
1016/* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1018
1019static class tree_niter_desc *
1020niter_for_exit (struct ivopts_data *data, edge exit)
1021{
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1024
1025 if (!data->niters)
1026 {
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1029 }
1030 else
1031 slot = data->niters->get (k: exit);
1032
1033 if (!slot)
1034 {
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 ::new (static_cast<void*> (desc)) tree_niter_desc ();
1040 if (!number_of_iterations_exit (data->current_loop,
1041 exit, niter: desc, true)
1042 || contains_abnormal_ssa_name_p (expr: desc->niter))
1043 {
1044 desc->~tree_niter_desc ();
1045 XDELETE (desc);
1046 desc = NULL;
1047 }
1048 data->niters->put (k: exit, v: desc);
1049 }
1050 else
1051 desc = *slot;
1052
1053 return desc;
1054}
1055
1056/* Returns the structure describing number of iterations determined from
1057 single dominating exit of DATA->current_loop, or NULL if something
1058 goes wrong. */
1059
1060static class tree_niter_desc *
1061niter_for_single_dom_exit (struct ivopts_data *data)
1062{
1063 edge exit = single_dom_exit (loop: data->current_loop);
1064
1065 if (!exit)
1066 return NULL;
1067
1068 return niter_for_exit (data, exit);
1069}
1070
1071/* Initializes data structures used by the iv optimization pass, stored
1072 in DATA. */
1073
1074static void
1075tree_ssa_iv_optimize_init (struct ivopts_data *data)
1076{
1077 data->version_info_size = 2 * num_ssa_names;
1078 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1079 data->relevant = BITMAP_ALLOC (NULL);
1080 data->important_candidates = BITMAP_ALLOC (NULL);
1081 data->max_inv_var_id = 0;
1082 data->max_inv_expr_id = 0;
1083 data->niters = NULL;
1084 data->vgroups.create (nelems: 20);
1085 data->vcands.create (nelems: 20);
1086 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1087 data->name_expansion_cache = NULL;
1088 data->base_object_map = NULL;
1089 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1090 data->iv_common_cands.create (nelems: 20);
1091 decl_rtl_to_reset.create (nelems: 20);
1092 gcc_obstack_init (&data->iv_obstack);
1093}
1094
1095/* walk_tree callback for determine_base_object. */
1096
1097static tree
1098determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1099{
1100 tree_code code = TREE_CODE (*tp);
1101 tree obj = NULL_TREE;
1102 if (code == ADDR_EXPR)
1103 {
1104 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1105 if (!base)
1106 obj = *tp;
1107 else if (TREE_CODE (base) != MEM_REF)
1108 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1109 }
1110 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1111 obj = fold_convert (ptr_type_node, *tp);
1112
1113 if (!obj)
1114 {
1115 if (!EXPR_P (*tp))
1116 *walk_subtrees = 0;
1117
1118 return NULL_TREE;
1119 }
1120 /* Record special node for multiple base objects and stop. */
1121 if (*static_cast<tree *> (wdata))
1122 {
1123 *static_cast<tree *> (wdata) = integer_zero_node;
1124 return integer_zero_node;
1125 }
1126 /* Record the base object and continue looking. */
1127 *static_cast<tree *> (wdata) = obj;
1128 return NULL_TREE;
1129}
1130
/* Returns a memory object to which EXPR points, with caching.  Return NULL
   if we are able to determine that it does not point to any such object;
   specially return integer_zero_node if EXPR contains multiple base
   objects.  */
1134
1135static tree
1136determine_base_object (struct ivopts_data *data, tree expr)
1137{
1138 tree *slot, obj = NULL_TREE;
1139 if (data->base_object_map)
1140 {
1141 if ((slot = data->base_object_map->get(k: expr)) != NULL)
1142 return *slot;
1143 }
1144 else
1145 data->base_object_map = new hash_map<tree, tree>;
1146
1147 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1148 data->base_object_map->put (k: expr, v: obj);
1149 return obj;
1150}
1151
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */
1154
1155static bool
1156contain_complex_addr_expr (tree expr)
1157{
1158 bool res = false;
1159
1160 STRIP_NOPS (expr);
1161 switch (TREE_CODE (expr))
1162 {
1163 case POINTER_PLUS_EXPR:
1164 case PLUS_EXPR:
1165 case MINUS_EXPR:
1166 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1167 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1168 break;
1169
1170 case ADDR_EXPR:
1171 return (!DECL_P (TREE_OPERAND (expr, 0)));
1172
1173 default:
1174 return false;
1175 }
1176
1177 return res;
1178}
1179
1180/* Allocates an induction variable with given initial value BASE and step STEP
1181 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1182
1183static struct iv *
1184alloc_iv (struct ivopts_data *data, tree base, tree step,
1185 bool no_overflow = false)
1186{
1187 tree expr = base;
1188 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1189 sizeof (struct iv));
1190 gcc_assert (step != NULL_TREE);
1191
1192 /* Lower address expression in base except ones with DECL_P as operand.
1193 By doing this:
1194 1) More accurate cost can be computed for address expressions;
1195 2) Duplicate candidates won't be created for bases in different
1196 forms, like &a[0] and &a. */
1197 STRIP_NOPS (expr);
1198 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1199 || contain_complex_addr_expr (expr))
1200 {
1201 aff_tree comb;
1202 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1203 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 }
1205
1206 iv->base = base;
1207 iv->base_object = determine_base_object (data, expr: base);
1208 iv->step = step;
1209 iv->biv_p = false;
1210 iv->nonlin_use = NULL;
1211 iv->ssa_name = NULL_TREE;
1212 if (!no_overflow
1213 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1214 base, step))
1215 no_overflow = true;
1216 iv->no_overflow = no_overflow;
1217 iv->have_address_use = false;
1218
1219 return iv;
1220}
1221
1222/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1223 doesn't overflow. */
1224
1225static void
1226set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1227 bool no_overflow)
1228{
1229 struct version_info *info = name_info (data, name: iv);
1230
1231 gcc_assert (!info->iv);
1232
1233 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1234 info->iv = alloc_iv (data, base, step, no_overflow);
1235 info->iv->ssa_name = iv;
1236}
1237
1238/* Finds induction variable declaration for VAR. */
1239
1240static struct iv *
1241get_iv (struct ivopts_data *data, tree var)
1242{
1243 basic_block bb;
1244 tree type = TREE_TYPE (var);
1245
1246 if (!POINTER_TYPE_P (type)
1247 && !INTEGRAL_TYPE_P (type))
1248 return NULL;
1249
1250 if (!name_info (data, name: var)->iv)
1251 {
1252 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1253
1254 if (!bb
1255 || !flow_bb_inside_loop_p (data->current_loop, bb))
1256 {
1257 if (POINTER_TYPE_P (type))
1258 type = sizetype;
1259 set_iv (data, iv: var, base: var, step: build_int_cst (type, 0), no_overflow: true);
1260 }
1261 }
1262
1263 return name_info (data, name: var)->iv;
1264}
1265
1266/* Return the first non-invariant ssa var found in EXPR. */
1267
1268static tree
1269extract_single_var_from_expr (tree expr)
1270{
1271 int i, n;
1272 tree tmp;
1273 enum tree_code code;
1274
1275 if (!expr || is_gimple_min_invariant (expr))
1276 return NULL;
1277
1278 code = TREE_CODE (expr);
1279 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1280 {
1281 n = TREE_OPERAND_LENGTH (expr);
1282 for (i = 0; i < n; i++)
1283 {
1284 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1285
1286 if (tmp)
1287 return tmp;
1288 }
1289 }
1290 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291}
1292
1293/* Finds basic ivs. */
1294
1295static bool
1296find_bivs (struct ivopts_data *data)
1297{
1298 gphi *phi;
1299 affine_iv iv;
1300 tree step, type, base, stop;
1301 bool found = false;
1302 class loop *loop = data->current_loop;
1303 gphi_iterator psi;
1304
1305 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1306 {
1307 phi = psi.phi ();
1308
1309 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1310 continue;
1311
1312 if (virtual_operand_p (PHI_RESULT (phi)))
1313 continue;
1314
1315 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1316 continue;
1317
1318 if (integer_zerop (iv.step))
1319 continue;
1320
1321 step = iv.step;
1322 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any such ssa var, but since
	 that is expensive and the situation is unusual, we only do it for
	 the first one.

	 See PR64705 for the rationale.  */
1328 stop = extract_single_var_from_expr (expr: step);
1329 base = expand_simple_operations (base, stop);
1330 if (contains_abnormal_ssa_name_p (expr: base)
1331 || contains_abnormal_ssa_name_p (expr: step))
1332 continue;
1333
1334 type = TREE_TYPE (PHI_RESULT (phi));
1335 base = fold_convert (type, base);
1336 if (step)
1337 {
1338 if (POINTER_TYPE_P (type))
1339 step = convert_to_ptrofftype (step);
1340 else
1341 step = fold_convert (type, step);
1342 }
1343
1344 set_iv (data, PHI_RESULT (phi), base, step, no_overflow: iv.no_overflow);
1345 found = true;
1346 }
1347
1348 return found;
1349}
1350
1351/* Marks basic ivs. */
1352
1353static void
1354mark_bivs (struct ivopts_data *data)
1355{
1356 gphi *phi;
1357 gimple *def;
1358 tree var;
1359 struct iv *iv, *incr_iv;
1360 class loop *loop = data->current_loop;
1361 basic_block incr_bb;
1362 gphi_iterator psi;
1363
1364 data->bivs_not_used_in_addr = 0;
1365 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1366 {
1367 phi = psi.phi ();
1368
1369 iv = get_iv (data, PHI_RESULT (phi));
1370 if (!iv)
1371 continue;
1372
1373 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1374 def = SSA_NAME_DEF_STMT (var);
1375 /* Don't mark iv peeled from other one as biv. */
1376 if (def
1377 && gimple_code (g: def) == GIMPLE_PHI
1378 && gimple_bb (g: def) == loop->header)
1379 continue;
1380
1381 incr_iv = get_iv (data, var);
1382 if (!incr_iv)
1383 continue;
1384
1385 /* If the increment is in the subloop, ignore it. */
1386 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1387 if (incr_bb->loop_father != data->current_loop
1388 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1389 continue;
1390
1391 iv->biv_p = true;
1392 incr_iv->biv_p = true;
1393 if (iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1395 if (incr_iv->no_overflow)
1396 data->bivs_not_used_in_addr++;
1397 }
1398}
1399
1400/* Checks whether STMT defines a linear induction variable and stores its
1401 parameters to IV. */
1402
1403static bool
1404find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1405{
1406 tree lhs, stop;
1407 class loop *loop = data->current_loop;
1408
1409 iv->base = NULL_TREE;
1410 iv->step = NULL_TREE;
1411
1412 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1413 return false;
1414
1415 lhs = gimple_assign_lhs (gs: stmt);
1416 if (TREE_CODE (lhs) != SSA_NAME)
1417 return false;
1418
1419 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1420 return false;
1421
  /* Stop expanding the iv base at the first ssa var referred to by the iv
     step.  Ideally we should stop at any such ssa var, but since that is
     expensive and the situation is unusual, we only do it for the first
     one.

     See PR64705 for the rationale.  */
1427 stop = extract_single_var_from_expr (expr: iv->step);
1428 iv->base = expand_simple_operations (iv->base, stop);
1429 if (contains_abnormal_ssa_name_p (expr: iv->base)
1430 || contains_abnormal_ssa_name_p (expr: iv->step))
1431 return false;
1432
1433 /* If STMT could throw, then do not consider STMT as defining a GIV.
1434 While this will suppress optimizations, we cannot safely delete this
1435 GIV and associated statements, even if it appears it is not used. */
1436 if (stmt_could_throw_p (cfun, stmt))
1437 return false;
1438
1439 return true;
1440}
1441
1442/* Finds general ivs in statement STMT. */
1443
1444static void
1445find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1446{
1447 affine_iv iv;
1448
1449 if (!find_givs_in_stmt_scev (data, stmt, iv: &iv))
1450 return;
1451
1452 set_iv (data, iv: gimple_assign_lhs (gs: stmt), base: iv.base, step: iv.step, no_overflow: iv.no_overflow);
1453}
1454
1455/* Finds general ivs in basic block BB. */
1456
1457static void
1458find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1459{
1460 gimple_stmt_iterator bsi;
1461
1462 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1463 find_givs_in_stmt (data, stmt: gsi_stmt (i: bsi));
1464}
1465
1466/* Finds general ivs. */
1467
1468static void
1469find_givs (struct ivopts_data *data, basic_block *body)
1470{
1471 class loop *loop = data->current_loop;
1472 unsigned i;
1473
1474 for (i = 0; i < loop->num_nodes; i++)
1475 find_givs_in_bb (data, bb: body[i]);
1476}
1477
1478/* For each ssa name defined in LOOP determines whether it is an induction
1479 variable and if so, its initial value and step. */
1480
1481static bool
1482find_induction_variables (struct ivopts_data *data, basic_block *body)
1483{
1484 unsigned i;
1485 bitmap_iterator bi;
1486
1487 if (!find_bivs (data))
1488 return false;
1489
1490 find_givs (data, body);
1491 mark_bivs (data);
1492
1493 if (dump_file && (dump_flags & TDF_DETAILS))
1494 {
1495 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1496
1497 if (niter)
1498 {
1499 fprintf (stream: dump_file, format: " number of iterations ");
1500 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1501 if (!integer_zerop (niter->may_be_zero))
1502 {
1503 fprintf (stream: dump_file, format: "; zero if ");
1504 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1505 }
1506 fprintf (stream: dump_file, format: "\n");
1507 };
1508
1509 fprintf (stream: dump_file, format: "\n<Induction Vars>:\n");
1510 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1511 {
1512 struct version_info *info = ver_info (data, ver: i);
1513 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1514 dump_iv (file: dump_file, iv: ver_info (data, ver: i)->iv, dump_name: true, indent_level: 0);
1515 }
1516 }
1517
1518 return true;
1519}
1520
1521/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1522 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1523 is the const offset stripped from IV base and MEM_TYPE is the type
1524 of the memory being addressed. For uses of other types, ADDR_BASE
1525 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1526
1527static struct iv_use *
1528record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1529 gimple *stmt, enum use_type type, tree mem_type,
1530 tree addr_base, poly_uint64 addr_offset)
1531{
1532 struct iv_use *use = XCNEW (struct iv_use);
1533
1534 use->id = group->vuses.length ();
1535 use->group_id = group->id;
1536 use->type = type;
1537 use->mem_type = mem_type;
1538 use->iv = iv;
1539 use->stmt = stmt;
1540 use->op_p = use_p;
1541 use->addr_base = addr_base;
1542 use->addr_offset = addr_offset;
1543
1544 group->vuses.safe_push (obj: use);
1545 return use;
1546}
1547
1548/* Checks whether OP is a loop-level invariant and if so, records it.
1549 NONLINEAR_USE is true if the invariant is used in a way we do not
1550 handle specially. */
1551
1552static void
1553record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1554{
1555 basic_block bb;
1556 struct version_info *info;
1557
1558 if (TREE_CODE (op) != SSA_NAME
1559 || virtual_operand_p (op))
1560 return;
1561
1562 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1563 if (bb
1564 && flow_bb_inside_loop_p (data->current_loop, bb))
1565 return;
1566
1567 info = name_info (data, name: op);
1568 info->name = op;
1569 info->has_nonlin_use |= nonlinear_use;
1570 if (!info->inv_id)
1571 info->inv_id = ++data->max_inv_var_id;
1572 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573}
1574
1575/* Record a group of TYPE. */
1576
1577static struct iv_group *
1578record_group (struct ivopts_data *data, enum use_type type)
1579{
1580 struct iv_group *group = XCNEW (struct iv_group);
1581
1582 group->id = data->vgroups.length ();
1583 group->type = type;
1584 group->related_cands = BITMAP_ALLOC (NULL);
1585 group->vuses.create (nelems: 1);
1586 group->doloop_p = false;
1587
1588 data->vgroups.safe_push (obj: group);
1589 return group;
1590}
1591
1592/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1593 New group will be created if there is no existing group for the use.
1594 MEM_TYPE is the type of memory being addressed, or NULL if this
1595 isn't an address reference. */
1596
1597static struct iv_use *
1598record_group_use (struct ivopts_data *data, tree *use_p,
1599 struct iv *iv, gimple *stmt, enum use_type type,
1600 tree mem_type)
1601{
1602 tree addr_base = NULL;
1603 struct iv_group *group = NULL;
1604 poly_uint64 addr_offset = 0;
1605
  /* For an address type use, try to put it into an existing group with the
     same stripped base and step; a non address type use always goes into a
     new group below.  */
1607 if (address_p (type))
1608 {
1609 unsigned int i;
1610
1611 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1612 tree addr_toffset;
1613 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1614 addr_offset = int_cst_value (addr_toffset);
1615 for (i = 0; i < data->vgroups.length (); i++)
1616 {
1617 struct iv_use *use;
1618
1619 group = data->vgroups[i];
1620 use = group->vuses[0];
1621 if (!address_p (type: use->type))
1622 continue;
1623
1624 /* Check if it has the same stripped base and step. */
1625 if (operand_equal_p (iv->base_object, use->iv->base_object, flags: 0)
1626 && operand_equal_p (iv->step, use->iv->step, flags: 0)
1627 && operand_equal_p (addr_base, use->addr_base, flags: 0))
1628 break;
1629 }
1630 if (i == data->vgroups.length ())
1631 group = NULL;
1632 }
1633
1634 if (!group)
1635 group = record_group (data, type);
1636
1637 return record_use (group, use_p, iv, stmt, type, mem_type,
1638 addr_base, addr_offset);
1639}
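
/* For example, address type uses such as a[i] and a[i + 1] in the same
   loop share the stripped base and step and differ only in the constant
   offset, so they are normally recorded as separate iv_uses within a
   single iv_group and costed against each candidate together.  */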
1640
1641/* Checks whether the use OP is interesting and if so, records it. */
1642
1643static struct iv_use *
1644find_interesting_uses_op (struct ivopts_data *data, tree op)
1645{
1646 struct iv *iv;
1647 gimple *stmt;
1648 struct iv_use *use;
1649
1650 if (TREE_CODE (op) != SSA_NAME)
1651 return NULL;
1652
1653 iv = get_iv (data, var: op);
1654 if (!iv)
1655 return NULL;
1656
1657 if (iv->nonlin_use)
1658 {
1659 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1660 return iv->nonlin_use;
1661 }
1662
1663 if (integer_zerop (iv->step))
1664 {
1665 record_invariant (data, op, nonlinear_use: true);
1666 return NULL;
1667 }
1668
1669 stmt = SSA_NAME_DEF_STMT (op);
1670 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1671
1672 use = record_group_use (data, NULL, iv, stmt, type: USE_NONLINEAR_EXPR, NULL_TREE);
1673 iv->nonlin_use = use;
1674 return use;
1675}
1676
1677/* Indicate how compare type iv_use can be handled. */
1678enum comp_iv_rewrite
1679{
1680 COMP_IV_NA,
1681 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1682 COMP_IV_EXPR,
1683 /* We may rewrite compare type iv_uses on both sides of comparison by
1684 expressing value of each iv_use. */
1685 COMP_IV_EXPR_2,
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use
1687 or by eliminating it with other iv_cand. */
1688 COMP_IV_ELIM
1689};
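
/* For instance, with I an iv and INV a loop invariant, a condition like
   I < INV can typically be handled as COMP_IV_ELIM, I < J with both I and
   J being ivs as COMP_IV_EXPR_2, and a compare whose other operand is
   neither an iv nor a loop invariant as COMP_IV_EXPR.  */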
1690
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the appropriate comp_iv_rewrite
   value is returned.  If this is not the case, CONTROL_VAR and BOUND are
   set to the arguments of the condition and COMP_IV_NA is returned.  */
1698
1699static enum comp_iv_rewrite
1700extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1701 tree **control_var, tree **bound,
1702 struct iv **iv_var, struct iv **iv_bound)
1703{
1704 /* The objects returned when COND has constant operands. */
1705 static struct iv const_iv;
1706 static tree zero;
1707 tree *op0 = &zero, *op1 = &zero;
1708 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1709 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1710
1711 if (gimple_code (g: stmt) == GIMPLE_COND)
1712 {
1713 gcond *cond_stmt = as_a <gcond *> (p: stmt);
1714 op0 = gimple_cond_lhs_ptr (gs: cond_stmt);
1715 op1 = gimple_cond_rhs_ptr (gs: cond_stmt);
1716 }
1717 else
1718 {
1719 op0 = gimple_assign_rhs1_ptr (gs: stmt);
1720 op1 = gimple_assign_rhs2_ptr (gs: stmt);
1721 }
1722
1723 zero = integer_zero_node;
1724 const_iv.step = integer_zero_node;
1725
1726 if (TREE_CODE (*op0) == SSA_NAME)
1727 iv0 = get_iv (data, var: *op0);
1728 if (TREE_CODE (*op1) == SSA_NAME)
1729 iv1 = get_iv (data, var: *op1);
1730
  /* If both sides of the comparison are IVs, we can express the ivs on
     both ends.  */
1732 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1733 {
1734 rewrite_type = COMP_IV_EXPR_2;
1735 goto end;
1736 }
1737
  /* If neither side of the comparison is an IV.  */
1739 if ((!iv0 || integer_zerop (iv0->step))
1740 && (!iv1 || integer_zerop (iv1->step)))
1741 goto end;
1742
1743 /* Control variable may be on the other side. */
1744 if (!iv0 || integer_zerop (iv0->step))
1745 {
1746 std::swap (a&: op0, b&: op1);
1747 std::swap (a&: iv0, b&: iv1);
1748 }
1749 /* If one side is IV and the other side isn't loop invariant. */
1750 if (!iv1)
1751 rewrite_type = COMP_IV_EXPR;
1752 /* If one side is IV and the other side is loop invariant. */
1753 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1754 rewrite_type = COMP_IV_ELIM;
1755
1756end:
1757 if (control_var)
1758 *control_var = op0;
1759 if (iv_var)
1760 *iv_var = iv0;
1761 if (bound)
1762 *bound = op1;
1763 if (iv_bound)
1764 *iv_bound = iv1;
1765
1766 return rewrite_type;
1767}
1768
1769/* Checks whether the condition in STMT is interesting and if so,
1770 records it. */
1771
1772static void
1773find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1774{
1775 tree *var_p, *bound_p;
1776 struct iv *var_iv, *bound_iv;
1777 enum comp_iv_rewrite ret;
1778
1779 ret = extract_cond_operands (data, stmt,
1780 control_var: &var_p, bound: &bound_p, iv_var: &var_iv, iv_bound: &bound_iv);
1781 if (ret == COMP_IV_NA)
1782 {
1783 find_interesting_uses_op (data, op: *var_p);
1784 find_interesting_uses_op (data, op: *bound_p);
1785 return;
1786 }
1787
1788 record_group_use (data, use_p: var_p, iv: var_iv, stmt, type: USE_COMPARE, NULL_TREE);
1789 /* Record compare type iv_use for iv on the other side of comparison. */
1790 if (ret == COMP_IV_EXPR_2)
1791 record_group_use (data, use_p: bound_p, iv: bound_iv, stmt, type: USE_COMPARE, NULL_TREE);
1792}
1793
1794/* Returns the outermost loop EXPR is obviously invariant in
1795 relative to the loop LOOP, i.e. if all its operands are defined
1796 outside of the returned loop. Returns NULL if EXPR is not
1797 even obviously invariant in LOOP. */
1798
1799class loop *
1800outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1801{
1802 basic_block def_bb;
1803 unsigned i, len;
1804
1805 if (is_gimple_min_invariant (expr))
1806 return current_loops->tree_root;
1807
1808 if (TREE_CODE (expr) == SSA_NAME)
1809 {
1810 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1811 if (def_bb)
1812 {
1813 if (flow_bb_inside_loop_p (loop, def_bb))
1814 return NULL;
1815 return superloop_at_depth (loop,
1816 loop_depth (loop: def_bb->loop_father) + 1);
1817 }
1818
1819 return current_loops->tree_root;
1820 }
1821
1822 if (!EXPR_P (expr))
1823 return NULL;
1824
1825 unsigned maxdepth = 0;
1826 len = TREE_OPERAND_LENGTH (expr);
1827 for (i = 0; i < len; i++)
1828 {
1829 class loop *ivloop;
1830 if (!TREE_OPERAND (expr, i))
1831 continue;
1832
1833 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1834 if (!ivloop)
1835 return NULL;
1836 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837 }
1838
1839 return superloop_at_depth (loop, maxdepth);
1840}
1841
1842/* Returns true if expression EXPR is obviously invariant in LOOP,
1843 i.e. if all its operands are defined outside of the LOOP. LOOP
1844 should not be the function body. */
1845
1846bool
1847expr_invariant_in_loop_p (class loop *loop, tree expr)
1848{
1849 basic_block def_bb;
1850 unsigned i, len;
1851
1852 gcc_assert (loop_depth (loop) > 0);
1853
1854 if (is_gimple_min_invariant (expr))
1855 return true;
1856
1857 if (TREE_CODE (expr) == SSA_NAME)
1858 {
1859 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1860 if (def_bb
1861 && flow_bb_inside_loop_p (loop, def_bb))
1862 return false;
1863
1864 return true;
1865 }
1866
1867 if (!EXPR_P (expr))
1868 return false;
1869
1870 len = TREE_OPERAND_LENGTH (expr);
1871 for (i = 0; i < len; i++)
1872 if (TREE_OPERAND (expr, i)
1873 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1874 return false;
1875
1876 return true;
1877}
1878
1879/* Given expression EXPR which computes inductive values with respect
1880 to loop recorded in DATA, this function returns biv from which EXPR
1881 is derived by tracing definition chains of ssa variables in EXPR. */
1882
1883static struct iv*
1884find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1885{
1886 struct iv *iv;
1887 unsigned i, n;
1888 tree e2, e1;
1889 enum tree_code code;
1890 gimple *stmt;
1891
1892 if (expr == NULL_TREE)
1893 return NULL;
1894
1895 if (is_gimple_min_invariant (expr))
1896 return NULL;
1897
1898 code = TREE_CODE (expr);
1899 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1900 {
1901 n = TREE_OPERAND_LENGTH (expr);
1902 for (i = 0; i < n; i++)
1903 {
1904 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1905 if (iv)
1906 return iv;
1907 }
1908 }
1909
  /* Stop if it's not an SSA name.  */
1911 if (code != SSA_NAME)
1912 return NULL;
1913
1914 iv = get_iv (data, var: expr);
1915 if (!iv || integer_zerop (iv->step))
1916 return NULL;
1917 else if (iv->biv_p)
1918 return iv;
1919
1920 stmt = SSA_NAME_DEF_STMT (expr);
1921 if (gphi *phi = dyn_cast <gphi *> (p: stmt))
1922 {
1923 ssa_op_iter iter;
1924 use_operand_p use_p;
1925 basic_block phi_bb = gimple_bb (g: phi);
1926
1927 /* Skip loop header PHI that doesn't define biv. */
1928 if (phi_bb->loop_father == data->current_loop)
1929 return NULL;
1930
1931 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
1932 return NULL;
1933
1934 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1935 {
1936 tree use = USE_FROM_PTR (use_p);
1937 iv = find_deriving_biv_for_expr (data, expr: use);
1938 if (iv)
1939 return iv;
1940 }
1941 return NULL;
1942 }
1943 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1944 return NULL;
1945
1946 e1 = gimple_assign_rhs1 (gs: stmt);
1947 code = gimple_assign_rhs_code (gs: stmt);
1948 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1949 return find_deriving_biv_for_expr (data, expr: e1);
1950
1951 switch (code)
1952 {
1953 case MULT_EXPR:
1954 case PLUS_EXPR:
1955 case MINUS_EXPR:
1956 case POINTER_PLUS_EXPR:
1957 /* Increments, decrements and multiplications by a constant
1958 are simple. */
1959 e2 = gimple_assign_rhs2 (gs: stmt);
1960 iv = find_deriving_biv_for_expr (data, expr: e2);
1961 if (iv)
1962 return iv;
1963 gcc_fallthrough ();
1964
1965 CASE_CONVERT:
1966 /* Casts are simple. */
1967 return find_deriving_biv_for_expr (data, expr: e1);
1968
1969 default:
1970 break;
1971 }
1972
1973 return NULL;
1974}
1975
/* Record that BIV is used in address type uses, and do the same for the
   bivs whose bases differ from BIV's base by exactly one step (its
   predecessor and successor).  */
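/* Minimal sketch (values are illustrative): for bivs {base 0, step 4}
   and {base 4, step 4}, the second biv's base equals the first one's
   base + step, so marking the first as used in an address also marks
   the second.  */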
1978
1979static void
1980record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1981{
1982 unsigned i;
1983 tree type, base_1, base_2;
1984 bitmap_iterator bi;
1985
1986 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1987 || biv->have_address_use || !biv->no_overflow)
1988 return;
1989
1990 type = TREE_TYPE (biv->base);
1991 if (!INTEGRAL_TYPE_P (type))
1992 return;
1993
1994 biv->have_address_use = true;
1995 data->bivs_not_used_in_addr--;
1996 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1997 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1998 {
1999 struct iv *iv = ver_info (data, ver: i)->iv;
2000
2001 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2002 || iv->have_address_use || !iv->no_overflow)
2003 continue;
2004
2005 if (type != TREE_TYPE (iv->base)
2006 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2007 continue;
2008
2009 if (!operand_equal_p (biv->step, iv->step, flags: 0))
2010 continue;
2011
2012 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2013 if (operand_equal_p (base_1, iv->base, flags: 0)
2014 || operand_equal_p (base_2, biv->base, flags: 0))
2015 {
2016 iv->have_address_use = true;
2017 data->bivs_not_used_in_addr--;
2018 }
2019 }
2020}
2021
2022/* Cumulates the steps of indices into DATA and replaces their values with the
2023 initial ones. Returns false when the value of the index cannot be determined.
2024 Callback for for_each_index. */
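/* Illustrative example (names hypothetical): for a reference a[i_1]
   where i_1 is an iv {base 0, step 1} and the element size is 4, the
   callback below replaces the index with 0 and accumulates
   4 * 1 = 4 into DTA->step.  */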
2025
2026struct ifs_ivopts_data
2027{
2028 struct ivopts_data *ivopts_data;
2029 gimple *stmt;
2030 tree step;
2031};
2032
2033static bool
2034idx_find_step (tree base, tree *idx, void *data)
2035{
2036 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2037 struct iv *iv;
2038 bool use_overflow_semantics = false;
2039 tree step, iv_base, iv_step, lbound, off;
2040 class loop *loop = dta->ivopts_data->current_loop;
2041
2042 /* If base is a component ref, require that the offset of the reference
2043 be invariant. */
2044 if (TREE_CODE (base) == COMPONENT_REF)
2045 {
2046 off = component_ref_field_offset (base);
2047 return expr_invariant_in_loop_p (loop, expr: off);
2048 }
2049
2050 /* If base is array, first check whether we will be able to move the
2051 reference out of the loop (in order to take its address in strength
2052 reduction). In order for this to work we need both lower bound
2053 and step to be loop invariants. */
2054 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055 {
2056 /* Moreover, for a range, the size needs to be invariant as well. */
2057 if (TREE_CODE (base) == ARRAY_RANGE_REF
2058 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2059 return false;
2060
2061 step = array_ref_element_size (base);
2062 lbound = array_ref_low_bound (base);
2063
2064 if (!expr_invariant_in_loop_p (loop, expr: step)
2065 || !expr_invariant_in_loop_p (loop, expr: lbound))
2066 return false;
2067 }
2068
2069 if (TREE_CODE (*idx) != SSA_NAME)
2070 return true;
2071
2072 iv = get_iv (data: dta->ivopts_data, var: *idx);
2073 if (!iv)
2074 return false;
2075
2076 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2077 *&x[0], which is not folded and does not trigger the
2078 ARRAY_REF path below. */
2079 *idx = iv->base;
2080
2081 if (integer_zerop (iv->step))
2082 return true;
2083
2084 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2085 {
2086 step = array_ref_element_size (base);
2087
2088 /* We only handle addresses whose step is an integer constant. */
2089 if (TREE_CODE (step) != INTEGER_CST)
2090 return false;
2091 }
2092 else
2093 /* The step for pointer arithmetics already is 1 byte. */
2094 step = size_one_node;
2095
2096 iv_base = iv->base;
2097 iv_step = iv->step;
2098 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2099 use_overflow_semantics = true;
2100
2101 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2102 sizetype, &iv_base, &iv_step, dta->stmt,
2103 use_overflow_semantics))
2104 {
2105 /* The index might wrap. */
2106 return false;
2107 }
2108
2109 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2110 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2111
2112 if (dta->ivopts_data->bivs_not_used_in_addr)
2113 {
2114 if (!iv->biv_p)
2115 iv = find_deriving_biv_for_expr (data: dta->ivopts_data, expr: iv->ssa_name);
2116
2117 record_biv_for_address_use (data: dta->ivopts_data, biv: iv);
2118 }
2119 return true;
2120}
2121
2122/* Records use in index IDX. Callback for for_each_index. Ivopts data
2123 object is passed to it in DATA. */
2124
2125static bool
2126idx_record_use (tree base, tree *idx,
2127 void *vdata)
2128{
2129 struct ivopts_data *data = (struct ivopts_data *) vdata;
2130 find_interesting_uses_op (data, op: *idx);
2131 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2132 {
2133 if (TREE_OPERAND (base, 2))
2134 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2135 if (TREE_OPERAND (base, 3))
2136 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2137 }
2138 return true;
2139}
2140
2141/* If we can prove that TOP = cst * BOT for some constant cst,
2142 store cst to MUL and return true. Otherwise return false.
2143 The returned value is always sign-extended, regardless of the
2144 signedness of TOP and BOT. */
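/* A few illustrative cases: for TOP = i_1 * 4 and BOT = i_1 the result
   is 4; for TOP = 12 and BOT = 4 it is 3; for TOP = 10 and BOT = 4 the
   division has a nonzero remainder, so the function returns false.  */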
2145
2146static bool
2147constant_multiple_of (tree top, tree bot, widest_int *mul)
2148{
2149 tree mby;
2150 enum tree_code code;
2151 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2152 widest_int res, p0, p1;
2153
2154 STRIP_NOPS (top);
2155 STRIP_NOPS (bot);
2156
2157 if (operand_equal_p (top, bot, flags: 0))
2158 {
2159 *mul = 1;
2160 return true;
2161 }
2162
2163 code = TREE_CODE (top);
2164 switch (code)
2165 {
2166 case MULT_EXPR:
2167 mby = TREE_OPERAND (top, 1);
2168 if (TREE_CODE (mby) != INTEGER_CST)
2169 return false;
2170
2171 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, mul: &res))
2172 return false;
2173
2174 *mul = wi::sext (x: res * wi::to_widest (t: mby), offset: precision);
2175 return true;
2176
2177 case PLUS_EXPR:
2178 case MINUS_EXPR:
2179 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, mul: &p0)
2180 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, mul: &p1))
2181 return false;
2182
2183 if (code == MINUS_EXPR)
2184 p1 = -p1;
2185 *mul = wi::sext (x: p0 + p1, offset: precision);
2186 return true;
2187
2188 case INTEGER_CST:
2189 if (TREE_CODE (bot) != INTEGER_CST)
2190 return false;
2191
2192 p0 = widest_int::from (x: wi::to_wide (t: top), sgn: SIGNED);
2193 p1 = widest_int::from (x: wi::to_wide (t: bot), sgn: SIGNED);
2194 if (p1 == 0)
2195 return false;
2196 *mul = wi::sext (x: wi::divmod_trunc (x: p0, y: p1, sgn: SIGNED, remainder_ptr: &res), offset: precision);
2197 return res == 0;
2198
2199 default:
2200 if (POLY_INT_CST_P (top)
2201 && POLY_INT_CST_P (bot)
2202 && constant_multiple_p (a: wi::to_poly_widest (t: top),
2203 b: wi::to_poly_widest (t: bot), multiple: mul))
2204 return true;
2205
2206 return false;
2207 }
2208}
2209
2210/* Return true if memory reference REF with step STEP may be unaligned. */
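/* For instance (sizes illustrative), on a strict-alignment target an
   access that requires 4-byte alignment but is advanced by only 2 bytes
   per iteration may be misaligned in some iterations; the trailing-zeros
   check on STEP below catches this.  */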
2211
2212static bool
2213may_be_unaligned_p (tree ref, tree step)
2214{
2215 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2216 thus they are not misaligned. */
2217 if (TREE_CODE (ref) == TARGET_MEM_REF)
2218 return false;
2219
2220 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2221 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2222 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2223
2224 unsigned HOST_WIDE_INT bitpos;
2225 unsigned int ref_align;
2226 get_object_alignment_1 (ref, &ref_align, &bitpos);
2227 if (ref_align < align
2228 || (bitpos % align) != 0
2229 || (bitpos % BITS_PER_UNIT) != 0)
2230 return true;
2231
2232 unsigned int trailing_zeros = tree_ctz (step);
2233 if (trailing_zeros < HOST_BITS_PER_INT
2234 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2235 return true;
2236
2237 return false;
2238}
2239
2240/* Return true if EXPR may be non-addressable. */
2241
2242bool
2243may_be_nonaddressable_p (tree expr)
2244{
2245 switch (TREE_CODE (expr))
2246 {
2247 case VAR_DECL:
2248 /* Check if it's a register variable. */
2249 return DECL_HARD_REGISTER (expr);
2250
2251 case TARGET_MEM_REF:
2252 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2253 target, thus they are always addressable. */
2254 return false;
2255
2256 case MEM_REF:
2257 /* Likewise for MEM_REFs, modulo the storage order. */
2258 return REF_REVERSE_STORAGE_ORDER (expr);
2259
2260 case BIT_FIELD_REF:
2261 if (REF_REVERSE_STORAGE_ORDER (expr))
2262 return true;
2263 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2264
2265 case COMPONENT_REF:
2266 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267 return true;
2268 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2269 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2270
2271 case ARRAY_REF:
2272 case ARRAY_RANGE_REF:
2273 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2274 return true;
2275 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2276
2277 case VIEW_CONVERT_EXPR:
2278 /* This kind of view-conversions may wrap non-addressable objects
2279 and make them look addressable. After some processing the
2280 non-addressability may be uncovered again, causing ADDR_EXPRs
2281 of inappropriate objects to be built. */
2282 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2283 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2284 return true;
2285 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286
2287 CASE_CONVERT:
2288 return true;
2289
2290 default:
2291 break;
2292 }
2293
2294 return false;
2295}
2296
2297/* Finds addresses in *OP_P inside STMT. */
2298
2299static void
2300find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2301 tree *op_p)
2302{
2303 tree base = *op_p, step = size_zero_node;
2304 struct iv *civ;
2305 struct ifs_ivopts_data ifs_ivopts_data;
2306
2307 /* Do not play with volatile memory references. A bit too conservative,
2308 perhaps, but safe. */
2309 if (gimple_has_volatile_ops (stmt))
2310 goto fail;
2311
2312 /* Ignore bitfields for now. Not really something terribly complicated
2313 to handle. TODO. */
2314 if (TREE_CODE (base) == BIT_FIELD_REF)
2315 goto fail;
2316
2317 base = unshare_expr (base);
2318
2319 if (TREE_CODE (base) == TARGET_MEM_REF)
2320 {
2321 tree type = build_pointer_type (TREE_TYPE (base));
2322 tree astep;
2323
2324 if (TMR_BASE (base)
2325 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2326 {
2327 civ = get_iv (data, TMR_BASE (base));
2328 if (!civ)
2329 goto fail;
2330
2331 TMR_BASE (base) = civ->base;
2332 step = civ->step;
2333 }
2334 if (TMR_INDEX2 (base)
2335 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2336 {
2337 civ = get_iv (data, TMR_INDEX2 (base));
2338 if (!civ)
2339 goto fail;
2340
2341 TMR_INDEX2 (base) = civ->base;
2342 step = civ->step;
2343 }
2344 if (TMR_INDEX (base)
2345 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2346 {
2347 civ = get_iv (data, TMR_INDEX (base));
2348 if (!civ)
2349 goto fail;
2350
2351 TMR_INDEX (base) = civ->base;
2352 astep = civ->step;
2353
2354 if (astep)
2355 {
2356 if (TMR_STEP (base))
2357 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2358
2359 step = fold_build2 (PLUS_EXPR, type, step, astep);
2360 }
2361 }
2362
2363 if (integer_zerop (step))
2364 goto fail;
2365 base = tree_mem_ref_addr (type, base);
2366 }
2367 else
2368 {
2369 ifs_ivopts_data.ivopts_data = data;
2370 ifs_ivopts_data.stmt = stmt;
2371 ifs_ivopts_data.step = size_zero_node;
2372 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2373 || integer_zerop (ifs_ivopts_data.step))
2374 goto fail;
2375 step = ifs_ivopts_data.step;
2376
2377 /* Check that the base expression is addressable. This needs
2378 to be done after substituting bases of IVs into it. */
2379 if (may_be_nonaddressable_p (expr: base))
2380 goto fail;
2381
2382 /* Moreover, on strict alignment platforms, check that it is
2383 sufficiently aligned. */
2384 if (STRICT_ALIGNMENT && may_be_unaligned_p (ref: base, step))
2385 goto fail;
2386
2387 base = build_fold_addr_expr (base);
2388
2389 /* Substituting bases of IVs into the base expression might
2390 have caused folding opportunities. */
2391 if (TREE_CODE (base) == ADDR_EXPR)
2392 {
2393 tree *ref = &TREE_OPERAND (base, 0);
2394 while (handled_component_p (t: *ref))
2395 ref = &TREE_OPERAND (*ref, 0);
2396 if (TREE_CODE (*ref) == MEM_REF)
2397 {
2398 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2399 TREE_OPERAND (*ref, 0),
2400 TREE_OPERAND (*ref, 1));
2401 if (tem)
2402 *ref = tem;
2403 }
2404 }
2405 }
2406
2407 civ = alloc_iv (data, base, step);
2408 /* Fail if base object of this memory reference is unknown. */
2409 if (civ->base_object == NULL_TREE)
2410 goto fail;
2411
2412 record_group_use (data, use_p: op_p, iv: civ, stmt, type: USE_REF_ADDRESS, TREE_TYPE (*op_p));
2413 return;
2414
2415fail:
2416 for_each_index (op_p, idx_record_use, data);
2417}
2418
2419/* Finds and records invariants used in STMT. */
2420
2421static void
2422find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2423{
2424 ssa_op_iter iter;
2425 use_operand_p use_p;
2426 tree op;
2427
2428 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2429 {
2430 op = USE_FROM_PTR (use_p);
2431 record_invariant (data, op, nonlinear_use: false);
2432 }
2433}
2434
2435/* CALL calls an internal function. If operand *OP_P will become an
2436 address when the call is expanded, return the type of the memory
2437 being addressed, otherwise return null. */
2438
2439static tree
2440get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2441{
2442 switch (gimple_call_internal_fn (gs: call))
2443 {
2444 case IFN_MASK_LOAD:
2445 case IFN_MASK_LOAD_LANES:
2446 case IFN_MASK_LEN_LOAD_LANES:
2447 case IFN_LEN_LOAD:
2448 case IFN_MASK_LEN_LOAD:
2449 if (op_p == gimple_call_arg_ptr (gs: call, index: 0))
2450 return TREE_TYPE (gimple_call_lhs (call));
2451 return NULL_TREE;
2452
2453 case IFN_MASK_STORE:
2454 case IFN_MASK_STORE_LANES:
2455 case IFN_MASK_LEN_STORE_LANES:
2456 case IFN_LEN_STORE:
2457 case IFN_MASK_LEN_STORE:
2458 {
2459 if (op_p == gimple_call_arg_ptr (gs: call, index: 0))
2460 {
2461 internal_fn ifn = gimple_call_internal_fn (gs: call);
2462 int index = internal_fn_stored_value_index (ifn);
2463 return TREE_TYPE (gimple_call_arg (call, index));
2464 }
2465 return NULL_TREE;
2466 }
2467
2468 default:
2469 return NULL_TREE;
2470 }
2471}
2472
2473/* IV is a (non-address) iv that describes operand *OP_P of STMT.
2474 Return true if the operand will become an address when STMT
2475 is expanded and record the associated address use if so. */
2476
2477static bool
2478find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2479 struct iv *iv)
2480{
2481 /* Fail if base object of this memory reference is unknown. */
2482 if (iv->base_object == NULL_TREE)
2483 return false;
2484
2485 tree mem_type = NULL_TREE;
2486 if (gcall *call = dyn_cast <gcall *> (p: stmt))
2487 if (gimple_call_internal_p (gs: call))
2488 mem_type = get_mem_type_for_internal_fn (call, op_p);
2489 if (mem_type)
2490 {
2491 iv = alloc_iv (data, base: iv->base, step: iv->step);
2492 record_group_use (data, use_p: op_p, iv, stmt, type: USE_PTR_ADDRESS, mem_type);
2493 return true;
2494 }
2495 return false;
2496}
2497
2498/* Finds interesting uses of induction variables in the statement STMT. */
2499
2500static void
2501find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2502{
2503 struct iv *iv;
2504 tree op, *lhs, *rhs;
2505 ssa_op_iter iter;
2506 use_operand_p use_p;
2507 enum tree_code code;
2508
2509 find_invariants_stmt (data, stmt);
2510
2511 if (gimple_code (g: stmt) == GIMPLE_COND)
2512 {
2513 find_interesting_uses_cond (data, stmt);
2514 return;
2515 }
2516
2517 if (is_gimple_assign (gs: stmt))
2518 {
2519 lhs = gimple_assign_lhs_ptr (gs: stmt);
2520 rhs = gimple_assign_rhs1_ptr (gs: stmt);
2521
2522 if (TREE_CODE (*lhs) == SSA_NAME)
2523 {
2524 /* If the statement defines an induction variable, the uses are not
2525 interesting by themselves. */
2526
2527 iv = get_iv (data, var: *lhs);
2528
2529 if (iv && !integer_zerop (iv->step))
2530 return;
2531 }
2532
2533 code = gimple_assign_rhs_code (gs: stmt);
2534 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2535 && (REFERENCE_CLASS_P (*rhs)
2536 || is_gimple_val (*rhs)))
2537 {
2538 if (REFERENCE_CLASS_P (*rhs))
2539 find_interesting_uses_address (data, stmt, op_p: rhs);
2540 else
2541 find_interesting_uses_op (data, op: *rhs);
2542
2543 if (REFERENCE_CLASS_P (*lhs))
2544 find_interesting_uses_address (data, stmt, op_p: lhs);
2545 return;
2546 }
2547 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2548 {
2549 find_interesting_uses_cond (data, stmt);
2550 return;
2551 }
2552
2553 /* TODO -- we should also handle address uses of type
2554
2555 memory = call (whatever);
2556
2557 and
2558
2559 call (memory). */
2560 }
2561
2562 if (gimple_code (g: stmt) == GIMPLE_PHI
2563 && gimple_bb (g: stmt) == data->current_loop->header)
2564 {
2565 iv = get_iv (data, PHI_RESULT (stmt));
2566
2567 if (iv && !integer_zerop (iv->step))
2568 return;
2569 }
2570
2571 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2572 {
2573 op = USE_FROM_PTR (use_p);
2574
2575 if (TREE_CODE (op) != SSA_NAME)
2576 continue;
2577
2578 iv = get_iv (data, var: op);
2579 if (!iv)
2580 continue;
2581
2582 if (!find_address_like_use (data, stmt, op_p: use_p->use, iv))
2583 find_interesting_uses_op (data, op);
2584 }
2585}
2586
2587/* Finds interesting uses of induction variables outside of loops
2588 on loop exit edge EXIT. */
2589
2590static void
2591find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2592{
2593 gphi *phi;
2594 gphi_iterator psi;
2595 tree def;
2596
2597 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); gsi_next (i: &psi))
2598 {
2599 phi = psi.phi ();
2600 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2601 if (!virtual_operand_p (op: def))
2602 find_interesting_uses_op (data, op: def);
2603 }
2604}
2605
2606/* Return TRUE if OFFSET is within the range of [base + offset] addressing
2607 mode for memory reference represented by USE. */
2608
2609static GTY (()) vec<rtx, va_gc> *addr_list;
2610
2611static bool
2612addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2613{
2614 rtx reg, addr;
2615 unsigned list_index;
2616 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2617 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2618
2619 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2620 if (list_index >= vec_safe_length (v: addr_list))
2621 vec_safe_grow_cleared (v&: addr_list, len: list_index + MAX_MACHINE_MODE, exact: true);
2622
2623 addr = (*addr_list)[list_index];
2624 if (!addr)
2625 {
2626 addr_mode = targetm.addr_space.address_mode (as);
2627 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2628 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2629 (*addr_list)[list_index] = addr;
2630 }
2631 else
2632 addr_mode = GET_MODE (addr);
2633
2634 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2635 return (memory_address_addr_space_p (mem_mode, addr, as));
2636}
2637
2638/* Comparison function to sort group in ascending order of addr_offset. */
2639
2640static int
2641group_compare_offset (const void *a, const void *b)
2642{
2643 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2644 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2645
2646 return compare_sizes_for_sort (a: (*u1)->addr_offset, b: (*u2)->addr_offset);
2647}
2648
/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) The problem size with only small groups is usually small and
	the general algorithm can handle it well.

   TODO -- The above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */
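/* Sketch of the heuristic (offsets are illustrative): a group with uses
   at offsets {0, 4, 8} has three distinct offsets, so the function
   returns false and splitting is suppressed; if every group has at most
   two distinct offsets (e.g. {0, 4} and {16, 16}), it returns true and
   all groups are split.  */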
2660
2661static bool
2662split_small_address_groups_p (struct ivopts_data *data)
2663{
2664 unsigned int i, j, distinct = 1;
2665 struct iv_use *pre;
2666 struct iv_group *group;
2667
2668 for (i = 0; i < data->vgroups.length (); i++)
2669 {
2670 group = data->vgroups[i];
2671 if (group->vuses.length () == 1)
2672 continue;
2673
2674 gcc_assert (address_p (group->type));
2675 if (group->vuses.length () == 2)
2676 {
2677 if (compare_sizes_for_sort (a: group->vuses[0]->addr_offset,
2678 b: group->vuses[1]->addr_offset) > 0)
2679 std::swap (a&: group->vuses[0], b&: group->vuses[1]);
2680 }
2681 else
2682 group->vuses.qsort (group_compare_offset);
2683
2684 if (distinct > 2)
2685 continue;
2686
2687 distinct = 1;
2688 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2689 {
2690 if (maybe_ne (a: group->vuses[j]->addr_offset, b: pre->addr_offset))
2691 {
2692 pre = group->vuses[j];
2693 distinct++;
2694 }
2695
2696 if (distinct > 2)
2697 break;
2698 }
2699 }
2700
2701 return (distinct <= 2);
2702}
2703
2704/* For each group of address type uses, this function further groups
2705 these uses according to the maximum offset supported by target's
2706 [base + offset] addressing mode. */
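/* Illustrative outcome (offsets hypothetical): with uses at BASE,
   BASE + 4 and BASE + 0x10000, the first two stay in one group if
   offset 4 fits the target's [base + offset] form, while the last one
   is moved to a new group when 0x10000 does not fit.  */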
2707
2708static void
2709split_address_groups (struct ivopts_data *data)
2710{
2711 unsigned int i, j;
  /* Whether to split every group unconditionally (the small-groups
     case); see split_small_address_groups_p.  */
2713 bool split_p = split_small_address_groups_p (data);
2714
2715 for (i = 0; i < data->vgroups.length (); i++)
2716 {
2717 struct iv_group *new_group = NULL;
2718 struct iv_group *group = data->vgroups[i];
2719 struct iv_use *use = group->vuses[0];
2720
2721 use->id = 0;
2722 use->group_id = group->id;
2723 if (group->vuses.length () == 1)
2724 continue;
2725
2726 gcc_assert (address_p (use->type));
2727
2728 for (j = 1; j < group->vuses.length ();)
2729 {
2730 struct iv_use *next = group->vuses[j];
2731 poly_int64 offset = next->addr_offset - use->addr_offset;
2732
	  /* Split the group if asked to, or if the offset against the
	     first use can't fit in the offset part of the addressing
	     mode.  IV uses having the same offset are still kept in
	     one group.  */
2736 if (maybe_ne (a: offset, b: 0)
2737 && (split_p || !addr_offset_valid_p (use, offset)))
2738 {
2739 if (!new_group)
2740 new_group = record_group (data, type: group->type);
2741 group->vuses.ordered_remove (ix: j);
2742 new_group->vuses.safe_push (obj: next);
2743 continue;
2744 }
2745
2746 next->id = j;
2747 next->group_id = group->id;
2748 j++;
2749 }
2750 }
2751}
2752
2753/* Finds uses of the induction variables that are interesting. */
2754
2755static void
2756find_interesting_uses (struct ivopts_data *data, basic_block *body)
2757{
2758 basic_block bb;
2759 gimple_stmt_iterator bsi;
2760 unsigned i;
2761 edge e;
2762
2763 for (i = 0; i < data->current_loop->num_nodes; i++)
2764 {
2765 edge_iterator ei;
2766 bb = body[i];
2767
2768 FOR_EACH_EDGE (e, ei, bb->succs)
2769 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2770 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2771 find_interesting_uses_outside (data, exit: e);
2772
2773 for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2774 find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2775 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2776 if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
2777 find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2778 }
2779
2780 split_address_groups (data);
2781
2782 if (dump_file && (dump_flags & TDF_DETAILS))
2783 {
2784 fprintf (stream: dump_file, format: "\n<IV Groups>:\n");
2785 dump_groups (file: dump_file, data);
2786 fprintf (stream: dump_file, format: "\n");
2787 }
2788}
2789
2790/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2791 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2792 we are at the top-level of the processed address. */
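/* A sketch (array and sizes hypothetical): for EXPR = &a[i_1 + 3] with
   4-byte elements, the constant part of the index is folded into the
   offset, yielding &a[i_1] with *OFFSET = 12.  */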
2793
2794static tree
2795strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2796 poly_int64 *offset)
2797{
2798 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2799 enum tree_code code;
2800 tree type, orig_type = TREE_TYPE (expr);
2801 poly_int64 off0, off1;
2802 HOST_WIDE_INT st;
2803 tree orig_expr = expr;
2804
2805 STRIP_NOPS (expr);
2806
2807 type = TREE_TYPE (expr);
2808 code = TREE_CODE (expr);
2809 *offset = 0;
2810
2811 switch (code)
2812 {
2813 case POINTER_PLUS_EXPR:
2814 case PLUS_EXPR:
2815 case MINUS_EXPR:
2816 op0 = TREE_OPERAND (expr, 0);
2817 op1 = TREE_OPERAND (expr, 1);
2818
2819 op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2820 op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2821
2822 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2823 if (op0 == TREE_OPERAND (expr, 0)
2824 && op1 == TREE_OPERAND (expr, 1))
2825 return orig_expr;
2826
2827 if (integer_zerop (op1))
2828 expr = op0;
2829 else if (integer_zerop (op0))
2830 {
2831 if (code == MINUS_EXPR)
2832 {
2833 if (TYPE_OVERFLOW_UNDEFINED (type))
2834 {
2835 type = unsigned_type_for (type);
2836 op1 = fold_convert (type, op1);
2837 }
2838 expr = fold_build1 (NEGATE_EXPR, type, op1);
2839 }
2840 else
2841 expr = op1;
2842 }
2843 else
2844 {
2845 if (TYPE_OVERFLOW_UNDEFINED (type))
2846 {
2847 type = unsigned_type_for (type);
2848 if (code == POINTER_PLUS_EXPR)
2849 code = PLUS_EXPR;
2850 op0 = fold_convert (type, op0);
2851 op1 = fold_convert (type, op1);
2852 }
2853 expr = fold_build2 (code, type, op0, op1);
2854 }
2855
2856 return fold_convert (orig_type, expr);
2857
2858 case MULT_EXPR:
2859 op1 = TREE_OPERAND (expr, 1);
2860 if (!cst_and_fits_in_hwi (op1))
2861 return orig_expr;
2862
2863 op0 = TREE_OPERAND (expr, 0);
2864 op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2865 if (op0 == TREE_OPERAND (expr, 0))
2866 return orig_expr;
2867
2868 *offset = off0 * int_cst_value (op1);
2869 if (integer_zerop (op0))
2870 expr = op0;
2871 else
2872 {
2873 if (TYPE_OVERFLOW_UNDEFINED (type))
2874 {
2875 type = unsigned_type_for (type);
2876 op0 = fold_convert (type, op0);
2877 op1 = fold_convert (type, op1);
2878 }
2879 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2880 }
2881
2882 return fold_convert (orig_type, expr);
2883
2884 case ARRAY_REF:
2885 case ARRAY_RANGE_REF:
2886 if (!inside_addr)
2887 return orig_expr;
2888
2889 step = array_ref_element_size (expr);
2890 if (!cst_and_fits_in_hwi (step))
2891 break;
2892
2893 st = int_cst_value (step);
2894 op1 = TREE_OPERAND (expr, 1);
2895 op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2896 *offset = off1 * st;
2897
2898 if (top_compref
2899 && integer_zerop (op1))
2900 {
2901 /* Strip the component reference completely. */
2902 op0 = TREE_OPERAND (expr, 0);
2903 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2904 *offset += off0;
2905 return op0;
2906 }
2907 break;
2908
2909 case COMPONENT_REF:
2910 {
2911 tree field;
2912
2913 if (!inside_addr)
2914 return orig_expr;
2915
2916 tmp = component_ref_field_offset (expr);
2917 field = TREE_OPERAND (expr, 1);
2918 if (top_compref
2919 && cst_and_fits_in_hwi (tmp)
2920 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2921 {
2922 HOST_WIDE_INT boffset, abs_off;
2923
2924 /* Strip the component reference completely. */
2925 op0 = TREE_OPERAND (expr, 0);
2926 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2927 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2928 abs_off = abs_hwi (x: boffset) / BITS_PER_UNIT;
2929 if (boffset < 0)
2930 abs_off = -abs_off;
2931
2932 *offset = off0 + int_cst_value (tmp) + abs_off;
2933 return op0;
2934 }
2935 }
2936 break;
2937
2938 case ADDR_EXPR:
2939 op0 = TREE_OPERAND (expr, 0);
2940 op0 = strip_offset_1 (expr: op0, inside_addr: true, top_compref: true, offset: &off0);
2941 *offset += off0;
2942
2943 if (op0 == TREE_OPERAND (expr, 0))
2944 return orig_expr;
2945
2946 expr = build_fold_addr_expr (op0);
2947 return fold_convert (orig_type, expr);
2948
2949 case MEM_REF:
2950 /* ??? Offset operand? */
2951 inside_addr = false;
2952 break;
2953
2954 default:
2955 if (ptrdiff_tree_p (expr, offset) && maybe_ne (a: *offset, b: 0))
2956 return build_int_cst (orig_type, 0);
2957 return orig_expr;
2958 }
2959
  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
2962 op0 = TREE_OPERAND (expr, 0);
2963 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref: false, offset: &off0);
2964 *offset += off0;
2965
2966 if (op0 == TREE_OPERAND (expr, 0)
2967 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2968 return orig_expr;
2969
2970 expr = copy_node (expr);
2971 TREE_OPERAND (expr, 0) = op0;
2972 if (op1)
2973 TREE_OPERAND (expr, 1) = op1;
2974
2975 /* Inside address, we might strip the top level component references,
2976 thus changing type of the expression. Handling of ADDR_EXPR
2977 will fix that. */
2978 expr = fold_convert (orig_type, expr);
2979
2980 return expr;
2981}
2982
2983/* Strips constant offsets from EXPR and stores them to OFFSET. */
2984
2985static tree
2986strip_offset (tree expr, poly_uint64 *offset)
2987{
2988 poly_int64 off;
2989 tree core = strip_offset_1 (expr, inside_addr: false, top_compref: false, offset: &off);
2990 *offset = off;
2991 return core;
2992}
2993
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */
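/* E.g. a signed int base yields unsigned int; a pointer base yields an
   unsigned integer type of the same precision, so candidate arithmetic
   never invokes signed-overflow undefined behavior.  */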
2997
2998static tree
2999generic_type_for (tree type)
3000{
3001 if (POINTER_TYPE_P (type))
3002 return unsigned_type_for (type);
3003
3004 if (TYPE_UNSIGNED (type))
3005 return type;
3006
3007 return unsigned_type_for (type);
3008}
3009
3010/* Private data for walk_tree. */
3011
3012struct walk_tree_data
3013{
3014 bitmap *inv_vars;
3015 struct ivopts_data *idata;
3016};
3017
/* Callback function for walk_tree; it records invariants and symbol
   references found in *EXPR_P.  DATA is the structure storing the result.  */
3020
3021static tree
3022find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
3023{
3024 tree op = *expr_p;
3025 struct version_info *info;
3026 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3027
3028 if (TREE_CODE (op) != SSA_NAME)
3029 return NULL_TREE;
3030
3031 info = name_info (data: wdata->idata, name: op);
  /* Because we expand simple operations when finding IVs, a loop invariant
     variable that isn't referred to by the original loop could be used now.
     Record such invariant variables here.  */
3035 if (!info->iv)
3036 {
3037 struct ivopts_data *idata = wdata->idata;
3038 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3039
3040 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3041 {
3042 tree steptype = TREE_TYPE (op);
3043 if (POINTER_TYPE_P (steptype))
3044 steptype = sizetype;
3045 set_iv (data: idata, iv: op, base: op, step: build_int_cst (steptype, 0), no_overflow: true);
3046 record_invariant (data: idata, op, nonlinear_use: false);
3047 }
3048 }
3049 if (!info->inv_id || info->has_nonlin_use)
3050 return NULL_TREE;
3051
3052 if (!*wdata->inv_vars)
3053 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3054 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3055
3056 return NULL_TREE;
3057}
3058
/* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
   should store them.  */
3061
3062static inline void
3063find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3064{
3065 struct walk_tree_data wdata;
3066
3067 if (!inv_vars)
3068 return;
3069
3070 wdata.idata = data;
3071 wdata.inv_vars = inv_vars;
3072 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3073}
3074
/* Get the entry from the invariant expr hash table for INV_EXPR.  A new
   entry is recorded if it doesn't exist yet.  Given the two exprs below:
     inv_expr + cst1, inv_expr + cst2
   it's hard to decide whether the constant part should be stripped or
   not.  We choose not to strip it, based on the following facts:
     1) We would need to count the ADD cost for the constant part if it
	were stripped, which isn't always trivial where this function is
	called.
     2) Stripping the constant away may conflict with the subsequent loop
	invariant hoisting pass.
     3) Not stripping the constant results in more invariant exprs, which
	usually leads to a decision preferring lower register pressure.  */
3086
3087static iv_inv_expr_ent *
3088get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3089{
3090 STRIP_NOPS (inv_expr);
3091
3092 if (poly_int_tree_p (t: inv_expr)
3093 || TREE_CODE (inv_expr) == SSA_NAME)
3094 return NULL;
3095
3096 /* Don't strip constant part away as we used to. */
3097
3098 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3099 struct iv_inv_expr_ent ent;
3100 ent.expr = inv_expr;
3101 ent.hash = iterative_hash_expr (tree: inv_expr, seed: 0);
3102 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (value: &ent, insert: INSERT);
3103
3104 if (!*slot)
3105 {
3106 *slot = XNEW (struct iv_inv_expr_ent);
3107 (*slot)->expr = inv_expr;
3108 (*slot)->hash = ent.hash;
3109 (*slot)->id = ++data->max_inv_expr_id;
3110 }
3111
3112 return *slot;
3113}
3114
3115
3116/* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3117 unsuitable as ivopts candidates for potentially involving undefined
3118 behavior. */
3119
3120static tree
3121find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3122{
3123 basic_block bb = (basic_block) bb_;
3124 if (TREE_CODE (*tp) == SSA_NAME
3125 && ssa_name_maybe_undef_p (var: *tp)
3126 && !ssa_name_any_use_dominates_bb_p (var: *tp, bb))
3127 return *tp;
3128 if (!EXPR_P (*tp))
3129 *walk_subtrees = 0;
3130 return NULL;
3131}
3132
3133/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3134 position to POS. If USE is not NULL, the candidate is set as related to
3135 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3136 replacement of the final value of the iv by a direct computation. */
3137
3138static struct iv_cand *
3139add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3140 enum iv_position pos, struct iv_use *use,
3141 gimple *incremented_at, struct iv *orig_iv = NULL,
3142 bool doloop = false)
3143{
3144 unsigned i;
3145 struct iv_cand *cand = NULL;
3146 tree type, orig_type;
3147
3148 gcc_assert (base && step);
3149
3150 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3151 live, but the ivopts code may replace a real pointer with one
3152 pointing before or after the memory block that is then adjusted
3153 into the memory block during the loop. FIXME: It would likely be
3154 better to actually force the pointer live and still use ivopts;
3155 for example, it would be enough to write the pointer into memory
3156 and keep it there until after the loop. */
3157 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3158 return NULL;
3159
3160 /* If BASE contains undefined SSA names make sure we only record
3161 the original IV. */
3162 bool involves_undefs = false;
3163 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3164 {
3165 if (pos != IP_ORIGINAL)
3166 return NULL;
3167 important = false;
3168 involves_undefs = true;
3169 }
3170
3171 /* For non-original variables, make sure their values are computed in a type
3172 that does not invoke undefined behavior on overflows (since in general,
3173 we cannot prove that these induction variables are non-wrapping). */
3174 if (pos != IP_ORIGINAL)
3175 {
3176 orig_type = TREE_TYPE (base);
3177 type = generic_type_for (type: orig_type);
3178 if (type != orig_type)
3179 {
3180 base = fold_convert (type, base);
3181 step = fold_convert (type, step);
3182 }
3183 }
3184
3185 for (i = 0; i < data->vcands.length (); i++)
3186 {
3187 cand = data->vcands[i];
3188
3189 if (cand->pos != pos)
3190 continue;
3191
3192 if (cand->incremented_at != incremented_at
3193 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3194 && cand->ainc_use != use))
3195 continue;
3196
3197 if (operand_equal_p (base, cand->iv->base, flags: 0)
3198 && operand_equal_p (step, cand->iv->step, flags: 0)
3199 && (TYPE_PRECISION (TREE_TYPE (base))
3200 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3201 break;
3202 }
3203
3204 if (i == data->vcands.length ())
3205 {
3206 cand = XCNEW (struct iv_cand);
3207 cand->id = i;
3208 cand->iv = alloc_iv (data, base, step);
3209 cand->pos = pos;
3210 if (pos != IP_ORIGINAL)
3211 {
3212 if (doloop)
3213 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3214 else
3215 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3216 cand->var_after = cand->var_before;
3217 }
3218 cand->important = important;
3219 cand->involves_undefs = involves_undefs;
3220 cand->incremented_at = incremented_at;
3221 cand->doloop_p = doloop;
3222 data->vcands.safe_push (obj: cand);
3223
3224 if (!poly_int_tree_p (t: step))
3225 {
3226 find_inv_vars (data, expr_p: &step, inv_vars: &cand->inv_vars);
3227
3228 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, inv_expr: step);
3229 /* Share bitmap between inv_vars and inv_exprs for cand. */
3230 if (inv_expr != NULL)
3231 {
3232 cand->inv_exprs = cand->inv_vars;
3233 cand->inv_vars = NULL;
3234 if (cand->inv_exprs)
3235 bitmap_clear (cand->inv_exprs);
3236 else
3237 cand->inv_exprs = BITMAP_ALLOC (NULL);
3238
3239 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3240 }
3241 }
3242
3243 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3244 cand->ainc_use = use;
3245 else
3246 cand->ainc_use = NULL;
3247
3248 cand->orig_iv = orig_iv;
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 dump_cand (file: dump_file, cand);
3251 }
3252
3253 cand->important |= important;
3254 cand->doloop_p |= doloop;
3255
3256 /* Relate candidate to the group for which it is added. */
3257 if (use)
3258 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3259
3260 return cand;
3261}
3262
/* Returns true if incrementing the induction variable at the end of LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment,
   thus creating a jump, possibly confusing other optimization passes and
   leaving less freedom to the scheduler.  So we allow IP_END only if
   IP_NORMAL is not available (so we do not have a better alternative), or
   if the latch edge is already nonempty.  */
3271
3272static bool
3273allow_ip_end_pos_p (class loop *loop)
3274{
3275 if (!ip_normal_pos (loop))
3276 return true;
3277
3278 if (!empty_block_p (ip_end_pos (loop)))
3279 return true;
3280
3281 return false;
3282}
3283
3284/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3285 Important field is set to IMPORTANT. */
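/* Rough example (target-dependent, names illustrative): for a 4-byte
   access *p_1 whose iv step is 4, on a target with post-increment
   addressing this records an IP_AFTER_USE candidate, so the use can
   later be rewritten to a post-increment style address.  */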
3286
3287static void
3288add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3289 bool important, struct iv_use *use)
3290{
3291 basic_block use_bb = gimple_bb (g: use->stmt);
3292 machine_mode mem_mode;
3293 unsigned HOST_WIDE_INT cstepi;
3294
3295 /* If we insert the increment in any position other than the standard
3296 ones, we must ensure that it is incremented once per iteration.
3297 It must not be in an inner nested loop, or one side of an if
3298 statement. */
3299 if (use_bb->loop_father != data->current_loop
3300 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3301 || stmt_can_throw_internal (cfun, use->stmt)
3302 || !cst_and_fits_in_hwi (step))
3303 return;
3304
3305 cstepi = int_cst_value (step);
3306
3307 mem_mode = TYPE_MODE (use->mem_type);
3308 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3309 || USE_STORE_PRE_INCREMENT (mem_mode))
3310 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3311 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3312 || USE_STORE_PRE_DECREMENT (mem_mode))
3313 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3314 {
3315 enum tree_code code = MINUS_EXPR;
3316 tree new_base;
3317 tree new_step = step;
3318
3319 if (POINTER_TYPE_P (TREE_TYPE (base)))
3320 {
3321 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3322 code = POINTER_PLUS_EXPR;
3323 }
3324 else
3325 new_step = fold_convert (TREE_TYPE (base), new_step);
3326 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3327 add_candidate_1 (data, base: new_base, step, important, pos: IP_BEFORE_USE, use,
3328 incremented_at: use->stmt);
3329 }
3330 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3331 || USE_STORE_POST_INCREMENT (mem_mode))
3332 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3333 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3334 || USE_STORE_POST_DECREMENT (mem_mode))
3335 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3336 {
3337 add_candidate_1 (data, base, step, important, pos: IP_AFTER_USE, use,
3338 incremented_at: use->stmt);
3339 }
3340}
3341
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  The candidate computation is scheduled before the exit
   condition and at the end of the loop.  */
3346
3347static void
3348add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3349 struct iv_use *use, struct iv *orig_iv = NULL,
3350 bool doloop = false)
3351{
3352 if (ip_normal_pos (data->current_loop))
3353 add_candidate_1 (data, base, step, important, pos: IP_NORMAL, use, NULL, orig_iv,
3354 doloop);
  /* Exclude the doloop candidate here since it requires a decrement then
     comparison and jump, so the IP_END position doesn't match.  */
3357 if (!doloop && ip_end_pos (data->current_loop)
3358 && allow_ip_end_pos_p (loop: data->current_loop))
3359 add_candidate_1 (data, base, step, important, pos: IP_END, use, NULL, orig_iv);
3360}
3361
3362/* Adds standard iv candidates. */
3363
3364static void
3365add_standard_iv_candidates (struct ivopts_data *data)
3366{
3367 add_candidate (data, integer_zero_node, integer_one_node, important: true, NULL);
3368
3369 /* The same for a double-integer type if it is still fast enough. */
3370 if (TYPE_PRECISION
3371 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3372 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3373 add_candidate (data, base: build_int_cst (long_integer_type_node, 0),
3374 step: build_int_cst (long_integer_type_node, 1), important: true, NULL);
3375
3376 /* The same for a double-integer type if it is still fast enough. */
3377 if (TYPE_PRECISION
3378 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3379 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3380 add_candidate (data, base: build_int_cst (long_long_integer_type_node, 0),
3381 step: build_int_cst (long_long_integer_type_node, 1), important: true, NULL);
3382}
3383
3384
/* Adds candidates based on the old induction variable IV.  */
3386
3387static void
3388add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3389{
3390 gimple *phi;
3391 tree def;
3392 struct iv_cand *cand;
3393
3394 /* Check if this biv is used in address type use. */
3395 if (iv->no_overflow && iv->have_address_use
3396 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3397 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3398 {
3399 tree base = fold_convert (sizetype, iv->base);
3400 tree step = fold_convert (sizetype, iv->step);
3401
3402 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3403 add_candidate (data, base, step, important: true, NULL, orig_iv: iv);
3404 /* Add iv cand of the original type only if it has nonlinear use. */
3405 if (iv->nonlin_use)
3406 add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3407 }
3408 else
3409 add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3410
3411 /* The same, but with initial value zero. */
3412 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3413 add_candidate (data, size_int (0), step: iv->step, important: true, NULL);
3414 else
3415 add_candidate (data, base: build_int_cst (TREE_TYPE (iv->base), 0),
3416 step: iv->step, important: true, NULL);
3417
3418 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3419 if (gimple_code (g: phi) == GIMPLE_PHI)
3420 {
3421 /* Additionally record the possibility of leaving the original iv
3422 untouched. */
3423 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3424 /* Don't add candidate if it's from another PHI node because
3425 it's an affine iv appearing in the form of PEELED_CHREC. */
3426 phi = SSA_NAME_DEF_STMT (def);
3427 if (gimple_code (g: phi) != GIMPLE_PHI)
3428 {
3429 cand = add_candidate_1 (data,
3430 base: iv->base, step: iv->step, important: true, pos: IP_ORIGINAL, NULL,
3431 SSA_NAME_DEF_STMT (def));
3432 if (cand)
3433 {
3434 cand->var_before = iv->ssa_name;
3435 cand->var_after = def;
3436 }
3437 }
3438 else
3439 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3440 }
3441}
3442
3443/* Adds candidates based on the old induction variables. */
3444
3445static void
3446add_iv_candidate_for_bivs (struct ivopts_data *data)
3447{
3448 unsigned i;
3449 struct iv *iv;
3450 bitmap_iterator bi;
3451
3452 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3453 {
3454 iv = ver_info (data, ver: i)->iv;
3455 if (iv && iv->biv_p && !integer_zerop (iv->step))
3456 add_iv_candidate_for_biv (data, iv);
3457 }
3458}
3459
3460/* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3461
3462static void
3463record_common_cand (struct ivopts_data *data, tree base,
3464 tree step, struct iv_use *use)
3465{
3466 class iv_common_cand ent;
3467 class iv_common_cand **slot;
3468
3469 ent.base = base;
3470 ent.step = step;
3471 ent.hash = iterative_hash_expr (tree: base, seed: 0);
3472 ent.hash = iterative_hash_expr (tree: step, seed: ent.hash);
3473
3474 slot = data->iv_common_cand_tab->find_slot (value: &ent, insert: INSERT);
3475 if (*slot == NULL)
3476 {
3477 *slot = new iv_common_cand ();
3478 (*slot)->base = base;
3479 (*slot)->step = step;
3480 (*slot)->uses.create (nelems: 8);
3481 (*slot)->hash = ent.hash;
3482 data->iv_common_cands.safe_push (obj: (*slot));
3483 }
3484
3485 gcc_assert (use != NULL);
3486 (*slot)->uses.safe_push (obj: use);
3487 return;
3488}
3489
3490/* Comparison function used to sort common candidates. */
3491
3492static int
3493common_cand_cmp (const void *p1, const void *p2)
3494{
3495 unsigned n1, n2;
3496 const class iv_common_cand *const *const ccand1
3497 = (const class iv_common_cand *const *)p1;
3498 const class iv_common_cand *const *const ccand2
3499 = (const class iv_common_cand *const *)p2;
3500
3501 n1 = (*ccand1)->uses.length ();
3502 n2 = (*ccand2)->uses.length ();
3503 return n2 - n1;
3504}
3505
/* Adds IV candidates based on the common candidates recorded.  */
3507
3508static void
3509add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3510{
3511 unsigned i, j;
3512 struct iv_cand *cand_1, *cand_2;
3513
3514 data->iv_common_cands.qsort (common_cand_cmp);
3515 for (i = 0; i < data->iv_common_cands.length (); i++)
3516 {
3517 class iv_common_cand *ptr = data->iv_common_cands[i];
3518
3519 /* Only add IV candidate if it's derived from multiple uses. */
3520 if (ptr->uses.length () <= 1)
3521 break;
3522
3523 cand_1 = NULL;
3524 cand_2 = NULL;
3525 if (ip_normal_pos (data->current_loop))
3526 cand_1 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3527 important: false, pos: IP_NORMAL, NULL, NULL);
3528
3529 if (ip_end_pos (data->current_loop)
3530 && allow_ip_end_pos_p (loop: data->current_loop))
3531 cand_2 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3532 important: false, pos: IP_END, NULL, NULL);
3533
3534 /* Bind deriving uses and the new candidates. */
3535 for (j = 0; j < ptr->uses.length (); j++)
3536 {
3537 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3538 if (cand_1)
3539 bitmap_set_bit (group->related_cands, cand_1->id);
3540 if (cand_2)
3541 bitmap_set_bit (group->related_cands, cand_2->id);
3542 }
3543 }
3544
3545 /* Release data since it is useless from this point. */
3546 data->iv_common_cand_tab->empty ();
3547 data->iv_common_cands.truncate (size: 0);
3548}
3549
3550/* Adds candidates based on the value of USE's iv. */
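/* For a use like *(p_1 + (sizetype) i_2 * 4 + 16) (expression
   hypothetical), this typically records or adds candidates such as the
   use's own {base, step}, the same step with a zero base, the base with
   the constant offset 16 stripped, and, for address uses, auto-increment
   variants.  */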
3551
3552static void
3553add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3554{
3555 poly_uint64 offset;
3556 tree base;
3557 struct iv *iv = use->iv;
3558 tree basetype = TREE_TYPE (iv->base);
3559
  /* Don't add a candidate for an iv_use whose type is neither integer nor
     pointer, or doesn't have mode precision; instead, add a candidate for
     the corresponding scev in an unsigned type of the same precision.
     See PR93674 for more info.  */
3563 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3564 || !type_has_mode_precision_p (t: basetype))
3565 {
3566 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3567 TYPE_UNSIGNED (basetype));
3568 add_candidate (data, fold_convert (basetype, iv->base),
3569 fold_convert (basetype, iv->step), important: false, NULL);
3570 return;
3571 }
3572
3573 add_candidate (data, base: iv->base, step: iv->step, important: false, use);
3574
3575 /* Record common candidate for use in case it can be shared by others. */
3576 record_common_cand (data, base: iv->base, step: iv->step, use);
3577
3578 /* Record common candidate with initial value zero. */
3579 basetype = TREE_TYPE (iv->base);
3580 if (POINTER_TYPE_P (basetype))
3581 basetype = sizetype;
3582 record_common_cand (data, base: build_int_cst (basetype, 0), step: iv->step, use);
3583
3584 /* Compare the cost of an address with an unscaled index with the cost of
3585 an address with a scaled index and add candidate if useful. */
3586 poly_int64 step;
3587 if (use != NULL
3588 && poly_int_tree_p (t: iv->step, value: &step)
3589 && address_p (type: use->type))
3590 {
3591 poly_int64 new_step;
3592 unsigned int fact = preferred_mem_scale_factor
3593 (base: use->iv->base,
3594 TYPE_MODE (use->mem_type),
3595 speed: optimize_loop_for_speed_p (data->current_loop));
3596
3597 if (fact != 1
3598 && multiple_p (a: step, b: fact, multiple: &new_step))
3599 add_candidate (data, size_int (0),
3600 step: wide_int_to_tree (sizetype, cst: new_step),
3601 important: true, NULL);
3602 }
3603
3604 /* Record common candidate with constant offset stripped in base.
3605 Like the use itself, we also add candidate directly for it. */
3606 base = strip_offset (expr: iv->base, offset: &offset);
3607 if (maybe_ne (a: offset, b: 0U) || base != iv->base)
3608 {
3609 record_common_cand (data, base, step: iv->step, use);
3610 add_candidate (data, base, step: iv->step, important: false, use);
3611 }
3612
3613 /* Record common candidate with base_object removed in base. */
3614 base = iv->base;
3615 STRIP_NOPS (base);
3616 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3617 {
3618 tree step = iv->step;
3619
3620 STRIP_NOPS (step);
3621 base = TREE_OPERAND (base, 1);
3622 step = fold_convert (sizetype, step);
3623 record_common_cand (data, base, step, use);
3624 /* Also record common candidate with offset stripped. */
3625 tree alt_base, alt_offset;
3626 split_constant_offset (base, &alt_base, &alt_offset);
3627 if (!integer_zerop (alt_offset))
3628 record_common_cand (data, base: alt_base, step, use);
3629 }
3630
3631 /* At last, add auto-incremental candidates. Make such variables
3632 important since other iv uses with same base object may be based
3633 on it. */
3634 if (use != NULL && address_p (type: use->type))
3635 add_autoinc_candidates (data, base: iv->base, step: iv->step, important: true, use);
3636}
3637
3638/* Adds candidates based on the uses. */
3639
3640static void
3641add_iv_candidate_for_groups (struct ivopts_data *data)
3642{
3643 unsigned i;
3644
3645 /* Only add candidate for the first use in group. */
3646 for (i = 0; i < data->vgroups.length (); i++)
3647 {
3648 struct iv_group *group = data->vgroups[i];
3649
3650 gcc_assert (group->vuses[0] != NULL);
3651 add_iv_candidate_for_use (data, use: group->vuses[0]);
3652 }
3653 add_iv_candidate_derived_from_uses (data);
3654}
3655
3656/* Record important candidates and add them to related_cands bitmaps. */
3657
3658static void
3659record_important_candidates (struct ivopts_data *data)
3660{
3661 unsigned i;
3662 struct iv_group *group;
3663
3664 for (i = 0; i < data->vcands.length (); i++)
3665 {
3666 struct iv_cand *cand = data->vcands[i];
3667
3668 if (cand->important)
3669 bitmap_set_bit (data->important_candidates, i);
3670 }
3671
3672 data->consider_all_candidates = (data->vcands.length ()
3673 <= CONSIDER_ALL_CANDIDATES_BOUND);
3674
3675 /* Add important candidates to groups' related_cands bitmaps. */
3676 for (i = 0; i < data->vgroups.length (); i++)
3677 {
3678 group = data->vgroups[i];
3679 bitmap_ior_into (group->related_cands, data->important_candidates);
3680 }
3681}
3682
/* Allocates the data structure mapping the (group, candidate) pairs to costs.
   If consider_all_candidates is true, each group gets an array indexed by
   candidate id; otherwise each group gets a small open-addressed table whose
   size is the number of its related candidates rounded up to a power of
   two.  */
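/* E.g. (sizes illustrative), when not considering all candidates: a group
   related to 5 candidates gets a table of 8 entries; a candidate with id 11
   starts probing at slot 11 & 7 == 3 and moves to the next free slot on
   collision (see set_group_iv_cost and get_group_iv_cost).  */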
3686
3687static void
3688alloc_use_cost_map (struct ivopts_data *data)
3689{
3690 unsigned i, size, s;
3691
3692 for (i = 0; i < data->vgroups.length (); i++)
3693 {
3694 struct iv_group *group = data->vgroups[i];
3695
3696 if (data->consider_all_candidates)
3697 size = data->vcands.length ();
3698 else
3699 {
3700 s = bitmap_count_bits (group->related_cands);
3701
	  /* Round up to a power of two, so that computing the modulo by it
	     is fast.  */
3703 size = s ? (1 << ceil_log2 (x: s)) : 1;
3704 }
3705
3706 group->n_map_members = size;
3707 group->cost_map = XCNEWVEC (class cost_pair, size);
3708 }
3709}
3710
/* Sets the cost of the (GROUP, CAND) pair to COST, and records that it
   depends on invariants INV_VARS, that the value used in expressing it is
   VALUE, and, in the case of iv elimination, that the comparison operator
   is COMP.  */
3714
3715static void
3716set_group_iv_cost (struct ivopts_data *data,
3717 struct iv_group *group, struct iv_cand *cand,
3718 comp_cost cost, bitmap inv_vars, tree value,
3719 enum tree_code comp, bitmap inv_exprs)
3720{
3721 unsigned i, s;
3722
3723 if (cost.infinite_cost_p ())
3724 {
3725 BITMAP_FREE (inv_vars);
3726 BITMAP_FREE (inv_exprs);
3727 return;
3728 }
3729
3730 if (data->consider_all_candidates)
3731 {
3732 group->cost_map[cand->id].cand = cand;
3733 group->cost_map[cand->id].cost = cost;
3734 group->cost_map[cand->id].inv_vars = inv_vars;
3735 group->cost_map[cand->id].inv_exprs = inv_exprs;
3736 group->cost_map[cand->id].value = value;
3737 group->cost_map[cand->id].comp = comp;
3738 return;
3739 }
3740
3741 /* n_map_members is a power of two, so this computes modulo. */
3742 s = cand->id & (group->n_map_members - 1);
3743 for (i = s; i < group->n_map_members; i++)
3744 if (!group->cost_map[i].cand)
3745 goto found;
3746 for (i = 0; i < s; i++)
3747 if (!group->cost_map[i].cand)
3748 goto found;
3749
3750 gcc_unreachable ();
3751
3752found:
3753 group->cost_map[i].cand = cand;
3754 group->cost_map[i].cost = cost;
3755 group->cost_map[i].inv_vars = inv_vars;
3756 group->cost_map[i].inv_exprs = inv_exprs;
3757 group->cost_map[i].value = value;
3758 group->cost_map[i].comp = comp;
3759}
3760
3761/* Gets cost of (GROUP, CAND) pair. */
3762
3763static class cost_pair *
3764get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3765 struct iv_cand *cand)
3766{
3767 unsigned i, s;
3768 class cost_pair *ret;
3769
3770 if (!cand)
3771 return NULL;
3772
3773 if (data->consider_all_candidates)
3774 {
3775 ret = group->cost_map + cand->id;
3776 if (!ret->cand)
3777 return NULL;
3778
3779 return ret;
3780 }
3781
3782 /* n_map_members is a power of two, so this computes modulo. */
3783 s = cand->id & (group->n_map_members - 1);
3784 for (i = s; i < group->n_map_members; i++)
3785 if (group->cost_map[i].cand == cand)
3786 return group->cost_map + i;
3787 else if (group->cost_map[i].cand == NULL)
3788 return NULL;
3789 for (i = 0; i < s; i++)
3790 if (group->cost_map[i].cand == cand)
3791 return group->cost_map + i;
3792 else if (group->cost_map[i].cand == NULL)
3793 return NULL;
3794
3795 return NULL;
3796}
3797
3798/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3799static rtx
3800produce_memory_decl_rtl (tree obj, int *regno)
3801{
3802 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3803 machine_mode address_mode = targetm.addr_space.address_mode (as);
3804 rtx x;
3805
3806 gcc_assert (obj);
3807 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3808 {
3809 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3810 x = gen_rtx_SYMBOL_REF (address_mode, name);
3811 SET_SYMBOL_REF_DECL (x, obj);
3812 x = gen_rtx_MEM (DECL_MODE (obj), x);
3813 set_mem_addr_space (x, as);
3814 targetm.encode_section_info (obj, x, true);
3815 }
3816 else
3817 {
3818 x = gen_raw_REG (address_mode, (*regno)++);
3819 x = gen_rtx_MEM (DECL_MODE (obj), x);
3820 set_mem_addr_space (x, as);
3821 }
3822
3823 return x;
3824}
3825
3826/* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3827 walk_tree. DATA contains the current fake register number. */
3828
3829static tree
3830prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3831{
3832 tree obj = NULL_TREE;
3833 rtx x = NULL_RTX;
3834 int *regno = (int *) data;
3835
3836 switch (TREE_CODE (*expr_p))
3837 {
3838 case ADDR_EXPR:
3839 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3840 handled_component_p (t: *expr_p);
3841 expr_p = &TREE_OPERAND (*expr_p, 0))
3842 continue;
3843 obj = *expr_p;
3844 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3845 x = produce_memory_decl_rtl (obj, regno);
3846 break;
3847
3848 case SSA_NAME:
3849 *ws = 0;
3850 obj = SSA_NAME_VAR (*expr_p);
3851 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3852 if (!obj)
3853 return NULL_TREE;
3854 if (!DECL_RTL_SET_P (obj))
3855 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3856 break;
3857
3858 case VAR_DECL:
3859 case PARM_DECL:
3860 case RESULT_DECL:
3861 *ws = 0;
3862 obj = *expr_p;
3863
3864 if (DECL_RTL_SET_P (obj))
3865 break;
3866
3867 if (DECL_MODE (obj) == BLKmode)
3868 x = produce_memory_decl_rtl (obj, regno);
3869 else
3870 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3871
3872 break;
3873
3874 default:
3875 break;
3876 }
3877
3878 if (x)
3879 {
3880 decl_rtl_to_reset.safe_push (obj);
3881 SET_DECL_RTL (obj, x);
3882 }
3883
3884 return NULL_TREE;
3885}
3886
3887/* Predict whether the given loop will be transformed in the RTL
3888 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3889 This is only for target independent checks, see targetm.predict_doloop_p
3890 for the target dependent ones.
3891
3892 Note that according to some initial investigation, some checks such as the
3893 costly niter check and invalid stmt scanning don't bring much gain in
3894 general cases, so keep this as simple as possible for now.
3895
3896 Some RTL-specific checks seem impossible to perform on gimple; if any new
3897 checks or easy checks are missing here, please add them. */
3898
3899static bool
3900generic_predict_doloop_p (struct ivopts_data *data)
3901{
3902 class loop *loop = data->current_loop;
3903
3904 /* Call target hook for target dependent checks. */
3905 if (!targetm.predict_doloop_p (loop))
3906 {
3907 if (dump_file && (dump_flags & TDF_DETAILS))
3908 fprintf (stream: dump_file, format: "Predict doloop failure due to"
3909 " target specific checks.\n");
3910 return false;
3911 }
3912
3913 /* Similar to doloop_optimize, check the iteration description to see
3914 whether it's suitable or not. Keep it as simple as possible; feel free
3915 to extend it if you find that any multiple-exit cases matter. */
3916 edge exit = single_dom_exit (loop);
3917 class tree_niter_desc *niter_desc;
3918 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3919 {
3920 if (dump_file && (dump_flags & TDF_DETAILS))
3921 fprintf (stream: dump_file, format: "Predict doloop failure due to"
3922 " unexpected niters.\n");
3923 return false;
3924 }
3925
3926 /* Similar to doloop_optimize, check whether the iteration count is too
3927 small for the transformation to be profitable. */
3928 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3929 if (est_niter == -1)
3930 est_niter = get_likely_max_loop_iterations_int (loop);
3931 if (est_niter >= 0 && est_niter < 3)
3932 {
3933 if (dump_file && (dump_flags & TDF_DETAILS))
3934 fprintf (stream: dump_file,
3935 format: "Predict doloop failure due to"
3936 " too few iterations (%u).\n",
3937 (unsigned int) est_niter);
3938 return false;
3939 }
3940
3941 return true;
3942}
3943
3944/* Determines cost of the computation of EXPR. */
3945
3946static unsigned
3947computation_cost (tree expr, bool speed)
3948{
3949 rtx_insn *seq;
3950 rtx rslt;
3951 tree type = TREE_TYPE (expr);
3952 unsigned cost;
3953 /* Avoid using hard regs in ways which may be unsupported. */
3954 int regno = LAST_VIRTUAL_REGISTER + 1;
3955 struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);
3956 enum node_frequency real_frequency = node->frequency;
3957
3958 node->frequency = NODE_FREQUENCY_NORMAL;
3959 crtl->maybe_hot_insn_p = speed;
3960 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3961 start_sequence ();
3962 rslt = expand_expr (exp: expr, NULL_RTX, TYPE_MODE (type), modifier: EXPAND_NORMAL);
3963 seq = get_insns ();
3964 end_sequence ();
3965 default_rtl_profile ();
3966 node->frequency = real_frequency;
3967
3968 cost = seq_cost (seq, speed);
3969 if (MEM_P (rslt))
3970 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3971 TYPE_ADDR_SPACE (type), speed);
3972 else if (!REG_P (rslt))
3973 cost += set_src_cost (x: rslt, TYPE_MODE (type), speed_p: speed);
3974
3975 return cost;
3976}
3977
3978/* Returns variable containing the value of candidate CAND at statement AT. */
3979
3980static tree
3981var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3982{
3983 if (stmt_after_increment (loop, cand, stmt))
3984 return cand->var_after;
3985 else
3986 return cand->var_before;
3987}
3988
3989/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3990 same precision that is at least as wide as the precision of TYPE, stores
3991 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3992 type of A and B. */
3993
3994static tree
3995determine_common_wider_type (tree *a, tree *b)
3996{
3997 tree wider_type = NULL;
3998 tree suba, subb;
3999 tree atype = TREE_TYPE (*a);
4000
4001 if (CONVERT_EXPR_P (*a))
4002 {
4003 suba = TREE_OPERAND (*a, 0);
4004 wider_type = TREE_TYPE (suba);
4005 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
4006 return atype;
4007 }
4008 else
4009 return atype;
4010
4011 if (CONVERT_EXPR_P (*b))
4012 {
4013 subb = TREE_OPERAND (*b, 0);
4014 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
4015 return atype;
4016 }
4017 else
4018 return atype;
4019
4020 *a = suba;
4021 *b = subb;
4022 return wider_type;
4023}
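
/* For instance (a hypothetical example): if *A is (unsigned int) al and *B
   is (unsigned int) bl, where al and bl both have type unsigned long (at
   least as wide as unsigned int), the function strips the conversions,
   leaving *A = al and *B = bl, and returns unsigned long.  If either
   operand is not such a conversion, or the inner precisions differ, the
   original type of *A is returned and nothing is changed.  */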
4024
4025/* Determines the expression by which USE is expressed from induction variable
4026 CAND at statement AT in LOOP. The expression is stored in two parts in a
4027 decomposed form: the invariant part is stored in AFF_INV, while the variant
4028 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in
4029 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
4030
4031static bool
4032get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4033 struct iv_cand *cand, class aff_tree *aff_inv,
4034 class aff_tree *aff_var, widest_int *prat = NULL)
4035{
4036 tree ubase = use->iv->base, ustep = use->iv->step;
4037 tree cbase = cand->iv->base, cstep = cand->iv->step;
4038 tree common_type, uutype, var, cstep_common;
4039 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4040 aff_tree aff_cbase;
4041 widest_int rat;
4042
4043 /* We must have enough precision to express the values of the use. */
4044 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4045 return false;
4046
4047 var = var_at_stmt (loop, cand, stmt: at);
4048 uutype = unsigned_type_for (utype);
4049
4050 /* If the conversion is not a no-op, perform it. */
4051 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4052 {
4053 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4054 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (t: cstep)))
4055 {
4056 tree inner_base, inner_step, inner_type;
4057 inner_base = TREE_OPERAND (cbase, 0);
4058 if (CONVERT_EXPR_P (cstep))
4059 inner_step = TREE_OPERAND (cstep, 0);
4060 else
4061 inner_step = cstep;
4062
4063 inner_type = TREE_TYPE (inner_base);
4064 /* If the candidate is added from a biv whose type is smaller than
4065 ctype, we know that neither the candidate nor the biv overflows.
4066 In this case, it's safe to skip the conversion in the candidate.
4067 As an example, (unsigned short)((unsigned long)A) equals
4068 (unsigned short)A, if A has a type no larger than short. */
4069 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4070 {
4071 cbase = inner_base;
4072 cstep = inner_step;
4073 }
4074 }
4075 cbase = fold_convert (uutype, cbase);
4076 cstep = fold_convert (uutype, cstep);
4077 var = fold_convert (uutype, var);
4078 }
4079
4080 /* Ratio is 1 when computing the value of biv cand by itself.
4081 We can't rely on constant_multiple_of in this case because the
4082 use is created after the original biv is selected. The call
4083 could fail because of inconsistent fold behavior. See PR68021
4084 for more information. */
4085 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4086 {
4087 gcc_assert (is_gimple_assign (use->stmt));
4088 gcc_assert (use->iv->ssa_name == cand->var_after);
4089 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4090 rat = 1;
4091 }
4092 else if (!constant_multiple_of (top: ustep, bot: cstep, mul: &rat))
4093 return false;
4094
4095 if (prat)
4096 *prat = rat;
4097
4098 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4099 type, we achieve better folding by computing their difference in this
4100 wider type, and casting the result to UUTYPE. We do not need to worry
4101 about overflows, as all the arithmetic will in the end be performed in
4102 UUTYPE anyway. */
4103 common_type = determine_common_wider_type (a: &ubase, b: &cbase);
4104
4105 /* use = ubase - ratio * cbase + ratio * var. */
4106 tree_to_aff_combination (ubase, common_type, aff_inv);
4107 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4108 tree_to_aff_combination (var, uutype, aff_var);
4109
4110 /* We need to shift the value if we are after the increment. */
4111 if (stmt_after_increment (loop, cand, stmt: at))
4112 {
4113 aff_tree cstep_aff;
4114
4115 if (common_type != uutype)
4116 cstep_common = fold_convert (common_type, cstep);
4117 else
4118 cstep_common = cstep;
4119
4120 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4121 aff_combination_add (&aff_cbase, &cstep_aff);
4122 }
4123
4124 aff_combination_scale (&aff_cbase, -rat);
4125 aff_combination_add (aff_inv, &aff_cbase);
4126 if (common_type != uutype)
4127 aff_combination_convert (aff_inv, uutype);
4128
4129 aff_combination_scale (aff_var, rat);
4130 return true;
4131}
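
/* A small worked example of the decomposition above (illustrative values
   only): for a use iv {base = 4, step = 4} (a byte offset) and a candidate
   iv {base = 0, step = 1} (a simple counter), constant_multiple_of gives
   ratio = 4, and

     use = ubase - ratio * cbase + ratio * var = 4 - 4 * 0 + 4 * var,

   so AFF_INV holds the constant 4 and AFF_VAR holds 4 * var, which indeed
   equals 4 + 4 * i when var is the counter i.  */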
4132
4133/* Determines the expression by which USE is expressed from induction variable
4134 CAND at statement AT in LOOP. The expression is stored in decomposed
4135 form in AFF. Returns false if USE cannot be expressed using CAND. */
4136
4137static bool
4138get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4139 struct iv_cand *cand, class aff_tree *aff)
4140{
4141 aff_tree aff_var;
4142
4143 if (!get_computation_aff_1 (loop, at, use, cand, aff_inv: aff, aff_var: &aff_var))
4144 return false;
4145
4146 aff_combination_add (aff, &aff_var);
4147 return true;
4148}
4149
4150/* Return the type of USE. */
4151
4152static tree
4153get_use_type (struct iv_use *use)
4154{
4155 tree base_type = TREE_TYPE (use->iv->base);
4156 tree type;
4157
4158 if (use->type == USE_REF_ADDRESS)
4159 {
4160 /* The base_type may be a void pointer. Create a pointer type based on
4161 the mem_ref instead. */
4162 type = build_pointer_type (TREE_TYPE (*use->op_p));
4163 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4164 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4165 }
4166 else
4167 type = base_type;
4168
4169 return type;
4170}
4171
4172/* Determines the expression by which USE is expressed from induction variable
4173 CAND at statement AT in LOOP. The computation is unshared. */
4174
4175static tree
4176get_computation_at (class loop *loop, gimple *at,
4177 struct iv_use *use, struct iv_cand *cand)
4178{
4179 aff_tree aff;
4180 tree type = get_use_type (use);
4181
4182 if (!get_computation_aff (loop, at, use, cand, aff: &aff))
4183 return NULL_TREE;
4184 unshare_aff_combination (&aff);
4185 return fold_convert (type, aff_combination_to_tree (&aff));
4186}
4187
4188/* Like get_computation_at, but try harder, even if the computation
4189 is more expensive. Intended for debug stmts. */
4190
4191static tree
4192get_debug_computation_at (class loop *loop, gimple *at,
4193 struct iv_use *use, struct iv_cand *cand)
4194{
4195 if (tree ret = get_computation_at (loop, at, use, cand))
4196 return ret;
4197
4198 tree ubase = use->iv->base, ustep = use->iv->step;
4199 tree cbase = cand->iv->base, cstep = cand->iv->step;
4200 tree var;
4201 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4202 widest_int rat;
4203
4204 /* We must have enough precision to express the values of the use. */
4205 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4206 return NULL_TREE;
4207
4208 /* Try to handle the case that get_computation_at doesn't:
4209 express
4210 use = ubase + (var - cbase) / ratio. */
4211 if (!constant_multiple_of (top: cstep, fold_convert (TREE_TYPE (cstep), ustep),
4212 mul: &rat))
4213 return NULL_TREE;
4214
4215 bool neg_p = false;
4216 if (wi::neg_p (x: rat))
4217 {
4218 if (TYPE_UNSIGNED (ctype))
4219 return NULL_TREE;
4220 neg_p = true;
4221 rat = wi::neg (x: rat);
4222 }
4223
4224 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4225 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4226 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4227 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4228 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4229 if (!use->iv->no_overflow
4230 && !cand->iv->no_overflow
4231 && !integer_pow2p (cstep))
4232 return NULL_TREE;
4233
4234 int bits = wi::exact_log2 (rat);
4235 if (bits == -1)
4236 bits = wi::floor_log2 (rat) + 1;
4237 if (!cand->iv->no_overflow
4238 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4239 return NULL_TREE;
4240
4241 var = var_at_stmt (loop, cand, stmt: at);
4242
4243 if (POINTER_TYPE_P (ctype))
4244 {
4245 ctype = unsigned_type_for (ctype);
4246 cbase = fold_convert (ctype, cbase);
4247 cstep = fold_convert (ctype, cstep);
4248 var = fold_convert (ctype, var);
4249 }
4250
4251 if (stmt_after_increment (loop, cand, stmt: at))
4252 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4253 unshare_expr (cstep));
4254
4255 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4256 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4257 wide_int_to_tree (TREE_TYPE (var), rat));
4258 if (POINTER_TYPE_P (utype))
4259 {
4260 var = fold_convert (sizetype, var);
4261 if (neg_p)
4262 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4263 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4264 }
4265 else
4266 {
4267 var = fold_convert (utype, var);
4268 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4269 ubase, var);
4270 }
4271 return var;
4272}
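
/* As a hypothetical illustration of the inverse formula used here: for a
   candidate iv of a wider type {base = 0, step = 8} and a use iv of a
   narrower type {base = 100, step = 2}, constant_multiple_of (cstep, ustep)
   gives rat = 4, and the reconstructed value is

     use = ubase + (var - cbase) / rat = 100 + (var - 0) / 4,

   which equals 100 + 2 * i when var = 8 * i.  The division is emitted as
   EXACT_DIV_EXPR, which is valid because var - cbase is always a multiple
   of rat by construction.  */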
4273
4274/* Adjust the cost COST for being in loop setup rather than loop body.
4275 If we're optimizing for space, the loop setup overhead is constant;
4276 if we're optimizing for speed, amortize it over the per-iteration cost.
4277 If ROUND_UP_P is true, the result is rounded up rather than truncated
4278 toward zero when optimizing for speed. */
4279static int64_t
4280adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4281 bool round_up_p = false)
4282{
4283 if (cost == INFTY)
4284 return cost;
4285 else if (optimize_loop_for_speed_p (data->current_loop))
4286 {
4287 int64_t niters = (int64_t) avg_loop_niter (loop: data->current_loop);
4288 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4289 }
4290 else
4291 return cost;
4292}
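
/* For example (illustrative numbers): with a setup cost of 25 and an
   average of 10 iterations, optimizing for speed yields 25 / 10 = 2, or
   (25 + 9) / 10 = 3 when ROUND_UP_P is true; when optimizing for size the
   cost stays 25, since the setup code is executed only once regardless of
   the iteration count.  */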
4293
4294/* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4295 EXPR operand holding the shift. COST0 and COST1 are the costs for
4296 calculating the operands of EXPR. Returns true if successful, and returns
4297 the cost in COST. */
4298
4299static bool
4300get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4301 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4302{
4303 comp_cost res;
4304 tree op1 = TREE_OPERAND (expr, 1);
4305 tree cst = TREE_OPERAND (mult, 1);
4306 tree multop = TREE_OPERAND (mult, 0);
4307 int m = exact_log2 (x: int_cst_value (cst));
4308 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4309 int as_cost, sa_cost;
4310 bool mult_in_op1;
4311
4312 if (!(m >= 0 && m < maxm))
4313 return false;
4314
4315 STRIP_NOPS (op1);
4316 mult_in_op1 = operand_equal_p (op1, mult, flags: 0);
4317
4318 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
4319
4320 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4321 use that in preference to a shift insn followed by an add insn. */
4322 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4323 ? shiftadd_cost (speed, mode, bits: m)
4324 : (mult_in_op1
4325 ? shiftsub1_cost (speed, mode, bits: m)
4326 : shiftsub0_cost (speed, mode, bits: m)));
4327
4328 res = comp_cost (MIN (as_cost, sa_cost), 0);
4329 res += (mult_in_op1 ? cost0 : cost1);
4330
4331 STRIP_NOPS (multop);
4332 if (!is_gimple_val (multop))
4333 res += force_expr_to_var_cost (multop, speed);
4334
4335 *cost = res;
4336 return true;
4337}
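
/* A hypothetical example: for expr = b + a * 8 the multiplication operand
   gives m = 3, so the choice is between a shift-then-add sequence
   (shift_cost for 3 bits plus add_cost) and a single shiftadd insn
   (shiftadd_cost).  The cheaper of the two, plus the cost of the operand
   that is not part of the multiplication (and of the multiplied operand if
   it is not already a simple value), becomes *COST.  */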
4338
4339/* Estimates cost of forcing expression EXPR into a variable. */
4340
4341static comp_cost
4342force_expr_to_var_cost (tree expr, bool speed)
4343{
4344 static bool costs_initialized = false;
4345 static unsigned integer_cost [2];
4346 static unsigned symbol_cost [2];
4347 static unsigned address_cost [2];
4348 tree op0, op1;
4349 comp_cost cost0, cost1, cost;
4350 machine_mode mode;
4351 scalar_int_mode int_mode;
4352
4353 if (!costs_initialized)
4354 {
4355 tree type = build_pointer_type (integer_type_node);
4356 tree var, addr;
4357 rtx x;
4358 int i;
4359
4360 var = create_tmp_var_raw (integer_type_node, "test_var");
4361 TREE_STATIC (var) = 1;
4362 x = produce_memory_decl_rtl (obj: var, NULL);
4363 SET_DECL_RTL (var, x);
4364
4365 addr = build1 (ADDR_EXPR, type, var);
4366
4367
4368 for (i = 0; i < 2; i++)
4369 {
4370 integer_cost[i] = computation_cost (expr: build_int_cst (integer_type_node,
4371 2000), speed: i);
4372
4373 symbol_cost[i] = computation_cost (expr: addr, speed: i) + 1;
4374
4375 address_cost[i]
4376 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), speed: i) + 1;
4377 if (dump_file && (dump_flags & TDF_DETAILS))
4378 {
4379 fprintf (stream: dump_file, format: "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4380 fprintf (stream: dump_file, format: " integer %d\n", (int) integer_cost[i]);
4381 fprintf (stream: dump_file, format: " symbol %d\n", (int) symbol_cost[i]);
4382 fprintf (stream: dump_file, format: " address %d\n", (int) address_cost[i]);
4383 fprintf (stream: dump_file, format: " other %d\n", (int) target_spill_cost[i]);
4384 fprintf (stream: dump_file, format: "\n");
4385 }
4386 }
4387
4388 costs_initialized = true;
4389 }
4390
4391 STRIP_NOPS (expr);
4392
4393 if (SSA_VAR_P (expr))
4394 return no_cost;
4395
4396 if (is_gimple_min_invariant (expr))
4397 {
4398 if (poly_int_tree_p (t: expr))
4399 return comp_cost (integer_cost [speed], 0);
4400
4401 if (TREE_CODE (expr) == ADDR_EXPR)
4402 {
4403 tree obj = TREE_OPERAND (expr, 0);
4404
4405 if (VAR_P (obj)
4406 || TREE_CODE (obj) == PARM_DECL
4407 || TREE_CODE (obj) == RESULT_DECL)
4408 return comp_cost (symbol_cost [speed], 0);
4409 }
4410
4411 return comp_cost (address_cost [speed], 0);
4412 }
4413
4414 switch (TREE_CODE (expr))
4415 {
4416 case POINTER_PLUS_EXPR:
4417 case PLUS_EXPR:
4418 case MINUS_EXPR:
4419 case MULT_EXPR:
4420 case TRUNC_DIV_EXPR:
4421 case BIT_AND_EXPR:
4422 case BIT_IOR_EXPR:
4423 case LSHIFT_EXPR:
4424 case RSHIFT_EXPR:
4425 op0 = TREE_OPERAND (expr, 0);
4426 op1 = TREE_OPERAND (expr, 1);
4427 STRIP_NOPS (op0);
4428 STRIP_NOPS (op1);
4429 break;
4430
4431 CASE_CONVERT:
4432 case NEGATE_EXPR:
4433 case BIT_NOT_EXPR:
4434 op0 = TREE_OPERAND (expr, 0);
4435 STRIP_NOPS (op0);
4436 op1 = NULL_TREE;
4437 break;
4438 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case, we
4439 introduce a COND_EXPR for the IV base, so we need to support better cost
4440 estimation for this COND_EXPR and for tcc_comparison codes. */
4441 case COND_EXPR:
4442 op0 = TREE_OPERAND (expr, 1);
4443 STRIP_NOPS (op0);
4444 op1 = TREE_OPERAND (expr, 2);
4445 STRIP_NOPS (op1);
4446 break;
4447 case LT_EXPR:
4448 case LE_EXPR:
4449 case GT_EXPR:
4450 case GE_EXPR:
4451 case EQ_EXPR:
4452 case NE_EXPR:
4453 case UNORDERED_EXPR:
4454 case ORDERED_EXPR:
4455 case UNLT_EXPR:
4456 case UNLE_EXPR:
4457 case UNGT_EXPR:
4458 case UNGE_EXPR:
4459 case UNEQ_EXPR:
4460 case LTGT_EXPR:
4461 case MAX_EXPR:
4462 case MIN_EXPR:
4463 op0 = TREE_OPERAND (expr, 0);
4464 STRIP_NOPS (op0);
4465 op1 = TREE_OPERAND (expr, 1);
4466 STRIP_NOPS (op1);
4467 break;
4468
4469 default:
4470 /* Just an arbitrary value, FIXME. */
4471 return comp_cost (target_spill_cost[speed], 0);
4472 }
4473
4474 if (op0 == NULL_TREE
4475 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4476 cost0 = no_cost;
4477 else
4478 cost0 = force_expr_to_var_cost (expr: op0, speed);
4479
4480 if (op1 == NULL_TREE
4481 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4482 cost1 = no_cost;
4483 else
4484 cost1 = force_expr_to_var_cost (expr: op1, speed);
4485
4486 mode = TYPE_MODE (TREE_TYPE (expr));
4487 switch (TREE_CODE (expr))
4488 {
4489 case POINTER_PLUS_EXPR:
4490 case PLUS_EXPR:
4491 case MINUS_EXPR:
4492 case NEGATE_EXPR:
4493 cost = comp_cost (add_cost (speed, mode), 0);
4494 if (TREE_CODE (expr) != NEGATE_EXPR)
4495 {
4496 tree mult = NULL_TREE;
4497 comp_cost sa_cost;
4498 if (TREE_CODE (op1) == MULT_EXPR)
4499 mult = op1;
4500 else if (TREE_CODE (op0) == MULT_EXPR)
4501 mult = op0;
4502
4503 if (mult != NULL_TREE
4504 && is_a <scalar_int_mode> (m: mode, result: &int_mode)
4505 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4506 && get_shiftadd_cost (expr, mode: int_mode, cost0, cost1, mult,
4507 speed, cost: &sa_cost))
4508 return sa_cost;
4509 }
4510 break;
4511
4512 CASE_CONVERT:
4513 {
4514 tree inner_mode, outer_mode;
4515 outer_mode = TREE_TYPE (expr);
4516 inner_mode = TREE_TYPE (op0);
4517 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4518 TYPE_MODE (inner_mode), speed), 0);
4519 }
4520 break;
4521
4522 case MULT_EXPR:
4523 if (cst_and_fits_in_hwi (op0))
4524 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4525 mode, speed), 0);
4526 else if (cst_and_fits_in_hwi (op1))
4527 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4528 mode, speed), 0);
4529 else
4530 return comp_cost (target_spill_cost [speed], 0);
4531 break;
4532
4533 case TRUNC_DIV_EXPR:
4534 /* Division by a power of two is usually cheap, so we allow it. Forbid
4535 anything else. */
4536 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4537 cost = comp_cost (add_cost (speed, mode), 0);
4538 else
4539 cost = comp_cost (target_spill_cost[speed], 0);
4540 break;
4541
4542 case BIT_AND_EXPR:
4543 case BIT_IOR_EXPR:
4544 case BIT_NOT_EXPR:
4545 case LSHIFT_EXPR:
4546 case RSHIFT_EXPR:
4547 cost = comp_cost (add_cost (speed, mode), 0);
4548 break;
4549 case COND_EXPR:
4550 op0 = TREE_OPERAND (expr, 0);
4551 STRIP_NOPS (op0);
4552 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4553 || CONSTANT_CLASS_P (op0))
4554 cost = no_cost;
4555 else
4556 cost = force_expr_to_var_cost (expr: op0, speed);
4557 break;
4558 case LT_EXPR:
4559 case LE_EXPR:
4560 case GT_EXPR:
4561 case GE_EXPR:
4562 case EQ_EXPR:
4563 case NE_EXPR:
4564 case UNORDERED_EXPR:
4565 case ORDERED_EXPR:
4566 case UNLT_EXPR:
4567 case UNLE_EXPR:
4568 case UNGT_EXPR:
4569 case UNGE_EXPR:
4570 case UNEQ_EXPR:
4571 case LTGT_EXPR:
4572 case MAX_EXPR:
4573 case MIN_EXPR:
4574 /* Simply use the add cost for now; FIXME if there is a more accurate way
4575 to evaluate the cost. */
4576 cost = comp_cost (add_cost (speed, mode), 0);
4577 break;
4578
4579 default:
4580 gcc_unreachable ();
4581 }
4582
4583 cost += cost0;
4584 cost += cost1;
4585 return cost;
4586}
4587
4588/* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4589 invariants the computation depends on. */
4590
4591static comp_cost
4592force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4593{
4594 if (!expr)
4595 return no_cost;
4596
4597 find_inv_vars (data, expr_p: &expr, inv_vars);
4598 return force_expr_to_var_cost (expr, speed: data->speed);
4599}
4600
4601/* Returns the cost of an auto-modifying address expression of the form
4602 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4603 is the offset of the address expression. The address expression has
4604 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4605 says whether we are optimizing for speed or size. */
4606
4607enum ainc_type
4608{
4609 AINC_PRE_INC, /* Pre increment. */
4610 AINC_PRE_DEC, /* Pre decrement. */
4611 AINC_POST_INC, /* Post increment. */
4612 AINC_POST_DEC, /* Post decrement. */
4613 AINC_NONE /* Also the number of auto increment types. */
4614};
4615
4616struct ainc_cost_data
4617{
4618 int64_t costs[AINC_NONE];
4619};
4620
4621static comp_cost
4622get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4623 machine_mode addr_mode, machine_mode mem_mode,
4624 addr_space_t as, bool speed)
4625{
4626 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4627 && !USE_STORE_PRE_DECREMENT (mem_mode)
4628 && !USE_LOAD_POST_DECREMENT (mem_mode)
4629 && !USE_STORE_POST_DECREMENT (mem_mode)
4630 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4631 && !USE_STORE_PRE_INCREMENT (mem_mode)
4632 && !USE_LOAD_POST_INCREMENT (mem_mode)
4633 && !USE_STORE_POST_INCREMENT (mem_mode))
4634 return infinite_cost;
4635
4636 static vec<ainc_cost_data *> ainc_cost_data_list;
4637 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4638 if (idx >= ainc_cost_data_list.length ())
4639 {
4640 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4641
4642 gcc_assert (nsize > idx);
4643 ainc_cost_data_list.safe_grow_cleared (len: nsize, exact: true);
4644 }
4645
4646 ainc_cost_data *data = ainc_cost_data_list[idx];
4647 if (data == NULL)
4648 {
4649 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4650
4651 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4652 data->costs[AINC_PRE_DEC] = INFTY;
4653 data->costs[AINC_POST_DEC] = INFTY;
4654 data->costs[AINC_PRE_INC] = INFTY;
4655 data->costs[AINC_POST_INC] = INFTY;
4656 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4657 || USE_STORE_PRE_DECREMENT (mem_mode))
4658 {
4659 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4660
4661 if (memory_address_addr_space_p (mem_mode, addr, as))
4662 data->costs[AINC_PRE_DEC]
4663 = address_cost (addr, mem_mode, as, speed);
4664 }
4665 if (USE_LOAD_POST_DECREMENT (mem_mode)
4666 || USE_STORE_POST_DECREMENT (mem_mode))
4667 {
4668 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4669
4670 if (memory_address_addr_space_p (mem_mode, addr, as))
4671 data->costs[AINC_POST_DEC]
4672 = address_cost (addr, mem_mode, as, speed);
4673 }
4674 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4675 || USE_STORE_PRE_INCREMENT (mem_mode))
4676 {
4677 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4678
4679 if (memory_address_addr_space_p (mem_mode, addr, as))
4680 data->costs[AINC_PRE_INC]
4681 = address_cost (addr, mem_mode, as, speed);
4682 }
4683 if (USE_LOAD_POST_INCREMENT (mem_mode)
4684 || USE_STORE_POST_INCREMENT (mem_mode))
4685 {
4686 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4687
4688 if (memory_address_addr_space_p (mem_mode, addr, as))
4689 data->costs[AINC_POST_INC]
4690 = address_cost (addr, mem_mode, as, speed);
4691 }
4692 ainc_cost_data_list[idx] = data;
4693 }
4694
4695 poly_int64 msize = GET_MODE_SIZE (mode: mem_mode);
4696 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4697 return comp_cost (data->costs[AINC_POST_INC], 0);
4698 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4699 return comp_cost (data->costs[AINC_POST_DEC], 0);
4700 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4701 return comp_cost (data->costs[AINC_PRE_INC], 0);
4702 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4703 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4704
4705 return infinite_cost;
4706}
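
/* The final checks above map the (offset, step) pair onto an auto-modify
   form.  For example (illustrative only), a 4-byte access with
   ainc_step == 4 and ainc_offset == 0 corresponds to a post-increment
   address, while ainc_offset == 4 corresponds to a pre-increment one;
   anything that does not match one of the four forms gets infinite cost.  */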
4707
4708/* Return the cost of computing USE's address expression by using CAND.
4709 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4710 address expression, respectively. If AFF_INV is simple, store the loop
4711 invariant variables on which it depends in INV_VARS; if AFF_INV is
4712 complicated, handle it as a new invariant expression and record it in
4713 INV_EXPR. RATIO is the ratio between the steps of USE and CAND. If
4714 CAN_AUTOINC is non-NULL, store a boolean value in it indicating whether
4715 this is an auto-increment address. */
4716
4717static comp_cost
4718get_address_cost (struct ivopts_data *data, struct iv_use *use,
4719 struct iv_cand *cand, aff_tree *aff_inv,
4720 aff_tree *aff_var, HOST_WIDE_INT ratio,
4721 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4722 bool *can_autoinc, bool speed)
4723{
4724 rtx addr;
4725 bool simple_inv = true;
4726 tree comp_inv = NULL_TREE, type = aff_var->type;
4727 comp_cost var_cost = no_cost, cost = no_cost;
4728 struct mem_address parts = {NULL_TREE, integer_one_node,
4729 NULL_TREE, NULL_TREE, NULL_TREE};
4730 machine_mode addr_mode = TYPE_MODE (type);
4731 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4732 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4733 /* Only true if ratio != 1. */
4734 bool ok_with_ratio_p = false;
4735 bool ok_without_ratio_p = false;
4736 code_helper code = ERROR_MARK;
4737
4738 if (use->type == USE_PTR_ADDRESS)
4739 {
4740 gcall *call = as_a<gcall *> (p: use->stmt);
4741 gcc_assert (gimple_call_internal_p (call));
4742 code = gimple_call_internal_fn (gs: call);
4743 }
4744
4745 if (!aff_combination_const_p (aff: aff_inv))
4746 {
4747 parts.index = integer_one_node;
4748 /* Addressing mode "base + index". */
4749 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4750 if (ratio != 1)
4751 {
4752 parts.step = wide_int_to_tree (type, cst: ratio);
4753 /* Addressing mode "base + index << scale". */
4754 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4755 if (!ok_with_ratio_p)
4756 parts.step = NULL_TREE;
4757 }
4758 if (ok_with_ratio_p || ok_without_ratio_p)
4759 {
4760 if (maybe_ne (a: aff_inv->offset, b: 0))
4761 {
4762 parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4763 /* Addressing mode "base + index [<< scale] + offset". */
4764 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4765 parts.offset = NULL_TREE;
4766 else
4767 aff_inv->offset = 0;
4768 }
4769
4770 move_fixed_address_to_symbol (&parts, aff_inv);
4771 /* Base is fixed address and is moved to symbol part. */
4772 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff: aff_inv))
4773 parts.base = NULL_TREE;
4774
4775 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4776 if (parts.symbol != NULL_TREE
4777 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4778 {
4779 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4780 parts.symbol = NULL_TREE;
4781 /* Reset SIMPLE_INV since the symbol address needs to be computed
4782 outside of the address expression in this case. */
4783 simple_inv = false;
4784 /* The symbol part is moved back to the base part; base can't be NULL. */
4785 parts.base = integer_one_node;
4786 }
4787 }
4788 else
4789 parts.index = NULL_TREE;
4790 }
4791 else
4792 {
4793 poly_int64 ainc_step;
4794 if (can_autoinc
4795 && ratio == 1
4796 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4797 {
4798 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4799
4800 if (stmt_after_increment (loop: data->current_loop, cand, stmt: use->stmt))
4801 ainc_offset += ainc_step;
4802 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4803 addr_mode, mem_mode, as, speed);
4804 if (!cost.infinite_cost_p ())
4805 {
4806 *can_autoinc = true;
4807 return cost;
4808 }
4809 cost = no_cost;
4810 }
4811 if (!aff_combination_zero_p (aff: aff_inv))
4812 {
4813 parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4814 /* Addressing mode "base + offset". */
4815 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4816 parts.offset = NULL_TREE;
4817 else
4818 aff_inv->offset = 0;
4819 }
4820 }
4821
4822 if (simple_inv)
4823 simple_inv = (aff_inv == NULL
4824 || aff_combination_const_p (aff: aff_inv)
4825 || aff_combination_singleton_var_p (aff: aff_inv));
4826 if (!aff_combination_zero_p (aff: aff_inv))
4827 comp_inv = aff_combination_to_tree (aff_inv);
4828 if (comp_inv != NULL_TREE)
4829 cost = force_var_cost (data, expr: comp_inv, inv_vars);
4830 if (ratio != 1 && parts.step == NULL_TREE)
4831 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4832 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4833 var_cost += add_cost (speed, mode: addr_mode);
4834
4835 if (comp_inv && inv_expr && !simple_inv)
4836 {
4837 *inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4838 /* Clear depends on. */
4839 if (*inv_expr != NULL && inv_vars && *inv_vars)
4840 bitmap_clear (*inv_vars);
4841
4842 /* The cost of a small invariant expression, once amortized over the
4843 loop's iterations, is usually zero, which makes it difficult to
4844 differentiate from a candidate based on loop invariant variables.
4845 Secondly, the generated invariant expression may not be hoisted out
4846 of the loop by a following pass. We penalize the cost by rounding
4847 up in order to neutralize such effects. */
4848 cost.cost = adjust_setup_cost (data, cost: cost.cost, round_up_p: true);
4849 cost.scratch = cost.cost;
4850 }
4851
4852 cost += var_cost;
4853 addr = addr_for_mem_ref (&parts, as, false);
4854 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4855 cost += address_cost (addr, mem_mode, as, speed);
4856
4857 if (parts.symbol != NULL_TREE)
4858 cost.complexity += 1;
4859 /* Don't increase the complexity of adding a scaled index if it's
4860 the only kind of index that the target allows. */
4861 if (parts.step != NULL_TREE && ok_without_ratio_p)
4862 cost.complexity += 1;
4863 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4864 cost.complexity += 1;
4865 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4866 cost.complexity += 1;
4867
4868 return cost;
4869}
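
/* To illustrate the decomposition above with made-up values: for a use
   *(p + 4 * i + 16) expressed with a candidate for i, AFF_VAR is roughly i
   with RATIO 4 and AFF_INV is p + 16.  The candidate supplies the index
   part (scale 4), the constant 16 can be folded into the offset part if the
   target accepts it, and the remaining invariant p is costed separately via
   force_var_cost; pieces the target rejects are folded back into the
   invariant and costed outside the address expression.  */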
4870
4871/* Scale (multiply) the computed COST (except the scratch part, which should
4872 be hoisted out of the loop) by header->frequency / AT->frequency, which
4873 makes the expected cost more accurate. */
4874
4875static comp_cost
4876get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4877{
4878 if (data->speed
4879 && data->current_loop->header->count.to_frequency (cfun) > 0)
4880 {
4881 basic_block bb = gimple_bb (g: at);
4882 gcc_assert (cost.scratch <= cost.cost);
4883 int scale_factor = (int)(intptr_t) bb->aux;
4884 if (scale_factor == 1)
4885 return cost;
4886
4887 int64_t scaled_cost
4888 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4889
4890 if (dump_file && (dump_flags & TDF_DETAILS))
4891 fprintf (stream: dump_file, format: "Scaling cost based on bb prob by %2.2f: "
4892 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4893 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4894
4895 cost.cost = scaled_cost;
4896 }
4897
4898 return cost;
4899}
4900
4901/* Determines the cost of the computation by which USE is expressed
4902 from induction variable CAND. If ADDRESS_P is true, we just need
4903 to create an address from it, otherwise we want to get it into a
4904 register. A set of invariants we depend on is stored in INV_VARS.
4905 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4906 addressing is likely. If INV_EXPR is nonnull, record invariant
4907 expr entry in it. */
4908
4909static comp_cost
4910get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4911 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4912 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4913{
4914 gimple *at = use->stmt;
4915 tree ubase = use->iv->base, cbase = cand->iv->base;
4916 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4917 tree comp_inv = NULL_TREE;
4918 HOST_WIDE_INT ratio, aratio;
4919 comp_cost cost;
4920 widest_int rat;
4921 aff_tree aff_inv, aff_var;
4922 bool speed = optimize_bb_for_speed_p (gimple_bb (g: at));
4923
4924 if (inv_vars)
4925 *inv_vars = NULL;
4926 if (can_autoinc)
4927 *can_autoinc = false;
4928 if (inv_expr)
4929 *inv_expr = NULL;
4930
4931 /* Check if we have enough precision to express the values of use. */
4932 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4933 return infinite_cost;
4934
4935 if (address_p
4936 || (use->iv->base_object
4937 && cand->iv->base_object
4938 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4939 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4940 {
4941 /* Do not try to express the address of an object with a computation
4942 based on the address of a different object. This may cause problems
4943 in RTL-level alias analysis (which does not expect this to happen,
4944 as it is illegal in C), and it would be unlikely to be useful
4945 anyway. */
4946 if (use->iv->base_object
4947 && cand->iv->base_object
4948 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, flags: 0))
4949 return infinite_cost;
4950 }
4951
4952 if (!get_computation_aff_1 (loop: data->current_loop, at, use,
4953 cand, aff_inv: &aff_inv, aff_var: &aff_var, prat: &rat)
4954 || !wi::fits_shwi_p (x: rat))
4955 return infinite_cost;
4956
4957 ratio = rat.to_shwi ();
4958 if (address_p)
4959 {
4960 cost = get_address_cost (data, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, ratio,
4961 inv_vars, inv_expr, can_autoinc, speed);
4962 cost = get_scaled_computation_cost_at (data, at, cost);
4963 /* For doloop IV cand, add on the extra cost. */
4964 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4965 return cost;
4966 }
4967
4968 bool simple_inv = (aff_combination_const_p (aff: &aff_inv)
4969 || aff_combination_singleton_var_p (aff: &aff_inv));
4970 tree signed_type = signed_type_for (aff_combination_type (aff: &aff_inv));
4971 aff_combination_convert (&aff_inv, signed_type);
4972 if (!aff_combination_zero_p (aff: &aff_inv))
4973 comp_inv = aff_combination_to_tree (&aff_inv);
4974
4975 cost = force_var_cost (data, expr: comp_inv, inv_vars);
4976 if (comp_inv && inv_expr && !simple_inv)
4977 {
4978 *inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4979 /* Clear depends on. */
4980 if (*inv_expr != NULL && inv_vars && *inv_vars)
4981 bitmap_clear (*inv_vars);
4982
4983 cost.cost = adjust_setup_cost (data, cost: cost.cost);
4984 /* Record setup cost in scratch field. */
4985 cost.scratch = cost.cost;
4986 }
4987 /* The cost of a constant integer can be covered when adding the invariant
4988 part to the variant part. */
4989 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4990 cost = no_cost;
4991
4992 /* We need type narrowing to represent the use with the cand. */
4993 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4994 {
4995 machine_mode outer_mode = TYPE_MODE (utype);
4996 machine_mode inner_mode = TYPE_MODE (ctype);
4997 cost += comp_cost (convert_cost (to_mode: outer_mode, from_mode: inner_mode, speed), 0);
4998 }
4999
5000 /* Turn a + i * (-c) into a - i * c. */
5001 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
5002 aratio = -ratio;
5003 else
5004 aratio = ratio;
5005
5006 if (ratio != 1)
5007 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
5008
5009 /* TODO: We may also need to check if we can compute a + i * 4 in one
5010 instruction. */
5011 /* Need to add up the invariant and variant parts. */
5012 if (comp_inv && !integer_zerop (comp_inv))
5013 cost += add_cost (speed, TYPE_MODE (utype));
5014
5015 cost = get_scaled_computation_cost_at (data, at, cost);
5016
5017 /* For doloop IV cand, add on the extra cost. */
5018 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
5019 cost += targetm.doloop_cost_for_generic;
5020
5021 return cost;
5022}
5023
5024/* Determines cost of computing the use in GROUP with CAND in a generic
5025 expression. */
5026
5027static bool
5028determine_group_iv_cost_generic (struct ivopts_data *data,
5029 struct iv_group *group, struct iv_cand *cand)
5030{
5031 comp_cost cost;
5032 iv_inv_expr_ent *inv_expr = NULL;
5033 bitmap inv_vars = NULL, inv_exprs = NULL;
5034 struct iv_use *use = group->vuses[0];
5035
5036 /* The simple case first -- if we need to express value of the preserved
5037 original biv, the cost is 0. This also prevents us from counting the
5038 cost of increment twice -- once at this use and once in the cost of
5039 the candidate. */
5040 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5041 cost = no_cost;
5042 /* If the IV candidate involves undefined SSA values and is not the
5043 same IV as on the USE, avoid using that candidate here. */
5044 else if (cand->involves_undefs
5045 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, flags: 0)))
5046 return false;
5047 else
5048 cost = get_computation_cost (data, use, cand, address_p: false,
5049 inv_vars: &inv_vars, NULL, inv_expr: &inv_expr);
5050
5051 if (inv_expr)
5052 {
5053 inv_exprs = BITMAP_ALLOC (NULL);
5054 bitmap_set_bit (inv_exprs, inv_expr->id);
5055 }
5056 set_group_iv_cost (data, group, cand, cost, inv_vars,
5057 NULL_TREE, comp: ERROR_MARK, inv_exprs);
5058 return !cost.infinite_cost_p ();
5059}
5060
5061/* Determines cost of computing uses in GROUP with CAND in addresses. */
5062
5063static bool
5064determine_group_iv_cost_address (struct ivopts_data *data,
5065 struct iv_group *group, struct iv_cand *cand)
5066{
5067 unsigned i;
5068 bitmap inv_vars = NULL, inv_exprs = NULL;
5069 bool can_autoinc;
5070 iv_inv_expr_ent *inv_expr = NULL;
5071 struct iv_use *use = group->vuses[0];
5072 comp_cost sum_cost = no_cost, cost;
5073
5074 cost = get_computation_cost (data, use, cand, address_p: true,
5075 inv_vars: &inv_vars, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5076
5077 if (inv_expr)
5078 {
5079 inv_exprs = BITMAP_ALLOC (NULL);
5080 bitmap_set_bit (inv_exprs, inv_expr->id);
5081 }
5082 sum_cost = cost;
5083 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5084 {
5085 if (can_autoinc)
5086 sum_cost -= cand->cost_step;
5087 /* If we generated the candidate solely for exploiting autoincrement
5088 opportunities, and it turns out it can't be used, set the cost to
5089 infinity to make sure we ignore it. */
5090 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5091 sum_cost = infinite_cost;
5092 }
5093
5094 /* Uses in a group can share setup code, so only add setup cost once. */
5095 cost -= cost.scratch;
5096 /* Compute and add costs for the rest of the uses in this group. */
5097 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5098 {
5099 struct iv_use *next = group->vuses[i];
5100
5101 /* TODO: We could skip computing the cost for a sub iv_use when it has
5102 the same cost as the first iv_use, but the cost really depends on the
5103 offset and on where the iv_use is. */
5104 cost = get_computation_cost (data, use: next, cand, address_p: true,
5105 NULL, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5106 if (inv_expr)
5107 {
5108 if (!inv_exprs)
5109 inv_exprs = BITMAP_ALLOC (NULL);
5110
5111 bitmap_set_bit (inv_exprs, inv_expr->id);
5112 }
5113 sum_cost += cost;
5114 }
5115 set_group_iv_cost (data, group, cand, cost: sum_cost, inv_vars,
5116 NULL_TREE, comp: ERROR_MARK, inv_exprs);
5117
5118 return !sum_cost.infinite_cost_p ();
5119}
5120
5121/* Computes the value of candidate CAND at position AT in iteration DESC->NITER,
5122 and stores it in VAL. */
5123
5124static void
5125cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5126 class tree_niter_desc *desc, aff_tree *val)
5127{
5128 aff_tree step, delta, nit;
5129 struct iv *iv = cand->iv;
5130 tree type = TREE_TYPE (iv->base);
5131 tree niter = desc->niter;
5132 bool after_adjust = stmt_after_increment (loop, cand, stmt: at);
5133 tree steptype;
5134
5135 if (POINTER_TYPE_P (type))
5136 steptype = sizetype;
5137 else
5138 steptype = unsigned_type_for (type);
5139
5140 /* If AFTER_ADJUST is required, the code below generates the equivalent
5141 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5142 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5143 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5144 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5145 class for common idioms that we know are safe. */
5146 if (after_adjust
5147 && desc->control.no_overflow
5148 && integer_onep (desc->control.step)
5149 && (desc->cmp == LT_EXPR
5150 || desc->cmp == NE_EXPR)
5151 && TREE_CODE (desc->bound) == SSA_NAME)
5152 {
5153 if (integer_onep (desc->control.base))
5154 {
5155 niter = desc->bound;
5156 after_adjust = false;
5157 }
5158 else if (TREE_CODE (niter) == MINUS_EXPR
5159 && integer_onep (TREE_OPERAND (niter, 1)))
5160 {
5161 niter = TREE_OPERAND (niter, 0);
5162 after_adjust = false;
5163 }
5164 }
5165
5166 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5167 aff_combination_convert (&step, steptype);
5168 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5169 aff_combination_convert (&nit, steptype);
5170 aff_combination_mult (&nit, &step, &delta);
5171 if (after_adjust)
5172 aff_combination_add (&delta, &step);
5173
5174 tree_to_aff_combination (iv->base, type, val);
5175 if (!POINTER_TYPE_P (type))
5176 aff_combination_convert (val, steptype);
5177 aff_combination_add (val, &delta);
5178}
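
/* In other words, the value computed is BASE + NITER * STEP, plus one more
   STEP if the candidate is incremented before AT.  E.g. (hypothetical
   numbers), a candidate {base = 10, step = 4} with NITER = 7 yields 38
   before the increment and 42 after it; the arithmetic is done in an
   unsigned (or sizetype) variant of the candidate's type so that any
   wrap-around is well defined.  */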
5179
5180/* Returns the period of induction variable IV. */
5181
5182static tree
5183iv_period (struct iv *iv)
5184{
5185 tree step = iv->step, period, type;
5186 tree pow2div;
5187
5188 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5189
5190 type = unsigned_type_for (TREE_TYPE (step));
5191 /* The period of the iv is lcm (step, type_range)/step - 1,
5192 i.e., N*type_range/step - 1. Since type_range is a power
5193 of two, N == step >> num_of_ending_zeros_binary (step),
5194 so the final result is
5195
5196 (type_range >> num_of_ending_zeros_binary (step)) - 1
5197
5198 */
5199 pow2div = num_ending_zeros (step);
5200
5201 period = build_low_bits_mask (type,
5202 (TYPE_PRECISION (type)
5203 - tree_to_uhwi (pow2div)));
5204
5205 return period;
5206}
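
/* A worked example (illustrative only): for an 8-bit unsigned iv with
   step 12, num_ending_zeros (12) == 2, so the period is
   (256 >> 2) - 1 = 63; the iv takes 64 distinct values before repeating,
   hence 63 further iterations are guaranteed not to revisit the initial
   value.  */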
5207
5208/* Returns the comparison operator used when eliminating the iv USE. */
5209
5210static enum tree_code
5211iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5212{
5213 class loop *loop = data->current_loop;
5214 basic_block ex_bb;
5215 edge exit;
5216
5217 ex_bb = gimple_bb (g: use->stmt);
5218 exit = EDGE_SUCC (ex_bb, 0);
5219 if (flow_bb_inside_loop_p (loop, exit->dest))
5220 exit = EDGE_SUCC (ex_bb, 1);
5221
5222 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5223}
5224
5225/* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5226 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5227 calculation is performed in a non-wrapping type.
5228
5229 TODO: More generally, we could test for the situation that
5230 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5231 This would require knowing the sign of OFFSET. */
5232
5233static bool
5234difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5235{
5236 enum tree_code code;
5237 tree e1, e2;
5238 aff_tree aff_e1, aff_e2, aff_offset;
5239
5240 if (!nowrap_type_p (TREE_TYPE (base)))
5241 return false;
5242
5243 base = expand_simple_operations (base);
5244
5245 if (TREE_CODE (base) == SSA_NAME)
5246 {
5247 gimple *stmt = SSA_NAME_DEF_STMT (base);
5248
5249 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
5250 return false;
5251
5252 code = gimple_assign_rhs_code (gs: stmt);
5253 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5254 return false;
5255
5256 e1 = gimple_assign_rhs1 (gs: stmt);
5257 e2 = gimple_assign_rhs2 (gs: stmt);
5258 }
5259 else
5260 {
5261 code = TREE_CODE (base);
5262 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5263 return false;
5264 e1 = TREE_OPERAND (base, 0);
5265 e2 = TREE_OPERAND (base, 1);
5266 }
5267
5268 /* Use affine expansion for deeper inspection to prove the equality. */
5269 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5270 &aff_e2, &data->name_expansion_cache);
5271 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5272 &aff_offset, &data->name_expansion_cache);
5273 aff_combination_scale (&aff_offset, -1);
5274 switch (code)
5275 {
5276 case PLUS_EXPR:
5277 aff_combination_add (&aff_e2, &aff_offset);
5278 if (aff_combination_zero_p (aff: &aff_e2))
5279 return true;
5280
5281 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5282 &aff_e1, &data->name_expansion_cache);
5283 aff_combination_add (&aff_e1, &aff_offset);
5284 return aff_combination_zero_p (aff: &aff_e1);
5285
5286 case POINTER_PLUS_EXPR:
5287 aff_combination_add (&aff_e2, &aff_offset);
5288 return aff_combination_zero_p (aff: &aff_e2);
5289
5290 default:
5291 return false;
5292 }
5293}
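
/* For instance (a hypothetical case): if BASE is defined as x + 8 in a
   signed integer type (where wrap-around is undefined) and OFFSET is 8, the
   affine expansion of the second operand cancels against -OFFSET, so
   BASE - OFFSET is just x and cannot overflow.  */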
5294
5295/* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
5296 comparison with CAND. NITER describes the number of iterations of
5297 the loop. If successful, the comparison in COMP_P is altered accordingly.
5298
5299 We aim to handle the following situation:
5300
5301 sometype *base, *p;
5302 int a, b, i;
5303
5304 i = a;
5305 p = p_0 = base + a;
5306
5307 do
5308 {
5309 bla (*p);
5310 p++;
5311 i++;
5312 }
5313 while (i < b);
5314
5315 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5316 We aim to optimize this to
5317
5318 p = p_0 = base + a;
5319 do
5320 {
5321 bla (*p);
5322 p++;
5323 }
5324 while (p < p_0 - a + b);
5325
5326 This preserves correctness, since the pointer arithmetic does not
5327 overflow. More precisely:
5328
5329 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5330 overflow in computing it or the values of p.
5331 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5332 overflow. To prove this, we use the fact that p_0 = base + a. */
5333
5334static bool
5335iv_elimination_compare_lt (struct ivopts_data *data,
5336 struct iv_cand *cand, enum tree_code *comp_p,
5337 class tree_niter_desc *niter)
5338{
5339 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5340 class aff_tree nit, tmpa, tmpb;
5341 enum tree_code comp;
5342 HOST_WIDE_INT step;
5343
5344 /* We need to know that the candidate induction variable does not overflow.
5345 While more complex analysis may be used to prove this, for now just
5346 check that the variable appears in the original program and that it
5347 is computed in a type that guarantees no overflows. */
5348 cand_type = TREE_TYPE (cand->iv->base);
5349 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5350 return false;
5351
5352 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5353 the calculation of the BOUND could overflow, making the comparison
5354 invalid. */
5355 if (!data->loop_single_exit_p)
5356 return false;
5357
5358 /* We need to be able to decide whether candidate is increasing or decreasing
5359 in order to choose the right comparison operator. */
5360 if (!cst_and_fits_in_hwi (cand->iv->step))
5361 return false;
5362 step = int_cst_value (cand->iv->step);
5363
5364 /* Check that the number of iterations matches the expected pattern:
5365 a + 1 > b ? 0 : b - a - 1. */
5366 mbz = niter->may_be_zero;
5367 if (TREE_CODE (mbz) == GT_EXPR)
5368 {
5369 /* Handle a + 1 > b. */
5370 tree op0 = TREE_OPERAND (mbz, 0);
5371 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5372 {
5373 a = TREE_OPERAND (op0, 0);
5374 b = TREE_OPERAND (mbz, 1);
5375 }
5376 else
5377 return false;
5378 }
5379 else if (TREE_CODE (mbz) == LT_EXPR)
5380 {
5381 tree op1 = TREE_OPERAND (mbz, 1);
5382
5383 /* Handle b < a + 1. */
5384 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5385 {
5386 a = TREE_OPERAND (op1, 0);
5387 b = TREE_OPERAND (mbz, 0);
5388 }
5389 else
5390 return false;
5391 }
5392 else
5393 return false;
5394
5395 /* Expected number of iterations is B - A - 1. Check that it matches
5396 the actual number, i.e., that B - A - NITER = 1. */
5397 tree_to_aff_combination (niter->niter, nit_type, &nit);
5398 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5399 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5400 aff_combination_scale (&nit, -1);
5401 aff_combination_scale (&tmpa, -1);
5402 aff_combination_add (&tmpb, &tmpa);
5403 aff_combination_add (&tmpb, &nit);
5404 if (tmpb.n != 0 || maybe_ne (a: tmpb.offset, b: 1))
5405 return false;
5406
5407 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5408 overflow. */
5409 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5410 cand->iv->step,
5411 fold_convert (TREE_TYPE (cand->iv->step), a));
5412 if (!difference_cannot_overflow_p (data, base: cand->iv->base, offset))
5413 return false;
5414
5415 /* Determine the new comparison operator. */
5416 comp = step < 0 ? GT_EXPR : LT_EXPR;
5417 if (*comp_p == NE_EXPR)
5418 *comp_p = comp;
5419 else if (*comp_p == EQ_EXPR)
5420 *comp_p = invert_tree_comparison (comp, false);
5421 else
5422 gcc_unreachable ();
5423
5424 return true;
5425}
5426
5427/* Check whether it is possible to express the condition in USE by a comparison
5428 of candidate CAND. If so, store the value compared against in BOUND, and the
5429 comparison operator in COMP. */
5430
5431static bool
5432may_eliminate_iv (struct ivopts_data *data,
5433 struct iv_use *use, struct iv_cand *cand, tree *bound,
5434 enum tree_code *comp)
5435{
5436 basic_block ex_bb;
5437 edge exit;
5438 tree period;
5439 class loop *loop = data->current_loop;
5440 aff_tree bnd;
5441 class tree_niter_desc *desc = NULL;
5442
5443 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5444 return false;
5445
5446 /* For now this works only for exits that dominate the loop latch.
5447 TODO: extend to other conditions inside loop body. */
5448 ex_bb = gimple_bb (g: use->stmt);
5449 if (use->stmt != last_nondebug_stmt (ex_bb)
5450 || gimple_code (g: use->stmt) != GIMPLE_COND
5451 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5452 return false;
5453
5454 exit = EDGE_SUCC (ex_bb, 0);
5455 if (flow_bb_inside_loop_p (loop, exit->dest))
5456 exit = EDGE_SUCC (ex_bb, 1);
5457 if (flow_bb_inside_loop_p (loop, exit->dest))
5458 return false;
5459
5460 desc = niter_for_exit (data, exit);
5461 if (!desc)
5462 return false;
5463
5464 /* Determine whether we can use the variable to test the exit condition.
5465 This is the case iff the period of the induction variable is greater
5466 than the number of iterations for which the exit condition is true. */
5467 period = iv_period (iv: cand->iv);
5468
5469 /* If the number of iterations is constant, compare against it directly. */
5470 if (TREE_CODE (desc->niter) == INTEGER_CST)
5471 {
5472 /* See cand_value_at. */
5473 if (stmt_after_increment (loop, cand, stmt: use->stmt))
5474 {
5475 if (!tree_int_cst_lt (t1: desc->niter, t2: period))
5476 return false;
5477 }
5478 else
5479 {
5480 if (tree_int_cst_lt (t1: period, t2: desc->niter))
5481 return false;
5482 }
5483 }
5484
5485 /* If not, and if this is the only possible exit of the loop, see whether
5486 we can get a conservative estimate on the number of iterations of the
5487 entire loop and compare against that instead. */
5488 else
5489 {
5490 widest_int period_value, max_niter;
5491
5492 max_niter = desc->max;
5493 if (stmt_after_increment (loop, cand, stmt: use->stmt))
5494 max_niter += 1;
5495 period_value = wi::to_widest (t: period);
5496 if (wi::gtu_p (x: max_niter, y: period_value))
5497 {
5498 /* See if we can take advantage of inferred loop bound
5499 information. */
5500 if (data->loop_single_exit_p)
5501 {
5502 if (!max_loop_iterations (loop, &max_niter))
5503 return false;
5504 /* The loop bound is already adjusted by adding 1. */
5505 if (wi::gtu_p (x: max_niter, y: period_value))
5506 return false;
5507 }
5508 else
5509 return false;
5510 }
5511 }
5512
5513 /* For a doloop IV candidate, the bound is zero.  This is safe whether or
5514 not may_be_zero is set.  */
5515 if (cand->doloop_p)
5516 {
5517 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5518 *comp = iv_elimination_compare (data, use);
5519 return true;
5520 }
5521
5522 cand_value_at (loop, cand, at: use->stmt, desc, val: &bnd);
5523
5524 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5525 aff_combination_to_tree (&bnd));
5526 *comp = iv_elimination_compare (data, use);
5527
5528 /* It is unlikely that computing the number of iterations using division
5529 would be more profitable than keeping the original induction variable. */
5530 bool cond_overflow_p;
5531 if (expression_expensive_p (*bound, &cond_overflow_p))
5532 return false;
5533
5534 /* Sometimes the case in which the number of iterations may be zero (unless
5535 additional assumptions hold) can still be handled by using < instead
5536 of != in the exit condition.
5537
5538 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5539 base the exit condition on it.  However, that is often too
5540 expensive.  */
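/* Illustrative sketch of the idea: rather than exiting when CAND != BOUND,
   the exit test becomes CAND < BOUND for an increasing candidate (or
   CAND > BOUND for a decreasing one), which stays correct even when the
   number of iterations turns out to be zero; see iv_elimination_compare_lt.  */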
5541 if (!integer_zerop (desc->may_be_zero))
5542 return iv_elimination_compare_lt (data, cand, comp_p: comp, niter: desc);
5543
5544 return true;
5545}
5546
5547/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5548 be copied if it is used in the loop body and DATA->body_includes_call.  */
5549
5550static int
5551parm_decl_cost (struct ivopts_data *data, tree bound)
5552{
5553 tree sbound = bound;
5554 STRIP_NOPS (sbound);
5555
5556 if (TREE_CODE (sbound) == SSA_NAME
5557 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5558 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5559 && data->body_includes_call)
5560 return COSTS_N_INSNS (1);
5561
5562 return 0;
5563}
5564
5565/* Determines cost of computing the use in GROUP with CAND in a condition. */
5566
5567static bool
5568determine_group_iv_cost_cond (struct ivopts_data *data,
5569 struct iv_group *group, struct iv_cand *cand)
5570{
5571 tree bound = NULL_TREE;
5572 struct iv *cmp_iv;
5573 bitmap inv_exprs = NULL;
5574 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5575 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5576 enum comp_iv_rewrite rewrite_type;
5577 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5578 tree *control_var, *bound_cst;
5579 enum tree_code comp = ERROR_MARK;
5580 struct iv_use *use = group->vuses[0];
5581
5582 /* Extract condition operands. */
5583 rewrite_type = extract_cond_operands (data, stmt: use->stmt, control_var: &control_var,
5584 bound: &bound_cst, NULL, iv_bound: &cmp_iv);
5585 gcc_assert (rewrite_type != COMP_IV_NA);
5586
5587 /* Try iv elimination. */
5588 if (rewrite_type == COMP_IV_ELIM
5589 && may_eliminate_iv (data, use, cand, bound: &bound, comp: &comp))
5590 {
5591 elim_cost = force_var_cost (data, expr: bound, inv_vars: &inv_vars_elim);
5592 if (elim_cost.cost == 0)
5593 elim_cost.cost = parm_decl_cost (data, bound);
5594 else if (TREE_CODE (bound) == INTEGER_CST)
5595 elim_cost.cost = 0;
5596 /* If we replace a loop condition 'i < n' with 'p < base + n',
5597 inv_vars_elim will have 'base' and 'n' set, which implies that both
5598 'base' and 'n' will be live during the loop. More likely,
5599 'base + n' will be loop invariant, resulting in only one live value
5600 during the loop. So in that case we clear inv_vars_elim and set
5601 inv_expr_elim instead. */
5602 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5603 {
5604 inv_expr_elim = get_loop_invariant_expr (data, inv_expr: bound);
5605 bitmap_clear (inv_vars_elim);
5606 }
5607 /* The bound is a loop invariant, so it will be computed
5608 only once.  */
5609 elim_cost.cost = adjust_setup_cost (data, cost: elim_cost.cost);
5610 }
5611
5612 /* When the condition is a comparison of the candidate IV against
5613 zero, prefer this IV.
5614
5615 TODO: The constant that we're subtracting from the cost should
5616 be target-dependent. This information should be added to the
5617 target costs for each backend. */
5618 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5619 && integer_zerop (*bound_cst)
5620 && (operand_equal_p (*control_var, cand->var_after, flags: 0)
5621 || operand_equal_p (*control_var, cand->var_before, flags: 0)))
5622 elim_cost -= 1;
5623
5624 express_cost = get_computation_cost (data, use, cand, address_p: false,
5625 inv_vars: &inv_vars_express, NULL,
5626 inv_expr: &inv_expr_express);
5627 if (cmp_iv != NULL)
5628 find_inv_vars (data, expr_p: &cmp_iv->base, inv_vars: &inv_vars_express);
5629
5630 /* Count the cost of the original bound as well. */
5631 bound_cost = force_var_cost (data, expr: *bound_cst, NULL);
5632 if (bound_cost.cost == 0)
5633 bound_cost.cost = parm_decl_cost (data, bound: *bound_cst);
5634 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5635 bound_cost.cost = 0;
5636 express_cost += bound_cost;
5637
5638 /* Choose the better approach, preferring the eliminated IV. */
5639 if (elim_cost <= express_cost)
5640 {
5641 cost = elim_cost;
5642 inv_vars = inv_vars_elim;
5643 inv_vars_elim = NULL;
5644 inv_expr = inv_expr_elim;
5645 /* For doloop candidate/use pair, adjust to zero cost. */
5646 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5647 cost = no_cost;
5648 }
5649 else
5650 {
5651 cost = express_cost;
5652 inv_vars = inv_vars_express;
5653 inv_vars_express = NULL;
5654 bound = NULL_TREE;
5655 comp = ERROR_MARK;
5656 inv_expr = inv_expr_express;
5657 }
5658
5659 if (inv_expr)
5660 {
5661 inv_exprs = BITMAP_ALLOC (NULL);
5662 bitmap_set_bit (inv_exprs, inv_expr->id);
5663 }
5664 set_group_iv_cost (data, group, cand, cost,
5665 inv_vars, value: bound, comp, inv_exprs);
5666
5667 if (inv_vars_elim)
5668 BITMAP_FREE (inv_vars_elim);
5669 if (inv_vars_express)
5670 BITMAP_FREE (inv_vars_express);
5671
5672 return !cost.infinite_cost_p ();
5673}
5674
5675/* Determines the cost of computing the uses in GROUP with CAND.  Returns false
5676 if GROUP cannot be represented by CAND.  */
5677
5678static bool
5679determine_group_iv_cost (struct ivopts_data *data,
5680 struct iv_group *group, struct iv_cand *cand)
5681{
5682 switch (group->type)
5683 {
5684 case USE_NONLINEAR_EXPR:
5685 return determine_group_iv_cost_generic (data, group, cand);
5686
5687 case USE_REF_ADDRESS:
5688 case USE_PTR_ADDRESS:
5689 return determine_group_iv_cost_address (data, group, cand);
5690
5691 case USE_COMPARE:
5692 return determine_group_iv_cost_cond (data, group, cand);
5693
5694 default:
5695 gcc_unreachable ();
5696 }
5697}
5698
5699/* Return true if get_computation_cost indicates that autoincrement is
5700 a possibility for the pair of USE and CAND, false otherwise. */
5701
5702static bool
5703autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5704 struct iv_cand *cand)
5705{
5706 if (!address_p (type: use->type))
5707 return false;
5708
5709 bool can_autoinc = false;
5710 get_computation_cost (data, use, cand, address_p: true, NULL, can_autoinc: &can_autoinc, NULL);
5711 return can_autoinc;
5712}
5713
5714/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5715 use that allows autoincrement, and set their AINC_USE if possible. */
5716
5717static void
5718set_autoinc_for_original_candidates (struct ivopts_data *data)
5719{
5720 unsigned i, j;
5721
5722 for (i = 0; i < data->vcands.length (); i++)
5723 {
5724 struct iv_cand *cand = data->vcands[i];
5725 struct iv_use *closest_before = NULL;
5726 struct iv_use *closest_after = NULL;
5727 if (cand->pos != IP_ORIGINAL)
5728 continue;
5729
5730 for (j = 0; j < data->vgroups.length (); j++)
5731 {
5732 struct iv_group *group = data->vgroups[j];
5733 struct iv_use *use = group->vuses[0];
5734 unsigned uid = gimple_uid (g: use->stmt);
5735
5736 if (gimple_bb (g: use->stmt) != gimple_bb (g: cand->incremented_at))
5737 continue;
5738
5739 if (uid < gimple_uid (g: cand->incremented_at)
5740 && (closest_before == NULL
5741 || uid > gimple_uid (g: closest_before->stmt)))
5742 closest_before = use;
5743
5744 if (uid > gimple_uid (g: cand->incremented_at)
5745 && (closest_after == NULL
5746 || uid < gimple_uid (g: closest_after->stmt)))
5747 closest_after = use;
5748 }
5749
5750 if (closest_before != NULL
5751 && autoinc_possible_for_pair (data, use: closest_before, cand))
5752 cand->ainc_use = closest_before;
5753 else if (closest_after != NULL
5754 && autoinc_possible_for_pair (data, use: closest_after, cand))
5755 cand->ainc_use = closest_after;
5756 }
5757}
5758
5759/* Relate compare use with all candidates. */
5760
5761static void
5762relate_compare_use_with_all_cands (struct ivopts_data *data)
5763{
5764 unsigned i, count = data->vcands.length ();
5765 for (i = 0; i < data->vgroups.length (); i++)
5766 {
5767 struct iv_group *group = data->vgroups[i];
5768
5769 if (group->type == USE_COMPARE)
5770 bitmap_set_range (group->related_cands, 0, count);
5771 }
5772}
5773
5774/* If PREFERRED_MODE is suitable and profitable, use it to compute the
5775 doloop iv base from niter: base = niter + 1.  */
5776
5777static tree
5778compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5779 const widest_int &iterations_max)
5780{
5781 tree ntype = TREE_TYPE (niter);
5782 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5783 if (!pref_type)
5784 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5785 build_int_cst (ntype, 1));
5786
5787 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5788
5789 int prec = TYPE_PRECISION (ntype);
5790 int pref_prec = TYPE_PRECISION (pref_type);
5791
5792 tree base;
5793
5794 /* Check whether PREFERRED_MODE is able to represent niter.  */
5795 if (pref_prec > prec
5796 || wi::ltu_p (x: iterations_max,
5797 y: widest_int::from (x: wi::max_value (pref_prec, UNSIGNED),
5798 sgn: UNSIGNED)))
5799 {
5800 /* No wrap: it is safe to use the preferred type after computing niter + 1.  */
5801 if (wi::ltu_p (x: iterations_max,
5802 y: widest_int::from (x: wi::max_value (prec, UNSIGNED),
5803 sgn: UNSIGNED)))
5804 {
5805 /* This can help to optimize away a "-1 +1" pair when niter looks
5806 like "n - 1" (n is in the original mode): instead of computing
5807 base = (n - 1) + 1 in PREFERRED_MODE, the base can be (PREFERRED_TYPE) n.  */
5808 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5809 build_int_cst (ntype, 1));
5810 base = fold_convert (pref_type, base);
5811 }
5812
5813 /* To avoid wrapping, convert niter to the preferred type before adding 1.  */
5814 else
5815 {
5816 niter = fold_convert (pref_type, niter);
5817 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5818 build_int_cst (pref_type, 1));
5819 }
5820 }
5821 else
5822 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5823 build_int_cst (ntype, 1));
5824 return base;
5825}
5826
5827/* Add one doloop dedicated IV candidate:
5828 - Base is (may_be_zero ? 1 : (niter + 1)).
5829 - Step is -1. */
5830
5831static void
5832add_iv_candidate_for_doloop (struct ivopts_data *data)
5833{
5834 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5835 gcc_assert (niter_desc && niter_desc->assumptions);
5836
5837 tree niter = niter_desc->niter;
5838 tree ntype = TREE_TYPE (niter);
5839 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5840
5841 tree may_be_zero = niter_desc->may_be_zero;
5842 if (may_be_zero && integer_zerop (may_be_zero))
5843 may_be_zero = NULL_TREE;
5844 if (may_be_zero)
5845 {
5846 if (COMPARISON_CLASS_P (may_be_zero))
5847 {
5848 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5849 build_int_cst (ntype, 0),
5850 rewrite_to_non_trapping_overflow (niter));
5851 }
5852 /* Don't try to obtain the iteration count expression when may_be_zero is
5853 integer_nonzerop (the iteration count is then one) or has any other form.  */
5854 else
5855 return;
5856 }
5857
5858 machine_mode mode = TYPE_MODE (ntype);
5859 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5860
5861 tree base;
5862 if (mode != pref_mode)
5863 {
5864 base = compute_doloop_base_on_mode (preferred_mode: pref_mode, niter, iterations_max: niter_desc->max);
5865 ntype = TREE_TYPE (base);
5866 }
5867 else
5868 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5869 build_int_cst (ntype, 1));
5870
5871
5872 add_candidate (data, base, step: build_int_cst (ntype, -1), important: true, NULL, NULL, doloop: true);
5873}
5874
5875/* Finds the candidates for the induction variables. */
5876
5877static void
5878find_iv_candidates (struct ivopts_data *data)
5879{
5880 /* Add commonly used ivs. */
5881 add_standard_iv_candidates (data);
5882
5883 /* Add doloop dedicated ivs. */
5884 if (data->doloop_use_p)
5885 add_iv_candidate_for_doloop (data);
5886
5887 /* Add old induction variables. */
5888 add_iv_candidate_for_bivs (data);
5889
5890 /* Add induction variables derived from uses. */
5891 add_iv_candidate_for_groups (data);
5892
5893 set_autoinc_for_original_candidates (data);
5894
5895 /* Record the important candidates. */
5896 record_important_candidates (data);
5897
5898 /* Relate compare iv_use with all candidates. */
5899 if (!data->consider_all_candidates)
5900 relate_compare_use_with_all_cands (data);
5901
5902 if (dump_file && (dump_flags & TDF_DETAILS))
5903 {
5904 unsigned i;
5905
5906 fprintf (stream: dump_file, format: "\n<Important Candidates>:\t");
5907 for (i = 0; i < data->vcands.length (); i++)
5908 if (data->vcands[i]->important)
5909 fprintf (stream: dump_file, format: " %d,", data->vcands[i]->id);
5910 fprintf (stream: dump_file, format: "\n");
5911
5912 fprintf (stream: dump_file, format: "\n<Group, Cand> Related:\n");
5913 for (i = 0; i < data->vgroups.length (); i++)
5914 {
5915 struct iv_group *group = data->vgroups[i];
5916
5917 if (group->related_cands)
5918 {
5919 fprintf (stream: dump_file, format: " Group %d:\t", group->id);
5920 dump_bitmap (file: dump_file, map: group->related_cands);
5921 }
5922 }
5923 fprintf (stream: dump_file, format: "\n");
5924 }
5925}
5926
5927/* Determines the cost of computing each group of uses with each iv candidate.  */
5928
5929static void
5930determine_group_iv_costs (struct ivopts_data *data)
5931{
5932 unsigned i, j;
5933 struct iv_cand *cand;
5934 struct iv_group *group;
5935 bitmap to_clear = BITMAP_ALLOC (NULL);
5936
5937 alloc_use_cost_map (data);
5938
5939 for (i = 0; i < data->vgroups.length (); i++)
5940 {
5941 group = data->vgroups[i];
5942
5943 if (data->consider_all_candidates)
5944 {
5945 for (j = 0; j < data->vcands.length (); j++)
5946 {
5947 cand = data->vcands[j];
5948 determine_group_iv_cost (data, group, cand);
5949 }
5950 }
5951 else
5952 {
5953 bitmap_iterator bi;
5954
5955 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5956 {
5957 cand = data->vcands[j];
5958 if (!determine_group_iv_cost (data, group, cand))
5959 bitmap_set_bit (to_clear, j);
5960 }
5961
5962 /* Remove the candidates for which the cost is infinite from
5963 the list of related candidates.  */
5964 bitmap_and_compl_into (group->related_cands, to_clear);
5965 bitmap_clear (to_clear);
5966 }
5967 }
5968
5969 BITMAP_FREE (to_clear);
5970
5971 if (dump_file && (dump_flags & TDF_DETAILS))
5972 {
5973 bitmap_iterator bi;
5974
5975 /* Dump invariant variables. */
5976 fprintf (stream: dump_file, format: "\n<Invariant Vars>:\n");
5977 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5978 {
5979 struct version_info *info = ver_info (data, ver: i);
5980 if (info->inv_id)
5981 {
5982 fprintf (stream: dump_file, format: "Inv %d:\t", info->inv_id);
5983 print_generic_expr (dump_file, info->name, TDF_SLIM);
5984 fprintf (stream: dump_file, format: "%s\n",
5985 info->has_nonlin_use ? "" : "\t(eliminable)");
5986 }
5987 }
5988
5989 /* Dump invariant expressions. */
5990 fprintf (stream: dump_file, format: "\n<Invariant Expressions>:\n");
5991 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5992
5993 for (hash_table<iv_inv_expr_hasher>::iterator it
5994 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5995 ++it)
5996 list.safe_push (obj: *it);
5997
5998 list.qsort (sort_iv_inv_expr_ent);
5999
6000 for (i = 0; i < list.length (); ++i)
6001 {
6002 fprintf (stream: dump_file, format: "inv_expr %d: \t", list[i]->id);
6003 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
6004 fprintf (stream: dump_file, format: "\n");
6005 }
6006
6007 fprintf (stream: dump_file, format: "\n<Group-candidate Costs>:\n");
6008
6009 for (i = 0; i < data->vgroups.length (); i++)
6010 {
6011 group = data->vgroups[i];
6012
6013 fprintf (stream: dump_file, format: "Group %d:\n", i);
6014 fprintf (stream: dump_file, format: " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
6015 for (j = 0; j < group->n_map_members; j++)
6016 {
6017 if (!group->cost_map[j].cand
6018 || group->cost_map[j].cost.infinite_cost_p ())
6019 continue;
6020
6021 fprintf (stream: dump_file, format: " %d\t%" PRId64 "\t%d\t",
6022 group->cost_map[j].cand->id,
6023 group->cost_map[j].cost.cost,
6024 group->cost_map[j].cost.complexity);
6025 if (!group->cost_map[j].inv_exprs
6026 || bitmap_empty_p (map: group->cost_map[j].inv_exprs))
6027 fprintf (stream: dump_file, format: "NIL;\t");
6028 else
6029 bitmap_print (dump_file,
6030 group->cost_map[j].inv_exprs, "", ";\t");
6031 if (!group->cost_map[j].inv_vars
6032 || bitmap_empty_p (map: group->cost_map[j].inv_vars))
6033 fprintf (stream: dump_file, format: "NIL;\n");
6034 else
6035 bitmap_print (dump_file,
6036 group->cost_map[j].inv_vars, "", "\n");
6037 }
6038
6039 fprintf (stream: dump_file, format: "\n");
6040 }
6041 fprintf (stream: dump_file, format: "\n");
6042 }
6043}
6044
6045/* Determines cost of the candidate CAND. */
6046
6047static void
6048determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6049{
6050 comp_cost cost_base;
6051 int64_t cost, cost_step;
6052 tree base;
6053
6054 gcc_assert (cand->iv != NULL);
6055
6056 /* There are two costs associated with the candidate -- its increment
6057 and its initialization.  The second is almost negligible for any loop
6058 that rolls enough, so we give it only a small weight.  */
6059
6060 base = cand->iv->base;
6061 cost_base = force_var_cost (data, expr: base, NULL);
6062 /* It is exceptional for the iv register to happen to be initialized with
6063 the proper value at no cost.  In general, there will at least be a regcopy
6064 or a const set.  */
6065 if (cost_base.cost == 0)
6066 cost_base.cost = COSTS_N_INSNS (1);
6067 /* Doloop decrement should be considered as zero cost. */
6068 if (cand->doloop_p)
6069 cost_step = 0;
6070 else
6071 cost_step = add_cost (speed: data->speed, TYPE_MODE (TREE_TYPE (base)));
6072 cost = cost_step + adjust_setup_cost (data, cost: cost_base.cost);
6073
6074 /* Prefer the original ivs unless we may gain something by replacing them;
6075 the reason is to make debugging simpler.  This is not relevant for
6076 artificial ivs created by other optimization passes.  */
6077 if ((cand->pos != IP_ORIGINAL
6078 || !SSA_NAME_VAR (cand->var_before)
6079 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6080 /* Prefer doloop as well. */
6081 && !cand->doloop_p)
6082 cost++;
6083
6084 /* Prefer not to insert statements into latch unless there are some
6085 already (so that we do not create unnecessary jumps). */
6086 if (cand->pos == IP_END
6087 && empty_block_p (ip_end_pos (data->current_loop)))
6088 cost++;
6089
6090 cand->cost = cost;
6091 cand->cost_step = cost_step;
6092}
6093
6094/* Determines costs of computation of the candidates. */
6095
6096static void
6097determine_iv_costs (struct ivopts_data *data)
6098{
6099 unsigned i;
6100
6101 if (dump_file && (dump_flags & TDF_DETAILS))
6102 {
6103 fprintf (stream: dump_file, format: "<Candidate Costs>:\n");
6104 fprintf (stream: dump_file, format: " cand\tcost\n");
6105 }
6106
6107 for (i = 0; i < data->vcands.length (); i++)
6108 {
6109 struct iv_cand *cand = data->vcands[i];
6110
6111 determine_iv_cost (data, cand);
6112
6113 if (dump_file && (dump_flags & TDF_DETAILS))
6114 fprintf (stream: dump_file, format: " %d\t%d\n", i, cand->cost);
6115 }
6116
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6118 fprintf (stream: dump_file, format: "\n");
6119}
6120
6121/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6122 induction variables. Note N_INVS includes both invariant variables and
6123 invariant expressions. */
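/* A purely illustrative example (made-up numbers): with regs_used = 2,
   N_INVS = 1 and N_CANDS = 3, we get n_new = 4 and regs_needed = 6; with
   10 available registers and target_res_regs = 3, 6 + 3 < 10 holds, so the
   estimate below is simply n_new + n_cands = 4 + 3 = 7.  */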
6124
6125static unsigned
6126ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6127 unsigned n_cands)
6128{
6129 unsigned cost;
6130 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6131 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6132 bool speed = data->speed;
6133
6134 /* If there is a call in the loop body, the call-clobbered registers
6135 are not available for loop invariants. */
6136 if (data->body_includes_call)
6137 available_regs = available_regs - target_clobbered_regs;
6138
6139 /* If we have enough registers. */
6140 if (regs_needed + target_res_regs < available_regs)
6141 cost = n_new;
6142 /* If close to running out of registers, try to preserve them. */
6143 else if (regs_needed <= available_regs)
6144 cost = target_reg_cost [speed] * regs_needed;
6145 /* If we run out of available registers but the candidates alone would
6146 still fit in them, penalize the extra registers using target_spill_cost.  */
6147 else if (n_cands <= available_regs)
6148 cost = target_reg_cost [speed] * available_regs
6149 + target_spill_cost [speed] * (regs_needed - available_regs);
6150 /* If even the number of candidates exceeds the available registers, penalize
6151 the extra candidate registers using target_spill_cost * 2, because it is
6152 more expensive to spill an induction variable than an invariant.  */
6153 else
6154 cost = target_reg_cost [speed] * available_regs
6155 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6156 + target_spill_cost [speed] * (regs_needed - n_cands);
6157
6158 /* Finally, add the number of candidates, so that we prefer eliminating
6159 induction variables if possible. */
6160 return cost + n_cands;
6161}
6162
6163/* For each size of the induction variable set determine the penalty. */
6164
6165static void
6166determine_set_costs (struct ivopts_data *data)
6167{
6168 unsigned j, n;
6169 gphi *phi;
6170 gphi_iterator psi;
6171 tree op;
6172 class loop *loop = data->current_loop;
6173 bitmap_iterator bi;
6174
6175 if (dump_file && (dump_flags & TDF_DETAILS))
6176 {
6177 fprintf (stream: dump_file, format: "<Global Costs>:\n");
6178 fprintf (stream: dump_file, format: " target_avail_regs %d\n", target_avail_regs);
6179 fprintf (stream: dump_file, format: " target_clobbered_regs %d\n", target_clobbered_regs);
6180 fprintf (stream: dump_file, format: " target_reg_cost %d\n", target_reg_cost[data->speed]);
6181 fprintf (stream: dump_file, format: " target_spill_cost %d\n", target_spill_cost[data->speed]);
6182 }
6183
6184 n = 0;
6185 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
6186 {
6187 phi = psi.phi ();
6188 op = PHI_RESULT (phi);
6189
6190 if (virtual_operand_p (op))
6191 continue;
6192
6193 if (get_iv (data, var: op))
6194 continue;
6195
6196 if (!POINTER_TYPE_P (TREE_TYPE (op))
6197 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6198 continue;
6199
6200 n++;
6201 }
6202
6203 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6204 {
6205 struct version_info *info = ver_info (data, ver: j);
6206
6207 if (info->inv_id && info->has_nonlin_use)
6208 n++;
6209 }
6210
6211 data->regs_used = n;
6212 if (dump_file && (dump_flags & TDF_DETAILS))
6213 fprintf (stream: dump_file, format: " regs_used %d\n", n);
6214
6215 if (dump_file && (dump_flags & TDF_DETAILS))
6216 {
6217 fprintf (stream: dump_file, format: " cost for size:\n");
6218 fprintf (stream: dump_file, format: " ivs\tcost\n");
6219 for (j = 0; j <= 2 * target_avail_regs; j++)
6220 fprintf (stream: dump_file, format: " %d\t%d\n", j,
6221 ivopts_estimate_reg_pressure (data, n_invs: 0, n_cands: j));
6222 fprintf (stream: dump_file, format: "\n");
6223 }
6224}
6225
6226/* Returns true if A is a cheaper cost pair than B. */
6227
6228static bool
6229cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6230{
6231 if (!a)
6232 return false;
6233
6234 if (!b)
6235 return true;
6236
6237 if (a->cost < b->cost)
6238 return true;
6239
6240 if (b->cost < a->cost)
6241 return false;
6242
6243 /* In case the costs are the same, prefer the cheaper candidate. */
6244 if (a->cand->cost < b->cand->cost)
6245 return true;
6246
6247 return false;
6248}
6249
6250/* Compare whether A is a more expensive cost pair than B.  Return 1, 0 and -1
6251 for more expensive, equal and cheaper respectively.  */
6252
6253static int
6254compare_cost_pair (class cost_pair *a, class cost_pair *b)
6255{
6256 if (cheaper_cost_pair (a, b))
6257 return -1;
6258 if (cheaper_cost_pair (a: b, b: a))
6259 return 1;
6260
6261 return 0;
6262}
6263
6264/* Returns the cost pair by whose candidate GROUP is expressed in IVS.  */
6265
6266static class cost_pair *
6267iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6268{
6269 return ivs->cand_for_group[group->id];
6270}
6271
6272/* Computes the cost field of IVS structure. */
6273
6274static void
6275iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6276{
6277 comp_cost cost = ivs->cand_use_cost;
6278
6279 cost += ivs->cand_cost;
6280 cost += ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands);
6281 ivs->cost = cost;
6282}
6283
6284/* Remove uses of the invariants in set INVS by decreasing the counters in
6285 N_INV_USES and updating IVS accordingly.  */
6286
6287static void
6288iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6289{
6290 bitmap_iterator bi;
6291 unsigned iid;
6292
6293 if (!invs)
6294 return;
6295
6296 gcc_assert (n_inv_uses != NULL);
6297 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6298 {
6299 n_inv_uses[iid]--;
6300 if (n_inv_uses[iid] == 0)
6301 ivs->n_invs--;
6302 }
6303}
6304
6305/* Set GROUP not to be expressed by any candidate in IVS.  */
6306
6307static void
6308iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6309 struct iv_group *group)
6310{
6311 unsigned gid = group->id, cid;
6312 class cost_pair *cp;
6313
6314 cp = ivs->cand_for_group[gid];
6315 if (!cp)
6316 return;
6317 cid = cp->cand->id;
6318
6319 ivs->bad_groups++;
6320 ivs->cand_for_group[gid] = NULL;
6321 ivs->n_cand_uses[cid]--;
6322
6323 if (ivs->n_cand_uses[cid] == 0)
6324 {
6325 bitmap_clear_bit (ivs->cands, cid);
6326 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6327 ivs->n_cands--;
6328 ivs->cand_cost -= cp->cand->cost;
6329 iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6330 iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6331 }
6332
6333 ivs->cand_use_cost -= cp->cost;
6334 iv_ca_set_remove_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6335 iv_ca_set_remove_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6336 iv_ca_recount_cost (data, ivs);
6337}
6338
6339/* Add uses of the invariants in set INVS by increasing the counters in
6340 N_INV_USES and updating IVS accordingly.  */
6341
6342static void
6343iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6344{
6345 bitmap_iterator bi;
6346 unsigned iid;
6347
6348 if (!invs)
6349 return;
6350
6351 gcc_assert (n_inv_uses != NULL);
6352 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6353 {
6354 n_inv_uses[iid]++;
6355 if (n_inv_uses[iid] == 1)
6356 ivs->n_invs++;
6357 }
6358}
6359
6360/* Set cost pair for GROUP in set IVS to CP. */
6361
6362static void
6363iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6364 struct iv_group *group, class cost_pair *cp)
6365{
6366 unsigned gid = group->id, cid;
6367
6368 if (ivs->cand_for_group[gid] == cp)
6369 return;
6370
6371 if (ivs->cand_for_group[gid])
6372 iv_ca_set_no_cp (data, ivs, group);
6373
6374 if (cp)
6375 {
6376 cid = cp->cand->id;
6377
6378 ivs->bad_groups--;
6379 ivs->cand_for_group[gid] = cp;
6380 ivs->n_cand_uses[cid]++;
6381 if (ivs->n_cand_uses[cid] == 1)
6382 {
6383 bitmap_set_bit (ivs->cands, cid);
6384 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6385 ivs->n_cands++;
6386 ivs->cand_cost += cp->cand->cost;
6387 iv_ca_set_add_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6388 iv_ca_set_add_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6389 }
6390
6391 ivs->cand_use_cost += cp->cost;
6392 iv_ca_set_add_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6393 iv_ca_set_add_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6394 iv_ca_recount_cost (data, ivs);
6395 }
6396}
6397
6398/* Extend set IVS by expressing GROUP by some of the candidates in it
6399 if possible.  Consider all important candidates if the candidates in
6400 set IVS don't give any result.  */
6401
6402static void
6403iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6404 struct iv_group *group)
6405{
6406 class cost_pair *best_cp = NULL, *cp;
6407 bitmap_iterator bi;
6408 unsigned i;
6409 struct iv_cand *cand;
6410
6411 gcc_assert (ivs->upto >= group->id);
6412 ivs->upto++;
6413 ivs->bad_groups++;
6414
6415 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6416 {
6417 cand = data->vcands[i];
6418 cp = get_group_iv_cost (data, group, cand);
6419 if (cheaper_cost_pair (a: cp, b: best_cp))
6420 best_cp = cp;
6421 }
6422
6423 if (best_cp == NULL)
6424 {
6425 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6426 {
6427 cand = data->vcands[i];
6428 cp = get_group_iv_cost (data, group, cand);
6429 if (cheaper_cost_pair (a: cp, b: best_cp))
6430 best_cp = cp;
6431 }
6432 }
6433
6434 iv_ca_set_cp (data, ivs, group, cp: best_cp);
6435}
6436
6437/* Get cost for assignment IVS. */
6438
6439static comp_cost
6440iv_ca_cost (class iv_ca *ivs)
6441{
6442 /* This was a conditional expression but it triggered a bug in
6443 Sun C 5.5. */
6444 if (ivs->bad_groups)
6445 return infinite_cost;
6446 else
6447 return ivs->cost;
6448}
6449
6450/* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6451 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6452 respectively. */
6453
6454static int
6455iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6456 struct iv_group *group, class cost_pair *old_cp,
6457 class cost_pair *new_cp)
6458{
6459 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6460 unsigned old_n_invs = ivs->n_invs;
6461 iv_ca_set_cp (data, ivs, group, cp: new_cp);
6462 unsigned new_n_invs = ivs->n_invs;
6463 iv_ca_set_cp (data, ivs, group, cp: old_cp);
6464
6465 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6466}
6467
6468/* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6469 it before NEXT. */
6470
6471static struct iv_ca_delta *
6472iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6473 class cost_pair *new_cp, struct iv_ca_delta *next)
6474{
6475 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6476
6477 change->group = group;
6478 change->old_cp = old_cp;
6479 change->new_cp = new_cp;
6480 change->next = next;
6481
6482 return change;
6483}
6484
6485/* Joins two lists of changes L1 and L2. Destructive -- old lists
6486 are rewritten. */
6487
6488static struct iv_ca_delta *
6489iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6490{
6491 struct iv_ca_delta *last;
6492
6493 if (!l2)
6494 return l1;
6495
6496 if (!l1)
6497 return l2;
6498
6499 for (last = l1; last->next; last = last->next)
6500 continue;
6501 last->next = l2;
6502
6503 return l1;
6504}
6505
6506/* Reverse the list of changes DELTA, forming the inverse to it. */
6507
6508static struct iv_ca_delta *
6509iv_ca_delta_reverse (struct iv_ca_delta *delta)
6510{
6511 struct iv_ca_delta *act, *next, *prev = NULL;
6512
6513 for (act = delta; act; act = next)
6514 {
6515 next = act->next;
6516 act->next = prev;
6517 prev = act;
6518
6519 std::swap (a&: act->old_cp, b&: act->new_cp);
6520 }
6521
6522 return prev;
6523}
6524
6525/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6526 reverted instead. */
6527
6528static void
6529iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6530 struct iv_ca_delta *delta, bool forward)
6531{
6532 class cost_pair *from, *to;
6533 struct iv_ca_delta *act;
6534
6535 if (!forward)
6536 delta = iv_ca_delta_reverse (delta);
6537
6538 for (act = delta; act; act = act->next)
6539 {
6540 from = act->old_cp;
6541 to = act->new_cp;
6542 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6543 iv_ca_set_cp (data, ivs, group: act->group, cp: to);
6544 }
6545
6546 if (!forward)
6547 iv_ca_delta_reverse (delta);
6548}
6549
6550/* Returns true if CAND is used in IVS. */
6551
6552static bool
6553iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6554{
6555 return ivs->n_cand_uses[cand->id] > 0;
6556}
6557
6558/* Returns number of induction variable candidates in the set IVS. */
6559
6560static unsigned
6561iv_ca_n_cands (class iv_ca *ivs)
6562{
6563 return ivs->n_cands;
6564}
6565
6566/* Free the list of changes DELTA. */
6567
6568static void
6569iv_ca_delta_free (struct iv_ca_delta **delta)
6570{
6571 struct iv_ca_delta *act, *next;
6572
6573 for (act = *delta; act; act = next)
6574 {
6575 next = act->next;
6576 free (ptr: act);
6577 }
6578
6579 *delta = NULL;
6580}
6581
6582/* Allocates a new iv candidate assignment.  */
6583
6584static class iv_ca *
6585iv_ca_new (struct ivopts_data *data)
6586{
6587 class iv_ca *nw = XNEW (class iv_ca);
6588
6589 nw->upto = 0;
6590 nw->bad_groups = 0;
6591 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6592 data->vgroups.length ());
6593 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6594 nw->cands = BITMAP_ALLOC (NULL);
6595 nw->n_cands = 0;
6596 nw->n_invs = 0;
6597 nw->cand_use_cost = no_cost;
6598 nw->cand_cost = 0;
6599 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6600 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6601 nw->cost = no_cost;
6602
6603 return nw;
6604}
6605
6606/* Free memory occupied by the set IVS. */
6607
6608static void
6609iv_ca_free (class iv_ca **ivs)
6610{
6611 free (ptr: (*ivs)->cand_for_group);
6612 free (ptr: (*ivs)->n_cand_uses);
6613 BITMAP_FREE ((*ivs)->cands);
6614 free (ptr: (*ivs)->n_inv_var_uses);
6615 free (ptr: (*ivs)->n_inv_expr_uses);
6616 free (ptr: *ivs);
6617 *ivs = NULL;
6618}
6619
6620/* Dumps IVS to FILE. */
6621
6622static void
6623iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6624{
6625 unsigned i;
6626 comp_cost cost = iv_ca_cost (ivs);
6627
6628 fprintf (stream: file, format: " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6629 cost.complexity);
6630 fprintf (stream: file, format: " reg_cost: %d\n",
6631 ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands));
6632 fprintf (stream: file, format: " cand_cost: %" PRId64 "\n cand_group_cost: "
6633 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6634 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6635 bitmap_print (file, ivs->cands, " candidates: ","\n");
6636
6637 for (i = 0; i < ivs->upto; i++)
6638 {
6639 struct iv_group *group = data->vgroups[i];
6640 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6641 if (cp)
6642 fprintf (stream: file, format: " group:%d --> iv_cand:%d, cost=("
6643 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6644 cp->cost.cost, cp->cost.complexity);
6645 else
6646 fprintf (stream: file, format: " group:%d --> ??\n", group->id);
6647 }
6648
6649 const char *pref = "";
6650 fprintf (stream: file, format: " invariant variables: ");
6651 for (i = 1; i <= data->max_inv_var_id; i++)
6652 if (ivs->n_inv_var_uses[i])
6653 {
6654 fprintf (stream: file, format: "%s%d", pref, i);
6655 pref = ", ";
6656 }
6657
6658 pref = "";
6659 fprintf (stream: file, format: "\n invariant expressions: ");
6660 for (i = 1; i <= data->max_inv_expr_id; i++)
6661 if (ivs->n_inv_expr_uses[i])
6662 {
6663 fprintf (stream: file, format: "%s%d", pref, i);
6664 pref = ", ";
6665 }
6666
6667 fprintf (stream: file, format: "\n\n");
6668}
6669
6670/* Try changing the candidate in IVS to CAND for each use.  Return the cost of
6671 the new set, and store the differences in DELTA.  The number of induction
6672 variables in the new set is stored to N_IVS.  MIN_NCAND is a flag: when it
6673 is true, the function tries to find a solution with a minimal number of iv candidates.  */
6674
6675static comp_cost
6676iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6677 struct iv_cand *cand, struct iv_ca_delta **delta,
6678 unsigned *n_ivs, bool min_ncand)
6679{
6680 unsigned i;
6681 comp_cost cost;
6682 struct iv_group *group;
6683 class cost_pair *old_cp, *new_cp;
6684
6685 *delta = NULL;
6686 for (i = 0; i < ivs->upto; i++)
6687 {
6688 group = data->vgroups[i];
6689 old_cp = iv_ca_cand_for_group (ivs, group);
6690
6691 if (old_cp
6692 && old_cp->cand == cand)
6693 continue;
6694
6695 new_cp = get_group_iv_cost (data, group, cand);
6696 if (!new_cp)
6697 continue;
6698
6699 if (!min_ncand)
6700 {
6701 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6702 /* Skip if new_cp depends on more invariants. */
6703 if (cmp_invs > 0)
6704 continue;
6705
6706 int cmp_cost = compare_cost_pair (a: new_cp, b: old_cp);
6707 /* Skip if new_cp is not cheaper. */
6708 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6709 continue;
6710 }
6711
6712 *delta = iv_ca_delta_add (group, old_cp, new_cp, next: *delta);
6713 }
6714
6715 iv_ca_delta_commit (data, ivs, delta: *delta, forward: true);
6716 cost = iv_ca_cost (ivs);
6717 if (n_ivs)
6718 *n_ivs = iv_ca_n_cands (ivs);
6719 iv_ca_delta_commit (data, ivs, delta: *delta, forward: false);
6720
6721 return cost;
6722}
6723
6724/* Try narrowing set IVS by removing CAND. Return the cost of
6725 the new set and store the differences in DELTA. START is
6726 the candidate with which we start narrowing. */
6727
6728static comp_cost
6729iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6730 struct iv_cand *cand, struct iv_cand *start,
6731 struct iv_ca_delta **delta)
6732{
6733 unsigned i, ci;
6734 struct iv_group *group;
6735 class cost_pair *old_cp, *new_cp, *cp;
6736 bitmap_iterator bi;
6737 struct iv_cand *cnd;
6738 comp_cost cost, best_cost, acost;
6739
6740 *delta = NULL;
6741 for (i = 0; i < data->vgroups.length (); i++)
6742 {
6743 group = data->vgroups[i];
6744
6745 old_cp = iv_ca_cand_for_group (ivs, group);
6746 if (old_cp->cand != cand)
6747 continue;
6748
6749 best_cost = iv_ca_cost (ivs);
6750 /* Start narrowing with START. */
6751 new_cp = get_group_iv_cost (data, group, cand: start);
6752
6753 if (data->consider_all_candidates)
6754 {
6755 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6756 {
6757 if (ci == cand->id || (start && ci == start->id))
6758 continue;
6759
6760 cnd = data->vcands[ci];
6761
6762 cp = get_group_iv_cost (data, group, cand: cnd);
6763 if (!cp)
6764 continue;
6765
6766 iv_ca_set_cp (data, ivs, group, cp);
6767 acost = iv_ca_cost (ivs);
6768
6769 if (acost < best_cost)
6770 {
6771 best_cost = acost;
6772 new_cp = cp;
6773 }
6774 }
6775 }
6776 else
6777 {
6778 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6779 {
6780 if (ci == cand->id || (start && ci == start->id))
6781 continue;
6782
6783 cnd = data->vcands[ci];
6784
6785 cp = get_group_iv_cost (data, group, cand: cnd);
6786 if (!cp)
6787 continue;
6788
6789 iv_ca_set_cp (data, ivs, group, cp);
6790 acost = iv_ca_cost (ivs);
6791
6792 if (acost < best_cost)
6793 {
6794 best_cost = acost;
6795 new_cp = cp;
6796 }
6797 }
6798 }
6799 /* Restore to old cp for use. */
6800 iv_ca_set_cp (data, ivs, group, cp: old_cp);
6801
6802 if (!new_cp)
6803 {
6804 iv_ca_delta_free (delta);
6805 return infinite_cost;
6806 }
6807
6808 *delta = iv_ca_delta_add (group, old_cp, new_cp, next: *delta);
6809 }
6810
6811 iv_ca_delta_commit (data, ivs, delta: *delta, forward: true);
6812 cost = iv_ca_cost (ivs);
6813 iv_ca_delta_commit (data, ivs, delta: *delta, forward: false);
6814
6815 return cost;
6816}
6817
6818/* Try optimizing the set of candidates IVS by removing candidates other than
6819 EXCEPT_CAND from it.  Return the cost of the new set, and store the
6820 differences in DELTA.  */
6821
6822static comp_cost
6823iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6824 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6825{
6826 bitmap_iterator bi;
6827 struct iv_ca_delta *act_delta, *best_delta;
6828 unsigned i;
6829 comp_cost best_cost, acost;
6830 struct iv_cand *cand;
6831
6832 best_delta = NULL;
6833 best_cost = iv_ca_cost (ivs);
6834
6835 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6836 {
6837 cand = data->vcands[i];
6838
6839 if (cand == except_cand)
6840 continue;
6841
6842 acost = iv_ca_narrow (data, ivs, cand, start: except_cand, delta: &act_delta);
6843
6844 if (acost < best_cost)
6845 {
6846 best_cost = acost;
6847 iv_ca_delta_free (delta: &best_delta);
6848 best_delta = act_delta;
6849 }
6850 else
6851 iv_ca_delta_free (delta: &act_delta);
6852 }
6853
6854 if (!best_delta)
6855 {
6856 *delta = NULL;
6857 return best_cost;
6858 }
6859
6860 /* Recurse to possibly remove other unnecessary ivs. */
6861 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
6862 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6863 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: false);
6864 *delta = iv_ca_delta_join (l1: best_delta, l2: *delta);
6865 return best_cost;
6866}
6867
6868/* Check if CAND_IDX is a candidate other than OLD_CAND that has a cheaper
6869 local cost for GROUP than BEST_CP.  If so, return a pointer to the
6870 corresponding cost_pair; otherwise just return BEST_CP.  */
6871
6872static class cost_pair*
6873cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6874 unsigned int cand_idx, struct iv_cand *old_cand,
6875 class cost_pair *best_cp)
6876{
6877 struct iv_cand *cand;
6878 class cost_pair *cp;
6879
6880 gcc_assert (old_cand != NULL && best_cp != NULL);
6881 if (cand_idx == old_cand->id)
6882 return best_cp;
6883
6884 cand = data->vcands[cand_idx];
6885 cp = get_group_iv_cost (data, group, cand);
6886 if (cp != NULL && cheaper_cost_pair (a: cp, b: best_cp))
6887 return cp;
6888
6889 return best_cp;
6890}
6891
6892/* Try breaking the locally optimal fixed point for IVS by replacing candidates
6893 that are used by more than one iv use.  For each such candidate, this
6894 function tries to represent the iv uses under that candidate using
6895 other candidates with lower local cost, then tries to prune the new set.
6896 If the new set has lower cost, it returns the new cost after recording the
6897 candidate replacements in list DELTA.  */
6898
6899static comp_cost
6900iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6901 struct iv_ca_delta **delta)
6902{
6903 bitmap_iterator bi, bj;
6904 unsigned int i, j, k;
6905 struct iv_cand *cand;
6906 comp_cost orig_cost, acost;
6907 struct iv_ca_delta *act_delta, *tmp_delta;
6908 class cost_pair *old_cp, *best_cp = NULL;
6909
6910 *delta = NULL;
6911 orig_cost = iv_ca_cost (ivs);
6912
6913 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6914 {
6915 if (ivs->n_cand_uses[i] == 1
6916 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6917 continue;
6918
6919 cand = data->vcands[i];
6920
6921 act_delta = NULL;
6922 /* Represent uses under current candidate using other ones with
6923 lower local cost. */
6924 for (j = 0; j < ivs->upto; j++)
6925 {
6926 struct iv_group *group = data->vgroups[j];
6927 old_cp = iv_ca_cand_for_group (ivs, group);
6928
6929 if (old_cp->cand != cand)
6930 continue;
6931
6932 best_cp = old_cp;
6933 if (data->consider_all_candidates)
6934 for (k = 0; k < data->vcands.length (); k++)
6935 best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6936 old_cand: old_cp->cand, best_cp);
6937 else
6938 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6939 best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6940 old_cand: old_cp->cand, best_cp);
6941
6942 if (best_cp == old_cp)
6943 continue;
6944
6945 act_delta = iv_ca_delta_add (group, old_cp, new_cp: best_cp, next: act_delta);
6946 }
6947 /* No need for further pruning.  */
6948 if (!act_delta)
6949 continue;
6950
6951 /* Prune the new candidate set. */
6952 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
6953 acost = iv_ca_prune (data, ivs, NULL, delta: &tmp_delta);
6954 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
6955 act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
6956
6957 if (acost < orig_cost)
6958 {
6959 *delta = act_delta;
6960 return acost;
6961 }
6962 else
6963 iv_ca_delta_free (delta: &act_delta);
6964 }
6965
6966 return orig_cost;
6967}
6968
6969/* Tries to extend the set IVS in the best possible way in order to
6970 express GROUP.  If ORIGINALP is true, prefer candidates from
6971 the original set of IVs, otherwise favor important candidates not
6972 based on any memory object.  */
6973
6974static bool
6975try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6976 struct iv_group *group, bool originalp)
6977{
6978 comp_cost best_cost, act_cost;
6979 unsigned i;
6980 bitmap_iterator bi;
6981 struct iv_cand *cand;
6982 struct iv_ca_delta *best_delta = NULL, *act_delta;
6983 class cost_pair *cp;
6984
6985 iv_ca_add_group (data, ivs, group);
6986 best_cost = iv_ca_cost (ivs);
6987 cp = iv_ca_cand_for_group (ivs, group);
6988 if (cp)
6989 {
6990 best_delta = iv_ca_delta_add (group, NULL, new_cp: cp, NULL);
6991 iv_ca_set_no_cp (data, ivs, group);
6992 }
6993
6994 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6995 first try important candidates not based on any memory object. Only if
6996 this fails, try the specific ones. Rationale -- in loops with many
6997 variables the best choice often is to use just one generic biv. If we
6998 added here many ivs specific to the uses, the optimization algorithm later
6999 would be likely to get stuck in a local minimum, thus causing us to create
7000 too many ivs. The approach from few ivs to more seems more likely to be
7001 successful -- starting from few ivs, replacing an expensive use by a
7002 specific iv should always be a win. */
7003 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
7004 {
7005 cand = data->vcands[i];
7006
7007 if (originalp && cand->pos != IP_ORIGINAL)
7008 continue;
7009
7010 if (!originalp && cand->iv->base_object != NULL_TREE)
7011 continue;
7012
7013 if (iv_ca_cand_used_p (ivs, cand))
7014 continue;
7015
7016 cp = get_group_iv_cost (data, group, cand);
7017 if (!cp)
7018 continue;
7019
7020 iv_ca_set_cp (data, ivs, group, cp);
7021 act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL,
7022 min_ncand: true);
7023 iv_ca_set_no_cp (data, ivs, group);
7024 act_delta = iv_ca_delta_add (group, NULL, new_cp: cp, next: act_delta);
7025
7026 if (act_cost < best_cost)
7027 {
7028 best_cost = act_cost;
7029
7030 iv_ca_delta_free (delta: &best_delta);
7031 best_delta = act_delta;
7032 }
7033 else
7034 iv_ca_delta_free (delta: &act_delta);
7035 }
7036
7037 if (best_cost.infinite_cost_p ())
7038 {
7039 for (i = 0; i < group->n_map_members; i++)
7040 {
7041 cp = group->cost_map + i;
7042 cand = cp->cand;
7043 if (!cand)
7044 continue;
7045
7046 /* Already tried this. */
7047 if (cand->important)
7048 {
7049 if (originalp && cand->pos == IP_ORIGINAL)
7050 continue;
7051 if (!originalp && cand->iv->base_object == NULL_TREE)
7052 continue;
7053 }
7054
7055 if (iv_ca_cand_used_p (ivs, cand))
7056 continue;
7057
7058 act_delta = NULL;
7059 iv_ca_set_cp (data, ivs, group, cp);
7060 act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL, min_ncand: true);
7061 iv_ca_set_no_cp (data, ivs, group);
7062 act_delta = iv_ca_delta_add (group,
7063 old_cp: iv_ca_cand_for_group (ivs, group),
7064 new_cp: cp, next: act_delta);
7065
7066 if (act_cost < best_cost)
7067 {
7068 best_cost = act_cost;
7069
7070 if (best_delta)
7071 iv_ca_delta_free (delta: &best_delta);
7072 best_delta = act_delta;
7073 }
7074 else
7075 iv_ca_delta_free (delta: &act_delta);
7076 }
7077 }
7078
7079 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7080 iv_ca_delta_free (delta: &best_delta);
7081
7082 return !best_cost.infinite_cost_p ();
7083}
7084
7085/* Finds an initial assignment of candidates to uses. */
7086
7087static class iv_ca *
7088get_initial_solution (struct ivopts_data *data, bool originalp)
7089{
7090 unsigned i;
7091 class iv_ca *ivs = iv_ca_new (data);
7092
7093 for (i = 0; i < data->vgroups.length (); i++)
7094 if (!try_add_cand_for (data, ivs, group: data->vgroups[i], originalp))
7095 {
7096 iv_ca_free (ivs: &ivs);
7097 return NULL;
7098 }
7099
7100 return ivs;
7101}
7102
7103/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
7104 points to a bool variable; if it is true, this function tries to break the
7105 locally optimal fixed point by replacing candidates in IVS.  */
7106
7107static bool
7108try_improve_iv_set (struct ivopts_data *data,
7109 class iv_ca *ivs, bool *try_replace_p)
7110{
7111 unsigned i, n_ivs;
7112 comp_cost acost, best_cost = iv_ca_cost (ivs);
7113 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7114 struct iv_cand *cand;
7115
7116 /* Try extending the set of induction variables by one. */
7117 for (i = 0; i < data->vcands.length (); i++)
7118 {
7119 cand = data->vcands[i];
7120
7121 if (iv_ca_cand_used_p (ivs, cand))
7122 continue;
7123
7124 acost = iv_ca_extend (data, ivs, cand, delta: &act_delta, n_ivs: &n_ivs, min_ncand: false);
7125 if (!act_delta)
7126 continue;
7127
7128 /* If we successfully added the candidate and the set is small enough,
7129 try optimizing it by removing other candidates. */
7130 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7131 {
7132 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
7133 acost = iv_ca_prune (data, ivs, except_cand: cand, delta: &tmp_delta);
7134 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
7135 act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
7136 }
7137
7138 if (acost < best_cost)
7139 {
7140 best_cost = acost;
7141 iv_ca_delta_free (delta: &best_delta);
7142 best_delta = act_delta;
7143 }
7144 else
7145 iv_ca_delta_free (delta: &act_delta);
7146 }
7147
7148 if (!best_delta)
7149 {
7150 /* Try removing the candidates from the set instead. */
7151 best_cost = iv_ca_prune (data, ivs, NULL, delta: &best_delta);
7152
7153 if (!best_delta && *try_replace_p)
7154 {
7155 *try_replace_p = false;
7156 /* So far the candidate-selection algorithm tends to choose fewer IVs,
7157 so that it can handle cases in which loops have many variables
7158 but the best choice is often to use only one general biv.  One
7159 weakness is that it can't handle the opposite cases, in which different
7160 candidates should be chosen with respect to each use.  To solve
7161 this problem, we replace candidates in the manner described in the
7162 comments of iv_ca_replace, thus giving the general algorithm a chance
7163 to break the locally optimal fixed point in these cases.  */
7164 best_cost = iv_ca_replace (data, ivs, delta: &best_delta);
7165 }
7166
7167 if (!best_delta)
7168 return false;
7169 }
7170
7171 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7172 iv_ca_delta_free (delta: &best_delta);
7173 return best_cost == iv_ca_cost (ivs);
7174}
7175
7176/* Attempts to find the optimal set of induction variables.  We use a simple
7177 greedy heuristic -- we try to replace at most one candidate in the selected
7178 solution and remove the unused ivs while this improves the cost.  */
7179
7180static class iv_ca *
7181find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7182{
7183 class iv_ca *set;
7184 bool try_replace_p = true;
7185
7186 /* Get the initial solution. */
7187 set = get_initial_solution (data, originalp);
7188 if (!set)
7189 {
7190 if (dump_file && (dump_flags & TDF_DETAILS))
7191 fprintf (stream: dump_file, format: "Unable to substitute for ivs, failed.\n");
7192 return NULL;
7193 }
7194
7195 if (dump_file && (dump_flags & TDF_DETAILS))
7196 {
7197 fprintf (stream: dump_file, format: "Initial set of candidates:\n");
7198 iv_ca_dump (data, file: dump_file, ivs: set);
7199 }
7200
7201 while (try_improve_iv_set (data, ivs: set, try_replace_p: &try_replace_p))
7202 {
7203 if (dump_file && (dump_flags & TDF_DETAILS))
7204 {
7205 fprintf (stream: dump_file, format: "Improved to:\n");
7206 iv_ca_dump (data, file: dump_file, ivs: set);
7207 }
7208 }
7209
7210 /* If the set has infinite_cost, it can't be optimal. */
7211 if (iv_ca_cost (ivs: set).infinite_cost_p ())
7212 {
7213 if (dump_file && (dump_flags & TDF_DETAILS))
7214 fprintf (stream: dump_file,
7215 format: "Overflow to infinite cost in try_improve_iv_set.\n");
7216 iv_ca_free (ivs: &set);
7217 }
7218 return set;
7219}
7220
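/* Finds the optimal set of induction variables, trying both the strategy
   that prefers the original IVs and the one that does not, and returning
   the cheaper of the two resulting sets.  */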
7221static class iv_ca *
7222find_optimal_iv_set (struct ivopts_data *data)
7223{
7224 unsigned i;
7225 comp_cost cost, origcost;
7226 class iv_ca *set, *origset;
7227
7228 /* Determine the cost based on a strategy that starts with original IVs,
7229 and try again using a strategy that prefers candidates not based
7230 on any IVs. */
7231 origset = find_optimal_iv_set_1 (data, originalp: true);
7232 set = find_optimal_iv_set_1 (data, originalp: false);
7233
7234 if (!origset && !set)
7235 return NULL;
7236
7237 origcost = origset ? iv_ca_cost (ivs: origset) : infinite_cost;
7238 cost = set ? iv_ca_cost (ivs: set) : infinite_cost;
7239
7240 if (dump_file && (dump_flags & TDF_DETAILS))
7241 {
7242 fprintf (stream: dump_file, format: "Original cost %" PRId64 " (complexity %d)\n\n",
7243 origcost.cost, origcost.complexity);
7244 fprintf (stream: dump_file, format: "Final cost %" PRId64 " (complexity %d)\n\n",
7245 cost.cost, cost.complexity);
7246 }
7247
7248 /* Choose the one with the best cost. */
7249 if (origcost <= cost)
7250 {
7251 if (set)
7252 iv_ca_free (ivs: &set);
7253 set = origset;
7254 }
7255 else if (origset)
7256 iv_ca_free (ivs: &origset);
7257
7258 for (i = 0; i < data->vgroups.length (); i++)
7259 {
7260 struct iv_group *group = data->vgroups[i];
7261 group->selected = iv_ca_cand_for_group (ivs: set, group)->cand;
7262 }
7263
7264 return set;
7265}
7266
7267/* Creates a new induction variable corresponding to CAND. */
7268
7269static void
7270create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7271{
7272 gimple_stmt_iterator incr_pos;
7273 tree base;
7274 struct iv_use *use;
7275 struct iv_group *group;
7276 bool after = false;
7277
7278 gcc_assert (cand->iv != NULL);
7279
7280 switch (cand->pos)
7281 {
7282 case IP_NORMAL:
7283 incr_pos = gsi_last_bb (bb: ip_normal_pos (data->current_loop));
7284 break;
7285
7286 case IP_END:
7287 incr_pos = gsi_last_bb (bb: ip_end_pos (data->current_loop));
7288 after = true;
7289 if (!gsi_end_p (i: incr_pos) && stmt_ends_bb_p (gsi_stmt (i: incr_pos)))
7290 {
7291 edge e = find_edge (gsi_bb (i: incr_pos), data->current_loop->header);
7292 incr_pos = gsi_after_labels (bb: split_edge (e));
7293 after = false;
7294 }
7295 break;
7296
7297 case IP_AFTER_USE:
7298 after = true;
7299 /* fall through */
7300 case IP_BEFORE_USE:
7301 incr_pos = gsi_for_stmt (cand->incremented_at);
7302 break;
7303
7304 case IP_ORIGINAL:
7305 /* Mark that the iv is preserved. */
7306 name_info (data, name: cand->var_before)->preserve_biv = true;
7307 name_info (data, name: cand->var_after)->preserve_biv = true;
7308
7309 /* Rewrite the increment so that it uses var_before directly. */
7310 use = find_interesting_uses_op (data, op: cand->var_after);
7311 group = data->vgroups[use->group_id];
7312 group->selected = cand;
7313 return;
7314 }
7315
7316 gimple_add_tmp_var (cand->var_before);
7317
7318 base = unshare_expr (cand->iv->base);
7319
7320 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7321 cand->var_before, data->current_loop,
7322 &incr_pos, after, &cand->var_before, &cand->var_after);
7323}
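
/* Illustrative sketch (hypothetical SSA names; the exact IL is whatever
   create_iv emits): for a candidate with base 0 and step 4 at IP_NORMAL,
   the loop roughly gains

       # ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
       ...
       ivtmp_2 = ivtmp_1 + 4;   <-- inserted at INCR_POS

   with cand->var_before naming ivtmp_1 and cand->var_after naming
   ivtmp_2.  */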
7324
7325/* Creates new induction variables described in SET. */
7326
7327static void
7328create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7329{
7330 unsigned i;
7331 struct iv_cand *cand;
7332 bitmap_iterator bi;
7333
7334 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7335 {
7336 cand = data->vcands[i];
7337 create_new_iv (data, cand);
7338 }
7339
7340 if (dump_file && (dump_flags & TDF_DETAILS))
7341 {
7342 fprintf (dump_file, "Selected IV set for loop %d",
7343 data->current_loop->num);
7344 if (data->loop_loc != UNKNOWN_LOCATION)
7345 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7346 LOCATION_LINE (data->loop_loc));
7347 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7348 avg_loop_niter (data->current_loop));
7349 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7350 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7351 {
7352 cand = data->vcands[i];
7353 dump_cand (dump_file, cand);
7354 }
7355 fprintf (dump_file, "\n");
7356 }
7357}
7358
7359/* Rewrites USE (definition of iv used in a nonlinear expression)
7360 using candidate CAND. */
7361
7362static void
7363rewrite_use_nonlinear_expr (struct ivopts_data *data,
7364 struct iv_use *use, struct iv_cand *cand)
7365{
7366 gassign *ass;
7367 gimple_stmt_iterator bsi;
7368 tree comp, type = get_use_type (use), tgt;
7369
7370 /* An important special case -- if we are asked to express value of
7371 the original iv by itself, just exit; there is no need to
7372 introduce a new computation (that might also need casting the
7373 variable to unsigned and back). */
7374 if (cand->pos == IP_ORIGINAL
7375 && cand->incremented_at == use->stmt)
7376 {
7377 tree op = NULL_TREE;
7378 enum tree_code stmt_code;
7379
7380 gcc_assert (is_gimple_assign (use->stmt));
7381 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7382
7383 /* Check whether we may leave the computation unchanged.
7384 This is the case only if it does not rely on other
7385 computations in the loop -- otherwise, the computation
7386 we rely upon may be removed in remove_unused_ivs,
7387 thus leading to ICE. */
7388 stmt_code = gimple_assign_rhs_code (gs: use->stmt);
7389 if (stmt_code == PLUS_EXPR
7390 || stmt_code == MINUS_EXPR
7391 || stmt_code == POINTER_PLUS_EXPR)
7392 {
7393 if (gimple_assign_rhs1 (gs: use->stmt) == cand->var_before)
7394 op = gimple_assign_rhs2 (gs: use->stmt);
7395 else if (gimple_assign_rhs2 (gs: use->stmt) == cand->var_before)
7396 op = gimple_assign_rhs1 (gs: use->stmt);
7397 }
7398
7399 if (op != NULL_TREE)
7400 {
7401 if (expr_invariant_in_loop_p (loop: data->current_loop, expr: op))
7402 return;
7403 if (TREE_CODE (op) == SSA_NAME)
7404 {
7405 struct iv *iv = get_iv (data, var: op);
7406 if (iv != NULL && integer_zerop (iv->step))
7407 return;
7408 }
7409 }
7410 }
7411
7412 switch (gimple_code (g: use->stmt))
7413 {
7414 case GIMPLE_PHI:
7415 tgt = PHI_RESULT (use->stmt);
7416
7417 /* If we should keep the biv, do not replace it. */
7418 if (name_info (data, name: tgt)->preserve_biv)
7419 return;
7420
7421 bsi = gsi_after_labels (bb: gimple_bb (g: use->stmt));
7422 break;
7423
7424 case GIMPLE_ASSIGN:
7425 tgt = gimple_assign_lhs (gs: use->stmt);
7426 bsi = gsi_for_stmt (use->stmt);
7427 break;
7428
7429 default:
7430 gcc_unreachable ();
7431 }
7432
7433 aff_tree aff_inv, aff_var;
7434 if (!get_computation_aff_1 (loop: data->current_loop, at: use->stmt,
7435 use, cand, aff_inv: &aff_inv, aff_var: &aff_var))
7436 gcc_unreachable ();
7437
7438 unshare_aff_combination (&aff_inv);
7439 unshare_aff_combination (&aff_var);
7440 /* Prefer a CSE opportunity over a loop-invariant expression by adding the
7441 offset last, so that iv_uses that differ only in their offsets can be CSEd. */
7442 poly_widest_int offset = aff_inv.offset;
7443 aff_inv.offset = 0;
7444
7445 gimple_seq stmt_list = NULL, seq = NULL;
7446 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7447 tree comp_op2 = aff_combination_to_tree (&aff_var);
7448 gcc_assert (comp_op1 && comp_op2);
7449
7450 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7451 gimple_seq_add_seq (&stmt_list, seq);
7452 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7453 gimple_seq_add_seq (&stmt_list, seq);
7454
7455 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7456 std::swap (a&: comp_op1, b&: comp_op2);
7457
7458 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7459 {
7460 comp = fold_build_pointer_plus (comp_op1,
7461 fold_convert (sizetype, comp_op2));
7462 comp = fold_build_pointer_plus (comp,
7463 wide_int_to_tree (sizetype, offset));
7464 }
7465 else
7466 {
7467 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7468 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7469 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7470 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7471 }
7472
7473 comp = fold_convert (type, comp);
7474 comp = force_gimple_operand (comp, &seq, false, NULL);
7475 gimple_seq_add_seq (&stmt_list, seq);
7476 if (gimple_code (g: use->stmt) != GIMPLE_PHI
7477 /* We can't allow re-allocating the stmt as it might be pointed
7478 to still. */
7479 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7480 >= gimple_num_ops (gs: gsi_stmt (i: bsi))))
7481 {
7482 comp = force_gimple_operand (comp, &seq, true, NULL);
7483 gimple_seq_add_seq (&stmt_list, seq);
7484 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7485 {
7486 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7487 /* As this isn't a plain copy we have to reset alignment
7488 information. */
7489 if (SSA_NAME_PTR_INFO (comp))
7490 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7491 }
7492 }
7493
7494 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7495 if (gimple_code (g: use->stmt) == GIMPLE_PHI)
7496 {
7497 ass = gimple_build_assign (tgt, comp);
7498 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7499
7500 bsi = gsi_for_stmt (use->stmt);
7501 remove_phi_node (&bsi, false);
7502 }
7503 else
7504 {
7505 gimple_assign_set_rhs_from_tree (&bsi, comp);
7506 use->stmt = gsi_stmt (i: bsi);
7507 }
7508}
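
/* Worked example (hypothetical names) of adding the offset last, as done
   above: two uses whose affine forms are BASE + 4*i + 8 and BASE + 4*i + 16
   are both rewritten so that the offset-free part is computed first,

       t_1 = BASE + 4 * i;      <-- identical for both uses, CSE-able
       use1 = t_1 + 8;
       use2 = t_1 + 16;

   instead of folding each offset into the invariant part, which would yield
   two unrelated invariant expressions and no common subexpression.  */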
7509
7510/* Performs a peephole optimization to reorder the iv update statement with
7511 a mem ref to enable instruction combining in later phases. The mem ref uses
7512 the iv value before the update, so the reordering transformation requires
7513 adjustment of the offset. CAND is the selected IV_CAND.
7514
7515 Example:
7516
7517 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7518 iv2 = iv1 + 1;
7519
7520 if (t < val) (1)
7521 goto L;
7522 goto Head;
7523
7524
7525 Directly propagating t over to (1) would introduce an overlapping live range,
7526 thus increasing register pressure. This peephole transforms it into:
7527
7528
7529 iv2 = iv1 + 1;
7530 t = MEM_REF (base, iv2, 8, 8);
7531 if (t < val)
7532 goto L;
7533 goto Head;
7534*/
7535
7536static void
7537adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7538{
7539 tree var_after;
7540 gimple *iv_update, *stmt;
7541 basic_block bb;
7542 gimple_stmt_iterator gsi, gsi_iv;
7543
7544 if (cand->pos != IP_NORMAL)
7545 return;
7546
7547 var_after = cand->var_after;
7548 iv_update = SSA_NAME_DEF_STMT (var_after);
7549
7550 bb = gimple_bb (g: iv_update);
7551 gsi = gsi_last_nondebug_bb (bb);
7552 stmt = gsi_stmt (i: gsi);
7553
7554 /* Only handle conditional statement for now. */
7555 if (gimple_code (g: stmt) != GIMPLE_COND)
7556 return;
7557
7558 gsi_prev_nondebug (i: &gsi);
7559 stmt = gsi_stmt (i: gsi);
7560 if (stmt != iv_update)
7561 return;
7562
7563 gsi_prev_nondebug (i: &gsi);
7564 if (gsi_end_p (i: gsi))
7565 return;
7566
7567 stmt = gsi_stmt (i: gsi);
7568 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
7569 return;
7570
7571 if (stmt != use->stmt)
7572 return;
7573
7574 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7575 return;
7576
7577 if (dump_file && (dump_flags & TDF_DETAILS))
7578 {
7579 fprintf (dump_file, "Reordering \n");
7580 print_gimple_stmt (dump_file, iv_update, 0);
7581 print_gimple_stmt (dump_file, use->stmt, 0);
7582 fprintf (dump_file, "\n");
7583 }
7584
7585 gsi = gsi_for_stmt (use->stmt);
7586 gsi_iv = gsi_for_stmt (iv_update);
7587 gsi_move_before (&gsi_iv, &gsi);
7588
7589 cand->pos = IP_BEFORE_USE;
7590 cand->incremented_at = use->stmt;
7591}
7592
7593/* Return the alias pointer type that should be used for a MEM_REF
7594 associated with USE, which has type USE_PTR_ADDRESS. */
7595
7596static tree
7597get_alias_ptr_type_for_ptr_address (iv_use *use)
7598{
7599 gcall *call = as_a <gcall *> (use->stmt);
7600 switch (gimple_call_internal_fn (call))
7601 {
7602 case IFN_MASK_LOAD:
7603 case IFN_MASK_STORE:
7604 case IFN_MASK_LOAD_LANES:
7605 case IFN_MASK_STORE_LANES:
7606 case IFN_MASK_LEN_LOAD_LANES:
7607 case IFN_MASK_LEN_STORE_LANES:
7608 case IFN_LEN_LOAD:
7609 case IFN_LEN_STORE:
7610 case IFN_MASK_LEN_LOAD:
7611 case IFN_MASK_LEN_STORE:
7612 /* The second argument contains the correct alias type. */
7613 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7614 return TREE_TYPE (gimple_call_arg (call, 1));
7615
7616 default:
7617 gcc_unreachable ();
7618 }
7619}
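
/* For illustration only (operand layout is defined by the internal-fn
   expanders, names are hypothetical): a masked load appears in the IL as
   something like

       vect_1 = .MASK_LOAD (addr_2, 32B, mask_3);

   where the address IV use is the first operand and the type of the second
   operand, a pointer constant, is the alias pointer type returned above.  */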
7620
7621
7622/* Rewrites USE (address that is an iv) using candidate CAND. */
7623
7624static void
7625rewrite_use_address (struct ivopts_data *data,
7626 struct iv_use *use, struct iv_cand *cand)
7627{
7628 aff_tree aff;
7629 bool ok;
7630
7631 adjust_iv_update_pos (cand, use);
7632 ok = get_computation_aff (loop: data->current_loop, at: use->stmt, use, cand, aff: &aff);
7633 gcc_assert (ok);
7634 unshare_aff_combination (&aff);
7635
7636 /* To avoid undefined overflow problems, all IV candidates use unsigned
7637 integer types. The drawback is that this makes it impossible for
7638 create_mem_ref to distinguish an IV that is based on a memory object
7639 from one that represents simply an offset.
7640
7641 To work around this problem, we pass a hint to create_mem_ref that
7642 indicates which variable (if any) in aff is an IV based on a memory
7643 object. Note that we only consider the candidate. If this is not
7644 based on an object, the base of the reference is in some subexpression
7645 of the use -- but these will use pointer types, so they are recognized
7646 by the create_mem_ref heuristics anyway. */
7647 tree iv = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7648 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7649 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7650 tree type = use->mem_type;
7651 tree alias_ptr_type;
7652 if (use->type == USE_PTR_ADDRESS)
7653 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7654 else
7655 {
7656 gcc_assert (type == TREE_TYPE (*use->op_p));
7657 unsigned int align = get_object_alignment (*use->op_p);
7658 if (align != TYPE_ALIGN (type))
7659 type = build_aligned_type (type, align);
7660 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7661 }
7662 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7663 iv, base_hint, data->speed);
7664
7665 if (use->type == USE_PTR_ADDRESS)
7666 {
7667 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7668 ref = fold_convert (get_use_type (use), ref);
7669 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7670 true, GSI_SAME_STMT);
7671 }
7672 else
7673 {
7674 /* When we end up confused enough and have no suitable base but
7675 stuffed everything to index2 use a LEA for the address and
7676 create a plain MEM_REF to avoid basing a memory reference
7677 on address zero which create_mem_ref_raw does as fallback. */
7678 if (TREE_CODE (ref) == TARGET_MEM_REF
7679 && TMR_INDEX2 (ref) != NULL_TREE
7680 && integer_zerop (TREE_OPERAND (ref, 0)))
7681 {
7682 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7683 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7684 true, GSI_SAME_STMT);
7685 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7686 }
7687 copy_ref_info (ref, *use->op_p);
7688 }
7689
7690 *use->op_p = ref;
7691}
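
/* Illustrative result (hypothetical operands; the exact printing is up to
   tree-pretty-print): an address use *p_7 typically ends up as a
   TARGET_MEM_REF such as

       MEM[base: _12, index: ivtmp_5, step: 4, offset: 0B]

   where the BASE_HINT computed above steers create_mem_ref toward using the
   candidate as the base when it is based on a memory object.  */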
7692
7693/* Rewrites USE (the condition such that one of the arguments is an iv) using
7694 candidate CAND. */
7695
7696static void
7697rewrite_use_compare (struct ivopts_data *data,
7698 struct iv_use *use, struct iv_cand *cand)
7699{
7700 tree comp, op, bound;
7701 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7702 enum tree_code compare;
7703 struct iv_group *group = data->vgroups[use->group_id];
7704 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7705
7706 bound = cp->value;
7707 if (bound)
7708 {
7709 tree var = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7710 tree var_type = TREE_TYPE (var);
7711 gimple_seq stmts;
7712
7713 if (dump_file && (dump_flags & TDF_DETAILS))
7714 {
7715 fprintf (stream: dump_file, format: "Replacing exit test: ");
7716 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7717 }
7718 compare = cp->comp;
7719 bound = unshare_expr (fold_convert (var_type, bound));
7720 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7721 if (stmts)
7722 gsi_insert_seq_on_edge_immediate (
7723 loop_preheader_edge (data->current_loop),
7724 stmts);
7725
7726 gcond *cond_stmt = as_a <gcond *> (p: use->stmt);
7727 gimple_cond_set_lhs (gs: cond_stmt, lhs: var);
7728 gimple_cond_set_code (gs: cond_stmt, code: compare);
7729 gimple_cond_set_rhs (gs: cond_stmt, rhs: op);
7730 return;
7731 }
7732
7733 /* The induction variable elimination failed; just express the original
7734 giv. */
7735 comp = get_computation_at (loop: data->current_loop, at: use->stmt, use, cand);
7736 gcc_assert (comp != NULL_TREE);
7737 gcc_assert (use->op_p != NULL);
7738 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7739 SSA_NAME_VAR (*use->op_p),
7740 true, GSI_SAME_STMT);
7741}
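
/* Illustrative example (hypothetical SSA names): when elimination found a
   bound, an exit test such as

       if (i_7 < n_3)

   is rewritten in place into a test against the selected candidate, e.g.

       if (ivtmp_12 != _25)

   with the computation of the bound _25 inserted on the loop preheader edge
   if it is not already a simple operand.  */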
7742
7743/* Rewrite the groups using the selected induction variables. */
7744
7745static void
7746rewrite_groups (struct ivopts_data *data)
7747{
7748 unsigned i, j;
7749
7750 for (i = 0; i < data->vgroups.length (); i++)
7751 {
7752 struct iv_group *group = data->vgroups[i];
7753 struct iv_cand *cand = group->selected;
7754
7755 gcc_assert (cand);
7756
7757 if (group->type == USE_NONLINEAR_EXPR)
7758 {
7759 for (j = 0; j < group->vuses.length (); j++)
7760 {
7761 rewrite_use_nonlinear_expr (data, use: group->vuses[j], cand);
7762 update_stmt (s: group->vuses[j]->stmt);
7763 }
7764 }
7765 else if (address_p (type: group->type))
7766 {
7767 for (j = 0; j < group->vuses.length (); j++)
7768 {
7769 rewrite_use_address (data, use: group->vuses[j], cand);
7770 update_stmt (s: group->vuses[j]->stmt);
7771 }
7772 }
7773 else
7774 {
7775 gcc_assert (group->type == USE_COMPARE);
7776
7777 for (j = 0; j < group->vuses.length (); j++)
7778 {
7779 rewrite_use_compare (data, use: group->vuses[j], cand);
7780 update_stmt (s: group->vuses[j]->stmt);
7781 }
7782 }
7783 }
7784}
7785
7786/* Removes the ivs that are not used after rewriting. */
7787
7788static void
7789remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7790{
7791 unsigned j;
7792 bitmap_iterator bi;
7793
7794 /* Figure out an order in which to release SSA DEFs so that we don't
7795 release something that we'd have to propagate into a debug stmt
7796 afterwards. */
7797 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7798 {
7799 struct version_info *info;
7800
7801 info = ver_info (data, ver: j);
7802 if (info->iv
7803 && !integer_zerop (info->iv->step)
7804 && !info->inv_id
7805 && !info->iv->nonlin_use
7806 && !info->preserve_biv)
7807 {
7808 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7809
7810 tree def = info->iv->ssa_name;
7811
7812 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7813 {
7814 imm_use_iterator imm_iter;
7815 use_operand_p use_p;
7816 gimple *stmt;
7817 int count = 0;
7818
7819 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7820 {
7821 if (!gimple_debug_bind_p (s: stmt))
7822 continue;
7823
7824 /* We just want to determine whether to do nothing
7825 (count == 0), to substitute the computed
7826 expression into a single use of the SSA DEF by
7827 itself (count == 1), or to use a debug temp
7828 because the SSA DEF is used multiple times or as
7829 part of a larger expression (count > 1). */
7830 count++;
7831 if (gimple_debug_bind_get_value (dbg: stmt) != def)
7832 count++;
7833
7834 if (count > 1)
7835 break;
7836 }
7837
7838 if (!count)
7839 continue;
7840
7841 struct iv_use dummy_use;
7842 struct iv_cand *best_cand = NULL, *cand;
7843 unsigned i, best_pref = 0, cand_pref;
7844 tree comp = NULL_TREE;
7845
7846 memset (s: &dummy_use, c: 0, n: sizeof (dummy_use));
7847 dummy_use.iv = info->iv;
7848 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7849 {
7850 cand = data->vgroups[i]->selected;
7851 if (cand == best_cand)
7852 continue;
7853 cand_pref = operand_equal_p (cand->iv->step,
7854 info->iv->step, flags: 0)
7855 ? 4 : 0;
7856 cand_pref
7857 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7858 == TYPE_MODE (TREE_TYPE (info->iv->base))
7859 ? 2 : 0;
7860 cand_pref
7861 += TREE_CODE (cand->iv->base) == INTEGER_CST
7862 ? 1 : 0;
7863 if (best_cand == NULL || best_pref < cand_pref)
7864 {
7865 tree this_comp
7866 = get_debug_computation_at (loop: data->current_loop,
7867 SSA_NAME_DEF_STMT (def),
7868 use: &dummy_use, cand);
7869 if (this_comp)
7870 {
7871 best_cand = cand;
7872 best_pref = cand_pref;
7873 comp = this_comp;
7874 }
7875 }
7876 }
7877
7878 if (!best_cand)
7879 continue;
7880
7881 comp = unshare_expr (comp);
7882 if (count > 1)
7883 {
7884 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7885 /* FIXME: Is setting the mode really necessary? */
7886 if (SSA_NAME_VAR (def))
7887 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7888 else
7889 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7890 gdebug *def_temp
7891 = gimple_build_debug_bind (vexpr, comp, NULL);
7892 gimple_stmt_iterator gsi;
7893
7894 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7895 gsi = gsi_after_labels (bb: gimple_bb
7896 (SSA_NAME_DEF_STMT (def)));
7897 else
7898 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7899
7900 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7901 comp = vexpr;
7902 }
7903
7904 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7905 {
7906 if (!gimple_debug_bind_p (s: stmt))
7907 continue;
7908
7909 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7910 SET_USE (use_p, comp);
7911
7912 update_stmt (s: stmt);
7913 }
7914 }
7915 }
7916 }
7917}
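
/* Illustrative sketch of the debug handling above (hypothetical names): when
   a removed IV is referenced by several debug binds, a debug temporary keeps
   the replacement expression in one place,

       # DEBUG D#1 => ivtmp_3 * 4 + base_2
       # DEBUG i => D#1

   while a single plain bind (count == 1) gets the expression substituted
   directly.  */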
7918
7919/* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7920 for hash_map::traverse. */
7921
7922bool
7923free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7924{
7925 if (value)
7926 {
7927 value->~tree_niter_desc ();
7928 free (ptr: value);
7929 }
7930 return true;
7931}
7932
7933/* Frees data allocated by the optimization of a single loop. */
7934
7935static void
7936free_loop_data (struct ivopts_data *data)
7937{
7938 unsigned i, j;
7939 bitmap_iterator bi;
7940 tree obj;
7941
7942 if (data->niters)
7943 {
7944 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7945 delete data->niters;
7946 data->niters = NULL;
7947 }
7948
7949 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7950 {
7951 struct version_info *info;
7952
7953 info = ver_info (data, ver: i);
7954 info->iv = NULL;
7955 info->has_nonlin_use = false;
7956 info->preserve_biv = false;
7957 info->inv_id = 0;
7958 }
7959 bitmap_clear (data->relevant);
7960 bitmap_clear (data->important_candidates);
7961
7962 for (i = 0; i < data->vgroups.length (); i++)
7963 {
7964 struct iv_group *group = data->vgroups[i];
7965
7966 for (j = 0; j < group->vuses.length (); j++)
7967 free (ptr: group->vuses[j]);
7968 group->vuses.release ();
7969
7970 BITMAP_FREE (group->related_cands);
7971 for (j = 0; j < group->n_map_members; j++)
7972 {
7973 if (group->cost_map[j].inv_vars)
7974 BITMAP_FREE (group->cost_map[j].inv_vars);
7975 if (group->cost_map[j].inv_exprs)
7976 BITMAP_FREE (group->cost_map[j].inv_exprs);
7977 }
7978
7979 free (ptr: group->cost_map);
7980 free (ptr: group);
7981 }
7982 data->vgroups.truncate (size: 0);
7983
7984 for (i = 0; i < data->vcands.length (); i++)
7985 {
7986 struct iv_cand *cand = data->vcands[i];
7987
7988 if (cand->inv_vars)
7989 BITMAP_FREE (cand->inv_vars);
7990 if (cand->inv_exprs)
7991 BITMAP_FREE (cand->inv_exprs);
7992 free (ptr: cand);
7993 }
7994 data->vcands.truncate (size: 0);
7995
7996 if (data->version_info_size < num_ssa_names)
7997 {
7998 data->version_info_size = 2 * num_ssa_names;
7999 free (ptr: data->version_info);
8000 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8001 }
8002
8003 data->max_inv_var_id = 0;
8004 data->max_inv_expr_id = 0;
8005
8006 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
8007 SET_DECL_RTL (obj, NULL_RTX);
8008
8009 decl_rtl_to_reset.truncate (size: 0);
8010
8011 data->inv_expr_tab->empty ();
8012
8013 data->iv_common_cand_tab->empty ();
8014 data->iv_common_cands.truncate (size: 0);
8015}
8016
8017/* Finalizes data structures used by the iv optimization pass. */
8019
8020static void
8021tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8022{
8023 free_loop_data (data);
8024 free (ptr: data->version_info);
8025 BITMAP_FREE (data->relevant);
8026 BITMAP_FREE (data->important_candidates);
8027
8028 decl_rtl_to_reset.release ();
8029 data->vgroups.release ();
8030 data->vcands.release ();
8031 delete data->inv_expr_tab;
8032 data->inv_expr_tab = NULL;
8033 free_affine_expand_cache (&data->name_expansion_cache);
8034 if (data->base_object_map)
8035 delete data->base_object_map;
8036 delete data->iv_common_cand_tab;
8037 data->iv_common_cand_tab = NULL;
8038 data->iv_common_cands.release ();
8039 obstack_free (&data->iv_obstack, NULL);
8040}
8041
8042/* Returns true if the loop body BODY includes any function calls. */
8043
8044static bool
8045loop_body_includes_call (basic_block *body, unsigned num_nodes)
8046{
8047 gimple_stmt_iterator gsi;
8048 unsigned i;
8049
8050 for (i = 0; i < num_nodes; i++)
8051 for (gsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
8052 {
8053 gimple *stmt = gsi_stmt (i: gsi);
8054 if (is_gimple_call (gs: stmt)
8055 && !gimple_call_internal_p (gs: stmt)
8056 && !is_inexpensive_builtin (gimple_call_fndecl (gs: stmt)))
8057 return true;
8058 }
8059 return false;
8060}
8061
8062/* Determine cost scaling factor for basic blocks in loop. */
8063#define COST_SCALING_FACTOR_BOUND (20)
8064
8065static void
8066determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8067{
8068 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8069 if (!data->speed || lfreq <= 0)
8070 return;
8071
8072 int max_freq = lfreq;
8073 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8074 {
8075 body[i]->aux = (void *)(intptr_t) 1;
8076 if (max_freq < body[i]->count.to_frequency (cfun))
8077 max_freq = body[i]->count.to_frequency (cfun);
8078 }
8079 if (max_freq > lfreq)
8080 {
8081 int divisor, factor;
8082 /* Check if scaling factor itself needs to be scaled by the bound. This
8083 is to avoid overflow when scaling cost according to profile info. */
8084 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8085 {
8086 divisor = max_freq;
8087 factor = COST_SCALING_FACTOR_BOUND;
8088 }
8089 else
8090 {
8091 divisor = lfreq;
8092 factor = 1;
8093 }
8094 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8095 {
8096 int bfreq = body[i]->count.to_frequency (cfun);
8097 if (bfreq <= lfreq)
8098 continue;
8099
8100 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8101 }
8102 }
8103}
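
/* Worked example of the scaling above (numbers made up): with header
   frequency lfreq = 10 and hottest-block frequency max_freq = 1000, the
   ratio 1000 / 10 = 100 exceeds COST_SCALING_FACTOR_BOUND, so divisor = 1000
   and factor = 20; a block with frequency 500 then gets weight
   20 * 500 / 1000 = 10, while blocks no hotter than the header keep the
   default weight of 1.  */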
8104
8105/* Find the doloop comparison use and set its doloop_p flag if found. */
8106
8107static bool
8108find_doloop_use (struct ivopts_data *data)
8109{
8110 struct loop *loop = data->current_loop;
8111
8112 for (unsigned i = 0; i < data->vgroups.length (); i++)
8113 {
8114 struct iv_group *group = data->vgroups[i];
8115 if (group->type == USE_COMPARE)
8116 {
8117 gcc_assert (group->vuses.length () == 1);
8118 struct iv_use *use = group->vuses[0];
8119 gimple *stmt = use->stmt;
8120 if (gimple_code (g: stmt) == GIMPLE_COND)
8121 {
8122 basic_block bb = gimple_bb (g: stmt);
8123 edge true_edge, false_edge;
8124 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8125 /* This comparison controls the branch to the loop latch. Require the
8126 latch to be empty for now. */
8127 if ((loop->latch == true_edge->dest
8128 || loop->latch == false_edge->dest)
8129 && empty_block_p (loop->latch))
8130 {
8131 group->doloop_p = true;
8132 if (dump_file && (dump_flags & TDF_DETAILS))
8133 {
8134 fprintf (dump_file, "Doloop cmp iv use: ");
8135 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8136 }
8137 return true;
8138 }
8139 }
8140 }
8141 }
8142
8143 return false;
8144}
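
/* Illustrative shape of what is detected above (hypothetical labels):

       if (ivtmp_9 != 0)
         goto <bb latch>;      <-- latch must be empty
       else
         goto <bb exit>;

   i.e. a GIMPLE_COND whose true or false edge targets the empty loop latch
   is marked as the doloop compare use.  */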
8145
8146/* For targets that support doloop, predict whether the later RTL doloop
8147 transformation will be performed on this loop; if so, detect the doloop
8148 compare use and set the flag doloop_use_p. */
8149
8150void
8151analyze_and_mark_doloop_use (struct ivopts_data *data)
8152{
8153 data->doloop_use_p = false;
8154
8155 if (!flag_branch_on_count_reg)
8156 return;
8157
8158 if (data->current_loop->unroll == USHRT_MAX)
8159 return;
8160
8161 if (!generic_predict_doloop_p (data))
8162 return;
8163
8164 if (find_doloop_use (data))
8165 {
8166 data->doloop_use_p = true;
8167 if (dump_file && (dump_flags & TDF_DETAILS))
8168 {
8169 struct loop *loop = data->current_loop;
8170 fprintf (dump_file,
8171 "Predict loop %d can perform"
8172 " doloop optimization later.\n",
8173 loop->num);
8174 flow_loop_dump (loop, dump_file, NULL, 1);
8175 }
8176 }
8177}
8178
8179/* Optimizes the LOOP. Returns true if anything changed. */
8180
8181static bool
8182tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8183 bitmap toremove)
8184{
8185 bool changed = false;
8186 class iv_ca *iv_ca;
8187 edge exit = single_dom_exit (loop);
8188 basic_block *body;
8189
8190 gcc_assert (!data->niters);
8191 data->current_loop = loop;
8192 data->loop_loc = find_loop_location (loop).get_location_t ();
8193 data->speed = optimize_loop_for_speed_p (loop);
8194
8195 if (dump_file && (dump_flags & TDF_DETAILS))
8196 {
8197 fprintf (stream: dump_file, format: "Processing loop %d", loop->num);
8198 if (data->loop_loc != UNKNOWN_LOCATION)
8199 fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
8200 LOCATION_LINE (data->loop_loc));
8201 fprintf (stream: dump_file, format: "\n");
8202
8203 if (exit)
8204 {
8205 fprintf (stream: dump_file, format: " single exit %d -> %d, exit condition ",
8206 exit->src->index, exit->dest->index);
8207 print_gimple_stmt (dump_file, *gsi_last_bb (bb: exit->src),
8208 0, TDF_SLIM);
8209 fprintf (stream: dump_file, format: "\n");
8210 }
8211
8212 fprintf (stream: dump_file, format: "\n");
8213 }
8214
8215 body = get_loop_body (loop);
8216 data->body_includes_call = loop_body_includes_call (body, num_nodes: loop->num_nodes);
8217 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8218
8219 data->loop_single_exit_p
8220 = exit != NULL && loop_only_exit_p (loop, body, exit);
8221
8222 /* For each ssa name determines whether it behaves as an induction variable
8223 in some loop. */
8224 if (!find_induction_variables (data, body))
8225 goto finish;
8226
8227 /* Finds interesting uses (item 1). */
8228 find_interesting_uses (data, body);
8229 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8230 goto finish;
8231
8232 /* Determine cost scaling factor for basic blocks in loop. */
8233 determine_scaling_factor (data, body);
8234
8235 /* Analyze doloop possibility and mark the doloop use if predicted. */
8236 analyze_and_mark_doloop_use (data);
8237
8238 /* Finds candidates for the induction variables (item 2). */
8239 find_iv_candidates (data);
8240
8241 /* Calculates the costs (item 3, part 1). */
8242 determine_iv_costs (data);
8243 determine_group_iv_costs (data);
8244 determine_set_costs (data);
8245
8246 /* Find the optimal set of induction variables (item 3, part 2). */
8247 iv_ca = find_optimal_iv_set (data);
8248 /* Cleanup basic block aux field. */
8249 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8250 body[i]->aux = NULL;
8251 if (!iv_ca)
8252 goto finish;
8253 changed = true;
8254
8255 /* Create the new induction variables (item 4, part 1). */
8256 create_new_ivs (data, set: iv_ca);
8257 iv_ca_free (ivs: &iv_ca);
8258
8259 /* Rewrite the uses (item 4, part 2). */
8260 rewrite_groups (data);
8261
8262 /* Remove the ivs that are unused after rewriting. */
8263 remove_unused_ivs (data, toremove);
8264
8265finish:
8266 free (ptr: body);
8267 free_loop_data (data);
8268
8269 return changed;
8270}
8271
8272/* Main entry point. Optimizes induction variables in loops. */
8273
8274void
8275tree_ssa_iv_optimize (void)
8276{
8277 struct ivopts_data data;
8278 auto_bitmap toremove;
8279
8280 tree_ssa_iv_optimize_init (data: &data);
8281 mark_ssa_maybe_undefs ();
8282
8283 /* Optimize the loops starting with the innermost ones. */
8284 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8285 {
8286 if (!dbg_cnt (index: ivopts_loop))
8287 continue;
8288
8289 if (dump_file && (dump_flags & TDF_DETAILS))
8290 flow_loop_dump (loop, dump_file, NULL, 1);
8291
8292 tree_ssa_iv_optimize_loop (data: &data, loop, toremove);
8293 }
8294
8295 /* Remove eliminated IV defs. */
8296 release_defs_bitset (toremove);
8297
8298 /* We have changed the structure of induction variables; it might happen
8299 that definitions in the scev database refer to some of them that were
8300 eliminated. */
8301 scev_reset_htab ();
8302 /* Likewise niter and control-IV information. */
8303 free_numbers_of_iterations_estimates (cfun);
8304
8305 tree_ssa_iv_optimize_finalize (data: &data);
8306}
8307
8308#include "gt-tree-ssa-loop-ivopts.h"
8309
