1 | /* Scalar evolution detector. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Sebastian Pop <s.pop@laposte.net> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* |
22 | Description: |
23 | |
24 | This pass analyzes the evolution of scalar variables in loop |
25 | structures. The algorithm is based on the SSA representation, |
26 | and on the loop hierarchy tree. This algorithm is not based on |
27 | the notion of versions of a variable, as it was the case for the |
28 | previous implementations of the scalar evolution algorithm, but |
29 | it assumes that each defined name is unique. |
30 | |
31 | The notation used in this file is called "chains of recurrences", |
32 | and has been proposed by Eugene Zima, Robert Van Engelen, and |
33 | others for describing induction variables in programs. For example |
34 | "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0 |
35 | when entering in the loop_1 and has a step 2 in this loop, in other |
36 | words "for (b = 0; b < N; b+=2);". Note that the coefficients of |
37 | this chain of recurrence (or chrec [shrek]) can contain the name of |
38 | other variables, in which case they are called parametric chrecs. |
39 | For example, "b -> {a, +, 2}_1" means that the initial value of "b" |
40 | is the value of "a". In most of the cases these parametric chrecs |
41 | are fully instantiated before their use because symbolic names can |
42 | hide some difficult cases such as self-references described later |
43 | (see the Fibonacci example). |
44 | |
45 | A short sketch of the algorithm is: |
46 | |
47 | Given a scalar variable to be analyzed, follow the SSA edge to |
48 | its definition: |
49 | |
50 | - When the definition is a GIMPLE_ASSIGN: if the right hand side |
51 | (RHS) of the definition cannot be statically analyzed, the answer |
52 | of the analyzer is: "don't know". |
53 | Otherwise, for all the variables that are not yet analyzed in the |
54 | RHS, try to determine their evolution, and finally try to |
55 | evaluate the operation of the RHS that gives the evolution |
56 | function of the analyzed variable. |
57 | |
58 | - When the definition is a condition-phi-node: determine the |
59 | evolution function for all the branches of the phi node, and |
60 | finally merge these evolutions (see chrec_merge). |
61 | |
62 | - When the definition is a loop-phi-node: determine its initial |
63 | condition, that is the SSA edge defined in an outer loop, and |
64 | keep it symbolic. Then determine the SSA edges that are defined |
65 | in the body of the loop. Follow the inner edges until ending on |
66 | another loop-phi-node of the same analyzed loop. If the reached |
67 | loop-phi-node is not the starting loop-phi-node, then we keep |
68 | this definition under a symbolic form. If the reached |
69 | loop-phi-node is the same as the starting one, then we compute a |
70 | symbolic stride on the return path. The result is then the |
71 | symbolic chrec {initial_condition, +, symbolic_stride}_loop. |
72 | |
73 | Examples: |
74 | |
75 | Example 1: Illustration of the basic algorithm. |
76 | |
77 | | a = 3 |
78 | | loop_1 |
79 | | b = phi (a, c) |
80 | | c = b + 1 |
81 | | if (c > 10) exit_loop |
82 | | endloop |
83 | |
84 | Suppose that we want to know the number of iterations of the |
85 | loop_1. The exit_loop is controlled by a COND_EXPR (c > 10). We |
86 | ask the scalar evolution analyzer two questions: what's the |
87 | scalar evolution (scev) of "c", and what's the scev of "10". For |
88 | "10" the answer is "10" since it is a scalar constant. For the |
89 | scalar variable "c", it follows the SSA edge to its definition, |
90 | "c = b + 1", and then asks again what's the scev of "b". |
91 | Following the SSA edge, we end on a loop-phi-node "b = phi (a, |
92 | c)", where the initial condition is "a", and the inner loop edge |
93 | is "c". The initial condition is kept under a symbolic form (it |
94 | may be the case that the copy constant propagation has done its |
95 | work and we end with the constant "3" as one of the edges of the |
96 | loop-phi-node). The update edge is followed to the end of the |
97 | loop, and until reaching again the starting loop-phi-node: b -> c |
98 | -> b. At this point we have drawn a path from "b" to "b" from |
99 | which we compute the stride in the loop: in this example it is |
100 | "+1". The resulting scev for "b" is "b -> {a, +, 1}_1". Now |
101 | that the scev for "b" is known, it is possible to compute the |
102 | scev for "c", that is "c -> {a + 1, +, 1}_1". In order to |
103 | determine the number of iterations in the loop_1, we have to |
104 | instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some |
105 | more analysis the scev {4, +, 1}_1, or in other words, this is |
106 | the function "f (x) = x + 4", where x is the iteration count of |
107 | the loop_1. Now we have to solve the inequality "x + 4 > 10", |
108 | and take the smallest iteration number for which the loop is |
109 | exited: x = 7. This loop runs from x = 0 to x = 7, and in total |
110 | there are 8 iterations. In terms of loop normalization, we have |
111 | created a variable that is implicitly defined, "x" or just "_1", |
112 | and all the other analyzed scalars of the loop are defined in |
113 | function of this variable: |
114 | |
115 | a -> 3 |
116 | b -> {3, +, 1}_1 |
117 | c -> {4, +, 1}_1 |
118 | |
119 | or in terms of a C program: |
120 | |
121 | | a = 3 |
122 | | for (x = 0; x <= 7; x++) |
123 | | { |
124 | | b = x + 3 |
125 | | c = x + 4 |
126 | | } |
127 | |
128 | Example 2a: Illustration of the algorithm on nested loops. |
129 | |
130 | | loop_1 |
131 | | a = phi (1, b) |
132 | | c = a + 2 |
133 | | loop_2 10 times |
134 | | b = phi (c, d) |
135 | | d = b + 3 |
136 | | endloop |
137 | | endloop |
138 | |
139 | For analyzing the scalar evolution of "a", the algorithm follows |
140 | the SSA edge into the loop's body: "a -> b". "b" is an inner |
141 | loop-phi-node, and its analysis as in Example 1, gives: |
142 | |
143 | b -> {c, +, 3}_2 |
144 | d -> {c + 3, +, 3}_2 |
145 | |
146 | Following the SSA edge for the initial condition, we end on "c = a |
147 | + 2", and then on the starting loop-phi-node "a". From this point, |
148 | the loop stride is computed: back on "c = a + 2" we get a "+2" in |
149 | the loop_1, then on the loop-phi-node "b" we compute the overall |
150 | effect of the inner loop that is "b = c + 30", and we get a "+30" |
151 | in the loop_1. That means that the overall stride in loop_1 is |
152 | equal to "+32", and the result is: |
153 | |
154 | a -> {1, +, 32}_1 |
155 | c -> {3, +, 32}_1 |
156 | |
157 | Example 2b: Multivariate chains of recurrences. |
158 | |
159 | | loop_1 |
160 | | k = phi (0, k + 1) |
161 | | loop_2 4 times |
162 | | j = phi (0, j + 1) |
163 | | loop_3 4 times |
164 | | i = phi (0, i + 1) |
165 | | A[j + k] = ... |
166 | | endloop |
167 | | endloop |
168 | | endloop |
169 | |
170 | Analyzing the access function of array A with |
171 | instantiate_parameters (loop_1, "j + k"), we obtain the |
172 | instantiation and the analysis of the scalar variables "j" and "k" |
173 | in loop_1. This leads to the scalar evolution {4, +, 1}_1: the end |
174 | value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is |
175 | {0, +, 1}_1. To obtain the evolution function in loop_3 and |
176 | instantiate the scalar variables up to loop_1, one has to use: |
177 | instantiate_scev (block_before_loop (loop_1), loop_3, "j + k"). |
178 | The result of this call is {{0, +, 1}_1, +, 1}_2. |
179 | |
180 | Example 3: Higher degree polynomials. |
181 | |
182 | | loop_1 |
183 | | a = phi (2, b) |
184 | | c = phi (5, d) |
185 | | b = a + 1 |
186 | | d = c + a |
187 | | endloop |
188 | |
189 | a -> {2, +, 1}_1 |
190 | b -> {3, +, 1}_1 |
191 | c -> {5, +, a}_1 |
192 | d -> {5 + a, +, a}_1 |
193 | |
194 | instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1 |
195 | instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1 |
196 | |
197 | Example 4: Lucas, Fibonacci, or mixers in general. |
198 | |
199 | | loop_1 |
200 | | a = phi (1, b) |
201 | | c = phi (3, d) |
202 | | b = c |
203 | | d = c + a |
204 | | endloop |
205 | |
206 | a -> (1, c)_1 |
207 | c -> {3, +, a}_1 |
208 | |
209 | The syntax "(1, c)_1" stands for a PEELED_CHREC that has the |
210 | following semantics: during the first iteration of the loop_1, the |
211 | variable contains the value 1, and then it contains the value "c". |
212 | Note that this syntax is close to the syntax of the loop-phi-node: |
213 | "a -> (1, c)_1" vs. "a = phi (1, c)". |
214 | |
215 | The symbolic chrec representation contains all the semantics of the |
216 | original code. What is more difficult is to use this information. |
217 | |
218 | Example 5: Flip-flops, or exchangers. |
219 | |
220 | | loop_1 |
221 | | a = phi (1, b) |
222 | | c = phi (3, d) |
223 | | b = c |
224 | | d = a |
225 | | endloop |
226 | |
227 | a -> (1, c)_1 |
228 | c -> (3, a)_1 |
229 | |
230 | Based on these symbolic chrecs, it is possible to refine this |
231 | information into the more precise PERIODIC_CHRECs: |
232 | |
233 | a -> |1, 3|_1 |
234 | c -> |3, 1|_1 |
235 | |
236 | This transformation is not yet implemented. |
237 | |
238 | Further readings: |
239 | |
240 | You can find a more detailed description of the algorithm in: |
241 | http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf |
242 | http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz. But note that |
243 | this is a preliminary report and some of the details of the |
244 | algorithm have changed. I'm working on a research report that |
245 | updates the description of the algorithms to reflect the design |
246 | choices used in this implementation. |
247 | |
248 | A set of slides show a high level overview of the algorithm and run |
249 | an example through the scalar evolution analyzer: |
250 | http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf |
251 | |
252 | The slides that I have presented at the GCC Summit'04 are available |
253 | at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf |
254 | */ |
255 | |
256 | #include "config.h" |
257 | #include "system.h" |
258 | #include "coretypes.h" |
259 | #include "backend.h" |
260 | #include "target.h" |
261 | #include "rtl.h" |
262 | #include "optabs-query.h" |
263 | #include "tree.h" |
264 | #include "gimple.h" |
265 | #include "ssa.h" |
266 | #include "gimple-pretty-print.h" |
267 | #include "fold-const.h" |
268 | #include "gimplify.h" |
269 | #include "gimple-iterator.h" |
270 | #include "gimplify-me.h" |
271 | #include "tree-cfg.h" |
272 | #include "tree-ssa-loop-ivopts.h" |
273 | #include "tree-ssa-loop-manip.h" |
274 | #include "tree-ssa-loop-niter.h" |
275 | #include "tree-ssa-loop.h" |
276 | #include "tree-ssa.h" |
277 | #include "cfgloop.h" |
278 | #include "tree-chrec.h" |
279 | #include "tree-affine.h" |
280 | #include "tree-scalar-evolution.h" |
281 | #include "dumpfile.h" |
282 | #include "tree-ssa-propagate.h" |
283 | #include "gimple-fold.h" |
284 | #include "tree-into-ssa.h" |
285 | #include "builtins.h" |
286 | #include "case-cfn-macros.h" |
287 | |
288 | static tree analyze_scalar_evolution_1 (class loop *, tree); |
289 | static tree analyze_scalar_evolution_for_address_of (class loop *loop, |
290 | tree var); |
291 | |
292 | /* The cached information about an SSA name with version NAME_VERSION, |
293 | claiming that below basic block with index INSTANTIATED_BELOW, the |
294 | value of the SSA name can be expressed as CHREC. */ |
295 | |
296 | struct GTY((for_user)) scev_info_str { |
297 | unsigned int name_version; |
298 | int instantiated_below; |
299 | tree chrec; |
300 | }; |
301 | |
302 | /* Counters for the scev database. */ |
303 | static unsigned nb_set_scev = 0; |
304 | static unsigned nb_get_scev = 0; |
305 | |
306 | struct scev_info_hasher : ggc_ptr_hash<scev_info_str> |
307 | { |
308 | static hashval_t hash (scev_info_str *i); |
309 | static bool equal (const scev_info_str *a, const scev_info_str *b); |
310 | }; |
311 | |
312 | static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info; |
313 | |
314 | |
315 | /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */ |
316 | |
317 | static inline struct scev_info_str * |
318 | new_scev_info_str (basic_block instantiated_below, tree var) |
319 | { |
320 | struct scev_info_str *res; |
321 | |
322 | res = ggc_alloc<scev_info_str> (); |
323 | res->name_version = SSA_NAME_VERSION (var); |
324 | res->chrec = chrec_not_analyzed_yet; |
325 | res->instantiated_below = instantiated_below->index; |
326 | |
327 | return res; |
328 | } |
329 | |
330 | /* Computes a hash function for database element ELT. */ |
331 | |
332 | hashval_t |
333 | scev_info_hasher::hash (scev_info_str *elt) |
334 | { |
335 | return elt->name_version ^ elt->instantiated_below; |
336 | } |
337 | |
338 | /* Compares database elements E1 and E2. */ |
339 | |
340 | bool |
341 | scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2) |
342 | { |
343 | return (elt1->name_version == elt2->name_version |
344 | && elt1->instantiated_below == elt2->instantiated_below); |
345 | } |
346 | |
347 | /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block. |
348 | A first query on VAR returns chrec_not_analyzed_yet. */ |
349 | |
350 | static tree * |
351 | find_var_scev_info (basic_block instantiated_below, tree var) |
352 | { |
353 | struct scev_info_str *res; |
354 | struct scev_info_str tmp; |
355 | |
356 | tmp.name_version = SSA_NAME_VERSION (var); |
357 | tmp.instantiated_below = instantiated_below->index; |
358 | scev_info_str **slot = scalar_evolution_info->find_slot (value: &tmp, insert: INSERT); |
359 | |
360 | if (!*slot) |
361 | *slot = new_scev_info_str (instantiated_below, var); |
362 | res = *slot; |
363 | |
364 | return &res->chrec; |
365 | } |
366 | |
367 | |
368 | /* Hashtable helpers for a temporary hash-table used when |
369 | analyzing a scalar evolution, instantiating a CHREC or |
370 | resolving mixers. */ |
371 | |
372 | class instantiate_cache_type |
373 | { |
374 | public: |
375 | htab_t map; |
376 | vec<scev_info_str> entries; |
377 | |
378 | instantiate_cache_type () : map (NULL), entries (vNULL) {} |
379 | ~instantiate_cache_type (); |
380 | tree get (unsigned slot) { return entries[slot].chrec; } |
381 | void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; } |
382 | }; |
383 | |
384 | instantiate_cache_type::~instantiate_cache_type () |
385 | { |
386 | if (map != NULL) |
387 | { |
388 | htab_delete (map); |
389 | entries.release (); |
390 | } |
391 | } |
392 | |
393 | /* Cache to avoid infinite recursion when instantiating an SSA name. |
394 | Live during the outermost analyze_scalar_evolution, instantiate_scev |
395 | or resolve_mixers call. */ |
396 | static instantiate_cache_type *global_cache; |
397 | |
398 | |
399 | /* Return true when PHI is a loop-phi-node. */ |
400 | |
401 | static bool |
402 | loop_phi_node_p (gimple *phi) |
403 | { |
404 | /* The implementation of this function is based on the following |
405 | property: "all the loop-phi-nodes of a loop are contained in the |
406 | loop's header basic block". */ |
407 | |
408 | return loop_containing_stmt (stmt: phi)->header == gimple_bb (g: phi); |
409 | } |
410 | |
411 | /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP. |
412 | In general, in the case of multivariate evolutions we want to get |
413 | the evolution in different loops. LOOP specifies the level for |
414 | which to get the evolution. |
415 | |
416 | Example: |
417 | |
418 | | for (j = 0; j < 100; j++) |
419 | | { |
420 | | for (k = 0; k < 100; k++) |
421 | | { |
422 | | i = k + j; - Here the value of i is a function of j, k. |
423 | | } |
424 | | ... = i - Here the value of i is a function of j. |
425 | | } |
426 | | ... = i - Here the value of i is a scalar. |
427 | |
428 | Example: |
429 | |
430 | | i_0 = ... |
431 | | loop_1 10 times |
432 | | i_1 = phi (i_0, i_2) |
433 | | i_2 = i_1 + 2 |
434 | | endloop |
435 | |
436 | This loop has the same effect as: |
437 | LOOP_1 has the same effect as: |
438 | |
439 | | i_1 = i_0 + 20 |
440 | |
441 | The overall effect of the loop, "i_0 + 20" in the previous example, |
442 | is obtained by passing in the parameters: LOOP = 1, |
443 | EVOLUTION_FN = {i_0, +, 2}_1. |
444 | */ |
445 | |
446 | tree |
447 | compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn) |
448 | { |
449 | bool val = false; |
450 | |
451 | if (evolution_fn == chrec_dont_know) |
452 | return chrec_dont_know; |
453 | |
454 | else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC) |
455 | { |
456 | class loop *inner_loop = get_chrec_loop (chrec: evolution_fn); |
457 | |
458 | if (inner_loop == loop |
459 | || flow_loop_nested_p (loop, inner_loop)) |
460 | { |
461 | tree nb_iter = number_of_latch_executions (inner_loop); |
462 | |
463 | if (nb_iter == chrec_dont_know) |
464 | return chrec_dont_know; |
465 | else |
466 | { |
467 | tree res; |
468 | |
469 | /* evolution_fn is the evolution function in LOOP. Get |
470 | its value in the nb_iter-th iteration. */ |
471 | res = chrec_apply (inner_loop->num, evolution_fn, nb_iter); |
472 | |
473 | if (chrec_contains_symbols_defined_in_loop (res, loop->num)) |
474 | res = instantiate_parameters (loop, chrec: res); |
475 | |
476 | /* Continue the computation until ending on a parent of LOOP. */ |
477 | return compute_overall_effect_of_inner_loop (loop, evolution_fn: res); |
478 | } |
479 | } |
480 | else |
481 | return evolution_fn; |
482 | } |
483 | |
484 | /* If the evolution function is an invariant, there is nothing to do. */ |
485 | else if (no_evolution_in_loop_p (chrec: evolution_fn, loop_num: loop->num, res: &val) && val) |
486 | return evolution_fn; |
487 | |
488 | else |
489 | return chrec_dont_know; |
490 | } |
491 | |
492 | /* Associate CHREC to SCALAR. */ |
493 | |
494 | static void |
495 | set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec) |
496 | { |
497 | tree *scalar_info; |
498 | |
499 | if (TREE_CODE (scalar) != SSA_NAME) |
500 | return; |
501 | |
502 | scalar_info = find_var_scev_info (instantiated_below, var: scalar); |
503 | |
504 | if (dump_file) |
505 | { |
506 | if (dump_flags & TDF_SCEV) |
507 | { |
508 | fprintf (stream: dump_file, format: "(set_scalar_evolution \n" ); |
509 | fprintf (stream: dump_file, format: " instantiated_below = %d \n" , |
510 | instantiated_below->index); |
511 | fprintf (stream: dump_file, format: " (scalar = " ); |
512 | print_generic_expr (dump_file, scalar); |
513 | fprintf (stream: dump_file, format: ")\n (scalar_evolution = " ); |
514 | print_generic_expr (dump_file, chrec); |
515 | fprintf (stream: dump_file, format: "))\n" ); |
516 | } |
517 | if (dump_flags & TDF_STATS) |
518 | nb_set_scev++; |
519 | } |
520 | |
521 | *scalar_info = chrec; |
522 | } |
523 | |
524 | /* Retrieve the chrec associated to SCALAR instantiated below |
525 | INSTANTIATED_BELOW block. */ |
526 | |
527 | static tree |
528 | get_scalar_evolution (basic_block instantiated_below, tree scalar) |
529 | { |
530 | tree res; |
531 | |
532 | if (dump_file) |
533 | { |
534 | if (dump_flags & TDF_SCEV) |
535 | { |
536 | fprintf (stream: dump_file, format: "(get_scalar_evolution \n" ); |
537 | fprintf (stream: dump_file, format: " (scalar = " ); |
538 | print_generic_expr (dump_file, scalar); |
539 | fprintf (stream: dump_file, format: ")\n" ); |
540 | } |
541 | if (dump_flags & TDF_STATS) |
542 | nb_get_scev++; |
543 | } |
544 | |
545 | if (VECTOR_TYPE_P (TREE_TYPE (scalar)) |
546 | || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE) |
547 | /* For chrec_dont_know we keep the symbolic form. */ |
548 | res = scalar; |
549 | else |
550 | switch (TREE_CODE (scalar)) |
551 | { |
552 | case SSA_NAME: |
553 | if (SSA_NAME_IS_DEFAULT_DEF (scalar)) |
554 | res = scalar; |
555 | else |
556 | res = *find_var_scev_info (instantiated_below, var: scalar); |
557 | break; |
558 | |
559 | case REAL_CST: |
560 | case FIXED_CST: |
561 | case INTEGER_CST: |
562 | res = scalar; |
563 | break; |
564 | |
565 | default: |
566 | res = chrec_not_analyzed_yet; |
567 | break; |
568 | } |
569 | |
570 | if (dump_file && (dump_flags & TDF_SCEV)) |
571 | { |
572 | fprintf (stream: dump_file, format: " (scalar_evolution = " ); |
573 | print_generic_expr (dump_file, res); |
574 | fprintf (stream: dump_file, format: "))\n" ); |
575 | } |
576 | |
577 | return res; |
578 | } |
579 | |
580 | |
581 | /* Depth first search algorithm. */ |
582 | |
583 | enum t_bool { |
584 | t_false, |
585 | t_true, |
586 | t_dont_know |
587 | }; |
588 | |
589 | class scev_dfs |
590 | { |
591 | public: |
592 | scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_) |
593 | : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {} |
594 | t_bool get_ev (tree *, tree); |
595 | |
596 | private: |
597 | t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int); |
598 | t_bool follow_ssa_edge_binary (gimple *at_stmt, |
599 | tree type, tree rhs0, enum tree_code code, |
600 | tree rhs1, tree *evolution_of_loop, int limit); |
601 | t_bool follow_ssa_edge_in_condition_phi_branch (int i, |
602 | gphi *condition_phi, |
603 | tree *evolution_of_branch, |
604 | tree init_cond, int limit); |
605 | t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi, |
606 | tree *evolution_of_loop, int limit); |
607 | t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node, |
608 | tree *evolution_of_loop, int limit); |
609 | tree add_to_evolution (tree chrec_before, enum tree_code code, |
610 | tree to_add, gimple *at_stmt); |
611 | tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt); |
612 | |
613 | class loop *loop; |
614 | gphi *loop_phi_node; |
615 | tree init_cond; |
616 | }; |
617 | |
618 | t_bool |
619 | scev_dfs::get_ev (tree *ev_fn, tree arg) |
620 | { |
621 | *ev_fn = chrec_dont_know; |
622 | return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0); |
623 | } |
624 | |
625 | /* Helper function for add_to_evolution. Returns the evolution |
626 | function for an assignment of the form "a = b + c", where "a" and |
627 | "b" are on the strongly connected component. CHREC_BEFORE is the |
628 | information that we already have collected up to this point. |
629 | TO_ADD is the evolution of "c". |
630 | |
631 | When CHREC_BEFORE has an evolution part in LOOP_NB, add to this |
632 | evolution the expression TO_ADD, otherwise construct an evolution |
633 | part for this loop. */ |
634 | |
635 | tree |
636 | scev_dfs::add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt) |
637 | { |
638 | tree type, left, right; |
639 | unsigned loop_nb = loop->num; |
640 | class loop *chloop; |
641 | |
642 | switch (TREE_CODE (chrec_before)) |
643 | { |
644 | case POLYNOMIAL_CHREC: |
645 | chloop = get_chrec_loop (chrec: chrec_before); |
646 | if (chloop == loop |
647 | || flow_loop_nested_p (chloop, loop)) |
648 | { |
649 | unsigned var; |
650 | |
651 | type = chrec_type (chrec: chrec_before); |
652 | |
653 | /* When there is no evolution part in this loop, build it. */ |
654 | if (chloop != loop) |
655 | { |
656 | var = loop_nb; |
657 | left = chrec_before; |
658 | right = SCALAR_FLOAT_TYPE_P (type) |
659 | ? build_real (type, dconst0) |
660 | : build_int_cst (type, 0); |
661 | } |
662 | else |
663 | { |
664 | var = CHREC_VARIABLE (chrec_before); |
665 | left = CHREC_LEFT (chrec_before); |
666 | right = CHREC_RIGHT (chrec_before); |
667 | } |
668 | |
669 | to_add = chrec_convert (type, to_add, at_stmt); |
670 | right = chrec_convert_rhs (type, right, at_stmt); |
671 | right = chrec_fold_plus (chrec_type (chrec: right), right, to_add); |
672 | return build_polynomial_chrec (loop_num: var, left, right); |
673 | } |
674 | else |
675 | { |
676 | gcc_assert (flow_loop_nested_p (loop, chloop)); |
677 | |
678 | /* Search the evolution in LOOP_NB. */ |
679 | left = add_to_evolution_1 (CHREC_LEFT (chrec_before), |
680 | to_add, at_stmt); |
681 | right = CHREC_RIGHT (chrec_before); |
682 | right = chrec_convert_rhs (chrec_type (chrec: left), right, at_stmt); |
683 | return build_polynomial_chrec (CHREC_VARIABLE (chrec_before), |
684 | left, right); |
685 | } |
686 | |
687 | default: |
688 | /* These nodes do not depend on a loop. */ |
689 | if (chrec_before == chrec_dont_know) |
690 | return chrec_dont_know; |
691 | |
692 | left = chrec_before; |
693 | right = chrec_convert_rhs (chrec_type (chrec: left), to_add, at_stmt); |
694 | /* When we add the first evolution we need to replace the symbolic |
695 | evolution we've put in when the DFS reached the loop PHI node |
696 | with the initial value. There's only a limited cases of |
697 | extra operations ontop of that symbol allowed, namely |
698 | sign-conversions we can look through. For other cases we leave |
699 | the symbolic initial condition which causes build_polynomial_chrec |
700 | to return chrec_dont_know. See PR42512, PR66375 and PR107176 for |
701 | cases we mishandled before. */ |
702 | STRIP_NOPS (chrec_before); |
703 | if (chrec_before == gimple_phi_result (gs: loop_phi_node)) |
704 | left = fold_convert (TREE_TYPE (left), init_cond); |
705 | return build_polynomial_chrec (loop_num: loop_nb, left, right); |
706 | } |
707 | } |
708 | |
709 | /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension |
710 | of LOOP_NB. |
711 | |
712 | Description (provided for completeness, for those who read code in |
713 | a plane, and for my poor 62 bytes brain that would have forgotten |
714 | all this in the next two or three months): |
715 | |
716 | The algorithm of translation of programs from the SSA representation |
717 | into the chrecs syntax is based on a pattern matching. After having |
718 | reconstructed the overall tree expression for a loop, there are only |
719 | two cases that can arise: |
720 | |
721 | 1. a = loop-phi (init, a + expr) |
722 | 2. a = loop-phi (init, expr) |
723 | |
724 | where EXPR is either a scalar constant with respect to the analyzed |
725 | loop (this is a degree 0 polynomial), or an expression containing |
726 | other loop-phi definitions (these are higher degree polynomials). |
727 | |
728 | Examples: |
729 | |
730 | 1. |
731 | | init = ... |
732 | | loop_1 |
733 | | a = phi (init, a + 5) |
734 | | endloop |
735 | |
736 | 2. |
737 | | inita = ... |
738 | | initb = ... |
739 | | loop_1 |
740 | | a = phi (inita, 2 * b + 3) |
741 | | b = phi (initb, b + 1) |
742 | | endloop |
743 | |
744 | For the first case, the semantics of the SSA representation is: |
745 | |
746 | | a (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
747 | |
748 | that is, there is a loop index "x" that determines the scalar value |
749 | of the variable during the loop execution. During the first |
750 | iteration, the value is that of the initial condition INIT, while |
751 | during the subsequent iterations, it is the sum of the initial |
752 | condition with the sum of all the values of EXPR from the initial |
753 | iteration to the before last considered iteration. |
754 | |
755 | For the second case, the semantics of the SSA program is: |
756 | |
757 | | a (x) = init, if x = 0; |
758 | | expr (x - 1), otherwise. |
759 | |
760 | The second case corresponds to the PEELED_CHREC, whose syntax is |
761 | close to the syntax of a loop-phi-node: |
762 | |
763 | | phi (init, expr) vs. (init, expr)_x |
764 | |
765 | The proof of the translation algorithm for the first case is a |
766 | proof by structural induction based on the degree of EXPR. |
767 | |
768 | Degree 0: |
769 | When EXPR is a constant with respect to the analyzed loop, or in |
770 | other words when EXPR is a polynomial of degree 0, the evolution of |
771 | the variable A in the loop is an affine function with an initial |
772 | condition INIT, and a step EXPR. In order to show this, we start |
773 | from the semantics of the SSA representation: |
774 | |
775 | f (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
776 | |
777 | and since "expr (j)" is a constant with respect to "j", |
778 | |
779 | f (x) = init + x * expr |
780 | |
781 | Finally, based on the semantics of the pure sum chrecs, by |
782 | identification we get the corresponding chrecs syntax: |
783 | |
784 | f (x) = init * \binom{x}{0} + expr * \binom{x}{1} |
785 | f (x) -> {init, +, expr}_x |
786 | |
787 | Higher degree: |
788 | Suppose that EXPR is a polynomial of degree N with respect to the |
789 | analyzed loop_x for which we have already determined that it is |
790 | written under the chrecs syntax: |
791 | |
792 | | expr (x) -> {b_0, +, b_1, +, ..., +, b_{n-1}} (x) |
793 | |
794 | We start from the semantics of the SSA program: |
795 | |
796 | | f (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
797 | | |
798 | | f (x) = init + \sum_{j = 0}^{x - 1} |
799 | | (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1}) |
800 | | |
801 | | f (x) = init + \sum_{j = 0}^{x - 1} |
802 | | \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k}) |
803 | | |
804 | | f (x) = init + \sum_{k = 0}^{n - 1} |
805 | | (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k}) |
806 | | |
807 | | f (x) = init + \sum_{k = 0}^{n - 1} |
808 | | (b_k * \binom{x}{k + 1}) |
809 | | |
810 | | f (x) = init + b_0 * \binom{x}{1} + ... |
811 | | + b_{n-1} * \binom{x}{n} |
812 | | |
813 | | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ... |
814 | | + b_{n-1} * \binom{x}{n} |
815 | | |
816 | |
817 | And finally from the definition of the chrecs syntax, we identify: |
818 | | f (x) -> {init, +, b_0, +, ..., +, b_{n-1}}_x |
819 | |
820 | This shows the mechanism that stands behind the add_to_evolution |
821 | function. An important point is that the use of symbolic |
822 | parameters avoids the need of an analysis schedule. |
823 | |
824 | Example: |
825 | |
826 | | inita = ... |
827 | | initb = ... |
828 | | loop_1 |
829 | | a = phi (inita, a + 2 + b) |
830 | | b = phi (initb, b + 1) |
831 | | endloop |
832 | |
833 | When analyzing "a", the algorithm keeps "b" symbolically: |
834 | |
835 | | a -> {inita, +, 2 + b}_1 |
836 | |
837 | Then, after instantiation, the analyzer ends on the evolution: |
838 | |
839 | | a -> {inita, +, 2 + initb, +, 1}_1 |
840 | |
841 | */ |
842 | |
843 | tree |
844 | scev_dfs::add_to_evolution (tree chrec_before, enum tree_code code, |
845 | tree to_add, gimple *at_stmt) |
846 | { |
847 | tree type = chrec_type (chrec: to_add); |
848 | tree res = NULL_TREE; |
849 | |
850 | if (to_add == NULL_TREE) |
851 | return chrec_before; |
852 | |
853 | /* TO_ADD is either a scalar, or a parameter. TO_ADD is not |
854 | instantiated at this point. */ |
855 | if (TREE_CODE (to_add) == POLYNOMIAL_CHREC) |
856 | /* This should not happen. */ |
857 | return chrec_dont_know; |
858 | |
859 | if (dump_file && (dump_flags & TDF_SCEV)) |
860 | { |
861 | fprintf (stream: dump_file, format: "(add_to_evolution \n" ); |
862 | fprintf (stream: dump_file, format: " (loop_nb = %d)\n" , loop->num); |
863 | fprintf (stream: dump_file, format: " (chrec_before = " ); |
864 | print_generic_expr (dump_file, chrec_before); |
865 | fprintf (stream: dump_file, format: ")\n (to_add = " ); |
866 | print_generic_expr (dump_file, to_add); |
867 | fprintf (stream: dump_file, format: ")\n" ); |
868 | } |
869 | |
870 | if (code == MINUS_EXPR) |
871 | to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type) |
872 | ? build_real (type, dconstm1) |
873 | : build_int_cst_type (type, -1)); |
874 | |
875 | res = add_to_evolution_1 (chrec_before, to_add, at_stmt); |
876 | |
877 | if (dump_file && (dump_flags & TDF_SCEV)) |
878 | { |
879 | fprintf (stream: dump_file, format: " (res = " ); |
880 | print_generic_expr (dump_file, res); |
881 | fprintf (stream: dump_file, format: "))\n" ); |
882 | } |
883 | |
884 | return res; |
885 | } |
886 | |
887 | |
888 | /* Follow the ssa edge into the binary expression RHS0 CODE RHS1. |
889 | Return true if the strongly connected component has been found. */ |
890 | |
891 | t_bool |
892 | scev_dfs::follow_ssa_edge_binary (gimple *at_stmt, tree type, tree rhs0, |
893 | enum tree_code code, tree rhs1, |
894 | tree *evolution_of_loop, int limit) |
895 | { |
896 | t_bool res = t_false; |
897 | tree evol; |
898 | |
899 | switch (code) |
900 | { |
901 | case POINTER_PLUS_EXPR: |
902 | case PLUS_EXPR: |
903 | if (TREE_CODE (rhs0) == SSA_NAME) |
904 | { |
905 | if (TREE_CODE (rhs1) == SSA_NAME) |
906 | { |
907 | /* Match an assignment under the form: |
908 | "a = b + c". */ |
909 | |
910 | /* We want only assignments of form "name + name" contribute to |
911 | LIMIT, as the other cases do not necessarily contribute to |
912 | the complexity of the expression. */ |
913 | limit++; |
914 | |
915 | evol = *evolution_of_loop; |
916 | res = follow_ssa_edge_expr (at_stmt, rhs0, &evol, limit); |
917 | if (res == t_true) |
918 | *evolution_of_loop = add_to_evolution |
919 | (chrec_before: chrec_convert (type, evol, at_stmt), code, to_add: rhs1, at_stmt); |
920 | else if (res == t_false) |
921 | { |
922 | res = follow_ssa_edge_expr |
923 | (at_stmt, rhs1, evolution_of_loop, limit); |
924 | if (res == t_true) |
925 | *evolution_of_loop = add_to_evolution |
926 | (chrec_before: chrec_convert (type, *evolution_of_loop, at_stmt), |
927 | code, to_add: rhs0, at_stmt); |
928 | } |
929 | } |
930 | |
931 | else |
932 | gcc_unreachable (); /* Handled in caller. */ |
933 | } |
934 | |
935 | else if (TREE_CODE (rhs1) == SSA_NAME) |
936 | { |
937 | /* Match an assignment under the form: |
938 | "a = ... + c". */ |
939 | res = follow_ssa_edge_expr (at_stmt, rhs1, evolution_of_loop, limit); |
940 | if (res == t_true) |
941 | *evolution_of_loop = add_to_evolution |
942 | (chrec_before: chrec_convert (type, *evolution_of_loop, at_stmt), |
943 | code, to_add: rhs0, at_stmt); |
944 | } |
945 | |
946 | else |
947 | /* Otherwise, match an assignment under the form: |
948 | "a = ... + ...". */ |
949 | /* And there is nothing to do. */ |
950 | res = t_false; |
951 | break; |
952 | |
953 | case MINUS_EXPR: |
954 | /* This case is under the form "opnd0 = rhs0 - rhs1". */ |
955 | if (TREE_CODE (rhs0) == SSA_NAME) |
956 | gcc_unreachable (); /* Handled in caller. */ |
957 | else |
958 | /* Otherwise, match an assignment under the form: |
959 | "a = ... - ...". */ |
960 | /* And there is nothing to do. */ |
961 | res = t_false; |
962 | break; |
963 | |
964 | default: |
965 | res = t_false; |
966 | } |
967 | |
968 | return res; |
969 | } |
970 | |
971 | /* Checks whether the I-th argument of a PHI comes from a backedge. */ |
972 | |
973 | static bool |
974 | backedge_phi_arg_p (gphi *phi, int i) |
975 | { |
976 | const_edge e = gimple_phi_arg_edge (phi, i); |
977 | |
978 | /* We would in fact like to test EDGE_DFS_BACK here, but we do not care |
979 | about updating it anywhere, and this should work as well most of the |
980 | time. */ |
981 | if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
982 | return true; |
983 | |
984 | return false; |
985 | } |
986 | |
987 | /* Helper function for one branch of the condition-phi-node. Return |
988 | true if the strongly connected component has been found following |
989 | this path. */ |
990 | |
991 | t_bool |
992 | scev_dfs::follow_ssa_edge_in_condition_phi_branch (int i, |
993 | gphi *condition_phi, |
994 | tree *evolution_of_branch, |
995 | tree init_cond, int limit) |
996 | { |
997 | tree branch = PHI_ARG_DEF (condition_phi, i); |
998 | *evolution_of_branch = chrec_dont_know; |
999 | |
1000 | /* Do not follow back edges (they must belong to an irreducible loop, which |
1001 | we really do not want to worry about). */ |
1002 | if (backedge_phi_arg_p (phi: condition_phi, i)) |
1003 | return t_false; |
1004 | |
1005 | if (TREE_CODE (branch) == SSA_NAME) |
1006 | { |
1007 | *evolution_of_branch = init_cond; |
1008 | return follow_ssa_edge_expr (condition_phi, branch, |
1009 | evolution_of_branch, limit); |
1010 | } |
1011 | |
1012 | /* This case occurs when one of the condition branches sets |
1013 | the variable to a constant: i.e. a phi-node like |
1014 | "a_2 = PHI <a_7(5), 2(6)>;". |
1015 | |
1016 | FIXME: This case have to be refined correctly: |
1017 | in some cases it is possible to say something better than |
1018 | chrec_dont_know, for example using a wrap-around notation. */ |
1019 | return t_false; |
1020 | } |
1021 | |
1022 | /* This function merges the branches of a condition-phi-node in a |
1023 | loop. */ |
1024 | |
1025 | t_bool |
1026 | scev_dfs::follow_ssa_edge_in_condition_phi (gphi *condition_phi, |
1027 | tree *evolution_of_loop, int limit) |
1028 | { |
1029 | int i, n; |
1030 | tree init = *evolution_of_loop; |
1031 | tree evolution_of_branch; |
1032 | t_bool res = follow_ssa_edge_in_condition_phi_branch (i: 0, condition_phi, |
1033 | evolution_of_branch: &evolution_of_branch, |
1034 | init_cond: init, limit); |
1035 | if (res == t_false || res == t_dont_know) |
1036 | return res; |
1037 | |
1038 | *evolution_of_loop = evolution_of_branch; |
1039 | |
1040 | n = gimple_phi_num_args (gs: condition_phi); |
1041 | for (i = 1; i < n; i++) |
1042 | { |
1043 | /* Quickly give up when the evolution of one of the branches is |
1044 | not known. */ |
1045 | if (*evolution_of_loop == chrec_dont_know) |
1046 | return t_true; |
1047 | |
1048 | /* Increase the limit by the PHI argument number to avoid exponential |
1049 | time and memory complexity. */ |
1050 | res = follow_ssa_edge_in_condition_phi_branch (i, condition_phi, |
1051 | evolution_of_branch: &evolution_of_branch, |
1052 | init_cond: init, limit: limit + i); |
1053 | if (res == t_false || res == t_dont_know) |
1054 | return res; |
1055 | |
1056 | *evolution_of_loop = chrec_merge (*evolution_of_loop, |
1057 | evolution_of_branch); |
1058 | } |
1059 | |
1060 | return t_true; |
1061 | } |
1062 | |
1063 | /* Follow an SSA edge in an inner loop. It computes the overall |
1064 | effect of the loop, and following the symbolic initial conditions, |
1065 | it follows the edges in the parent loop. The inner loop is |
1066 | considered as a single statement. */ |
1067 | |
1068 | t_bool |
1069 | scev_dfs::follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node, |
1070 | tree *evolution_of_loop, int limit) |
1071 | { |
1072 | class loop *loop = loop_containing_stmt (stmt: loop_phi_node); |
1073 | tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node)); |
1074 | |
1075 | /* Sometimes, the inner loop is too difficult to analyze, and the |
1076 | result of the analysis is a symbolic parameter. */ |
1077 | if (ev == PHI_RESULT (loop_phi_node)) |
1078 | { |
1079 | t_bool res = t_false; |
1080 | int i, n = gimple_phi_num_args (gs: loop_phi_node); |
1081 | |
1082 | for (i = 0; i < n; i++) |
1083 | { |
1084 | tree arg = PHI_ARG_DEF (loop_phi_node, i); |
1085 | basic_block bb; |
1086 | |
1087 | /* Follow the edges that exit the inner loop. */ |
1088 | bb = gimple_phi_arg_edge (phi: loop_phi_node, i)->src; |
1089 | if (!flow_bb_inside_loop_p (loop, bb)) |
1090 | res = follow_ssa_edge_expr (loop_phi_node, |
1091 | arg, evolution_of_loop, limit); |
1092 | if (res == t_true) |
1093 | break; |
1094 | } |
1095 | |
1096 | /* If the path crosses this loop-phi, give up. */ |
1097 | if (res == t_true) |
1098 | *evolution_of_loop = chrec_dont_know; |
1099 | |
1100 | return res; |
1101 | } |
1102 | |
1103 | /* Otherwise, compute the overall effect of the inner loop. */ |
1104 | ev = compute_overall_effect_of_inner_loop (loop, evolution_fn: ev); |
1105 | return follow_ssa_edge_expr (loop_phi_node, ev, evolution_of_loop, limit); |
1106 | } |
1107 | |
1108 | /* Follow the ssa edge into the expression EXPR. |
1109 | Return true if the strongly connected component has been found. */ |
1110 | |
1111 | t_bool |
1112 | scev_dfs::follow_ssa_edge_expr (gimple *at_stmt, tree expr, |
1113 | tree *evolution_of_loop, int limit) |
1114 | { |
1115 | gphi *halting_phi = loop_phi_node; |
1116 | enum tree_code code; |
1117 | tree type, rhs0, rhs1 = NULL_TREE; |
1118 | |
1119 | /* The EXPR is one of the following cases: |
1120 | - an SSA_NAME, |
1121 | - an INTEGER_CST, |
1122 | - a PLUS_EXPR, |
1123 | - a POINTER_PLUS_EXPR, |
1124 | - a MINUS_EXPR, |
1125 | - other cases are not yet handled. */ |
1126 | |
1127 | /* For SSA_NAME look at the definition statement, handling |
1128 | PHI nodes and otherwise expand appropriately for the expression |
1129 | handling below. */ |
1130 | if (TREE_CODE (expr) == SSA_NAME) |
1131 | { |
1132 | gimple *def = SSA_NAME_DEF_STMT (expr); |
1133 | |
1134 | if (gimple_nop_p (g: def)) |
1135 | return t_false; |
1136 | |
1137 | /* Give up if the path is longer than the MAX that we allow. */ |
1138 | if (limit > param_scev_max_expr_complexity) |
1139 | { |
1140 | *evolution_of_loop = chrec_dont_know; |
1141 | return t_dont_know; |
1142 | } |
1143 | |
1144 | if (gphi *phi = dyn_cast <gphi *>(p: def)) |
1145 | { |
1146 | if (!loop_phi_node_p (phi)) |
1147 | /* DEF is a condition-phi-node. Follow the branches, and |
1148 | record their evolutions. Finally, merge the collected |
1149 | information and set the approximation to the main |
1150 | variable. */ |
1151 | return follow_ssa_edge_in_condition_phi (condition_phi: phi, evolution_of_loop, |
1152 | limit); |
1153 | |
1154 | /* When the analyzed phi is the halting_phi, the |
1155 | depth-first search is over: we have found a path from |
1156 | the halting_phi to itself in the loop. */ |
1157 | if (phi == halting_phi) |
1158 | { |
1159 | *evolution_of_loop = expr; |
1160 | return t_true; |
1161 | } |
1162 | |
1163 | /* Otherwise, the evolution of the HALTING_PHI depends |
1164 | on the evolution of another loop-phi-node, i.e. the |
1165 | evolution function is a higher degree polynomial. */ |
1166 | class loop *def_loop = loop_containing_stmt (stmt: def); |
1167 | if (def_loop == loop) |
1168 | return t_false; |
1169 | |
1170 | /* Inner loop. */ |
1171 | if (flow_loop_nested_p (loop, def_loop)) |
1172 | return follow_ssa_edge_inner_loop_phi (loop_phi_node: phi, evolution_of_loop, |
1173 | limit: limit + 1); |
1174 | |
1175 | /* Outer loop. */ |
1176 | return t_false; |
1177 | } |
1178 | |
1179 | /* At this level of abstraction, the program is just a set |
1180 | of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no |
1181 | other def to be handled. */ |
1182 | if (!is_gimple_assign (gs: def)) |
1183 | return t_false; |
1184 | |
1185 | code = gimple_assign_rhs_code (gs: def); |
1186 | switch (get_gimple_rhs_class (code)) |
1187 | { |
1188 | case GIMPLE_BINARY_RHS: |
1189 | rhs0 = gimple_assign_rhs1 (gs: def); |
1190 | rhs1 = gimple_assign_rhs2 (gs: def); |
1191 | break; |
1192 | case GIMPLE_UNARY_RHS: |
1193 | case GIMPLE_SINGLE_RHS: |
1194 | rhs0 = gimple_assign_rhs1 (gs: def); |
1195 | break; |
1196 | default: |
1197 | return t_false; |
1198 | } |
1199 | type = TREE_TYPE (gimple_assign_lhs (def)); |
1200 | at_stmt = def; |
1201 | } |
1202 | else |
1203 | { |
1204 | code = TREE_CODE (expr); |
1205 | type = TREE_TYPE (expr); |
1206 | /* Via follow_ssa_edge_inner_loop_phi we arrive here with the |
1207 | GENERIC scalar evolution of the inner loop. */ |
1208 | switch (code) |
1209 | { |
1210 | CASE_CONVERT: |
1211 | rhs0 = TREE_OPERAND (expr, 0); |
1212 | break; |
1213 | case POINTER_PLUS_EXPR: |
1214 | case PLUS_EXPR: |
1215 | case MINUS_EXPR: |
1216 | rhs0 = TREE_OPERAND (expr, 0); |
1217 | rhs1 = TREE_OPERAND (expr, 1); |
1218 | STRIP_USELESS_TYPE_CONVERSION (rhs0); |
1219 | STRIP_USELESS_TYPE_CONVERSION (rhs1); |
1220 | break; |
1221 | default: |
1222 | rhs0 = expr; |
1223 | } |
1224 | } |
1225 | |
1226 | switch (code) |
1227 | { |
1228 | CASE_CONVERT: |
1229 | { |
1230 | /* This assignment is under the form "a_1 = (cast) rhs. We cannot |
1231 | validate any precision altering conversion during the SCC |
1232 | analysis, so don't even try. */ |
1233 | if (!tree_nop_conversion_p (type, TREE_TYPE (rhs0))) |
1234 | return t_false; |
1235 | t_bool res = follow_ssa_edge_expr (at_stmt, expr: rhs0, |
1236 | evolution_of_loop, limit); |
1237 | if (res == t_true) |
1238 | *evolution_of_loop = chrec_convert (type, *evolution_of_loop, |
1239 | at_stmt); |
1240 | return res; |
1241 | } |
1242 | |
1243 | case INTEGER_CST: |
1244 | /* This assignment is under the form "a_1 = 7". */ |
1245 | return t_false; |
1246 | |
1247 | case ADDR_EXPR: |
1248 | { |
1249 | /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ |
1250 | if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF) |
1251 | return t_false; |
1252 | tree mem = TREE_OPERAND (rhs0, 0); |
1253 | rhs0 = TREE_OPERAND (mem, 0); |
1254 | rhs1 = TREE_OPERAND (mem, 1); |
1255 | code = POINTER_PLUS_EXPR; |
1256 | } |
1257 | /* Fallthru. */ |
1258 | case POINTER_PLUS_EXPR: |
1259 | case PLUS_EXPR: |
1260 | case MINUS_EXPR: |
1261 | /* This case is under the form "rhs0 +- rhs1". */ |
1262 | if (TREE_CODE (rhs0) == SSA_NAME |
1263 | && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR)) |
1264 | { |
1265 | /* Match an assignment under the form: |
1266 | "a = b +- ...". */ |
1267 | t_bool res = follow_ssa_edge_expr (at_stmt, expr: rhs0, |
1268 | evolution_of_loop, limit); |
1269 | if (res == t_true) |
1270 | *evolution_of_loop = add_to_evolution |
1271 | (chrec_before: chrec_convert (type, *evolution_of_loop, at_stmt), |
1272 | code, to_add: rhs1, at_stmt); |
1273 | return res; |
1274 | } |
1275 | /* Else search for the SCC in both rhs0 and rhs1. */ |
1276 | return follow_ssa_edge_binary (at_stmt, type, rhs0, code, rhs1, |
1277 | evolution_of_loop, limit); |
1278 | |
1279 | default: |
1280 | return t_false; |
1281 | } |
1282 | } |
1283 | |
1284 | |
1285 | /* This section selects the loops that will be good candidates for the |
1286 | scalar evolution analysis. For the moment, greedily select all the |
1287 | loop nests we could analyze. */ |
1288 | |
1289 | /* For a loop with a single exit edge, return the COND_EXPR that |
1290 | guards the exit edge. If the expression is too difficult to |
1291 | analyze, then give up. */ |
1292 | |
1293 | gcond * |
1294 | get_loop_exit_condition (const class loop *loop) |
1295 | { |
1296 | return get_loop_exit_condition (single_exit (loop)); |
1297 | } |
1298 | |
1299 | /* If the statement just before the EXIT_EDGE contains a condition then |
1300 | return the condition, otherwise NULL. */ |
1301 | |
1302 | gcond * |
1303 | get_loop_exit_condition (const_edge exit_edge) |
1304 | { |
1305 | gcond *res = NULL; |
1306 | |
1307 | if (dump_file && (dump_flags & TDF_SCEV)) |
1308 | fprintf (stream: dump_file, format: "(get_loop_exit_condition \n " ); |
1309 | |
1310 | if (exit_edge) |
1311 | res = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit_edge->src)); |
1312 | |
1313 | if (dump_file && (dump_flags & TDF_SCEV)) |
1314 | { |
1315 | print_gimple_stmt (dump_file, res, 0); |
1316 | fprintf (stream: dump_file, format: ")\n" ); |
1317 | } |
1318 | |
1319 | return res; |
1320 | } |
1321 | |
1322 | |
1323 | /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP. |
1324 | Handle below case and return the corresponding POLYNOMIAL_CHREC: |
1325 | |
1326 | # i_17 = PHI <i_13(5), 0(3)> |
1327 | # _20 = PHI <_5(5), start_4(D)(3)> |
1328 | ... |
1329 | i_13 = i_17 + 1; |
1330 | _5 = start_4(D) + i_13; |
1331 | |
1332 | Though variable _20 appears as a PEELED_CHREC in the form of |
1333 | (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP. |
1334 | |
1335 | See PR41488. */ |
1336 | |
1337 | static tree |
1338 | simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond) |
1339 | { |
1340 | aff_tree aff1, aff2; |
1341 | tree ev, left, right, type, step_val; |
1342 | hash_map<tree, name_expansion *> *peeled_chrec_map = NULL; |
1343 | |
1344 | ev = instantiate_parameters (loop, chrec: analyze_scalar_evolution (loop, arg)); |
1345 | if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC) |
1346 | return chrec_dont_know; |
1347 | |
1348 | left = CHREC_LEFT (ev); |
1349 | right = CHREC_RIGHT (ev); |
1350 | type = TREE_TYPE (left); |
1351 | step_val = chrec_fold_plus (type, init_cond, right); |
1352 | |
1353 | /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP |
1354 | if "left" equals to "init + right". */ |
1355 | if (operand_equal_p (left, step_val, flags: 0)) |
1356 | { |
1357 | if (dump_file && (dump_flags & TDF_SCEV)) |
1358 | fprintf (stream: dump_file, format: "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n" ); |
1359 | |
1360 | return build_polynomial_chrec (loop_num: loop->num, left: init_cond, right); |
1361 | } |
1362 | |
1363 | /* The affine code only deals with pointer and integer types. */ |
1364 | if (!POINTER_TYPE_P (type) |
1365 | && !INTEGRAL_TYPE_P (type)) |
1366 | return chrec_dont_know; |
1367 | |
1368 | /* Try harder to check if they are equal. */ |
1369 | tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map); |
1370 | tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map); |
1371 | free_affine_expand_cache (&peeled_chrec_map); |
1372 | aff_combination_scale (&aff2, -1); |
1373 | aff_combination_add (&aff1, &aff2); |
1374 | |
1375 | /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP |
1376 | if "left" equals to "init + right". */ |
1377 | if (aff_combination_zero_p (aff: &aff1)) |
1378 | { |
1379 | if (dump_file && (dump_flags & TDF_SCEV)) |
1380 | fprintf (stream: dump_file, format: "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n" ); |
1381 | |
1382 | return build_polynomial_chrec (loop_num: loop->num, left: init_cond, right); |
1383 | } |
1384 | return chrec_dont_know; |
1385 | } |
1386 | |
1387 | /* Given a LOOP_PHI_NODE, this function determines the evolution |
1388 | function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop. */ |
1389 | |
1390 | static tree |
1391 | analyze_evolution_in_loop (gphi *loop_phi_node, |
1392 | tree init_cond) |
1393 | { |
1394 | int i, n = gimple_phi_num_args (gs: loop_phi_node); |
1395 | tree evolution_function = chrec_not_analyzed_yet; |
1396 | class loop *loop = loop_containing_stmt (stmt: loop_phi_node); |
1397 | basic_block bb; |
1398 | static bool simplify_peeled_chrec_p = true; |
1399 | |
1400 | if (dump_file && (dump_flags & TDF_SCEV)) |
1401 | { |
1402 | fprintf (stream: dump_file, format: "(analyze_evolution_in_loop \n" ); |
1403 | fprintf (stream: dump_file, format: " (loop_phi_node = " ); |
1404 | print_gimple_stmt (dump_file, loop_phi_node, 0); |
1405 | fprintf (stream: dump_file, format: ")\n" ); |
1406 | } |
1407 | |
1408 | for (i = 0; i < n; i++) |
1409 | { |
1410 | tree arg = PHI_ARG_DEF (loop_phi_node, i); |
1411 | tree ev_fn = chrec_dont_know; |
1412 | t_bool res; |
1413 | |
1414 | /* Select the edges that enter the loop body. */ |
1415 | bb = gimple_phi_arg_edge (phi: loop_phi_node, i)->src; |
1416 | if (!flow_bb_inside_loop_p (loop, bb)) |
1417 | continue; |
1418 | |
1419 | if (TREE_CODE (arg) == SSA_NAME) |
1420 | { |
1421 | bool val = false; |
1422 | |
1423 | /* Pass in the initial condition to the follow edge function. */ |
1424 | scev_dfs dfs (loop, loop_phi_node, init_cond); |
1425 | res = dfs.get_ev (ev_fn: &ev_fn, arg); |
1426 | |
1427 | /* If ev_fn has no evolution in the inner loop, and the |
1428 | init_cond is not equal to ev_fn, then we have an |
1429 | ambiguity between two possible values, as we cannot know |
1430 | the number of iterations at this point. */ |
1431 | if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC |
1432 | && no_evolution_in_loop_p (chrec: ev_fn, loop_num: loop->num, res: &val) && val |
1433 | && !operand_equal_p (init_cond, ev_fn, flags: 0)) |
1434 | ev_fn = chrec_dont_know; |
1435 | } |
1436 | else |
1437 | res = t_false; |
1438 | |
1439 | /* When it is impossible to go back on the same |
1440 | loop_phi_node by following the ssa edges, the |
1441 | evolution is represented by a peeled chrec, i.e. the |
1442 | first iteration, EV_FN has the value INIT_COND, then |
1443 | all the other iterations it has the value of ARG. |
1444 | For the moment, PEELED_CHREC nodes are not built. */ |
1445 | if (res != t_true) |
1446 | { |
1447 | ev_fn = chrec_dont_know; |
1448 | /* Try to recognize POLYNOMIAL_CHREC which appears in |
1449 | the form of PEELED_CHREC, but guard the process with |
1450 | a bool variable to keep the analyzer from infinite |
1451 | recurrence for real PEELED_RECs. */ |
1452 | if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME) |
1453 | { |
1454 | simplify_peeled_chrec_p = false; |
1455 | ev_fn = simplify_peeled_chrec (loop, arg, init_cond); |
1456 | simplify_peeled_chrec_p = true; |
1457 | } |
1458 | } |
1459 | |
1460 | /* When there are multiple back edges of the loop (which in fact never |
1461 | happens currently, but nevertheless), merge their evolutions. */ |
1462 | evolution_function = chrec_merge (evolution_function, ev_fn); |
1463 | |
1464 | if (evolution_function == chrec_dont_know) |
1465 | break; |
1466 | } |
1467 | |
1468 | if (dump_file && (dump_flags & TDF_SCEV)) |
1469 | { |
1470 | fprintf (stream: dump_file, format: " (evolution_function = " ); |
1471 | print_generic_expr (dump_file, evolution_function); |
1472 | fprintf (stream: dump_file, format: "))\n" ); |
1473 | } |
1474 | |
1475 | return evolution_function; |
1476 | } |
1477 | |
1478 | /* Looks to see if VAR is a copy of a constant (via straightforward assignments |
1479 | or degenerate phi's). If so, returns the constant; else, returns VAR. */ |
1480 | |
1481 | static tree |
1482 | follow_copies_to_constant (tree var) |
1483 | { |
1484 | tree res = var; |
1485 | while (TREE_CODE (res) == SSA_NAME |
1486 | /* We face not updated SSA form in multiple places and this walk |
1487 | may end up in sibling loops so we have to guard it. */ |
1488 | && !name_registered_for_update_p (res)) |
1489 | { |
1490 | gimple *def = SSA_NAME_DEF_STMT (res); |
1491 | if (gphi *phi = dyn_cast <gphi *> (p: def)) |
1492 | { |
1493 | if (tree rhs = degenerate_phi_result (phi)) |
1494 | res = rhs; |
1495 | else |
1496 | break; |
1497 | } |
1498 | else if (gimple_assign_single_p (gs: def)) |
1499 | /* Will exit loop if not an SSA_NAME. */ |
1500 | res = gimple_assign_rhs1 (gs: def); |
1501 | else |
1502 | break; |
1503 | } |
1504 | if (CONSTANT_CLASS_P (res)) |
1505 | return res; |
1506 | return var; |
1507 | } |
1508 | |
1509 | /* Given a loop-phi-node, return the initial conditions of the |
1510 | variable on entry of the loop. When the CCP has propagated |
1511 | constants into the loop-phi-node, the initial condition is |
1512 | instantiated, otherwise the initial condition is kept symbolic. |
1513 | This analyzer does not analyze the evolution outside the current |
1514 | loop, and leaves this task to the on-demand tree reconstructor. */ |
1515 | |
1516 | static tree |
1517 | analyze_initial_condition (gphi *loop_phi_node) |
1518 | { |
1519 | int i, n; |
1520 | tree init_cond = chrec_not_analyzed_yet; |
1521 | class loop *loop = loop_containing_stmt (stmt: loop_phi_node); |
1522 | |
1523 | if (dump_file && (dump_flags & TDF_SCEV)) |
1524 | { |
1525 | fprintf (stream: dump_file, format: "(analyze_initial_condition \n" ); |
1526 | fprintf (stream: dump_file, format: " (loop_phi_node = \n" ); |
1527 | print_gimple_stmt (dump_file, loop_phi_node, 0); |
1528 | fprintf (stream: dump_file, format: ")\n" ); |
1529 | } |
1530 | |
1531 | n = gimple_phi_num_args (gs: loop_phi_node); |
1532 | for (i = 0; i < n; i++) |
1533 | { |
1534 | tree branch = PHI_ARG_DEF (loop_phi_node, i); |
1535 | basic_block bb = gimple_phi_arg_edge (phi: loop_phi_node, i)->src; |
1536 | |
1537 | /* When the branch is oriented to the loop's body, it does |
1538 | not contribute to the initial condition. */ |
1539 | if (flow_bb_inside_loop_p (loop, bb)) |
1540 | continue; |
1541 | |
1542 | if (init_cond == chrec_not_analyzed_yet) |
1543 | { |
1544 | init_cond = branch; |
1545 | continue; |
1546 | } |
1547 | |
1548 | if (TREE_CODE (branch) == SSA_NAME) |
1549 | { |
1550 | init_cond = chrec_dont_know; |
1551 | break; |
1552 | } |
1553 | |
1554 | init_cond = chrec_merge (init_cond, branch); |
1555 | } |
1556 | |
1557 | /* Ooops -- a loop without an entry??? */ |
1558 | if (init_cond == chrec_not_analyzed_yet) |
1559 | init_cond = chrec_dont_know; |
1560 | |
1561 | /* We may not have fully constant propagated IL. Handle degenerate PHIs here |
1562 | to not miss important early loop unrollings. */ |
1563 | init_cond = follow_copies_to_constant (var: init_cond); |
1564 | |
1565 | if (dump_file && (dump_flags & TDF_SCEV)) |
1566 | { |
1567 | fprintf (stream: dump_file, format: " (init_cond = " ); |
1568 | print_generic_expr (dump_file, init_cond); |
1569 | fprintf (stream: dump_file, format: "))\n" ); |
1570 | } |
1571 | |
1572 | return init_cond; |
1573 | } |
1574 | |
1575 | /* Analyze the scalar evolution for LOOP_PHI_NODE. */ |
1576 | |
1577 | static tree |
1578 | interpret_loop_phi (class loop *loop, gphi *loop_phi_node) |
1579 | { |
1580 | class loop *phi_loop = loop_containing_stmt (stmt: loop_phi_node); |
1581 | tree init_cond; |
1582 | |
1583 | gcc_assert (phi_loop == loop); |
1584 | |
1585 | /* Otherwise really interpret the loop phi. */ |
1586 | init_cond = analyze_initial_condition (loop_phi_node); |
1587 | return analyze_evolution_in_loop (loop_phi_node, init_cond); |
1588 | } |
1589 | |
1590 | /* This function merges the branches of a condition-phi-node, |
1591 | contained in the outermost loop, and whose arguments are already |
1592 | analyzed. */ |
1593 | |
1594 | static tree |
1595 | interpret_condition_phi (class loop *loop, gphi *condition_phi) |
1596 | { |
1597 | int i, n = gimple_phi_num_args (gs: condition_phi); |
1598 | tree res = chrec_not_analyzed_yet; |
1599 | |
1600 | for (i = 0; i < n; i++) |
1601 | { |
1602 | tree branch_chrec; |
1603 | |
1604 | if (backedge_phi_arg_p (phi: condition_phi, i)) |
1605 | { |
1606 | res = chrec_dont_know; |
1607 | break; |
1608 | } |
1609 | |
1610 | branch_chrec = analyze_scalar_evolution |
1611 | (loop, PHI_ARG_DEF (condition_phi, i)); |
1612 | |
1613 | res = chrec_merge (res, branch_chrec); |
1614 | if (res == chrec_dont_know) |
1615 | break; |
1616 | } |
1617 | |
1618 | return res; |
1619 | } |
1620 | |
1621 | /* Interpret the operation RHS1 OP RHS2. If we didn't |
1622 | analyze this node before, follow the definitions until ending |
1623 | either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node. On the |
1624 | return path, this function propagates evolutions (ala constant copy |
1625 | propagation). OPND1 is not a GIMPLE expression because we could |
1626 | analyze the effect of an inner loop: see interpret_loop_phi. */ |
1627 | |
1628 | static tree |
1629 | interpret_rhs_expr (class loop *loop, gimple *at_stmt, |
1630 | tree type, tree rhs1, enum tree_code code, tree rhs2) |
1631 | { |
1632 | tree res, chrec1, chrec2, ctype; |
1633 | gimple *def; |
1634 | |
1635 | if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) |
1636 | { |
1637 | if (is_gimple_min_invariant (rhs1)) |
1638 | return chrec_convert (type, rhs1, at_stmt); |
1639 | |
1640 | if (code == SSA_NAME) |
1641 | return chrec_convert (type, analyze_scalar_evolution (loop, rhs1), |
1642 | at_stmt); |
1643 | } |
1644 | |
1645 | switch (code) |
1646 | { |
1647 | case ADDR_EXPR: |
1648 | if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF |
1649 | || handled_component_p (TREE_OPERAND (rhs1, 0))) |
1650 | { |
1651 | machine_mode mode; |
1652 | poly_int64 bitsize, bitpos; |
1653 | int unsignedp, reversep; |
1654 | int volatilep = 0; |
1655 | tree base, offset; |
1656 | tree chrec3; |
1657 | tree unitpos; |
1658 | |
1659 | base = get_inner_reference (TREE_OPERAND (rhs1, 0), |
1660 | &bitsize, &bitpos, &offset, &mode, |
1661 | &unsignedp, &reversep, &volatilep); |
1662 | |
1663 | if (TREE_CODE (base) == MEM_REF) |
1664 | { |
1665 | rhs2 = TREE_OPERAND (base, 1); |
1666 | rhs1 = TREE_OPERAND (base, 0); |
1667 | |
1668 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1669 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1670 | chrec1 = chrec_convert (type, chrec1, at_stmt); |
1671 | chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt); |
1672 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1673 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1674 | res = chrec_fold_plus (type, chrec1, chrec2); |
1675 | } |
1676 | else |
1677 | { |
1678 | chrec1 = analyze_scalar_evolution_for_address_of (loop, var: base); |
1679 | chrec1 = chrec_convert (type, chrec1, at_stmt); |
1680 | res = chrec1; |
1681 | } |
1682 | |
1683 | if (offset != NULL_TREE) |
1684 | { |
1685 | chrec2 = analyze_scalar_evolution (loop, offset); |
1686 | chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt); |
1687 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1688 | res = chrec_fold_plus (type, res, chrec2); |
1689 | } |
1690 | |
1691 | if (maybe_ne (a: bitpos, b: 0)) |
1692 | { |
1693 | unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT)); |
1694 | chrec3 = analyze_scalar_evolution (loop, unitpos); |
1695 | chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt); |
1696 | chrec3 = instantiate_parameters (loop, chrec: chrec3); |
1697 | res = chrec_fold_plus (type, res, chrec3); |
1698 | } |
1699 | } |
1700 | else |
1701 | res = chrec_dont_know; |
1702 | break; |
1703 | |
1704 | case POINTER_PLUS_EXPR: |
1705 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1706 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1707 | chrec1 = chrec_convert (type, chrec1, at_stmt); |
1708 | chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt); |
1709 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1710 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1711 | res = chrec_fold_plus (type, chrec1, chrec2); |
1712 | break; |
1713 | |
1714 | case PLUS_EXPR: |
1715 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1716 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1717 | ctype = type; |
1718 | /* When the stmt is conditionally executed re-write the CHREC |
1719 | into a form that has well-defined behavior on overflow. */ |
1720 | if (at_stmt |
1721 | && INTEGRAL_TYPE_P (type) |
1722 | && ! TYPE_OVERFLOW_WRAPS (type) |
1723 | && ! dominated_by_p (CDI_DOMINATORS, loop->latch, |
1724 | gimple_bb (g: at_stmt))) |
1725 | ctype = unsigned_type_for (type); |
1726 | chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
1727 | chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
1728 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1729 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1730 | res = chrec_fold_plus (ctype, chrec1, chrec2); |
1731 | if (type != ctype) |
1732 | res = chrec_convert (type, res, at_stmt); |
1733 | break; |
1734 | |
1735 | case MINUS_EXPR: |
1736 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1737 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1738 | ctype = type; |
1739 | /* When the stmt is conditionally executed re-write the CHREC |
1740 | into a form that has well-defined behavior on overflow. */ |
1741 | if (at_stmt |
1742 | && INTEGRAL_TYPE_P (type) |
1743 | && ! TYPE_OVERFLOW_WRAPS (type) |
1744 | && ! dominated_by_p (CDI_DOMINATORS, |
1745 | loop->latch, gimple_bb (g: at_stmt))) |
1746 | ctype = unsigned_type_for (type); |
1747 | chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
1748 | chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
1749 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1750 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1751 | res = chrec_fold_minus (ctype, chrec1, chrec2); |
1752 | if (type != ctype) |
1753 | res = chrec_convert (type, res, at_stmt); |
1754 | break; |
1755 | |
1756 | case NEGATE_EXPR: |
1757 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1758 | ctype = type; |
1759 | /* When the stmt is conditionally executed re-write the CHREC |
1760 | into a form that has well-defined behavior on overflow. */ |
1761 | if (at_stmt |
1762 | && INTEGRAL_TYPE_P (type) |
1763 | && ! TYPE_OVERFLOW_WRAPS (type) |
1764 | && ! dominated_by_p (CDI_DOMINATORS, |
1765 | loop->latch, gimple_bb (g: at_stmt))) |
1766 | ctype = unsigned_type_for (type); |
1767 | chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
1768 | /* TYPE may be integer, real or complex, so use fold_convert. */ |
1769 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1770 | res = chrec_fold_multiply (ctype, chrec1, |
1771 | fold_convert (ctype, integer_minus_one_node)); |
1772 | if (type != ctype) |
1773 | res = chrec_convert (type, res, at_stmt); |
1774 | break; |
1775 | |
1776 | case BIT_NOT_EXPR: |
1777 | /* Handle ~X as -1 - X. */ |
1778 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1779 | chrec1 = chrec_convert (type, chrec1, at_stmt); |
1780 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1781 | res = chrec_fold_minus (type, |
1782 | fold_convert (type, integer_minus_one_node), |
1783 | chrec1); |
1784 | break; |
1785 | |
1786 | case MULT_EXPR: |
1787 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1788 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1789 | ctype = type; |
1790 | /* When the stmt is conditionally executed re-write the CHREC |
1791 | into a form that has well-defined behavior on overflow. */ |
1792 | if (at_stmt |
1793 | && INTEGRAL_TYPE_P (type) |
1794 | && ! TYPE_OVERFLOW_WRAPS (type) |
1795 | && ! dominated_by_p (CDI_DOMINATORS, |
1796 | loop->latch, gimple_bb (g: at_stmt))) |
1797 | ctype = unsigned_type_for (type); |
1798 | chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
1799 | chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
1800 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1801 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1802 | res = chrec_fold_multiply (ctype, chrec1, chrec2); |
1803 | if (type != ctype) |
1804 | res = chrec_convert (type, res, at_stmt); |
1805 | break; |
1806 | |
1807 | case LSHIFT_EXPR: |
1808 | { |
1809 | /* Handle A<<B as A * (1<<B). */ |
1810 | tree uns = unsigned_type_for (type); |
1811 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1812 | chrec2 = analyze_scalar_evolution (loop, rhs2); |
1813 | chrec1 = chrec_convert (uns, chrec1, at_stmt); |
1814 | chrec1 = instantiate_parameters (loop, chrec: chrec1); |
1815 | chrec2 = instantiate_parameters (loop, chrec: chrec2); |
1816 | |
1817 | tree one = build_int_cst (uns, 1); |
1818 | chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2); |
1819 | res = chrec_fold_multiply (uns, chrec1, chrec2); |
1820 | res = chrec_convert (type, res, at_stmt); |
1821 | } |
1822 | break; |
1823 | |
1824 | CASE_CONVERT: |
1825 | /* In case we have a truncation of a widened operation that in |
1826 | the truncated type has undefined overflow behavior analyze |
1827 | the operation done in an unsigned type of the same precision |
1828 | as the final truncation. We cannot derive a scalar evolution |
1829 | for the widened operation but for the truncated result. */ |
1830 | if (TREE_CODE (type) == INTEGER_TYPE |
1831 | && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE |
1832 | && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1)) |
1833 | && TYPE_OVERFLOW_UNDEFINED (type) |
1834 | && TREE_CODE (rhs1) == SSA_NAME |
1835 | && (def = SSA_NAME_DEF_STMT (rhs1)) |
1836 | && is_gimple_assign (gs: def) |
1837 | && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary |
1838 | && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST) |
1839 | { |
1840 | tree utype = unsigned_type_for (type); |
1841 | chrec1 = interpret_rhs_expr (loop, at_stmt, type: utype, |
1842 | rhs1: gimple_assign_rhs1 (gs: def), |
1843 | code: gimple_assign_rhs_code (gs: def), |
1844 | rhs2: gimple_assign_rhs2 (gs: def)); |
1845 | } |
1846 | else |
1847 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1848 | res = chrec_convert (type, chrec1, at_stmt, true, rhs1); |
1849 | break; |
1850 | |
1851 | case BIT_AND_EXPR: |
1852 | /* Given int variable A, handle A&0xffff as (int)(unsigned short)A. |
1853 | If A is SCEV and its value is in the range of representable set |
1854 | of type unsigned short, the result expression is a (no-overflow) |
1855 | SCEV. */ |
1856 | res = chrec_dont_know; |
1857 | if (tree_fits_uhwi_p (rhs2)) |
1858 | { |
1859 | int precision; |
1860 | unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2); |
1861 | |
1862 | val ++; |
1863 | /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or |
1864 | it's not the maximum value of a smaller type than rhs1. */ |
1865 | if (val != 0 |
1866 | && (precision = exact_log2 (x: val)) > 0 |
1867 | && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1))) |
1868 | { |
1869 | tree utype = build_nonstandard_integer_type (precision, 1); |
1870 | |
1871 | if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1))) |
1872 | { |
1873 | chrec1 = analyze_scalar_evolution (loop, rhs1); |
1874 | chrec1 = chrec_convert (utype, chrec1, at_stmt); |
1875 | res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt); |
1876 | } |
1877 | } |
1878 | } |
1879 | break; |
1880 | |
1881 | default: |
1882 | res = chrec_dont_know; |
1883 | break; |
1884 | } |
1885 | |
1886 | return res; |
1887 | } |
1888 | |
1889 | /* Interpret the expression EXPR. */ |
1890 | |
1891 | static tree |
1892 | interpret_expr (class loop *loop, gimple *at_stmt, tree expr) |
1893 | { |
1894 | enum tree_code code; |
1895 | tree type = TREE_TYPE (expr), op0, op1; |
1896 | |
1897 | if (automatically_generated_chrec_p (chrec: expr)) |
1898 | return expr; |
1899 | |
1900 | if (TREE_CODE (expr) == POLYNOMIAL_CHREC |
1901 | || TREE_CODE (expr) == CALL_EXPR |
1902 | || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS) |
1903 | return chrec_dont_know; |
1904 | |
1905 | extract_ops_from_tree (expr, code: &code, op0: &op0, op1: &op1); |
1906 | |
1907 | return interpret_rhs_expr (loop, at_stmt, type, |
1908 | rhs1: op0, code, rhs2: op1); |
1909 | } |
1910 | |
1911 | /* Interpret the rhs of the assignment STMT. */ |
1912 | |
1913 | static tree |
1914 | interpret_gimple_assign (class loop *loop, gimple *stmt) |
1915 | { |
1916 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
1917 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
1918 | |
1919 | return interpret_rhs_expr (loop, at_stmt: stmt, type, |
1920 | rhs1: gimple_assign_rhs1 (gs: stmt), code, |
1921 | rhs2: gimple_assign_rhs2 (gs: stmt)); |
1922 | } |
1923 | |
1924 | |
1925 | |
1926 | /* This section contains all the entry points: |
1927 | - number_of_iterations_in_loop, |
1928 | - analyze_scalar_evolution, |
1929 | - instantiate_parameters. |
1930 | */ |
1931 | |
1932 | /* Helper recursive function. */ |
1933 | |
1934 | static tree |
1935 | analyze_scalar_evolution_1 (class loop *loop, tree var) |
1936 | { |
1937 | gimple *def; |
1938 | basic_block bb; |
1939 | class loop *def_loop; |
1940 | tree res; |
1941 | |
1942 | if (TREE_CODE (var) != SSA_NAME) |
1943 | return interpret_expr (loop, NULL, expr: var); |
1944 | |
1945 | def = SSA_NAME_DEF_STMT (var); |
1946 | bb = gimple_bb (g: def); |
1947 | def_loop = bb->loop_father; |
1948 | |
1949 | if (!flow_bb_inside_loop_p (loop, bb)) |
1950 | { |
1951 | /* Keep symbolic form, but look through obvious copies for constants. */ |
1952 | res = follow_copies_to_constant (var); |
1953 | goto set_and_end; |
1954 | } |
1955 | |
1956 | if (loop != def_loop) |
1957 | { |
1958 | res = analyze_scalar_evolution_1 (loop: def_loop, var); |
1959 | class loop *loop_to_skip = superloop_at_depth (def_loop, |
1960 | loop_depth (loop) + 1); |
1961 | res = compute_overall_effect_of_inner_loop (loop: loop_to_skip, evolution_fn: res); |
1962 | if (chrec_contains_symbols_defined_in_loop (res, loop->num)) |
1963 | res = analyze_scalar_evolution_1 (loop, var: res); |
1964 | goto set_and_end; |
1965 | } |
1966 | |
1967 | switch (gimple_code (g: def)) |
1968 | { |
1969 | case GIMPLE_ASSIGN: |
1970 | res = interpret_gimple_assign (loop, stmt: def); |
1971 | break; |
1972 | |
1973 | case GIMPLE_PHI: |
1974 | if (loop_phi_node_p (phi: def)) |
1975 | res = interpret_loop_phi (loop, loop_phi_node: as_a <gphi *> (p: def)); |
1976 | else |
1977 | res = interpret_condition_phi (loop, condition_phi: as_a <gphi *> (p: def)); |
1978 | break; |
1979 | |
1980 | default: |
1981 | res = chrec_dont_know; |
1982 | break; |
1983 | } |
1984 | |
1985 | set_and_end: |
1986 | |
1987 | /* Keep the symbolic form. */ |
1988 | if (res == chrec_dont_know) |
1989 | res = var; |
1990 | |
1991 | if (loop == def_loop) |
1992 | set_scalar_evolution (instantiated_below: block_before_loop (loop), scalar: var, chrec: res); |
1993 | |
1994 | return res; |
1995 | } |
1996 | |
1997 | /* Analyzes and returns the scalar evolution of the ssa_name VAR in |
1998 | LOOP. LOOP is the loop in which the variable is used. |
1999 | |
2000 | Example of use: having a pointer VAR to a SSA_NAME node, STMT a |
2001 | pointer to the statement that uses this variable, in order to |
2002 | determine the evolution function of the variable, use the following |
2003 | calls: |
2004 | |
2005 | loop_p loop = loop_containing_stmt (stmt); |
2006 | tree chrec_with_symbols = analyze_scalar_evolution (loop, var); |
2007 | tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols); |
2008 | */ |
2009 | |
2010 | tree |
2011 | analyze_scalar_evolution (class loop *loop, tree var) |
2012 | { |
2013 | tree res; |
2014 | |
2015 | /* ??? Fix callers. */ |
2016 | if (! loop) |
2017 | return var; |
2018 | |
2019 | if (dump_file && (dump_flags & TDF_SCEV)) |
2020 | { |
2021 | fprintf (stream: dump_file, format: "(analyze_scalar_evolution \n" ); |
2022 | fprintf (stream: dump_file, format: " (loop_nb = %d)\n" , loop->num); |
2023 | fprintf (stream: dump_file, format: " (scalar = " ); |
2024 | print_generic_expr (dump_file, var); |
2025 | fprintf (stream: dump_file, format: ")\n" ); |
2026 | } |
2027 | |
2028 | res = get_scalar_evolution (instantiated_below: block_before_loop (loop), scalar: var); |
2029 | if (res == chrec_not_analyzed_yet) |
2030 | { |
2031 | /* We'll recurse into instantiate_scev, avoid tearing down the |
2032 | instantiate cache repeatedly and keep it live from here. */ |
2033 | bool destr = false; |
2034 | if (!global_cache) |
2035 | { |
2036 | global_cache = new instantiate_cache_type; |
2037 | destr = true; |
2038 | } |
2039 | res = analyze_scalar_evolution_1 (loop, var); |
2040 | if (destr) |
2041 | { |
2042 | delete global_cache; |
2043 | global_cache = NULL; |
2044 | } |
2045 | } |
2046 | |
2047 | if (dump_file && (dump_flags & TDF_SCEV)) |
2048 | fprintf (stream: dump_file, format: ")\n" ); |
2049 | |
2050 | return res; |
2051 | } |
2052 | |
2053 | /* Analyzes and returns the scalar evolution of VAR address in LOOP. */ |
2054 | |
2055 | static tree |
2056 | analyze_scalar_evolution_for_address_of (class loop *loop, tree var) |
2057 | { |
2058 | return analyze_scalar_evolution (loop, build_fold_addr_expr (var)); |
2059 | } |
2060 | |
2061 | /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to |
2062 | WRTO_LOOP (which should be a superloop of USE_LOOP) |
2063 | |
2064 | FOLDED_CASTS is set to true if resolve_mixers used |
2065 | chrec_convert_aggressive (TODO -- not really, we are way too conservative |
2066 | at the moment in order to keep things simple). |
2067 | |
2068 | To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following |
2069 | example: |
2070 | |
2071 | for (i = 0; i < 100; i++) -- loop 1 |
2072 | { |
2073 | for (j = 0; j < 100; j++) -- loop 2 |
2074 | { |
2075 | k1 = i; |
2076 | k2 = j; |
2077 | |
2078 | use2 (k1, k2); |
2079 | |
2080 | for (t = 0; t < 100; t++) -- loop 3 |
2081 | use3 (k1, k2); |
2082 | |
2083 | } |
2084 | use1 (k1, k2); |
2085 | } |
2086 | |
2087 | Both k1 and k2 are invariants in loop3, thus |
2088 | analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1 |
2089 | analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2 |
2090 | |
2091 | As they are invariant, it does not matter whether we consider their |
2092 | usage in loop 3 or loop 2, hence |
2093 | analyze_scalar_evolution_in_loop (loop2, loop3, k1) = |
2094 | analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i |
2095 | analyze_scalar_evolution_in_loop (loop2, loop3, k2) = |
2096 | analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2 |
2097 | |
2098 | Similarly for their evolutions with respect to loop 1. The values of K2 |
2099 | in the use in loop 2 vary independently on loop 1, thus we cannot express |
2100 | the evolution with respect to loop 1: |
2101 | analyze_scalar_evolution_in_loop (loop1, loop3, k1) = |
2102 | analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1 |
2103 | analyze_scalar_evolution_in_loop (loop1, loop3, k2) = |
2104 | analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know |
2105 | |
2106 | The value of k2 in the use in loop 1 is known, though: |
2107 | analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1 |
2108 | analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100 |
2109 | */ |
2110 | |
2111 | static tree |
2112 | analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop, |
2113 | tree version, bool *folded_casts) |
2114 | { |
2115 | bool val = false; |
2116 | tree ev = version, tmp; |
2117 | |
2118 | /* We cannot just do |
2119 | |
2120 | tmp = analyze_scalar_evolution (use_loop, version); |
2121 | ev = resolve_mixers (wrto_loop, tmp, folded_casts); |
2122 | |
2123 | as resolve_mixers would query the scalar evolution with respect to |
2124 | wrto_loop. For example, in the situation described in the function |
2125 | comment, suppose that wrto_loop = loop1, use_loop = loop3 and |
2126 | version = k2. Then |
2127 | |
2128 | analyze_scalar_evolution (use_loop, version) = k2 |
2129 | |
2130 | and resolve_mixers (loop1, k2, folded_casts) finds that the value of |
2131 | k2 in loop 1 is 100, which is a wrong result, since we are interested |
2132 | in the value in loop 3. |
2133 | |
2134 | Instead, we need to proceed from use_loop to wrto_loop loop by loop, |
2135 | each time checking that there is no evolution in the inner loop. */ |
2136 | |
2137 | if (folded_casts) |
2138 | *folded_casts = false; |
2139 | while (1) |
2140 | { |
2141 | tmp = analyze_scalar_evolution (loop: use_loop, var: ev); |
2142 | ev = resolve_mixers (use_loop, tmp, folded_casts); |
2143 | |
2144 | if (use_loop == wrto_loop) |
2145 | return ev; |
2146 | |
2147 | /* If the value of the use changes in the inner loop, we cannot express |
2148 | its value in the outer loop (we might try to return interval chrec, |
2149 | but we do not have a user for it anyway) */ |
2150 | if (!no_evolution_in_loop_p (chrec: ev, loop_num: use_loop->num, res: &val) |
2151 | || !val) |
2152 | return chrec_dont_know; |
2153 | |
2154 | use_loop = loop_outer (loop: use_loop); |
2155 | } |
2156 | } |
2157 | |
2158 | |
2159 | /* Computes a hash function for database element ELT. */ |
2160 | |
2161 | static inline hashval_t |
2162 | hash_idx_scev_info (const void *elt_) |
2163 | { |
2164 | unsigned idx = ((size_t) elt_) - 2; |
2165 | return scev_info_hasher::hash (elt: &global_cache->entries[idx]); |
2166 | } |
2167 | |
2168 | /* Compares database elements E1 and E2. */ |
2169 | |
2170 | static inline int |
2171 | eq_idx_scev_info (const void *e1, const void *e2) |
2172 | { |
2173 | unsigned idx1 = ((size_t) e1) - 2; |
2174 | return scev_info_hasher::equal (elt1: &global_cache->entries[idx1], |
2175 | elt2: (const scev_info_str *) e2); |
2176 | } |
2177 | |
2178 | /* Returns from CACHE the slot number of the cached chrec for NAME. */ |
2179 | |
2180 | static unsigned |
2181 | get_instantiated_value_entry (instantiate_cache_type &cache, |
2182 | tree name, edge instantiate_below) |
2183 | { |
2184 | if (!cache.map) |
2185 | { |
2186 | cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL); |
2187 | cache.entries.create (nelems: 10); |
2188 | } |
2189 | |
2190 | scev_info_str e; |
2191 | e.name_version = SSA_NAME_VERSION (name); |
2192 | e.instantiated_below = instantiate_below->dest->index; |
2193 | void **slot = htab_find_slot_with_hash (cache.map, &e, |
2194 | scev_info_hasher::hash (elt: &e), INSERT); |
2195 | if (!*slot) |
2196 | { |
2197 | e.chrec = chrec_not_analyzed_yet; |
2198 | *slot = (void *)(size_t)(cache.entries.length () + 2); |
2199 | cache.entries.safe_push (obj: e); |
2200 | } |
2201 | |
2202 | return ((size_t)*slot) - 2; |
2203 | } |
2204 | |
2205 | |
2206 | /* Return the closed_loop_phi node for VAR. If there is none, return |
2207 | NULL_TREE. */ |
2208 | |
2209 | static tree |
2210 | loop_closed_phi_def (tree var) |
2211 | { |
2212 | class loop *loop; |
2213 | edge exit; |
2214 | gphi *phi; |
2215 | gphi_iterator psi; |
2216 | |
2217 | if (var == NULL_TREE |
2218 | || TREE_CODE (var) != SSA_NAME) |
2219 | return NULL_TREE; |
2220 | |
2221 | loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var)); |
2222 | exit = single_exit (loop); |
2223 | if (!exit) |
2224 | return NULL_TREE; |
2225 | |
2226 | for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); gsi_next (i: &psi)) |
2227 | { |
2228 | phi = psi.phi (); |
2229 | if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var) |
2230 | return PHI_RESULT (phi); |
2231 | } |
2232 | |
2233 | return NULL_TREE; |
2234 | } |
2235 | |
2236 | static tree instantiate_scev_r (edge, class loop *, class loop *, |
2237 | tree, bool *, int); |
2238 | |
2239 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2240 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2241 | |
2242 | CHREC is an SSA_NAME to be instantiated. |
2243 | |
2244 | CACHE is the cache of already instantiated values. |
2245 | |
2246 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2247 | conversions that may wrap in signed/pointer type are folded, as long |
2248 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2249 | then we don't do such fold. |
2250 | |
2251 | SIZE_EXPR is used for computing the size of the expression to be |
2252 | instantiated, and to stop if it exceeds some limit. */ |
2253 | |
2254 | static tree |
2255 | instantiate_scev_name (edge instantiate_below, |
2256 | class loop *evolution_loop, class loop *inner_loop, |
2257 | tree chrec, |
2258 | bool *fold_conversions, |
2259 | int size_expr) |
2260 | { |
2261 | tree res; |
2262 | class loop *def_loop; |
2263 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec)); |
2264 | |
2265 | /* A parameter, nothing to do. */ |
2266 | if (!def_bb |
2267 | || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest)) |
2268 | return chrec; |
2269 | |
2270 | /* We cache the value of instantiated variable to avoid exponential |
2271 | time complexity due to reevaluations. We also store the convenient |
2272 | value in the cache in order to prevent infinite recursion -- we do |
2273 | not want to instantiate the SSA_NAME if it is in a mixer |
2274 | structure. This is used for avoiding the instantiation of |
2275 | recursively defined functions, such as: |
2276 | |
2277 | | a_2 -> {0, +, 1, +, a_2}_1 */ |
2278 | |
2279 | unsigned si = get_instantiated_value_entry (cache&: *global_cache, |
2280 | name: chrec, instantiate_below); |
2281 | if (global_cache->get (slot: si) != chrec_not_analyzed_yet) |
2282 | return global_cache->get (slot: si); |
2283 | |
2284 | /* On recursion return chrec_dont_know. */ |
2285 | global_cache->set (slot: si, chrec_dont_know); |
2286 | |
2287 | def_loop = find_common_loop (evolution_loop, def_bb->loop_father); |
2288 | |
2289 | if (! dominated_by_p (CDI_DOMINATORS, |
2290 | def_loop->header, instantiate_below->dest)) |
2291 | { |
2292 | gimple *def = SSA_NAME_DEF_STMT (chrec); |
2293 | if (gassign *ass = dyn_cast <gassign *> (p: def)) |
2294 | { |
2295 | switch (gimple_assign_rhs_class (gs: ass)) |
2296 | { |
2297 | case GIMPLE_UNARY_RHS: |
2298 | { |
2299 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
2300 | inner_loop, gimple_assign_rhs1 (gs: ass), |
2301 | fold_conversions, size_expr); |
2302 | if (op0 == chrec_dont_know) |
2303 | return chrec_dont_know; |
2304 | res = fold_build1 (gimple_assign_rhs_code (ass), |
2305 | TREE_TYPE (chrec), op0); |
2306 | break; |
2307 | } |
2308 | case GIMPLE_BINARY_RHS: |
2309 | { |
2310 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
2311 | inner_loop, gimple_assign_rhs1 (gs: ass), |
2312 | fold_conversions, size_expr); |
2313 | if (op0 == chrec_dont_know) |
2314 | return chrec_dont_know; |
2315 | tree op1 = instantiate_scev_r (instantiate_below, evolution_loop, |
2316 | inner_loop, gimple_assign_rhs2 (gs: ass), |
2317 | fold_conversions, size_expr); |
2318 | if (op1 == chrec_dont_know) |
2319 | return chrec_dont_know; |
2320 | res = fold_build2 (gimple_assign_rhs_code (ass), |
2321 | TREE_TYPE (chrec), op0, op1); |
2322 | break; |
2323 | } |
2324 | default: |
2325 | res = chrec_dont_know; |
2326 | } |
2327 | } |
2328 | else |
2329 | res = chrec_dont_know; |
2330 | global_cache->set (slot: si, chrec: res); |
2331 | return res; |
2332 | } |
2333 | |
2334 | /* If the analysis yields a parametric chrec, instantiate the |
2335 | result again. */ |
2336 | res = analyze_scalar_evolution (loop: def_loop, var: chrec); |
2337 | |
2338 | /* Don't instantiate default definitions. */ |
2339 | if (TREE_CODE (res) == SSA_NAME |
2340 | && SSA_NAME_IS_DEFAULT_DEF (res)) |
2341 | ; |
2342 | |
2343 | /* Don't instantiate loop-closed-ssa phi nodes. */ |
2344 | else if (TREE_CODE (res) == SSA_NAME |
2345 | && loop_depth (loop: loop_containing_stmt (SSA_NAME_DEF_STMT (res))) |
2346 | > loop_depth (loop: def_loop)) |
2347 | { |
2348 | if (res == chrec) |
2349 | res = loop_closed_phi_def (var: chrec); |
2350 | else |
2351 | res = chrec; |
2352 | |
2353 | /* When there is no loop_closed_phi_def, it means that the |
2354 | variable is not used after the loop: try to still compute the |
2355 | value of the variable when exiting the loop. */ |
2356 | if (res == NULL_TREE) |
2357 | { |
2358 | loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec)); |
2359 | res = analyze_scalar_evolution (loop, var: chrec); |
2360 | res = compute_overall_effect_of_inner_loop (loop, evolution_fn: res); |
2361 | res = instantiate_scev_r (instantiate_below, evolution_loop, |
2362 | inner_loop, res, |
2363 | fold_conversions, size_expr); |
2364 | } |
2365 | else if (dominated_by_p (CDI_DOMINATORS, |
2366 | gimple_bb (SSA_NAME_DEF_STMT (res)), |
2367 | instantiate_below->dest)) |
2368 | res = chrec_dont_know; |
2369 | } |
2370 | |
2371 | else if (res != chrec_dont_know) |
2372 | { |
2373 | if (inner_loop |
2374 | && def_bb->loop_father != inner_loop |
2375 | && !flow_loop_nested_p (def_bb->loop_father, inner_loop)) |
2376 | /* ??? We could try to compute the overall effect of the loop here. */ |
2377 | res = chrec_dont_know; |
2378 | else |
2379 | res = instantiate_scev_r (instantiate_below, evolution_loop, |
2380 | inner_loop, res, |
2381 | fold_conversions, size_expr); |
2382 | } |
2383 | |
2384 | /* Store the correct value to the cache. */ |
2385 | global_cache->set (slot: si, chrec: res); |
2386 | return res; |
2387 | } |
2388 | |
2389 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2390 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2391 | |
2392 | CHREC is a polynomial chain of recurrence to be instantiated. |
2393 | |
2394 | CACHE is the cache of already instantiated values. |
2395 | |
2396 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2397 | conversions that may wrap in signed/pointer type are folded, as long |
2398 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2399 | then we don't do such fold. |
2400 | |
2401 | SIZE_EXPR is used for computing the size of the expression to be |
2402 | instantiated, and to stop if it exceeds some limit. */ |
2403 | |
2404 | static tree |
2405 | instantiate_scev_poly (edge instantiate_below, |
2406 | class loop *evolution_loop, class loop *, |
2407 | tree chrec, bool *fold_conversions, int size_expr) |
2408 | { |
2409 | tree op1; |
2410 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
2411 | get_chrec_loop (chrec), |
2412 | CHREC_LEFT (chrec), fold_conversions, |
2413 | size_expr); |
2414 | if (op0 == chrec_dont_know) |
2415 | return chrec_dont_know; |
2416 | |
2417 | op1 = instantiate_scev_r (instantiate_below, evolution_loop, |
2418 | get_chrec_loop (chrec), |
2419 | CHREC_RIGHT (chrec), fold_conversions, |
2420 | size_expr); |
2421 | if (op1 == chrec_dont_know) |
2422 | return chrec_dont_know; |
2423 | |
2424 | if (CHREC_LEFT (chrec) != op0 |
2425 | || CHREC_RIGHT (chrec) != op1) |
2426 | { |
2427 | op1 = chrec_convert_rhs (chrec_type (chrec: op0), op1, NULL); |
2428 | chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), left: op0, right: op1); |
2429 | } |
2430 | |
2431 | return chrec; |
2432 | } |
2433 | |
2434 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2435 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2436 | |
2437 | "C0 CODE C1" is a binary expression of type TYPE to be instantiated. |
2438 | |
2439 | CACHE is the cache of already instantiated values. |
2440 | |
2441 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2442 | conversions that may wrap in signed/pointer type are folded, as long |
2443 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2444 | then we don't do such fold. |
2445 | |
2446 | SIZE_EXPR is used for computing the size of the expression to be |
2447 | instantiated, and to stop if it exceeds some limit. */ |
2448 | |
2449 | static tree |
2450 | instantiate_scev_binary (edge instantiate_below, |
2451 | class loop *evolution_loop, class loop *inner_loop, |
2452 | tree chrec, enum tree_code code, |
2453 | tree type, tree c0, tree c1, |
2454 | bool *fold_conversions, int size_expr) |
2455 | { |
2456 | tree op1; |
2457 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, |
2458 | c0, fold_conversions, size_expr); |
2459 | if (op0 == chrec_dont_know) |
2460 | return chrec_dont_know; |
2461 | |
2462 | /* While we eventually compute the same op1 if c0 == c1 the process |
2463 | of doing this is expensive so the following short-cut prevents |
2464 | exponential compile-time behavior. */ |
2465 | if (c0 != c1) |
2466 | { |
2467 | op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, |
2468 | c1, fold_conversions, size_expr); |
2469 | if (op1 == chrec_dont_know) |
2470 | return chrec_dont_know; |
2471 | } |
2472 | else |
2473 | op1 = op0; |
2474 | |
2475 | if (c0 != op0 |
2476 | || c1 != op1) |
2477 | { |
2478 | op0 = chrec_convert (type, op0, NULL); |
2479 | op1 = chrec_convert_rhs (type, op1, NULL); |
2480 | |
2481 | switch (code) |
2482 | { |
2483 | case POINTER_PLUS_EXPR: |
2484 | case PLUS_EXPR: |
2485 | return chrec_fold_plus (type, op0, op1); |
2486 | |
2487 | case MINUS_EXPR: |
2488 | return chrec_fold_minus (type, op0, op1); |
2489 | |
2490 | case MULT_EXPR: |
2491 | return chrec_fold_multiply (type, op0, op1); |
2492 | |
2493 | default: |
2494 | gcc_unreachable (); |
2495 | } |
2496 | } |
2497 | |
2498 | return chrec ? chrec : fold_build2 (code, type, c0, c1); |
2499 | } |
2500 | |
2501 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2502 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2503 | |
2504 | "CHREC" that stands for a convert expression "(TYPE) OP" is to be |
2505 | instantiated. |
2506 | |
2507 | CACHE is the cache of already instantiated values. |
2508 | |
2509 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2510 | conversions that may wrap in signed/pointer type are folded, as long |
2511 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2512 | then we don't do such fold. |
2513 | |
2514 | SIZE_EXPR is used for computing the size of the expression to be |
2515 | instantiated, and to stop if it exceeds some limit. */ |
2516 | |
2517 | static tree |
2518 | instantiate_scev_convert (edge instantiate_below, |
2519 | class loop *evolution_loop, class loop *inner_loop, |
2520 | tree chrec, tree type, tree op, |
2521 | bool *fold_conversions, int size_expr) |
2522 | { |
2523 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
2524 | inner_loop, op, |
2525 | fold_conversions, size_expr); |
2526 | |
2527 | if (op0 == chrec_dont_know) |
2528 | return chrec_dont_know; |
2529 | |
2530 | if (fold_conversions) |
2531 | { |
2532 | tree tmp = chrec_convert_aggressive (type, op0, fold_conversions); |
2533 | if (tmp) |
2534 | return tmp; |
2535 | |
2536 | /* If we used chrec_convert_aggressive, we can no longer assume that |
2537 | signed chrecs do not overflow, as chrec_convert does, so avoid |
2538 | calling it in that case. */ |
2539 | if (*fold_conversions) |
2540 | { |
2541 | if (chrec && op0 == op) |
2542 | return chrec; |
2543 | |
2544 | return fold_convert (type, op0); |
2545 | } |
2546 | } |
2547 | |
2548 | return chrec_convert (type, op0, NULL); |
2549 | } |
2550 | |
2551 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2552 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2553 | |
2554 | CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated. |
2555 | Handle ~X as -1 - X. |
2556 | Handle -X as -1 * X. |
2557 | |
2558 | CACHE is the cache of already instantiated values. |
2559 | |
2560 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2561 | conversions that may wrap in signed/pointer type are folded, as long |
2562 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2563 | then we don't do such fold. |
2564 | |
2565 | SIZE_EXPR is used for computing the size of the expression to be |
2566 | instantiated, and to stop if it exceeds some limit. */ |
2567 | |
2568 | static tree |
2569 | instantiate_scev_not (edge instantiate_below, |
2570 | class loop *evolution_loop, class loop *inner_loop, |
2571 | tree chrec, |
2572 | enum tree_code code, tree type, tree op, |
2573 | bool *fold_conversions, int size_expr) |
2574 | { |
2575 | tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
2576 | inner_loop, op, |
2577 | fold_conversions, size_expr); |
2578 | |
2579 | if (op0 == chrec_dont_know) |
2580 | return chrec_dont_know; |
2581 | |
2582 | if (op != op0) |
2583 | { |
2584 | op0 = chrec_convert (type, op0, NULL); |
2585 | |
2586 | switch (code) |
2587 | { |
2588 | case BIT_NOT_EXPR: |
2589 | return chrec_fold_minus |
2590 | (type, fold_convert (type, integer_minus_one_node), op0); |
2591 | |
2592 | case NEGATE_EXPR: |
2593 | return chrec_fold_multiply |
2594 | (type, fold_convert (type, integer_minus_one_node), op0); |
2595 | |
2596 | default: |
2597 | gcc_unreachable (); |
2598 | } |
2599 | } |
2600 | |
2601 | return chrec ? chrec : fold_build1 (code, type, op0); |
2602 | } |
2603 | |
2604 | /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
2605 | and EVOLUTION_LOOP, that were left under a symbolic form. |
2606 | |
2607 | CHREC is the scalar evolution to instantiate. |
2608 | |
2609 | CACHE is the cache of already instantiated values. |
2610 | |
2611 | Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
2612 | conversions that may wrap in signed/pointer type are folded, as long |
2613 | as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
2614 | then we don't do such fold. |
2615 | |
2616 | SIZE_EXPR is used for computing the size of the expression to be |
2617 | instantiated, and to stop if it exceeds some limit. */ |
2618 | |
2619 | static tree |
2620 | instantiate_scev_r (edge instantiate_below, |
2621 | class loop *evolution_loop, class loop *inner_loop, |
2622 | tree chrec, |
2623 | bool *fold_conversions, int size_expr) |
2624 | { |
2625 | /* Give up if the expression is larger than the MAX that we allow. */ |
2626 | if (size_expr++ > param_scev_max_expr_size) |
2627 | return chrec_dont_know; |
2628 | |
2629 | if (chrec == NULL_TREE |
2630 | || automatically_generated_chrec_p (chrec) |
2631 | || is_gimple_min_invariant (chrec)) |
2632 | return chrec; |
2633 | |
2634 | switch (TREE_CODE (chrec)) |
2635 | { |
2636 | case SSA_NAME: |
2637 | return instantiate_scev_name (instantiate_below, evolution_loop, |
2638 | inner_loop, chrec, |
2639 | fold_conversions, size_expr); |
2640 | |
2641 | case POLYNOMIAL_CHREC: |
2642 | return instantiate_scev_poly (instantiate_below, evolution_loop, |
2643 | inner_loop, chrec, |
2644 | fold_conversions, size_expr); |
2645 | |
2646 | case POINTER_PLUS_EXPR: |
2647 | case PLUS_EXPR: |
2648 | case MINUS_EXPR: |
2649 | case MULT_EXPR: |
2650 | return instantiate_scev_binary (instantiate_below, evolution_loop, |
2651 | inner_loop, chrec, |
2652 | TREE_CODE (chrec), type: chrec_type (chrec), |
2653 | TREE_OPERAND (chrec, 0), |
2654 | TREE_OPERAND (chrec, 1), |
2655 | fold_conversions, size_expr); |
2656 | |
2657 | CASE_CONVERT: |
2658 | return instantiate_scev_convert (instantiate_below, evolution_loop, |
2659 | inner_loop, chrec, |
2660 | TREE_TYPE (chrec), TREE_OPERAND (chrec, 0), |
2661 | fold_conversions, size_expr); |
2662 | |
2663 | case NEGATE_EXPR: |
2664 | case BIT_NOT_EXPR: |
2665 | return instantiate_scev_not (instantiate_below, evolution_loop, |
2666 | inner_loop, chrec, |
2667 | TREE_CODE (chrec), TREE_TYPE (chrec), |
2668 | TREE_OPERAND (chrec, 0), |
2669 | fold_conversions, size_expr); |
2670 | |
2671 | case ADDR_EXPR: |
2672 | if (is_gimple_min_invariant (chrec)) |
2673 | return chrec; |
2674 | /* Fallthru. */ |
2675 | case SCEV_NOT_KNOWN: |
2676 | return chrec_dont_know; |
2677 | |
2678 | case SCEV_KNOWN: |
2679 | return chrec_known; |
2680 | |
2681 | default: |
2682 | if (CONSTANT_CLASS_P (chrec)) |
2683 | return chrec; |
2684 | return chrec_dont_know; |
2685 | } |
2686 | } |
2687 | |
2688 | /* Analyze all the parameters of the chrec that were left under a |
2689 | symbolic form. INSTANTIATE_BELOW is the basic block that stops the |
2690 | recursive instantiation of parameters: a parameter is a variable |
2691 | that is defined in a basic block that dominates INSTANTIATE_BELOW or |
2692 | a function parameter. */ |
2693 | |
2694 | tree |
2695 | instantiate_scev (edge instantiate_below, class loop *evolution_loop, |
2696 | tree chrec) |
2697 | { |
2698 | tree res; |
2699 | |
2700 | if (dump_file && (dump_flags & TDF_SCEV)) |
2701 | { |
2702 | fprintf (stream: dump_file, format: "(instantiate_scev \n" ); |
2703 | fprintf (stream: dump_file, format: " (instantiate_below = %d -> %d)\n" , |
2704 | instantiate_below->src->index, instantiate_below->dest->index); |
2705 | if (evolution_loop) |
2706 | fprintf (stream: dump_file, format: " (evolution_loop = %d)\n" , evolution_loop->num); |
2707 | fprintf (stream: dump_file, format: " (chrec = " ); |
2708 | print_generic_expr (dump_file, chrec); |
2709 | fprintf (stream: dump_file, format: ")\n" ); |
2710 | } |
2711 | |
2712 | bool destr = false; |
2713 | if (!global_cache) |
2714 | { |
2715 | global_cache = new instantiate_cache_type; |
2716 | destr = true; |
2717 | } |
2718 | |
2719 | res = instantiate_scev_r (instantiate_below, evolution_loop, |
2720 | NULL, chrec, NULL, size_expr: 0); |
2721 | |
2722 | if (destr) |
2723 | { |
2724 | delete global_cache; |
2725 | global_cache = NULL; |
2726 | } |
2727 | |
2728 | if (dump_file && (dump_flags & TDF_SCEV)) |
2729 | { |
2730 | fprintf (stream: dump_file, format: " (res = " ); |
2731 | print_generic_expr (dump_file, res); |
2732 | fprintf (stream: dump_file, format: "))\n" ); |
2733 | } |
2734 | |
2735 | return res; |
2736 | } |
2737 | |
2738 | /* Similar to instantiate_parameters, but does not introduce the |
2739 | evolutions in outer loops for LOOP invariants in CHREC, and does not |
2740 | care about causing overflows, as long as they do not affect value |
2741 | of an expression. */ |
2742 | |
2743 | tree |
2744 | resolve_mixers (class loop *loop, tree chrec, bool *folded_casts) |
2745 | { |
2746 | bool destr = false; |
2747 | bool fold_conversions = false; |
2748 | if (!global_cache) |
2749 | { |
2750 | global_cache = new instantiate_cache_type; |
2751 | destr = true; |
2752 | } |
2753 | |
2754 | tree ret = instantiate_scev_r (instantiate_below: loop_preheader_edge (loop), evolution_loop: loop, NULL, |
2755 | chrec, fold_conversions: &fold_conversions, size_expr: 0); |
2756 | |
2757 | if (folded_casts && !*folded_casts) |
2758 | *folded_casts = fold_conversions; |
2759 | |
2760 | if (destr) |
2761 | { |
2762 | delete global_cache; |
2763 | global_cache = NULL; |
2764 | } |
2765 | |
2766 | return ret; |
2767 | } |
2768 | |
2769 | /* Entry point for the analysis of the number of iterations pass. |
2770 | This function tries to safely approximate the number of iterations |
2771 | the loop will run. When this property is not decidable at compile |
2772 | time, the result is chrec_dont_know. Otherwise the result is a |
2773 | scalar or a symbolic parameter. When the number of iterations may |
2774 | be equal to zero and the property cannot be determined at compile |
2775 | time, the result is a COND_EXPR that represents in a symbolic form |
2776 | the conditions under which the number of iterations is not zero. |
2777 | |
2778 | Example of analysis: suppose that the loop has an exit condition: |
2779 | |
2780 | "if (b > 49) goto end_loop;" |
2781 | |
2782 | and that in a previous analysis we have determined that the |
2783 | variable 'b' has an evolution function: |
2784 | |
2785 | "EF = {23, +, 5}_2". |
2786 | |
2787 | When we evaluate the function at the point 5, i.e. the value of the |
2788 | variable 'b' after 5 iterations in the loop, we have EF (5) = 48, |
2789 | and EF (6) = 53. In this case the value of 'b' on exit is '53' and |
2790 | the loop body has been executed 6 times. */ |
2791 | |
2792 | tree |
2793 | number_of_latch_executions (class loop *loop) |
2794 | { |
2795 | edge exit; |
2796 | class tree_niter_desc niter_desc; |
2797 | tree may_be_zero; |
2798 | tree res; |
2799 | |
2800 | /* Determine whether the number of iterations in loop has already |
2801 | been computed. */ |
2802 | res = loop->nb_iterations; |
2803 | if (res) |
2804 | return res; |
2805 | |
2806 | may_be_zero = NULL_TREE; |
2807 | |
2808 | if (dump_file && (dump_flags & TDF_SCEV)) |
2809 | fprintf (stream: dump_file, format: "(number_of_iterations_in_loop = \n" ); |
2810 | |
2811 | res = chrec_dont_know; |
2812 | exit = single_exit (loop); |
2813 | |
2814 | if (exit && number_of_iterations_exit (loop, exit, niter: &niter_desc, false)) |
2815 | { |
2816 | may_be_zero = niter_desc.may_be_zero; |
2817 | res = niter_desc.niter; |
2818 | } |
2819 | |
2820 | if (res == chrec_dont_know |
2821 | || !may_be_zero |
2822 | || integer_zerop (may_be_zero)) |
2823 | ; |
2824 | else if (integer_nonzerop (may_be_zero)) |
2825 | res = build_int_cst (TREE_TYPE (res), 0); |
2826 | |
2827 | else if (COMPARISON_CLASS_P (may_be_zero)) |
2828 | res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero, |
2829 | build_int_cst (TREE_TYPE (res), 0), res); |
2830 | else |
2831 | res = chrec_dont_know; |
2832 | |
2833 | if (dump_file && (dump_flags & TDF_SCEV)) |
2834 | { |
2835 | fprintf (stream: dump_file, format: " (set_nb_iterations_in_loop = " ); |
2836 | print_generic_expr (dump_file, res); |
2837 | fprintf (stream: dump_file, format: "))\n" ); |
2838 | } |
2839 | |
2840 | loop->nb_iterations = res; |
2841 | return res; |
2842 | } |
2843 | |
2844 | |
2845 | /* Counters for the stats. */ |
2846 | |
2847 | struct chrec_stats |
2848 | { |
2849 | unsigned nb_chrecs; |
2850 | unsigned nb_affine; |
2851 | unsigned nb_affine_multivar; |
2852 | unsigned nb_higher_poly; |
2853 | unsigned nb_chrec_dont_know; |
2854 | unsigned nb_undetermined; |
2855 | }; |
2856 | |
2857 | /* Reset the counters. */ |
2858 | |
2859 | static inline void |
2860 | reset_chrecs_counters (struct chrec_stats *stats) |
2861 | { |
2862 | stats->nb_chrecs = 0; |
2863 | stats->nb_affine = 0; |
2864 | stats->nb_affine_multivar = 0; |
2865 | stats->nb_higher_poly = 0; |
2866 | stats->nb_chrec_dont_know = 0; |
2867 | stats->nb_undetermined = 0; |
2868 | } |
2869 | |
2870 | /* Dump the contents of a CHREC_STATS structure. */ |
2871 | |
2872 | static void |
2873 | dump_chrecs_stats (FILE *file, struct chrec_stats *stats) |
2874 | { |
2875 | fprintf (stream: file, format: "\n(\n" ); |
2876 | fprintf (stream: file, format: "-----------------------------------------\n" ); |
2877 | fprintf (stream: file, format: "%d\taffine univariate chrecs\n" , stats->nb_affine); |
2878 | fprintf (stream: file, format: "%d\taffine multivariate chrecs\n" , stats->nb_affine_multivar); |
2879 | fprintf (stream: file, format: "%d\tdegree greater than 2 polynomials\n" , |
2880 | stats->nb_higher_poly); |
2881 | fprintf (stream: file, format: "%d\tchrec_dont_know chrecs\n" , stats->nb_chrec_dont_know); |
2882 | fprintf (stream: file, format: "-----------------------------------------\n" ); |
2883 | fprintf (stream: file, format: "%d\ttotal chrecs\n" , stats->nb_chrecs); |
2884 | fprintf (stream: file, format: "%d\twith undetermined coefficients\n" , |
2885 | stats->nb_undetermined); |
2886 | fprintf (stream: file, format: "-----------------------------------------\n" ); |
2887 | fprintf (stream: file, format: "%d\tchrecs in the scev database\n" , |
2888 | (int) scalar_evolution_info->elements ()); |
2889 | fprintf (stream: file, format: "%d\tsets in the scev database\n" , nb_set_scev); |
2890 | fprintf (stream: file, format: "%d\tgets in the scev database\n" , nb_get_scev); |
2891 | fprintf (stream: file, format: "-----------------------------------------\n" ); |
2892 | fprintf (stream: file, format: ")\n\n" ); |
2893 | } |
2894 | |
2895 | /* Gather statistics about CHREC. */ |
2896 | |
2897 | static void |
2898 | gather_chrec_stats (tree chrec, struct chrec_stats *stats) |
2899 | { |
2900 | if (dump_file && (dump_flags & TDF_STATS)) |
2901 | { |
2902 | fprintf (stream: dump_file, format: "(classify_chrec " ); |
2903 | print_generic_expr (dump_file, chrec); |
2904 | fprintf (stream: dump_file, format: "\n" ); |
2905 | } |
2906 | |
2907 | stats->nb_chrecs++; |
2908 | |
2909 | if (chrec == NULL_TREE) |
2910 | { |
2911 | stats->nb_undetermined++; |
2912 | return; |
2913 | } |
2914 | |
2915 | switch (TREE_CODE (chrec)) |
2916 | { |
2917 | case POLYNOMIAL_CHREC: |
2918 | if (evolution_function_is_affine_p (chrec)) |
2919 | { |
2920 | if (dump_file && (dump_flags & TDF_STATS)) |
2921 | fprintf (stream: dump_file, format: " affine_univariate\n" ); |
2922 | stats->nb_affine++; |
2923 | } |
2924 | else if (evolution_function_is_affine_multivariate_p (chrec, 0)) |
2925 | { |
2926 | if (dump_file && (dump_flags & TDF_STATS)) |
2927 | fprintf (stream: dump_file, format: " affine_multivariate\n" ); |
2928 | stats->nb_affine_multivar++; |
2929 | } |
2930 | else |
2931 | { |
2932 | if (dump_file && (dump_flags & TDF_STATS)) |
2933 | fprintf (stream: dump_file, format: " higher_degree_polynomial\n" ); |
2934 | stats->nb_higher_poly++; |
2935 | } |
2936 | |
2937 | break; |
2938 | |
2939 | default: |
2940 | break; |
2941 | } |
2942 | |
2943 | if (chrec_contains_undetermined (chrec)) |
2944 | { |
2945 | if (dump_file && (dump_flags & TDF_STATS)) |
2946 | fprintf (stream: dump_file, format: " undetermined\n" ); |
2947 | stats->nb_undetermined++; |
2948 | } |
2949 | |
2950 | if (dump_file && (dump_flags & TDF_STATS)) |
2951 | fprintf (stream: dump_file, format: ")\n" ); |
2952 | } |
2953 | |
2954 | /* Classify the chrecs of the whole database. */ |
2955 | |
2956 | void |
2957 | gather_stats_on_scev_database (void) |
2958 | { |
2959 | struct chrec_stats stats; |
2960 | |
2961 | if (!dump_file) |
2962 | return; |
2963 | |
2964 | reset_chrecs_counters (stats: &stats); |
2965 | |
2966 | hash_table<scev_info_hasher>::iterator iter; |
2967 | scev_info_str *elt; |
2968 | FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *, |
2969 | iter) |
2970 | gather_chrec_stats (chrec: elt->chrec, stats: &stats); |
2971 | |
2972 | dump_chrecs_stats (file: dump_file, stats: &stats); |
2973 | } |
2974 | |
2975 | |
2976 | /* Initialize the analysis of scalar evolutions for LOOPS. */ |
2977 | |
2978 | void |
2979 | scev_initialize (void) |
2980 | { |
2981 | gcc_assert (! scev_initialized_p () |
2982 | && loops_state_satisfies_p (cfun, LOOPS_NORMAL)); |
2983 | |
2984 | scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (n: 100); |
2985 | |
2986 | for (auto loop : loops_list (cfun, 0)) |
2987 | loop->nb_iterations = NULL_TREE; |
2988 | } |
2989 | |
2990 | /* Return true if SCEV is initialized. */ |
2991 | |
2992 | bool |
2993 | scev_initialized_p (void) |
2994 | { |
2995 | return scalar_evolution_info != NULL; |
2996 | } |
2997 | |
2998 | /* Cleans up the information cached by the scalar evolutions analysis |
2999 | in the hash table. */ |
3000 | |
3001 | void |
3002 | scev_reset_htab (void) |
3003 | { |
3004 | if (!scalar_evolution_info) |
3005 | return; |
3006 | |
3007 | scalar_evolution_info->empty (); |
3008 | } |
3009 | |
3010 | /* Cleans up the information cached by the scalar evolutions analysis |
3011 | in the hash table and in the loop->nb_iterations. */ |
3012 | |
3013 | void |
3014 | scev_reset (void) |
3015 | { |
3016 | scev_reset_htab (); |
3017 | |
3018 | for (auto loop : loops_list (cfun, 0)) |
3019 | loop->nb_iterations = NULL_TREE; |
3020 | } |
3021 | |
3022 | /* Return true if the IV calculation in TYPE can overflow based on the knowledge |
3023 | of the upper bound on the number of iterations of LOOP, the BASE and STEP |
3024 | of IV. |
3025 | |
3026 | We do not use information whether TYPE can overflow so it is safe to |
3027 | use this test even for derived IVs not computed every iteration or |
3028 | hypotetical IVs to be inserted into code. */ |
3029 | |
3030 | bool |
3031 | iv_can_overflow_p (class loop *loop, tree type, tree base, tree step) |
3032 | { |
3033 | widest_int nit; |
3034 | wide_int base_min, base_max, step_min, step_max, type_min, type_max; |
3035 | signop sgn = TYPE_SIGN (type); |
3036 | value_range r; |
3037 | |
3038 | if (integer_zerop (step)) |
3039 | return false; |
3040 | |
3041 | if (!INTEGRAL_TYPE_P (TREE_TYPE (base)) |
3042 | || !get_range_query (cfun)->range_of_expr (r, expr: base) |
3043 | || r.varying_p () |
3044 | || r.undefined_p ()) |
3045 | return true; |
3046 | |
3047 | base_min = r.lower_bound (); |
3048 | base_max = r.upper_bound (); |
3049 | |
3050 | if (!INTEGRAL_TYPE_P (TREE_TYPE (step)) |
3051 | || !get_range_query (cfun)->range_of_expr (r, expr: step) |
3052 | || r.varying_p () |
3053 | || r.undefined_p ()) |
3054 | return true; |
3055 | |
3056 | step_min = r.lower_bound (); |
3057 | step_max = r.upper_bound (); |
3058 | |
3059 | if (!get_max_loop_iterations (loop, nit: &nit)) |
3060 | return true; |
3061 | |
3062 | type_min = wi::min_value (type); |
3063 | type_max = wi::max_value (type); |
3064 | |
3065 | /* Just sanity check that we don't see values out of the range of the type. |
3066 | In this case the arithmetics bellow would overflow. */ |
3067 | gcc_checking_assert (wi::ge_p (base_min, type_min, sgn) |
3068 | && wi::le_p (base_max, type_max, sgn)); |
3069 | |
3070 | /* Account the possible increment in the last ieration. */ |
3071 | wi::overflow_type overflow = wi::OVF_NONE; |
3072 | nit = wi::add (x: nit, y: 1, sgn: SIGNED, overflow: &overflow); |
3073 | if (overflow) |
3074 | return true; |
3075 | |
3076 | /* NIT is typeless and can exceed the precision of the type. In this case |
3077 | overflow is always possible, because we know STEP is non-zero. */ |
3078 | if (wi::min_precision (x: nit, sgn: UNSIGNED) > TYPE_PRECISION (type)) |
3079 | return true; |
3080 | wide_int nit2 = wide_int::from (x: nit, TYPE_PRECISION (type), sgn: UNSIGNED); |
3081 | |
3082 | /* If step can be positive, check that nit*step <= type_max-base. |
3083 | This can be done by unsigned arithmetic and we only need to watch overflow |
3084 | in the multiplication. The right hand side can always be represented in |
3085 | the type. */ |
3086 | if (sgn == UNSIGNED || !wi::neg_p (x: step_max)) |
3087 | { |
3088 | wi::overflow_type overflow = wi::OVF_NONE; |
3089 | if (wi::gtu_p (x: wi::mul (x: step_max, y: nit2, sgn: UNSIGNED, overflow: &overflow), |
3090 | y: type_max - base_max) |
3091 | || overflow) |
3092 | return true; |
3093 | } |
3094 | /* If step can be negative, check that nit*(-step) <= base_min-type_min. */ |
3095 | if (sgn == SIGNED && wi::neg_p (x: step_min)) |
3096 | { |
3097 | wi::overflow_type overflow, overflow2; |
3098 | overflow = overflow2 = wi::OVF_NONE; |
3099 | if (wi::gtu_p (x: wi::mul (x: wi::neg (x: step_min, overflow: &overflow2), |
3100 | y: nit2, sgn: UNSIGNED, overflow: &overflow), |
3101 | y: base_min - type_min) |
3102 | || overflow || overflow2) |
3103 | return true; |
3104 | } |
3105 | |
3106 | return false; |
3107 | } |
3108 | |
3109 | /* Given EV with form of "(type) {inner_base, inner_step}_loop", this |
3110 | function tries to derive condition under which it can be simplified |
3111 | into "{(type)inner_base, (type)inner_step}_loop". The condition is |
3112 | the maximum number that inner iv can iterate. */ |
3113 | |
3114 | static tree |
3115 | derive_simple_iv_with_niters (tree ev, tree *niters) |
3116 | { |
3117 | if (!CONVERT_EXPR_P (ev)) |
3118 | return ev; |
3119 | |
3120 | tree inner_ev = TREE_OPERAND (ev, 0); |
3121 | if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC) |
3122 | return ev; |
3123 | |
3124 | tree init = CHREC_LEFT (inner_ev); |
3125 | tree step = CHREC_RIGHT (inner_ev); |
3126 | if (TREE_CODE (init) != INTEGER_CST |
3127 | || TREE_CODE (step) != INTEGER_CST || integer_zerop (step)) |
3128 | return ev; |
3129 | |
3130 | tree type = TREE_TYPE (ev); |
3131 | tree inner_type = TREE_TYPE (inner_ev); |
3132 | if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type)) |
3133 | return ev; |
3134 | |
3135 | /* Type conversion in "(type) {inner_base, inner_step}_loop" can be |
3136 | folded only if inner iv won't overflow. We compute the maximum |
3137 | number the inner iv can iterate before overflowing and return the |
3138 | simplified affine iv. */ |
3139 | tree delta; |
3140 | init = fold_convert (type, init); |
3141 | step = fold_convert (type, step); |
3142 | ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), left: init, right: step); |
3143 | if (tree_int_cst_sign_bit (step)) |
3144 | { |
3145 | tree bound = lower_bound_in_type (inner_type, inner_type); |
3146 | delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound)); |
3147 | step = fold_build1 (NEGATE_EXPR, type, step); |
3148 | } |
3149 | else |
3150 | { |
3151 | tree bound = upper_bound_in_type (inner_type, inner_type); |
3152 | delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init); |
3153 | } |
3154 | *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step); |
3155 | return ev; |
3156 | } |
3157 | |
3158 | /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with |
3159 | respect to WRTO_LOOP and returns its base and step in IV if possible |
3160 | (see analyze_scalar_evolution_in_loop for more details on USE_LOOP |
3161 | and WRTO_LOOP). If ALLOW_NONCONSTANT_STEP is true, we want step to be |
3162 | invariant in LOOP. Otherwise we require it to be an integer constant. |
3163 | |
3164 | IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g. |
3165 | because it is computed in signed arithmetics). Consequently, adding an |
3166 | induction variable |
3167 | |
3168 | for (i = IV->base; ; i += IV->step) |
3169 | |
3170 | is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is |
3171 | false for the type of the induction variable, or you can prove that i does |
3172 | not wrap by some other argument. Otherwise, this might introduce undefined |
3173 | behavior, and |
3174 | |
3175 | i = iv->base; |
3176 | for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step)) |
3177 | |
3178 | must be used instead. |
3179 | |
3180 | When IV_NITERS is not NULL, this function also checks case in which OP |
3181 | is a conversion of an inner simple iv of below form: |
3182 | |
3183 | (outer_type){inner_base, inner_step}_loop. |
3184 | |
3185 | If type of inner iv has smaller precision than outer_type, it can't be |
3186 | folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because |
3187 | the inner iv could overflow/wrap. In this case, we derive a condition |
3188 | under which the inner iv won't overflow/wrap and do the simplification. |
3189 | The derived condition normally is the maximum number the inner iv can |
3190 | iterate, and will be stored in IV_NITERS. This is useful in loop niter |
3191 | analysis, to derive break conditions when a loop must terminate, when is |
3192 | infinite. */ |
3193 | |
3194 | bool |
3195 | simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop, |
3196 | tree op, affine_iv *iv, tree *iv_niters, |
3197 | bool allow_nonconstant_step) |
3198 | { |
3199 | enum tree_code code; |
3200 | tree type, ev, base, e; |
3201 | wide_int extreme; |
3202 | bool folded_casts; |
3203 | |
3204 | iv->base = NULL_TREE; |
3205 | iv->step = NULL_TREE; |
3206 | iv->no_overflow = false; |
3207 | |
3208 | type = TREE_TYPE (op); |
3209 | if (!POINTER_TYPE_P (type) |
3210 | && !INTEGRAL_TYPE_P (type)) |
3211 | return false; |
3212 | |
3213 | ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, version: op, |
3214 | folded_casts: &folded_casts); |
3215 | if (chrec_contains_undetermined (ev) |
3216 | || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num)) |
3217 | return false; |
3218 | |
3219 | if (tree_does_not_contain_chrecs (expr: ev)) |
3220 | { |
3221 | iv->base = ev; |
3222 | iv->step = build_int_cst (TREE_TYPE (ev), 0); |
3223 | iv->no_overflow = true; |
3224 | return true; |
3225 | } |
3226 | |
3227 | /* If we can derive valid scalar evolution with assumptions. */ |
3228 | if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC) |
3229 | ev = derive_simple_iv_with_niters (ev, niters: iv_niters); |
3230 | |
3231 | if (TREE_CODE (ev) != POLYNOMIAL_CHREC) |
3232 | return false; |
3233 | |
3234 | if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num) |
3235 | return false; |
3236 | |
3237 | iv->step = CHREC_RIGHT (ev); |
3238 | if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST) |
3239 | || tree_contains_chrecs (iv->step, NULL)) |
3240 | return false; |
3241 | |
3242 | iv->base = CHREC_LEFT (ev); |
3243 | if (tree_contains_chrecs (iv->base, NULL)) |
3244 | return false; |
3245 | |
3246 | iv->no_overflow = !folded_casts && nowrap_type_p (type); |
3247 | |
3248 | if (!iv->no_overflow |
3249 | && !iv_can_overflow_p (loop: wrto_loop, type, base: iv->base, step: iv->step)) |
3250 | iv->no_overflow = true; |
3251 | |
3252 | /* Try to simplify iv base: |
3253 | |
3254 | (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T |
3255 | == (signed T)(unsigned T)base + step |
3256 | == base + step |
3257 | |
3258 | If we can prove operation (base + step) doesn't overflow or underflow. |
3259 | Specifically, we try to prove below conditions are satisfied: |
3260 | |
3261 | base <= UPPER_BOUND (type) - step ;;step > 0 |
3262 | base >= LOWER_BOUND (type) - step ;;step < 0 |
3263 | |
3264 | This is done by proving the reverse conditions are false using loop's |
3265 | initial conditions. |
3266 | |
3267 | The is necessary to make loop niter, or iv overflow analysis easier |
3268 | for below example: |
3269 | |
3270 | int foo (int *a, signed char s, signed char l) |
3271 | { |
3272 | signed char i; |
3273 | for (i = s; i < l; i++) |
3274 | a[i] = 0; |
3275 | return 0; |
3276 | } |
3277 | |
3278 | Note variable I is firstly converted to type unsigned char, incremented, |
3279 | then converted back to type signed char. */ |
3280 | |
3281 | if (wrto_loop->num != use_loop->num) |
3282 | return true; |
3283 | |
3284 | if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST) |
3285 | return true; |
3286 | |
3287 | type = TREE_TYPE (iv->base); |
3288 | e = TREE_OPERAND (iv->base, 0); |
3289 | if (!tree_nop_conversion_p (type, TREE_TYPE (e)) |
3290 | || TREE_CODE (e) != PLUS_EXPR |
3291 | || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST |
3292 | || !tree_int_cst_equal (iv->step, |
3293 | fold_convert (type, TREE_OPERAND (e, 1)))) |
3294 | return true; |
3295 | e = TREE_OPERAND (e, 0); |
3296 | if (!CONVERT_EXPR_P (e)) |
3297 | return true; |
3298 | base = TREE_OPERAND (e, 0); |
3299 | if (!useless_type_conversion_p (type, TREE_TYPE (base))) |
3300 | return true; |
3301 | |
3302 | if (tree_int_cst_sign_bit (iv->step)) |
3303 | { |
3304 | code = LT_EXPR; |
3305 | extreme = wi::min_value (type); |
3306 | } |
3307 | else |
3308 | { |
3309 | code = GT_EXPR; |
3310 | extreme = wi::max_value (type); |
3311 | } |
3312 | wi::overflow_type overflow = wi::OVF_NONE; |
3313 | extreme = wi::sub (x: extreme, y: wi::to_wide (t: iv->step), |
3314 | TYPE_SIGN (type), overflow: &overflow); |
3315 | if (overflow) |
3316 | return true; |
3317 | e = fold_build2 (code, boolean_type_node, base, |
3318 | wide_int_to_tree (type, extreme)); |
3319 | e = simplify_using_initial_conditions (use_loop, e); |
3320 | if (!integer_zerop (e)) |
3321 | return true; |
3322 | |
3323 | if (POINTER_TYPE_P (TREE_TYPE (base))) |
3324 | code = POINTER_PLUS_EXPR; |
3325 | else |
3326 | code = PLUS_EXPR; |
3327 | |
3328 | iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step); |
3329 | return true; |
3330 | } |
3331 | |
3332 | /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple |
3333 | affine iv unconditionally. */ |
3334 | |
3335 | bool |
3336 | simple_iv (class loop *wrto_loop, class loop *use_loop, tree op, |
3337 | affine_iv *iv, bool allow_nonconstant_step) |
3338 | { |
3339 | return simple_iv_with_niters (wrto_loop, use_loop, op, iv, |
3340 | NULL, allow_nonconstant_step); |
3341 | } |
3342 | |
3343 | /* Finalize the scalar evolution analysis. */ |
3344 | |
3345 | void |
3346 | scev_finalize (void) |
3347 | { |
3348 | if (!scalar_evolution_info) |
3349 | return; |
3350 | scalar_evolution_info->empty (); |
3351 | scalar_evolution_info = NULL; |
3352 | free_numbers_of_iterations_estimates (cfun); |
3353 | } |
3354 | |
3355 | /* Returns true if the expression EXPR is considered to be too expensive |
3356 | for scev_const_prop. Sets *COND_OVERFLOW_P to true when the |
3357 | expression might contain a sub-expression that is subject to undefined |
3358 | overflow behavior and conditionally evaluated. */ |
3359 | |
3360 | static bool |
3361 | expression_expensive_p (tree expr, bool *cond_overflow_p, |
3362 | hash_map<tree, uint64_t> &cache, uint64_t &cost) |
3363 | { |
3364 | enum tree_code code; |
3365 | |
3366 | if (is_gimple_val (expr)) |
3367 | return false; |
3368 | |
3369 | code = TREE_CODE (expr); |
3370 | if (code == TRUNC_DIV_EXPR |
3371 | || code == CEIL_DIV_EXPR |
3372 | || code == FLOOR_DIV_EXPR |
3373 | || code == ROUND_DIV_EXPR |
3374 | || code == TRUNC_MOD_EXPR |
3375 | || code == CEIL_MOD_EXPR |
3376 | || code == FLOOR_MOD_EXPR |
3377 | || code == ROUND_MOD_EXPR |
3378 | || code == EXACT_DIV_EXPR) |
3379 | { |
3380 | /* Division by power of two is usually cheap, so we allow it. |
3381 | Forbid anything else. */ |
3382 | if (!integer_pow2p (TREE_OPERAND (expr, 1))) |
3383 | return true; |
3384 | } |
3385 | |
3386 | bool visited_p; |
3387 | uint64_t &local_cost = cache.get_or_insert (k: expr, existed: &visited_p); |
3388 | if (visited_p) |
3389 | { |
3390 | uint64_t tem = cost + local_cost; |
3391 | if (tem < cost) |
3392 | return true; |
3393 | cost = tem; |
3394 | return false; |
3395 | } |
3396 | local_cost = 1; |
3397 | |
3398 | uint64_t op_cost = 0; |
3399 | if (code == CALL_EXPR) |
3400 | { |
3401 | tree arg; |
3402 | call_expr_arg_iterator iter; |
3403 | /* Even though is_inexpensive_builtin might say true, we will get a |
3404 | library call for popcount when backend does not have an instruction |
3405 | to do so. We consider this to be expensive and generate |
3406 | __builtin_popcount only when backend defines it. */ |
3407 | optab optab; |
3408 | combined_fn cfn = get_call_combined_fn (expr); |
3409 | switch (cfn) |
3410 | { |
3411 | CASE_CFN_POPCOUNT: |
3412 | optab = popcount_optab; |
3413 | goto bitcount_call; |
3414 | CASE_CFN_CLZ: |
3415 | optab = clz_optab; |
3416 | goto bitcount_call; |
3417 | CASE_CFN_CTZ: |
3418 | optab = ctz_optab; |
3419 | bitcount_call: |
3420 | /* Check if opcode for popcount is available in the mode required. */ |
3421 | if (optab_handler (op: optab, |
3422 | TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)))) |
3423 | == CODE_FOR_nothing) |
3424 | { |
3425 | machine_mode mode; |
3426 | mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))); |
3427 | scalar_int_mode int_mode; |
3428 | |
3429 | /* If the mode is of 2 * UNITS_PER_WORD size, we can handle |
3430 | double-word popcount by emitting two single-word popcount |
3431 | instructions. */ |
3432 | if (is_a <scalar_int_mode> (m: mode, result: &int_mode) |
3433 | && GET_MODE_SIZE (mode: int_mode) == 2 * UNITS_PER_WORD |
3434 | && (optab_handler (op: optab, mode: word_mode) |
3435 | != CODE_FOR_nothing)) |
3436 | break; |
3437 | return true; |
3438 | } |
3439 | break; |
3440 | |
3441 | default: |
3442 | if (cfn == CFN_LAST |
3443 | || !is_inexpensive_builtin (get_callee_fndecl (expr))) |
3444 | return true; |
3445 | break; |
3446 | } |
3447 | |
3448 | FOR_EACH_CALL_EXPR_ARG (arg, iter, expr) |
3449 | if (expression_expensive_p (expr: arg, cond_overflow_p, cache, cost&: op_cost)) |
3450 | return true; |
3451 | *cache.get (k: expr) += op_cost; |
3452 | cost += op_cost + 1; |
3453 | return false; |
3454 | } |
3455 | |
3456 | if (code == COND_EXPR) |
3457 | { |
3458 | if (expression_expensive_p (TREE_OPERAND (expr, 0), cond_overflow_p, |
3459 | cache, cost&: op_cost) |
3460 | || (EXPR_P (TREE_OPERAND (expr, 1)) |
3461 | && EXPR_P (TREE_OPERAND (expr, 2))) |
3462 | /* If either branch has side effects or could trap. */ |
3463 | || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 1)) |
3464 | || generic_expr_could_trap_p (TREE_OPERAND (expr, 1)) |
3465 | || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 0)) |
3466 | || generic_expr_could_trap_p (TREE_OPERAND (expr, 0)) |
3467 | || expression_expensive_p (TREE_OPERAND (expr, 1), cond_overflow_p, |
3468 | cache, cost&: op_cost) |
3469 | || expression_expensive_p (TREE_OPERAND (expr, 2), cond_overflow_p, |
3470 | cache, cost&: op_cost)) |
3471 | return true; |
3472 | /* Conservatively assume there's overflow for now. */ |
3473 | *cond_overflow_p = true; |
3474 | *cache.get (k: expr) += op_cost; |
3475 | cost += op_cost + 1; |
3476 | return false; |
3477 | } |
3478 | |
3479 | switch (TREE_CODE_CLASS (code)) |
3480 | { |
3481 | case tcc_binary: |
3482 | case tcc_comparison: |
3483 | if (expression_expensive_p (TREE_OPERAND (expr, 1), cond_overflow_p, |
3484 | cache, cost&: op_cost)) |
3485 | return true; |
3486 | |
3487 | /* Fallthru. */ |
3488 | case tcc_unary: |
3489 | if (expression_expensive_p (TREE_OPERAND (expr, 0), cond_overflow_p, |
3490 | cache, cost&: op_cost)) |
3491 | return true; |
3492 | *cache.get (k: expr) += op_cost; |
3493 | cost += op_cost + 1; |
3494 | return false; |
3495 | |
3496 | default: |
3497 | return true; |
3498 | } |
3499 | } |
3500 | |
3501 | bool |
3502 | expression_expensive_p (tree expr, bool *cond_overflow_p) |
3503 | { |
3504 | hash_map<tree, uint64_t> cache; |
3505 | uint64_t expanded_size = 0; |
3506 | *cond_overflow_p = false; |
3507 | return (expression_expensive_p (expr, cond_overflow_p, cache, cost&: expanded_size) |
3508 | || expanded_size > cache.elements ()); |
3509 | } |
3510 | |
3511 | /* Match.pd function to match bitwise inductive expression. |
3512 | .i.e. |
3513 | _2 = 1 << _1; |
3514 | _3 = ~_2; |
3515 | tmp_9 = _3 & tmp_12; */ |
3516 | extern bool gimple_bitwise_induction_p (tree, tree *, tree (*)(tree)); |
3517 | |
3518 | /* Return the inductive expression of bitwise operation if possible, |
3519 | otherwise returns DEF. */ |
3520 | static tree |
3521 | analyze_and_compute_bitwise_induction_effect (class loop* loop, |
3522 | tree phidef, |
3523 | unsigned HOST_WIDE_INT niter) |
3524 | { |
3525 | tree match_op[3],inv, bitwise_scev; |
3526 | tree type = TREE_TYPE (phidef); |
3527 | gphi* = NULL; |
3528 | |
3529 | /* Match things like op2(MATCH_OP[2]), op1(MATCH_OP[1]), phidef(PHIDEF) |
3530 | |
3531 | op2 = PHI <phidef, inv> |
3532 | _1 = (int) bit_17; |
3533 | _3 = 1 << _1; |
3534 | op1 = ~_3; |
3535 | phidef = op1 & op2; */ |
3536 | if (!gimple_bitwise_induction_p (phidef, &match_op[0], NULL) |
3537 | || TREE_CODE (match_op[2]) != SSA_NAME |
3538 | || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[2]))) |
3539 | || gimple_bb (g: header_phi) != loop->header |
3540 | || gimple_phi_num_args (gs: header_phi) != 2) |
3541 | return NULL_TREE; |
3542 | |
3543 | if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef) |
3544 | return NULL_TREE; |
3545 | |
3546 | bitwise_scev = analyze_scalar_evolution (loop, var: match_op[1]); |
3547 | bitwise_scev = instantiate_parameters (loop, chrec: bitwise_scev); |
3548 | |
3549 | /* Make sure bits is in range of type precision. */ |
3550 | if (TREE_CODE (bitwise_scev) != POLYNOMIAL_CHREC |
3551 | || !INTEGRAL_TYPE_P (TREE_TYPE (bitwise_scev)) |
3552 | || !tree_fits_uhwi_p (CHREC_LEFT (bitwise_scev)) |
3553 | || tree_to_uhwi (CHREC_LEFT (bitwise_scev)) >= TYPE_PRECISION (type) |
3554 | || !tree_fits_shwi_p (CHREC_RIGHT (bitwise_scev))) |
3555 | return NULL_TREE; |
3556 | |
3557 | enum bit_op_kind |
3558 | { |
3559 | INDUCTION_BIT_CLEAR, |
3560 | INDUCTION_BIT_IOR, |
3561 | INDUCTION_BIT_XOR, |
3562 | INDUCTION_BIT_RESET, |
3563 | INDUCTION_ZERO, |
3564 | INDUCTION_ALL |
3565 | }; |
3566 | |
3567 | enum bit_op_kind induction_kind; |
3568 | enum tree_code code1 |
3569 | = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef)); |
3570 | enum tree_code code2 |
3571 | = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (match_op[0])); |
3572 | |
3573 | /* BIT_CLEAR: A &= ~(1 << bit) |
3574 | BIT_RESET: A ^= (1 << bit). |
3575 | BIT_IOR: A |= (1 << bit) |
3576 | BIT_ZERO: A &= (1 << bit) |
3577 | BIT_ALL: A |= ~(1 << bit) |
3578 | BIT_XOR: A ^= ~(1 << bit). |
3579 | bit is induction variable. */ |
3580 | switch (code1) |
3581 | { |
3582 | case BIT_AND_EXPR: |
3583 | induction_kind = code2 == BIT_NOT_EXPR |
3584 | ? INDUCTION_BIT_CLEAR |
3585 | : INDUCTION_ZERO; |
3586 | break; |
3587 | case BIT_IOR_EXPR: |
3588 | induction_kind = code2 == BIT_NOT_EXPR |
3589 | ? INDUCTION_ALL |
3590 | : INDUCTION_BIT_IOR; |
3591 | break; |
3592 | case BIT_XOR_EXPR: |
3593 | induction_kind = code2 == BIT_NOT_EXPR |
3594 | ? INDUCTION_BIT_XOR |
3595 | : INDUCTION_BIT_RESET; |
3596 | break; |
3597 | /* A ^ ~(1 << bit) is equal to ~(A ^ (1 << bit)). */ |
3598 | case BIT_NOT_EXPR: |
3599 | gcc_assert (code2 == BIT_XOR_EXPR); |
3600 | induction_kind = INDUCTION_BIT_XOR; |
3601 | break; |
3602 | default: |
3603 | gcc_unreachable (); |
3604 | } |
3605 | |
3606 | if (induction_kind == INDUCTION_ZERO) |
3607 | return build_zero_cst (type); |
3608 | if (induction_kind == INDUCTION_ALL) |
3609 | return build_all_ones_cst (type); |
3610 | |
3611 | wide_int bits = wi::zero (TYPE_PRECISION (type)); |
3612 | HOST_WIDE_INT bit_start = tree_to_shwi (CHREC_LEFT (bitwise_scev)); |
3613 | HOST_WIDE_INT step = tree_to_shwi (CHREC_RIGHT (bitwise_scev)); |
3614 | HOST_WIDE_INT bit_final = bit_start + step * niter; |
3615 | |
3616 | /* bit_start, bit_final in range of [0,TYPE_PRECISION) |
3617 | implies all bits are set in range. */ |
3618 | if (bit_final >= TYPE_PRECISION (type) |
3619 | || bit_final < 0) |
3620 | return NULL_TREE; |
3621 | |
3622 | /* Loop tripcount should be niter + 1. */ |
3623 | for (unsigned i = 0; i != niter + 1; i++) |
3624 | { |
3625 | bits = wi::set_bit (x: bits, bit: bit_start); |
3626 | bit_start += step; |
3627 | } |
3628 | |
3629 | bool inverted = false; |
3630 | switch (induction_kind) |
3631 | { |
3632 | case INDUCTION_BIT_CLEAR: |
3633 | code1 = BIT_AND_EXPR; |
3634 | inverted = true; |
3635 | break; |
3636 | case INDUCTION_BIT_IOR: |
3637 | code1 = BIT_IOR_EXPR; |
3638 | break; |
3639 | case INDUCTION_BIT_RESET: |
3640 | code1 = BIT_XOR_EXPR; |
3641 | break; |
3642 | /* A ^= ~(1 << bit) is special, when loop tripcount is even, |
3643 | it's equal to A ^= bits, else A ^= ~bits. */ |
3644 | case INDUCTION_BIT_XOR: |
3645 | code1 = BIT_XOR_EXPR; |
3646 | if (niter % 2 == 0) |
3647 | inverted = true; |
3648 | break; |
3649 | default: |
3650 | gcc_unreachable (); |
3651 | } |
3652 | |
3653 | if (inverted) |
3654 | bits = wi::bit_not (x: bits); |
3655 | |
3656 | inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop)); |
3657 | return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits)); |
3658 | } |
3659 | |
3660 | /* Match.pd function to match bitop with invariant expression |
3661 | .i.e. |
3662 | tmp_7 = _0 & _1; */ |
3663 | extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree)); |
3664 | |
3665 | /* Return the inductive expression of bitop with invariant if possible, |
3666 | otherwise returns DEF. */ |
3667 | static tree |
3668 | analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, |
3669 | tree niter) |
3670 | { |
3671 | tree match_op[2],inv; |
3672 | tree type = TREE_TYPE (phidef); |
3673 | gphi* = NULL; |
3674 | enum tree_code code; |
3675 | /* match thing like op0 (match[0]), op1 (match[1]), phidef (PHIDEF) |
3676 | |
3677 | op1 = PHI <phidef, inv> |
3678 | phidef = op0 & op1 |
3679 | if op0 is an invariant, it could change to |
3680 | phidef = op0 & inv. */ |
3681 | gimple *def; |
3682 | def = SSA_NAME_DEF_STMT (phidef); |
3683 | if (!(is_gimple_assign (gs: def) |
3684 | && ((code = gimple_assign_rhs_code (gs: def)), true) |
3685 | && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR |
3686 | || code == BIT_XOR_EXPR))) |
3687 | return NULL_TREE; |
3688 | |
3689 | match_op[0] = gimple_assign_rhs1 (gs: def); |
3690 | match_op[1] = gimple_assign_rhs2 (gs: def); |
3691 | |
3692 | if (TREE_CODE (match_op[1]) != SSA_NAME |
3693 | || !expr_invariant_in_loop_p (loop, match_op[0]) |
3694 | || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1]))) |
3695 | || gimple_bb (g: header_phi) != loop->header |
3696 | || gimple_phi_num_args (gs: header_phi) != 2) |
3697 | return NULL_TREE; |
3698 | |
3699 | if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef) |
3700 | return NULL_TREE; |
3701 | |
3702 | enum tree_code code1 |
3703 | = gimple_assign_rhs_code (gs: def); |
3704 | |
3705 | if (code1 == BIT_XOR_EXPR) |
3706 | { |
3707 | if (!tree_fits_uhwi_p (niter)) |
3708 | return NULL_TREE; |
3709 | unsigned HOST_WIDE_INT niter_num; |
3710 | niter_num = tree_to_uhwi (niter); |
3711 | if (niter_num % 2 != 0) |
3712 | match_op[0] = build_zero_cst (type); |
3713 | } |
3714 | |
3715 | inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop)); |
3716 | return fold_build2 (code1, type, inv, match_op[0]); |
3717 | } |
3718 | |
3719 | /* Do final value replacement for LOOP, return true if we did anything. */ |
3720 | |
3721 | bool |
3722 | final_value_replacement_loop (class loop *loop) |
3723 | { |
3724 | /* If we do not know exact number of iterations of the loop, we cannot |
3725 | replace the final value. */ |
3726 | edge exit = single_exit (loop); |
3727 | if (!exit) |
3728 | return false; |
3729 | |
3730 | tree niter = number_of_latch_executions (loop); |
3731 | if (niter == chrec_dont_know) |
3732 | return false; |
3733 | |
3734 | /* Ensure that it is possible to insert new statements somewhere. */ |
3735 | if (!single_pred_p (bb: exit->dest)) |
3736 | split_loop_exit_edge (exit); |
3737 | |
3738 | /* Set stmt insertion pointer. All stmts are inserted before this point. */ |
3739 | gimple_stmt_iterator gsi = gsi_after_labels (bb: exit->dest); |
3740 | |
3741 | class loop *ex_loop |
3742 | = superloop_at_depth (loop, |
3743 | loop_depth (loop: exit->dest->loop_father) + 1); |
3744 | |
3745 | bool any = false; |
3746 | gphi_iterator psi; |
3747 | for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); ) |
3748 | { |
3749 | gphi *phi = psi.phi (); |
3750 | tree rslt = PHI_RESULT (phi); |
3751 | tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit); |
3752 | tree def = phidef; |
3753 | if (virtual_operand_p (op: def)) |
3754 | { |
3755 | gsi_next (i: &psi); |
3756 | continue; |
3757 | } |
3758 | |
3759 | if (!POINTER_TYPE_P (TREE_TYPE (def)) |
3760 | && !INTEGRAL_TYPE_P (TREE_TYPE (def))) |
3761 | { |
3762 | gsi_next (i: &psi); |
3763 | continue; |
3764 | } |
3765 | |
3766 | bool folded_casts; |
3767 | def = analyze_scalar_evolution_in_loop (wrto_loop: ex_loop, use_loop: loop, version: def, |
3768 | folded_casts: &folded_casts); |
3769 | |
3770 | tree bitinv_def, bit_def; |
3771 | unsigned HOST_WIDE_INT niter_num; |
3772 | |
3773 | if (def != chrec_dont_know) |
3774 | def = compute_overall_effect_of_inner_loop (loop: ex_loop, evolution_fn: def); |
3775 | |
3776 | /* Handle bitop with invariant induction expression. |
3777 | |
3778 | .i.e |
3779 | for (int i =0 ;i < 32; i++) |
3780 | tmp &= bit2; |
3781 | if bit2 is an invariant in loop which could simple to |
3782 | tmp &= bit2. */ |
3783 | else if ((bitinv_def |
3784 | = analyze_and_compute_bitop_with_inv_effect (loop, |
3785 | phidef, niter))) |
3786 | def = bitinv_def; |
3787 | |
3788 | /* Handle bitwise induction expression. |
3789 | |
3790 | .i.e. |
3791 | for (int i = 0; i != 64; i+=3) |
3792 | res &= ~(1UL << i); |
3793 | |
3794 | RES can't be analyzed out by SCEV because it is not polynomially |
3795 | expressible, but in fact final value of RES can be replaced by |
3796 | RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} |
3797 | being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ |
3798 | else if (tree_fits_uhwi_p (niter) |
3799 | && (niter_num = tree_to_uhwi (niter)) != 0 |
3800 | && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) |
3801 | && (bit_def |
3802 | = analyze_and_compute_bitwise_induction_effect (loop, |
3803 | phidef, |
3804 | niter: niter_num))) |
3805 | def = bit_def; |
3806 | |
3807 | bool cond_overflow_p; |
3808 | if (!tree_does_not_contain_chrecs (expr: def) |
3809 | || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) |
3810 | /* Moving the computation from the loop may prolong life range |
3811 | of some ssa names, which may cause problems if they appear |
3812 | on abnormal edges. */ |
3813 | || contains_abnormal_ssa_name_p (def) |
3814 | /* Do not emit expensive expressions. The rationale is that |
3815 | when someone writes a code like |
3816 | |
3817 | while (n > 45) n -= 45; |
3818 | |
3819 | he probably knows that n is not large, and does not want it |
3820 | to be turned into n %= 45. */ |
3821 | || expression_expensive_p (expr: def, cond_overflow_p: &cond_overflow_p)) |
3822 | { |
3823 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3824 | { |
3825 | fprintf (stream: dump_file, format: "not replacing:\n " ); |
3826 | print_gimple_stmt (dump_file, phi, 0); |
3827 | fprintf (stream: dump_file, format: "\n" ); |
3828 | } |
3829 | gsi_next (i: &psi); |
3830 | continue; |
3831 | } |
3832 | |
3833 | /* Eliminate the PHI node and replace it by a computation outside |
3834 | the loop. */ |
3835 | if (dump_file) |
3836 | { |
3837 | fprintf (stream: dump_file, format: "\nfinal value replacement:\n " ); |
3838 | print_gimple_stmt (dump_file, phi, 0); |
3839 | fprintf (stream: dump_file, format: " with expr: " ); |
3840 | print_generic_expr (dump_file, def); |
3841 | } |
3842 | any = true; |
3843 | def = unshare_expr (def); |
3844 | remove_phi_node (&psi, false); |
3845 | |
3846 | /* Create the replacement statements. */ |
3847 | gimple_seq stmts; |
3848 | def = force_gimple_operand (def, &stmts, false, NULL_TREE); |
3849 | gassign *ass = gimple_build_assign (rslt, def); |
3850 | gimple_set_location (g: ass, |
3851 | location: gimple_phi_arg_location (phi, i: exit->dest_idx)); |
3852 | gimple_seq_add_stmt (&stmts, ass); |
3853 | |
3854 | /* If def's type has undefined overflow and there were folded |
3855 | casts, rewrite all stmts added for def into arithmetics |
3856 | with defined overflow behavior. */ |
3857 | if ((folded_casts |
3858 | && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) |
3859 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) |
3860 | || cond_overflow_p) |
3861 | { |
3862 | gimple_stmt_iterator gsi2; |
3863 | gsi2 = gsi_start (seq&: stmts); |
3864 | while (!gsi_end_p (i: gsi2)) |
3865 | { |
3866 | gimple *stmt = gsi_stmt (i: gsi2); |
3867 | if (is_gimple_assign (gs: stmt) |
3868 | && arith_code_with_undefined_signed_overflow |
3869 | (gimple_assign_rhs_code (gs: stmt))) |
3870 | rewrite_to_defined_overflow (&gsi2); |
3871 | gsi_next (i: &gsi2); |
3872 | } |
3873 | } |
3874 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
3875 | if (dump_file) |
3876 | { |
3877 | fprintf (stream: dump_file, format: "\n final stmt:\n " ); |
3878 | print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (rslt), 0); |
3879 | fprintf (stream: dump_file, format: "\n" ); |
3880 | } |
3881 | } |
3882 | |
3883 | return any; |
3884 | } |
3885 | |
3886 | #include "gt-tree-scalar-evolution.h" |
3887 | |