1 | /* Functions to determine/estimate number of iterations of a loop. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "rtl.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "tree-pass.h" |
28 | #include "ssa.h" |
29 | #include "gimple-pretty-print.h" |
30 | #include "diagnostic-core.h" |
31 | #include "stor-layout.h" |
32 | #include "fold-const.h" |
33 | #include "calls.h" |
34 | #include "intl.h" |
35 | #include "gimplify.h" |
36 | #include "gimple-iterator.h" |
37 | #include "tree-cfg.h" |
38 | #include "tree-ssa-loop-ivopts.h" |
39 | #include "tree-ssa-loop-niter.h" |
40 | #include "tree-ssa-loop.h" |
41 | #include "cfgloop.h" |
42 | #include "tree-chrec.h" |
43 | #include "tree-scalar-evolution.h" |
44 | #include "tree-dfa.h" |
45 | #include "internal-fn.h" |
46 | #include "gimple-range.h" |
47 | #include "sreal.h" |
48 | |
49 | |
50 | /* The maximum number of dominator BBs we search for conditions |
51 | of loop header copies we use for simplifying a conditional |
52 | expression. */ |
53 | #define MAX_DOMINATORS_TO_WALK 8 |
54 | |
55 | /* |
56 | |
57 | Analysis of number of iterations of an affine exit test. |
58 | |
59 | */ |
60 | |
/* Bounds on some value, BELOW <= X <= UP.  Both members are GMP
   arbitrary-precision integers; callers are responsible for
   mpz_init'ing and mpz_clear'ing them.  */

struct bounds
{
  mpz_t below, up;
};
67 | |
68 | /* Splits expression EXPR to a variable part VAR and constant OFFSET. */ |
69 | |
70 | static void |
71 | split_to_var_and_offset (tree expr, tree *var, mpz_t offset) |
72 | { |
73 | tree type = TREE_TYPE (expr); |
74 | tree op0, op1; |
75 | bool negate = false; |
76 | |
77 | *var = expr; |
78 | mpz_set_ui (offset, 0); |
79 | |
80 | switch (TREE_CODE (expr)) |
81 | { |
82 | case MINUS_EXPR: |
83 | negate = true; |
84 | /* Fallthru. */ |
85 | |
86 | case PLUS_EXPR: |
87 | case POINTER_PLUS_EXPR: |
88 | op0 = TREE_OPERAND (expr, 0); |
89 | op1 = TREE_OPERAND (expr, 1); |
90 | |
91 | if (TREE_CODE (op1) != INTEGER_CST) |
92 | break; |
93 | |
94 | *var = op0; |
95 | /* Always sign extend the offset. */ |
96 | wi::to_mpz (wi::to_wide (t: op1), offset, SIGNED); |
97 | if (negate) |
98 | mpz_neg (gmp_w: offset, gmp_u: offset); |
99 | break; |
100 | |
101 | case INTEGER_CST: |
102 | *var = build_int_cst_type (type, 0); |
103 | wi::to_mpz (wi::to_wide (t: expr), offset, TYPE_SIGN (type)); |
104 | break; |
105 | |
106 | default: |
107 | break; |
108 | } |
109 | } |
110 | |
/* From condition C0 CMP C1 derives information regarding the value range
   of VAR, which is of TYPE.  Results are stored in to BELOW and UP, which
   on entry must already hold conservative bounds on VAR; they are only
   ever tightened, never widened.  */

static void
refine_value_range_using_guard (tree type, tree var,
				tree c0, enum tree_code cmp, tree c1,
				mpz_t below, mpz_t up)
{
  tree varc0, varc1, ctype;
  mpz_t offc0, offc1;
  mpz_t mint, maxt, minc1, maxc1;
  bool no_wrap = nowrap_type_p (type);
  bool c0_ok, c1_ok;
  signop sgn = TYPE_SIGN (type);

  switch (cmp)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      STRIP_SIGN_NOPS (c0);
      STRIP_SIGN_NOPS (c1);
      ctype = TREE_TYPE (c0);
      if (!useless_type_conversion_p (ctype, type))
	return;

      break;

    case EQ_EXPR:
      /* We could derive quite precise information from EQ_EXPR, however,
	 such a guard is unlikely to appear, so we do not bother with
	 handling it.  */
      return;

    case NE_EXPR:
      /* NE_EXPR comparisons do not contain much of useful information,
	 except for cases of comparing with bounds.  */
      if (TREE_CODE (c1) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (type))
	return;

      /* Ensure that the condition speaks about an expression in the same
	 type as X and Y.  */
      ctype = TREE_TYPE (c0);
      if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
	return;
      c0 = fold_convert (type, c0);
      c1 = fold_convert (type, c1);

      if (operand_equal_p (var, c0, 0))
	{
	  /* Case of comparing VAR with its below/up bounds.  VAR != BELOW
	     strengthens to VAR > BELOW, and VAR != UP to VAR < UP.  */
	  auto_mpz valc1;
	  wi::to_mpz (wi::to_wide (c1), valc1, TYPE_SIGN (type));
	  if (mpz_cmp (valc1, below) == 0)
	    cmp = GT_EXPR;
	  if (mpz_cmp (valc1, up) == 0)
	    cmp = LT_EXPR;
	}
      else
	{
	  /* Case of comparing with the bounds of the type.  */
	  wide_int min = wi::min_value (type);
	  wide_int max = wi::max_value (type);

	  if (wi::to_wide (c1) == min)
	    cmp = GT_EXPR;
	  if (wi::to_wide (c1) == max)
	    cmp = LT_EXPR;
	}

      /* Quick return if no useful information.  */
      if (cmp == NE_EXPR)
	return;

      break;

    default:
      return;
    }

  mpz_init (offc0);
  mpz_init (offc1);
  /* Decompose both sides into variable part + constant offset.  */
  split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
  split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);

  /* We are only interested in comparisons of expressions based on VAR.  */
  if (operand_equal_p (var, varc1, 0))
    {
      /* Normalize so that the expression based on VAR is on the
	 left-hand side.  */
      std::swap (varc0, varc1);
      mpz_swap (offc0, offc1);
      cmp = swap_tree_comparison (cmp);
    }
  else if (!operand_equal_p (var, varc0, 0))
    {
      mpz_clear (offc0);
      mpz_clear (offc1);
      return;
    }

  mpz_init (mint);
  mpz_init (maxt);
  get_type_static_bounds (type, mint, maxt);
  mpz_init (minc1);
  mpz_init (maxc1);
  Value_Range r (TREE_TYPE (varc1));
  /* Setup range information for varc1.  */
  if (integer_zerop (varc1))
    {
      wi::to_mpz (0, minc1, TYPE_SIGN (type));
      wi::to_mpz (0, maxc1, TYPE_SIGN (type));
    }
  else if (TREE_CODE (varc1) == SSA_NAME
	   && INTEGRAL_TYPE_P (type)
	   && get_range_query (cfun)->range_of_expr (r, varc1)
	   && !r.undefined_p ()
	   && !r.varying_p ())
    {
      /* Use the range computed by the range query.  */
      gcc_assert (wi::le_p (r.lower_bound (), r.upper_bound (), sgn));
      wi::to_mpz (r.lower_bound (), minc1, sgn);
      wi::to_mpz (r.upper_bound (), maxc1, sgn);
    }
  else
    {
      /* Fall back to the full static range of TYPE.  */
      mpz_set (minc1, mint);
      mpz_set (maxc1, maxt);
    }

  /* Compute valid range information for varc1 + offc1.  Note nothing
     useful can be derived if it overflows or underflows.  Overflow or
     underflow could happen when:

       offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
       offc1 < 0 && varc1 + offc1 < MIN_VAL (type).  */
  mpz_add (minc1, minc1, offc1);
  mpz_add (maxc1, maxc1, offc1);
  c1_ok = (no_wrap
	   || mpz_sgn (offc1) == 0
	   || (mpz_sgn (offc1) < 0 && mpz_cmp (minc1, mint) >= 0)
	   || (mpz_sgn (offc1) > 0 && mpz_cmp (maxc1, maxt) <= 0));
  if (!c1_ok)
    goto end;

  /* Clamp the shifted range back into the type's static bounds.  */
  if (mpz_cmp (minc1, mint) < 0)
    mpz_set (minc1, mint);
  if (mpz_cmp (maxc1, maxt) > 0)
    mpz_set (maxc1, maxt);

  /* Canonicalize strict comparisons into non-strict ones by adjusting
     the relevant bound by one.  */
  if (cmp == LT_EXPR)
    {
      cmp = LE_EXPR;
      mpz_sub_ui (maxc1, maxc1, 1);
    }
  if (cmp == GT_EXPR)
    {
      cmp = GE_EXPR;
      mpz_add_ui (minc1, minc1, 1);
    }

  /* Compute range information for varc0.  If there is no overflow,
     the condition implied that

       (varc0) cmp (varc1 + offc1 - offc0)

     We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
     or the below bound if cmp is GE_EXPR.

     To prove there is no overflow/underflow, we need to check below
     four cases:
       1) cmp == LE_EXPR && offc0 > 0

	    (varc0 + offc0) doesn't overflow
	    && (varc1 + offc1 - offc0) doesn't underflow

       2) cmp == LE_EXPR && offc0 < 0

	    (varc0 + offc0) doesn't underflow
	    && (varc1 + offc1 - offc0) doesn't overflow

	  In this case, (varc0 + offc0) will never underflow if we can
	  prove (varc1 + offc1 - offc0) doesn't overflow.

       3) cmp == GE_EXPR && offc0 < 0

	    (varc0 + offc0) doesn't underflow
	    && (varc1 + offc1 - offc0) doesn't overflow

       4) cmp == GE_EXPR && offc0 > 0

	    (varc0 + offc0) doesn't overflow
	    && (varc1 + offc1 - offc0) doesn't underflow

	  In this case, (varc0 + offc0) will never overflow if we can
	  prove (varc1 + offc1 - offc0) doesn't underflow.

     Note we only handle case 2 and 4 in below code.  */

  mpz_sub (minc1, minc1, offc0);
  mpz_sub (maxc1, maxc1, offc0);
  c0_ok = (no_wrap
	   || mpz_sgn (offc0) == 0
	   || (cmp == LE_EXPR
	       && mpz_sgn (offc0) < 0 && mpz_cmp (maxc1, maxt) <= 0)
	   || (cmp == GE_EXPR
	       && mpz_sgn (offc0) > 0 && mpz_cmp (minc1, mint) >= 0));
  if (!c0_ok)
    goto end;

  /* Tighten UP for LE_EXPR, or BELOW for GE_EXPR.  */
  if (cmp == LE_EXPR)
    {
      if (mpz_cmp (up, maxc1) > 0)
	mpz_set (up, maxc1);
    }
  else
    {
      if (mpz_cmp (below, minc1) < 0)
	mpz_set (below, minc1);
    }

end:
  mpz_clear (mint);
  mpz_clear (maxt);
  mpz_clear (minc1);
  mpz_clear (maxc1);
  mpz_clear (offc0);
  mpz_clear (offc1);
}
339 | |
/* Stores estimate on the minimum/maximum value of the expression VAR + OFF
   in TYPE to MIN and MAX.  The estimate is refined using range info for
   VAR (and for PHIs fed by VAR on the preheader edge) and the guards
   dominating the header of LOOP.  */

static void
determine_value_range (class loop *loop, tree type, tree var, mpz_t off,
		       mpz_t min, mpz_t max)
{
  int cnt = 0;
  mpz_t minm, maxm;
  basic_block bb;
  wide_int minv, maxv;
  enum value_range_kind rtype = VR_VARYING;

  /* If the expression is a constant, we know its value exactly.  */
  if (integer_zerop (var))
    {
      mpz_set (min, off);
      mpz_set (max, off);
      return;
    }

  /* Start from the full static range of TYPE.  */
  get_type_static_bounds (type, min, max);

  /* See if we have some range info from VRP.  */
  if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
    {
      edge e = loop_preheader_edge (loop);
      signop sgn = TYPE_SIGN (type);
      gphi_iterator gsi;

      /* Either for VAR itself...  */
      Value_Range var_range (TREE_TYPE (var));
      get_range_query (cfun)->range_of_expr (var_range, var);
      if (var_range.varying_p () || var_range.undefined_p ())
	rtype = VR_VARYING;
      else
	rtype = VR_RANGE;
      if (!var_range.undefined_p ())
	{
	  minv = var_range.lower_bound ();
	  maxv = var_range.upper_bound ();
	}

      /* Or for PHI results in loop->header where VAR is used as
	 PHI argument from the loop preheader edge.  */
      Value_Range phi_range (TREE_TYPE (var));
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
	      && get_range_query (cfun)->range_of_expr (phi_range,
							gimple_phi_result (phi))
	      && !phi_range.varying_p ()
	      && !phi_range.undefined_p ())
	    {
	      if (rtype != VR_RANGE)
		{
		  /* First usable range seen.  */
		  rtype = VR_RANGE;
		  minv = phi_range.lower_bound ();
		  maxv = phi_range.upper_bound ();
		}
	      else
		{
		  /* Intersect with what we derived so far.  */
		  minv = wi::max (minv, phi_range.lower_bound (), sgn);
		  maxv = wi::min (maxv, phi_range.upper_bound (), sgn);
		  /* If the PHI result range are inconsistent with
		     the VAR range, give up on looking at the PHI
		     results.  This can happen if VR_UNDEFINED is
		     involved.  */
		  if (wi::gt_p (minv, maxv, sgn))
		    {
		      /* Fall back to the range of VAR itself.  */
		      Value_Range vr (TREE_TYPE (var));
		      get_range_query (cfun)->range_of_expr (vr, var);
		      if (vr.varying_p () || vr.undefined_p ())
			rtype = VR_VARYING;
		      else
			rtype = VR_RANGE;
		      if (!vr.undefined_p ())
			{
			  minv = vr.lower_bound ();
			  maxv = vr.upper_bound ();
			}
		      break;
		    }
		}
	    }
	}
      mpz_init (minm);
      mpz_init (maxm);
      if (rtype != VR_RANGE)
	{
	  /* No usable range info; use the static type bounds.  */
	  mpz_set (minm, min);
	  mpz_set (maxm, max);
	}
      else
	{
	  gcc_assert (wi::le_p (minv, maxv, sgn));
	  wi::to_mpz (minv, minm, sgn);
	  wi::to_mpz (maxv, maxm, sgn);
	}
      /* Now walk the dominators of the loop header and use the entry
	 guards to refine the estimates.  */
      for (bb = loop->header;
	   bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	{
	  edge e;
	  tree c0, c1;
	  enum tree_code cmp;

	  if (!single_pred_p (bb))
	    continue;
	  e = single_pred_edge (bb);

	  /* Only edges coming out of a condition are useful guards.  */
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    continue;

	  gcond *cond = as_a <gcond *> (*gsi_last_bb (e->src));
	  c0 = gimple_cond_lhs (cond);
	  cmp = gimple_cond_code (cond);
	  c1 = gimple_cond_rhs (cond);

	  /* On the false edge, the inverted condition holds.  */
	  if (e->flags & EDGE_FALSE_VALUE)
	    cmp = invert_tree_comparison (cmp, false);

	  refine_value_range_using_guard (type, var, c0, cmp, c1, minm, maxm);
	  ++cnt;
	}

      mpz_add (minm, minm, off);
      mpz_add (maxm, maxm, off);
      /* If the computation may not wrap or off is zero, then this
	 is always fine.  If off is negative and minv + off isn't
	 smaller than type's minimum, or off is positive and
	 maxv + off isn't bigger than type's maximum, use the more
	 precise range too.  */
      if (nowrap_type_p (type)
	  || mpz_sgn (off) == 0
	  || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
	  || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
	{
	  mpz_set (min, minm);
	  mpz_set (max, maxm);
	  mpz_clear (minm);
	  mpz_clear (maxm);
	  return;
	}
      mpz_clear (minm);
      mpz_clear (maxm);
    }

  /* If the computation may wrap, we know nothing about the value, except for
     the range of the type.  */
  if (!nowrap_type_p (type))
    return;

  /* Since the addition of OFF does not wrap, if OFF is positive, then we may
     add it to MIN, otherwise to MAX.  */
  if (mpz_sgn (off) < 0)
    mpz_add (max, max, off);
  else
    mpz_add (min, min, off);
}
503 | |
/* Stores the bounds on the difference of the values of the expressions
   (var + X) and (var + Y), computed in TYPE, to BNDS.  */

static void
bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y,
				    bounds *bnds)
{
  int rel = mpz_cmp (x, y);
  bool may_wrap = !nowrap_type_p (type);

  /* If X == Y, then the expressions are always equal.
     If X > Y, there are the following possibilities:
       a) neither of var + X and var + Y overflow or underflow, or both of
	  them do.  Then their difference is X - Y.
       b) var + X overflows, and var + Y does not.  Then the values of the
	  expressions are var + X - M and var + Y, where M is the range of
	  the type, and their difference is X - Y - M.
       c) var + Y underflows and var + X does not.  Then var + Y evaluates
	  to var + Y + M, and the difference again is X - Y - M.
     Therefore, if the arithmetics in type does not overflow, then the
     bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y)
     Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
     (X - Y, X - Y + M).  */

  if (rel == 0)
    {
      mpz_set_ui (bnds->below, 0);
      mpz_set_ui (bnds->up, 0);
      return;
    }

  /* M = 2^precision, the size of the value range of TYPE.  */
  auto_mpz m;
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED);
  mpz_add_ui (m, m, 1);
  mpz_sub (bnds->up, x, y);
  mpz_set (bnds->below, bnds->up);

  if (may_wrap)
    {
      /* Widen the bounds by M in the direction a wrap could move the
	 difference.  */
      if (rel > 0)
	mpz_sub (bnds->below, bnds->below, m);
      else
	mpz_add (bnds->up, bnds->up, m);
    }
}
549 | |
/* From condition C0 CMP C1 derives information regarding the
   difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
   and stores it to BNDS.  BNDS is only ever tightened, never widened.  */

static void
refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
			   tree vary, mpz_t offy,
			   tree c0, enum tree_code cmp, tree c1,
			   bounds *bnds)
{
  tree varc0, varc1, ctype;
  mpz_t offc0, offc1, loffx, loffy, bnd;
  bool lbound = false;
  bool no_wrap = nowrap_type_p (type);
  bool x_ok, y_ok;

  switch (cmp)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      STRIP_SIGN_NOPS (c0);
      STRIP_SIGN_NOPS (c1);
      ctype = TREE_TYPE (c0);
      if (!useless_type_conversion_p (ctype, type))
	return;

      break;

    case EQ_EXPR:
      /* We could derive quite precise information from EQ_EXPR, however, such
	 a guard is unlikely to appear, so we do not bother with handling
	 it.  */
      return;

    case NE_EXPR:
      /* NE_EXPR comparisons do not contain much of useful information, except for
	 special case of comparing with the bounds of the type.  */
      if (TREE_CODE (c1) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (type))
	return;

      /* Ensure that the condition speaks about an expression in the same type
	 as X and Y.  */
      ctype = TREE_TYPE (c0);
      if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
	return;
      c0 = fold_convert (type, c0);
      c1 = fold_convert (type, c1);

      /* C0 != MIN strengthens to C0 > MIN; C0 != MAX to C0 < MAX.  */
      if (TYPE_MIN_VALUE (type)
	  && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
	{
	  cmp = GT_EXPR;
	  break;
	}
      if (TYPE_MAX_VALUE (type)
	  && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
	{
	  cmp = LT_EXPR;
	  break;
	}

      return;
    default:
      return;
    }

  mpz_init (offc0);
  mpz_init (offc1);
  /* Decompose both sides of the guard into variable + offset.  */
  split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
  split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);

  /* We are only interested in comparisons of expressions based on VARX and
     VARY.  TODO -- we might also be able to derive some bounds from
     expressions containing just one of the variables.  */

  if (operand_equal_p (varx, varc1, 0))
    {
      /* Normalize so that VARX appears on the left-hand side.  */
      std::swap (varc0, varc1);
      mpz_swap (offc0, offc1);
      cmp = swap_tree_comparison (cmp);
    }

  if (!operand_equal_p (varx, varc0, 0)
      || !operand_equal_p (vary, varc1, 0))
    goto end;

  mpz_init_set (loffx, offx);
  mpz_init_set (loffy, offy);

  if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      /* Canonicalize to LT/LE; record that we are now deriving a lower
	 bound instead of an upper bound.  */
      std::swap (varx, vary);
      mpz_swap (offc0, offc1);
      mpz_swap (loffx, loffy);
      cmp = swap_tree_comparison (cmp);
      lbound = true;
    }

  /* If there is no overflow, the condition implies that

     (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).

     The overflows and underflows may complicate things a bit; each
     overflow decreases the appropriate offset by M, and underflow
     increases it by M.  The above inequality would not necessarily be
     true if

     -- VARX + OFFX underflows and VARX + OFFC0 does not, or
	VARX + OFFC0 overflows, but VARX + OFFX does not.
	This may only happen if OFFX < OFFC0.
     -- VARY + OFFY overflows and VARY + OFFC1 does not, or
	VARY + OFFC1 underflows and VARY + OFFY does not.
	This may only happen if OFFY > OFFC1.  */

  if (no_wrap)
    {
      x_ok = true;
      y_ok = true;
    }
  else
    {
      /* A zero variable part cannot wrap at all.  */
      x_ok = (integer_zerop (varx)
	      || mpz_cmp (loffx, offc0) >= 0);
      y_ok = (integer_zerop (vary)
	      || mpz_cmp (loffy, offc1) <= 0);
    }

  if (x_ok && y_ok)
    {
      /* BND = OFFX - OFFY + OFFC1 - OFFC0, the bound the guard implies
	 for (VARX + OFFX) - (VARY + OFFY).  */
      mpz_init (bnd);
      mpz_sub (bnd, loffx, loffy);
      mpz_add (bnd, bnd, offc1);
      mpz_sub (bnd, bnd, offc0);

      /* A strict inequality tightens the bound by one.  */
      if (cmp == LT_EXPR)
	mpz_sub_ui (bnd, bnd, 1);

      if (lbound)
	{
	  /* We derived an upper bound of the swapped difference, i.e.
	     a lower bound of the original one.  */
	  mpz_neg (bnd, bnd);
	  if (mpz_cmp (bnds->below, bnd) < 0)
	    mpz_set (bnds->below, bnd);
	}
      else
	{
	  if (mpz_cmp (bnd, bnds->up) < 0)
	    mpz_set (bnds->up, bnd);
	}
      mpz_clear (bnd);
    }

  mpz_clear (loffx);
  mpz_clear (loffy);
end:
  mpz_clear (offc0);
  mpz_clear (offc1);
}
710 | |
/* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
   The subtraction is considered to be performed in arbitrary precision,
   without overflows.  BNDS->below and BNDS->up are mpz_init'ed here;
   the caller must mpz_clear them.

   We do not attempt to be too clever regarding the value ranges of X and
   Y; most of the time, they are just integers or ssa names offsetted by
   integer.  However, we try to use the information contained in the
   comparisons before the loop (usually created by loop header copying).  */

static void
bound_difference (class loop *loop, tree x, tree y, bounds *bnds)
{
  tree type = TREE_TYPE (x);
  tree varx, vary;
  mpz_t offx, offy;
  int cnt = 0;
  edge e;
  basic_block bb;
  tree c0, c1;
  enum tree_code cmp;

  /* Get rid of unnecessary casts, but preserve the value of
     the expressions.  */
  STRIP_SIGN_NOPS (x);
  STRIP_SIGN_NOPS (y);

  mpz_init (bnds->below);
  mpz_init (bnds->up);
  mpz_init (offx);
  mpz_init (offy);
  /* Decompose X = VARX + OFFX and Y = VARY + OFFY.  */
  split_to_var_and_offset (x, &varx, offx);
  split_to_var_and_offset (y, &vary, offy);

  if (!integer_zerop (varx)
      && operand_equal_p (varx, vary, 0))
    {
      /* Special case VARX == VARY -- we just need to compare the
	 offsets.  The matters are a bit more complicated in the
	 case addition of offsets may wrap.  */
      bound_difference_of_offsetted_base (type, offx, offy, bnds);
    }
  else
    {
      /* Otherwise, use the value ranges to determine the initial
	 estimates on below and up.  */
      auto_mpz minx, maxx, miny, maxy;
      determine_value_range (loop, type, varx, offx, minx, maxx);
      determine_value_range (loop, type, vary, offy, miny, maxy);

      mpz_sub (bnds->below, minx, maxy);
      mpz_sub (bnds->up, maxx, miny);
    }

  /* If both X and Y are constants, we cannot get any more precise.  */
  if (integer_zerop (varx) && integer_zerop (vary))
    goto end;

  /* Now walk the dominators of the loop header and use the entry
     guards to refine the estimates.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      /* Only edges coming out of a condition carry guard information.  */
      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      gcond *cond = as_a <gcond *> (*gsi_last_bb (e->src));
      c0 = gimple_cond_lhs (cond);
      cmp = gimple_cond_code (cond);
      c1 = gimple_cond_rhs (cond);

      /* On the false edge, the inverted condition holds.  */
      if (e->flags & EDGE_FALSE_VALUE)
	cmp = invert_tree_comparison (cmp, false);

      refine_bounds_using_guard (type, varx, offx, vary, offy,
				 c0, cmp, c1, bnds);
      ++cnt;
    }

end:
  mpz_clear (offx);
  mpz_clear (offy);
}
798 | |
799 | /* Update the bounds in BNDS that restrict the value of X to the bounds |
800 | that restrict the value of X + DELTA. X can be obtained as a |
801 | difference of two values in TYPE. */ |
802 | |
803 | static void |
804 | bounds_add (bounds *bnds, const widest_int &delta, tree type) |
805 | { |
806 | mpz_t mdelta, max; |
807 | |
808 | mpz_init (mdelta); |
809 | wi::to_mpz (delta, mdelta, SIGNED); |
810 | |
811 | mpz_init (max); |
812 | wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED); |
813 | |
814 | mpz_add (bnds->up, bnds->up, mdelta); |
815 | mpz_add (bnds->below, bnds->below, mdelta); |
816 | |
817 | if (mpz_cmp (bnds->up, max) > 0) |
818 | mpz_set (bnds->up, max); |
819 | |
820 | mpz_neg (gmp_w: max, gmp_u: max); |
821 | if (mpz_cmp (bnds->below, max) < 0) |
822 | mpz_set (bnds->below, max); |
823 | |
824 | mpz_clear (mdelta); |
825 | mpz_clear (max); |
826 | } |
827 | |
828 | /* Update the bounds in BNDS that restrict the value of X to the bounds |
829 | that restrict the value of -X. */ |
830 | |
831 | static void |
832 | bounds_negate (bounds *bnds) |
833 | { |
834 | mpz_t tmp; |
835 | |
836 | mpz_init_set (tmp, bnds->up); |
837 | mpz_neg (gmp_w: bnds->up, gmp_u: bnds->below); |
838 | mpz_neg (gmp_w: bnds->below, gmp_u: tmp); |
839 | mpz_clear (tmp); |
840 | } |
841 | |
/* Returns inverse of X modulo 2^s, where MASK = 2^s-1.  X is assumed
   odd -- otherwise no inverse exists.

   CTR below equals s - 1.  For odd X, x^(2^(s-1)) == 1 (mod 2^s), so
   x^(2^(s-1) - 1) is the modular inverse of X.  Each loop iteration
   multiplies the current X power into the result and squares X, which
   after CTR steps yields x^(2^ctr - 1); the final AND with MASK
   reduces the result modulo 2^s.  */

static tree
inverse (tree x, tree mask)
{
  tree type = TREE_TYPE (x);
  tree rslt;
  unsigned ctr = tree_floor_log2 (mask);

  if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT)
    {
      /* Fast path: everything fits into a host wide integer.  The
	 unsigned multiplications may wrap; this is harmless since the
	 result is reduced by IMASK at the end.  */
      unsigned HOST_WIDE_INT ix;
      unsigned HOST_WIDE_INT imask;
      unsigned HOST_WIDE_INT irslt = 1;

      gcc_assert (cst_and_fits_in_hwi (x));
      gcc_assert (cst_and_fits_in_hwi (mask));

      ix = int_cst_value (x);
      imask = int_cst_value (mask);

      for (; ctr; ctr--)
	{
	  irslt *= ix;
	  ix *= ix;
	}
      irslt &= imask;

      rslt = build_int_cst_type (type, irslt);
    }
  else
    {
      /* Wide types: perform the same computation on INTEGER_CST trees.  */
      rslt = build_int_cst (type, 1);
      for (; ctr; ctr--)
	{
	  rslt = int_const_binop (MULT_EXPR, rslt, x);
	  x = int_const_binop (MULT_EXPR, x, x);
	}
      rslt = int_const_binop (BIT_AND_EXPR, rslt, mask);
    }

  return rslt;
}
885 | |
/* Derives the upper bound BND on the number of executions of loop with exit
   condition S * i <> C.  If NO_OVERFLOW is true, then the control variable of
   the loop does not overflow.  EXIT_MUST_BE_TAKEN is true if we are guaranteed
   that the loop ends through this exit, i.e., the induction variable ever
   reaches the value of C.

   The value C is equal to final - base, where final and base are the final and
   initial value of the actual induction variable in the analysed loop.  BNDS
   bounds the value of this difference when computed in signed type with
   unbounded range, while the computation of C is performed in an unsigned
   type with the range matching the range of the type of the induction variable.
   In particular, BNDS.up contains an upper bound on C in the following cases:
   -- if the iv must reach its final value without overflow, i.e., if
      NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
   -- if final >= base, which we know to hold when BNDS.below >= 0.  */

static void
number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
			     bounds *bnds, bool exit_must_be_taken)
{
  widest_int max;
  mpz_t d;
  tree type = TREE_TYPE (c);
  /* Whether BNDS.up is a valid upper bound on C (see the head comment).
     Must be computed before NO_OVERFLOW/EXIT_MUST_BE_TAKEN are
     strengthened below.  */
  bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
		       || mpz_sgn (bnds->below) >= 0);

  /* Detect the case that C is an exact multiple of S: when S is one,
     when both are constants that divide evenly, or when overflow is
     undefined and C is a provable multiple of S.  */
  if (integer_onep (s)
      || (TREE_CODE (c) == INTEGER_CST
	  && TREE_CODE (s) == INTEGER_CST
	  && wi::mod_trunc (wi::to_wide (c), wi::to_wide (s),
			    TYPE_SIGN (type)) == 0)
      || (TYPE_OVERFLOW_UNDEFINED (type)
	  && multiple_of_p (type, c, s)))
    {
      /* If C is an exact multiple of S, then its value will be reached before
	 the induction variable overflows (unless the loop is exited in some
	 other way before).  Note that the actual induction variable in the
	 loop (which ranges from base to final instead of from 0 to C) may
	 overflow, in which case BNDS.up will not be giving a correct upper
	 bound on C; thus, BNDS_U_VALID had to be computed in advance.  */
      no_overflow = true;
      exit_must_be_taken = true;
    }

  /* If the induction variable can overflow, the number of iterations is at
     most the period of the control variable (or infinite, but in that case
     the whole # of iterations analysis will fail).  */
  if (!no_overflow)
    {
      /* The period of S * i modulo 2^prec is 2^(prec - ctz (S)).  */
      max = wi::mask <widest_int> (TYPE_PRECISION (type)
				   - wi::ctz (wi::to_wide (s)), false);
      wi::to_mpz (max, bnd, UNSIGNED);
      return;
    }

  /* Now we know that the induction variable does not overflow, so the loop
     iterates at most (range of type / S) times.  */
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);

  /* If the induction variable is guaranteed to reach the value of C before
     overflow, ...  */
  if (exit_must_be_taken)
    {
      /* ... then we can strengthen this to C / S, and possibly we can use
	 the upper bound on C given by BNDS.  */
      if (TREE_CODE (c) == INTEGER_CST)
	wi::to_mpz (wi::to_wide (c), bnd, UNSIGNED);
      else if (bnds_u_valid)
	mpz_set (bnd, bnds->up);
    }

  /* Finally divide by the step, rounding the iteration count down.  */
  mpz_init (d);
  wi::to_mpz (wi::to_wide (s), d, UNSIGNED);
  mpz_fdiv_q (bnd, bnd, d);
  mpz_clear (d);
}
962 | |
/* Determines number of iterations of loop whose ending condition
   is IV <> FINAL.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that the exit must be taken eventually, i.e., that the IV
   ever reaches the value FINAL (we derived this earlier, and possibly set
   NITER->assumptions to make sure this is the case).  BNDS contains the
   bounds on the difference FINAL - IV->base.  Returns true if the number
   of iterations was determined (possibly guarded by NITER->assumptions),
   false otherwise.  */

static bool
number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv,
			 tree final, class tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = unsigned_type_for (type);
  tree s, c, d, bits, assumption, tmp, bound;

  niter->control = *iv;
  niter->bound = final;
  niter->cmp = NE_EXPR;

  /* Rearrange the terms so that we get inequality S * i <> C, with S
     positive.  Also cast everything to the unsigned type.  If IV does
     not overflow, BNDS bounds the value of C.  Also, this is the
     case if the computation |FINAL - IV->base| does not overflow, i.e.,
     if BNDS->below in the result is nonnegative.  */
  if (tree_int_cst_sign_bit (iv->step))
    {
      /* Negative step: use S = -step and C = base - final.  */
      s = fold_convert (niter_type,
			fold_build1 (NEGATE_EXPR, type, iv->step));
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv->base),
		       fold_convert (niter_type, final));
      bounds_negate (bnds);
    }
  else
    {
      s = fold_convert (niter_type, iv->step);
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, final),
		       fold_convert (niter_type, iv->base));
    }

  /* Derive an upper bound on the number of iterations and record it in
     NITER->max.  */
  auto_mpz max;
  number_of_iterations_ne_max (bnd: max, no_overflow: iv->no_overflow, c, s, bnds,
			       exit_must_be_taken);
  niter->max = widest_int::from (x: wi::from_mpz (niter_type, max, false),
				 TYPE_SIGN (niter_type));

  /* Compute no-overflow information for the control iv.  This can be
     proven when below two conditions are satisfied:

       1) IV evaluates toward FINAL at beginning, i.e:
	    base <= FINAL ; step > 0
	    base >= FINAL ; step < 0

       2) |FINAL - base| is an exact multiple of step.

     Unfortunately, it's hard to prove above conditions after pass loop-ch
     because loop with exit condition (IV != FINAL) usually will be guarded
     by initial-condition (IV.base - IV.step != FINAL).  In this case, we
     can alternatively try to prove below conditions:

       1') IV evaluates toward FINAL at beginning, i.e:
	    new_base = base - step < FINAL ; step > 0
					     && base - step doesn't underflow
	    new_base = base - step > FINAL ; step < 0
					     && base - step doesn't overflow

     Please refer to PR34114 as an example of loop-ch's impact.

     Note, for NE_EXPR, base equals to FINAL is a special case, in
     which the loop exits immediately, and the iv does not overflow.

     Also note, we prove condition 2) by checking base and final separately
     along with condition 1) or 1').  Since we ensure the difference
     computation of c does not wrap with cond below and the adjusted s
     will fit a signed type as well as an unsigned we can safely do
     this using the type of the IV if it is not pointer typed.  */
  tree mtype = type;
  if (POINTER_TYPE_P (type))
    mtype = niter_type;
  if (!niter->control.no_overflow
      && (integer_onep (s)
	  || (multiple_of_p (mtype, fold_convert (mtype, iv->base),
			     fold_convert (mtype, s), false)
	      && multiple_of_p (mtype, fold_convert (mtype, final),
				fold_convert (mtype, s), false))))
    {
      tree t, cond, relaxed_cond = boolean_false_node;

      if (tree_int_cst_sign_bit (iv->step))
	{
	  cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
	  if (TREE_CODE (type) == INTEGER_TYPE)
	    {
	      /* Only when base - step doesn't overflow.  */
	      t = TYPE_MAX_VALUE (type);
	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
	      t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base);
	      if (integer_nonzerop (t))
		{
		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
		  relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node, t,
					      final);
		}
	    }
	}
      else
	{
	  cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
	  if (TREE_CODE (type) == INTEGER_TYPE)
	    {
	      /* Only when base - step doesn't underflow.  */
	      t = TYPE_MIN_VALUE (type);
	      t = fold_build2 (PLUS_EXPR, type, t, iv->step);
	      t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base);
	      if (integer_nonzerop (t))
		{
		  t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
		  relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node, t,
					      final);
		}
	    }
	}

      /* Try to prove the strict condition 1) first; fall back to the
	 relaxed condition 1') derived from the loop-ch guard.  */
      t = simplify_using_initial_conditions (loop, cond);
      if (!t || !integer_onep (t))
	t = simplify_using_initial_conditions (loop, relaxed_cond);

      if (t && integer_onep (t))
	{
	  /* The iv does not overflow and C is an exact multiple of S, so
	     the number of iterations is exactly C / S.  */
	  niter->control.no_overflow = true;
	  niter->niter = fold_build2 (EXACT_DIV_EXPR, niter_type, c, s);
	  return true;
	}
    }

  /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
     is infinite.  Otherwise, the number of iterations is
     (inverse(s/d) * (c/d)) mod (size of mode/d).  */
  bits = num_ending_zeros (s);
  bound = build_low_bits_mask (niter_type,
			       (TYPE_PRECISION (niter_type)
				- tree_to_uhwi (bits)));

  /* D = 2^BITS is the largest power of two dividing S; strip it from S so
     that S becomes odd (and hence invertible modulo a power of two).  */
  d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
			       build_int_cst (niter_type, 1), bits);
  s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);

  if (!exit_must_be_taken)
    {
      /* If we cannot assume that the exit is taken eventually, record the
	 assumptions for divisibility of c.  */
      assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
      assumption = fold_build2 (EQ_EXPR, boolean_type_node,
				assumption, build_int_cst (niter_type, 0));
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
  if (integer_onep (s))
    {
      niter->niter = c;
    }
  else
    {
      /* S is odd here, so it has a multiplicative inverse modulo the
	 reduced modulus; multiply and mask to that modulus.  */
      tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
      niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
    }
  return true;
}
1136 | |
/* Checks whether we can determine the final value of the control variable
   of the loop with ending condition IV0 < IV1 (computed in TYPE).
   DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
   of the step.  The assumptions necessary to ensure that the computation
   of the final value does not overflow are recorded in NITER.  If we
   find the final value, we adjust DELTA and return TRUE.  Otherwise
   we return false.  BNDS bounds the value of IV1->base - IV0->base,
   and will be updated by the same amount as DELTA.  EXIT_MUST_BE_TAKEN is
   true if we know that the exit must be taken eventually.  */

static bool
number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
			       class tree_niter_desc *niter,
			       tree *delta, tree step,
			       bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = TREE_TYPE (step);
  /* MOD becomes the adjustment needed to make DELTA an exact multiple of
     STEP (zero when it already is).  */
  tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
  tree tmod;
  tree assumption = boolean_true_node, bound, noloop;
  bool fv_comp_no_overflow;
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* We can only reason about a constant adjustment.  */
  if (TREE_CODE (mod) != INTEGER_CST)
    return false;
  if (integer_nonzerop (mod))
    mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
  tmod = fold_convert (type1, mod);

  /* MMOD = -MOD; comparing it against BNDS->below decides whether the
     loop may exit before its first iteration.  */
  auto_mpz mmod;
  wi::to_mpz (wi::to_wide (t: mod), mmod, UNSIGNED);
  mpz_neg (gmp_w: mmod, gmp_u: mmod);

  /* If the induction variable does not overflow and the exit is taken,
     then the computation of the final value does not overflow.  This is
     also obviously the case if the new final value is equal to the
     current one.  Finally, we postulate this for pointer type variables,
     as the code cannot rely on the object to that the pointer points being
     placed at the end of the address space (and more pragmatically,
     TYPE_{MIN,MAX}_VALUE is not defined for pointers).  */
  if (integer_zerop (mod) || POINTER_TYPE_P (type))
    fv_comp_no_overflow = true;
  else if (!exit_must_be_taken)
    fv_comp_no_overflow = false;
  else
    fv_comp_no_overflow =
	(iv0->no_overflow && integer_nonzerop (iv0->step))
	|| (iv1->no_overflow && integer_nonzerop (iv1->step));

  if (integer_nonzerop (iv0->step))
    {
      /* The final value of the iv is iv1->base + MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base <= iv1->base + MOD.  */
      if (!fv_comp_no_overflow)
	{
	  bound = fold_build2 (MINUS_EXPR, type1,
			       TYPE_MAX_VALUE (type1), tmod);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	  if (integer_zerop (assumption))
	    return false;
	}
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build_pointer_plus (iv1->base, tmod));
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build2 (PLUS_EXPR, type1,
					   iv1->base, tmod));
    }
  else
    {
      /* The final value of the iv is iv0->base - MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base - MOD <= iv1->base.  */
      if (!fv_comp_no_overflow)
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type1), tmod);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	  if (integer_zerop (assumption))
	    return false;
	}
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build_pointer_plus (iv0->base,
						       fold_build1 (NEGATE_EXPR,
								    type1, tmod)),
			      iv1->base);
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build2 (MINUS_EXPR, type1,
					   iv0->base, tmod),
			      iv1->base);
    }

  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions,
				      assumption);
  if (!integer_zerop (noloop))
    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				      niter->may_be_zero,
				      noloop);
  /* Both DELTA and the recorded bounds grow by the same amount MOD.  */
  bounds_add (bnds, delta: wi::to_widest (t: mod), type);
  *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);

  return true;
}
1256 | |
1257 | /* Add assertions to NITER that ensure that the control variable of the loop |
1258 | with ending condition IV0 < IV1 does not overflow. Types of IV0 and IV1 |
1259 | are TYPE. Returns false if we can prove that there is an overflow, true |
1260 | otherwise. STEP is the absolute value of the step. */ |
1261 | |
1262 | static bool |
1263 | assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1, |
1264 | class tree_niter_desc *niter, tree step) |
1265 | { |
1266 | tree bound, d, assumption, diff; |
1267 | tree niter_type = TREE_TYPE (step); |
1268 | |
1269 | if (integer_nonzerop (iv0->step)) |
1270 | { |
1271 | /* for (i = iv0->base; i < iv1->base; i += iv0->step) */ |
1272 | if (iv0->no_overflow) |
1273 | return true; |
1274 | |
1275 | /* If iv0->base is a constant, we can determine the last value before |
1276 | overflow precisely; otherwise we conservatively assume |
1277 | MAX - STEP + 1. */ |
1278 | |
1279 | if (TREE_CODE (iv0->base) == INTEGER_CST) |
1280 | { |
1281 | d = fold_build2 (MINUS_EXPR, niter_type, |
1282 | fold_convert (niter_type, TYPE_MAX_VALUE (type)), |
1283 | fold_convert (niter_type, iv0->base)); |
1284 | diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step); |
1285 | } |
1286 | else |
1287 | diff = fold_build2 (MINUS_EXPR, niter_type, step, |
1288 | build_int_cst (niter_type, 1)); |
1289 | bound = fold_build2 (MINUS_EXPR, type, |
1290 | TYPE_MAX_VALUE (type), fold_convert (type, diff)); |
1291 | assumption = fold_build2 (LE_EXPR, boolean_type_node, |
1292 | iv1->base, bound); |
1293 | } |
1294 | else |
1295 | { |
1296 | /* for (i = iv1->base; i > iv0->base; i += iv1->step) */ |
1297 | if (iv1->no_overflow) |
1298 | return true; |
1299 | |
1300 | if (TREE_CODE (iv1->base) == INTEGER_CST) |
1301 | { |
1302 | d = fold_build2 (MINUS_EXPR, niter_type, |
1303 | fold_convert (niter_type, iv1->base), |
1304 | fold_convert (niter_type, TYPE_MIN_VALUE (type))); |
1305 | diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step); |
1306 | } |
1307 | else |
1308 | diff = fold_build2 (MINUS_EXPR, niter_type, step, |
1309 | build_int_cst (niter_type, 1)); |
1310 | bound = fold_build2 (PLUS_EXPR, type, |
1311 | TYPE_MIN_VALUE (type), fold_convert (type, diff)); |
1312 | assumption = fold_build2 (GE_EXPR, boolean_type_node, |
1313 | iv0->base, bound); |
1314 | } |
1315 | |
1316 | if (integer_zerop (assumption)) |
1317 | return false; |
1318 | if (!integer_nonzerop (assumption)) |
1319 | niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
1320 | niter->assumptions, assumption); |
1321 | |
1322 | iv0->no_overflow = true; |
1323 | iv1->no_overflow = true; |
1324 | return true; |
1325 | } |
1326 | |
1327 | /* Add an assumption to NITER that a loop whose ending condition |
1328 | is IV0 < IV1 rolls. TYPE is the type of the control iv. BNDS |
1329 | bounds the value of IV1->base - IV0->base. */ |
1330 | |
1331 | static void |
1332 | assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1, |
1333 | class tree_niter_desc *niter, bounds *bnds) |
1334 | { |
1335 | tree assumption = boolean_true_node, bound, diff; |
1336 | tree mbz, mbzl, mbzr, type1; |
1337 | bool rolls_p, no_overflow_p; |
1338 | widest_int dstep; |
1339 | mpz_t mstep, max; |
1340 | |
1341 | /* We are going to compute the number of iterations as |
1342 | (iv1->base - iv0->base + step - 1) / step, computed in the unsigned |
1343 | variant of TYPE. This formula only works if |
1344 | |
1345 | -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1 |
1346 | |
1347 | (where MAX is the maximum value of the unsigned variant of TYPE, and |
1348 | the computations in this formula are performed in full precision, |
1349 | i.e., without overflows). |
1350 | |
1351 | Usually, for loops with exit condition iv0->base + step * i < iv1->base, |
1352 | we have a condition of the form iv0->base - step < iv1->base before the loop, |
1353 | and for loops iv0->base < iv1->base - step * i the condition |
1354 | iv0->base < iv1->base + step, due to loop header copying, which enable us |
1355 | to prove the lower bound. |
1356 | |
1357 | The upper bound is more complicated. Unless the expressions for initial |
1358 | and final value themselves contain enough information, we usually cannot |
1359 | derive it from the context. */ |
1360 | |
1361 | /* First check whether the answer does not follow from the bounds we gathered |
1362 | before. */ |
1363 | if (integer_nonzerop (iv0->step)) |
1364 | dstep = wi::to_widest (t: iv0->step); |
1365 | else |
1366 | { |
1367 | dstep = wi::sext (x: wi::to_widest (t: iv1->step), TYPE_PRECISION (type)); |
1368 | dstep = -dstep; |
1369 | } |
1370 | |
1371 | mpz_init (mstep); |
1372 | wi::to_mpz (dstep, mstep, UNSIGNED); |
1373 | mpz_neg (gmp_w: mstep, gmp_u: mstep); |
1374 | mpz_add_ui (mstep, mstep, 1); |
1375 | |
1376 | rolls_p = mpz_cmp (mstep, bnds->below) <= 0; |
1377 | |
1378 | mpz_init (max); |
1379 | wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED); |
1380 | mpz_add (max, max, mstep); |
1381 | no_overflow_p = (mpz_cmp (bnds->up, max) <= 0 |
1382 | /* For pointers, only values lying inside a single object |
1383 | can be compared or manipulated by pointer arithmetics. |
1384 | Gcc in general does not allow or handle objects larger |
1385 | than half of the address space, hence the upper bound |
1386 | is satisfied for pointers. */ |
1387 | || POINTER_TYPE_P (type)); |
1388 | mpz_clear (mstep); |
1389 | mpz_clear (max); |
1390 | |
1391 | if (rolls_p && no_overflow_p) |
1392 | return; |
1393 | |
1394 | type1 = type; |
1395 | if (POINTER_TYPE_P (type)) |
1396 | type1 = sizetype; |
1397 | |
1398 | /* Now the hard part; we must formulate the assumption(s) as expressions, and |
1399 | we must be careful not to introduce overflow. */ |
1400 | |
1401 | if (integer_nonzerop (iv0->step)) |
1402 | { |
1403 | diff = fold_build2 (MINUS_EXPR, type1, |
1404 | iv0->step, build_int_cst (type1, 1)); |
1405 | |
1406 | /* We need to know that iv0->base >= MIN + iv0->step - 1. Since |
1407 | 0 address never belongs to any object, we can assume this for |
1408 | pointers. */ |
1409 | if (!POINTER_TYPE_P (type)) |
1410 | { |
1411 | bound = fold_build2 (PLUS_EXPR, type1, |
1412 | TYPE_MIN_VALUE (type), diff); |
1413 | assumption = fold_build2 (GE_EXPR, boolean_type_node, |
1414 | iv0->base, bound); |
1415 | } |
1416 | |
1417 | /* And then we can compute iv0->base - diff, and compare it with |
1418 | iv1->base. */ |
1419 | mbzl = fold_build2 (MINUS_EXPR, type1, |
1420 | fold_convert (type1, iv0->base), diff); |
1421 | mbzr = fold_convert (type1, iv1->base); |
1422 | } |
1423 | else |
1424 | { |
1425 | diff = fold_build2 (PLUS_EXPR, type1, |
1426 | iv1->step, build_int_cst (type1, 1)); |
1427 | |
1428 | if (!POINTER_TYPE_P (type)) |
1429 | { |
1430 | bound = fold_build2 (PLUS_EXPR, type1, |
1431 | TYPE_MAX_VALUE (type), diff); |
1432 | assumption = fold_build2 (LE_EXPR, boolean_type_node, |
1433 | iv1->base, bound); |
1434 | } |
1435 | |
1436 | mbzl = fold_convert (type1, iv0->base); |
1437 | mbzr = fold_build2 (MINUS_EXPR, type1, |
1438 | fold_convert (type1, iv1->base), diff); |
1439 | } |
1440 | |
1441 | if (!integer_nonzerop (assumption)) |
1442 | niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
1443 | niter->assumptions, assumption); |
1444 | if (!rolls_p) |
1445 | { |
1446 | mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr); |
1447 | niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, |
1448 | niter->may_be_zero, mbz); |
1449 | } |
1450 | } |
1451 | |
/* Determines number of iterations of loop whose ending condition
   is IV0 < IV1 which is like: {base, -C} < n, or n < {base, C}.
   The number of iterations is stored to NITER.  */

static bool
number_of_iterations_until_wrap (class loop *loop, tree type, affine_iv *iv0,
				 affine_iv *iv1, class tree_niter_desc *niter)
{
  tree niter_type = unsigned_type_for (type);
  tree step, num, assumptions, may_be_zero, span;
  wide_int high, low, max, min;

  /* The loop body runs zero times whenever IV1->base <= IV0->base holds
     at entry; if that is provably always the case, give up.  */
  may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
  if (integer_onep (may_be_zero))
    return false;

  int prec = TYPE_PRECISION (type);
  signop sgn = TYPE_SIGN (type);
  min = wi::min_value (prec, sgn);
  max = wi::max_value (prec, sgn);

  /* n < {base, C}.  */
  if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step))
    {
      step = iv1->step;
      /* MIN + C - 1 <= n.  */
      tree last = wide_int_to_tree (type, cst: min + wi::to_wide (t: step) - 1);
      assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base);
      if (integer_zerop (assumptions))
	return false;

      /* The iv can take at most MAX - base steps before wrapping.  */
      num = fold_build2 (MINUS_EXPR, niter_type,
			 wide_int_to_tree (niter_type, max),
			 fold_convert (niter_type, iv1->base));

      /* When base has the form iv + 1, if we know iv >= n, then iv + 1 < n
	 only when iv + 1 overflows, i.e. when iv == TYPE_VALUE_MAX.  */
      if (sgn == UNSIGNED
	  && integer_onep (step)
	  && TREE_CODE (iv1->base) == PLUS_EXPR
	  && integer_onep (TREE_OPERAND (iv1->base, 1)))
	{
	  tree cond = fold_build2 (GE_EXPR, boolean_type_node,
				   TREE_OPERAND (iv1->base, 0), iv0->base);
	  cond = simplify_using_initial_conditions (loop, cond);
	  if (integer_onep (cond))
	    may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node,
				       TREE_OPERAND (iv1->base, 0),
				       TYPE_MAX_VALUE (type));
	}

      /* Derive a [LOW, HIGH] range of the moving iv for the NITER->max
	 computation below.  */
      high = max;
      if (TREE_CODE (iv1->base) == INTEGER_CST)
	low = wi::to_wide (t: iv1->base) - 1;
      else if (TREE_CODE (iv0->base) == INTEGER_CST)
	low = wi::to_wide (t: iv0->base);
      else
	low = min;
    }
  /* {base, -C} < n.  */
  else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
    {
      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
      /* MAX - C + 1 >= n.  */
      tree last = wide_int_to_tree (type, cst: max - wi::to_wide (t: step) + 1);
      assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base);
      if (integer_zerop (assumptions))
	return false;

      num = fold_build2 (MINUS_EXPR, niter_type,
			 fold_convert (niter_type, iv0->base),
			 wide_int_to_tree (niter_type, min));
      low = min;
      if (TREE_CODE (iv0->base) == INTEGER_CST)
	high = wi::to_wide (t: iv0->base) + 1;
      else if (TREE_CODE (iv1->base) == INTEGER_CST)
	high = wi::to_wide (t: iv1->base);
      else
	high = max;
    }
  else
    return false;

  /* (delta + step - 1) / step */
  step = fold_convert (niter_type, step);
  num = fold_build2 (PLUS_EXPR, niter_type, num, step);
  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step);

  /* Upper bound on the iteration count, ceil ((HIGH - LOW) / STEP),
     computed in full precision.  */
  widest_int delta, s;
  delta = widest_int::from (x: high, sgn) - widest_int::from (x: low, sgn);
  s = wi::to_widest (t: step);
  delta = delta + s - 1;
  niter->max = wi::udiv_floor (x: delta, y: s);

  niter->may_be_zero = may_be_zero;

  if (!integer_nonzerop (assumptions))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumptions);

  /* The control iv wraps by construction, so it can never be marked
     no-overflow.  */
  niter->control.no_overflow = false;

  /* Update bound and exit condition as:
     bound = niter * STEP + (IVbase - STEP).
     { IVbase - STEP, +, STEP } != bound
     Here, biasing IVbase by 1 step makes 'bound' be the value before wrap.
   */
  tree base_type = TREE_TYPE (niter->control.base);
  if (POINTER_TYPE_P (base_type))
    {
      /* Do the subtraction in the unsigned variant to avoid undefined
	 pointer overflow; convert back afterwards.  */
      tree utype = unsigned_type_for (base_type);
      niter->control.base
	= fold_build2 (MINUS_EXPR, utype,
		       fold_convert (utype, niter->control.base),
		       fold_convert (utype, niter->control.step));
      niter->control.base = fold_convert (base_type, niter->control.base);
    }
  else
    niter->control.base
      = fold_build2 (MINUS_EXPR, base_type, niter->control.base,
		     niter->control.step);

  span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
		      fold_convert (niter_type, niter->control.step));
  niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
			      fold_convert (niter_type, niter->control.base));
  niter->bound = fold_convert (type, niter->bound);
  niter->cmp = NE_EXPR;

  return true;
}
1583 | |
1584 | /* Determines number of iterations of loop whose ending condition |
1585 | is IV0 < IV1. TYPE is the type of the iv. The number of |
1586 | iterations is stored to NITER. BNDS bounds the difference |
1587 | IV1->base - IV0->base. EXIT_MUST_BE_TAKEN is true if we know |
1588 | that the exit must be taken eventually. */ |
1589 | |
1590 | static bool |
1591 | number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0, |
1592 | affine_iv *iv1, class tree_niter_desc *niter, |
1593 | bool exit_must_be_taken, bounds *bnds) |
1594 | { |
1595 | tree niter_type = unsigned_type_for (type); |
1596 | tree delta, step, s; |
1597 | mpz_t mstep, tmp; |
1598 | |
1599 | if (integer_nonzerop (iv0->step)) |
1600 | { |
1601 | niter->control = *iv0; |
1602 | niter->cmp = LT_EXPR; |
1603 | niter->bound = iv1->base; |
1604 | } |
1605 | else |
1606 | { |
1607 | niter->control = *iv1; |
1608 | niter->cmp = GT_EXPR; |
1609 | niter->bound = iv0->base; |
1610 | } |
1611 | |
1612 | /* {base, -C} < n, or n < {base, C} */ |
1613 | if (tree_int_cst_sign_bit (iv0->step) |
1614 | || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) |
1615 | return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter); |
1616 | |
1617 | delta = fold_build2 (MINUS_EXPR, niter_type, |
1618 | fold_convert (niter_type, iv1->base), |
1619 | fold_convert (niter_type, iv0->base)); |
1620 | |
1621 | /* First handle the special case that the step is +-1. */ |
1622 | if ((integer_onep (iv0->step) && integer_zerop (iv1->step)) |
1623 | || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step))) |
1624 | { |
1625 | /* for (i = iv0->base; i < iv1->base; i++) |
1626 | |
1627 | or |
1628 | |
1629 | for (i = iv1->base; i > iv0->base; i--). |
1630 | |
1631 | In both cases # of iterations is iv1->base - iv0->base, assuming that |
1632 | iv1->base >= iv0->base. |
1633 | |
1634 | First try to derive a lower bound on the value of |
1635 | iv1->base - iv0->base, computed in full precision. If the difference |
1636 | is nonnegative, we are done, otherwise we must record the |
1637 | condition. */ |
1638 | |
1639 | if (mpz_sgn (bnds->below) < 0) |
1640 | niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node, |
1641 | iv1->base, iv0->base); |
1642 | niter->niter = delta; |
1643 | niter->max = widest_int::from (x: wi::from_mpz (niter_type, bnds->up, false), |
1644 | TYPE_SIGN (niter_type)); |
1645 | niter->control.no_overflow = true; |
1646 | return true; |
1647 | } |
1648 | |
1649 | if (integer_nonzerop (iv0->step)) |
1650 | step = fold_convert (niter_type, iv0->step); |
1651 | else |
1652 | step = fold_convert (niter_type, |
1653 | fold_build1 (NEGATE_EXPR, type, iv1->step)); |
1654 | |
1655 | /* If we can determine the final value of the control iv exactly, we can |
1656 | transform the condition to != comparison. In particular, this will be |
1657 | the case if DELTA is constant. */ |
1658 | if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, delta: &delta, step, |
1659 | exit_must_be_taken, bnds)) |
1660 | { |
1661 | affine_iv zps; |
1662 | |
1663 | zps.base = build_int_cst (niter_type, 0); |
1664 | zps.step = step; |
1665 | /* number_of_iterations_lt_to_ne will add assumptions that ensure that |
1666 | zps does not overflow. */ |
1667 | zps.no_overflow = true; |
1668 | |
1669 | return number_of_iterations_ne (loop, type, iv: &zps, |
1670 | final: delta, niter, exit_must_be_taken: true, bnds); |
1671 | } |
1672 | |
1673 | /* Make sure that the control iv does not overflow. */ |
1674 | if (!assert_no_overflow_lt (type, iv0, iv1, niter, step)) |
1675 | return false; |
1676 | |
1677 | /* We determine the number of iterations as (delta + step - 1) / step. For |
1678 | this to work, we must know that iv1->base >= iv0->base - step + 1, |
1679 | otherwise the loop does not roll. */ |
1680 | assert_loop_rolls_lt (type, iv0, iv1, niter, bnds); |
1681 | |
1682 | s = fold_build2 (MINUS_EXPR, niter_type, |
1683 | step, build_int_cst (niter_type, 1)); |
1684 | delta = fold_build2 (PLUS_EXPR, niter_type, delta, s); |
1685 | niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step); |
1686 | |
1687 | mpz_init (mstep); |
1688 | mpz_init (tmp); |
1689 | wi::to_mpz (wi::to_wide (t: step), mstep, UNSIGNED); |
1690 | mpz_add (tmp, bnds->up, mstep); |
1691 | mpz_sub_ui (tmp, tmp, 1); |
1692 | mpz_fdiv_q (tmp, tmp, mstep); |
1693 | niter->max = widest_int::from (x: wi::from_mpz (niter_type, tmp, false), |
1694 | TYPE_SIGN (niter_type)); |
1695 | mpz_clear (mstep); |
1696 | mpz_clear (tmp); |
1697 | |
1698 | return true; |
1699 | } |
1700 | |
1701 | /* Determines number of iterations of loop whose ending condition |
1702 | is IV0 <= IV1. TYPE is the type of the iv. The number of |
1703 | iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if |
1704 | we know that this condition must eventually become false (we derived this |
1705 | earlier, and possibly set NITER->assumptions to make sure this |
1706 | is the case). BNDS bounds the difference IV1->base - IV0->base. */ |
1707 | |
1708 | static bool |
1709 | number_of_iterations_le (class loop *loop, tree type, affine_iv *iv0, |
1710 | affine_iv *iv1, class tree_niter_desc *niter, |
1711 | bool exit_must_be_taken, bounds *bnds) |
1712 | { |
1713 | tree assumption; |
1714 | tree type1 = type; |
1715 | if (POINTER_TYPE_P (type)) |
1716 | type1 = sizetype; |
1717 | |
1718 | /* Say that IV0 is the control variable. Then IV0 <= IV1 iff |
1719 | IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest |
1720 | value of the type. This we must know anyway, since if it is |
1721 | equal to this value, the loop rolls forever. We do not check |
1722 | this condition for pointer type ivs, as the code cannot rely on |
1723 | the object to that the pointer points being placed at the end of |
1724 | the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is |
1725 | not defined for pointers). */ |
1726 | |
1727 | if (!exit_must_be_taken && !POINTER_TYPE_P (type)) |
1728 | { |
1729 | if (integer_nonzerop (iv0->step)) |
1730 | assumption = fold_build2 (NE_EXPR, boolean_type_node, |
1731 | iv1->base, TYPE_MAX_VALUE (type)); |
1732 | else |
1733 | assumption = fold_build2 (NE_EXPR, boolean_type_node, |
1734 | iv0->base, TYPE_MIN_VALUE (type)); |
1735 | |
1736 | if (integer_zerop (assumption)) |
1737 | return false; |
1738 | if (!integer_nonzerop (assumption)) |
1739 | niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
1740 | niter->assumptions, assumption); |
1741 | } |
1742 | |
1743 | if (integer_nonzerop (iv0->step)) |
1744 | { |
1745 | if (POINTER_TYPE_P (type)) |
1746 | iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1); |
1747 | else |
1748 | iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base, |
1749 | build_int_cst (type1, 1)); |
1750 | } |
1751 | else if (POINTER_TYPE_P (type)) |
1752 | iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1); |
1753 | else |
1754 | iv0->base = fold_build2 (MINUS_EXPR, type1, |
1755 | iv0->base, build_int_cst (type1, 1)); |
1756 | |
1757 | bounds_add (bnds, delta: 1, type: type1); |
1758 | |
1759 | return number_of_iterations_lt (loop, type, iv0, iv1, niter, exit_must_be_taken, |
1760 | bnds); |
1761 | } |
1762 | |
1763 | /* Dumps description of affine induction variable IV to FILE. */ |
1764 | |
1765 | static void |
1766 | dump_affine_iv (FILE *file, affine_iv *iv) |
1767 | { |
1768 | if (!integer_zerop (iv->step)) |
1769 | fprintf (stream: file, format: "[" ); |
1770 | |
1771 | print_generic_expr (dump_file, iv->base, TDF_SLIM); |
1772 | |
1773 | if (!integer_zerop (iv->step)) |
1774 | { |
1775 | fprintf (stream: file, format: ", + , " ); |
1776 | print_generic_expr (dump_file, iv->step, TDF_SLIM); |
1777 | fprintf (stream: file, format: "]%s" , iv->no_overflow ? "(no_overflow)" : "" ); |
1778 | } |
1779 | } |
1780 | |
/* Determine the number of iterations according to condition (for staying
   inside loop) which compares two induction variables using comparison
   operator CODE. The induction variable on left side of the comparison
   is IV0, the right-hand side is IV1. Both induction variables must have
   type TYPE, which must be an integer or pointer type. The steps of the
   ivs must be constants (or NULL_TREE, which is interpreted as constant zero).

   LOOP is the loop whose number of iterations we are determining.

   ONLY_EXIT is true if we are sure this is the only way the loop could be
   exited (including possibly non-returning function calls, exceptions, etc.)
   -- in this case we can use the information whether the control induction
   variables can overflow or not in a more efficient way.

   if EVERY_ITERATION is true, we know the test is executed on every iteration.

   The results (number of iterations and assumptions as described in
   comments at class tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
   Returns false if it fails to determine number of iterations, true if it
   was determined (possibly with some assumptions). */

static bool
number_of_iterations_cond (class loop *loop,
			   tree type, affine_iv *iv0, enum tree_code code,
			   affine_iv *iv1, class tree_niter_desc *niter,
			   bool only_exit, bool every_iteration)
{
  bool exit_must_be_taken = false, ret;
  /* Bounds on IV1->base - IV0->base, filled in by bound_difference below.  */
  bounds bnds;

  /* If the test is not executed every iteration, wrapping may make the test
     to pass again.
     TODO: the overflow case can be still used as unreliable estimate of upper
     bound. But we have no API to pass it down to number of iterations code
     and, at present, it will not use it anyway. */
  if (!every_iteration
      && (!iv0->no_overflow || !iv1->no_overflow
	  || code == NE_EXPR || code == EQ_EXPR))
    return false;

  /* The meaning of these assumptions is this:
     if !assumptions
       then the rest of information does not have to be valid
     if may_be_zero then the loop does not roll, even if
       niter != 0. */
  niter->assumptions = boolean_true_node;
  niter->may_be_zero = boolean_false_node;
  niter->niter = NULL_TREE;
  niter->max = 0;
  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
     the control variable is on lhs. */
  if (code == GE_EXPR || code == GT_EXPR
      || (code == NE_EXPR && integer_zerop (iv0->step)))
    {
      std::swap (a&: iv0, b&: iv1);
      code = swap_tree_comparison (code);
    }

  if (POINTER_TYPE_P (type))
    {
      /* Comparison of pointers is undefined unless both iv0 and iv1 point
	 to the same object. If they do, the control variable cannot wrap
	 (as wrap around the bounds of memory will never return a pointer
	 that would be guaranteed to point to the same object, even if we
	 avoid undefined behavior by casting to size_t and back). */
      iv0->no_overflow = true;
      iv1->no_overflow = true;
    }

  /* If the control induction variable does not overflow and the only exit
     from the loop is the one that we analyze, we know it must be taken
     eventually. */
  if (only_exit)
    {
      if (!integer_zerop (iv0->step) && iv0->no_overflow)
	exit_must_be_taken = true;
      else if (!integer_zerop (iv1->step) && iv1->no_overflow)
	exit_must_be_taken = true;
    }

  /* We can handle cases which neither of the sides of the comparison is
     invariant:

       {iv0.base, iv0.step} cmp_code {iv1.base, iv1.step}
     as if:
       {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}

     provided that either below condition is satisfied:

       a) the test is NE_EXPR;
       b) iv0 and iv1 do not overflow and iv0.step - iv1.step is of
	  the same sign and of less or equal magnitude than iv0.step

     This rarely occurs in practice, but it is simple enough to manage. */
  if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
      tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
					   iv0->step, iv1->step);

      /* For code other than NE_EXPR we have to ensure moving the evolution
	 of IV1 to that of IV0 does not introduce overflow. */
      if (TREE_CODE (step) != INTEGER_CST
	  || !iv0->no_overflow || !iv1->no_overflow)
	{
	  if (code != NE_EXPR)
	    return false;
	  iv0->no_overflow = false;
	}
      /* If the new step of IV0 has changed sign or is of greater
	 magnitude then we do not know whether IV0 does overflow
	 and thus the transform is not valid for code other than NE_EXPR. */
      else if (tree_int_cst_sign_bit (step) != tree_int_cst_sign_bit (iv0->step)
	       || wi::gtu_p (x: wi::abs (x: wi::to_widest (t: step)),
			     y: wi::abs (x: wi::to_widest (t: iv0->step))))
	{
	  if (POINTER_TYPE_P (type) && code != NE_EXPR)
	    /* For relational pointer compares we have further guarantees
	       that the pointers always point to the same object (or one
	       after it) and that objects do not cross the zero page. So
	       not only is the transform always valid for relational
	       pointer compares, we also know the resulting IV does not
	       overflow. */
	    ;
	  else if (code != NE_EXPR)
	    return false;
	  else
	    iv0->no_overflow = false;
	}

      iv0->step = step;
      iv1->step = build_int_cst (step_type, 0);
      iv1->no_overflow = true;
    }

  /* If the result of the comparison is a constant, the loop is weird. More
     precise handling would be possible, but the situation is not common enough
     to waste time on it. */
  if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
    return false;

  /* If the loop exits immediately, there is nothing to do. */
  tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
  if (tem && integer_zerop (tem))
    {
      if (!every_iteration)
	return false;
      niter->niter = build_int_cst (unsigned_type_for (type), 0);
      niter->max = 0;
      return true;
    }

  /* OK, now we know we have a senseful loop. Handle several cases, depending
     on what comparison operator is used. */
  bound_difference (loop, x: iv1->base, y: iv0->base, bnds: &bnds);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file,
	       format: "Analyzing # of iterations of loop %d\n" , loop->num);

      fprintf (stream: dump_file, format: " exit condition " );
      dump_affine_iv (file: dump_file, iv: iv0);
      fprintf (stream: dump_file, format: " %s " ,
	       code == NE_EXPR ? "!="
	       : code == LT_EXPR ? "<"
	       : "<=" );
      dump_affine_iv (file: dump_file, iv: iv1);
      fprintf (stream: dump_file, format: "\n" );

      fprintf (stream: dump_file, format: " bounds on difference of bases: " );
      mpz_out_str (dump_file, 10, bnds.below);
      fprintf (stream: dump_file, format: " ... " );
      mpz_out_str (dump_file, 10, bnds.up);
      fprintf (stream: dump_file, format: "\n" );
    }

  /* Only NE_EXPR, LT_EXPR and LE_EXPR are possible after the
     canonicalization above.  */
  switch (code)
    {
    case NE_EXPR:
      gcc_assert (integer_zerop (iv1->step));
      ret = number_of_iterations_ne (loop, type, iv: iv0, final: iv1->base, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    case LT_EXPR:
      ret = number_of_iterations_lt (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    case LE_EXPR:
      ret = number_of_iterations_le (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, bnds: &bnds);
      break;

    default:
      gcc_unreachable ();
    }

  /* Release the auxiliary bounds allocated by bound_difference.  */
  mpz_clear (bnds.up);
  mpz_clear (bnds.below);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (ret)
	{
	  fprintf (stream: dump_file, format: " result:\n" );
	  if (!integer_nonzerop (niter->assumptions))
	    {
	      fprintf (stream: dump_file, format: " under assumptions " );
	      print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
	      fprintf (stream: dump_file, format: "\n" );
	    }

	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (stream: dump_file, format: " zero if " );
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	      fprintf (stream: dump_file, format: "\n" );
	    }

	  fprintf (stream: dump_file, format: " # of iterations " );
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (stream: dump_file, format: ", bounded by " );
	  print_decu (wi: niter->max, file: dump_file);
	  fprintf (stream: dump_file, format: "\n" );
	}
      else
	fprintf (stream: dump_file, format: " failed\n\n" );
    }
  return ret;
}
2016 | |
2017 | /* Return an expression that computes the popcount of src. */ |
2018 | |
2019 | static tree |
2020 | build_popcount_expr (tree src) |
2021 | { |
2022 | tree fn; |
2023 | bool use_ifn = false; |
2024 | int prec = TYPE_PRECISION (TREE_TYPE (src)); |
2025 | int i_prec = TYPE_PRECISION (integer_type_node); |
2026 | int li_prec = TYPE_PRECISION (long_integer_type_node); |
2027 | int lli_prec = TYPE_PRECISION (long_long_integer_type_node); |
2028 | |
2029 | tree utype = unsigned_type_for (TREE_TYPE (src)); |
2030 | src = fold_convert (utype, src); |
2031 | |
2032 | if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, OPTIMIZE_FOR_BOTH)) |
2033 | use_ifn = true; |
2034 | else if (prec <= i_prec) |
2035 | fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNT); |
2036 | else if (prec == li_prec) |
2037 | fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNTL); |
2038 | else if (prec == lli_prec || prec == 2 * lli_prec) |
2039 | fn = builtin_decl_implicit (fncode: BUILT_IN_POPCOUNTLL); |
2040 | else |
2041 | return NULL_TREE; |
2042 | |
2043 | tree call; |
2044 | if (use_ifn) |
2045 | call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT, |
2046 | integer_type_node, 1, src); |
2047 | else if (prec == 2 * lli_prec) |
2048 | { |
2049 | tree src1 = fold_convert (long_long_unsigned_type_node, |
2050 | fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), |
2051 | unshare_expr (src), |
2052 | build_int_cst (integer_type_node, |
2053 | lli_prec))); |
2054 | tree src2 = fold_convert (long_long_unsigned_type_node, src); |
2055 | tree call1 = build_call_expr (fn, 1, src1); |
2056 | tree call2 = build_call_expr (fn, 1, src2); |
2057 | call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2); |
2058 | } |
2059 | else |
2060 | { |
2061 | if (prec < i_prec) |
2062 | src = fold_convert (unsigned_type_node, src); |
2063 | |
2064 | call = build_call_expr (fn, 1, src); |
2065 | } |
2066 | |
2067 | return call; |
2068 | } |
2069 | |
2070 | /* Utility function to check if OP is defined by a stmt |
2071 | that is a val - 1. */ |
2072 | |
2073 | static bool |
2074 | ssa_defined_by_minus_one_stmt_p (tree op, tree val) |
2075 | { |
2076 | gimple *stmt; |
2077 | return (TREE_CODE (op) == SSA_NAME |
2078 | && (stmt = SSA_NAME_DEF_STMT (op)) |
2079 | && is_gimple_assign (gs: stmt) |
2080 | && (gimple_assign_rhs_code (gs: stmt) == PLUS_EXPR) |
2081 | && val == gimple_assign_rhs1 (gs: stmt) |
2082 | && integer_minus_onep (gimple_assign_rhs2 (gs: stmt))); |
2083 | } |
2084 | |
/* See comment below for number_of_iterations_bitcount.
   For popcount, we have:

   modify:
   _1 = iv_1 + -1
   iv_2 = iv_1 & _1

   test:
   if (iv != 0)

   modification count:
   popcount (src)

 */

static bool
number_of_iterations_popcount (loop_p loop, edge exit,
			       enum tree_code code,
			       class tree_niter_desc *niter)
{
  /* True when the iv update precedes the exit test in the loop body.  */
  bool modify_before_test = true;
  HOST_WIDE_INT max;

  /* Check that condition for staying inside the loop is like
     if (iv != 0). */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || code != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  tree iv_2 = gimple_cond_lhs (gs: cond_stmt);
  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node. */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi. */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1). */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || gimple_assign_rhs_code (gs: iv_2_stmt) != BIT_AND_EXPR)
    return false;

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);
  tree _1 = gimple_assign_rhs2 (gs: iv_2_stmt);

  /* Check that _1 is defined by (_1 = iv_1 + -1).
     Also make sure that _1 is the same in and_stmt and _1 defining stmt.
     Also canonicalize if _1 and iv_1 are reversed. */
  if (ssa_defined_by_minus_one_stmt_p (op: iv_1, val: _1))
    std::swap (a&: iv_1, b&: _1);
  else if (ssa_defined_by_minus_one_stmt_p (op: _1, val: iv_1))
    ;
  else
    return false;

  /* Check the recurrence: the phi in the loop header must receive iv_2
     from the latch edge. */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the value entering the loop.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Get the corresponding popcount builtin. */
  tree expr = build_popcount_expr (src);

  if (!expr)
    return false;

  /* Each iteration clears one set bit, so the modification count is at
     most the bit width of SRC.  */
  max = src_precision;

  tree may_be_zero = boolean_false_node;

  if (modify_before_test)
    {
      /* The latch executes one time fewer than the number of
	 modifications, and zero times when SRC is already zero.  */
      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
			  integer_one_node);
      max = max - 1;
      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
				 build_zero_cst (TREE_TYPE (src)));
    }

  expr = fold_convert (unsigned_type_node, expr);

  niter->assumptions = boolean_true_node;
  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
  niter->niter = simplify_using_initial_conditions(loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;
  return true;
}
2198 | |
/* Return an expression that counts the leading/trailing zeroes of src.

   If define_at_zero is true, then the built expression will be defined to
   return the precision of src when src == 0 (using either a conditional
   expression or a suitable internal function).
   Otherwise, we can elide the conditional expression and let src = 0 invoke
   undefined behaviour. */

static tree
build_cltz_expr (tree src, bool leading, bool define_at_zero)
{
  tree fn;
  /* LEADING selects between count-leading-zeros and count-trailing-zeros.  */
  internal_fn ifn = leading ? IFN_CLZ : IFN_CTZ;
  bool use_ifn = false;
  int prec = TYPE_PRECISION (TREE_TYPE (src));
  int i_prec = TYPE_PRECISION (integer_type_node);
  int li_prec = TYPE_PRECISION (long_integer_type_node);
  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);

  /* Operate on the unsigned variant of SRC's type.  */
  tree utype = unsigned_type_for (TREE_TYPE (src));
  src = fold_convert (utype, src);

  /* Prefer the internal function when supported; otherwise pick the
     builtin matching SRC's precision.  */
  if (direct_internal_fn_supported_p (ifn, utype, OPTIMIZE_FOR_BOTH))
    use_ifn = true;
  else if (prec <= i_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZ)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZ);
  else if (prec == li_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZL)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZL);
  else if (prec == lli_prec || prec == 2 * lli_prec)
    fn = leading ? builtin_decl_implicit (fncode: BUILT_IN_CLZLL)
		 : builtin_decl_implicit (fncode: BUILT_IN_CTZLL);
  else
    return NULL_TREE;

  tree call;
  if (use_ifn)
    {
      call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
					   integer_type_node, 1, src);
      int val;
      int optab_defined_at_zero
	= (leading
	   ? CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val)
	   : CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val));
      /* Unless the target already defines the operation at zero to give
	 exactly PREC, wrap the call in a conditional supplying PREC.  */
      if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
	{
	  tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
				      build_zero_cst (TREE_TYPE (src)));
	  call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
			      build_int_cst (integer_type_node, prec));
	}
    }
  else if (prec == 2 * lli_prec)
    {
      /* Double-word case: split SRC into two long-long halves.  */
      tree src1 = fold_convert (long_long_unsigned_type_node,
				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
					     unshare_expr (src),
					     build_int_cst (integer_type_node,
							    lli_prec)));
      tree src2 = fold_convert (long_long_unsigned_type_node, src);
      /* We count the zeroes in src1, and add the number in src2 when src1
	 is 0. */
      if (!leading)
	std::swap (a&: src1, b&: src2);
      tree call1 = build_call_expr (fn, 1, src1);
      tree call2 = build_call_expr (fn, 1, src2);
      if (define_at_zero)
	{
	  tree is_zero2 = fold_build2 (NE_EXPR, boolean_type_node, src2,
				       build_zero_cst (TREE_TYPE (src2)));
	  call2 = fold_build3 (COND_EXPR, integer_type_node, is_zero2, call2,
			       build_int_cst (integer_type_node, lli_prec));
	}
      tree is_zero1 = fold_build2 (NE_EXPR, boolean_type_node, src1,
				   build_zero_cst (TREE_TYPE (src1)));
      call = fold_build3 (COND_EXPR, integer_type_node, is_zero1, call1,
			  fold_build2 (PLUS_EXPR, integer_type_node, call2,
				       build_int_cst (integer_type_node,
						      lli_prec)));
    }
  else
    {
      if (prec < i_prec)
	src = fold_convert (unsigned_type_node, src);

      call = build_call_expr (fn, 1, src);
      if (define_at_zero)
	{
	  tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
				      build_zero_cst (TREE_TYPE (src)));
	  call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call,
			      build_int_cst (integer_type_node, prec));
	}

      /* The builtin counts in int width; for a narrower SRC the leading
	 count includes I_PREC - PREC extra zero bits -- subtract them.  */
      if (leading && prec < i_prec)
	call = fold_build2 (MINUS_EXPR, integer_type_node, call,
			    build_int_cst (integer_type_node, i_prec - prec));
    }

  return call;
}
2302 | |
/* See comment below for number_of_iterations_bitcount.
   For c[lt]z, we have:

   modify:
   iv_2 = iv_1 << 1 OR iv_1 >> 1

   test:
   if (iv & 1 << (prec-1)) OR (iv & 1)

   modification count:
   src precision - c[lt]z (src)

 */

static bool
number_of_iterations_cltz (loop_p loop, edge exit,
			   enum tree_code code,
			   class tree_niter_desc *niter)
{
  /* True when the iv update precedes the exit test in the loop body.  */
  bool modify_before_test = true;
  HOST_WIDE_INT max;
  /* Index of the bit tested by the exit condition.  */
  int checked_bit;
  tree iv_2;

  /* Check that condition for staying inside the loop is like
     if (iv == 0). */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || (code != EQ_EXPR && code != GE_EXPR)
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  if (code == EQ_EXPR)
    {
      /* Make sure we check a bitwise and with a suitable constant */
      gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond_stmt));
      if (!is_gimple_assign (gs: and_stmt)
	  || gimple_assign_rhs_code (gs: and_stmt) != BIT_AND_EXPR
	  || !integer_pow2p (gimple_assign_rhs2 (gs: and_stmt))
	  || TREE_CODE (gimple_assign_rhs1 (and_stmt)) != SSA_NAME)
	return false;

      checked_bit = tree_log2 (gimple_assign_rhs2 (gs: and_stmt));

      iv_2 = gimple_assign_rhs1 (gs: and_stmt);
    }
  else
    {
      /* We have a GE_EXPR - a signed comparison with zero is equivalent to
	 testing the leading bit, so check for this pattern too. */

      iv_2 = gimple_cond_lhs (gs: cond_stmt);
      tree test_value_type = TREE_TYPE (iv_2);

      if (TYPE_UNSIGNED (test_value_type))
	return false;

      gimple *test_value_stmt = SSA_NAME_DEF_STMT (iv_2);

      if (is_gimple_assign (gs: test_value_stmt)
	  && gimple_assign_rhs_code (gs: test_value_stmt) == NOP_EXPR)
	{
	  /* If the test value comes from a NOP_EXPR, then we need to unwrap
	     this. We conservatively require that both types have the same
	     precision. */
	  iv_2 = gimple_assign_rhs1 (gs: test_value_stmt);
	  tree rhs_type = TREE_TYPE (iv_2);
	  if (TREE_CODE (iv_2) != SSA_NAME
	      || TREE_CODE (rhs_type) != INTEGER_TYPE
	      || (TYPE_PRECISION (rhs_type)
		  != TYPE_PRECISION (test_value_type)))
	    return false;
	}

      checked_bit = TYPE_PRECISION (test_value_type) - 1;
    }

  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node. */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi. */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is a logical shift by one stmt:
     iv_2 = iv_1 {<<|>>} 1
     (a right shift is only logical for unsigned types).  */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || (gimple_assign_rhs_code (gs: iv_2_stmt) != LSHIFT_EXPR
	  && (gimple_assign_rhs_code (gs: iv_2_stmt) != RSHIFT_EXPR
	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
      || !integer_onep (gimple_assign_rhs2 (gs: iv_2_stmt)))
    return false;

  bool left_shift = (gimple_assign_rhs_code (gs: iv_2_stmt) == LSHIFT_EXPR);

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);

  /* Check the recurrence: the phi must receive iv_2 from the latch.  */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the value entering the loop.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Apply any needed preprocessing to src.  Bits that are shifted past the
     checked bit before it can be observed never influence the exit test,
     so shift them out of SRC first.  */
  int num_ignored_bits;
  if (left_shift)
    num_ignored_bits = src_precision - checked_bit - 1;
  else
    num_ignored_bits = checked_bit;

  if (modify_before_test)
    num_ignored_bits++;

  if (num_ignored_bits != 0)
    src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
		       TREE_TYPE (src), src,
		       build_int_cst (integer_type_node, num_ignored_bits));

  /* Get the corresponding c[lt]z builtin. */
  tree expr = build_cltz_expr (src, leading: left_shift, define_at_zero: false);

  if (!expr)
    return false;

  max = src_precision - num_ignored_bits - 1;

  expr = fold_convert (unsigned_type_node, expr);

  /* The c[lt]z result is undefined for SRC == 0, so require SRC != 0.  */
  tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
				  build_zero_cst (TREE_TYPE (src)));

  niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
  niter->may_be_zero = boolean_false_node;
  niter->niter = simplify_using_initial_conditions (loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  return true;
}
2464 | |
/* See comment below for number_of_iterations_bitcount.
   For c[lt]z complement, we have:

   modify:
   iv_2 = iv_1 >> 1 OR iv_1 << 1

   test:
   if (iv != 0)

   modification count:
   src precision - c[lt]z (src)

 */

static bool
number_of_iterations_cltz_complement (loop_p loop, edge exit,
				      enum tree_code code,
				      class tree_niter_desc *niter)
{
  /* True when the iv update precedes the exit test in the loop body.  */
  bool modify_before_test = true;
  HOST_WIDE_INT max;

  /* Check that condition for staying inside the loop is like
     if (iv != 0). */
  gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond_stmt
      || code != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))
      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
    return false;

  tree iv_2 = gimple_cond_lhs (gs: cond_stmt);
  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);

  /* If the test comes before the iv modification, then these will actually be
     iv_1 and a phi node. */
  if (gimple_code (g: iv_2_stmt) == GIMPLE_PHI
      && gimple_bb (g: iv_2_stmt) == loop->header
      && gimple_phi_num_args (gs: iv_2_stmt) == 2
      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
					 loop_latch_edge (loop)->dest_idx))
	  == SSA_NAME))
    {
      /* iv_2 is actually one of the inputs to the phi. */
      iv_2 = gimple_phi_arg_def (gs: iv_2_stmt, index: loop_latch_edge (loop)->dest_idx);
      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
      modify_before_test = false;
    }

  /* Make sure iv_2_stmt is a logical shift by one stmt:
     iv_2 = iv_1 {>>|<<} 1
     (a right shift is only logical for unsigned types).  */
  if (!is_gimple_assign (gs: iv_2_stmt)
      || (gimple_assign_rhs_code (gs: iv_2_stmt) != LSHIFT_EXPR
	  && (gimple_assign_rhs_code (gs: iv_2_stmt) != RSHIFT_EXPR
	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
      || !integer_onep (gimple_assign_rhs2 (gs: iv_2_stmt)))
    return false;

  bool left_shift = (gimple_assign_rhs_code (gs: iv_2_stmt) == LSHIFT_EXPR);

  tree iv_1 = gimple_assign_rhs1 (gs: iv_2_stmt);

  /* Check the recurrence: the phi must receive iv_2 from the latch.  */
  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
  if (gimple_code (g: phi) != GIMPLE_PHI
      || (gimple_bb (g: phi) != loop_latch_edge (loop)->dest)
      || (iv_2 != gimple_phi_arg_def (gs: phi, index: loop_latch_edge (loop)->dest_idx)))
    return false;

  /* We found a match.  SRC is the value entering the loop.  */
  tree src = gimple_phi_arg_def (gs: phi, index: loop_preheader_edge (loop)->dest_idx);
  int src_precision = TYPE_PRECISION (TREE_TYPE (src));

  /* Get the corresponding c[lt]z builtin.  Shifting SRC left runs until
     the trailing one is gone (and vice versa), hence the complemented
     direction; define_at_zero so SRC == 0 gives PREC, i.e. zero
     iterations after the subtraction below.  */
  tree expr = build_cltz_expr (src, leading: !left_shift, define_at_zero: true);

  if (!expr)
    return false;

  expr = fold_build2 (MINUS_EXPR, integer_type_node,
		      build_int_cst (integer_type_node, src_precision),
		      expr);

  max = src_precision;

  tree may_be_zero = boolean_false_node;

  if (modify_before_test)
    {
      /* The latch executes one time fewer than the number of
	 modifications, and zero times when SRC is already zero.  */
      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
			  integer_one_node);
      max = max - 1;
      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
				 build_zero_cst (TREE_TYPE (src)));
    }

  expr = fold_convert (unsigned_type_node, expr);

  niter->assumptions = boolean_true_node;
  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
  niter->niter = simplify_using_initial_conditions (loop, expr);

  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = tree_to_uhwi (niter->niter);
  else
    niter->max = max;

  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;
  return true;
}
2576 | |
2577 | /* See if LOOP contains a bit counting idiom. The idiom consists of two parts: |
   1. A modification to the induction variable;
2579 | 2. A test to determine whether or not to exit the loop. |
2580 | |
2581 | These can come in either order - i.e.: |
2582 | |
2583 | <bb 3> |
2584 | iv_1 = PHI <src(2), iv_2(4)> |
2585 | if (test (iv_1)) |
2586 | goto <bb 4> |
2587 | else |
2588 | goto <bb 5> |
2589 | |
2590 | <bb 4> |
2591 | iv_2 = modify (iv_1) |
2592 | goto <bb 3> |
2593 | |
2594 | OR |
2595 | |
2596 | <bb 3> |
2597 | iv_1 = PHI <src(2), iv_2(4)> |
2598 | iv_2 = modify (iv_1) |
2599 | |
2600 | <bb 4> |
2601 | if (test (iv_2)) |
2602 | goto <bb 3> |
2603 | else |
2604 | goto <bb 5> |
2605 | |
2606 | The second form can be generated by copying the loop header out of the loop. |
2607 | |
2608 | In the first case, the number of latch executions will be equal to the |
2609 | number of induction variable modifications required before the test fails. |
2610 | |
2611 | In the second case (modify_before_test), if we assume that the number of |
2612 | modifications required before the test fails is nonzero, then the number of |
2613 | latch executions will be one less than this number. |
2614 | |
2615 | If we recognise the pattern, then we update niter accordingly, and return |
2616 | true. */ |
2617 | |
2618 | static bool |
2619 | number_of_iterations_bitcount (loop_p loop, edge exit, |
2620 | enum tree_code code, |
2621 | class tree_niter_desc *niter) |
2622 | { |
2623 | return (number_of_iterations_popcount (loop, exit, code, niter) |
2624 | || number_of_iterations_cltz (loop, exit, code, niter) |
2625 | || number_of_iterations_cltz_complement (loop, exit, code, niter)); |
2626 | } |
2627 | |
2628 | /* Substitute NEW_TREE for OLD in EXPR and fold the result. |
2629 | If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead |
2630 | all SSA names are replaced with the result of calling the VALUEIZE |
2631 | function with the SSA name as argument. */ |
2632 | |
2633 | tree |
2634 | simplify_replace_tree (tree expr, tree old, tree new_tree, |
2635 | tree (*valueize) (tree, void*), void *context, |
2636 | bool do_fold) |
2637 | { |
2638 | unsigned i, n; |
2639 | tree ret = NULL_TREE, e, se; |
2640 | |
2641 | if (!expr) |
2642 | return NULL_TREE; |
2643 | |
2644 | /* Do not bother to replace constants. */ |
2645 | if (CONSTANT_CLASS_P (expr)) |
2646 | return expr; |
2647 | |
2648 | if (valueize) |
2649 | { |
2650 | if (TREE_CODE (expr) == SSA_NAME) |
2651 | { |
2652 | new_tree = valueize (expr, context); |
2653 | if (new_tree != expr) |
2654 | return new_tree; |
2655 | } |
2656 | } |
2657 | else if (expr == old |
2658 | || operand_equal_p (expr, old, flags: 0)) |
2659 | return unshare_expr (new_tree); |
2660 | |
2661 | if (!EXPR_P (expr)) |
2662 | return expr; |
2663 | |
2664 | n = TREE_OPERAND_LENGTH (expr); |
2665 | for (i = 0; i < n; i++) |
2666 | { |
2667 | e = TREE_OPERAND (expr, i); |
2668 | se = simplify_replace_tree (expr: e, old, new_tree, valueize, context, do_fold); |
2669 | if (e == se) |
2670 | continue; |
2671 | |
2672 | if (!ret) |
2673 | ret = copy_node (expr); |
2674 | |
2675 | TREE_OPERAND (ret, i) = se; |
2676 | } |
2677 | |
2678 | return (ret ? (do_fold ? fold (ret) : ret) : expr); |
2679 | } |
2680 | |
/* Expand definitions of ssa names in EXPR as long as they are simple
   enough, and return the new expression.  If STOP is specified, stop
   expanding if EXPR equals to it.  CACHE memoizes already-expanded
   subtrees so that shared sub-expressions of the SCEV expression graph
   are expanded only once.  */

static tree
expand_simple_operations (tree expr, tree stop, hash_map<tree, tree> &cache)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, ee, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return expr;

  /* Invariants expand to themselves.  */
  if (is_gimple_min_invariant (expr))
    return expr;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      /* Compound expression: expand each operand, copying EXPR lazily
	 (only once some operand actually changed).  */
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  e = TREE_OPERAND (expr, i);
	  if (!e)
	    continue;
	  /* SCEV analysis feeds us with a proper expression
	     graph matching the SSA graph.  Avoid turning it
	     into a tree here, thus handle tree sharing
	     properly.
	     ??? The SSA walk below still turns the SSA graph
	     into a tree but until we find a testcase do not
	     introduce additional tree sharing here.  */
	  bool existed_p;
	  tree &cee = cache.get_or_insert (k: e, existed: &existed_p);
	  if (existed_p)
	    ee = cee;
	  else
	    {
	      /* Seed the cache with E itself (terminates re-entry on
		 shared subtrees), then record the expansion if it
		 produced something new.  */
	      cee = e;
	      ee = expand_simple_operations (expr: e, stop, cache);
	      if (ee != e)
		*cache.get (k: e) = ee;
	    }
	  if (e == ee)
	    continue;

	  if (!ret)
	    ret = copy_node (expr);

	  TREE_OPERAND (ret, i) = ee;
	}

      if (!ret)
	return expr;

      /* Folding the rebuilt expression may warn about overflow; those
	 warnings are not relevant here, so suppress them.  */
      fold_defer_overflow_warnings ();
      ret = fold (ret);
      fold_undefer_and_ignore_overflow_warnings ();
      return ret;
    }

  /* Stop if it's not ssa name or the one we don't want to expand.  */
  if (TREE_CODE (expr) != SSA_NAME || expr == stop)
    return expr;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      basic_block src, dest;

      /* Only degenerate (single-argument) PHIs are looked through.  */
      if (gimple_phi_num_args (gs: stmt) != 1)
	return expr;
      e = PHI_ARG_DEF (stmt, 0);

      /* Avoid propagating through loop exit phi nodes, which
	 could break loop-closed SSA form restrictions.  */
      dest = gimple_bb (g: stmt);
      src = single_pred (bb: dest);
      if (TREE_CODE (e) == SSA_NAME
	  && src->loop_father != dest->loop_father)
	return expr;

      return expand_simple_operations (expr: e, stop, cache);
    }
  if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
    return expr;

  /* Avoid expanding to expressions that contain SSA names that need
     to take part in abnormal coalescing.  */
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
    if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
      return expr;

  e = gimple_assign_rhs1 (gs: stmt);
  code = gimple_assign_rhs_code (gs: stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (is_gimple_min_invariant (e))
	return e;

      if (code == SSA_NAME)
	return expand_simple_operations (expr: e, stop, cache);
      else if (code == ADDR_EXPR)
	{
	  /* Rewrite &MEM[base + off] as base p+ off', expanding the
	     SSA base recursively.  */
	  poly_int64 offset;
	  tree base = get_addr_base_and_unit_offset (TREE_OPERAND (e, 0),
						     &offset);
	  if (base
	      && TREE_CODE (base) == MEM_REF)
	    {
	      ee = expand_simple_operations (TREE_OPERAND (base, 0), stop,
					     cache);
	      return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (expr), ee,
				  wide_int_to_tree (sizetype,
						    mem_ref_offset (base)
						    + offset));
	    }
	}

      return expr;
    }

  switch (code)
    {
    CASE_CONVERT:
      /* Casts are simple.  */
      ee = expand_simple_operations (expr: e, stop, cache);
      return fold_build1 (code, TREE_TYPE (expr), ee);

    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      /* Do not expand arithmetic whose overflow traps: folding the
	 expanded form could elide the trap.  */
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
	  && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
	return expr;
      /* Fallthru.  */
    case POINTER_PLUS_EXPR:
      /* And increments and decrements by a constant are simple.  */
      e1 = gimple_assign_rhs2 (gs: stmt);
      if (!is_gimple_min_invariant (e1))
	return expr;

      ee = expand_simple_operations (expr: e, stop, cache);
      return fold_build2 (code, TREE_TYPE (expr), ee, e1);

    default:
      return expr;
    }
}
2833 | |
2834 | tree |
2835 | expand_simple_operations (tree expr, tree stop) |
2836 | { |
2837 | hash_map<tree, tree> cache; |
2838 | return expand_simple_operations (expr, stop, cache); |
2839 | } |
2840 | |
/* Tries to simplify EXPR using the condition COND, which is assumed to
   hold.  Returns the simplified expression (or EXPR unchanged, if no
   simplification was possible).  */

static tree
tree_simplify_using_condition_1 (tree cond, tree expr)
{
  bool changed;
  tree e, e0, e1, e2, notcond;
  enum tree_code code = TREE_CODE (expr);

  if (code == INTEGER_CST)
    return expr;

  /* For compound boolean expressions, simplify each operand separately
     and rebuild the expression only when an operand changed.  */
  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      changed = false;

      e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* In case COND is equality, we may be able to simplify EXPR by copy/constant
     propagation, and vice versa.  Fold does not handle this, since it is
     considered too expensive.  */
  if (TREE_CODE (cond) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (cond, 0);
      e1 = TREE_OPERAND (cond, 1);

      /* We know that e0 == e1.  Check whether we cannot simplify expr
	 using this fact.  Only accept the substitution if it collapses
	 EXPR to a constant truth value.  */
      e = simplify_replace_tree (expr, old: e0, new_tree: e1);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;

      e = simplify_replace_tree (expr, old: e1, new_tree: e0);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;
    }
  if (TREE_CODE (expr) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true.  */
      e = simplify_replace_tree (expr: cond, old: e0, new_tree: e1);
      if (integer_zerop (e))
	return e;
      e = simplify_replace_tree (expr: cond, old: e1, new_tree: e0);
      if (integer_zerop (e))
	return e;
    }
  if (TREE_CODE (expr) == NE_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true.  */
      e = simplify_replace_tree (expr: cond, old: e0, new_tree: e1);
      if (integer_zerop (e))
	return boolean_true_node;
      e = simplify_replace_tree (expr: cond, old: e1, new_tree: e0);
      if (integer_zerop (e))
	return boolean_true_node;
    }

  /* Check whether COND ==> EXPR.  */
  notcond = invert_truthvalue (cond);
  e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, expr);
  if (e && integer_nonzerop (e))
    return e;

  /* Check whether COND ==> not EXPR.  */
  e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, expr);
  if (e && integer_zerop (e))
    return e;

  return expr;
}
2946 | |
2947 | /* Tries to simplify EXPR using the condition COND. Returns the simplified |
2948 | expression (or EXPR unchanged, if no simplification was possible). |
2949 | Wrapper around tree_simplify_using_condition_1 that ensures that chains |
2950 | of simple operations in definitions of ssa names in COND are expanded, |
2951 | so that things like casts or incrementing the value of the bound before |
2952 | the loop do not cause us to fail. */ |
2953 | |
2954 | static tree |
2955 | tree_simplify_using_condition (tree cond, tree expr) |
2956 | { |
2957 | cond = expand_simple_operations (expr: cond); |
2958 | |
2959 | return tree_simplify_using_condition_1 (cond, expr); |
2960 | } |
2961 | |
/* Tries to simplify EXPR using the conditions on entry to LOOP.
   Returns the simplified expression (or EXPR unchanged, if no
   simplification was possible).  Walks up to MAX_DOMINATORS_TO_WALK
   dominator blocks of the loop header and applies the controlling
   condition of each single-predecessor block encountered.  */

tree
simplify_using_initial_conditions (class loop *loop, tree expr)
{
  edge e;
  basic_block bb;
  tree cond, expanded, backup;
  int cnt = 0;

  if (TREE_CODE (expr) == INTEGER_CST)
    return expr;

  /* Remember the expanded-but-unsimplified form so we can tell at the
     end whether any condition actually simplified something.  */
  backup = expanded = expand_simple_operations (expr);

  /* Limit walking the dominators to avoid quadraticness in
     the number of BBs times the number of loops in degenerate
     cases.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      /* Only a block entered through a single conditional edge supplies
	 a condition we know holds here.  */
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* Rebuild the guarding condition; invert it if BB is reached via
	 the false edge.  */
      gcond *stmt = as_a <gcond *> (p: *gsi_last_bb (bb: e->src));
      cond = fold_build2 (gimple_cond_code (stmt),
			  boolean_type_node,
			  gimple_cond_lhs (stmt),
			  gimple_cond_rhs (stmt));
      if (e->flags & EDGE_FALSE_VALUE)
	cond = invert_truthvalue (cond);
      expanded = tree_simplify_using_condition (cond, expr: expanded);
      /* Break if EXPR is simplified to const values.  */
      if (expanded
	  && (integer_zerop (expanded) || integer_nonzerop (expanded)))
	return expanded;

      ++cnt;
    }

  /* Return the original expression if no simplification is done.  */
  return operand_equal_p (backup, expanded, flags: 0) ? expr : expanded;
}
3012 | |
3013 | /* Tries to simplify EXPR using the evolutions of the loop invariants |
3014 | in the superloops of LOOP. Returns the simplified expression |
3015 | (or EXPR unchanged, if no simplification was possible). */ |
3016 | |
3017 | static tree |
3018 | simplify_using_outer_evolutions (class loop *loop, tree expr) |
3019 | { |
3020 | enum tree_code code = TREE_CODE (expr); |
3021 | bool changed; |
3022 | tree e, e0, e1, e2; |
3023 | |
3024 | if (is_gimple_min_invariant (expr)) |
3025 | return expr; |
3026 | |
3027 | if (code == TRUTH_OR_EXPR |
3028 | || code == TRUTH_AND_EXPR |
3029 | || code == COND_EXPR) |
3030 | { |
3031 | changed = false; |
3032 | |
3033 | e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0)); |
3034 | if (TREE_OPERAND (expr, 0) != e0) |
3035 | changed = true; |
3036 | |
3037 | e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1)); |
3038 | if (TREE_OPERAND (expr, 1) != e1) |
3039 | changed = true; |
3040 | |
3041 | if (code == COND_EXPR) |
3042 | { |
3043 | e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2)); |
3044 | if (TREE_OPERAND (expr, 2) != e2) |
3045 | changed = true; |
3046 | } |
3047 | else |
3048 | e2 = NULL_TREE; |
3049 | |
3050 | if (changed) |
3051 | { |
3052 | if (code == COND_EXPR) |
3053 | expr = fold_build3 (code, boolean_type_node, e0, e1, e2); |
3054 | else |
3055 | expr = fold_build2 (code, boolean_type_node, e0, e1); |
3056 | } |
3057 | |
3058 | return expr; |
3059 | } |
3060 | |
3061 | e = instantiate_parameters (loop, chrec: expr); |
3062 | if (is_gimple_min_invariant (e)) |
3063 | return e; |
3064 | |
3065 | return expr; |
3066 | } |
3067 | |
3068 | /* Returns true if EXIT is the only possible exit from LOOP. */ |
3069 | |
3070 | bool |
3071 | loop_only_exit_p (const class loop *loop, basic_block *body, const_edge exit) |
3072 | { |
3073 | gimple_stmt_iterator bsi; |
3074 | unsigned i; |
3075 | |
3076 | if (exit != single_exit (loop)) |
3077 | return false; |
3078 | |
3079 | for (i = 0; i < loop->num_nodes; i++) |
3080 | for (bsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
3081 | if (stmt_can_terminate_bb_p (gsi_stmt (i: bsi))) |
3082 | return false; |
3083 | |
3084 | return true; |
3085 | } |
3086 | |
/* Stores description of number of iterations of LOOP derived from
   EXIT (an exit edge of the LOOP) in NITER.  Returns true if some useful
   information could be derived (and fields of NITER have meaning described
   in comments at class tree_niter_desc declaration), false otherwise.
   When EVERY_ITERATION is true, only tests that are known to be executed
   every iteration are considered (i.e. only test that alone bounds the loop).
   If AT_STMT is not NULL, this function stores LOOP's condition statement in
   it when returning true.  BODY, if non-NULL, is the array of LOOP's basic
   blocks supplied by the caller; otherwise it is computed and freed here.  */

bool
number_of_iterations_exit_assumptions (class loop *loop, edge exit,
				       class tree_niter_desc *niter,
				       gcond **at_stmt, bool every_iteration,
				       basic_block *body)
{
  tree type;
  tree op0, op1;
  enum tree_code code;
  affine_iv iv0, iv1;
  bool safe;

  /* The condition at a fake exit (if it exists) does not control its
     execution.  */
  if (exit->flags & EDGE_FAKE)
    return false;

  /* Nothing to analyze if the loop is known to be infinite.  */
  if (loop_constraint_set_p (loop, LOOP_C_INFINITE))
    return false;

  /* The test is executed on every iteration iff the exit source
     dominates the latch.  */
  safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);

  if (every_iteration && !safe)
    return false;

  /* Start from a conservative state.  */
  niter->assumptions = boolean_false_node;
  niter->control.base = NULL_TREE;
  niter->control.step = NULL_TREE;
  niter->control.no_overflow = false;
  gcond *stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!stmt)
    return false;

  if (at_stmt)
    *at_stmt = stmt;

  /* We want the condition for staying inside loop.  */
  code = gimple_cond_code (gs: stmt);
  if (exit->flags & EDGE_TRUE_VALUE)
    code = invert_tree_comparison (code, false);

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case NE_EXPR:
      break;

    case EQ_EXPR:
      /* An equality "stay inside" condition is only handled as part of
	 a bit-counting (CLZ) idiom.  */
      return number_of_iterations_cltz (loop, exit, code, niter);

    default:
      return false;
    }

  op0 = gimple_cond_lhs (gs: stmt);
  op1 = gimple_cond_rhs (gs: stmt);
  type = TREE_TYPE (op0);

  if (TREE_CODE (type) != INTEGER_TYPE
      && !POINTER_TYPE_P (type))
    return false;

  /* Both operands must be simple affine IVs.  If OP0 is not, the loop
     may still match one of the bit-counting idioms.  */
  tree iv0_niters = NULL_TREE;
  if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
			      op0, &iv0, safe ? &iv0_niters : NULL, false))
    return number_of_iterations_bitcount (loop, exit, code, niter);
  tree iv1_niters = NULL_TREE;
  if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
			      op1, &iv1, safe ? &iv1_niters : NULL, false))
    return false;
  /* Give up on complicated case.  */
  if (iv0_niters && iv1_niters)
    return false;

  /* We don't want to see undefined signed overflow warnings while
     computing the number of iterations.  */
  fold_defer_overflow_warnings ();

  iv0.base = expand_simple_operations (expr: iv0.base);
  iv1.base = expand_simple_operations (expr: iv1.base);
  bool body_from_caller = true;
  if (!body)
    {
      body = get_loop_body (loop);
      body_from_caller = false;
    }
  bool only_exit_p = loop_only_exit_p (loop, body, exit);
  if (!body_from_caller)
    free (ptr: body);
  if (!number_of_iterations_cond (loop, type, iv0: &iv0, code, iv1: &iv1, niter,
				  only_exit: only_exit_p, every_iteration: safe))
    {
      fold_undefer_and_ignore_overflow_warnings ();
      return false;
    }

  /* Incorporate additional assumption implied by control iv.  */
  tree iv_niters = iv0_niters ? iv0_niters : iv1_niters;
  if (iv_niters)
    {
      tree assumption = fold_build2 (LE_EXPR, boolean_type_node, niter->niter,
				     fold_convert (TREE_TYPE (niter->niter),
						   iv_niters));

      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);

      /* Refine upper bound if possible.  */
      if (TREE_CODE (iv_niters) == INTEGER_CST
	  && niter->max > wi::to_widest (t: iv_niters))
	niter->max = wi::to_widest (t: iv_niters);
    }

  /* There are no assumptions if the loop is known to be finite.  */
  if (!integer_zerop (niter->assumptions)
      && loop_constraint_set_p (loop, LOOP_C_FINITE))
    niter->assumptions = boolean_true_node;

  /* At -O3 and above, also simplify with the evolutions of the
     enclosing loops.  */
  if (optimize >= 3)
    {
      niter->assumptions = simplify_using_outer_evolutions (loop,
							    expr: niter->assumptions);
      niter->may_be_zero = simplify_using_outer_evolutions (loop,
							    expr: niter->may_be_zero);
      niter->niter = simplify_using_outer_evolutions (loop, expr: niter->niter);
    }

  niter->assumptions
    = simplify_using_initial_conditions (loop,
					 expr: niter->assumptions);
  niter->may_be_zero
    = simplify_using_initial_conditions (loop,
					 expr: niter->may_be_zero);

  fold_undefer_and_ignore_overflow_warnings ();

  /* If NITER has simplified into a constant, update MAX.  */
  if (TREE_CODE (niter->niter) == INTEGER_CST)
    niter->max = wi::to_widest (t: niter->niter);

  return (!integer_zerop (niter->assumptions));
}
3243 | |
3244 | /* Like number_of_iterations_exit_assumptions, but return TRUE only if |
3245 | the niter information holds unconditionally. */ |
3246 | |
3247 | bool |
3248 | number_of_iterations_exit (class loop *loop, edge exit, |
3249 | class tree_niter_desc *niter, |
3250 | bool warn, bool every_iteration, |
3251 | basic_block *body) |
3252 | { |
3253 | gcond *stmt; |
3254 | if (!number_of_iterations_exit_assumptions (loop, exit, niter, |
3255 | at_stmt: &stmt, every_iteration, body)) |
3256 | return false; |
3257 | |
3258 | if (integer_nonzerop (niter->assumptions)) |
3259 | return true; |
3260 | |
3261 | if (warn && dump_enabled_p ()) |
3262 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, |
3263 | "missed loop optimization: niters analysis ends up " |
3264 | "with assumptions.\n" ); |
3265 | |
3266 | return false; |
3267 | } |
3268 | |
3269 | /* Try to determine the number of iterations of LOOP. If we succeed, |
3270 | expression giving number of iterations is returned and *EXIT is |
3271 | set to the edge from that the information is obtained. Otherwise |
3272 | chrec_dont_know is returned. */ |
3273 | |
3274 | tree |
3275 | find_loop_niter (class loop *loop, edge *exit) |
3276 | { |
3277 | unsigned i; |
3278 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3279 | edge ex; |
3280 | tree niter = NULL_TREE, aniter; |
3281 | class tree_niter_desc desc; |
3282 | |
3283 | *exit = NULL; |
3284 | FOR_EACH_VEC_ELT (exits, i, ex) |
3285 | { |
3286 | if (!number_of_iterations_exit (loop, exit: ex, niter: &desc, warn: false)) |
3287 | continue; |
3288 | |
3289 | if (integer_nonzerop (desc.may_be_zero)) |
3290 | { |
3291 | /* We exit in the first iteration through this exit. |
3292 | We won't find anything better. */ |
3293 | niter = build_int_cst (unsigned_type_node, 0); |
3294 | *exit = ex; |
3295 | break; |
3296 | } |
3297 | |
3298 | if (!integer_zerop (desc.may_be_zero)) |
3299 | continue; |
3300 | |
3301 | aniter = desc.niter; |
3302 | |
3303 | if (!niter) |
3304 | { |
3305 | /* Nothing recorded yet. */ |
3306 | niter = aniter; |
3307 | *exit = ex; |
3308 | continue; |
3309 | } |
3310 | |
3311 | /* Prefer constants, the lower the better. */ |
3312 | if (TREE_CODE (aniter) != INTEGER_CST) |
3313 | continue; |
3314 | |
3315 | if (TREE_CODE (niter) != INTEGER_CST) |
3316 | { |
3317 | niter = aniter; |
3318 | *exit = ex; |
3319 | continue; |
3320 | } |
3321 | |
3322 | if (tree_int_cst_lt (t1: aniter, t2: niter)) |
3323 | { |
3324 | niter = aniter; |
3325 | *exit = ex; |
3326 | continue; |
3327 | } |
3328 | } |
3329 | |
3330 | return niter ? niter : chrec_dont_know; |
3331 | } |
3332 | |
3333 | /* Return true if loop is known to have bounded number of iterations. */ |
3334 | |
3335 | bool |
3336 | finite_loop_p (class loop *loop) |
3337 | { |
3338 | widest_int nit; |
3339 | int flags; |
3340 | |
3341 | if (loop->finite_p) |
3342 | { |
3343 | unsigned i; |
3344 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3345 | edge ex; |
3346 | |
3347 | /* If the loop has a normal exit, we can assume it will terminate. */ |
3348 | FOR_EACH_VEC_ELT (exits, i, ex) |
3349 | if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE))) |
3350 | { |
3351 | if (dump_file) |
3352 | fprintf (stream: dump_file, format: "Assume loop %i to be finite: it has an exit " |
3353 | "and -ffinite-loops is on or loop was " |
3354 | "previously finite.\n" , |
3355 | loop->num); |
3356 | return true; |
3357 | } |
3358 | } |
3359 | |
3360 | flags = flags_from_decl_or_type (current_function_decl); |
3361 | if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE)) |
3362 | { |
3363 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3364 | fprintf (stream: dump_file, |
3365 | format: "Found loop %i to be finite: it is within " |
3366 | "pure or const function.\n" , |
3367 | loop->num); |
3368 | loop->finite_p = true; |
3369 | return true; |
3370 | } |
3371 | |
3372 | if (loop->any_upper_bound |
3373 | /* Loop with no normal exit will not pass max_loop_iterations. */ |
3374 | || (!loop->finite_p && max_loop_iterations (loop, &nit))) |
3375 | { |
3376 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3377 | fprintf (stream: dump_file, format: "Found loop %i to be finite: upper bound found.\n" , |
3378 | loop->num); |
3379 | loop->finite_p = true; |
3380 | return true; |
3381 | } |
3382 | |
3383 | return false; |
3384 | } |
3385 | |
3386 | /* |
3387 | |
3388 | Analysis of a number of iterations of a loop by a brute-force evaluation. |
3389 | |
3390 | */ |
3391 | |
3392 | /* Bound on the number of iterations we try to evaluate. */ |
3393 | |
3394 | #define MAX_ITERATIONS_TO_TRACK \ |
3395 | ((unsigned) param_max_iterations_to_track) |
3396 | |
3397 | /* Returns the loop phi node of LOOP such that ssa name X is derived from its |
3398 | result by a chain of operations such that all but exactly one of their |
3399 | operands are constants. */ |
3400 | |
3401 | static gphi * |
3402 | chain_of_csts_start (class loop *loop, tree x) |
3403 | { |
3404 | gimple *stmt = SSA_NAME_DEF_STMT (x); |
3405 | tree use; |
3406 | basic_block bb = gimple_bb (g: stmt); |
3407 | enum tree_code code; |
3408 | |
3409 | if (!bb |
3410 | || !flow_bb_inside_loop_p (loop, bb)) |
3411 | return NULL; |
3412 | |
3413 | if (gimple_code (g: stmt) == GIMPLE_PHI) |
3414 | { |
3415 | if (bb == loop->header) |
3416 | return as_a <gphi *> (p: stmt); |
3417 | |
3418 | return NULL; |
3419 | } |
3420 | |
3421 | if (gimple_code (g: stmt) != GIMPLE_ASSIGN |
3422 | || gimple_assign_rhs_class (gs: stmt) == GIMPLE_TERNARY_RHS) |
3423 | return NULL; |
3424 | |
3425 | code = gimple_assign_rhs_code (gs: stmt); |
3426 | if (gimple_references_memory_p (stmt) |
3427 | || TREE_CODE_CLASS (code) == tcc_reference |
3428 | || (code == ADDR_EXPR |
3429 | && !is_gimple_min_invariant (gimple_assign_rhs1 (gs: stmt)))) |
3430 | return NULL; |
3431 | |
3432 | use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); |
3433 | if (use == NULL_TREE) |
3434 | return NULL; |
3435 | |
3436 | return chain_of_csts_start (loop, x: use); |
3437 | } |
3438 | |
3439 | /* Determines whether the expression X is derived from a result of a phi node |
3440 | in header of LOOP such that |
3441 | |
3442 | * the derivation of X consists only from operations with constants |
3443 | * the initial value of the phi node is constant |
3444 | * the value of the phi node in the next iteration can be derived from the |
3445 | value in the current iteration by a chain of operations with constants, |
3446 | or is also a constant |
3447 | |
3448 | If such phi node exists, it is returned, otherwise NULL is returned. */ |
3449 | |
3450 | static gphi * |
3451 | get_base_for (class loop *loop, tree x) |
3452 | { |
3453 | gphi *phi; |
3454 | tree init, next; |
3455 | |
3456 | if (is_gimple_min_invariant (x)) |
3457 | return NULL; |
3458 | |
3459 | phi = chain_of_csts_start (loop, x); |
3460 | if (!phi) |
3461 | return NULL; |
3462 | |
3463 | init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); |
3464 | next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); |
3465 | |
3466 | if (!is_gimple_min_invariant (init)) |
3467 | return NULL; |
3468 | |
3469 | if (TREE_CODE (next) == SSA_NAME |
3470 | && chain_of_csts_start (loop, x: next) != phi) |
3471 | return NULL; |
3472 | |
3473 | return phi; |
3474 | } |
3475 | |
3476 | /* Given an expression X, then |
3477 | |
3478 | * if X is NULL_TREE, we return the constant BASE. |
3479 | * if X is a constant, we return the constant X. |
3480 | * otherwise X is a SSA name, whose value in the considered loop is derived |
3481 | by a chain of operations with constant from a result of a phi node in |
3482 | the header of the loop. Then we return value of X when the value of the |
3483 | result of this phi node is given by the constant BASE. */ |
3484 | |
3485 | static tree |
3486 | get_val_for (tree x, tree base) |
3487 | { |
3488 | gimple *stmt; |
3489 | |
3490 | gcc_checking_assert (is_gimple_min_invariant (base)); |
3491 | |
3492 | if (!x) |
3493 | return base; |
3494 | else if (is_gimple_min_invariant (x)) |
3495 | return x; |
3496 | |
3497 | stmt = SSA_NAME_DEF_STMT (x); |
3498 | if (gimple_code (g: stmt) == GIMPLE_PHI) |
3499 | return base; |
3500 | |
3501 | gcc_checking_assert (is_gimple_assign (stmt)); |
3502 | |
3503 | /* STMT must be either an assignment of a single SSA name or an |
3504 | expression involving an SSA name and a constant. Try to fold that |
3505 | expression using the value for the SSA name. */ |
3506 | if (gimple_assign_ssa_name_copy_p (stmt)) |
3507 | return get_val_for (x: gimple_assign_rhs1 (gs: stmt), base); |
3508 | else if (gimple_assign_rhs_class (gs: stmt) == GIMPLE_UNARY_RHS |
3509 | && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) |
3510 | return fold_build1 (gimple_assign_rhs_code (stmt), |
3511 | TREE_TYPE (gimple_assign_lhs (stmt)), |
3512 | get_val_for (gimple_assign_rhs1 (stmt), base)); |
3513 | else if (gimple_assign_rhs_class (gs: stmt) == GIMPLE_BINARY_RHS) |
3514 | { |
3515 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
3516 | tree rhs2 = gimple_assign_rhs2 (gs: stmt); |
3517 | if (TREE_CODE (rhs1) == SSA_NAME) |
3518 | rhs1 = get_val_for (x: rhs1, base); |
3519 | else if (TREE_CODE (rhs2) == SSA_NAME) |
3520 | rhs2 = get_val_for (x: rhs2, base); |
3521 | else |
3522 | gcc_unreachable (); |
3523 | return fold_build2 (gimple_assign_rhs_code (stmt), |
3524 | TREE_TYPE (gimple_assign_lhs (stmt)), rhs1, rhs2); |
3525 | } |
3526 | else |
3527 | gcc_unreachable (); |
3528 | } |
3529 | |
3530 | |
3531 | /* Tries to count the number of iterations of LOOP till it exits by EXIT |
3532 | by brute force -- i.e. by determining the value of the operands of the |
3533 | condition at EXIT in first few iterations of the loop (assuming that |
3534 | these values are constant) and determining the first one in that the |
3535 | condition is not satisfied. Returns the constant giving the number |
3536 | of the iterations of LOOP if successful, chrec_dont_know otherwise. */ |
3537 | |
tree
loop_niter_by_eval (class loop *loop, edge exit)
{
  tree acnd;
  /* For each of the two operands of the exit condition:
     OP  - the SSA name evolving in the loop (NULL_TREE if invariant),
     VAL - the value of the controlling PHI in the current iteration,
     NEXT - the latch-edge argument of that PHI (the update expression),
     AVAL - the operand's value as used by the exit test this iteration.  */
  tree op[2], val[2], next[2], aval[2];
  gphi *phi;
  unsigned i, j;
  enum tree_code cmp;

  /* The exit must be controlled by a GIMPLE_COND at the end of its source
     block.  */
  gcond *cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: exit->src));
  if (!cond)
    return chrec_dont_know;

  /* Canonicalize CMP so that it is the condition under which the loop
     continues (i.e. the exit is taken when CMP folds to false).  */
  cmp = gimple_cond_code (gs: cond);
  if (exit->flags & EDGE_TRUE_VALUE)
    cmp = invert_tree_comparison (cmp, false);

  switch (cmp)
    {
    case EQ_EXPR:
    case NE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
      op[0] = gimple_cond_lhs (gs: cond);
      op[1] = gimple_cond_rhs (gs: cond);
      break;

    default:
      return chrec_dont_know;
    }

  /* Set up the initial value and the per-iteration update for both
     operands.  Invariant operands keep a constant VAL and a NULL OP.  */
  for (j = 0; j < 2; j++)
    {
      if (is_gimple_min_invariant (op[j]))
	{
	  val[j] = op[j];
	  next[j] = NULL_TREE;
	  op[j] = NULL_TREE;
	}
      else
	{
	  phi = get_base_for (loop, x: op[j]);
	  if (!phi)
	    return chrec_dont_know;
	  val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
	}
    }

  /* Don't issue signed overflow warnings.  */
  fold_defer_overflow_warnings ();

  /* Simulate at most MAX_ITERATIONS_TO_TRACK iterations, constant-folding
     the exit condition each time.  */
  for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
    {
      for (j = 0; j < 2; j++)
	aval[j] = get_val_for (x: op[j], base: val[j]);

      acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
      /* The continue condition folded to false: the loop exits after
	 exactly I iterations.  */
      if (acnd && integer_zerop (acnd))
	{
	  fold_undefer_and_ignore_overflow_warnings ();
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file,
		     format: "Proved that loop %d iterates %d times using brute force.\n" ,
		     loop->num, i);
	  return build_int_cst (unsigned_type_node, i);
	}

      /* Advance both operands to their values in the next iteration;
	 give up as soon as a value is no longer a constant.  */
      for (j = 0; j < 2; j++)
	{
	  aval[j] = val[j];
	  val[j] = get_val_for (x: next[j], base: val[j]);
	  if (!is_gimple_min_invariant (val[j]))
	    {
	      fold_undefer_and_ignore_overflow_warnings ();
	      return chrec_dont_know;
	    }
	}

      /* If the next iteration would use the same base values
	 as the current one, there is no point looping further,
	 all following iterations will be the same as this one.  */
      if (val[0] == aval[0] && val[1] == aval[1])
	break;
    }

  fold_undefer_and_ignore_overflow_warnings ();

  return chrec_dont_know;
}
3630 | |
3631 | /* Finds the exit of the LOOP by that the loop exits after a constant |
3632 | number of iterations and stores the exit edge to *EXIT. The constant |
3633 | giving the number of iterations of LOOP is returned. The number of |
3634 | iterations is determined using loop_niter_by_eval (i.e. by brute force |
3635 | evaluation). If we are unable to find the exit for that loop_niter_by_eval |
3636 | determines the number of iterations, chrec_dont_know is returned. */ |
3637 | |
3638 | tree |
3639 | find_loop_niter_by_eval (class loop *loop, edge *exit) |
3640 | { |
3641 | unsigned i; |
3642 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3643 | edge ex; |
3644 | tree niter = NULL_TREE, aniter; |
3645 | |
3646 | *exit = NULL; |
3647 | |
3648 | /* Loops with multiple exits are expensive to handle and less important. */ |
3649 | if (!flag_expensive_optimizations |
3650 | && exits.length () > 1) |
3651 | return chrec_dont_know; |
3652 | |
3653 | FOR_EACH_VEC_ELT (exits, i, ex) |
3654 | { |
3655 | if (!just_once_each_iteration_p (loop, ex->src)) |
3656 | continue; |
3657 | |
3658 | aniter = loop_niter_by_eval (loop, exit: ex); |
3659 | if (chrec_contains_undetermined (aniter)) |
3660 | continue; |
3661 | |
3662 | if (niter |
3663 | && !tree_int_cst_lt (t1: aniter, t2: niter)) |
3664 | continue; |
3665 | |
3666 | niter = aniter; |
3667 | *exit = ex; |
3668 | } |
3669 | |
3670 | return niter ? niter : chrec_dont_know; |
3671 | } |
3672 | |
3673 | /* |
3674 | |
3675 | Analysis of upper bounds on number of iterations of a loop. |
3676 | |
3677 | */ |
3678 | |
3679 | static widest_int derive_constant_upper_bound_ops (tree, tree, |
3680 | enum tree_code, tree); |
3681 | |
3682 | /* Returns a constant upper bound on the value of the right-hand side of |
3683 | an assignment statement STMT. */ |
3684 | |
3685 | static widest_int |
3686 | derive_constant_upper_bound_assign (gimple *stmt) |
3687 | { |
3688 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
3689 | tree op0 = gimple_assign_rhs1 (gs: stmt); |
3690 | tree op1 = gimple_assign_rhs2 (gs: stmt); |
3691 | |
3692 | return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)), |
3693 | op0, code, op1); |
3694 | } |
3695 | |
3696 | /* Returns a constant upper bound on the value of expression VAL. VAL |
3697 | is considered to be unsigned. If its type is signed, its value must |
3698 | be nonnegative. */ |
3699 | |
3700 | static widest_int |
3701 | derive_constant_upper_bound (tree val) |
3702 | { |
3703 | enum tree_code code; |
3704 | tree op0, op1, op2; |
3705 | |
3706 | extract_ops_from_tree (val, &code, &op0, &op1, &op2); |
3707 | return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1); |
3708 | } |
3709 | |
3710 | /* Returns a constant upper bound on the value of expression OP0 CODE OP1, |
3711 | whose type is TYPE. The expression is considered to be unsigned. If |
3712 | its type is signed, its value must be nonnegative. */ |
3713 | |
3714 | static widest_int |
3715 | derive_constant_upper_bound_ops (tree type, tree op0, |
3716 | enum tree_code code, tree op1) |
3717 | { |
3718 | tree subtype, maxt; |
3719 | widest_int bnd, max, cst; |
3720 | gimple *stmt; |
3721 | |
3722 | if (INTEGRAL_TYPE_P (type)) |
3723 | maxt = TYPE_MAX_VALUE (type); |
3724 | else |
3725 | maxt = upper_bound_in_type (type, type); |
3726 | |
3727 | max = wi::to_widest (t: maxt); |
3728 | |
3729 | switch (code) |
3730 | { |
3731 | case INTEGER_CST: |
3732 | return wi::to_widest (t: op0); |
3733 | |
3734 | CASE_CONVERT: |
3735 | subtype = TREE_TYPE (op0); |
3736 | if (!TYPE_UNSIGNED (subtype) |
3737 | /* If TYPE is also signed, the fact that VAL is nonnegative implies |
3738 | that OP0 is nonnegative. */ |
3739 | && TYPE_UNSIGNED (type) |
3740 | && !tree_expr_nonnegative_p (op0)) |
3741 | { |
3742 | /* If we cannot prove that the casted expression is nonnegative, |
3743 | we cannot establish more useful upper bound than the precision |
3744 | of the type gives us. */ |
3745 | return max; |
3746 | } |
3747 | |
3748 | /* We now know that op0 is an nonnegative value. Try deriving an upper |
3749 | bound for it. */ |
3750 | bnd = derive_constant_upper_bound (val: op0); |
3751 | |
3752 | /* If the bound does not fit in TYPE, max. value of TYPE could be |
3753 | attained. */ |
3754 | if (wi::ltu_p (x: max, y: bnd)) |
3755 | return max; |
3756 | |
3757 | return bnd; |
3758 | |
3759 | case PLUS_EXPR: |
3760 | case POINTER_PLUS_EXPR: |
3761 | case MINUS_EXPR: |
3762 | if (TREE_CODE (op1) != INTEGER_CST |
3763 | || !tree_expr_nonnegative_p (op0)) |
3764 | return max; |
3765 | |
3766 | /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to |
3767 | choose the most logical way how to treat this constant regardless |
3768 | of the signedness of the type. */ |
3769 | cst = wi::sext (x: wi::to_widest (t: op1), TYPE_PRECISION (type)); |
3770 | if (code != MINUS_EXPR) |
3771 | cst = -cst; |
3772 | |
3773 | bnd = derive_constant_upper_bound (val: op0); |
3774 | |
3775 | if (wi::neg_p (x: cst)) |
3776 | { |
3777 | cst = -cst; |
3778 | /* Avoid CST == 0x80000... */ |
3779 | if (wi::neg_p (x: cst)) |
3780 | return max; |
3781 | |
3782 | /* OP0 + CST. We need to check that |
3783 | BND <= MAX (type) - CST. */ |
3784 | |
3785 | widest_int mmax = max - cst; |
3786 | if (wi::leu_p (x: bnd, y: mmax)) |
3787 | return max; |
3788 | |
3789 | return bnd + cst; |
3790 | } |
3791 | else |
3792 | { |
3793 | /* OP0 - CST, where CST >= 0. |
3794 | |
3795 | If TYPE is signed, we have already verified that OP0 >= 0, and we |
3796 | know that the result is nonnegative. This implies that |
3797 | VAL <= BND - CST. |
3798 | |
3799 | If TYPE is unsigned, we must additionally know that OP0 >= CST, |
3800 | otherwise the operation underflows. |
3801 | */ |
3802 | |
3803 | /* This should only happen if the type is unsigned; however, for |
3804 | buggy programs that use overflowing signed arithmetics even with |
3805 | -fno-wrapv, this condition may also be true for signed values. */ |
3806 | if (wi::ltu_p (x: bnd, y: cst)) |
3807 | return max; |
3808 | |
3809 | if (TYPE_UNSIGNED (type)) |
3810 | { |
3811 | tree tem = fold_binary (GE_EXPR, boolean_type_node, op0, |
3812 | wide_int_to_tree (type, cst)); |
3813 | if (!tem || integer_nonzerop (tem)) |
3814 | return max; |
3815 | } |
3816 | |
3817 | bnd -= cst; |
3818 | } |
3819 | |
3820 | return bnd; |
3821 | |
3822 | case FLOOR_DIV_EXPR: |
3823 | case EXACT_DIV_EXPR: |
3824 | if (TREE_CODE (op1) != INTEGER_CST |
3825 | || tree_int_cst_sign_bit (op1)) |
3826 | return max; |
3827 | |
3828 | bnd = derive_constant_upper_bound (val: op0); |
3829 | return wi::udiv_floor (x: bnd, y: wi::to_widest (t: op1)); |
3830 | |
3831 | case BIT_AND_EXPR: |
3832 | if (TREE_CODE (op1) != INTEGER_CST |
3833 | || tree_int_cst_sign_bit (op1)) |
3834 | return max; |
3835 | return wi::to_widest (t: op1); |
3836 | |
3837 | case SSA_NAME: |
3838 | stmt = SSA_NAME_DEF_STMT (op0); |
3839 | if (gimple_code (g: stmt) != GIMPLE_ASSIGN |
3840 | || gimple_assign_lhs (gs: stmt) != op0) |
3841 | return max; |
3842 | return derive_constant_upper_bound_assign (stmt); |
3843 | |
3844 | default: |
3845 | return max; |
3846 | } |
3847 | } |
3848 | |
3849 | /* Emit a -Waggressive-loop-optimizations warning if needed. */ |
3850 | |
static void
do_warn_aggressive_loop_optimizations (class loop *loop,
				       widest_int i_bound, gimple *stmt)
{
  /* Don't warn if the loop doesn't have known constant bound.  */
  if (!loop->nb_iterations
      || TREE_CODE (loop->nb_iterations) != INTEGER_CST
      || !warn_aggressive_loop_optimizations
      /* To avoid warning multiple times for the same loop,
	 only start warning when we preserve loops.  */
      || (cfun->curr_properties & PROP_loops) == 0
      /* Only warn once per loop.  */
      || loop->warned_aggressive_loop_optimizations
      /* Only warn if undefined behavior gives us lower estimate than the
	 known constant bound.  */
      || wi::cmpu (x: i_bound, y: wi::to_widest (t: loop->nb_iterations)) >= 0
      /* And undefined behavior happens unconditionally.  */
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: stmt)))
    return;

  /* Anchor the "within this loop" note at the single exit's last
     statement; with no single exit there is no good location.  */
  edge e = single_exit (loop);
  if (e == NULL)
    return;

  gimple *estmt = last_nondebug_stmt (e->src);
  /* Print I_BOUND into a decimal string, switching to a heap-sized
     alloca buffer when the fixed-size stack buffer is too small.  */
  char buf[WIDE_INT_PRINT_BUFFER_SIZE], *p;
  unsigned len;
  if (print_dec_buf_size (wi: i_bound, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)),
			  len: &len))
    p = XALLOCAVEC (char, len);
  else
    p = buf;
  print_dec (wi: i_bound, buf: p, TYPE_SIGN (TREE_TYPE (loop->nb_iterations)));
  auto_diagnostic_group d;
  if (warning_at (gimple_location (g: stmt), OPT_Waggressive_loop_optimizations,
		  "iteration %s invokes undefined behavior" , p))
    inform (gimple_location (g: estmt), "within this loop" );
  loop->warned_aggressive_loop_optimizations = true;
}
3890 | |
3891 | /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT |
3892 | is true if the loop is exited immediately after STMT, and this exit |
3893 | is taken at last when the STMT is executed BOUND + 1 times. |
3894 | REALISTIC is true if BOUND is expected to be close to the real number |
3895 | of iterations. UPPER is true if we are sure the loop iterates at most |
3896 | BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */ |
3897 | |
static void
record_estimate (class loop *loop, tree bound, const widest_int &i_bound,
		 gimple *at_stmt, bool is_exit, bool realistic, bool upper)
{
  widest_int delta;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Statement %s" , is_exit ? "(exit)" : "" );
      print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: " is %sexecuted at most " ,
	       upper ? "" : "probably " );
      print_generic_expr (dump_file, bound, TDF_SLIM);
      fprintf (stream: dump_file, format: " (bounded by " );
      print_decu (wi: i_bound, file: dump_file);
      fprintf (stream: dump_file, format: ") + 1 times in loop %d.\n" , loop->num);
    }

  /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
     real number of iterations.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    realistic = false;
  else
    gcc_checking_assert (i_bound == wi::to_widest (bound));

  /* Bounds that do not fit into bound_wide_int cannot be stored.  */
  if (wi::min_precision (x: i_bound, sgn: SIGNED) > bound_wide_int ().get_precision ())
    return;

  /* If we have a guaranteed upper bound, record it in the appropriate
     list, unless this is an !is_exit bound (i.e. undefined behavior in
     at_stmt) in a loop with known constant number of iterations.  */
  if (upper
      && (is_exit
	  || loop->nb_iterations == NULL_TREE
	  || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
    {
      class nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();

      elt->bound = bound_wide_int::from (x: i_bound, sgn: SIGNED);
      elt->stmt = at_stmt;
      elt->is_exit = is_exit;
      /* Push onto the loop's singly-linked list of recorded bounds.  */
      elt->next = loop->bounds;
      loop->bounds = elt;
    }

  /* If statement is executed on every path to the loop latch, we can directly
     infer the upper bound on the # of iterations of the loop.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: at_stmt)))
    upper = false;

  /* Update the number of iteration estimates according to the bound.
     If at_stmt is an exit then the loop latch is executed at most BOUND times,
     otherwise it can be executed BOUND + 1 times.  We will lower the estimate
     later if such statement must be executed on last iteration  */
  if (is_exit)
    delta = 0;
  else
    delta = 1;
  widest_int new_i_bound = i_bound + delta;

  /* If an overflow occurred, ignore the result.  */
  if (wi::ltu_p (x: new_i_bound, y: delta))
    return;

  if (upper && !is_exit)
    do_warn_aggressive_loop_optimizations (loop, i_bound: new_i_bound, stmt: at_stmt);
  record_niter_bound (loop, new_i_bound, realistic, upper);
}
3966 | |
3967 | /* Records the control iv analyzed in NITER for LOOP if the iv is valid |
3968 | and doesn't overflow. */ |
3969 | |
3970 | static void |
3971 | record_control_iv (class loop *loop, class tree_niter_desc *niter) |
3972 | { |
3973 | struct control_iv *iv; |
3974 | |
3975 | if (!niter->control.base || !niter->control.step) |
3976 | return; |
3977 | |
3978 | if (!integer_onep (niter->assumptions) || !niter->control.no_overflow) |
3979 | return; |
3980 | |
3981 | iv = ggc_alloc<control_iv> (); |
3982 | iv->base = niter->control.base; |
3983 | iv->step = niter->control.step; |
3984 | iv->next = loop->control_ivs; |
3985 | loop->control_ivs = iv; |
3986 | |
3987 | return; |
3988 | } |
3989 | |
3990 | /* This function returns TRUE if below conditions are satisfied: |
3991 | 1) VAR is SSA variable. |
3992 | 2) VAR is an IV:{base, step} in its defining loop. |
3993 | 3) IV doesn't overflow. |
3994 | 4) Both base and step are integer constants. |
3995 | 5) Base is the MIN/MAX value depends on IS_MIN. |
3996 | Store value of base to INIT correspondingly. */ |
3997 | |
3998 | static bool |
3999 | get_cst_init_from_scev (tree var, wide_int *init, bool is_min) |
4000 | { |
4001 | if (TREE_CODE (var) != SSA_NAME) |
4002 | return false; |
4003 | |
4004 | gimple *def_stmt = SSA_NAME_DEF_STMT (var); |
4005 | class loop *loop = loop_containing_stmt (stmt: def_stmt); |
4006 | |
4007 | if (loop == NULL) |
4008 | return false; |
4009 | |
4010 | affine_iv iv; |
4011 | if (!simple_iv (loop, loop, var, &iv, false)) |
4012 | return false; |
4013 | |
4014 | if (!iv.no_overflow) |
4015 | return false; |
4016 | |
4017 | if (TREE_CODE (iv.base) != INTEGER_CST || TREE_CODE (iv.step) != INTEGER_CST) |
4018 | return false; |
4019 | |
4020 | if (is_min == tree_int_cst_sign_bit (iv.step)) |
4021 | return false; |
4022 | |
4023 | *init = wi::to_wide (t: iv.base); |
4024 | return true; |
4025 | } |
4026 | |
4027 | /* Record the estimate on number of iterations of LOOP based on the fact that |
4028 | the induction variable BASE + STEP * i evaluated in STMT does not wrap and |
4029 | its values belong to the range <LOW, HIGH>. REALISTIC is true if the |
4030 | estimated number of iterations is expected to be close to the real one. |
4031 | UPPER is true if we are sure the induction variable does not wrap. */ |
4032 | |
static void
record_nonwrapping_iv (class loop *loop, tree base, tree step, gimple *stmt,
		       tree low, tree high, bool realistic, bool upper)
{
  tree niter_bound, extreme, delta;
  tree type = TREE_TYPE (base), unsigned_type;
  tree orig_base = base;

  /* Only constant nonzero steps can be handled.  */
  if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Induction variable (" );
      print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
      fprintf (stream: dump_file, format: ") " );
      print_generic_expr (dump_file, base, TDF_SLIM);
      fprintf (stream: dump_file, format: " + " );
      print_generic_expr (dump_file, step, TDF_SLIM);
      fprintf (stream: dump_file, format: " * iteration does not wrap in statement " );
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: " in loop %d.\n" , loop->num);
    }

  /* Compute the iteration-count bound in the unsigned variant of TYPE so
     that the DELTA / STEP arithmetic below cannot overflow.  */
  unsigned_type = unsigned_type_for (type);
  base = fold_convert (unsigned_type, base);
  step = fold_convert (unsigned_type, step);

  if (tree_int_cst_sign_bit (step))
    {
      /* Decreasing IV: the distance covered is BASE - LOW.  Try to
	 sharpen BASE using range information or SCEV before giving up
	 and using HIGH as a conservative starting point.  */
      wide_int max;
      Value_Range base_range (TREE_TYPE (orig_base));
      if (get_range_query (cfun)->range_of_expr (r&: base_range, expr: orig_base)
	  && !base_range.undefined_p ())
	max = base_range.upper_bound ();
      extreme = fold_convert (unsigned_type, low);
      if (TREE_CODE (orig_base) == SSA_NAME
	  && TREE_CODE (high) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
	  && ((!base_range.varying_p ()
	       && !base_range.undefined_p ())
	      || get_cst_init_from_scev (var: orig_base, init: &max, is_min: false))
	  && wi::gts_p (x: wi::to_wide (t: high), y: max))
	base = wide_int_to_tree (type: unsigned_type, cst: max);
      else if (TREE_CODE (base) != INTEGER_CST
	       && dominated_by_p (CDI_DOMINATORS,
				  loop->latch, gimple_bb (g: stmt)))
	base = fold_convert (unsigned_type, high);
      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
      step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
    }
  else
    {
      /* Increasing IV: the distance covered is HIGH - BASE.  Try to
	 sharpen BASE analogously, falling back to LOW.  */
      wide_int min;
      Value_Range base_range (TREE_TYPE (orig_base));
      if (get_range_query (cfun)->range_of_expr (r&: base_range, expr: orig_base)
	  && !base_range.undefined_p ())
	min = base_range.lower_bound ();
      extreme = fold_convert (unsigned_type, high);
      if (TREE_CODE (orig_base) == SSA_NAME
	  && TREE_CODE (low) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
	  && ((!base_range.varying_p ()
	       && !base_range.undefined_p ())
	      || get_cst_init_from_scev (var: orig_base, init: &min, is_min: true))
	  && wi::gts_p (x: min, y: wi::to_wide (t: low)))
	base = wide_int_to_tree (type: unsigned_type, cst: min);
      else if (TREE_CODE (base) != INTEGER_CST
	       && dominated_by_p (CDI_DOMINATORS,
				  loop->latch, gimple_bb (g: stmt)))
	base = fold_convert (unsigned_type, low);
      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
    }

  /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
     would get out of the range.  */
  niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
  widest_int max = derive_constant_upper_bound (val: niter_bound);
  record_estimate (loop, bound: niter_bound, i_bound: max, at_stmt: stmt, is_exit: false, realistic, upper);
}
4113 | |
4114 | /* Determine information about number of iterations a LOOP from the index |
4115 | IDX of a data reference accessed in STMT. RELIABLE is true if STMT is |
4116 | guaranteed to be executed in every iteration of LOOP. Callback for |
4117 | for_each_index. */ |
4118 | |
struct ilb_data
{
  class loop *loop;	/* Loop whose iteration bounds are being inferred.  */
  gimple *stmt;		/* Statement containing the analyzed reference.  */
};
4124 | |
static bool
idx_infer_loop_bounds (tree base, tree *idx, void *dta)
{
  struct ilb_data *data = (struct ilb_data *) dta;
  tree ev, init, step;
  tree low, high, type, next;
  bool sign, upper = true, has_flexible_size = false;
  class loop *loop = data->loop;

  /* Only array indices can give bounds here.  */
  if (TREE_CODE (base) != ARRAY_REF)
    return true;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not really extend over their declared size.  */
  if (array_ref_flexible_size_p (base))
    {
      has_flexible_size = true;
      upper = false;
    }

  class loop *dloop = loop_containing_stmt (stmt: data->stmt);
  if (!dloop)
    return true;

  /* Determine how the index evolves in the analyzed loop; INIT is its
     initial value and STEP its per-iteration increment.  */
  ev = analyze_scalar_evolution (dloop, *idx);
  ev = instantiate_parameters (loop, chrec: ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init
      || !step
      || TREE_CODE (step) != INTEGER_CST
      || integer_zerop (step)
      || tree_contains_chrecs (init, NULL)
      || chrec_contains_symbols_defined_in_loop (init, loop->num))
    return true;

  low = array_ref_low_bound (base);
  high = array_ref_up_bound (base);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high
      || TREE_CODE (high) != INTEGER_CST)
    return true;
  sign = tree_int_cst_sign_bit (step);
  type = TREE_TYPE (step);

  /* The array that might have flexible size most likely extends
     beyond its bounds.  */
  if (has_flexible_size
      && operand_equal_p (low, high, flags: 0))
    return true;

  /* In case the relevant bound of the array does not fit in type, or
     it does, but bound + step (in type) still belongs into the range of the
     array, the index may wrap and still stay within the range of the array
     (consider e.g. if the array is indexed by the full range of
     unsigned char).

     To make things simpler, we require both bounds to fit into type, although
     there are cases where this would not be strictly necessary.  */
  if (!int_fits_type_p (high, type)
      || !int_fits_type_p (low, type))
    return true;
  low = fold_convert (type, low);
  high = fold_convert (type, high);

  /* NEXT is the value one step beyond the relevant extreme of the array;
     if it still lies within [LOW, HIGH] the index may legitimately take
     it and no bound can be derived.  */
  if (sign)
    next = fold_binary (PLUS_EXPR, type, low, step);
  else
    next = fold_binary (PLUS_EXPR, type, high, step);

  if (tree_int_cst_compare (t1: low, t2: next) <= 0
      && tree_int_cst_compare (t1: next, t2: high) <= 0)
    return true;

  /* If access is not executed on every iteration, we must ensure that overflow
     may not make the access valid later.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (g: data->stmt))
      && scev_probably_wraps_p (NULL_TREE,
				initial_condition_in_loop_num (ev, loop->num),
				step, data->stmt, loop, true))
    upper = false;

  record_nonwrapping_iv (loop, base: init, step, stmt: data->stmt, low, high, realistic: false, upper);
  return true;
}
4214 | |
4215 | /* Determine information about number of iterations a LOOP from the bounds |
4216 | of arrays in the data reference REF accessed in STMT. RELIABLE is true if |
4217 | STMT is guaranteed to be executed in every iteration of LOOP.*/ |
4218 | |
4219 | static void |
4220 | infer_loop_bounds_from_ref (class loop *loop, gimple *stmt, tree ref) |
4221 | { |
4222 | struct ilb_data data; |
4223 | |
4224 | data.loop = loop; |
4225 | data.stmt = stmt; |
4226 | for_each_index (&ref, idx_infer_loop_bounds, &data); |
4227 | } |
4228 | |
4229 | /* Determine information about number of iterations of a LOOP from the way |
4230 | arrays are used in STMT. RELIABLE is true if STMT is guaranteed to be |
4231 | executed in every iteration of LOOP. */ |
4232 | |
4233 | static void |
4234 | infer_loop_bounds_from_array (class loop *loop, gimple *stmt) |
4235 | { |
4236 | if (is_gimple_assign (gs: stmt)) |
4237 | { |
4238 | tree op0 = gimple_assign_lhs (gs: stmt); |
4239 | tree op1 = gimple_assign_rhs1 (gs: stmt); |
4240 | |
4241 | /* For each memory access, analyze its access function |
4242 | and record a bound on the loop iteration domain. */ |
4243 | if (REFERENCE_CLASS_P (op0)) |
4244 | infer_loop_bounds_from_ref (loop, stmt, ref: op0); |
4245 | |
4246 | if (REFERENCE_CLASS_P (op1)) |
4247 | infer_loop_bounds_from_ref (loop, stmt, ref: op1); |
4248 | } |
4249 | else if (is_gimple_call (gs: stmt)) |
4250 | { |
4251 | tree arg, lhs; |
4252 | unsigned i, n = gimple_call_num_args (gs: stmt); |
4253 | |
4254 | lhs = gimple_call_lhs (gs: stmt); |
4255 | if (lhs && REFERENCE_CLASS_P (lhs)) |
4256 | infer_loop_bounds_from_ref (loop, stmt, ref: lhs); |
4257 | |
4258 | for (i = 0; i < n; i++) |
4259 | { |
4260 | arg = gimple_call_arg (gs: stmt, index: i); |
4261 | if (REFERENCE_CLASS_P (arg)) |
4262 | infer_loop_bounds_from_ref (loop, stmt, ref: arg); |
4263 | } |
4264 | } |
4265 | } |
4266 | |
4267 | /* Determine information about number of iterations of a LOOP from the fact |
4268 | that pointer arithmetics in STMT does not overflow. */ |
4269 | |
static void
infer_loop_bounds_from_pointer_arith (class loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;
  tree var, ptr;

  /* Only PTR p+ OFFSET assignments are of interest.  */
  if (!is_gimple_assign (gs: stmt)
      || gimple_assign_rhs_code (gs: stmt) != POINTER_PLUS_EXPR)
    return;

  def = gimple_assign_lhs (gs: stmt);
  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* The result type must be one where wrapping is undefined, otherwise
     no conclusion can be drawn from "no overflow".  */
  type = TREE_TYPE (def);
  if (!nowrap_type_p (type))
    return;

  /* The pointer operand must be invariant so that only the offset
     evolves in the loop.  */
  ptr = gimple_assign_rhs1 (gs: stmt);
  if (!expr_invariant_in_loop_p (loop, ptr))
    return;

  var = gimple_assign_rhs2 (gs: stmt);
  if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
    return;

  class loop *uloop = loop_containing_stmt (stmt);
  scev = instantiate_parameters (loop, chrec: analyze_scalar_evolution (uloop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);

  /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
     produce a NULL pointer.  The contrary would mean NULL points to an object,
     while NULL is supposed to compare unequal with the address of all objects.
     Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
     NULL pointer since that would mean wrapping, which we assume here not to
     happen.  So, we can exclude NULL from the valid range of pointer
     arithmetic.  */
  if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
    low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));

  record_nonwrapping_iv (loop, base, step, stmt, low, high, realistic: false, upper: true);
}
4325 | |
4326 | /* Determine information about number of iterations of a LOOP from the fact |
4327 | that signed arithmetics in STMT does not overflow. */ |
4328 | |
static void
infer_loop_bounds_from_signedness (class loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;

  if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
    return;

  def = gimple_assign_lhs (gs: stmt);

  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* Overflow must be undefined in the result type, otherwise "does not
     overflow" tells us nothing.  */
  type = TREE_TYPE (def);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_OVERFLOW_UNDEFINED (type))
    return;

  scev = instantiate_parameters (loop, chrec: analyze_scalar_evolution (loop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  /* Start with the full range of TYPE, then narrow it using any known
     value range of DEF.  */
  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);
  Value_Range r (TREE_TYPE (def));
  get_range_query (cfun)->range_of_expr (r, expr: def);
  if (!r.varying_p () && !r.undefined_p ())
    {
      low = wide_int_to_tree (type, cst: r.lower_bound ());
      high = wide_int_to_tree (type, cst: r.upper_bound ());
    }

  record_nonwrapping_iv (loop, base, step, stmt, low, high, realistic: false, upper: true);
}
4372 | |
4373 | /* The following analyzers are extracting informations on the bounds |
4374 | of LOOP from the following undefined behaviors: |
4375 | |
4376 | - data references should not access elements over the statically |
4377 | allocated size, |
4378 | |
4379 | - signed variables should not overflow when flag_wrapv is not set. |
4380 | */ |
4381 | |
4382 | static void |
4383 | infer_loop_bounds_from_undefined (class loop *loop, basic_block *bbs) |
4384 | { |
4385 | unsigned i; |
4386 | gimple_stmt_iterator bsi; |
4387 | basic_block bb; |
4388 | bool reliable; |
4389 | |
4390 | for (i = 0; i < loop->num_nodes; i++) |
4391 | { |
4392 | bb = bbs[i]; |
4393 | |
4394 | /* If BB is not executed in each iteration of the loop, we cannot |
4395 | use the operations in it to infer reliable upper bound on the |
4396 | # of iterations of the loop. However, we can use it as a guess. |
4397 | Reliable guesses come only from array bounds. */ |
4398 | reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb); |
4399 | |
4400 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
4401 | { |
4402 | gimple *stmt = gsi_stmt (i: bsi); |
4403 | |
4404 | infer_loop_bounds_from_array (loop, stmt); |
4405 | |
4406 | if (reliable) |
4407 | { |
4408 | infer_loop_bounds_from_signedness (loop, stmt); |
4409 | infer_loop_bounds_from_pointer_arith (loop, stmt); |
4410 | } |
4411 | } |
4412 | |
4413 | } |
4414 | } |
4415 | |
4416 | /* Compare wide ints, callback for qsort. */ |
4417 | |
4418 | static int |
4419 | wide_int_cmp (const void *p1, const void *p2) |
4420 | { |
4421 | const bound_wide_int *d1 = (const bound_wide_int *) p1; |
4422 | const bound_wide_int *d2 = (const bound_wide_int *) p2; |
4423 | return wi::cmpu (x: *d1, y: *d2); |
4424 | } |
4425 | |
4426 | /* Return index of BOUND in BOUNDS array sorted in increasing order. |
4427 | Lookup by binary search. */ |
4428 | |
4429 | static int |
4430 | bound_index (const vec<bound_wide_int> &bounds, const bound_wide_int &bound) |
4431 | { |
4432 | unsigned int end = bounds.length (); |
4433 | unsigned int begin = 0; |
4434 | |
4435 | /* Find a matching index by means of a binary search. */ |
4436 | while (begin != end) |
4437 | { |
4438 | unsigned int middle = (begin + end) / 2; |
4439 | bound_wide_int index = bounds[middle]; |
4440 | |
4441 | if (index == bound) |
4442 | return middle; |
4443 | else if (wi::ltu_p (x: index, y: bound)) |
4444 | begin = middle + 1; |
4445 | else |
4446 | end = middle; |
4447 | } |
4448 | gcc_unreachable (); |
4449 | } |
4450 | |
4451 | /* We recorded loop bounds only for statements dominating loop latch (and thus |
4452 | executed each loop iteration). If there are any bounds on statements not |
4453 | dominating the loop latch we can improve the estimate by walking the loop |
4454 | body and seeing if every path from loop header to loop latch contains |
4455 | some bounded statement. */ |
4456 | |
static void
discover_iteration_bound_by_body_walk (class loop *loop)
{
  class nb_iter_bound *elt;
  auto_vec<bound_wide_int> bounds;
  vec<vec<basic_block> > queues = vNULL;
  vec<basic_block> queue = vNULL;
  ptrdiff_t queue_index;
  ptrdiff_t latch_index = 0;

  /* Discover what bounds may interest us: only bounds that improve on
     the currently recorded upper bound are worth propagating.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      bound_wide_int bound = elt->bound;

      /* Exit terminates loop at given iteration, while non-exits produce
	 undefined effect on the next iteration, so the loop may run one
	 more time.  */
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      if (!loop->any_upper_bound
	  || wi::ltu_p (x: bound, y: loop->nb_iterations_upper_bound))
	bounds.safe_push (obj: bound);
    }

  /* Exit early if there is nothing to do.  */
  if (!bounds.exists ())
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: " Trying to walk loop body to reduce the bound.\n" );

  /* Sort the bounds in increasing order, as required by bound_index's
     binary search.  */
  bounds.qsort (wide_int_cmp);

  /* For every basic block record the lowest bound that is guaranteed to
     terminate the loop.  */

  hash_map<basic_block, ptrdiff_t> bb_bounds;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      bound_wide_int bound = elt->bound;
      /* Same +1 adjustment as in the collection pass above.  */
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      if (!loop->any_upper_bound
	  || wi::ltu_p (x: bound, y: loop->nb_iterations_upper_bound))
	{
	  ptrdiff_t index = bound_index (bounds, bound);
	  ptrdiff_t *entry = bb_bounds.get (k: gimple_bb (g: elt->stmt));
	  if (!entry)
	    bb_bounds.put (k: gimple_bb (g: elt->stmt), v: index);
	  else if ((ptrdiff_t)*entry > index)
	    *entry = index;
	}
    }

  hash_map<basic_block, ptrdiff_t> block_priority;

  /* Perform shortest path discovery loop->header ... loop->latch.

     The "distance" is given by the smallest loop bound of basic block
     present in the path and we look for path with largest smallest bound
     on it.

     To avoid the need for a Fibonacci heap on double ints we simply compress
     double ints into indexes to BOUNDS array and then represent the queue
     as arrays of queues for every index.
     Index of BOUNDS.length() means that the execution of given BB has
     no bounds determined.

     VISITED is a pointer map translating basic block into smallest index
     it was inserted into the priority queue with.  */
  latch_index = -1;

  /* Start walk in loop header with index set to infinite bound.  */
  queue_index = bounds.length ();
  queues.safe_grow_cleared (len: queue_index + 1, exact: true);
  queue.safe_push (obj: loop->header);
  queues[queue_index] = queue;
  block_priority.put (k: loop->header, v: queue_index);

  /* Process queues from the largest (weakest) bound downwards; once
     QUEUE_INDEX drops to LATCH_INDEX no better path can be found.  */
  for (; queue_index >= 0; queue_index--)
    {
      if (latch_index < queue_index)
	{
	  while (queues[queue_index].length ())
	    {
	      basic_block bb;
	      ptrdiff_t bound_index = queue_index;
	      edge e;
	      edge_iterator ei;

	      queue = queues[queue_index];
	      bb = queue.pop ();

	      /* OK, we later inserted the BB with lower priority, skip it.  */
	      if (*block_priority.get (k: bb) > queue_index)
		continue;

	      /* See if we can improve the bound.  */
	      ptrdiff_t *entry = bb_bounds.get (k: bb);
	      if (entry && *entry < bound_index)
		bound_index = *entry;

	      /* Insert successors into the queue, watch for latch edge
		 and record greatest index we saw.  */
	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  bool insert = false;

		  if (loop_exit_edge_p (loop, e))
		    continue;

		  if (e == loop_latch_edge (loop)
		      && latch_index < bound_index)
		    latch_index = bound_index;
		  else if (!(entry = block_priority.get (k: e->dest)))
		    {
		      insert = true;
		      block_priority.put (k: e->dest, v: bound_index);
		    }
		  else if (*entry < bound_index)
		    {
		      insert = true;
		      *entry = bound_index;
		    }

		  if (insert)
		    queues[bound_index].safe_push (obj: e->dest);
		}
	    }
	}
      queues[queue_index].release ();
    }

  gcc_assert (latch_index >= 0);
  /* LATCH_INDEX == bounds.length () means some path to the latch carries
     no bound at all; only a smaller index improves the estimate.  */
  if ((unsigned)latch_index < bounds.length ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Found better loop bound " );
	  print_decu (wi: bounds[latch_index], file: dump_file);
	  fprintf (stream: dump_file, format: "\n" );
	}
      record_niter_bound (loop, widest_int::from (x: bounds[latch_index],
						  sgn: SIGNED), false, true);
    }

  queues.release ();
}
4618 | |
/* See if every path across the loop goes through a statement that is known
   to not execute at the last iteration.  In that case we can decrease the
   iteration count by 1.  */
4622 | |
static void
maybe_lower_iteration_bound (class loop *loop)
{
  hash_set<gimple *> *not_executed_last_iteration = NULL;
  class nb_iter_bound *elt;
  bool found_exit = false;
  auto_vec<basic_block> queue;
  bitmap visited;

  /* Collect all statements with interesting (i.e. lower than
     nb_iterations_upper_bound) bound on them.

     TODO: Due to the way record_estimate choose estimates to store, the bounds
     will be always nb_iterations_upper_bound-1.  We can change this to record
     also statements not dominating the loop latch and update the walk below
     to the shortest path algorithm.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      if (!elt->is_exit
	  && wi::ltu_p (x: elt->bound, y: loop->nb_iterations_upper_bound))
	{
	  /* Allocate the set lazily, only when there is something to
	     record.  */
	  if (!not_executed_last_iteration)
	    not_executed_last_iteration = new hash_set<gimple *>;
	  not_executed_last_iteration->add (k: elt->stmt);
	}
    }
  if (!not_executed_last_iteration)
    return;

  /* Start DFS walk in the loop header and see if we can reach the
     loop latch or any of the exits (including statements with side
     effects that may terminate the loop otherwise) without visiting
     any of the statements known to have undefined effect on the last
     iteration.  */
  queue.safe_push (obj: loop->header);
  visited = BITMAP_ALLOC (NULL);
  bitmap_set_bit (visited, loop->header->index);
  found_exit = false;

  do
    {
      basic_block bb = queue.pop ();
      gimple_stmt_iterator gsi;
      bool stmt_found = false;

      /* Scan BB for possible exits and statements bounding the
	 execution.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
	{
	  gimple *stmt = gsi_stmt (i: gsi);
	  if (not_executed_last_iteration->contains (k: stmt))
	    {
	      /* This path is covered by a bounding statement; no need to
		 walk its successors.  */
	      stmt_found = true;
	      break;
	    }
	  /* A side effect may terminate the loop (e.g. a call to exit),
	     so treat it like an exit reached before any bounding
	     statement.  */
	  if (gimple_has_side_effects (stmt))
	    {
	      found_exit = true;
	      break;
	    }
	}
      if (found_exit)
	break;

      /* If no bounding statement is found, continue the walk.  */
      if (!stmt_found)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (loop_exit_edge_p (loop, e)
		  || e == loop_latch_edge (loop))
		{
		  found_exit = true;
		  break;
		}
	      /* bitmap_set_bit returns true only on the first visit.  */
	      if (bitmap_set_bit (visited, e->dest->index))
		queue.safe_push (obj: e->dest);
	    }
	}
    }
  while (queue.length () && !found_exit);

  /* If every path through the loop reach bounding statement before exit,
     then we know the last iteration of the loop will have undefined effect
     and we can decrease number of iterations.  */

  if (!found_exit)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Reducing loop iteration estimate by 1; "
		 "undefined statement must be executed at the last iteration.\n" );
      record_niter_bound (loop, widest_int::from (x: loop->nb_iterations_upper_bound,
						  sgn: SIGNED) - 1,
			  false, true);
    }

  BITMAP_FREE (visited);
  delete not_executed_last_iteration;
}
4724 | |
4725 | /* Get expected upper bound for number of loop iterations for |
4726 | BUILT_IN_EXPECT_WITH_PROBABILITY for a condition COND. */ |
4727 | |
static tree
get_upper_bound_based_on_builtin_expr_with_prob (gcond *cond)
{
  if (cond == NULL)
    return NULL_TREE;

  tree lhs = gimple_cond_lhs (gs: cond);
  if (TREE_CODE (lhs) != SSA_NAME)
    return NULL_TREE;

  /* The LHS must be defined by a call to
     __builtin_expect_with_probability (val, expected, probability).  */
  gimple *stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond));
  gcall *def = dyn_cast<gcall *> (p: stmt);
  if (def == NULL)
    return NULL_TREE;

  tree decl = gimple_call_fndecl (gs: def);
  if (!decl
      || !fndecl_built_in_p (node: decl, name1: BUILT_IN_EXPECT_WITH_PROBABILITY)
      || gimple_call_num_args (gs: stmt) != 3)
    return NULL_TREE;

  /* Fold the exit condition with the expected value (argument 1)
     substituted for the LHS; it must fold to a constant so we know
     which branch is the expected one.  */
  tree c = gimple_call_arg (gs: def, index: 1);
  tree condt = TREE_TYPE (lhs);
  tree res = fold_build2 (gimple_cond_code (cond),
			  condt, c,
			  gimple_cond_rhs (cond));
  if (TREE_CODE (res) != INTEGER_CST)
    return NULL_TREE;


  /* PROB (argument 2) is the probability that the condition evaluates to
     the expected value.  If the expected outcome is "condition false"
     (RES == 0), the branch taken with probability PROB is the other one,
     so use 1 - PROB instead.  */
  tree prob = gimple_call_arg (gs: def, index: 2);
  tree t = TREE_TYPE (prob);
  tree one
    = build_real_from_int_cst (t,
			       integer_one_node);
  if (integer_zerop (res))
    prob = fold_build2 (MINUS_EXPR, t, one, prob);
  /* With a per-iteration exit probability p, the expected number of
     iterations until the exit is taken is 1/p.  */
  tree r = fold_build2 (RDIV_EXPR, t, one, prob);
  if (TREE_CODE (r) != REAL_CST)
    return NULL_TREE;

  HOST_WIDE_INT probi
    = real_to_integer (TREE_REAL_CST_PTR (r));
  return build_int_cst (condt, probi);
}
4773 | |
/* Records estimates on numbers of iterations of LOOP.  When
   flag_aggressive_loop_optimizations is set, also use estimates derived
   from undefined behavior.  */
4776 | |
void
estimate_numbers_of_iterations (class loop *loop)
{
  tree niter, type;
  unsigned i;
  class tree_niter_desc niter_desc;
  edge ex;
  widest_int bound;
  edge likely_exit;

  /* Give up if we already have tried to compute an estimation.  */
  if (loop->estimate_state != EST_NOT_COMPUTED)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Estimating # of iterations of loop %d\n" , loop->num);

  /* Mark computed up front so recursive invocations (e.g. via
     number_of_latch_executions) bail out early above.  */
  loop->estimate_state = EST_AVAILABLE;

  sreal nit;
  bool reliable;

  /* If we have a measured profile, use it to estimate the number of
     iterations.  Normally this is recorded by branch_prob right after
     reading the profile.  In case we however found a new loop, record the
     information here.

     Explicitly check for profile status so we do not report
     wrong prediction hitrates for guessed loop iterations heuristics.
     Do not recompute already recorded bounds - we ought to be better on
     updating iteration bounds than updating profile in general and thus
     recomputing iteration bounds later in the compilation process will just
     introduce random roundoff errors.  */
  if (!loop->any_estimate
      && expected_loop_iterations_by_profile (loop, ret: &nit, reliable: &reliable)
      && reliable)
    {
      bound = nit.to_nearest_int ();
      record_niter_bound (loop, bound, true, false);
    }

  /* Ensure that loop->nb_iterations is computed if possible.  If it turns out
     to be constant, we avoid undefined behavior implied bounds and instead
     diagnose those loops with -Waggressive-loop-optimizations.  */
  number_of_latch_executions (loop);

  basic_block *body = get_loop_body (loop);
  auto_vec<edge> exits = get_loop_exit_edges (loop, body);
  likely_exit = single_likely_exit (loop, exits);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      /* For the most likely exit, also try to derive a realistic (not
	 safe upper) estimate from __builtin_expect_with_probability.  */
      if (ex == likely_exit)
	{
	  gimple *stmt = *gsi_last_bb (bb: ex->src);
	  if (stmt != NULL)
	    {
	      gcond *cond = dyn_cast<gcond *> (p: stmt);
	      tree niter_bound
		= get_upper_bound_based_on_builtin_expr_with_prob (cond);
	      if (niter_bound != NULL_TREE)
		{
		  widest_int max = derive_constant_upper_bound (val: niter_bound);
		  record_estimate (loop, bound: niter_bound, i_bound: max, at_stmt: cond,
				   is_exit: true, realistic: true, upper: false);
		}
	    }
	}

      if (!number_of_iterations_exit (loop, exit: ex, niter: &niter_desc,
				      warn: false, every_iteration: false, body))
	continue;

      /* If the exit may be taken before any iteration completes, guard
	 the iteration count with a COND_EXPR selecting zero.  */
      niter = niter_desc.niter;
      type = TREE_TYPE (niter);
      if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
	niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
			build_int_cst (type, 0),
			niter);
      record_estimate (loop, bound: niter, i_bound: niter_desc.max,
		       at_stmt: last_nondebug_stmt (ex->src),
		       is_exit: true, realistic: ex == likely_exit, upper: true);
      record_control_iv (loop, niter: &niter_desc);
    }

  if (flag_aggressive_loop_optimizations)
    infer_loop_bounds_from_undefined (loop, bbs: body);
  free (ptr: body);

  discover_iteration_bound_by_body_walk (loop);

  maybe_lower_iteration_bound (loop);

  /* If we know the exact number of iterations of this loop, try to
     not break code with undefined behavior by not recording smaller
     maximum number of iterations.  */
  if (loop->nb_iterations
      && TREE_CODE (loop->nb_iterations) == INTEGER_CST
      && (wi::min_precision (x: wi::to_widest (t: loop->nb_iterations), sgn: SIGNED)
	  <= bound_wide_int ().get_precision ()))
    {
      loop->any_upper_bound = true;
      loop->nb_iterations_upper_bound
	= bound_wide_int::from (x: wi::to_widest (t: loop->nb_iterations), sgn: SIGNED);
    }
}
4882 | |
4883 | /* Sets NIT to the estimated number of executions of the latch of the |
4884 | LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as |
4885 | large as the number of iterations. If we have no reliable estimate, |
4886 | the function returns false, otherwise returns true. */ |
4887 | |
4888 | bool |
4889 | estimated_loop_iterations (class loop *loop, widest_int *nit) |
4890 | { |
4891 | /* When SCEV information is available, try to update loop iterations |
4892 | estimate. Otherwise just return whatever we recorded earlier. */ |
4893 | if (scev_initialized_p ()) |
4894 | estimate_numbers_of_iterations (loop); |
4895 | |
4896 | return (get_estimated_loop_iterations (loop, nit)); |
4897 | } |
4898 | |
4899 | /* Similar to estimated_loop_iterations, but returns the estimate only |
4900 | if it fits to HOST_WIDE_INT. If this is not the case, or the estimate |
4901 | on the number of iterations of LOOP could not be derived, returns -1. */ |
4902 | |
4903 | HOST_WIDE_INT |
4904 | estimated_loop_iterations_int (class loop *loop) |
4905 | { |
4906 | widest_int nit; |
4907 | HOST_WIDE_INT hwi_nit; |
4908 | |
4909 | if (!estimated_loop_iterations (loop, nit: &nit)) |
4910 | return -1; |
4911 | |
4912 | if (!wi::fits_shwi_p (x: nit)) |
4913 | return -1; |
4914 | hwi_nit = nit.to_shwi (); |
4915 | |
4916 | return hwi_nit < 0 ? -1 : hwi_nit; |
4917 | } |
4918 | |
4919 | |
4920 | /* Sets NIT to an upper bound for the maximum number of executions of the |
4921 | latch of the LOOP. If we have no reliable estimate, the function returns |
4922 | false, otherwise returns true. */ |
4923 | |
4924 | bool |
4925 | max_loop_iterations (class loop *loop, widest_int *nit) |
4926 | { |
4927 | /* When SCEV information is available, try to update loop iterations |
4928 | estimate. Otherwise just return whatever we recorded earlier. */ |
4929 | if (scev_initialized_p ()) |
4930 | estimate_numbers_of_iterations (loop); |
4931 | |
4932 | return get_max_loop_iterations (loop, nit); |
4933 | } |
4934 | |
4935 | /* Similar to max_loop_iterations, but returns the estimate only |
4936 | if it fits to HOST_WIDE_INT. If this is not the case, or the estimate |
4937 | on the number of iterations of LOOP could not be derived, returns -1. */ |
4938 | |
4939 | HOST_WIDE_INT |
4940 | max_loop_iterations_int (class loop *loop) |
4941 | { |
4942 | widest_int nit; |
4943 | HOST_WIDE_INT hwi_nit; |
4944 | |
4945 | if (!max_loop_iterations (loop, nit: &nit)) |
4946 | return -1; |
4947 | |
4948 | if (!wi::fits_shwi_p (x: nit)) |
4949 | return -1; |
4950 | hwi_nit = nit.to_shwi (); |
4951 | |
4952 | return hwi_nit < 0 ? -1 : hwi_nit; |
4953 | } |
4954 | |
/* Sets NIT to a likely upper bound for the maximum number of executions of
   the latch of the LOOP.  If we have no reliable estimate, the function
   returns false, otherwise returns true.  */
4958 | |
4959 | bool |
4960 | likely_max_loop_iterations (class loop *loop, widest_int *nit) |
4961 | { |
4962 | /* When SCEV information is available, try to update loop iterations |
4963 | estimate. Otherwise just return whatever we recorded earlier. */ |
4964 | if (scev_initialized_p ()) |
4965 | estimate_numbers_of_iterations (loop); |
4966 | |
4967 | return get_likely_max_loop_iterations (loop, nit); |
4968 | } |
4969 | |
/* Similar to likely_max_loop_iterations, but returns the estimate only
   if it fits to HOST_WIDE_INT.  If this is not the case, or the estimate
   on the number of iterations of LOOP could not be derived, returns -1.  */
4973 | |
4974 | HOST_WIDE_INT |
4975 | likely_max_loop_iterations_int (class loop *loop) |
4976 | { |
4977 | widest_int nit; |
4978 | HOST_WIDE_INT hwi_nit; |
4979 | |
4980 | if (!likely_max_loop_iterations (loop, nit: &nit)) |
4981 | return -1; |
4982 | |
4983 | if (!wi::fits_shwi_p (x: nit)) |
4984 | return -1; |
4985 | hwi_nit = nit.to_shwi (); |
4986 | |
4987 | return hwi_nit < 0 ? -1 : hwi_nit; |
4988 | } |
4989 | |
4990 | /* Returns an estimate for the number of executions of statements |
4991 | in the LOOP. For statements before the loop exit, this exceeds |
4992 | the number of execution of the latch by one. */ |
4993 | |
4994 | HOST_WIDE_INT |
4995 | estimated_stmt_executions_int (class loop *loop) |
4996 | { |
4997 | HOST_WIDE_INT nit = estimated_loop_iterations_int (loop); |
4998 | HOST_WIDE_INT snit; |
4999 | |
5000 | if (nit == -1) |
5001 | return -1; |
5002 | |
5003 | snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1); |
5004 | |
5005 | /* If the computation overflows, return -1. */ |
5006 | return snit < 0 ? -1 : snit; |
5007 | } |
5008 | |
5009 | /* Sets NIT to the maximum number of executions of the latch of the |
5010 | LOOP, plus one. If we have no reliable estimate, the function returns |
5011 | false, otherwise returns true. */ |
5012 | |
5013 | bool |
5014 | max_stmt_executions (class loop *loop, widest_int *nit) |
5015 | { |
5016 | widest_int nit_minus_one; |
5017 | |
5018 | if (!max_loop_iterations (loop, nit)) |
5019 | return false; |
5020 | |
5021 | nit_minus_one = *nit; |
5022 | |
5023 | *nit += 1; |
5024 | |
5025 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5026 | } |
5027 | |
5028 | /* Sets NIT to the estimated maximum number of executions of the latch of the |
5029 | LOOP, plus one. If we have no likely estimate, the function returns |
5030 | false, otherwise returns true. */ |
5031 | |
5032 | bool |
5033 | likely_max_stmt_executions (class loop *loop, widest_int *nit) |
5034 | { |
5035 | widest_int nit_minus_one; |
5036 | |
5037 | if (!likely_max_loop_iterations (loop, nit)) |
5038 | return false; |
5039 | |
5040 | nit_minus_one = *nit; |
5041 | |
5042 | *nit += 1; |
5043 | |
5044 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5045 | } |
5046 | |
5047 | /* Sets NIT to the estimated number of executions of the latch of the |
5048 | LOOP, plus one. If we have no reliable estimate, the function returns |
5049 | false, otherwise returns true. */ |
5050 | |
5051 | bool |
5052 | estimated_stmt_executions (class loop *loop, widest_int *nit) |
5053 | { |
5054 | widest_int nit_minus_one; |
5055 | |
5056 | if (!estimated_loop_iterations (loop, nit)) |
5057 | return false; |
5058 | |
5059 | nit_minus_one = *nit; |
5060 | |
5061 | *nit += 1; |
5062 | |
5063 | return wi::gtu_p (x: *nit, y: nit_minus_one); |
5064 | } |
5065 | |
5066 | /* Records estimates on numbers of iterations of loops. */ |
5067 | |
void
estimate_numbers_of_iterations (function *fn)
{
  /* We don't want to issue signed overflow warnings while getting
     loop iteration estimates, so defer them for the whole walk.  */
  fold_defer_overflow_warnings ();

  /* Process every loop of FN (the per-loop overload caches its result
     via loop->estimate_state).  */
  for (auto loop : loops_list (fn, 0))
    estimate_numbers_of_iterations (loop);

  fold_undefer_and_ignore_overflow_warnings ();
}
5080 | |
5081 | /* Returns true if statement S1 dominates statement S2. */ |
5082 | |
5083 | bool |
5084 | stmt_dominates_stmt_p (gimple *s1, gimple *s2) |
5085 | { |
5086 | basic_block bb1 = gimple_bb (g: s1), bb2 = gimple_bb (g: s2); |
5087 | |
5088 | if (!bb1 |
5089 | || s1 == s2) |
5090 | return true; |
5091 | |
5092 | if (bb1 == bb2) |
5093 | { |
5094 | gimple_stmt_iterator bsi; |
5095 | |
5096 | if (gimple_code (g: s2) == GIMPLE_PHI) |
5097 | return false; |
5098 | |
5099 | if (gimple_code (g: s1) == GIMPLE_PHI) |
5100 | return true; |
5101 | |
5102 | for (bsi = gsi_start_bb (bb: bb1); gsi_stmt (i: bsi) != s2; gsi_next (i: &bsi)) |
5103 | if (gsi_stmt (i: bsi) == s1) |
5104 | return true; |
5105 | |
5106 | return false; |
5107 | } |
5108 | |
5109 | return dominated_by_p (CDI_DOMINATORS, bb2, bb1); |
5110 | } |
5111 | |
5112 | /* Returns true when we can prove that the number of executions of |
5113 | STMT in the loop is at most NITER, according to the bound on |
5114 | the number of executions of the statement NITER_BOUND->stmt recorded in |
5115 | NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT. |
5116 | |
5117 | ??? This code can become quite a CPU hog - we can have many bounds, |
5118 | and large basic block forcing stmt_dominates_stmt_p to be queried |
5119 | many times on a large basic blocks, so the whole thing is O(n^2) |
5120 | for scev_probably_wraps_p invocation (that can be done n times). |
5121 | |
5122 | It would make more sense (and give better answers) to remember BB |
5123 | bounds computed by discover_iteration_bound_by_body_walk. */ |
5124 | |
static bool
n_of_executions_at_most (gimple *stmt,
			 class nb_iter_bound *niter_bound,
			 tree niter)
{
  widest_int bound = widest_int::from (x: niter_bound->bound, sgn: SIGNED);
  tree nit_type = TREE_TYPE (niter), e;
  enum tree_code cmp;

  gcc_assert (TYPE_UNSIGNED (nit_type));

  /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
     the number of iterations is small.  */
  if (!wi::fits_to_tree_p (x: bound, type: nit_type))
    return false;

  /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
     times.  This means that:

     -- if NITER_BOUND->is_exit is true, then everything after
	it at most NITER_BOUND->bound times.

     -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
	is executed, then NITER_BOUND->stmt is executed as well in the same
	iteration then STMT is executed at most NITER_BOUND->bound + 1 times.

	If we can determine that NITER_BOUND->stmt is always executed
	after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
	We conclude that if both statements belong to the same
	basic block and STMT is before NITER_BOUND->stmt and there are no
	statements with side effects in between.  */

  if (niter_bound->is_exit)
    {
      /* Only statements strictly after the exit check are limited to
	 BOUND executions; the exit statement itself may run once more.  */
      if (stmt == niter_bound->stmt
	  || !stmt_dominates_stmt_p (s1: niter_bound->stmt, s2: stmt))
	return false;
      cmp = GE_EXPR;
    }
  else
    {
      if (!stmt_dominates_stmt_p (s1: niter_bound->stmt, s2: stmt))
	{
	  gimple_stmt_iterator bsi;
	  if (gimple_bb (g: stmt) != gimple_bb (g: niter_bound->stmt)
	      || gimple_code (g: stmt) == GIMPLE_PHI
	      || gimple_code (g: niter_bound->stmt) == GIMPLE_PHI)
	    return false;

	  /* By stmt_dominates_stmt_p we already know that STMT appears
	     before NITER_BOUND->STMT.  Still need to test that the loop
	     cannot be terminated by a side effect in between.  */
	  for (bsi = gsi_for_stmt (stmt); gsi_stmt (i: bsi) != niter_bound->stmt;
	       gsi_next (i: &bsi))
	    if (gimple_has_side_effects (gsi_stmt (i: bsi)))
	      return false;
	  /* STMT may run one extra time; bump the bound, rejecting
	     overflow or a value no longer representable in NIT_TYPE.  */
	  bound += 1;
	  if (bound == 0
	      || !wi::fits_to_tree_p (x: bound, type: nit_type))
	    return false;
	}
      cmp = GT_EXPR;
    }

  /* Finally ask the folder whether NITER compares (>= or >) against the
     bound; a non-constant fold result proves nothing.  */
  e = fold_binary (cmp, boolean_type_node,
		   niter, wide_int_to_tree (nit_type, bound));
  return e && integer_nonzerop (e);
}
5193 | |
5194 | /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */ |
5195 | |
5196 | bool |
5197 | nowrap_type_p (tree type) |
5198 | { |
5199 | if (ANY_INTEGRAL_TYPE_P (type) |
5200 | && TYPE_OVERFLOW_UNDEFINED (type)) |
5201 | return true; |
5202 | |
5203 | if (POINTER_TYPE_P (type)) |
5204 | return true; |
5205 | |
5206 | return false; |
5207 | } |
5208 | |
5209 | /* Return true if we can prove LOOP is exited before evolution of induction |
5210 | variable {BASE, STEP} overflows with respect to its type bound. */ |
5211 | |
5212 | static bool |
5213 | loop_exits_before_overflow (tree base, tree step, |
5214 | gimple *at_stmt, class loop *loop) |
5215 | { |
5216 | widest_int niter; |
5217 | struct control_iv *civ; |
5218 | class nb_iter_bound *bound; |
5219 | tree e, delta, step_abs, unsigned_base; |
5220 | tree type = TREE_TYPE (step); |
5221 | tree unsigned_type, valid_niter; |
5222 | |
5223 | /* Don't issue signed overflow warnings. */ |
5224 | fold_defer_overflow_warnings (); |
5225 | |
5226 | /* Compute the number of iterations before we reach the bound of the |
5227 | type, and verify that the loop is exited before this occurs. */ |
5228 | unsigned_type = unsigned_type_for (type); |
5229 | unsigned_base = fold_convert (unsigned_type, base); |
5230 | |
5231 | if (tree_int_cst_sign_bit (step)) |
5232 | { |
5233 | tree extreme = fold_convert (unsigned_type, |
5234 | lower_bound_in_type (type, type)); |
5235 | delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme); |
5236 | step_abs = fold_build1 (NEGATE_EXPR, unsigned_type, |
5237 | fold_convert (unsigned_type, step)); |
5238 | } |
5239 | else |
5240 | { |
5241 | tree extreme = fold_convert (unsigned_type, |
5242 | upper_bound_in_type (type, type)); |
5243 | delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base); |
5244 | step_abs = fold_convert (unsigned_type, step); |
5245 | } |
5246 | |
5247 | valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs); |
5248 | |
5249 | estimate_numbers_of_iterations (loop); |
5250 | |
5251 | if (max_loop_iterations (loop, nit: &niter) |
5252 | && wi::fits_to_tree_p (x: niter, TREE_TYPE (valid_niter)) |
5253 | && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter, |
5254 | wide_int_to_tree (TREE_TYPE (valid_niter), |
5255 | niter))) != NULL |
5256 | && integer_nonzerop (e)) |
5257 | { |
5258 | fold_undefer_and_ignore_overflow_warnings (); |
5259 | return true; |
5260 | } |
5261 | if (at_stmt) |
5262 | for (bound = loop->bounds; bound; bound = bound->next) |
5263 | { |
5264 | if (n_of_executions_at_most (stmt: at_stmt, niter_bound: bound, niter: valid_niter)) |
5265 | { |
5266 | fold_undefer_and_ignore_overflow_warnings (); |
5267 | return true; |
5268 | } |
5269 | } |
5270 | fold_undefer_and_ignore_overflow_warnings (); |
5271 | |
5272 | /* Try to prove loop is exited before {base, step} overflows with the |
5273 | help of analyzed loop control IV. This is done only for IVs with |
5274 | constant step because otherwise we don't have the information. */ |
5275 | if (TREE_CODE (step) == INTEGER_CST) |
5276 | { |
5277 | for (civ = loop->control_ivs; civ; civ = civ->next) |
5278 | { |
5279 | enum tree_code code; |
5280 | tree civ_type = TREE_TYPE (civ->step); |
5281 | |
5282 | /* Have to consider type difference because operand_equal_p ignores |
5283 | that for constants. */ |
5284 | if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type) |
5285 | || element_precision (type) != element_precision (civ_type)) |
5286 | continue; |
5287 | |
5288 | /* Only consider control IV with same step. */ |
5289 | if (!operand_equal_p (step, civ->step, flags: 0)) |
5290 | continue; |
5291 | |
5292 | /* Done proving if this is a no-overflow control IV. */ |
5293 | if (operand_equal_p (base, civ->base, flags: 0)) |
5294 | return true; |
5295 | |
5296 | /* Control IV is recorded after expanding simple operations, |
5297 | Here we expand base and compare it too. */ |
5298 | tree expanded_base = expand_simple_operations (expr: base); |
5299 | if (operand_equal_p (expanded_base, civ->base, flags: 0)) |
5300 | return true; |
5301 | |
5302 | /* If this is a before stepping control IV, in other words, we have |
5303 | |
5304 | {civ_base, step} = {base + step, step} |
5305 | |
5306 | Because civ {base + step, step} doesn't overflow during loop |
5307 | iterations, {base, step} will not overflow if we can prove the |
5308 | operation "base + step" does not overflow. Specifically, we try |
5309 | to prove below conditions are satisfied: |
5310 | |
5311 | base <= UPPER_BOUND (type) - step ;;step > 0 |
5312 | base >= LOWER_BOUND (type) - step ;;step < 0 |
5313 | |
5314 | by proving the reverse conditions are false using loop's initial |
5315 | condition. */ |
5316 | if (POINTER_TYPE_P (TREE_TYPE (base))) |
5317 | code = POINTER_PLUS_EXPR; |
5318 | else |
5319 | code = PLUS_EXPR; |
5320 | |
5321 | tree stepped = fold_build2 (code, TREE_TYPE (base), base, step); |
5322 | tree expanded_stepped = fold_build2 (code, TREE_TYPE (base), |
5323 | expanded_base, step); |
5324 | if (operand_equal_p (stepped, civ->base, flags: 0) |
5325 | || operand_equal_p (expanded_stepped, civ->base, flags: 0)) |
5326 | { |
5327 | tree extreme; |
5328 | |
5329 | if (tree_int_cst_sign_bit (step)) |
5330 | { |
5331 | code = LT_EXPR; |
5332 | extreme = lower_bound_in_type (type, type); |
5333 | } |
5334 | else |
5335 | { |
5336 | code = GT_EXPR; |
5337 | extreme = upper_bound_in_type (type, type); |
5338 | } |
5339 | extreme = fold_build2 (MINUS_EXPR, type, extreme, step); |
5340 | e = fold_build2 (code, boolean_type_node, base, extreme); |
5341 | e = simplify_using_initial_conditions (loop, expr: e); |
5342 | if (integer_zerop (e)) |
5343 | return true; |
5344 | } |
5345 | } |
5346 | } |
5347 | |
5348 | return false; |
5349 | } |
5350 | |
5351 | /* VAR is scev variable whose evolution part is constant STEP, this function |
5352 | proves that VAR can't overflow by using value range info. If VAR's value |
5353 | range is [MIN, MAX], it can be proven by: |
5354 | MAX + step doesn't overflow ; if step > 0 |
5355 | or |
5356 | MIN + step doesn't underflow ; if step < 0. |
5357 | |
   We can only do this if var is computed in every loop iteration, i.e., var's
   definition has to dominate the loop latch.  Consider the example below:
5360 | |
5361 | { |
5362 | unsigned int i; |
5363 | |
5364 | <bb 3>: |
5365 | |
5366 | <bb 4>: |
5367 | # RANGE [0, 4294967294] NONZERO 65535 |
5368 | # i_21 = PHI <0(3), i_18(9)> |
5369 | if (i_21 != 0) |
5370 | goto <bb 6>; |
5371 | else |
5372 | goto <bb 8>; |
5373 | |
5374 | <bb 6>: |
5375 | # RANGE [0, 65533] NONZERO 65535 |
5376 | _6 = i_21 + 4294967295; |
5377 | # RANGE [0, 65533] NONZERO 65535 |
5378 | _7 = (long unsigned int) _6; |
5379 | # RANGE [0, 524264] NONZERO 524280 |
5380 | _8 = _7 * 8; |
5381 | # PT = nonlocal escaped |
5382 | _9 = a_14 + _8; |
5383 | *_9 = 0; |
5384 | |
5385 | <bb 8>: |
5386 | # RANGE [1, 65535] NONZERO 65535 |
5387 | i_18 = i_21 + 1; |
5388 | if (i_18 >= 65535) |
5389 | goto <bb 10>; |
5390 | else |
5391 | goto <bb 9>; |
5392 | |
5393 | <bb 9>: |
5394 | goto <bb 4>; |
5395 | |
5396 | <bb 10>: |
5397 | return; |
5398 | } |
5399 | |
   VAR _6 doesn't overflow only with the pre-condition (i_21 != 0), so here
   we can't use _6 to prove no-overflow for _7.  In fact, var _7 takes the
   value sequence (4294967295, 0, 1, ..., 65533) over the loop's lifetime,
   rather than (4294967295, 4294967296, ...).  */
5404 | |
5405 | static bool |
5406 | scev_var_range_cant_overflow (tree var, tree step, class loop *loop) |
5407 | { |
5408 | tree type; |
5409 | wide_int minv, maxv, diff, step_wi; |
5410 | |
5411 | if (TREE_CODE (step) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (var))) |
5412 | return false; |
5413 | |
5414 | /* Check if VAR evaluates in every loop iteration. It's not the case |
5415 | if VAR is default definition or does not dominate loop's latch. */ |
5416 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); |
5417 | if (!def_bb || !dominated_by_p (CDI_DOMINATORS, loop->latch, def_bb)) |
5418 | return false; |
5419 | |
5420 | Value_Range r (TREE_TYPE (var)); |
5421 | get_range_query (cfun)->range_of_expr (r, expr: var); |
5422 | if (r.varying_p () || r.undefined_p ()) |
5423 | return false; |
5424 | |
5425 | /* VAR is a scev whose evolution part is STEP and value range info |
5426 | is [MIN, MAX], we can prove its no-overflowness by conditions: |
5427 | |
5428 | type_MAX - MAX >= step ; if step > 0 |
5429 | MIN - type_MIN >= |step| ; if step < 0. |
5430 | |
5431 | Or VAR must take value outside of value range, which is not true. */ |
5432 | step_wi = wi::to_wide (t: step); |
5433 | type = TREE_TYPE (var); |
5434 | if (tree_int_cst_sign_bit (step)) |
5435 | { |
5436 | diff = r.lower_bound () - wi::to_wide (t: lower_bound_in_type (type, type)); |
5437 | step_wi = - step_wi; |
5438 | } |
5439 | else |
5440 | diff = wi::to_wide (t: upper_bound_in_type (type, type)) - r.upper_bound (); |
5441 | |
5442 | return (wi::geu_p (x: diff, y: step_wi)); |
5443 | } |
5444 | |
5445 | /* Return false only when the induction variable BASE + STEP * I is |
5446 | known to not overflow: i.e. when the number of iterations is small |
5447 | enough with respect to the step and initial condition in order to |
5448 | keep the evolution confined in TYPEs bounds. Return true when the |
5449 | iv is known to overflow or when the property is not computable. |
5450 | |
5451 | USE_OVERFLOW_SEMANTICS is true if this function should assume that |
5452 | the rules for overflow of the given language apply (e.g., that signed |
5453 | arithmetics in C does not overflow). |
5454 | |
5455 | If VAR is a ssa variable, this function also returns false if VAR can |
5456 | be proven not overflow with value range info. */ |
5457 | |
5458 | bool |
5459 | scev_probably_wraps_p (tree var, tree base, tree step, |
5460 | gimple *at_stmt, class loop *loop, |
5461 | bool use_overflow_semantics) |
5462 | { |
5463 | /* FIXME: We really need something like |
5464 | http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html. |
5465 | |
5466 | We used to test for the following situation that frequently appears |
5467 | during address arithmetics: |
5468 | |
5469 | D.1621_13 = (long unsigned intD.4) D.1620_12; |
5470 | D.1622_14 = D.1621_13 * 8; |
5471 | D.1623_15 = (doubleD.29 *) D.1622_14; |
5472 | |
5473 | And derived that the sequence corresponding to D_14 |
5474 | can be proved to not wrap because it is used for computing a |
5475 | memory access; however, this is not really the case -- for example, |
5476 | if D_12 = (unsigned char) [254,+,1], then D_14 has values |
5477 | 2032, 2040, 0, 8, ..., but the code is still legal. */ |
5478 | |
5479 | if (chrec_contains_undetermined (base) |
5480 | || chrec_contains_undetermined (step)) |
5481 | return true; |
5482 | |
5483 | if (integer_zerop (step)) |
5484 | return false; |
5485 | |
5486 | /* If we can use the fact that signed and pointer arithmetics does not |
5487 | wrap, we are done. */ |
5488 | if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base))) |
5489 | return false; |
5490 | |
5491 | /* To be able to use estimates on number of iterations of the loop, |
5492 | we must have an upper bound on the absolute value of the step. */ |
5493 | if (TREE_CODE (step) != INTEGER_CST) |
5494 | return true; |
5495 | |
5496 | /* Check if var can be proven not overflow with value range info. */ |
5497 | if (var && TREE_CODE (var) == SSA_NAME |
5498 | && scev_var_range_cant_overflow (var, step, loop)) |
5499 | return false; |
5500 | |
5501 | if (loop_exits_before_overflow (base, step, at_stmt, loop)) |
5502 | return false; |
5503 | |
5504 | /* At this point we still don't have a proof that the iv does not |
5505 | overflow: give up. */ |
5506 | return true; |
5507 | } |
5508 | |
5509 | /* Frees the information on upper bounds on numbers of iterations of LOOP. */ |
5510 | |
5511 | void |
5512 | free_numbers_of_iterations_estimates (class loop *loop) |
5513 | { |
5514 | struct control_iv *civ; |
5515 | class nb_iter_bound *bound; |
5516 | |
5517 | loop->nb_iterations = NULL; |
5518 | loop->estimate_state = EST_NOT_COMPUTED; |
5519 | for (bound = loop->bounds; bound;) |
5520 | { |
5521 | class nb_iter_bound *next = bound->next; |
5522 | ggc_free (bound); |
5523 | bound = next; |
5524 | } |
5525 | loop->bounds = NULL; |
5526 | |
5527 | for (civ = loop->control_ivs; civ;) |
5528 | { |
5529 | struct control_iv *next = civ->next; |
5530 | ggc_free (civ); |
5531 | civ = next; |
5532 | } |
5533 | loop->control_ivs = NULL; |
5534 | } |
5535 | |
5536 | /* Frees the information on upper bounds on numbers of iterations of loops. */ |
5537 | |
5538 | void |
5539 | free_numbers_of_iterations_estimates (function *fn) |
5540 | { |
5541 | for (auto loop : loops_list (fn, 0)) |
5542 | free_numbers_of_iterations_estimates (loop); |
5543 | } |
5544 | |
5545 | /* Substitute value VAL for ssa name NAME inside expressions held |
5546 | at LOOP. */ |
5547 | |
5548 | void |
5549 | substitute_in_loop_info (class loop *loop, tree name, tree val) |
5550 | { |
5551 | loop->nb_iterations = simplify_replace_tree (expr: loop->nb_iterations, old: name, new_tree: val); |
5552 | } |
5553 | |