1 | /* Loop versioning pass. |
2 | Copyright (C) 2018-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "gimple-iterator.h" |
27 | #include "tree-pass.h" |
28 | #include "gimplify-me.h" |
29 | #include "cfgloop.h" |
30 | #include "tree-ssa-loop.h" |
31 | #include "ssa.h" |
32 | #include "tree-scalar-evolution.h" |
33 | #include "tree-ssa-loop-ivopts.h" |
34 | #include "fold-const.h" |
35 | #include "tree-ssa-propagate.h" |
36 | #include "tree-inline.h" |
37 | #include "domwalk.h" |
38 | #include "tree-vectorizer.h" |
39 | #include "omp-general.h" |
40 | #include "predict.h" |
41 | #include "tree-into-ssa.h" |
42 | #include "gimple-range.h" |
43 | #include "tree-cfg.h" |
44 | |
45 | namespace { |
46 | |
47 | /* This pass looks for loops that could be simplified if certain loop |
48 | invariant conditions were true. It is effectively a form of loop |
49 | splitting in which the pass produces the split conditions itself, |
50 | instead of using ones that are already present in the IL. |
51 | |
52 | Versioning for when strides are 1 |
53 | --------------------------------- |
54 | |
55 | At the moment the only thing the pass looks for are memory references |
56 | like: |
57 | |
58 | for (auto i : ...) |
59 | ...x[i * stride]... |
60 | |
61 | It considers changing such loops to: |
62 | |
63 | if (stride == 1) |
64 | for (auto i : ...) [A] |
65 | ...x[i]... |
66 | else |
67 | for (auto i : ...) [B] |
68 | ...x[i * stride]... |
69 | |
70 | This can have several benefits: |
71 | |
72 | (1) [A] is often easier or cheaper to vectorize than [B]. |
73 | |
74 | (2) The scalar code in [A] is simpler than the scalar code in [B] |
75 | (if the loops cannot be vectorized or need an epilogue loop). |
76 | |
77 | (3) We might recognize [A] as a pattern, such as a memcpy or memset. |
78 | |
79 | (4) [A] has simpler address evolutions, which can help other passes |
80 | like loop interchange. |
81 | |
82 | The optimization is particularly useful for assumed-shape arrays in |
83 | Fortran, where the stride of the innermost dimension depends on the |
84 | array descriptor but is often equal to 1 in practice. For example: |
85 | |
86 | subroutine f1(x) |
87 | real :: x(:) |
88 | x(:) = 100 |
89 | end subroutine f1 |
90 | |
91 | generates the equivalent of: |
92 | |
93 | raw_stride = *x.dim[0].stride; |
94 | stride = raw_stride != 0 ? raw_stride : 1; |
95 | x_base = *x.data; |
96 | ... |
97 | tmp1 = stride * S; |
98 | tmp2 = tmp1 - stride; |
99 | *x_base[tmp2] = 1.0e+2; |
100 | |
101 | but in the common case that stride == 1, the last three statements |
102 | simplify to: |
103 | |
104 | tmp3 = S + -1; |
105 | *x_base[tmp3] = 1.0e+2; |
106 | |
107 | The optimization is in principle very simple. The difficult parts are: |
108 | |
109 | (a) deciding which parts of a general address calculation correspond |
110 | to the inner dimension of an array, since this usually isn't explicit |
111 | in the IL, and for C often isn't even explicit in the source code |
112 | |
113 | (b) estimating when the transformation is worthwhile |
114 | |
115 | Structure |
116 | --------- |
117 | |
118 | The pass has four phases: |
119 | |
120 | (1) Walk through the statements looking for and recording potential |
121 | versioning opportunities. Stop if there are none. |
122 | |
123 | (2) Use context-sensitive range information to see whether any versioning |
124 | conditions are impossible in practice. Remove them if so, and stop |
125 | if no opportunities remain. |
126 | |
127 | (We do this only after (1) to keep compile time down when no |
128 | versioning opportunities exist.) |
129 | |
130 | (3) Apply the cost model. Decide which versioning opportunities are |
131 | worthwhile and at which nesting level they should be applied. |
132 | |
133 | (4) Attempt to version all the loops selected by (3), so that: |
134 | |
135 | for (...) |
136 | ... |
137 | |
138 | becomes: |
139 | |
140 | if (!cond) |
141 | for (...) // Original loop |
142 | ... |
143 | else |
144 | for (...) // New loop |
145 | ... |
146 | |
147 | Use the version condition COND to simplify the new loop. */ |
148 | |
149 | /* Enumerates the likelihood that a particular value indexes the inner |
150 | dimension of an array. */ |
151 | enum inner_likelihood { |
152 | INNER_UNLIKELY, |
153 | INNER_DONT_KNOW, |
154 | INNER_LIKELY |
155 | }; |
156 | |
157 | /* Information about one term of an address_info. */ |
158 | struct address_term_info |
159 | { |
160 | /* The value of the term is EXPR * MULTIPLIER. */ |
161 | tree expr; |
162 | unsigned HOST_WIDE_INT multiplier; |
163 | |
164 | /* The stride applied by EXPR in each iteration of some unrecorded loop, |
165 | or null if no stride has been identified. */ |
166 | tree stride; |
167 | |
168 | /* Enumerates the likelihood that EXPR indexes the inner dimension |
169 | of an array. */ |
170 | enum inner_likelihood inner_likelihood; |
171 | |
172 | /* True if STRIDE == 1 is a versioning opportunity when considered |
173 | in isolation. */ |
174 | bool versioning_opportunity_p; |
175 | }; |
176 | |
177 | /* Information about an address calculation, and the range of constant |
178 | offsets applied to it. */ |
179 | class address_info |
180 | { |
181 | public: |
182 | static const unsigned int MAX_TERMS = 8; |
183 | |
184 | /* One statement that calculates the address. If multiple statements |
185 | share the same address, we only record the first. */ |
186 | gimple *stmt; |
187 | |
188 | /* The loop containing STMT (cached for convenience). If multiple |
189 | statements share the same address, they all belong to this loop. */ |
190 | class loop *loop; |
191 | |
192 | /* A decomposition of the calculation into a sum of terms plus an |
193 | optional base. When BASE is provided, it is never an SSA name. |
194 | Once initialization is complete, all members of TERMs are SSA names. */ |
195 | tree base; |
196 | auto_vec<address_term_info, MAX_TERMS> terms; |
197 | |
198 | /* All bytes accessed from the address fall in the offset range |
199 | [MIN_OFFSET, MAX_OFFSET). */ |
200 | HOST_WIDE_INT min_offset, max_offset; |
201 | }; |
202 | |
203 | /* Stores addresses based on their base and terms (ignoring the offsets). */ |
204 | struct address_info_hasher : nofree_ptr_hash <address_info> |
205 | { |
206 | static hashval_t hash (const address_info *); |
207 | static bool equal (const address_info *, const address_info *); |
208 | }; |
209 | |
210 | /* Information about the versioning we'd like to apply to a loop. */ |
211 | class loop_info |
212 | { |
213 | public: |
214 | bool worth_versioning_p () const; |
215 | |
216 | /* True if we've decided not to version this loop. The remaining |
217 | fields are meaningless if so. */ |
218 | bool rejected_p; |
219 | |
220 | /* True if at least one subloop of this loop benefits from versioning. */ |
221 | bool subloops_benefit_p; |
222 | |
223 | /* An estimate of the total number of instructions in the loop, |
224 | excluding those in subloops that benefit from versioning. */ |
225 | unsigned int num_insns; |
226 | |
227 | /* The outermost loop that can handle all the version checks |
228 | described below. */ |
229 | class loop *outermost; |
230 | |
231 | /* The first entry in the list of blocks that belong to this loop |
232 | (and not to subloops). m_next_block_in_loop provides the chain |
233 | pointers for the list. */ |
234 | basic_block block_list; |
235 | |
236 | /* We'd like to version the loop for the case in which these SSA names |
237 | (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime. */ |
238 | bitmap_head unity_names; |
239 | |
240 | /* If versioning succeeds, this points the version of the loop that |
241 | assumes the version conditions holds. */ |
242 | class loop *optimized_loop; |
243 | }; |
244 | |
245 | /* The main pass structure. */ |
246 | class loop_versioning |
247 | { |
248 | public: |
249 | loop_versioning (function *); |
250 | ~loop_versioning (); |
251 | unsigned int run (); |
252 | |
253 | private: |
254 | /* Used to walk the dominator tree to find loop versioning conditions |
255 | that are always false. */ |
256 | class lv_dom_walker : public dom_walker |
257 | { |
258 | public: |
259 | lv_dom_walker (loop_versioning &); |
260 | |
261 | edge before_dom_children (basic_block) final override; |
262 | |
263 | private: |
264 | /* The parent pass. */ |
265 | loop_versioning &m_lv; |
266 | }; |
267 | |
268 | /* Used to simplify statements based on conditions that are established |
269 | by the version checks. */ |
270 | class name_prop : public substitute_and_fold_engine |
271 | { |
272 | public: |
273 | name_prop (loop_info &li) : m_li (li) {} |
274 | tree value_of_expr (tree name, gimple *) final override; |
275 | |
276 | private: |
277 | /* Information about the versioning we've performed on the loop. */ |
278 | loop_info &m_li; |
279 | }; |
280 | |
281 | loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; } |
282 | |
283 | unsigned int max_insns_for_loop (class loop *); |
284 | bool expensive_stmt_p (gimple *); |
285 | |
286 | void version_for_unity (gimple *, tree); |
287 | bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT, |
288 | unsigned HOST_WIDE_INT * = 0); |
289 | bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *); |
290 | bool multiply_term_by (address_term_info &, tree); |
291 | inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT); |
292 | void dump_inner_likelihood (address_info &, address_term_info &); |
293 | void analyze_stride (address_info &, address_term_info &, |
294 | tree, class loop *); |
295 | bool find_per_loop_multiplication (address_info &, address_term_info &); |
296 | bool analyze_term_using_scevs (address_info &, address_term_info &); |
297 | void analyze_arbitrary_term (address_info &, address_term_info &); |
298 | void analyze_address_fragment (address_info &); |
299 | void record_address_fragment (gimple *, unsigned HOST_WIDE_INT, |
300 | tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT); |
301 | void analyze_expr (gimple *, tree); |
302 | bool analyze_block (basic_block); |
303 | bool analyze_blocks (); |
304 | |
305 | void prune_loop_conditions (class loop *); |
306 | bool prune_conditions (); |
307 | |
308 | void merge_loop_info (class loop *, class loop *); |
309 | void add_loop_to_queue (class loop *); |
310 | bool decide_whether_loop_is_versionable (class loop *); |
311 | bool make_versioning_decisions (); |
312 | |
313 | bool version_loop (class loop *); |
314 | void implement_versioning_decisions (); |
315 | |
316 | /* The function we're optimizing. */ |
317 | function *m_fn; |
318 | |
319 | /* The obstack to use for all pass-specific bitmaps. */ |
320 | bitmap_obstack m_bitmap_obstack; |
321 | |
322 | /* An obstack to use for general allocation. */ |
323 | obstack m_obstack; |
324 | |
325 | /* The number of loops in the function. */ |
326 | unsigned int m_nloops; |
327 | |
328 | /* The total number of loop version conditions we've found. */ |
329 | unsigned int m_num_conditions; |
330 | |
331 | /* Assume that an address fragment of the form i * stride * scale |
332 | (for variable stride and constant scale) will not benefit from |
333 | versioning for stride == 1 when scale is greater than this value. */ |
334 | unsigned HOST_WIDE_INT m_maximum_scale; |
335 | |
336 | /* Information about each loop. */ |
337 | auto_vec<loop_info> m_loops; |
338 | |
339 | /* Used to form a linked list of blocks that belong to a loop, |
340 | started by loop_info::block_list. */ |
341 | auto_vec<basic_block> m_next_block_in_loop; |
342 | |
343 | /* The list of loops that we've decided to version. */ |
344 | auto_vec<class loop *> m_loops_to_version; |
345 | |
346 | /* A table of addresses in the current loop, keyed off their values |
347 | but not their offsets. */ |
348 | hash_table <address_info_hasher> m_address_table; |
349 | |
350 | /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order. */ |
351 | auto_vec <address_info *, 32> m_address_list; |
352 | }; |
353 | |
354 | /* If EXPR is an SSA name and not a default definition, return the |
355 | defining statement, otherwise return null. */ |
356 | |
357 | static gimple * |
358 | maybe_get_stmt (tree expr) |
359 | { |
360 | if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr)) |
361 | return SSA_NAME_DEF_STMT (expr); |
362 | return NULL; |
363 | } |
364 | |
365 | /* Like maybe_get_stmt, but also return null if the defining |
366 | statement isn't an assignment. */ |
367 | |
368 | static gassign * |
369 | maybe_get_assign (tree expr) |
370 | { |
371 | return safe_dyn_cast <gassign *> (p: maybe_get_stmt (expr)); |
372 | } |
373 | |
374 | /* Return true if this pass should look through a cast of expression FROM |
375 | to type TYPE when analyzing pieces of an address. */ |
376 | |
377 | static bool |
378 | look_through_cast_p (tree type, tree from) |
379 | { |
380 | return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type) |
381 | && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type)); |
382 | } |
383 | |
384 | /* Strip all conversions of integers or pointers from EXPR, regardless |
385 | of whether the conversions are nops. This is useful in the context |
386 | of this pass because we're not trying to fold or simulate the |
387 | expression; we just want to see how it's structured. */ |
388 | |
389 | static tree |
390 | strip_casts (tree expr) |
391 | { |
392 | const unsigned int MAX_NITERS = 4; |
393 | |
394 | tree type = TREE_TYPE (expr); |
395 | while (CONVERT_EXPR_P (expr) |
396 | && look_through_cast_p (type, TREE_OPERAND (expr, 0))) |
397 | expr = TREE_OPERAND (expr, 0); |
398 | |
399 | for (unsigned int niters = 0; niters < MAX_NITERS; ++niters) |
400 | { |
401 | gassign *assign = maybe_get_assign (expr); |
402 | if (assign |
403 | && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)) |
404 | && look_through_cast_p (type, from: gimple_assign_rhs1 (gs: assign))) |
405 | expr = gimple_assign_rhs1 (gs: assign); |
406 | else |
407 | break; |
408 | } |
409 | return expr; |
410 | } |
411 | |
412 | /* Compare two address_term_infos in the same address_info. */ |
413 | |
414 | static int |
415 | compare_address_terms (const void *a_uncast, const void *b_uncast) |
416 | { |
417 | const address_term_info *a = (const address_term_info *) a_uncast; |
418 | const address_term_info *b = (const address_term_info *) b_uncast; |
419 | |
420 | if (a->expr != b->expr) |
421 | return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1; |
422 | |
423 | if (a->multiplier != b->multiplier) |
424 | return a->multiplier < b->multiplier ? -1 : 1; |
425 | |
426 | return 0; |
427 | } |
428 | |
429 | /* Dump ADDRESS using flags FLAGS. */ |
430 | |
431 | static void |
432 | dump_address_info (dump_flags_t flags, address_info &address) |
433 | { |
434 | if (address.base) |
435 | dump_printf (flags, "%T + " , address.base); |
436 | for (unsigned int i = 0; i < address.terms.length (); ++i) |
437 | { |
438 | if (i != 0) |
439 | dump_printf (flags, " + " ); |
440 | dump_printf (flags, "%T" , address.terms[i].expr); |
441 | if (address.terms[i].multiplier != 1) |
442 | dump_printf (flags, " * %wd" , address.terms[i].multiplier); |
443 | } |
444 | dump_printf (flags, " + [%wd, %wd]" , |
445 | address.min_offset, address.max_offset - 1); |
446 | } |
447 | |
448 | /* Hash an address_info based on its base and terms. */ |
449 | |
450 | hashval_t |
451 | address_info_hasher::hash (const address_info *info) |
452 | { |
453 | inchash::hash hash; |
454 | hash.add_int (v: info->base ? TREE_CODE (info->base) : 0); |
455 | hash.add_int (v: info->terms.length ()); |
456 | for (unsigned int i = 0; i < info->terms.length (); ++i) |
457 | { |
458 | hash.add_int (SSA_NAME_VERSION (info->terms[i].expr)); |
459 | hash.add_hwi (v: info->terms[i].multiplier); |
460 | } |
461 | return hash.end (); |
462 | } |
463 | |
464 | /* Return true if two address_infos have equal bases and terms. Other |
465 | properties might be different (such as the statement or constant |
466 | offset range). */ |
467 | |
468 | bool |
469 | address_info_hasher::equal (const address_info *a, const address_info *b) |
470 | { |
471 | if (a->base != b->base |
472 | && (!a->base || !b->base || !operand_equal_p (a->base, b->base, flags: 0))) |
473 | return false; |
474 | |
475 | if (a->terms.length () != b->terms.length ()) |
476 | return false; |
477 | |
478 | for (unsigned int i = 0; i < a->terms.length (); ++i) |
479 | if (a->terms[i].expr != b->terms[i].expr |
480 | || a->terms[i].multiplier != b->terms[i].multiplier) |
481 | return false; |
482 | |
483 | return true; |
484 | } |
485 | |
486 | /* Return true if we want to version the loop, i.e. if we have a |
487 | specific reason for doing so and no specific reason not to. */ |
488 | |
489 | bool |
490 | loop_info::worth_versioning_p () const |
491 | { |
492 | return (!rejected_p |
493 | && (!bitmap_empty_p (map: &unity_names) || subloops_benefit_p)); |
494 | } |
495 | |
496 | loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv) |
497 | : dom_walker (CDI_DOMINATORS), m_lv (lv) |
498 | { |
499 | } |
500 | |
501 | /* Process BB before processing the blocks it dominates. */ |
502 | |
503 | edge |
504 | loop_versioning::lv_dom_walker::before_dom_children (basic_block bb) |
505 | { |
506 | if (bb == bb->loop_father->header) |
507 | m_lv.prune_loop_conditions (bb->loop_father); |
508 | |
509 | return NULL; |
510 | } |
511 | |
512 | /* Decide whether to replace VAL with a new value in a versioned loop. |
513 | Return the new value if so, otherwise return null. */ |
514 | |
515 | tree |
516 | loop_versioning::name_prop::value_of_expr (tree val, gimple *) |
517 | { |
518 | if (TREE_CODE (val) == SSA_NAME |
519 | && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val))) |
520 | return build_one_cst (TREE_TYPE (val)); |
521 | return NULL_TREE; |
522 | } |
523 | |
524 | /* Initialize the structure to optimize FN. */ |
525 | |
526 | loop_versioning::loop_versioning (function *fn) |
527 | : m_fn (fn), |
528 | m_nloops (number_of_loops (fn)), |
529 | m_num_conditions (0), |
530 | m_address_table (31) |
531 | { |
532 | bitmap_obstack_initialize (&m_bitmap_obstack); |
533 | gcc_obstack_init (&m_obstack); |
534 | |
535 | /* Initialize the loop information. */ |
536 | m_loops.safe_grow_cleared (len: m_nloops, exact: true); |
537 | for (unsigned int i = 0; i < m_nloops; ++i) |
538 | { |
539 | m_loops[i].outermost = get_loop (fn: m_fn, num: 0); |
540 | bitmap_initialize (head: &m_loops[i].unity_names, obstack: &m_bitmap_obstack); |
541 | } |
542 | |
543 | /* Initialize the list of blocks that belong to each loop. */ |
544 | unsigned int nbbs = last_basic_block_for_fn (fn); |
545 | m_next_block_in_loop.safe_grow (len: nbbs, exact: true); |
546 | basic_block bb; |
547 | FOR_EACH_BB_FN (bb, fn) |
548 | { |
549 | loop_info &li = get_loop_info (loop: bb->loop_father); |
550 | m_next_block_in_loop[bb->index] = li.block_list; |
551 | li.block_list = bb; |
552 | } |
553 | |
554 | /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for |
555 | unvectorizable code, since it is the largest size that can be |
556 | handled efficiently by scalar code. omp_max_vf calculates the |
557 | maximum number of bytes in a vector, when such a value is relevant |
558 | to loop optimization. */ |
559 | m_maximum_scale = estimated_poly_value (x: omp_max_vf ()); |
560 | m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); |
561 | } |
562 | |
563 | loop_versioning::~loop_versioning () |
564 | { |
565 | bitmap_obstack_release (&m_bitmap_obstack); |
566 | obstack_free (&m_obstack, NULL); |
567 | } |
568 | |
569 | /* Return the maximum number of instructions allowed in LOOP before |
570 | it becomes too big for versioning. |
571 | |
572 | There are separate limits for inner and outer loops. The limit for |
573 | inner loops applies only to loops that benefit directly from versioning. |
574 | The limit for outer loops applies to all code in the outer loop and |
575 | its subloops that *doesn't* benefit directly from versioning; such code |
576 | would be "taken along for the ride". The idea is that if the cost of |
577 | the latter is small, it is better to version outer loops rather than |
578 | inner loops, both to reduce the number of repeated checks and to enable |
579 | more of the loop nest to be optimized as a natural nest (e.g. by loop |
580 | interchange or outer-loop vectorization). */ |
581 | |
582 | unsigned int |
583 | loop_versioning::max_insns_for_loop (class loop *loop) |
584 | { |
585 | return (loop->inner |
586 | ? param_loop_versioning_max_outer_insns |
587 | : param_loop_versioning_max_inner_insns); |
588 | } |
589 | |
590 | /* Return true if for cost reasons we should avoid versioning any loop |
591 | that contains STMT. |
592 | |
593 | Note that we don't need to check whether versioning is invalid for |
594 | correctness reasons, since the versioning process does that for us. |
595 | The conditions involved are too rare to be worth duplicating here. */ |
596 | |
597 | bool |
598 | loop_versioning::expensive_stmt_p (gimple *stmt) |
599 | { |
600 | if (gcall *call = dyn_cast <gcall *> (p: stmt)) |
601 | /* Assume for now that the time spent in an "expensive" call would |
602 | overwhelm any saving from versioning. */ |
603 | return !gimple_inexpensive_call_p (call); |
604 | return false; |
605 | } |
606 | |
607 | /* Record that we want to version the loop that contains STMT for the |
608 | case in which SSA name NAME is equal to 1. We already know that NAME |
609 | is invariant in the loop. */ |
610 | |
611 | void |
612 | loop_versioning::version_for_unity (gimple *stmt, tree name) |
613 | { |
614 | class loop *loop = loop_containing_stmt (stmt); |
615 | loop_info &li = get_loop_info (loop); |
616 | |
617 | if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name))) |
618 | { |
619 | /* This is the first time we've wanted to version LOOP for NAME. |
620 | Keep track of the outermost loop that can handle all versioning |
621 | checks in LI. */ |
622 | class loop *outermost |
623 | = outermost_invariant_loop_for_expr (loop, name); |
624 | if (loop_depth (loop: li.outermost) < loop_depth (loop: outermost)) |
625 | li.outermost = outermost; |
626 | |
627 | if (dump_enabled_p ()) |
628 | { |
629 | dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop" |
630 | " for when %T == 1" , name); |
631 | if (outermost == loop) |
632 | dump_printf (MSG_NOTE, "; cannot hoist check further" ); |
633 | else |
634 | { |
635 | dump_printf (MSG_NOTE, "; could implement the check at loop" |
636 | " depth %d" , loop_depth (loop: outermost)); |
637 | if (loop_depth (loop: li.outermost) > loop_depth (loop: outermost)) |
638 | dump_printf (MSG_NOTE, ", but other checks only allow" |
639 | " a depth of %d" , loop_depth (loop: li.outermost)); |
640 | } |
641 | dump_printf (MSG_NOTE, "\n" ); |
642 | } |
643 | |
644 | m_num_conditions += 1; |
645 | } |
646 | else |
647 | { |
648 | /* This is a duplicate request. */ |
649 | if (dump_enabled_p ()) |
650 | dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing" |
651 | " loop for when %T == 1\n" , name); |
652 | } |
653 | } |
654 | |
655 | /* Return true if OP1_TREE is constant and if in principle it is worth |
656 | versioning an address fragment of the form: |
657 | |
658 | i * OP1_TREE * OP2 * stride |
659 | |
660 | for the case in which stride == 1. This in practice means testing |
661 | whether: |
662 | |
663 | OP1_TREE * OP2 <= M_MAXIMUM_SCALE. |
664 | |
665 | If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */ |
666 | |
667 | bool |
668 | loop_versioning::acceptable_multiplier_p (tree op1_tree, |
669 | unsigned HOST_WIDE_INT op2, |
670 | unsigned HOST_WIDE_INT *result) |
671 | { |
672 | if (tree_fits_uhwi_p (op1_tree)) |
673 | { |
674 | unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree); |
675 | /* The first part checks for overflow. */ |
676 | if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale) |
677 | { |
678 | if (result) |
679 | *result = op1 * op2; |
680 | return true; |
681 | } |
682 | } |
683 | return false; |
684 | } |
685 | |
686 | /* Return true if it is worth using loop versioning on a memory access |
687 | of type TYPE. Store the size of the access in *SIZE if so. */ |
688 | |
689 | bool |
690 | loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size) |
691 | { |
692 | return (TYPE_SIZE_UNIT (type) |
693 | && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), op2: 1, result: size)); |
694 | } |
695 | |
696 | /* See whether OP is constant and whether we can multiply TERM by that |
697 | constant without exceeding M_MAXIMUM_SCALE. Return true and update |
698 | TERM if so. */ |
699 | |
700 | bool |
701 | loop_versioning::multiply_term_by (address_term_info &term, tree op) |
702 | { |
703 | return acceptable_multiplier_p (op1_tree: op, op2: term.multiplier, result: &term.multiplier); |
704 | } |
705 | |
706 | /* Decide whether an address fragment of the form STRIDE * MULTIPLIER |
707 | is likely to be indexing an innermost dimension, returning the result |
708 | as an INNER_* probability. */ |
709 | |
710 | inner_likelihood |
711 | loop_versioning::get_inner_likelihood (tree stride, |
712 | unsigned HOST_WIDE_INT multiplier) |
713 | { |
714 | const unsigned int MAX_NITERS = 8; |
715 | |
716 | /* Iterate over possible values of STRIDE. Return INNER_LIKELY if at |
717 | least one of those values is likely to be for the innermost dimension. |
718 | Record in UNLIKELY_P if at least one of those values is unlikely to be |
719 | for the innermost dimension. |
720 | |
721 | E.g. for: |
722 | |
723 | stride = cond ? a * b : 1 |
724 | |
725 | we should treat STRIDE as being a likely inner dimension, since |
726 | we know that it is 1 under at least some circumstances. (See the |
727 | Fortran example below.) However: |
728 | |
729 | stride = a * b |
730 | |
731 | on its own is unlikely to be for the innermost dimension, since |
732 | that would require both a and b to be 1 at runtime. */ |
733 | bool unlikely_p = false; |
734 | tree worklist[MAX_NITERS]; |
735 | unsigned int length = 0; |
736 | worklist[length++] = stride; |
737 | for (unsigned int i = 0; i < length; ++i) |
738 | { |
739 | tree expr = worklist[i]; |
740 | |
741 | if (CONSTANT_CLASS_P (expr)) |
742 | { |
743 | /* See if EXPR * MULTIPLIER would be consistent with an individual |
744 | access or a small grouped access. */ |
745 | if (acceptable_multiplier_p (op1_tree: expr, op2: multiplier)) |
746 | return INNER_LIKELY; |
747 | else |
748 | unlikely_p = true; |
749 | } |
750 | else if (gimple *stmt = maybe_get_stmt (expr)) |
751 | { |
752 | /* If EXPR is set by a PHI node, queue its arguments in case |
753 | we find one that is consistent with an inner dimension. |
754 | |
755 | An important instance of this is the Fortran handling of array |
756 | descriptors, which calculates the stride of the inner dimension |
757 | using a PHI equivalent of: |
758 | |
759 | raw_stride = a.dim[0].stride; |
760 | stride = raw_stride != 0 ? raw_stride : 1; |
761 | |
762 | (Strides for outer dimensions do not treat 0 specially.) */ |
763 | if (gphi *phi = dyn_cast <gphi *> (p: stmt)) |
764 | { |
765 | unsigned int nargs = gimple_phi_num_args (gs: phi); |
766 | for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j) |
767 | worklist[length++] = strip_casts (expr: gimple_phi_arg_def (gs: phi, index: j)); |
768 | } |
769 | /* If the value is set by an assignment, expect it to be read |
770 | from memory (such as an array descriptor) rather than be |
771 | calculated. */ |
772 | else if (gassign *assign = dyn_cast <gassign *> (p: stmt)) |
773 | { |
774 | if (!gimple_assign_load_p (assign)) |
775 | unlikely_p = true; |
776 | } |
777 | /* Things like calls don't really tell us anything. */ |
778 | } |
779 | } |
780 | |
781 | /* We didn't find any possible values of STRIDE that were likely to be |
782 | for the innermost dimension. If we found one that was actively |
783 | unlikely to be for the innermost dimension, assume that that applies |
784 | to STRIDE too. */ |
785 | return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW; |
786 | } |
787 | |
788 | /* Dump the likelihood that TERM's stride is for the innermost dimension. |
789 | ADDRESS is the address that contains TERM. */ |
790 | |
791 | void |
792 | loop_versioning::dump_inner_likelihood (address_info &address, |
793 | address_term_info &term) |
794 | { |
795 | if (term.inner_likelihood == INNER_LIKELY) |
796 | dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the" |
797 | " innermost dimension\n" , term.stride); |
798 | else if (term.inner_likelihood == INNER_UNLIKELY) |
799 | dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the" |
800 | " innermost dimension\n" , term.stride); |
801 | else |
802 | dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T" |
803 | " is the innermost dimension\n" , term.stride); |
804 | } |
805 | |
806 | /* The caller has identified that STRIDE is the stride of interest |
807 | in TERM, and that the stride is applied in OP_LOOP. Record this |
808 | information in TERM, deciding whether STRIDE is likely to be for |
809 | the innermost dimension of an array and whether it represents a |
810 | versioning opportunity. ADDRESS is the address that contains TERM. */ |
811 | |
812 | void |
813 | loop_versioning::analyze_stride (address_info &address, |
814 | address_term_info &term, |
815 | tree stride, class loop *op_loop) |
816 | { |
817 | term.stride = stride; |
818 | |
819 | term.inner_likelihood = get_inner_likelihood (stride, multiplier: term.multiplier); |
820 | if (dump_enabled_p ()) |
821 | dump_inner_likelihood (address, term); |
822 | |
823 | /* To be a versioning opportunity we require: |
824 | |
825 | - The multiplier applied by TERM is equal to the access size, |
826 | so that when STRIDE is 1, the accesses in successive loop |
827 | iterations are consecutive. |
828 | |
829 | This is deliberately conservative. We could relax it to handle |
830 | other cases (such as those with gaps between iterations) if we |
831 | find any real testcases for which it's useful. |
832 | |
833 | - the stride is applied in the same loop as STMT rather than |
834 | in an outer loop. Although versioning for strides applied in |
835 | outer loops could help in some cases -- such as enabling |
836 | more loop interchange -- the savings are much lower than for |
837 | inner loops. |
838 | |
839 | - the stride is an SSA name that is invariant in STMT's loop, |
840 | since otherwise versioning isn't possible. */ |
841 | unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset; |
842 | if (term.multiplier == access_size |
843 | && address.loop == op_loop |
844 | && TREE_CODE (stride) == SSA_NAME |
845 | && expr_invariant_in_loop_p (address.loop, stride)) |
846 | { |
847 | term.versioning_opportunity_p = true; |
848 | if (dump_enabled_p ()) |
849 | dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning" |
850 | " opportunity\n" , stride); |
851 | } |
852 | } |
853 | |
854 | /* See whether address term TERM (which belongs to ADDRESS) is the result |
855 | of multiplying a varying SSA name by a loop-invariant SSA name. |
856 | Return true and update TERM if so. |
857 | |
858 | This handles both cases that SCEV might handle, such as: |
859 | |
860 | for (int i = 0; i < n; ++i) |
861 | res += a[i * stride]; |
862 | |
863 | and ones in which the term varies arbitrarily between iterations, such as: |
864 | |
865 | for (int i = 0; i < n; ++i) |
866 | res += a[index[i] * stride]; */ |
867 | |
868 | bool |
869 | loop_versioning::find_per_loop_multiplication (address_info &address, |
870 | address_term_info &term) |
871 | { |
872 | gassign *mult = maybe_get_assign (expr: term.expr); |
873 | if (!mult || gimple_assign_rhs_code (gs: mult) != MULT_EXPR) |
874 | return false; |
875 | |
876 | class loop *mult_loop = loop_containing_stmt (stmt: mult); |
877 | if (!loop_outer (loop: mult_loop)) |
878 | return false; |
879 | |
880 | tree op1 = strip_casts (expr: gimple_assign_rhs1 (gs: mult)); |
881 | tree op2 = strip_casts (expr: gimple_assign_rhs2 (gs: mult)); |
882 | if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) |
883 | return false; |
884 | |
885 | bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1); |
886 | bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2); |
887 | if (invariant1_p == invariant2_p) |
888 | return false; |
889 | |
890 | /* Make sure that the loop invariant is OP2 rather than OP1. */ |
891 | if (invariant1_p) |
892 | std::swap (a&: op1, b&: op2); |
893 | |
894 | if (dump_enabled_p ()) |
895 | dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T" |
896 | " * loop-invariant %T\n" , term.expr, op1, op2); |
897 | analyze_stride (address, term, stride: op2, op_loop: mult_loop); |
898 | return true; |
899 | } |
900 | |
901 | /* Try to use scalar evolutions to find an address stride for TERM, |
902 | which belongs to ADDRESS. Return true and update TERM if so. |
903 | |
904 | Here we are interested in any evolution information we can find, |
905 | not just evolutions wrt ADDRESS->LOOP. For example, if we find that |
906 | an outer loop obviously iterates over the inner dimension of an array, |
907 | that information can help us eliminate worthless versioning opportunities |
908 | in inner loops. */ |
909 | |
910 | bool |
911 | loop_versioning::analyze_term_using_scevs (address_info &address, |
912 | address_term_info &term) |
913 | { |
914 | gimple *setter = maybe_get_stmt (expr: term.expr); |
915 | if (!setter) |
916 | return false; |
917 | |
918 | class loop *wrt_loop = loop_containing_stmt (stmt: setter); |
919 | if (!loop_outer (loop: wrt_loop)) |
920 | return false; |
921 | |
922 | tree chrec = strip_casts (expr: analyze_scalar_evolution (wrt_loop, term.expr)); |
923 | if (TREE_CODE (chrec) == POLYNOMIAL_CHREC) |
924 | { |
925 | if (dump_enabled_p ()) |
926 | dump_printf_loc (MSG_NOTE, address.stmt, |
927 | "address term %T = %T\n" , term.expr, chrec); |
928 | |
929 | /* Peel casts and accumulate constant multiplications, up to the |
930 | limit allowed by M_MAXIMUM_SCALE. */ |
931 | tree stride = strip_casts (CHREC_RIGHT (chrec)); |
932 | while (TREE_CODE (stride) == MULT_EXPR |
933 | && multiply_term_by (term, TREE_OPERAND (stride, 1))) |
934 | stride = strip_casts (TREE_OPERAND (stride, 0)); |
935 | |
936 | gassign *assign; |
937 | while ((assign = maybe_get_assign (expr: stride)) |
938 | && gimple_assign_rhs_code (gs: assign) == MULT_EXPR |
939 | && multiply_term_by (term, op: gimple_assign_rhs2 (gs: assign))) |
940 | { |
941 | if (dump_enabled_p ()) |
942 | dump_printf_loc (MSG_NOTE, address.stmt, |
943 | "looking through %G" , (gimple *) assign); |
944 | stride = strip_casts (expr: gimple_assign_rhs1 (gs: assign)); |
945 | } |
946 | |
947 | analyze_stride (address, term, stride, op_loop: get_chrec_loop (chrec)); |
948 | return true; |
949 | } |
950 | |
951 | return false; |
952 | } |
953 | |
954 | /* Address term TERM is an arbitrary term that provides no versioning |
955 | opportunities. Analyze it to see whether it contains any likely |
956 | inner strides, so that we don't mistakenly version for other |
957 | (less likely) candidates. |
958 | |
959 | This copes with invariant innermost indices such as: |
960 | |
961 | x(i, :) = 100 |
962 | |
963 | where the "i" component of the address is invariant in the loop |
964 | but provides the real inner stride. |
965 | |
966 | ADDRESS is the address that contains TERM. */ |
967 | |
968 | void |
969 | loop_versioning::analyze_arbitrary_term (address_info &address, |
970 | address_term_info &term) |
971 | |
972 | { |
973 | /* A multiplication offers two potential strides. Pick the one that |
974 | is most likely to be an innermost stride. */ |
975 | tree expr = term.expr, alt = NULL_TREE; |
976 | gassign *mult = maybe_get_assign (expr); |
977 | if (mult && gimple_assign_rhs_code (gs: mult) == MULT_EXPR) |
978 | { |
979 | expr = strip_casts (expr: gimple_assign_rhs1 (gs: mult)); |
980 | alt = strip_casts (expr: gimple_assign_rhs2 (gs: mult)); |
981 | } |
982 | term.stride = expr; |
983 | term.inner_likelihood = get_inner_likelihood (stride: expr, multiplier: term.multiplier); |
984 | if (alt) |
985 | { |
986 | inner_likelihood alt_l = get_inner_likelihood (stride: alt, multiplier: term.multiplier); |
987 | if (alt_l > term.inner_likelihood) |
988 | { |
989 | term.stride = alt; |
990 | term.inner_likelihood = alt_l; |
991 | } |
992 | } |
993 | if (dump_enabled_p ()) |
994 | dump_inner_likelihood (address, term); |
995 | } |
996 | |
997 | /* Try to identify loop strides in ADDRESS and try to choose realistic |
998 | versioning opportunities based on these strides. |
999 | |
1000 | The main difficulty here isn't finding strides that could be used |
1001 | in a version check (that's pretty easy). The problem instead is to |
1002 | avoid versioning for some stride S that is unlikely ever to be 1 at |
1003 | runtime. Versioning for S == 1 on its own would lead to unnecessary |
1004 | code bloat, while adding S == 1 to more realistic version conditions |
1005 | would lose the optimisation opportunity offered by those other conditions. |
1006 | |
1007 | For example, versioning for a stride of 1 in the Fortran code: |
1008 | |
1009 | integer :: a(:,:) |
1010 | a(1,:) = 1 |
1011 | |
1012 | is not usually a good idea, since the assignment is iterating over |
1013 | an outer dimension and is relatively unlikely to have a stride of 1. |
1014 | (It isn't impossible, since the inner dimension might be 1, or the |
1015 | array might be transposed.) Similarly, in: |
1016 | |
1017 | integer :: a(:,:), b(:,:) |
1018 | b(:,1) = a(1,:) |
1019 | |
1020 | b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't. |
1021 | Versioning for when both strides are 1 would lose most of the benefit |
1022 | of versioning for b's access. |
1023 | |
1024 | The approach we take is as follows: |
1025 | |
1026 | - Analyze each term to see whether it has an identifiable stride, |
1027 | regardless of which loop applies the stride. |
1028 | |
1029 | - Evaluate the likelihood that each such stride is for the innermost |
1030 | dimension of an array, on the scale "likely", "don't know" or "unlikely". |
1031 | |
1032 | - If there is a single "likely" innermost stride, and that stride is |
1033 | applied in the loop that contains STMT, version the loop for when the |
1034 | stride is 1. This deals with the cases in which we're fairly |
1035 | confident of doing the right thing, such as the b(:,1) reference above. |
1036 | |
1037 | - If there are no "likely" innermost strides, and the loop that contains |
1038 | STMT uses a stride that we rated as "don't know", version for when |
1039 | that stride is 1. This is principally used for C code such as: |
1040 | |
1041 | for (int i = 0; i < n; ++i) |
1042 | a[i * x] = ...; |
1043 | |
1044 | and: |
1045 | |
1046 | for (int j = 0; j < n; ++j) |
1047 | for (int i = 0; i < n; ++i) |
1048 | a[i * x + j * y] = ...; |
1049 | |
1050 | where nothing in the way "x" and "y" are set gives a hint as to |
1051 | whether "i" iterates over the innermost dimension of the array. |
1052 | In these situations it seems reasonable to assume the |
1053 | programmer has nested the loops appropriately (although of course |
1054 | there are examples like GEMM in which this assumption doesn't hold |
1055 | for all accesses in the loop). |
1056 | |
1057 | This case is also useful for the Fortran equivalent of the |
1058 | above C code. */ |
1059 | |
1060 | void |
1061 | loop_versioning::analyze_address_fragment (address_info &address) |
1062 | { |
1063 | if (dump_enabled_p ()) |
1064 | { |
1065 | dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment " ); |
1066 | dump_address_info (flags: MSG_NOTE, address); |
1067 | dump_printf (MSG_NOTE, "\n" ); |
1068 | } |
1069 | |
1070 | /* Analyze each component of the sum to see whether it involves an |
1071 | apparent stride. |
1072 | |
1073 | There is an overlap between the addresses that |
1074 | find_per_loop_multiplication and analyze_term_using_scevs can handle, |
1075 | but the former is much cheaper than SCEV analysis, so try it first. */ |
1076 | for (unsigned int i = 0; i < address.terms.length (); ++i) |
1077 | if (!find_per_loop_multiplication (address, term&: address.terms[i]) |
1078 | && !analyze_term_using_scevs (address, term&: address.terms[i]) |
1079 | && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr))) |
1080 | analyze_arbitrary_term (address, term&: address.terms[i]); |
1081 | |
1082 | /* Check for strides that are likely to be for the innermost dimension. |
1083 | |
1084 | 1. If there is a single likely inner stride, if it is an SSA name, |
1085 | and if it is worth versioning the loop for when the SSA name |
1086 | equals 1, record that we want to do so. |
1087 | |
1088 | 2. Otherwise, if there any likely inner strides, bail out. This means |
1089 | one of: |
1090 | |
1091 | (a) There are multiple likely inner strides. This suggests we're |
1092 | confused and be can't be confident of doing the right thing. |
1093 | |
1094 | (b) There is a single likely inner stride and it is a constant |
1095 | rather than an SSA name. This can mean either that the access |
1096 | is a natural one without any variable strides, such as: |
1097 | |
1098 | for (int i = 0; i < n; ++i) |
1099 | a[i] += 1; |
1100 | |
1101 | or that a variable stride is applied to an outer dimension, |
1102 | such as: |
1103 | |
1104 | for (int i = 0; i < n; ++i) |
1105 | for (int j = 0; j < n; ++j) |
1106 | a[j * stride][i] += 1; |
1107 | |
1108 | (c) There is a single likely inner stride, and it is an SSA name, |
1109 | but it isn't a worthwhile versioning opportunity. This usually |
1110 | means that the variable stride is applied by an outer loop, |
1111 | such as: |
1112 | |
1113 | for (int i = 0; i < n; ++i) |
1114 | for (int j = 0; j < n; ++j) |
1115 | a[j][i * stride] += 1; |
1116 | |
1117 | or (using an example with a more natural loop nesting): |
1118 | |
1119 | for (int i = 0; i < n; ++i) |
1120 | for (int j = 0; j < n; ++j) |
1121 | a[i][j] += b[i * stride]; |
1122 | |
1123 | in cases where b[i * stride] cannot (yet) be hoisted for |
1124 | aliasing reasons. |
1125 | |
1126 | 3. If there are no likely inner strides, fall through to the next |
1127 | set of checks. |
1128 | |
1129 | Pointer equality is enough to check for uniqueness in (1), since we |
1130 | only care about SSA names. */ |
1131 | tree chosen_stride = NULL_TREE; |
1132 | tree version_stride = NULL_TREE; |
1133 | for (unsigned int i = 0; i < address.terms.length (); ++i) |
1134 | if (chosen_stride != address.terms[i].stride |
1135 | && address.terms[i].inner_likelihood == INNER_LIKELY) |
1136 | { |
1137 | if (chosen_stride) |
1138 | return; |
1139 | chosen_stride = address.terms[i].stride; |
1140 | if (address.terms[i].versioning_opportunity_p) |
1141 | version_stride = chosen_stride; |
1142 | } |
1143 | |
1144 | /* If there are no likely inner strides, see if there is a single |
1145 | versioning opportunity for a stride that was rated as INNER_DONT_KNOW. |
1146 | See the comment above the function for the cases that this code |
1147 | handles. */ |
1148 | if (!chosen_stride) |
1149 | for (unsigned int i = 0; i < address.terms.length (); ++i) |
1150 | if (version_stride != address.terms[i].stride |
1151 | && address.terms[i].inner_likelihood == INNER_DONT_KNOW |
1152 | && address.terms[i].versioning_opportunity_p) |
1153 | { |
1154 | if (version_stride) |
1155 | return; |
1156 | version_stride = address.terms[i].stride; |
1157 | } |
1158 | |
1159 | if (version_stride) |
1160 | version_for_unity (stmt: address.stmt, name: version_stride); |
1161 | } |
1162 | |
1163 | /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses |
1164 | TYPE_SIZE bytes and record this address fragment for later processing. |
1165 | STMT is the statement that contains the address. */ |
1166 | |
1167 | void |
1168 | loop_versioning::record_address_fragment (gimple *stmt, |
1169 | unsigned HOST_WIDE_INT type_size, |
1170 | tree expr, |
1171 | unsigned HOST_WIDE_INT multiplier, |
1172 | HOST_WIDE_INT offset) |
1173 | { |
1174 | /* We're only interested in computed values. */ |
1175 | if (TREE_CODE (expr) != SSA_NAME) |
1176 | return; |
1177 | |
1178 | /* Quick exit if no part of the address is calculated in STMT's loop, |
1179 | since such addresses have no versioning opportunities. */ |
1180 | class loop *loop = loop_containing_stmt (stmt); |
1181 | if (expr_invariant_in_loop_p (loop, expr)) |
1182 | return; |
1183 | |
1184 | /* Set up an address_info for EXPR * MULTIPLIER. */ |
1185 | address_info *address = XOBNEW (&m_obstack, address_info); |
1186 | new (address) address_info; |
1187 | address->stmt = stmt; |
1188 | address->loop = loop; |
1189 | address->base = NULL_TREE; |
1190 | address->terms.quick_grow (len: 1); |
1191 | address->terms[0].expr = expr; |
1192 | address->terms[0].multiplier = multiplier; |
1193 | address->terms[0].stride = NULL_TREE; |
1194 | address->terms[0].inner_likelihood = INNER_UNLIKELY; |
1195 | address->terms[0].versioning_opportunity_p = false; |
1196 | address->min_offset = offset; |
1197 | |
1198 | /* Peel apart the expression into a sum of address_terms, where each |
1199 | term is multiplied by a constant. Treat a + b and a - b the same, |
1200 | since it doesn't matter for our purposes whether an address is |
1201 | increasing or decreasing. Distribute (a + b) * constant into |
1202 | a * constant + b * constant. |
1203 | |
1204 | We don't care which loop each term belongs to, since we want to |
1205 | examine as many candidate strides as possible when determining |
1206 | which is likely to be for the innermost dimension. We therefore |
1207 | don't limit the search to statements in STMT's loop. */ |
1208 | for (unsigned int i = 0; i < address->terms.length (); ) |
1209 | { |
1210 | if (gassign *assign = maybe_get_assign (expr: address->terms[i].expr)) |
1211 | { |
1212 | tree_code code = gimple_assign_rhs_code (gs: assign); |
1213 | if (code == PLUS_EXPR |
1214 | || code == POINTER_PLUS_EXPR |
1215 | || code == MINUS_EXPR) |
1216 | { |
1217 | tree op1 = gimple_assign_rhs1 (gs: assign); |
1218 | tree op2 = gimple_assign_rhs2 (gs: assign); |
1219 | if (TREE_CODE (op2) == INTEGER_CST) |
1220 | { |
1221 | address->terms[i].expr = strip_casts (expr: op1); |
1222 | /* This is heuristic only, so don't worry about truncation |
1223 | or overflow. */ |
1224 | address->min_offset += (TREE_INT_CST_LOW (op2) |
1225 | * address->terms[i].multiplier); |
1226 | continue; |
1227 | } |
1228 | else if (address->terms.length () < address_info::MAX_TERMS) |
1229 | { |
1230 | unsigned int j = address->terms.length (); |
1231 | address->terms.quick_push (obj: address->terms[i]); |
1232 | address->terms[i].expr = strip_casts (expr: op1); |
1233 | address->terms[j].expr = strip_casts (expr: op2); |
1234 | continue; |
1235 | } |
1236 | } |
1237 | if (code == MULT_EXPR) |
1238 | { |
1239 | tree op1 = gimple_assign_rhs1 (gs: assign); |
1240 | tree op2 = gimple_assign_rhs2 (gs: assign); |
1241 | if (multiply_term_by (term&: address->terms[i], op: op2)) |
1242 | { |
1243 | address->terms[i].expr = strip_casts (expr: op1); |
1244 | continue; |
1245 | } |
1246 | } |
1247 | if (CONVERT_EXPR_CODE_P (code)) |
1248 | { |
1249 | tree op1 = gimple_assign_rhs1 (gs: assign); |
1250 | address->terms[i].expr = strip_casts (expr: op1); |
1251 | continue; |
1252 | } |
1253 | } |
1254 | i += 1; |
1255 | } |
1256 | |
1257 | /* Peel off any symbolic pointer. */ |
1258 | if (TREE_CODE (address->terms[0].expr) != SSA_NAME |
1259 | && address->terms[0].multiplier == 1) |
1260 | { |
1261 | if (address->terms.length () == 1) |
1262 | { |
1263 | obstack_free (&m_obstack, address); |
1264 | return; |
1265 | } |
1266 | address->base = address->terms[0].expr; |
1267 | address->terms.ordered_remove (ix: 0); |
1268 | } |
1269 | |
1270 | /* Require all remaining terms to be SSA names. (This could be false |
1271 | for unfolded statements, but they aren't worth dealing with.) */ |
1272 | for (unsigned int i = 0; i < address->terms.length (); ++i) |
1273 | if (TREE_CODE (address->terms[i].expr) != SSA_NAME) |
1274 | { |
1275 | obstack_free (&m_obstack, address); |
1276 | return; |
1277 | } |
1278 | |
1279 | /* The loop above set MIN_OFFSET based on the first byte of the |
1280 | referenced data. Calculate the end + 1. */ |
1281 | address->max_offset = address->min_offset + type_size; |
1282 | |
1283 | /* Put the terms into a canonical order for the hash table lookup below. */ |
1284 | address->terms.qsort (compare_address_terms); |
1285 | |
1286 | if (dump_enabled_p ()) |
1287 | { |
1288 | dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T" , expr); |
1289 | if (multiplier != 1) |
1290 | dump_printf (MSG_NOTE, " * %wd" , multiplier); |
1291 | dump_printf (MSG_NOTE, " = " ); |
1292 | dump_address_info (flags: MSG_NOTE, address&: *address); |
1293 | dump_printf (MSG_NOTE, "\n" ); |
1294 | } |
1295 | |
1296 | /* Pool address information with the same terms (but potentially |
1297 | different offsets). */ |
1298 | address_info **slot = m_address_table.find_slot (value: address, insert: INSERT); |
1299 | if (address_info *old_address = *slot) |
1300 | { |
1301 | /* We've already seen an address with the same terms. Extend the |
1302 | offset range to account for this access. Doing this can paper |
1303 | over gaps, such as in: |
1304 | |
1305 | a[i * stride * 4] + a[i * stride * 4 + 3]; |
1306 | |
1307 | where nothing references "+ 1" or "+ 2". However, the vectorizer |
1308 | handles such gapped accesses without problems, so it's not worth |
1309 | trying to exclude them. */ |
1310 | if (old_address->min_offset > address->min_offset) |
1311 | old_address->min_offset = address->min_offset; |
1312 | if (old_address->max_offset < address->max_offset) |
1313 | old_address->max_offset = address->max_offset; |
1314 | obstack_free (&m_obstack, address); |
1315 | } |
1316 | else |
1317 | { |
1318 | /* This is the first time we've seen an address with these terms. */ |
1319 | *slot = address; |
1320 | m_address_list.safe_push (obj: address); |
1321 | } |
1322 | } |
1323 | |
1324 | /* Analyze expression EXPR, which occurs in STMT. */ |
1325 | |
1326 | void |
1327 | loop_versioning::analyze_expr (gimple *stmt, tree expr) |
1328 | { |
1329 | unsigned HOST_WIDE_INT type_size; |
1330 | |
1331 | while (handled_component_p (t: expr)) |
1332 | { |
1333 | /* See whether we can use versioning to avoid a multiplication |
1334 | in an array index. */ |
1335 | if (TREE_CODE (expr) == ARRAY_REF |
1336 | && acceptable_type_p (TREE_TYPE (expr), size: &type_size)) |
1337 | record_address_fragment (stmt, type_size, |
1338 | TREE_OPERAND (expr, 1), multiplier: type_size, offset: 0); |
1339 | expr = TREE_OPERAND (expr, 0); |
1340 | } |
1341 | |
1342 | /* See whether we can use versioning to avoid a multiplication |
1343 | in the pointer calculation of a MEM_REF. */ |
1344 | if (TREE_CODE (expr) == MEM_REF |
1345 | && acceptable_type_p (TREE_TYPE (expr), size: &type_size)) |
1346 | record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), multiplier: 1, |
1347 | /* This is heuristic only, so don't worry |
1348 | about truncation or overflow. */ |
1349 | TREE_INT_CST_LOW (TREE_OPERAND (expr, 1))); |
1350 | |
1351 | /* These would be easy to handle if they existed at this stage. */ |
1352 | gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF); |
1353 | } |
1354 | |
1355 | /* Analyze all the statements in BB looking for useful version checks. |
1356 | Return true on success, false if something prevents the block from |
1357 | being versioned. */ |
1358 | |
1359 | bool |
1360 | loop_versioning::analyze_block (basic_block bb) |
1361 | { |
1362 | class loop *loop = bb->loop_father; |
1363 | loop_info &li = get_loop_info (loop); |
1364 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
1365 | gsi_next (i: &gsi)) |
1366 | { |
1367 | gimple *stmt = gsi_stmt (i: gsi); |
1368 | if (is_gimple_debug (gs: stmt)) |
1369 | continue; |
1370 | |
1371 | if (expensive_stmt_p (stmt)) |
1372 | { |
1373 | if (dump_enabled_p ()) |
1374 | dump_printf_loc (MSG_NOTE, stmt, "expensive statement" |
1375 | " prevents versioning: %G" , stmt); |
1376 | return false; |
1377 | } |
1378 | |
1379 | /* Only look for direct versioning opportunities in inner loops |
1380 | since the benefit tends to be much smaller for outer loops. */ |
1381 | if (!loop->inner) |
1382 | { |
1383 | unsigned int nops = gimple_num_ops (gs: stmt); |
1384 | for (unsigned int i = 0; i < nops; ++i) |
1385 | if (tree op = gimple_op (gs: stmt, i)) |
1386 | analyze_expr (stmt, expr: op); |
1387 | } |
1388 | |
1389 | /* The point of the instruction limit is to prevent excessive |
1390 | code growth, so this is a size-based estimate even though |
1391 | the optimization is aimed at speed. */ |
1392 | li.num_insns += estimate_num_insns (stmt, &eni_size_weights); |
1393 | } |
1394 | |
1395 | return true; |
1396 | } |
1397 | |
1398 | /* Analyze all the blocks in the function, looking for useful version checks. |
1399 | Return true if we found one. */ |
1400 | |
1401 | bool |
1402 | loop_versioning::analyze_blocks () |
1403 | { |
1404 | AUTO_DUMP_SCOPE ("analyze_blocks" , |
1405 | dump_user_location_t::from_function_decl (m_fn->decl)); |
1406 | |
1407 | /* For now we don't try to version the whole function, although |
1408 | versioning at that level could be useful in some cases. */ |
1409 | get_loop_info (loop: get_loop (fn: m_fn, num: 0)).rejected_p = true; |
1410 | |
1411 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
1412 | { |
1413 | loop_info &linfo = get_loop_info (loop); |
1414 | |
1415 | /* Ignore cold loops. */ |
1416 | if (!optimize_loop_for_speed_p (loop)) |
1417 | linfo.rejected_p = true; |
1418 | |
1419 | /* See whether an inner loop prevents versioning of this loop. */ |
1420 | if (!linfo.rejected_p) |
1421 | for (class loop *inner = loop->inner; inner; inner = inner->next) |
1422 | if (get_loop_info (loop: inner).rejected_p) |
1423 | { |
1424 | linfo.rejected_p = true; |
1425 | break; |
1426 | } |
1427 | |
1428 | /* If versioning the loop is still a possibility, examine the |
1429 | statements in the loop to look for versioning opportunities. */ |
1430 | if (!linfo.rejected_p) |
1431 | { |
1432 | void *start_point = obstack_alloc (&m_obstack, 0); |
1433 | |
1434 | for (basic_block bb = linfo.block_list; bb; |
1435 | bb = m_next_block_in_loop[bb->index]) |
1436 | if (!analyze_block (bb)) |
1437 | { |
1438 | linfo.rejected_p = true; |
1439 | break; |
1440 | } |
1441 | |
1442 | if (!linfo.rejected_p) |
1443 | { |
1444 | /* Process any queued address fragments, now that we have |
1445 | complete grouping information. */ |
1446 | address_info *address; |
1447 | unsigned int i; |
1448 | FOR_EACH_VEC_ELT (m_address_list, i, address) |
1449 | analyze_address_fragment (address&: *address); |
1450 | } |
1451 | |
1452 | m_address_table.empty (); |
1453 | m_address_list.truncate (size: 0); |
1454 | obstack_free (&m_obstack, start_point); |
1455 | } |
1456 | } |
1457 | |
1458 | return m_num_conditions != 0; |
1459 | } |
1460 | |
1461 | /* Use the ranges in VRS to remove impossible versioning conditions from |
1462 | LOOP. */ |
1463 | |
1464 | void |
1465 | loop_versioning::prune_loop_conditions (class loop *loop) |
1466 | { |
1467 | loop_info &li = get_loop_info (loop); |
1468 | |
1469 | int to_remove = -1; |
1470 | bitmap_iterator bi; |
1471 | unsigned int i; |
1472 | int_range_max r; |
1473 | EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi) |
1474 | { |
1475 | tree name = ssa_name (i); |
1476 | gimple *stmt = first_stmt (loop->header); |
1477 | |
1478 | if (get_range_query (cfun)->range_of_expr (r, expr: name, stmt) |
1479 | && !r.contains_p (wi::one (TYPE_PRECISION (TREE_TYPE (name))))) |
1480 | { |
1481 | if (dump_enabled_p ()) |
1482 | dump_printf_loc (MSG_NOTE, find_loop_location (loop), |
1483 | "%T can never be 1 in this loop\n" , name); |
1484 | |
1485 | if (to_remove >= 0) |
1486 | bitmap_clear_bit (&li.unity_names, to_remove); |
1487 | to_remove = i; |
1488 | m_num_conditions -= 1; |
1489 | } |
1490 | } |
1491 | if (to_remove >= 0) |
1492 | bitmap_clear_bit (&li.unity_names, to_remove); |
1493 | } |
1494 | |
1495 | /* Remove any scheduled loop version conditions that will never be true. |
1496 | Return true if any remain. */ |
1497 | |
1498 | bool |
1499 | loop_versioning::prune_conditions () |
1500 | { |
1501 | AUTO_DUMP_SCOPE ("prune_loop_conditions" , |
1502 | dump_user_location_t::from_function_decl (m_fn->decl)); |
1503 | |
1504 | calculate_dominance_info (CDI_DOMINATORS); |
1505 | lv_dom_walker dom_walker (*this); |
1506 | dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn)); |
1507 | return m_num_conditions != 0; |
1508 | } |
1509 | |
1510 | /* Merge the version checks for INNER into immediately-enclosing loop |
1511 | OUTER. */ |
1512 | |
1513 | void |
1514 | loop_versioning::merge_loop_info (class loop *outer, class loop *inner) |
1515 | { |
1516 | loop_info &inner_li = get_loop_info (loop: inner); |
1517 | loop_info &outer_li = get_loop_info (loop: outer); |
1518 | |
1519 | if (dump_enabled_p ()) |
1520 | { |
1521 | bitmap_iterator bi; |
1522 | unsigned int i; |
1523 | EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi) |
1524 | if (!bitmap_bit_p (&outer_li.unity_names, i)) |
1525 | dump_printf_loc (MSG_NOTE, find_loop_location (inner), |
1526 | "hoisting check that %T == 1 to outer loop\n" , |
1527 | ssa_name (i)); |
1528 | } |
1529 | |
1530 | bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names); |
1531 | if (loop_depth (loop: outer_li.outermost) < loop_depth (loop: inner_li.outermost)) |
1532 | outer_li.outermost = inner_li.outermost; |
1533 | } |
1534 | |
1535 | /* Add LOOP to the queue of loops to version. */ |
1536 | |
1537 | void |
1538 | loop_versioning::add_loop_to_queue (class loop *loop) |
1539 | { |
1540 | loop_info &li = get_loop_info (loop); |
1541 | |
1542 | if (dump_enabled_p ()) |
1543 | dump_printf_loc (MSG_NOTE, find_loop_location (loop), |
1544 | "queuing this loop for versioning\n" ); |
1545 | m_loops_to_version.safe_push (obj: loop); |
1546 | |
1547 | /* Don't try to version superloops. */ |
1548 | li.rejected_p = true; |
1549 | } |
1550 | |
1551 | /* Decide whether the cost model would allow us to version LOOP, |
1552 | either directly or as part of a parent loop, and return true if so. |
1553 | This does not imply that the loop is actually worth versioning in its |
1554 | own right, just that it would be valid to version it if something |
1555 | benefited. |
1556 | |
1557 | We have already made this decision for all inner loops of LOOP. */ |
1558 | |
1559 | bool |
1560 | loop_versioning::decide_whether_loop_is_versionable (class loop *loop) |
1561 | { |
1562 | loop_info &li = get_loop_info (loop); |
1563 | |
1564 | if (li.rejected_p) |
1565 | return false; |
1566 | |
1567 | /* Examine the decisions made for inner loops. */ |
1568 | for (class loop *inner = loop->inner; inner; inner = inner->next) |
1569 | { |
1570 | loop_info &inner_li = get_loop_info (loop: inner); |
1571 | if (inner_li.rejected_p) |
1572 | { |
1573 | if (dump_enabled_p ()) |
1574 | dump_printf_loc (MSG_NOTE, find_loop_location (loop), |
1575 | "not versioning this loop because one of its" |
1576 | " inner loops should not be versioned\n" ); |
1577 | return false; |
1578 | } |
1579 | |
1580 | if (inner_li.worth_versioning_p ()) |
1581 | li.subloops_benefit_p = true; |
1582 | |
1583 | /* Accumulate the number of instructions from subloops that are not |
1584 | the innermost, or that don't benefit from versioning. Only the |
1585 | instructions from innermost loops that benefit from versioning |
1586 | should be weighed against loop-versioning-max-inner-insns; |
1587 | everything else should be weighed against |
1588 | loop-versioning-max-outer-insns. */ |
1589 | if (!inner_li.worth_versioning_p () || inner->inner) |
1590 | { |
1591 | if (dump_enabled_p ()) |
1592 | dump_printf_loc (MSG_NOTE, find_loop_location (loop), |
1593 | "counting %d instructions from this loop" |
1594 | " against its parent loop\n" , inner_li.num_insns); |
1595 | li.num_insns += inner_li.num_insns; |
1596 | } |
1597 | } |
1598 | |
1599 | /* Enforce the size limits. */ |
1600 | if (li.worth_versioning_p ()) |
1601 | { |
1602 | unsigned int max_num_insns = max_insns_for_loop (loop); |
1603 | if (dump_enabled_p ()) |
1604 | dump_printf_loc (MSG_NOTE, find_loop_location (loop), |
1605 | "this loop has %d instructions, against" |
1606 | " a versioning limit of %d\n" , |
1607 | li.num_insns, max_num_insns); |
1608 | if (li.num_insns > max_num_insns) |
1609 | { |
1610 | if (dump_enabled_p ()) |
1611 | dump_printf_loc (MSG_MISSED_OPTIMIZATION |
1612 | | MSG_PRIORITY_USER_FACING, |
1613 | find_loop_location (loop), |
1614 | "this loop is too big to version" ); |
1615 | return false; |
1616 | } |
1617 | } |
1618 | |
1619 | /* Hoist all version checks from subloops to this loop. */ |
1620 | for (class loop *subloop = loop->inner; subloop; subloop = subloop->next) |
1621 | merge_loop_info (outer: loop, inner: subloop); |
1622 | |
1623 | return true; |
1624 | } |
1625 | |
1626 | /* Decide which loops to version and add them to the versioning queue. |
1627 | Return true if there are any loops to version. */ |
1628 | |
1629 | bool |
1630 | loop_versioning::make_versioning_decisions () |
1631 | { |
1632 | AUTO_DUMP_SCOPE ("make_versioning_decisions" , |
1633 | dump_user_location_t::from_function_decl (m_fn->decl)); |
1634 | |
1635 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
1636 | { |
1637 | loop_info &linfo = get_loop_info (loop); |
1638 | if (decide_whether_loop_is_versionable (loop)) |
1639 | { |
1640 | /* Commit to versioning LOOP directly if we can't hoist the |
1641 | version checks any further. */ |
1642 | if (linfo.worth_versioning_p () |
1643 | && (loop_depth (loop) == 1 || linfo.outermost == loop)) |
1644 | add_loop_to_queue (loop); |
1645 | } |
1646 | else |
1647 | { |
1648 | /* We can't version this loop, so individually version any |
1649 | subloops that would benefit and haven't been versioned yet. */ |
1650 | linfo.rejected_p = true; |
1651 | for (class loop *subloop = loop->inner; subloop; |
1652 | subloop = subloop->next) |
1653 | if (get_loop_info (loop: subloop).worth_versioning_p ()) |
1654 | add_loop_to_queue (loop: subloop); |
1655 | } |
1656 | } |
1657 | |
1658 | return !m_loops_to_version.is_empty (); |
1659 | } |
1660 | |
1661 | /* Attempt to implement loop versioning for LOOP, using the information |
1662 | cached in the associated loop_info. Return true on success. */ |
1663 | |
1664 | bool |
1665 | loop_versioning::version_loop (class loop *loop) |
1666 | { |
1667 | loop_info &li = get_loop_info (loop); |
1668 | |
1669 | /* Build up a condition that selects the original loop instead of |
1670 | the simplified loop. */ |
1671 | tree cond = boolean_false_node; |
1672 | bitmap_iterator bi; |
1673 | unsigned int i; |
1674 | EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi) |
1675 | { |
1676 | tree name = ssa_name (i); |
1677 | tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name, |
1678 | build_one_cst (TREE_TYPE (name))); |
1679 | cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one); |
1680 | } |
1681 | |
1682 | /* Convert the condition into a suitable gcond. */ |
1683 | gimple_seq stmts = NULL; |
1684 | cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr_for_cond, |
1685 | NULL_TREE); |
1686 | |
1687 | /* Version the loop. */ |
1688 | initialize_original_copy_tables (); |
1689 | basic_block cond_bb; |
1690 | li.optimized_loop = loop_version (loop, cond, &cond_bb, |
1691 | profile_probability::unlikely (), |
1692 | profile_probability::likely (), |
1693 | profile_probability::unlikely (), |
1694 | profile_probability::likely (), true); |
1695 | free_original_copy_tables (); |
1696 | if (!li.optimized_loop) |
1697 | { |
1698 | if (dump_enabled_p ()) |
1699 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop), |
1700 | "tried but failed to version this loop for when" |
1701 | " certain strides are 1\n" ); |
1702 | return false; |
1703 | } |
1704 | |
1705 | if (dump_enabled_p ()) |
1706 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop), |
1707 | "versioned this loop for when certain strides are 1\n" ); |
1708 | |
1709 | /* Insert the statements that feed COND. */ |
1710 | if (stmts) |
1711 | { |
1712 | gimple_stmt_iterator gsi = gsi_last_bb (bb: cond_bb); |
1713 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
1714 | } |
1715 | |
1716 | return true; |
1717 | } |
1718 | |
1719 | /* Attempt to version all loops in the versioning queue. */ |
1720 | |
1721 | void |
1722 | loop_versioning::implement_versioning_decisions () |
1723 | { |
1724 | /* No AUTO_DUMP_SCOPE here since all messages are top-level and |
1725 | user-facing at this point. */ |
1726 | |
1727 | bool any_succeeded_p = false; |
1728 | class loop *loop; |
1729 | unsigned int i; |
1730 | FOR_EACH_VEC_ELT (m_loops_to_version, i, loop) |
1731 | if (version_loop (loop)) |
1732 | any_succeeded_p = true; |
1733 | if (!any_succeeded_p) |
1734 | return; |
1735 | |
1736 | update_ssa (TODO_update_ssa); |
1737 | |
1738 | /* Simplify the new loop, which is used when COND is false. */ |
1739 | FOR_EACH_VEC_ELT (m_loops_to_version, i, loop) |
1740 | { |
1741 | loop_info &linfo = get_loop_info (loop); |
1742 | if (linfo.optimized_loop) |
1743 | name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header); |
1744 | } |
1745 | } |
1746 | |
1747 | /* Run the pass and return a set of TODO_* flags. */ |
1748 | |
1749 | unsigned int |
1750 | loop_versioning::run () |
1751 | { |
1752 | gcc_assert (scev_initialized_p ()); |
1753 | |
1754 | if (analyze_blocks () |
1755 | && prune_conditions () |
1756 | && make_versioning_decisions ()) |
1757 | implement_versioning_decisions (); |
1758 | |
1759 | return 0; |
1760 | } |
1761 | |
1762 | /* Loop versioning pass. */ |
1763 | |
1764 | const pass_data pass_data_loop_versioning = |
1765 | { |
1766 | .type: GIMPLE_PASS, /* type */ |
1767 | .name: "lversion" , /* name */ |
1768 | .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */ |
1769 | .tv_id: TV_LOOP_VERSIONING, /* tv_id */ |
1770 | PROP_cfg, /* properties_required */ |
1771 | .properties_provided: 0, /* properties_provided */ |
1772 | .properties_destroyed: 0, /* properties_destroyed */ |
1773 | .todo_flags_start: 0, /* todo_flags_start */ |
1774 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1775 | }; |
1776 | |
1777 | class pass_loop_versioning : public gimple_opt_pass |
1778 | { |
1779 | public: |
1780 | pass_loop_versioning (gcc::context *ctxt) |
1781 | : gimple_opt_pass (pass_data_loop_versioning, ctxt) |
1782 | {} |
1783 | |
1784 | /* opt_pass methods: */ |
1785 | bool gate (function *) final override |
1786 | { |
1787 | return flag_version_loops_for_strides; |
1788 | } |
1789 | unsigned int execute (function *) final override; |
1790 | }; |
1791 | |
1792 | unsigned int |
1793 | pass_loop_versioning::execute (function *fn) |
1794 | { |
1795 | if (number_of_loops (fn) <= 1) |
1796 | return 0; |
1797 | |
1798 | enable_ranger (m: fn); |
1799 | unsigned int ret = loop_versioning (fn).run (); |
1800 | disable_ranger (fn); |
1801 | return ret; |
1802 | } |
1803 | |
1804 | } // anon namespace |
1805 | |
1806 | gimple_opt_pass * |
1807 | make_pass_loop_versioning (gcc::context *ctxt) |
1808 | { |
1809 | return new pass_loop_versioning (ctxt); |
1810 | } |
1811 | |