1 | /* Loop invariant motion. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "tree.h" |
25 | #include "gimple.h" |
26 | #include "cfghooks.h" |
27 | #include "tree-pass.h" |
28 | #include "ssa.h" |
29 | #include "gimple-pretty-print.h" |
30 | #include "fold-const.h" |
31 | #include "cfganal.h" |
32 | #include "tree-eh.h" |
33 | #include "gimplify.h" |
34 | #include "gimple-iterator.h" |
35 | #include "tree-cfg.h" |
36 | #include "tree-ssa-loop-manip.h" |
37 | #include "tree-ssa-loop.h" |
38 | #include "tree-into-ssa.h" |
39 | #include "cfgloop.h" |
40 | #include "tree-affine.h" |
41 | #include "tree-ssa-propagate.h" |
42 | #include "trans-mem.h" |
43 | #include "gimple-fold.h" |
44 | #include "tree-scalar-evolution.h" |
45 | #include "tree-ssa-loop-niter.h" |
46 | #include "alias.h" |
47 | #include "builtins.h" |
48 | #include "tree-dfa.h" |
49 | #include "tree-ssa.h" |
50 | #include "dbgcnt.h" |
51 | |
52 | /* TODO: Support for predicated code motion. I.e. |
53 | |
54 | while (1) |
55 | { |
56 | if (cond) |
57 | { |
58 | a = inv; |
59 | something; |
60 | } |
61 | } |
62 | |
   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to
65 | |
66 | if (cond) |
67 | a = inv; |
68 | while (1) |
69 | { |
70 | if (cond) |
71 | something; |
72 | } */ |
73 | |
74 | /* The auxiliary data kept for each statement. */ |
75 | |
76 | struct lim_aux_data |
77 | { |
  class loop *max_loop;		/* The outermost loop in which the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of which we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
99 | }; |
100 | |
101 | /* Maps statements to their lim_aux_data. */ |
102 | |
103 | static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map; |
104 | |
105 | /* Description of a memory reference location. */ |
106 | |
107 | struct mem_ref_loc |
108 | { |
109 | tree *ref; /* The reference itself. */ |
  gimple *stmt;			/* The statement in which it occurs.  */
111 | }; |
112 | |
113 | |
114 | /* Description of a memory reference. */ |
115 | |
116 | class im_mem_ref |
117 | { |
118 | public: |
119 | unsigned id : 30; /* ID assigned to the memory reference |
120 | (its index in memory_accesses.refs_list) */ |
121 | unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */ |
122 | unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */ |
123 | hashval_t hash; /* Its hash value. */ |
124 | |
125 | /* The memory access itself and associated caching of alias-oracle |
126 | query meta-data. We are using mem.ref == error_mark_node for the |
127 | case the reference is represented by its single access stmt |
128 | in accesses_in_loop[0]. */ |
129 | ao_ref mem; |
130 | |
  bitmap stored;		/* The set of loops in which this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in which this memory location
				   is loaded from.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in which the memory
				   reference is {in,}dependent in
				   different modes.  */
142 | }; |
143 | |
144 | /* We use six bits per loop in the ref->dep_loop bitmap to record |
145 | the dep_kind x dep_state combinations. */ |
146 | |
147 | enum dep_kind { lim_raw, sm_war, sm_waw }; |
148 | enum dep_state { dep_unknown, dep_independent, dep_dependent }; |
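
/* For illustration, each loop uses bits 6 * loop->num .. 6 * loop->num + 5
   of ref->dep_loop, two bits per dep_kind: the even bit records
   dep_independent and the odd bit dep_dependent.  E.g. for loop->num == 3,
   kind == sm_war and state == dep_dependent, record_loop_dependence sets
   bit 6 * 3 + 1 * 2 + 1 == 21, and query_loop_dependence inspects bits
   20 and 21.  */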
149 | |
/* The coldest outermost loop for a given loop.  */
vec<class loop *> coldest_outermost_loop;
/* The hotter outer loop nearest to a given loop.  */
vec<class loop *> hotter_than_inner_loop;
154 | |
155 | /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */ |
156 | |
157 | static void |
158 | record_loop_dependence (class loop *loop, im_mem_ref *ref, |
159 | dep_kind kind, dep_state state) |
160 | { |
161 | gcc_assert (state != dep_unknown); |
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
163 | bitmap_set_bit (&ref->dep_loop, bit); |
164 | } |
165 | |
166 | /* Query the loop dependence cache of REF for LOOP, KIND. */ |
167 | |
168 | static dep_state |
169 | query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind) |
170 | { |
171 | unsigned first_bit = 6 * loop->num + kind * 2; |
172 | if (bitmap_bit_p (&ref->dep_loop, first_bit)) |
173 | return dep_independent; |
174 | else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1)) |
175 | return dep_dependent; |
176 | return dep_unknown; |
177 | } |
178 | |
179 | /* Mem_ref hashtable helpers. */ |
180 | |
181 | struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref> |
182 | { |
183 | typedef ao_ref *compare_type; |
184 | static inline hashval_t hash (const im_mem_ref *); |
185 | static inline bool equal (const im_mem_ref *, const ao_ref *); |
186 | }; |
187 | |
/* A hash function for class im_mem_ref object MEM.  */
189 | |
190 | inline hashval_t |
191 | mem_ref_hasher::hash (const im_mem_ref *mem) |
192 | { |
193 | return mem->hash; |
194 | } |
195 | |
196 | /* An equality function for class im_mem_ref object MEM1 with |
197 | memory reference OBJ2. */ |
198 | |
199 | inline bool |
200 | mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2) |
201 | { |
202 | if (obj2->max_size_known_p ()) |
203 | return (mem1->ref_decomposed |
204 | && ((TREE_CODE (mem1->mem.base) == MEM_REF |
205 | && TREE_CODE (obj2->base) == MEM_REF |
206 | && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0), |
207 | TREE_OPERAND (obj2->base, 0), flags: 0) |
208 | && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset, |
209 | mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset)) |
210 | || (operand_equal_p (mem1->mem.base, obj2->base, flags: 0) |
211 | && known_eq (mem1->mem.offset, obj2->offset))) |
212 | && known_eq (mem1->mem.size, obj2->size) |
213 | && known_eq (mem1->mem.max_size, obj2->max_size) |
214 | && mem1->mem.volatile_p == obj2->volatile_p |
215 | && (mem1->mem.ref_alias_set == obj2->ref_alias_set |
		  /* We are not canonicalizing alias-sets but for the
		     special case where we didn't canonicalize yet and the
		     incoming ref is an alias-set zero MEM we pick
		     the correct one already.  */
220 | || (!mem1->ref_canonical |
221 | && (TREE_CODE (obj2->ref) == MEM_REF |
222 | || TREE_CODE (obj2->ref) == TARGET_MEM_REF) |
223 | && obj2->ref_alias_set == 0) |
224 | /* Likewise if there's a canonical ref with alias-set zero. */ |
225 | || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0)) |
226 | && types_compatible_p (TREE_TYPE (mem1->mem.ref), |
227 | TREE_TYPE (obj2->ref))); |
228 | else |
229 | return operand_equal_p (mem1->mem.ref, obj2->ref, flags: 0); |
230 | } |
231 | |
232 | |
233 | /* Description of memory accesses in loops. */ |
234 | |
235 | static struct |
236 | { |
237 | /* The hash table of memory references accessed in loops. */ |
238 | hash_table<mem_ref_hasher> *refs; |
239 | |
240 | /* The list of memory references. */ |
241 | vec<im_mem_ref *> refs_list; |
242 | |
243 | /* The set of memory references accessed in each loop. */ |
244 | vec<bitmap_head> refs_loaded_in_loop; |
245 | |
246 | /* The set of memory references stored in each loop. */ |
247 | vec<bitmap_head> refs_stored_in_loop; |
248 | |
  /* The set of memory references stored in each loop, including subloops.  */
250 | vec<bitmap_head> all_refs_stored_in_loop; |
251 | |
252 | /* Cache for expanding memory addresses. */ |
253 | hash_map<tree, name_expansion *> *ttae_cache; |
254 | } memory_accesses; |
255 | |
256 | /* Obstack for the bitmaps in the above data structures. */ |
257 | static bitmap_obstack lim_bitmap_obstack; |
258 | static obstack mem_ref_obstack; |
259 | |
260 | static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind); |
261 | static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool); |
262 | static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true); |
263 | |
264 | /* Minimum cost of an expensive expression. */ |
265 | #define LIM_EXPENSIVE ((unsigned) param_lim_expensive) |
266 | |
267 | /* The outermost loop for which execution of the header guarantees that the |
268 | block will be executed. */ |
269 | #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux) |
270 | #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL)) |
271 | |
272 | /* ID of the shared unanalyzable mem. */ |
273 | #define UNANALYZABLE_MEM_ID 0 |
274 | |
275 | /* Whether the reference was analyzable. */ |
276 | #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID) |
277 | |
278 | static struct lim_aux_data * |
279 | init_lim_data (gimple *stmt) |
280 | { |
281 | lim_aux_data *p = XCNEW (struct lim_aux_data); |
282 | lim_aux_data_map->put (k: stmt, v: p); |
283 | |
284 | return p; |
285 | } |
286 | |
287 | static struct lim_aux_data * |
288 | get_lim_data (gimple *stmt) |
289 | { |
290 | lim_aux_data **p = lim_aux_data_map->get (k: stmt); |
291 | if (!p) |
292 | return NULL; |
293 | |
294 | return *p; |
295 | } |
296 | |
297 | /* Releases the memory occupied by DATA. */ |
298 | |
299 | static void |
300 | free_lim_aux_data (struct lim_aux_data *data) |
301 | { |
302 | data->depends.release (); |
303 | free (ptr: data); |
304 | } |
305 | |
306 | static void |
307 | clear_lim_data (gimple *stmt) |
308 | { |
309 | lim_aux_data **p = lim_aux_data_map->get (k: stmt); |
310 | if (!p) |
311 | return; |
312 | |
313 | free_lim_aux_data (data: *p); |
314 | *p = NULL; |
315 | } |
316 | |
317 | |
318 | /* The possibilities of statement movement. */ |
319 | enum move_pos |
320 | { |
321 | MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */ |
  MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ...  */
324 | MOVE_POSSIBLE /* Unlimited movement. */ |
325 | }; |
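
/* For illustration: a statement with side effects or a virtual definition
   (an asm, a store, a call clobbering memory) is MOVE_IMPOSSIBLE; a load
   a_1 = *p_2 or a possibly trapping division a_1 = b_2 / c_3 is
   MOVE_PRESERVE_EXECUTION, since executing it on a path where it did not
   execute originally could introduce a fault; a simple arithmetic statement
   like a_1 = b_2 + c_3 is MOVE_POSSIBLE.  */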
326 | |
327 | |
328 | /* If it is possible to hoist the statement STMT unconditionally, |
329 | returns MOVE_POSSIBLE. |
330 | If it is possible to hoist the statement STMT, but we must avoid making |
331 | it executed if it would not be executed in the original program (e.g. |
332 | because it may trap), return MOVE_PRESERVE_EXECUTION. |
333 | Otherwise return MOVE_IMPOSSIBLE. */ |
334 | |
335 | static enum move_pos |
336 | movement_possibility_1 (gimple *stmt) |
337 | { |
338 | tree lhs; |
339 | enum move_pos ret = MOVE_POSSIBLE; |
340 | |
341 | if (flag_unswitch_loops |
342 | && gimple_code (g: stmt) == GIMPLE_COND) |
343 | { |
344 | /* If we perform unswitching, force the operands of the invariant |
345 | condition to be moved out of the loop. */ |
346 | return MOVE_POSSIBLE; |
347 | } |
348 | |
349 | if (gimple_code (g: stmt) == GIMPLE_PHI |
350 | && gimple_phi_num_args (gs: stmt) <= 2 |
351 | && !virtual_operand_p (op: gimple_phi_result (gs: stmt)) |
352 | && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt))) |
353 | return MOVE_POSSIBLE; |
354 | |
355 | if (gimple_get_lhs (stmt) == NULL_TREE) |
356 | return MOVE_IMPOSSIBLE; |
357 | |
358 | if (gimple_vdef (g: stmt)) |
359 | return MOVE_IMPOSSIBLE; |
360 | |
361 | if (stmt_ends_bb_p (stmt) |
362 | || gimple_has_volatile_ops (stmt) |
363 | || gimple_has_side_effects (stmt) |
364 | || stmt_could_throw_p (cfun, stmt)) |
365 | return MOVE_IMPOSSIBLE; |
366 | |
367 | if (is_gimple_call (gs: stmt)) |
368 | { |
369 | /* While pure or const call is guaranteed to have no side effects, we |
370 | cannot move it arbitrarily. Consider code like |
371 | |
372 | char *s = something (); |
373 | |
374 | while (1) |
375 | { |
376 | if (s) |
377 | t = strlen (s); |
378 | else |
379 | t = 0; |
380 | } |
381 | |
382 | Here the strlen call cannot be moved out of the loop, even though |
383 | s is invariant. In addition to possibly creating a call with |
384 | invalid arguments, moving out a function call that is not executed |
385 | may cause performance regressions in case the call is costly and |
386 | not executed at all. */ |
387 | ret = MOVE_PRESERVE_EXECUTION; |
388 | lhs = gimple_call_lhs (gs: stmt); |
389 | } |
390 | else if (is_gimple_assign (gs: stmt)) |
391 | lhs = gimple_assign_lhs (gs: stmt); |
392 | else |
393 | return MOVE_IMPOSSIBLE; |
394 | |
395 | if (TREE_CODE (lhs) == SSA_NAME |
396 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) |
397 | return MOVE_IMPOSSIBLE; |
398 | |
399 | if (TREE_CODE (lhs) != SSA_NAME |
400 | || gimple_could_trap_p (stmt)) |
401 | return MOVE_PRESERVE_EXECUTION; |
402 | |
403 | if (is_gimple_assign (gs: stmt)) |
404 | { |
405 | auto code = gimple_assign_rhs_code (gs: stmt); |
406 | tree type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |
407 | /* For shifts and rotates and possibly out-of-bound shift operands |
408 | we currently cannot rewrite them into something unconditionally |
409 | well-defined. */ |
410 | if ((code == LSHIFT_EXPR |
411 | || code == RSHIFT_EXPR |
412 | || code == LROTATE_EXPR |
413 | || code == RROTATE_EXPR) |
414 | && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST |
415 | /* We cannot use ranges at 'stmt' here. */ |
416 | || wi::ltu_p (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)), |
417 | y: element_precision (type)))) |
418 | ret = MOVE_PRESERVE_EXECUTION; |
419 | } |
420 | |
421 | /* Non local loads in a transaction cannot be hoisted out. Well, |
422 | unless the load happens on every path out of the loop, but we |
423 | don't take this into account yet. */ |
424 | if (flag_tm |
425 | && gimple_in_transaction (stmt) |
426 | && gimple_assign_single_p (gs: stmt)) |
427 | { |
428 | tree rhs = gimple_assign_rhs1 (gs: stmt); |
429 | if (DECL_P (rhs) && is_global_var (t: rhs)) |
430 | { |
431 | if (dump_file) |
432 | { |
433 | fprintf (stream: dump_file, format: "Cannot hoist conditional load of " ); |
434 | print_generic_expr (dump_file, rhs, TDF_SLIM); |
435 | fprintf (stream: dump_file, format: " because it is in a transaction.\n" ); |
436 | } |
437 | return MOVE_IMPOSSIBLE; |
438 | } |
439 | } |
440 | |
441 | return ret; |
442 | } |
443 | |
444 | static enum move_pos |
445 | movement_possibility (gimple *stmt) |
446 | { |
447 | enum move_pos pos = movement_possibility_1 (stmt); |
448 | if (pos == MOVE_POSSIBLE) |
449 | { |
450 | use_operand_p use_p; |
451 | ssa_op_iter ssa_iter; |
452 | FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE) |
453 | if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME |
454 | && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p))) |
455 | return MOVE_PRESERVE_EXECUTION; |
456 | } |
457 | return pos; |
458 | } |
459 | |
460 | |
/* Compare the profile count of BB with that of LOOP's preheader.  The
   comparison is three-state as stated in profile-count.h; FALSE is returned
   if the inequality cannot be decided.  */
bool
bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
466 | { |
467 | gcc_assert (bb && loop); |
468 | return bb->count < loop_preheader_edge (loop)->src->count; |
469 | } |
470 | |
/* Check for the coldest loop between OUTERMOST_LOOP and LOOP by comparing
   profile counts.
   It checks in three steps:
   1) Check whether CURR_BB is cold in its own loop_father; if it is cold,
   just return NULL, which means it should not be moved out at all;
   2) CURR_BB is NOT cold, check if the pre-computed COLDEST_LOOP is outside
   of OUTERMOST_LOOP; if it is inside of OUTERMOST_LOOP, return COLDEST_LOOP;
   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
   hotter loop between OUTERMOST_LOOP and LOOP in the pre-computed
   HOTTER_THAN_INNER_LOOP; if so, return its nested inner loop, otherwise
   return OUTERMOST_LOOP.
   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
   the hoist target.  */
484 | |
485 | static class loop * |
486 | get_coldest_out_loop (class loop *outermost_loop, class loop *loop, |
487 | basic_block curr_bb) |
488 | { |
489 | gcc_assert (outermost_loop == loop |
490 | || flow_loop_nested_p (outermost_loop, loop)); |
491 | |
  /* If bb_colder_than_loop_preheader returns false due to the three-state
     comparison, OUTERMOST_LOOP is returned finally to preserve the behavior.
     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
495 | if (curr_bb && bb_colder_than_loop_preheader (bb: curr_bb, loop)) |
496 | return NULL; |
497 | |
498 | class loop *coldest_loop = coldest_outermost_loop[loop->num]; |
499 | if (loop_depth (loop: coldest_loop) < loop_depth (loop: outermost_loop)) |
500 | { |
501 | class loop *hotter_loop = hotter_than_inner_loop[loop->num]; |
502 | if (!hotter_loop |
503 | || loop_depth (loop: hotter_loop) < loop_depth (loop: outermost_loop)) |
504 | return outermost_loop; |
505 | |
506 | /* hotter_loop is between OUTERMOST_LOOP and LOOP like: |
507 | [loop tree root, ..., coldest_loop, ..., outermost_loop, ..., |
508 | hotter_loop, second_coldest_loop, ..., loop] |
509 | return second_coldest_loop to be the hoist target. */ |
510 | class loop *aloop; |
511 | for (aloop = hotter_loop->inner; aloop; aloop = aloop->next) |
512 | if (aloop == loop || flow_loop_nested_p (aloop, loop)) |
513 | return aloop; |
514 | } |
515 | return coldest_loop; |
516 | } |
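
/* For illustration: if CURR_BB is provably colder than LOOP's preheader the
   statement is not hoisted at all (NULL is returned); if the pre-computed
   coldest loop for LOOP already lies within OUTERMOST_LOOP it becomes the
   hoist target; otherwise the target is OUTERMOST_LOOP itself, unless a
   hotter loop sits between the two, in which case we hoist only up to just
   inside that hotter loop.  */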
517 | |
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */
522 | |
523 | static class loop * |
524 | outermost_invariant_loop (tree def, class loop *loop) |
525 | { |
526 | gimple *def_stmt; |
527 | basic_block def_bb; |
528 | class loop *max_loop; |
529 | struct lim_aux_data *lim_data; |
530 | |
531 | if (!def) |
532 | return superloop_at_depth (loop, 1); |
533 | |
534 | if (TREE_CODE (def) != SSA_NAME) |
535 | { |
536 | gcc_assert (is_gimple_min_invariant (def)); |
537 | return superloop_at_depth (loop, 1); |
538 | } |
539 | |
540 | def_stmt = SSA_NAME_DEF_STMT (def); |
541 | def_bb = gimple_bb (g: def_stmt); |
542 | if (!def_bb) |
543 | return superloop_at_depth (loop, 1); |
544 | |
545 | max_loop = find_common_loop (loop, def_bb->loop_father); |
546 | |
547 | lim_data = get_lim_data (stmt: def_stmt); |
548 | if (lim_data != NULL && lim_data->max_loop != NULL) |
549 | max_loop = find_common_loop (max_loop, |
550 | loop_outer (loop: lim_data->max_loop)); |
551 | if (max_loop == loop) |
552 | return NULL; |
553 | max_loop = superloop_at_depth (loop, loop_depth (loop: max_loop) + 1); |
554 | |
555 | return max_loop; |
556 | } |
557 | |
558 | /* DATA is a structure containing information associated with a statement |
559 | inside LOOP. DEF is one of the operands of this statement. |
560 | |
   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined to the DATA->depends list.  Additionally
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.
567 | |
568 | If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */ |
569 | |
570 | static bool |
571 | add_dependency (tree def, struct lim_aux_data *data, class loop *loop, |
572 | bool add_cost) |
573 | { |
574 | gimple *def_stmt = SSA_NAME_DEF_STMT (def); |
575 | basic_block def_bb = gimple_bb (g: def_stmt); |
576 | class loop *max_loop; |
577 | struct lim_aux_data *def_data; |
578 | |
579 | if (!def_bb) |
580 | return true; |
581 | |
582 | max_loop = outermost_invariant_loop (def, loop); |
583 | if (!max_loop) |
584 | return false; |
585 | |
586 | if (flow_loop_nested_p (data->max_loop, max_loop)) |
587 | data->max_loop = max_loop; |
588 | |
589 | def_data = get_lim_data (stmt: def_stmt); |
590 | if (!def_data) |
591 | return true; |
592 | |
593 | if (add_cost |
594 | /* Only add the cost if the statement defining DEF is inside LOOP, |
595 | i.e. if it is likely that by moving the invariants dependent |
596 | on it, we will be able to avoid creating a new register for |
597 | it (since it will be only used in these dependent invariants). */ |
598 | && def_bb->loop_father == loop) |
599 | data->cost += def_data->cost; |
600 | |
601 | data->depends.safe_push (obj: def_stmt); |
602 | |
603 | return true; |
604 | } |
605 | |
606 | /* Returns an estimate for a cost of statement STMT. The values here |
607 | are just ad-hoc constants, similar to costs for inlining. */ |
608 | |
609 | static unsigned |
610 | stmt_cost (gimple *stmt) |
611 | { |
612 | /* Always try to create possibilities for unswitching. */ |
613 | if (gimple_code (g: stmt) == GIMPLE_COND |
614 | || gimple_code (g: stmt) == GIMPLE_PHI) |
615 | return LIM_EXPENSIVE; |
616 | |
617 | /* We should be hoisting calls if possible. */ |
618 | if (is_gimple_call (gs: stmt)) |
619 | { |
620 | tree fndecl; |
621 | |
622 | /* Unless the call is a builtin_constant_p; this always folds to a |
623 | constant, so moving it is useless. */ |
624 | fndecl = gimple_call_fndecl (gs: stmt); |
625 | if (fndecl && fndecl_built_in_p (node: fndecl, name1: BUILT_IN_CONSTANT_P)) |
626 | return 0; |
627 | |
628 | return LIM_EXPENSIVE; |
629 | } |
630 | |
631 | /* Hoisting memory references out should almost surely be a win. */ |
632 | if (gimple_references_memory_p (stmt)) |
633 | return LIM_EXPENSIVE; |
634 | |
635 | if (gimple_code (g: stmt) != GIMPLE_ASSIGN) |
636 | return 1; |
637 | |
638 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
639 | switch (code) |
640 | { |
641 | case MULT_EXPR: |
642 | case WIDEN_MULT_EXPR: |
643 | case WIDEN_MULT_PLUS_EXPR: |
644 | case WIDEN_MULT_MINUS_EXPR: |
645 | case DOT_PROD_EXPR: |
646 | case TRUNC_DIV_EXPR: |
647 | case CEIL_DIV_EXPR: |
648 | case FLOOR_DIV_EXPR: |
649 | case ROUND_DIV_EXPR: |
650 | case EXACT_DIV_EXPR: |
651 | case CEIL_MOD_EXPR: |
652 | case FLOOR_MOD_EXPR: |
653 | case ROUND_MOD_EXPR: |
654 | case TRUNC_MOD_EXPR: |
655 | case RDIV_EXPR: |
656 | /* Division and multiplication are usually expensive. */ |
657 | return LIM_EXPENSIVE; |
658 | |
659 | case LSHIFT_EXPR: |
660 | case RSHIFT_EXPR: |
661 | case WIDEN_LSHIFT_EXPR: |
662 | case LROTATE_EXPR: |
663 | case RROTATE_EXPR: |
664 | /* Shifts and rotates are usually expensive. */ |
665 | return LIM_EXPENSIVE; |
666 | |
667 | case COND_EXPR: |
668 | case VEC_COND_EXPR: |
669 | /* Conditionals are expensive. */ |
670 | return LIM_EXPENSIVE; |
671 | |
672 | case CONSTRUCTOR: |
673 | /* Make vector construction cost proportional to the number |
674 | of elements. */ |
675 | return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt)); |
676 | |
677 | case SSA_NAME: |
678 | case PAREN_EXPR: |
679 | /* Whether or not something is wrapped inside a PAREN_EXPR |
680 | should not change move cost. Nor should an intermediate |
681 | unpropagated SSA name copy. */ |
682 | return 0; |
683 | |
684 | default: |
685 | /* Comparisons are usually expensive. */ |
686 | if (TREE_CODE_CLASS (code) == tcc_comparison) |
687 | return LIM_EXPENSIVE; |
688 | return 1; |
689 | } |
690 | } |
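
/* For example, under this costing a multiplication a_1 = b_2 * c_3 or a
   comparison a_1 = b_2 < c_3 costs LIM_EXPENSIVE, a vector CONSTRUCTOR
   costs one unit per element, a plain copy a_1 = b_2 costs 0 and a simple
   addition a_1 = b_2 + c_3 costs 1.  */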
691 | |
/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL
   is returned instead.  */
695 | |
696 | static class loop * |
697 | outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref) |
698 | { |
699 | class loop *aloop; |
700 | |
701 | if (ref->stored && bitmap_bit_p (ref->stored, loop->num)) |
702 | return NULL; |
703 | |
704 | for (aloop = outer; |
705 | aloop != loop; |
706 | aloop = superloop_at_depth (loop, loop_depth (loop: aloop) + 1)) |
707 | if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num)) |
708 | && ref_indep_loop_p (aloop, ref, lim_raw)) |
709 | return aloop; |
710 | |
711 | if (ref_indep_loop_p (loop, ref, lim_raw)) |
712 | return loop; |
713 | else |
714 | return NULL; |
715 | } |
716 | |
717 | /* If there is a simple load or store to a memory reference in STMT, returns |
718 | the location of the memory reference, and sets IS_STORE according to whether |
719 | it is a store or load. Otherwise, returns NULL. */ |
720 | |
721 | static tree * |
722 | simple_mem_ref_in_stmt (gimple *stmt, bool *is_store) |
723 | { |
724 | tree *lhs, *rhs; |
725 | |
726 | /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */ |
727 | if (!gimple_assign_single_p (gs: stmt)) |
728 | return NULL; |
729 | |
730 | lhs = gimple_assign_lhs_ptr (gs: stmt); |
731 | rhs = gimple_assign_rhs1_ptr (gs: stmt); |
732 | |
733 | if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (g: stmt)) |
734 | { |
735 | *is_store = false; |
736 | return rhs; |
737 | } |
738 | else if (gimple_vdef (g: stmt) |
739 | && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs))) |
740 | { |
741 | *is_store = true; |
742 | return lhs; |
743 | } |
744 | else |
745 | return NULL; |
746 | } |
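
/* For example, x_1 = a[i_2] is recognized as a load (the rhs location is
   returned and *IS_STORE is set to false), a[i_2] = x_1 or a[i_2] = 0 as a
   store (the lhs location is returned and *IS_STORE is set to true), while
   an aggregate copy a = b or a call is not a simple reference and NULL is
   returned.  */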
747 | |
748 | /* From a controlling predicate in DOM determine the arguments from |
749 | the PHI node PHI that are chosen if the predicate evaluates to |
750 | true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if |
751 | they are non-NULL. Returns true if the arguments can be determined, |
752 | else return false. */ |
753 | |
754 | static bool |
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
756 | tree *true_arg_p, tree *false_arg_p) |
757 | { |
758 | edge te, fe; |
759 | if (! extract_true_false_controlled_edges (dom, gimple_bb (g: phi), |
760 | &te, &fe)) |
761 | return false; |
762 | |
763 | if (true_arg_p) |
764 | *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx); |
765 | if (false_arg_p) |
766 | *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx); |
767 | |
768 | return true; |
769 | } |
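
/* For example, for the controlled diamond

     if (cond_1)	<-- last statement of DOM
       ...
     # x_3 = PHI <x_1(true edge), x_2(false edge)>

   *TRUE_ARG_P is set to x_1 and *FALSE_ARG_P to x_2.  */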
770 | |
/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).
777 | |
778 | The function returns false if STMT cannot be hoisted outside of the loop it |
779 | is defined in, and true otherwise. */ |
780 | |
781 | static bool |
782 | determine_max_movement (gimple *stmt, bool must_preserve_exec) |
783 | { |
784 | basic_block bb = gimple_bb (g: stmt); |
785 | class loop *loop = bb->loop_father; |
786 | class loop *level; |
787 | struct lim_aux_data *lim_data = get_lim_data (stmt); |
788 | tree val; |
789 | ssa_op_iter iter; |
790 | |
791 | if (must_preserve_exec) |
792 | level = ALWAYS_EXECUTED_IN (bb); |
793 | else |
794 | level = superloop_at_depth (loop, 1); |
795 | lim_data->max_loop = get_coldest_out_loop (outermost_loop: level, loop, curr_bb: bb); |
796 | if (!lim_data->max_loop) |
797 | return false; |
798 | |
799 | if (gphi *phi = dyn_cast <gphi *> (p: stmt)) |
800 | { |
801 | use_operand_p use_p; |
802 | unsigned min_cost = UINT_MAX; |
803 | unsigned total_cost = 0; |
804 | struct lim_aux_data *def_data; |
805 | |
806 | /* We will end up promoting dependencies to be unconditionally |
807 | evaluated. For this reason the PHI cost (and thus the |
808 | cost we remove from the loop by doing the invariant motion) |
809 | is that of the cheapest PHI argument dependency chain. */ |
810 | FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) |
811 | { |
812 | val = USE_FROM_PTR (use_p); |
813 | |
814 | if (TREE_CODE (val) != SSA_NAME) |
815 | { |
816 | /* Assign const 1 to constants. */ |
817 | min_cost = MIN (min_cost, 1); |
818 | total_cost += 1; |
819 | continue; |
820 | } |
821 | if (!add_dependency (def: val, data: lim_data, loop, add_cost: false)) |
822 | return false; |
823 | |
824 | gimple *def_stmt = SSA_NAME_DEF_STMT (val); |
825 | if (gimple_bb (g: def_stmt) |
826 | && gimple_bb (g: def_stmt)->loop_father == loop) |
827 | { |
828 | def_data = get_lim_data (stmt: def_stmt); |
829 | if (def_data) |
830 | { |
831 | min_cost = MIN (min_cost, def_data->cost); |
832 | total_cost += def_data->cost; |
833 | } |
834 | } |
835 | } |
836 | |
837 | min_cost = MIN (min_cost, total_cost); |
838 | lim_data->cost += min_cost; |
839 | |
840 | if (gimple_phi_num_args (gs: phi) > 1) |
841 | { |
842 | basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb); |
843 | gimple *cond; |
844 | if (gsi_end_p (i: gsi_last_bb (bb: dom))) |
845 | return false; |
846 | cond = gsi_stmt (i: gsi_last_bb (bb: dom)); |
847 | if (gimple_code (g: cond) != GIMPLE_COND) |
848 | return false; |
849 | /* Verify that this is an extended form of a diamond and |
850 | the PHI arguments are completely controlled by the |
851 | predicate in DOM. */ |
852 | if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL)) |
853 | return false; |
854 | |
855 | /* Fold in dependencies and cost of the condition. */ |
856 | FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE) |
857 | { |
858 | if (!add_dependency (def: val, data: lim_data, loop, add_cost: false)) |
859 | return false; |
860 | def_data = get_lim_data (SSA_NAME_DEF_STMT (val)); |
861 | if (def_data) |
862 | lim_data->cost += def_data->cost; |
863 | } |
864 | |
	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
870 | if (total_cost - min_cost >= 2 * LIM_EXPENSIVE |
871 | && !(min_cost != 0 |
872 | && total_cost / min_cost <= 2)) |
873 | return false; |
874 | |
875 | /* Assume that the control-flow in the loop will vanish. |
876 | ??? We should verify this and not artificially increase |
877 | the cost if that is not the case. */ |
878 | lim_data->cost += stmt_cost (stmt); |
879 | } |
880 | |
881 | return true; |
882 | } |
883 | |
884 | /* A stmt that receives abnormal edges cannot be hoisted. */ |
885 | if (is_a <gcall *> (p: stmt) |
886 | && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE)) |
887 | return false; |
888 | |
889 | FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE) |
890 | if (!add_dependency (def: val, data: lim_data, loop, add_cost: true)) |
891 | return false; |
892 | |
893 | if (gimple_vuse (g: stmt)) |
894 | { |
895 | im_mem_ref *ref |
896 | = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL; |
897 | if (ref |
898 | && MEM_ANALYZABLE (ref)) |
899 | { |
900 | lim_data->max_loop = outermost_indep_loop (outer: lim_data->max_loop, |
901 | loop, ref); |
902 | if (!lim_data->max_loop) |
903 | return false; |
904 | } |
905 | else if (! add_dependency (def: gimple_vuse (g: stmt), data: lim_data, loop, add_cost: false)) |
906 | return false; |
907 | } |
908 | |
909 | lim_data->cost += stmt_cost (stmt); |
910 | |
911 | return true; |
912 | } |
913 | |
914 | /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL, |
915 | and that one of the operands of this statement is computed by STMT. |
916 | Ensure that STMT (together with all the statements that define its |
917 | operands) is hoisted at least out of the loop LEVEL. */ |
918 | |
919 | static void |
920 | set_level (gimple *stmt, class loop *orig_loop, class loop *level) |
921 | { |
922 | class loop *stmt_loop = gimple_bb (g: stmt)->loop_father; |
923 | struct lim_aux_data *lim_data; |
924 | gimple *dep_stmt; |
925 | unsigned i; |
926 | |
927 | stmt_loop = find_common_loop (orig_loop, stmt_loop); |
928 | lim_data = get_lim_data (stmt); |
929 | if (lim_data != NULL && lim_data->tgt_loop != NULL) |
930 | stmt_loop = find_common_loop (stmt_loop, |
931 | loop_outer (loop: lim_data->tgt_loop)); |
932 | if (flow_loop_nested_p (stmt_loop, level)) |
933 | return; |
934 | |
935 | gcc_assert (level == lim_data->max_loop |
936 | || flow_loop_nested_p (lim_data->max_loop, level)); |
937 | |
938 | lim_data->tgt_loop = level; |
939 | FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt) |
940 | set_level (stmt: dep_stmt, orig_loop, level); |
941 | } |
942 | |
/* Determines an outermost loop from which we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */
946 | |
947 | static void |
948 | set_profitable_level (gimple *stmt) |
949 | { |
950 | set_level (stmt, orig_loop: gimple_bb (g: stmt)->loop_father, level: get_lim_data (stmt)->max_loop); |
951 | } |
952 | |
953 | /* Returns true if STMT is a call that has side effects. */ |
954 | |
955 | static bool |
956 | nonpure_call_p (gimple *stmt) |
957 | { |
958 | if (gimple_code (g: stmt) != GIMPLE_CALL) |
959 | return false; |
960 | |
961 | return gimple_has_side_effects (stmt); |
962 | } |
963 | |
964 | /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */ |
965 | |
966 | static gimple * |
967 | rewrite_reciprocal (gimple_stmt_iterator *bsi) |
968 | { |
969 | gassign *stmt, *stmt1, *stmt2; |
970 | tree name, lhs, type; |
971 | tree real_one; |
972 | gimple_stmt_iterator gsi; |
973 | |
974 | stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi)); |
975 | lhs = gimple_assign_lhs (gs: stmt); |
976 | type = TREE_TYPE (lhs); |
977 | |
978 | real_one = build_one_cst (type); |
979 | |
980 | name = make_temp_ssa_name (type, NULL, name: "reciptmp" ); |
981 | stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one, |
982 | gimple_assign_rhs2 (gs: stmt)); |
983 | stmt2 = gimple_build_assign (lhs, MULT_EXPR, name, |
984 | gimple_assign_rhs1 (gs: stmt)); |
985 | |
986 | /* Replace division stmt with reciprocal and multiply stmts. |
987 | The multiply stmt is not invariant, so update iterator |
988 | and avoid rescanning. */ |
989 | gsi = *bsi; |
990 | gsi_insert_before (bsi, stmt1, GSI_NEW_STMT); |
991 | gsi_replace (&gsi, stmt2, true); |
992 | |
993 | /* Continue processing with invariant reciprocal statement. */ |
994 | return stmt1; |
995 | } |
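
/* For example, with b_2 invariant the loop body statement

     x_3 = a_1 / b_2;

   is rewritten to

     reciptmp_4 = 1.0 / b_2;
     x_3 = a_1 * reciptmp_4;

   and the reciprocal statement is the one returned for further processing
   (and eventual hoisting) as the invariant.  */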
996 | |
997 | /* Check if the pattern at *BSI is a bittest of the form |
998 | (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */ |
999 | |
1000 | static gimple * |
1001 | rewrite_bittest (gimple_stmt_iterator *bsi) |
1002 | { |
1003 | gassign *stmt; |
1004 | gimple *stmt1; |
1005 | gassign *stmt2; |
1006 | gimple *use_stmt; |
1007 | gcond *cond_stmt; |
1008 | tree lhs, name, t, a, b; |
1009 | use_operand_p use; |
1010 | |
1011 | stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi)); |
1012 | lhs = gimple_assign_lhs (gs: stmt); |
1013 | |
1014 | /* Verify that the single use of lhs is a comparison against zero. */ |
1015 | if (TREE_CODE (lhs) != SSA_NAME |
1016 | || !single_imm_use (var: lhs, use_p: &use, stmt: &use_stmt)) |
1017 | return stmt; |
1018 | cond_stmt = dyn_cast <gcond *> (p: use_stmt); |
1019 | if (!cond_stmt) |
1020 | return stmt; |
1021 | if (gimple_cond_lhs (gs: cond_stmt) != lhs |
1022 | || (gimple_cond_code (gs: cond_stmt) != NE_EXPR |
1023 | && gimple_cond_code (gs: cond_stmt) != EQ_EXPR) |
1024 | || !integer_zerop (gimple_cond_rhs (gs: cond_stmt))) |
1025 | return stmt; |
1026 | |
1027 | /* Get at the operands of the shift. The rhs is TMP1 & 1. */ |
1028 | stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
1029 | if (gimple_code (g: stmt1) != GIMPLE_ASSIGN) |
1030 | return stmt; |
1031 | |
1032 | /* There is a conversion in between possibly inserted by fold. */ |
1033 | if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1))) |
1034 | { |
1035 | t = gimple_assign_rhs1 (gs: stmt1); |
1036 | if (TREE_CODE (t) != SSA_NAME |
1037 | || !has_single_use (var: t)) |
1038 | return stmt; |
1039 | stmt1 = SSA_NAME_DEF_STMT (t); |
1040 | if (gimple_code (g: stmt1) != GIMPLE_ASSIGN) |
1041 | return stmt; |
1042 | } |
1043 | |
1044 | /* Verify that B is loop invariant but A is not. Verify that with |
1045 | all the stmt walking we are still in the same loop. */ |
1046 | if (gimple_assign_rhs_code (gs: stmt1) != RSHIFT_EXPR |
1047 | || loop_containing_stmt (stmt: stmt1) != loop_containing_stmt (stmt)) |
1048 | return stmt; |
1049 | |
1050 | a = gimple_assign_rhs1 (gs: stmt1); |
1051 | b = gimple_assign_rhs2 (gs: stmt1); |
1052 | |
1053 | if (outermost_invariant_loop (def: b, loop: loop_containing_stmt (stmt: stmt1)) != NULL |
1054 | && outermost_invariant_loop (def: a, loop: loop_containing_stmt (stmt: stmt1)) == NULL) |
1055 | { |
1056 | gimple_stmt_iterator rsi; |
1057 | |
1058 | /* 1 << B */ |
1059 | t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a), |
1060 | build_int_cst (TREE_TYPE (a), 1), b); |
1061 | name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp" ); |
1062 | stmt1 = gimple_build_assign (name, t); |
1063 | |
1064 | /* A & (1 << B) */ |
1065 | t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name); |
1066 | name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp" ); |
1067 | stmt2 = gimple_build_assign (name, t); |
1068 | |
1069 | /* Replace the SSA_NAME we compare against zero. Adjust |
1070 | the type of zero accordingly. */ |
1071 | SET_USE (use, name); |
1072 | gimple_cond_set_rhs (gs: cond_stmt, |
1073 | rhs: build_int_cst_type (TREE_TYPE (name), |
1074 | 0)); |
1075 | |
1076 | /* Don't use gsi_replace here, none of the new assignments sets |
1077 | the variable originally set in stmt. Move bsi to stmt1, and |
1078 | then remove the original stmt, so that we get a chance to |
1079 | retain debug info for it. */ |
1080 | rsi = *bsi; |
1081 | gsi_insert_before (bsi, stmt1, GSI_NEW_STMT); |
1082 | gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT); |
1083 | gimple *to_release = gsi_stmt (i: rsi); |
1084 | gsi_remove (&rsi, true); |
1085 | release_defs (to_release); |
1086 | |
1087 | return stmt1; |
1088 | } |
1089 | |
1090 | return stmt; |
1091 | } |
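
/* For example, with B_2 invariant but A_1 varying in the loop

     tmp_3 = A_1 >> B_2;
     lhs_4 = tmp_3 & 1;
     if (lhs_4 != 0) ...

   is rewritten to

     shifttmp_5 = 1 << B_2;
     shifttmp_6 = A_1 & shifttmp_5;
     if (shifttmp_6 != 0) ...

   so that the mask computation 1 << B_2 can be hoisted.  */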
1092 | |
/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the
   statements.  */
1096 | |
1097 | static void |
1098 | compute_invariantness (basic_block bb) |
1099 | { |
1100 | enum move_pos pos; |
1101 | gimple_stmt_iterator bsi; |
1102 | gimple *stmt; |
1103 | bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL; |
1104 | class loop *outermost = ALWAYS_EXECUTED_IN (bb); |
1105 | struct lim_aux_data *lim_data; |
1106 | |
1107 | if (!loop_outer (loop: bb->loop_father)) |
1108 | return; |
1109 | |
1110 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1111 | fprintf (stream: dump_file, format: "Basic block %d (loop %d -- depth %d):\n\n" , |
1112 | bb->index, bb->loop_father->num, loop_depth (loop: bb->loop_father)); |
1113 | |
  /* Look at PHI nodes, but only if there are at most two.
1115 | ??? We could relax this further by post-processing the inserted |
1116 | code and transforming adjacent cond-exprs with the same predicate |
1117 | to control flow again. */ |
1118 | bsi = gsi_start_phis (bb); |
1119 | if (!gsi_end_p (i: bsi) |
1120 | && ((gsi_next (i: &bsi), gsi_end_p (i: bsi)) |
1121 | || (gsi_next (i: &bsi), gsi_end_p (i: bsi)))) |
1122 | for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
1123 | { |
1124 | stmt = gsi_stmt (i: bsi); |
1125 | |
1126 | pos = movement_possibility (stmt); |
1127 | if (pos == MOVE_IMPOSSIBLE) |
1128 | continue; |
1129 | |
1130 | lim_data = get_lim_data (stmt); |
1131 | if (! lim_data) |
1132 | lim_data = init_lim_data (stmt); |
1133 | lim_data->always_executed_in = outermost; |
1134 | |
1135 | if (!determine_max_movement (stmt, must_preserve_exec: false)) |
1136 | { |
1137 | lim_data->max_loop = NULL; |
1138 | continue; |
1139 | } |
1140 | |
1141 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1142 | { |
1143 | print_gimple_stmt (dump_file, stmt, 2); |
1144 | fprintf (stream: dump_file, format: " invariant up to level %d, cost %d.\n\n" , |
1145 | loop_depth (loop: lim_data->max_loop), |
1146 | lim_data->cost); |
1147 | } |
1148 | |
1149 | if (lim_data->cost >= LIM_EXPENSIVE) |
1150 | set_profitable_level (stmt); |
1151 | } |
1152 | |
1153 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
1154 | { |
1155 | stmt = gsi_stmt (i: bsi); |
1156 | |
1157 | pos = movement_possibility (stmt); |
1158 | if (pos == MOVE_IMPOSSIBLE) |
1159 | { |
1160 | if (nonpure_call_p (stmt)) |
1161 | { |
1162 | maybe_never = true; |
1163 | outermost = NULL; |
1164 | } |
1165 | /* Make sure to note always_executed_in for stores to make |
1166 | store-motion work. */ |
1167 | else if (stmt_makes_single_store (stmt)) |
1168 | { |
1169 | struct lim_aux_data *lim_data = get_lim_data (stmt); |
1170 | if (! lim_data) |
1171 | lim_data = init_lim_data (stmt); |
1172 | lim_data->always_executed_in = outermost; |
1173 | } |
1174 | continue; |
1175 | } |
1176 | |
1177 | if (is_gimple_assign (gs: stmt) |
1178 | && (get_gimple_rhs_class (code: gimple_assign_rhs_code (gs: stmt)) |
1179 | == GIMPLE_BINARY_RHS)) |
1180 | { |
1181 | tree op0 = gimple_assign_rhs1 (gs: stmt); |
1182 | tree op1 = gimple_assign_rhs2 (gs: stmt); |
1183 | class loop *ol1 = outermost_invariant_loop (def: op1, |
1184 | loop: loop_containing_stmt (stmt)); |
1185 | |
1186 | /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal |
1187 | to be hoisted out of loop, saving expensive divide. */ |
1188 | if (pos == MOVE_POSSIBLE |
1189 | && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR |
1190 | && flag_unsafe_math_optimizations |
1191 | && !flag_trapping_math |
1192 | && ol1 != NULL |
1193 | && outermost_invariant_loop (def: op0, loop: ol1) == NULL) |
1194 | stmt = rewrite_reciprocal (bsi: &bsi); |
1195 | |
1196 | /* If the shift count is invariant, convert (A >> B) & 1 to |
1197 | A & (1 << B) allowing the bit mask to be hoisted out of the loop |
1198 | saving an expensive shift. */ |
1199 | if (pos == MOVE_POSSIBLE |
1200 | && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR |
1201 | && integer_onep (op1) |
1202 | && TREE_CODE (op0) == SSA_NAME |
1203 | && has_single_use (var: op0)) |
1204 | stmt = rewrite_bittest (bsi: &bsi); |
1205 | } |
1206 | |
1207 | lim_data = get_lim_data (stmt); |
1208 | if (! lim_data) |
1209 | lim_data = init_lim_data (stmt); |
1210 | lim_data->always_executed_in = outermost; |
1211 | |
1212 | if (maybe_never && pos == MOVE_PRESERVE_EXECUTION) |
1213 | continue; |
1214 | |
1215 | if (!determine_max_movement (stmt, must_preserve_exec: pos == MOVE_PRESERVE_EXECUTION)) |
1216 | { |
1217 | lim_data->max_loop = NULL; |
1218 | continue; |
1219 | } |
1220 | |
1221 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1222 | { |
1223 | print_gimple_stmt (dump_file, stmt, 2); |
1224 | fprintf (stream: dump_file, format: " invariant up to level %d, cost %d.\n\n" , |
1225 | loop_depth (loop: lim_data->max_loop), |
1226 | lim_data->cost); |
1227 | } |
1228 | |
1229 | if (lim_data->cost >= LIM_EXPENSIVE) |
1230 | set_profitable_level (stmt); |
1231 | } |
1232 | } |
1233 | |
1234 | /* Hoist the statements in basic block BB out of the loops prescribed by |
1235 | data stored in LIM_DATA structures associated with each statement. Callback |
1236 | for walk_dominator_tree. */ |
1237 | |
1238 | unsigned int |
1239 | move_computations_worker (basic_block bb) |
1240 | { |
1241 | class loop *level; |
1242 | unsigned cost = 0; |
1243 | struct lim_aux_data *lim_data; |
1244 | unsigned int todo = 0; |
1245 | |
1246 | if (!loop_outer (loop: bb->loop_father)) |
1247 | return todo; |
1248 | |
1249 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); ) |
1250 | { |
1251 | gassign *new_stmt; |
1252 | gphi *stmt = bsi.phi (); |
1253 | |
1254 | lim_data = get_lim_data (stmt); |
1255 | if (lim_data == NULL) |
1256 | { |
1257 | gsi_next (i: &bsi); |
1258 | continue; |
1259 | } |
1260 | |
1261 | cost = lim_data->cost; |
1262 | level = lim_data->tgt_loop; |
1263 | clear_lim_data (stmt); |
1264 | |
1265 | if (!level) |
1266 | { |
1267 | gsi_next (i: &bsi); |
1268 | continue; |
1269 | } |
1270 | |
1271 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1272 | { |
1273 | fprintf (stream: dump_file, format: "Moving PHI node\n" ); |
1274 | print_gimple_stmt (dump_file, stmt, 0); |
1275 | fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n" , |
1276 | cost, level->num); |
1277 | } |
1278 | |
1279 | if (gimple_phi_num_args (gs: stmt) == 1) |
1280 | { |
1281 | tree arg = PHI_ARG_DEF (stmt, 0); |
1282 | new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt), |
1283 | TREE_CODE (arg), arg); |
1284 | } |
1285 | else |
1286 | { |
1287 | basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb); |
1288 | gimple *cond = gsi_stmt (i: gsi_last_bb (bb: dom)); |
1289 | tree arg0 = NULL_TREE, arg1 = NULL_TREE, t; |
1290 | /* Get the PHI arguments corresponding to the true and false |
1291 | edges of COND. */ |
1292 | extract_true_false_args_from_phi (dom, phi: stmt, true_arg_p: &arg0, false_arg_p: &arg1); |
1293 | gcc_assert (arg0 && arg1); |
1294 | t = make_ssa_name (boolean_type_node); |
1295 | new_stmt = gimple_build_assign (t, gimple_cond_code (gs: cond), |
1296 | gimple_cond_lhs (gs: cond), |
1297 | gimple_cond_rhs (gs: cond)); |
1298 | gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); |
1299 | new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt), |
1300 | COND_EXPR, t, arg0, arg1); |
1301 | todo |= TODO_cleanup_cfg; |
1302 | } |
1303 | if (!ALWAYS_EXECUTED_IN (bb) |
1304 | || (ALWAYS_EXECUTED_IN (bb) != level |
1305 | && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))) |
1306 | reset_flow_sensitive_info (gimple_assign_lhs (gs: new_stmt)); |
1307 | gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); |
1308 | remove_phi_node (&bsi, false); |
1309 | } |
1310 | |
1311 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); ) |
1312 | { |
1313 | edge e; |
1314 | |
1315 | gimple *stmt = gsi_stmt (i: bsi); |
1316 | |
1317 | lim_data = get_lim_data (stmt); |
1318 | if (lim_data == NULL) |
1319 | { |
1320 | gsi_next (i: &bsi); |
1321 | continue; |
1322 | } |
1323 | |
1324 | cost = lim_data->cost; |
1325 | level = lim_data->tgt_loop; |
1326 | clear_lim_data (stmt); |
1327 | |
1328 | if (!level) |
1329 | { |
1330 | gsi_next (i: &bsi); |
1331 | continue; |
1332 | } |
1333 | |
1334 | /* We do not really want to move conditionals out of the loop; we just |
1335 | placed it here to force its operands to be moved if necessary. */ |
1336 | if (gimple_code (g: stmt) == GIMPLE_COND) |
1337 | { |
1338 | gsi_next (i: &bsi); |
1339 | continue; |
1340 | } |
1341 | |
1342 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1343 | { |
1344 | fprintf (stream: dump_file, format: "Moving statement\n" ); |
1345 | print_gimple_stmt (dump_file, stmt, 0); |
1346 | fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n" , |
1347 | cost, level->num); |
1348 | } |
1349 | |
1350 | e = loop_preheader_edge (level); |
1351 | gcc_assert (!gimple_vdef (stmt)); |
1352 | if (gimple_vuse (g: stmt)) |
1353 | { |
1354 | /* The new VUSE is the one from the virtual PHI in the loop |
1355 | header or the one already present. */ |
1356 | gphi_iterator gsi2; |
1357 | for (gsi2 = gsi_start_phis (e->dest); |
1358 | !gsi_end_p (i: gsi2); gsi_next (i: &gsi2)) |
1359 | { |
1360 | gphi *phi = gsi2.phi (); |
1361 | if (virtual_operand_p (op: gimple_phi_result (gs: phi))) |
1362 | { |
1363 | SET_USE (gimple_vuse_op (stmt), |
1364 | PHI_ARG_DEF_FROM_EDGE (phi, e)); |
1365 | break; |
1366 | } |
1367 | } |
1368 | } |
1369 | gsi_remove (&bsi, false); |
1370 | if (gimple_has_lhs (stmt) |
1371 | && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME |
1372 | && (!ALWAYS_EXECUTED_IN (bb) |
1373 | || !(ALWAYS_EXECUTED_IN (bb) == level |
1374 | || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) |
1375 | reset_flow_sensitive_info (gimple_get_lhs (stmt)); |
1376 | /* In case this is a stmt that is not unconditionally executed |
1377 | when the target loop header is executed and the stmt may |
1378 | invoke undefined integer or pointer overflow rewrite it to |
1379 | unsigned arithmetic. */ |
1380 | if (is_gimple_assign (gs: stmt) |
1381 | && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))) |
1382 | && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt))) |
1383 | && arith_code_with_undefined_signed_overflow |
1384 | (gimple_assign_rhs_code (gs: stmt)) |
1385 | && (!ALWAYS_EXECUTED_IN (bb) |
1386 | || !(ALWAYS_EXECUTED_IN (bb) == level |
1387 | || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) |
1388 | gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt)); |
1389 | else |
1390 | gsi_insert_on_edge (e, stmt); |
1391 | } |
1392 | |
1393 | return todo; |
1394 | } |
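
/* For illustration, a two-argument PHI that is moved out of its loop, with
   the controlling condition in the immediate dominator,

     if (c_1 < c_2) goto bb_t; else goto bb_f;
     ...
     # x_4 = PHI <a_2(bb_t), b_3(bb_f)>

   is materialized on the preheader edge of the target loop as

     t_5 = c_1 < c_2;
     x_4 = t_5 ? a_2 : b_3;

   which is why TODO_cleanup_cfg is requested in that case.  */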
1395 | |
1396 | /* Checks whether the statement defining variable *INDEX can be hoisted |
1397 | out of the loop passed in DATA. Callback for for_each_index. */ |
1398 | |
1399 | static bool |
1400 | may_move_till (tree ref, tree *index, void *data) |
1401 | { |
1402 | class loop *loop = (class loop *) data, *max_loop; |
1403 | |
1404 | /* If REF is an array reference, check also that the step and the lower |
1405 | bound is invariant in LOOP. */ |
1406 | if (TREE_CODE (ref) == ARRAY_REF) |
1407 | { |
1408 | tree step = TREE_OPERAND (ref, 3); |
1409 | tree lbound = TREE_OPERAND (ref, 2); |
1410 | |
1411 | max_loop = outermost_invariant_loop (def: step, loop); |
1412 | if (!max_loop) |
1413 | return false; |
1414 | |
1415 | max_loop = outermost_invariant_loop (def: lbound, loop); |
1416 | if (!max_loop) |
1417 | return false; |
1418 | } |
1419 | |
1420 | max_loop = outermost_invariant_loop (def: *index, loop); |
1421 | if (!max_loop) |
1422 | return false; |
1423 | |
1424 | return true; |
1425 | } |
1426 | |
/* If OP is an SSA NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which EXPR is used.  */
1429 | |
1430 | static void |
1431 | force_move_till_op (tree op, class loop *orig_loop, class loop *loop) |
1432 | { |
1433 | gimple *stmt; |
1434 | |
1435 | if (!op |
1436 | || is_gimple_min_invariant (op)) |
1437 | return; |
1438 | |
1439 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
1440 | |
1441 | stmt = SSA_NAME_DEF_STMT (op); |
1442 | if (gimple_nop_p (g: stmt)) |
1443 | return; |
1444 | |
1445 | set_level (stmt, orig_loop, level: loop); |
1446 | } |
1447 | |
1448 | /* Forces statement defining invariants in REF (and *INDEX) to be moved out of |
1449 | the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for |
1450 | for_each_index. */ |
1451 | |
1452 | struct fmt_data |
1453 | { |
1454 | class loop *loop; |
1455 | class loop *orig_loop; |
1456 | }; |
1457 | |
1458 | static bool |
1459 | force_move_till (tree ref, tree *index, void *data) |
1460 | { |
1461 | struct fmt_data *fmt_data = (struct fmt_data *) data; |
1462 | |
1463 | if (TREE_CODE (ref) == ARRAY_REF) |
1464 | { |
1465 | tree step = TREE_OPERAND (ref, 3); |
1466 | tree lbound = TREE_OPERAND (ref, 2); |
1467 | |
1468 | force_move_till_op (op: step, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop); |
1469 | force_move_till_op (op: lbound, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop); |
1470 | } |
1471 | |
1472 | force_move_till_op (op: *index, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop); |
1473 | |
1474 | return true; |
1475 | } |
1476 | |
1477 | /* A function to free the mem_ref object OBJ. */ |
1478 | |
1479 | static void |
1480 | memref_free (class im_mem_ref *mem) |
1481 | { |
1482 | mem->accesses_in_loop.release (); |
1483 | } |
1484 | |
1485 | /* Allocates and returns a memory reference description for MEM whose hash |
1486 | value is HASH and id is ID. */ |
1487 | |
1488 | static im_mem_ref * |
1489 | mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) |
1490 | { |
1491 | im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref); |
1492 | if (mem) |
1493 | ref->mem = *mem; |
1494 | else |
1495 | ao_ref_init (&ref->mem, error_mark_node); |
1496 | ref->id = id; |
1497 | ref->ref_canonical = false; |
1498 | ref->ref_decomposed = false; |
1499 | ref->hash = hash; |
1500 | ref->stored = NULL; |
1501 | ref->loaded = NULL; |
1502 | bitmap_initialize (head: &ref->dep_loop, obstack: &lim_bitmap_obstack); |
1503 | ref->accesses_in_loop.create (nelems: 1); |
1504 | |
1505 | return ref; |
1506 | } |
1507 | |
1508 | /* Records memory reference location *LOC in LOOP to the memory reference |
1509 | description REF. The reference occurs in statement STMT. */ |
1510 | |
1511 | static void |
1512 | record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc) |
1513 | { |
1514 | mem_ref_loc aref; |
1515 | aref.stmt = stmt; |
1516 | aref.ref = loc; |
1517 | ref->accesses_in_loop.safe_push (obj: aref); |
1518 | } |
1519 | |
1520 | /* Set the LOOP bit in REF stored bitmap and allocate that if |
1521 | necessary. Return whether a bit was changed. */ |
1522 | |
1523 | static bool |
1524 | set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop) |
1525 | { |
1526 | if (!ref->stored) |
1527 | ref->stored = BITMAP_ALLOC (obstack: &lim_bitmap_obstack); |
1528 | return bitmap_set_bit (ref->stored, loop->num); |
1529 | } |
1530 | |
1531 | /* Marks reference REF as stored in LOOP. */ |
1532 | |
1533 | static void |
1534 | mark_ref_stored (im_mem_ref *ref, class loop *loop) |
1535 | { |
1536 | while (loop != current_loops->tree_root |
1537 | && set_ref_stored_in_loop (ref, loop)) |
1538 | loop = loop_outer (loop); |
1539 | } |
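
/* For illustration, marking a reference as stored in an inner loop also
   marks all enclosing loops up to the loop tree root; the walk stops early
   as soon as set_ref_stored_in_loop finds the bit already set, because then
   every outer loop has been marked already.  */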
1540 | |
1541 | /* Set the LOOP bit in REF loaded bitmap and allocate that if |
1542 | necessary. Return whether a bit was changed. */ |
1543 | |
1544 | static bool |
1545 | set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) |
1546 | { |
1547 | if (!ref->loaded) |
1548 | ref->loaded = BITMAP_ALLOC (obstack: &lim_bitmap_obstack); |
1549 | return bitmap_set_bit (ref->loaded, loop->num); |
1550 | } |
1551 | |
1552 | /* Marks reference REF as loaded in LOOP. */ |
1553 | |
1554 | static void |
1555 | mark_ref_loaded (im_mem_ref *ref, class loop *loop) |
1556 | { |
1557 | while (loop != current_loops->tree_root |
1558 | && set_ref_loaded_in_loop (ref, loop)) |
1559 | loop = loop_outer (loop); |
1560 | } |
1561 | |
1562 | /* Gathers memory references in statement STMT in LOOP, storing the |
1563 | information about them in the memory_accesses structure. Marks |
1564 | the vops accessed through unrecognized statements there as |
1565 | well. */ |
1566 | |
1567 | static void |
1568 | gather_mem_refs_stmt (class loop *loop, gimple *stmt) |
1569 | { |
1570 | tree *mem = NULL; |
1571 | hashval_t hash; |
1572 | im_mem_ref **slot; |
1573 | im_mem_ref *ref; |
1574 | bool is_stored; |
1575 | unsigned id; |
1576 | |
1577 | if (!gimple_vuse (g: stmt)) |
1578 | return; |
1579 | |
1580 | mem = simple_mem_ref_in_stmt (stmt, is_store: &is_stored); |
1581 | if (!mem && is_gimple_assign (gs: stmt)) |
1582 | { |
1583 | /* For aggregate copies record distinct references but use them |
1584 | only for disambiguation purposes. */ |
1585 | id = memory_accesses.refs_list.length (); |
1586 | ref = mem_ref_alloc (NULL, hash: 0, id); |
1587 | memory_accesses.refs_list.safe_push (obj: ref); |
1588 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1589 | { |
1590 | fprintf (stream: dump_file, format: "Unhandled memory reference %u: " , id); |
1591 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
1592 | } |
1593 | record_mem_ref_loc (ref, stmt, mem); |
1594 | is_stored = gimple_vdef (stmt); |
1595 | } |
1596 | else if (!mem) |
1597 | { |
1598 | /* We use the shared mem_ref for all unanalyzable refs. */ |
1599 | id = UNANALYZABLE_MEM_ID; |
1600 | ref = memory_accesses.refs_list[id]; |
1601 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1602 | { |
1603 | fprintf (dump_file, "Unanalyzed memory reference %u: ", id); |
1604 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
1605 | } |
1606 | is_stored = gimple_vdef (stmt); |
1607 | } |
1608 | else |
1609 | { |
1610 | /* We are looking for equal refs that might differ in structure |
1611 | such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but |
1612 | make sure we can canonicalize the ref in the hashtable if |
1613 | non-operand_equal_p refs are found. For the lookup we mark |
1614 | the case we want strict equality with aor.max_size == -1. */ |
1615 | ao_ref aor; |
1616 | ao_ref_init (&aor, *mem); |
1617 | ao_ref_base (&aor); |
1618 | ao_ref_alias_set (&aor); |
1619 | HOST_WIDE_INT offset, size, max_size; |
1620 | poly_int64 saved_maxsize = aor.max_size, mem_off; |
1621 | tree mem_base; |
1622 | bool ref_decomposed; |
1623 | if (aor.max_size_known_p () |
1624 | && aor.offset.is_constant (&offset) |
1625 | && aor.size.is_constant (&size) |
1626 | && aor.max_size.is_constant (&max_size) |
1627 | && size == max_size |
1628 | && (size % BITS_PER_UNIT) == 0 |
1629 | /* We're canonicalizing to a MEM where TYPE_SIZE specifies the |
1630 | size. Make sure this is consistent with the extraction. */ |
1631 | && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem))) |
1632 | && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))), |
1633 | aor.size) |
1634 | && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off))) |
1635 | { |
1636 | ref_decomposed = true; |
1637 | tree base = ao_ref_base (&aor); |
1638 | poly_int64 moffset; |
1639 | HOST_WIDE_INT mcoffset; |
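| /* Hash decomposed refs on their base (for MEM_REF bases, the pointer |
|    operand plus the combined offset) and their size, so that equal refs |
|    that differ structurally, such as a.b vs. MEM[&a + 4], can receive the |
|    same hash value.  */ |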
1640 | if (TREE_CODE (base) == MEM_REF |
1641 | && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset) |
1642 | && moffset.is_constant (&mcoffset)) |
1643 | { |
1644 | hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0); |
1645 | hash = iterative_hash_host_wide_int (mcoffset, hash); |
1646 | } |
1647 | else |
1648 | { |
1649 | hash = iterative_hash_expr (base, 0); |
1650 | hash = iterative_hash_host_wide_int (offset, hash); |
1651 | } |
1652 | hash = iterative_hash_host_wide_int (size, hash); |
1653 | } |
1654 | else |
1655 | { |
1656 | ref_decomposed = false; |
1657 | hash = iterative_hash_expr (aor.ref, 0); |
1658 | aor.max_size = -1; |
1659 | } |
1660 | slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT); |
1661 | aor.max_size = saved_maxsize; |
1662 | if (*slot) |
1663 | { |
1664 | if (!(*slot)->ref_canonical |
1665 | && !operand_equal_p (*mem, (*slot)->mem.ref, 0)) |
1666 | { |
1667 | /* If we didn't yet canonicalize the hashtable ref (which |
1668 | we'll end up using for code insertion) and hit a second |
1669 | equal ref that is not structurally equivalent create |
1670 | a canonical ref which is a bare MEM_REF. */ |
1671 | if (TREE_CODE (*mem) == MEM_REF |
1672 | || TREE_CODE (*mem) == TARGET_MEM_REF) |
1673 | { |
1674 | (*slot)->mem.ref = *mem; |
1675 | (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor); |
1676 | } |
1677 | else |
1678 | { |
1679 | tree ref_alias_type = reference_alias_ptr_type (*mem); |
1680 | unsigned int ref_align = get_object_alignment (*mem); |
1681 | tree ref_type = TREE_TYPE (*mem); |
1682 | tree tmp = build1 (ADDR_EXPR, ptr_type_node, |
1683 | unshare_expr (mem_base)); |
1684 | if (TYPE_ALIGN (ref_type) != ref_align) |
1685 | ref_type = build_aligned_type (ref_type, ref_align); |
1686 | tree new_ref |
1687 | = fold_build2 (MEM_REF, ref_type, tmp, |
1688 | build_int_cst (ref_alias_type, mem_off)); |
1689 | if ((*slot)->mem.volatile_p) |
1690 | TREE_THIS_VOLATILE (new_ref) = 1; |
1691 | (*slot)->mem.ref = new_ref; |
1692 | /* Make sure the recorded base and offset are consistent |
1693 | with the newly built ref. */ |
1694 | if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR) |
1695 | ; |
1696 | else |
1697 | { |
1698 | (*slot)->mem.base = new_ref; |
1699 | (*slot)->mem.offset = 0; |
1700 | } |
1701 | gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF |
1702 | && is_gimple_mem_ref_addr |
1703 | (TREE_OPERAND ((*slot)->mem.ref, |
1704 | 0))); |
1705 | (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set; |
1706 | } |
1707 | (*slot)->ref_canonical = true; |
1708 | } |
1709 | ref = *slot; |
1710 | id = ref->id; |
1711 | } |
1712 | else |
1713 | { |
1714 | id = memory_accesses.refs_list.length (); |
1715 | ref = mem_ref_alloc (&aor, hash, id); |
1716 | ref->ref_decomposed = ref_decomposed; |
1717 | memory_accesses.refs_list.safe_push (ref); |
1718 | *slot = ref; |
1719 | |
1720 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1721 | { |
1722 | fprintf (dump_file, "Memory reference %u: ", id); |
1723 | print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM); |
1724 | fprintf (dump_file, "\n"); |
1725 | } |
1726 | } |
1727 | |
1728 | record_mem_ref_loc (ref, stmt, mem); |
1729 | } |
1730 | if (is_stored) |
1731 | { |
1732 | bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); |
1733 | mark_ref_stored (ref, loop); |
1734 | } |
1735 | /* A memory op that is not simple also counts as a read when it is a write. */ |
1736 | if (!is_stored || id == UNANALYZABLE_MEM_ID |
1737 | || ref->mem.ref == error_mark_node) |
1738 | { |
1739 | bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id); |
1740 | mark_ref_loaded (ref, loop); |
1741 | } |
1742 | init_lim_data (stmt)->ref = ref->id; |
1743 | return; |
1744 | } |
1745 | |
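| /* Postorder number of each loop, indexed by loop->num; used to order |
|    basic blocks and reference locations after the loop hierarchy.  */ |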
1746 | static unsigned *bb_loop_postorder; |
1747 | |
1748 | /* qsort sort function to sort blocks after their loop fathers postorder. */ |
1749 | |
1750 | static int |
1751 | sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_, |
1752 | void *bb_loop_postorder_) |
1753 | { |
1754 | unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; |
1755 | basic_block bb1 = *(const basic_block *)bb1_; |
1756 | basic_block bb2 = *(const basic_block *)bb2_; |
1757 | class loop *loop1 = bb1->loop_father; |
1758 | class loop *loop2 = bb2->loop_father; |
1759 | if (loop1->num == loop2->num) |
1760 | return bb1->index - bb2->index; |
1761 | return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1; |
1762 | } |
1763 | |
1764 | /* qsort sort function to sort ref locs after their loop fathers postorder. */ |
1765 | |
1766 | static int |
1767 | sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_, |
1768 | void *bb_loop_postorder_) |
1769 | { |
1770 | unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; |
1771 | const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_; |
1772 | const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_; |
1773 | class loop *loop1 = gimple_bb (loc1->stmt)->loop_father; |
1774 | class loop *loop2 = gimple_bb (loc2->stmt)->loop_father; |
1775 | if (loop1->num == loop2->num) |
1776 | return 0; |
1777 | return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1; |
1778 | } |
1779 | |
1780 | /* Gathers memory references in loops. */ |
1781 | |
1782 | static void |
1783 | analyze_memory_references (bool store_motion) |
1784 | { |
1785 | gimple_stmt_iterator bsi; |
1786 | basic_block bb, *bbs; |
1787 | class loop *outer; |
1788 | unsigned i, n; |
1789 | |
1790 | /* Collect all basic-blocks in loops and sort them after their |
1791 | loops postorder. */ |
1792 | i = 0; |
1793 | bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); |
1794 | FOR_EACH_BB_FN (bb, cfun) |
1795 | if (bb->loop_father != current_loops->tree_root) |
1796 | bbs[i++] = bb; |
1797 | n = i; |
1798 | gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp, |
1799 | bb_loop_postorder); |
1800 | |
1801 | /* Visit blocks in loop postorder and assign mem-ref IDs in that order. |
1802 | That results in better locality for all the bitmaps. It also |
1803 | automatically sorts the location list of gathered memory references |
1804 | after their loop postorder number, allowing it to be binary-searched. */ |
1805 | for (i = 0; i < n; ++i) |
1806 | { |
1807 | basic_block bb = bbs[i]; |
1808 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
1809 | gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi)); |
1810 | } |
1811 | |
1812 | /* Verify the list of gathered memory references is sorted after their |
1813 | loop postorder number. */ |
1814 | if (flag_checking) |
1815 | { |
1816 | im_mem_ref *ref; |
1817 | FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) |
1818 | for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j) |
1819 | gcc_assert (sort_locs_in_loop_postorder_cmp |
1820 | (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j], |
1821 | bb_loop_postorder) <= 0); |
1822 | } |
1823 | |
1824 | free (bbs); |
1825 | |
1826 | if (!store_motion) |
1827 | return; |
1828 | |
1829 | /* Propagate the information about accessed memory references up |
1830 | the loop hierarchy. */ |
1831 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
1832 | { |
1833 | /* Finalize the overall touched references (including subloops). */ |
1834 | bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num], |
1835 | &memory_accesses.refs_stored_in_loop[loop->num]); |
1836 | |
1837 | /* Propagate the information about accessed memory references up |
1838 | the loop hierarchy. */ |
1839 | outer = loop_outer (loop); |
1840 | if (outer == current_loops->tree_root) |
1841 | continue; |
1842 | |
1843 | bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num], |
1844 | &memory_accesses.all_refs_stored_in_loop[loop->num]); |
1845 | } |
1846 | } |
1847 | |
1848 | /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in |
1849 | tree_to_aff_combination_expand. */ |
1850 | |
1851 | static bool |
1852 | mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2, |
1853 | hash_map<tree, name_expansion *> **ttae_cache, |
1854 | bool tbaa_p) |
1855 | { |
1856 | gcc_checking_assert (mem1->mem.ref != error_mark_node |
1857 | && mem2->mem.ref != error_mark_node); |
1858 | |
1859 | /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same |
1860 | object and their offset differ in such a way that the locations cannot |
1861 | overlap, then they cannot alias. */ |
1862 | poly_widest_int size1, size2; |
1863 | aff_tree off1, off2; |
1864 | |
1865 | /* Perform basic offset and type-based disambiguation. */ |
1866 | if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p)) |
1867 | return false; |
1868 | |
1869 | /* The expansion of addresses may be a bit expensive, thus we only do |
1870 | the check at -O2 and higher optimization levels. */ |
1871 | if (optimize < 2) |
1872 | return true; |
1873 | |
1874 | get_inner_reference_aff (mem1->mem.ref, &off1, &size1); |
1875 | get_inner_reference_aff (mem2->mem.ref, &off2, &size2); |
1876 | aff_combination_expand (&off1, ttae_cache); |
1877 | aff_combination_expand (&off2, ttae_cache); |
1878 | aff_combination_scale (&off1, -1); |
1879 | aff_combination_add (&off2, &off1); |
1880 | |
1881 | if (aff_comb_cannot_overlap_p (&off2, size1, size2)) |
1882 | return false; |
1883 | |
1884 | return true; |
1885 | } |
1886 | |
1887 | /* Compare function for bsearch searching for reference locations |
1888 | in a loop. */ |
1889 | |
1890 | static int |
1891 | find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_, |
1892 | void *bb_loop_postorder_) |
1893 | { |
1894 | unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; |
1895 | class loop *loop = (class loop *)const_cast<void *>(loop_); |
1896 | mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_); |
1897 | class loop *loc_loop = gimple_bb (loc->stmt)->loop_father; |
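| /* Report a match (0) for any location inside LOOP or its subloops so the |
|    bsearch in for_all_locs_in_loop lands somewhere inside the cluster of |
|    LOOP's locations.  */ |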
1898 | if (loop->num == loc_loop->num |
1899 | || flow_loop_nested_p (loop, loc_loop)) |
1900 | return 0; |
1901 | return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num] |
1902 | ? -1 : 1); |
1903 | } |
1904 | |
1905 | /* Iterates over all locations of REF in LOOP and its subloops calling |
1906 | fn.operator() with the location as argument. When that operator |
1907 | returns true the iteration is stopped and true is returned. |
1908 | Otherwise false is returned. */ |
1909 | |
1910 | template <typename FN> |
1911 | static bool |
1912 | for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn) |
1913 | { |
1914 | unsigned i; |
1915 | mem_ref_loc *loc; |
1916 | |
1917 | /* Search for the cluster of locs in the accesses_in_loop vector |
1918 | which is sorted after postorder index of the loop father. */ |
1919 | loc = ref->accesses_in_loop.bsearch (key: loop, cmp: find_ref_loc_in_loop_cmp, |
1920 | data: bb_loop_postorder); |
1921 | if (!loc) |
1922 | return false; |
1923 | |
1924 | /* We have found one location inside loop or its sub-loops. Iterate |
1925 | both forward and backward to cover the whole cluster. */ |
1926 | i = loc - ref->accesses_in_loop.address (); |
1927 | while (i > 0) |
1928 | { |
1929 | --i; |
1930 | mem_ref_loc *l = &ref->accesses_in_loop[i]; |
1931 | if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt))) |
1932 | break; |
1933 | if (fn (l)) |
1934 | return true; |
1935 | } |
1936 | for (i = loc - ref->accesses_in_loop.address (); |
1937 | i < ref->accesses_in_loop.length (); ++i) |
1938 | { |
1939 | mem_ref_loc *l = &ref->accesses_in_loop[i]; |
1940 | if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt))) |
1941 | break; |
1942 | if (fn (l)) |
1943 | return true; |
1944 | } |
1945 | |
1946 | return false; |
1947 | } |
1948 | |
1949 | /* Rewrites location LOC by TMP_VAR. */ |
1950 | |
1951 | class rewrite_mem_ref_loc |
1952 | { |
1953 | public: |
1954 | rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {} |
1955 | bool operator () (mem_ref_loc *loc); |
1956 | tree tmp_var; |
1957 | }; |
1958 | |
1959 | bool |
1960 | rewrite_mem_ref_loc::operator () (mem_ref_loc *loc) |
1961 | { |
1962 | *loc->ref = tmp_var; |
1963 | update_stmt (s: loc->stmt); |
1964 | return false; |
1965 | } |
1966 | |
1967 | /* Rewrites all references to REF in LOOP by variable TMP_VAR. */ |
1968 | |
1969 | static void |
1970 | rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var) |
1971 | { |
1972 | for_all_locs_in_loop (loop, ref, fn: rewrite_mem_ref_loc (tmp_var)); |
1973 | } |
1974 | |
1975 | /* Stores the first reference location in LOCP. */ |
1976 | |
1977 | class first_mem_ref_loc_1 |
1978 | { |
1979 | public: |
1980 | first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {} |
1981 | bool operator () (mem_ref_loc *loc); |
1982 | mem_ref_loc **locp; |
1983 | }; |
1984 | |
1985 | bool |
1986 | first_mem_ref_loc_1::operator () (mem_ref_loc *loc) |
1987 | { |
1988 | *locp = loc; |
1989 | return true; |
1990 | } |
1991 | |
1992 | /* Returns the first reference location to REF in LOOP. */ |
1993 | |
1994 | static mem_ref_loc * |
1995 | first_mem_ref_loc (class loop *loop, im_mem_ref *ref) |
1996 | { |
1997 | mem_ref_loc *locp = NULL; |
1998 | for_all_locs_in_loop (loop, ref, fn: first_mem_ref_loc_1 (&locp)); |
1999 | return locp; |
2000 | } |
2001 | |
2002 | /* Helper function for execute_sm. Emit code to store TMP_VAR into |
2003 | MEM along edge EX. |
2004 | |
2005 | The store is only done if MEM has changed. We do this so no |
2006 | changes to MEM occur on code paths that did not originally store |
2007 | into it. |
2008 | |
2009 | The common case for execute_sm will transform: |
2010 | |
2011 | for (...) { |
2012 | if (foo) |
2013 | stuff; |
2014 | else |
2015 | MEM = TMP_VAR; |
2016 | } |
2017 | |
2018 | into: |
2019 | |
2020 | lsm = MEM; |
2021 | for (...) { |
2022 | if (foo) |
2023 | stuff; |
2024 | else |
2025 | lsm = TMP_VAR; |
2026 | } |
2027 | MEM = lsm; |
2028 | |
2029 | This function will generate: |
2030 | |
2031 | lsm = MEM; |
2032 | |
2033 | lsm_flag = false; |
2034 | ... |
2035 | for (...) { |
2036 | if (foo) |
2037 | stuff; |
2038 | else { |
2039 | lsm = TMP_VAR; |
2040 | lsm_flag = true; |
2041 | } |
2042 | } |
2043 | if (lsm_flag) <-- |
2044 | MEM = lsm; <-- (X) |
2045 | |
2046 | In case MEM and TMP_VAR are NULL the function will return the then |
2047 | block so the caller can insert (X) and other related stmts. |
2048 | */ |
2049 | |
2050 | static basic_block |
2051 | execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, |
2052 | edge preheader, hash_set <basic_block> *flag_bbs, |
2053 | edge &append_cond_position, edge &last_cond_fallthru) |
2054 | { |
2055 | basic_block new_bb, then_bb, old_dest; |
2056 | bool loop_has_only_one_exit; |
2057 | edge then_old_edge; |
2058 | gimple_stmt_iterator gsi; |
2059 | gimple *stmt; |
2060 | bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP; |
2061 | |
2062 | profile_count count_sum = profile_count::zero (); |
2063 | int nbbs = 0, ncount = 0; |
2064 | profile_probability flag_probability = profile_probability::uninitialized (); |
2065 | |
2066 | /* Flag is set in FLAG_BBS. Determine probability that flag will be true |
2067 | at loop exit. |
2068 | |
2069 | This code may look fancy, but it cannot update profile very realistically |
2070 | because we do not know the probability that flag will be true at given |
2071 | loop exit. |
2072 | |
2073 | We look for two interesting extremes |
2074 | - when exit is dominated by block setting the flag, we know it will |
2075 | always be true. This is a common case. |
2076 | - when all blocks setting the flag have very low frequency we know |
2077 | it will likely be false. |
2078 | In all other cases we default to 2/3 for flag being true. */ |
2079 | |
2080 | for (hash_set<basic_block>::iterator it = flag_bbs->begin (); |
2081 | it != flag_bbs->end (); ++it) |
2082 | { |
2083 | if ((*it)->count.initialized_p ()) |
2084 | count_sum += (*it)->count, ncount ++; |
2085 | if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) |
2086 | flag_probability = profile_probability::always (); |
2087 | nbbs++; |
2088 | } |
2089 | |
2090 | profile_probability cap |
2091 | = profile_probability::guessed_always ().apply_scale (2, 3); |
2092 | |
2093 | if (flag_probability.initialized_p ()) |
2094 | ; |
2095 | else if (ncount == nbbs |
2096 | && preheader->count () >= count_sum && preheader->count ().nonzero_p ()) |
2097 | { |
2098 | flag_probability = count_sum.probability_in (preheader->count ()); |
2099 | if (flag_probability > cap) |
2100 | flag_probability = cap; |
2101 | } |
2102 | |
2103 | if (!flag_probability.initialized_p ()) |
2104 | flag_probability = cap; |
2105 | |
2106 | /* ?? Insert store after previous store if applicable. See note |
2107 | below. */ |
2108 | if (append_cond_position) |
2109 | ex = append_cond_position; |
2110 | |
2111 | loop_has_only_one_exit = single_pred_p (ex->dest); |
2112 | |
2113 | if (loop_has_only_one_exit) |
2114 | ex = split_block_after_labels (ex->dest); |
2115 | else |
2116 | { |
2117 | for (gphi_iterator gpi = gsi_start_phis (ex->dest); |
2118 | !gsi_end_p (i: gpi); gsi_next (i: &gpi)) |
2119 | { |
2120 | gphi *phi = gpi.phi (); |
2121 | if (virtual_operand_p (op: gimple_phi_result (gs: phi))) |
2122 | continue; |
2123 | |
2124 | /* When the destination has a non-virtual PHI node with multiple |
2125 | predecessors make sure we preserve the PHI structure by |
2126 | forcing a forwarder block so that hoisting of that PHI will |
2127 | still work. */ |
2128 | split_edge (ex); |
2129 | break; |
2130 | } |
2131 | } |
2132 | |
2133 | old_dest = ex->dest; |
2134 | new_bb = split_edge (ex); |
2135 | if (append_cond_position) |
2136 | new_bb->count += last_cond_fallthru->count (); |
2137 | then_bb = create_empty_bb (new_bb); |
2138 | then_bb->count = new_bb->count.apply_probability (prob: flag_probability); |
2139 | if (irr) |
2140 | then_bb->flags = BB_IRREDUCIBLE_LOOP; |
2141 | add_bb_to_loop (then_bb, new_bb->loop_father); |
2142 | |
2143 | gsi = gsi_start_bb (bb: new_bb); |
2144 | stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node, |
2145 | NULL_TREE, NULL_TREE); |
2146 | gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
2147 | |
2148 | /* Insert actual store. */ |
2149 | if (mem) |
2150 | { |
2151 | gsi = gsi_start_bb (bb: then_bb); |
2152 | stmt = gimple_build_assign (unshare_expr (mem), tmp_var); |
2153 | gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
2154 | } |
2155 | |
2156 | edge e1 = single_succ_edge (bb: new_bb); |
2157 | edge e2 = make_edge (new_bb, then_bb, |
2158 | EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); |
2159 | e2->probability = flag_probability; |
2160 | |
2161 | e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0); |
2162 | e1->flags &= ~EDGE_FALLTHRU; |
2163 | |
2164 | e1->probability = flag_probability.invert (); |
2165 | |
2166 | then_old_edge = make_single_succ_edge (then_bb, old_dest, |
2167 | EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); |
2168 | |
2169 | set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb); |
2170 | |
2171 | if (append_cond_position) |
2172 | { |
2173 | basic_block prevbb = last_cond_fallthru->src; |
2174 | redirect_edge_succ (last_cond_fallthru, new_bb); |
2175 | set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb); |
2176 | set_immediate_dominator (CDI_DOMINATORS, old_dest, |
2177 | recompute_dominator (CDI_DOMINATORS, old_dest)); |
2178 | } |
2179 | |
2180 | /* ?? Because stores may alias, they must happen in the exact |
2181 | sequence they originally happened. Save the position right after |
2182 | the (_lsm) store we just created so we can continue appending after |
2183 | it and maintain the original order. */ |
2184 | append_cond_position = then_old_edge; |
2185 | last_cond_fallthru = find_edge (new_bb, old_dest); |
2186 | |
2187 | if (!loop_has_only_one_exit) |
2188 | for (gphi_iterator gpi = gsi_start_phis (old_dest); |
2189 | !gsi_end_p (i: gpi); gsi_next (i: &gpi)) |
2190 | { |
2191 | gphi *phi = gpi.phi (); |
2192 | unsigned i; |
2193 | |
2194 | for (i = 0; i < gimple_phi_num_args (gs: phi); i++) |
2195 | if (gimple_phi_arg_edge (phi, i)->src == new_bb) |
2196 | { |
2197 | tree arg = gimple_phi_arg_def (gs: phi, index: i); |
2198 | add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION); |
2199 | update_stmt (s: phi); |
2200 | } |
2201 | } |
2202 | |
2203 | return then_bb; |
2204 | } |
2205 | |
2206 | /* When the location is a store to REF, emit code setting the flag to record the store. */ |
2207 | |
2208 | class sm_set_flag_if_changed |
2209 | { |
2210 | public: |
2211 | sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_) |
2212 | : flag (flag_), bbs (bbs_) {} |
2213 | bool operator () (mem_ref_loc *loc); |
2214 | tree flag; |
2215 | hash_set <basic_block> *bbs; |
2216 | }; |
2217 | |
2218 | bool |
2219 | sm_set_flag_if_changed::operator () (mem_ref_loc *loc) |
2220 | { |
2221 | /* Only set the flag for writes. */ |
2222 | if (is_gimple_assign (gs: loc->stmt) |
2223 | && gimple_assign_lhs_ptr (gs: loc->stmt) == loc->ref) |
2224 | { |
2225 | gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt); |
2226 | gimple *stmt = gimple_build_assign (flag, boolean_true_node); |
2227 | gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
2228 | bbs->add (k: gimple_bb (g: stmt)); |
2229 | } |
2230 | return false; |
2231 | } |
2232 | |
2233 | /* Helper function for execute_sm. On every location where REF is |
2234 | set, set an appropriate flag indicating the store. */ |
2235 | |
2236 | static tree |
2237 | execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref, |
2238 | hash_set <basic_block> *bbs) |
2239 | { |
2240 | tree flag; |
2241 | char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag"); |
2242 | flag = create_tmp_reg (boolean_type_node, str); |
2243 | for_all_locs_in_loop (loop, ref, fn: sm_set_flag_if_changed (flag, bbs)); |
2244 | return flag; |
2245 | } |
2246 | |
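| /* Auxiliary data for store motion of one memory reference: TMP_VAR is the |
|    temporary register carrying the value, STORE_FLAG the flag variable for |
|    conditional stores (or NULL_TREE), and FLAG_BBS the blocks setting it.  */ |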
2247 | struct sm_aux |
2248 | { |
2249 | tree tmp_var; |
2250 | tree store_flag; |
2251 | hash_set <basic_block> flag_bbs; |
2252 | }; |
2253 | |
2254 | /* Executes store motion of memory reference REF from LOOP. |
2255 | Exits from the LOOP are stored in EXITS. The initialization of the |
2256 | temporary variable is put to the preheader of the loop, and assignments |
2257 | to the reference from the temporary variable are emitted to exits. */ |
2258 | |
2259 | static void |
2260 | execute_sm (class loop *loop, im_mem_ref *ref, |
2261 | hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt, |
2262 | bool use_other_flag_var) |
2263 | { |
2264 | gassign *load; |
2265 | struct fmt_data fmt_data; |
2266 | struct lim_aux_data *lim_data; |
2267 | bool multi_threaded_model_p = false; |
2268 | gimple_stmt_iterator gsi; |
2269 | sm_aux *aux = new sm_aux; |
2270 | |
2271 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2272 | { |
2273 | fprintf (dump_file, "Executing store motion of "); |
2274 | print_generic_expr (dump_file, ref->mem.ref); |
2275 | fprintf (dump_file, " from loop %d\n", loop->num); |
2276 | } |
2277 | |
2278 | aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref), |
2279 | get_lsm_tmp_name (ref->mem.ref, ~0)); |
2280 | |
2281 | fmt_data.loop = loop; |
2282 | fmt_data.orig_loop = loop; |
2283 | for_each_index (&ref->mem.ref, force_move_till, &fmt_data); |
2284 | |
2285 | bool always_stored = ref_always_accessed_p (loop, ref, true); |
2286 | if (maybe_mt |
2287 | && (bb_in_transaction (bb: loop_preheader_edge (loop)->src) |
2288 | || (! flag_store_data_races && ! always_stored))) |
2289 | multi_threaded_model_p = true; |
2290 | |
2291 | if (multi_threaded_model_p && !use_other_flag_var) |
2292 | aux->store_flag |
2293 | = execute_sm_if_changed_flag_set (loop, ref, bbs: &aux->flag_bbs); |
2294 | else |
2295 | aux->store_flag = NULL_TREE; |
2296 | |
2297 | /* Remember variable setup. */ |
2298 | aux_map.put (k: ref, v: aux); |
2299 | |
2300 | rewrite_mem_refs (loop, ref, tmp_var: aux->tmp_var); |
2301 | |
2302 | /* Emit the load code before the first location of the ref in the loop |
2303 | and mark it with LIM data, so that move_computations hoists it out of |
2304 | the loop after all of its dependencies. */ |
2305 | gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); |
2306 | |
2307 | /* Avoid doing a load if there was no load of the ref in the loop. |
2308 | Esp. when the ref is not always stored we cannot optimize it |
2309 | away later. But when it is not always stored we must use a conditional |
2310 | store then. */ |
2311 | if ((!always_stored && !multi_threaded_model_p) |
2312 | || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) |
2313 | load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref)); |
2314 | else |
2315 | { |
2316 | /* If not emitting a load mark the uninitialized state on the |
2317 | loop entry as not to be warned for. */ |
2318 | tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var)); |
2319 | suppress_warning (uninit, OPT_Wuninitialized); |
2320 | load = gimple_build_assign (aux->tmp_var, uninit); |
2321 | } |
2322 | lim_data = init_lim_data (stmt: load); |
2323 | lim_data->max_loop = loop; |
2324 | lim_data->tgt_loop = loop; |
2325 | gsi_insert_before (&gsi, load, GSI_SAME_STMT); |
2326 | |
2327 | if (aux->store_flag) |
2328 | { |
2329 | load = gimple_build_assign (aux->store_flag, boolean_false_node); |
2330 | lim_data = init_lim_data (stmt: load); |
2331 | lim_data->max_loop = loop; |
2332 | lim_data->tgt_loop = loop; |
2333 | gsi_insert_before (&gsi, load, GSI_SAME_STMT); |
2334 | } |
2335 | } |
2336 | |
2337 | /* sm_ord is used for ordinary stores for which we can retain the order with |
2338 | respect to other stores. |
2339 | sm_unord is used for conditionally executed stores which need to be able |
2340 | to execute in arbitrary order with respect to other stores. |
2341 | sm_other is used for stores we do not try to apply store motion to. */ |
2342 | enum sm_kind { sm_ord, sm_unord, sm_other }; |
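| /* An entry in a store sequence: FIRST is the id of the memory reference, |
|    SECOND its store-motion kind and FROM the stored value for sm_other |
|    entries that need to be re-issued on exits.  */ |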
2343 | struct seq_entry |
2344 | { |
2345 | seq_entry () = default; |
2346 | seq_entry (unsigned f, sm_kind k, tree fr = NULL) |
2347 | : first (f), second (k), from (fr) {} |
2348 | unsigned first; |
2349 | sm_kind second; |
2350 | tree from; |
2351 | }; |
2352 | |
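| /* Materialize the stores of sequence SEQ on exit edge EX of LOOP, walking |
|    the sequence from last to first.  sm_other entries re-issue the remembered |
|    stored value, while other entries store the store-motion temporary, |
|    conditionally on the store flag where one was created.  */ |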
2353 | static void |
2354 | execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq, |
2355 | hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind, |
2356 | edge &append_cond_position, edge &last_cond_fallthru) |
2357 | { |
2358 | /* Sink the stores to exit from the loop. */ |
2359 | for (unsigned i = seq.length (); i > 0; --i) |
2360 | { |
2361 | im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first]; |
2362 | if (seq[i-1].second == sm_other) |
2363 | { |
2364 | gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE); |
2365 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2366 | { |
2367 | fprintf (dump_file, "Re-issuing dependent store of "); |
2368 | print_generic_expr (dump_file, ref->mem.ref); |
2369 | fprintf (dump_file, " from loop %d on exit %d -> %d\n", |
2370 | loop->num, ex->src->index, ex->dest->index); |
2371 | } |
2372 | gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref), |
2373 | seq[i-1].from); |
2374 | gsi_insert_on_edge (ex, store); |
2375 | } |
2376 | else |
2377 | { |
2378 | sm_aux *aux = *aux_map.get (k: ref); |
2379 | if (!aux->store_flag || kind == sm_ord) |
2380 | { |
2381 | gassign *store; |
2382 | store = gimple_build_assign (unshare_expr (ref->mem.ref), |
2383 | aux->tmp_var); |
2384 | gsi_insert_on_edge (ex, store); |
2385 | } |
2386 | else |
2387 | execute_sm_if_changed (ex, mem: ref->mem.ref, tmp_var: aux->tmp_var, |
2388 | flag: aux->store_flag, |
2389 | preheader: loop_preheader_edge (loop), flag_bbs: &aux->flag_bbs, |
2390 | append_cond_position, last_cond_fallthru); |
2391 | } |
2392 | } |
2393 | } |
2394 | |
2395 | /* Push the SM candidate at index PTR in the sequence SEQ down until |
2396 | we hit the next SM candidate. Return true if that went OK and |
2397 | false if we could not disambiguate against another unrelated ref. |
2398 | Update *AT to the index where the candidate now resides. */ |
2399 | |
2400 | static bool |
2401 | sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at) |
2402 | { |
2403 | *at = ptr; |
2404 | for (; ptr > 0; --ptr) |
2405 | { |
2406 | seq_entry &new_cand = seq[ptr]; |
2407 | seq_entry &against = seq[ptr-1]; |
2408 | if (against.second == sm_ord |
2409 | || (against.second == sm_other && against.from != NULL_TREE)) |
2410 | /* Found the tail of the sequence. */ |
2411 | break; |
2412 | /* We may not ignore self-dependences here. */ |
2413 | if (new_cand.first == against.first |
2414 | || !refs_independent_p (memory_accesses.refs_list[new_cand.first], |
2415 | memory_accesses.refs_list[against.first], |
2416 | false)) |
2417 | /* ??? Prune new_cand from the list of refs to apply SM to. */ |
2418 | return false; |
2419 | std::swap (a&: new_cand, b&: against); |
2420 | *at = ptr - 1; |
2421 | } |
2422 | return true; |
2423 | } |
2424 | |
2425 | /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ |
2426 | walking backwards from VDEF (or the end of BB if VDEF is NULL). */ |
2427 | |
2428 | static int |
2429 | sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, |
2430 | vec<seq_entry> &seq, bitmap refs_not_in_seq, |
2431 | bitmap refs_not_supported, bool forked, |
2432 | bitmap fully_visited) |
2433 | { |
2434 | if (!vdef) |
2435 | for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (i: gsi); |
2436 | gsi_prev (i: &gsi)) |
2437 | { |
2438 | vdef = gimple_vdef (g: gsi_stmt (i: gsi)); |
2439 | if (vdef) |
2440 | break; |
2441 | } |
2442 | if (!vdef) |
2443 | { |
2444 | gphi *vphi = get_virtual_phi (bb); |
2445 | if (vphi) |
2446 | vdef = gimple_phi_result (gs: vphi); |
2447 | } |
2448 | if (!vdef) |
2449 | { |
2450 | if (single_pred_p (bb)) |
2451 | /* This handles the perfect nest case. */ |
2452 | return sm_seq_valid_bb (loop, bb: single_pred (bb), vdef, |
2453 | seq, refs_not_in_seq, refs_not_supported, |
2454 | forked, fully_visited); |
2455 | return 0; |
2456 | } |
2457 | do |
2458 | { |
2459 | gimple *def = SSA_NAME_DEF_STMT (vdef); |
2460 | if (gimple_bb (g: def) != bb) |
2461 | { |
2462 | /* If we forked by processing a PHI do not allow our walk to |
2463 | merge again until we handle that robustly. */ |
2464 | if (forked) |
2465 | { |
2466 | /* Mark refs_not_in_seq as unsupported. */ |
2467 | bitmap_ior_into (refs_not_supported, refs_not_in_seq); |
2468 | return 1; |
2469 | } |
2470 | /* Otherwise it doesn't really matter if we end up in different |
2471 | BBs. */ |
2472 | bb = gimple_bb (g: def); |
2473 | } |
2474 | if (gphi *phi = dyn_cast <gphi *> (p: def)) |
2475 | { |
2476 | /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb) |
2477 | this is still linear. |
2478 | Eventually we want to cache intermediate results per BB |
2479 | (but we can't easily cache for different exits?). */ |
2480 | /* Stop at PHIs with possible backedges. */ |
2481 | if (bb == bb->loop_father->header |
2482 | || bb->flags & BB_IRREDUCIBLE_LOOP) |
2483 | { |
2484 | /* Mark refs_not_in_seq as unsupported. */ |
2485 | bitmap_ior_into (refs_not_supported, refs_not_in_seq); |
2486 | return 1; |
2487 | } |
2488 | if (gimple_phi_num_args (gs: phi) == 1) |
2489 | return sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src, |
2490 | vdef: gimple_phi_arg_def (gs: phi, index: 0), seq, |
2491 | refs_not_in_seq, refs_not_supported, |
2492 | forked: false, fully_visited); |
2493 | if (bitmap_bit_p (fully_visited, |
2494 | SSA_NAME_VERSION (gimple_phi_result (phi)))) |
2495 | return 1; |
2496 | auto_vec<seq_entry> first_edge_seq; |
2497 | auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack); |
2498 | int eret; |
2499 | bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq); |
2500 | eret = sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src, |
2501 | vdef: gimple_phi_arg_def (gs: phi, index: 0), |
2502 | seq&: first_edge_seq, |
2503 | refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported, |
2504 | forked: true, fully_visited); |
2505 | if (eret != 1) |
2506 | return -1; |
2507 | /* Simplify our lives by pruning the sequence of !sm_ord. */ |
2508 | while (!first_edge_seq.is_empty () |
2509 | && first_edge_seq.last ().second != sm_ord) |
2510 | first_edge_seq.pop (); |
2511 | for (unsigned int i = 1; i < gimple_phi_num_args (gs: phi); ++i) |
2512 | { |
2513 | tree vuse = gimple_phi_arg_def (gs: phi, index: i); |
2514 | edge e = gimple_phi_arg_edge (phi, i); |
2515 | auto_vec<seq_entry> edge_seq; |
2516 | bitmap_and_compl (tem_refs_not_in_seq, |
2517 | refs_not_in_seq, refs_not_supported); |
2518 | /* If we've marked all refs we search for as unsupported |
2519 | we can stop processing and use the sequence as before |
2520 | the PHI. */ |
2521 | if (bitmap_empty_p (map: tem_refs_not_in_seq)) |
2522 | return 1; |
2523 | eret = sm_seq_valid_bb (loop, bb: e->src, vdef: vuse, seq&: edge_seq, |
2524 | refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported, |
2525 | forked: true, fully_visited); |
2526 | if (eret != 1) |
2527 | return -1; |
2528 | /* Simplify our lives by pruning the sequence of !sm_ord. */ |
2529 | while (!edge_seq.is_empty () |
2530 | && edge_seq.last ().second != sm_ord) |
2531 | edge_seq.pop (); |
2532 | unsigned min_len = MIN(first_edge_seq.length (), |
2533 | edge_seq.length ()); |
2534 | /* Incrementally merge seqs into first_edge_seq. */ |
2535 | int first_uneq = -1; |
2536 | auto_vec<seq_entry, 2> extra_refs; |
2537 | for (unsigned int i = 0; i < min_len; ++i) |
2538 | { |
2539 | /* ??? We can more intelligently merge when we face different |
2540 | order by additional sinking operations in one sequence. |
2541 | For now we simply mark them as to be processed by the |
2542 | not order-preserving SM code. */ |
2543 | if (first_edge_seq[i].first != edge_seq[i].first) |
2544 | { |
2545 | if (first_edge_seq[i].second == sm_ord) |
2546 | bitmap_set_bit (refs_not_supported, |
2547 | first_edge_seq[i].first); |
2548 | if (edge_seq[i].second == sm_ord) |
2549 | bitmap_set_bit (refs_not_supported, edge_seq[i].first); |
2550 | first_edge_seq[i].second = sm_other; |
2551 | first_edge_seq[i].from = NULL_TREE; |
2552 | /* Record the dropped refs for later processing. */ |
2553 | if (first_uneq == -1) |
2554 | first_uneq = i; |
2555 | extra_refs.safe_push (obj: seq_entry (edge_seq[i].first, |
2556 | sm_other, NULL_TREE)); |
2557 | } |
2558 | /* sm_other prevails. */ |
2559 | else if (first_edge_seq[i].second != edge_seq[i].second) |
2560 | { |
2561 | /* Make sure the ref is marked as not supported. */ |
2562 | bitmap_set_bit (refs_not_supported, |
2563 | first_edge_seq[i].first); |
2564 | first_edge_seq[i].second = sm_other; |
2565 | first_edge_seq[i].from = NULL_TREE; |
2566 | } |
2567 | else if (first_edge_seq[i].second == sm_other |
2568 | && first_edge_seq[i].from != NULL_TREE |
2569 | && (edge_seq[i].from == NULL_TREE |
2570 | || !operand_equal_p (first_edge_seq[i].from, |
2571 | edge_seq[i].from, flags: 0))) |
2572 | first_edge_seq[i].from = NULL_TREE; |
2573 | } |
2574 | /* Any excess elements become sm_other since they are now |
2575 | conditionally executed. */ |
2576 | if (first_edge_seq.length () > edge_seq.length ()) |
2577 | { |
2578 | for (unsigned i = edge_seq.length (); |
2579 | i < first_edge_seq.length (); ++i) |
2580 | { |
2581 | if (first_edge_seq[i].second == sm_ord) |
2582 | bitmap_set_bit (refs_not_supported, |
2583 | first_edge_seq[i].first); |
2584 | first_edge_seq[i].second = sm_other; |
2585 | } |
2586 | } |
2587 | else if (edge_seq.length () > first_edge_seq.length ()) |
2588 | { |
2589 | if (first_uneq == -1) |
2590 | first_uneq = first_edge_seq.length (); |
2591 | for (unsigned i = first_edge_seq.length (); |
2592 | i < edge_seq.length (); ++i) |
2593 | { |
2594 | if (edge_seq[i].second == sm_ord) |
2595 | bitmap_set_bit (refs_not_supported, edge_seq[i].first); |
2596 | extra_refs.safe_push (obj: seq_entry (edge_seq[i].first, |
2597 | sm_other, NULL_TREE)); |
2598 | } |
2599 | } |
2600 | /* Put unmerged refs at first_uneq to force dependence checking |
2601 | on them. */ |
2602 | if (first_uneq != -1) |
2603 | { |
2604 | /* Missing ordered_splice_at. */ |
2605 | if ((unsigned)first_uneq == first_edge_seq.length ()) |
2606 | first_edge_seq.safe_splice (src: extra_refs); |
2607 | else |
2608 | { |
2609 | unsigned fes_length = first_edge_seq.length (); |
2610 | first_edge_seq.safe_grow (len: fes_length |
2611 | + extra_refs.length ()); |
2612 | memmove (dest: &first_edge_seq[first_uneq + extra_refs.length ()], |
2613 | src: &first_edge_seq[first_uneq], |
2614 | n: (fes_length - first_uneq) * sizeof (seq_entry)); |
2615 | memcpy (dest: &first_edge_seq[first_uneq], |
2616 | src: extra_refs.address (), |
2617 | n: extra_refs.length () * sizeof (seq_entry)); |
2618 | } |
2619 | } |
2620 | } |
2621 | /* Use the sequence from the first edge and push SMs down. */ |
2622 | for (unsigned i = 0; i < first_edge_seq.length (); ++i) |
2623 | { |
2624 | unsigned id = first_edge_seq[i].first; |
2625 | seq.safe_push (obj: first_edge_seq[i]); |
2626 | unsigned new_idx; |
2627 | if ((first_edge_seq[i].second == sm_ord |
2628 | || (first_edge_seq[i].second == sm_other |
2629 | && first_edge_seq[i].from != NULL_TREE)) |
2630 | && !sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx)) |
2631 | { |
2632 | if (first_edge_seq[i].second == sm_ord) |
2633 | bitmap_set_bit (refs_not_supported, id); |
2634 | /* Mark it sm_other. */ |
2635 | seq[new_idx].second = sm_other; |
2636 | seq[new_idx].from = NULL_TREE; |
2637 | } |
2638 | } |
2639 | bitmap_set_bit (fully_visited, |
2640 | SSA_NAME_VERSION (gimple_phi_result (phi))); |
2641 | return 1; |
2642 | } |
2643 | lim_aux_data *data = get_lim_data (stmt: def); |
2644 | gcc_assert (data); |
2645 | if (data->ref == UNANALYZABLE_MEM_ID) |
2646 | return -1; |
2647 | /* Stop at memory references which we can't move. */ |
2648 | else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node |
2649 | || TREE_THIS_VOLATILE |
2650 | (memory_accesses.refs_list[data->ref]->mem.ref)) |
2651 | { |
2652 | /* Mark refs_not_in_seq as unsupported. */ |
2653 | bitmap_ior_into (refs_not_supported, refs_not_in_seq); |
2654 | return 1; |
2655 | } |
2656 | /* One of the stores we want to apply SM to and we've not yet seen. */ |
2657 | else if (bitmap_clear_bit (refs_not_in_seq, data->ref)) |
2658 | { |
2659 | seq.safe_push (obj: seq_entry (data->ref, sm_ord)); |
2660 | |
2661 | /* 1) push it down the queue until a SMed |
2662 | and not ignored ref is reached, skipping all not SMed refs |
2663 | and ignored refs via non-TBAA disambiguation. */ |
2664 | unsigned new_idx; |
2665 | if (!sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx) |
2666 | /* If that fails but we did not fork yet continue, we'll see |
2667 | to re-materialize all of the stores in the sequence then. |
2668 | Further stores will only be pushed up to this one. */ |
2669 | && forked) |
2670 | { |
2671 | bitmap_set_bit (refs_not_supported, data->ref); |
2672 | /* Mark it sm_other. */ |
2673 | seq[new_idx].second = sm_other; |
2674 | } |
2675 | |
2676 | /* 2) check whether we've seen all refs we want to SM and if so |
2677 | declare success for the active exit */ |
2678 | if (bitmap_empty_p (map: refs_not_in_seq)) |
2679 | return 1; |
2680 | } |
2681 | else |
2682 | /* Another store not part of the final sequence. Simply push it. */ |
2683 | seq.safe_push (obj: seq_entry (data->ref, sm_other, |
2684 | gimple_assign_rhs1 (gs: def))); |
2685 | |
2686 | vdef = gimple_vuse (g: def); |
2687 | } |
2688 | while (1); |
2689 | } |
2690 | |
2691 | /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit |
2692 | edges of the LOOP. */ |
2693 | |
2694 | static void |
2695 | hoist_memory_references (class loop *loop, bitmap mem_refs, |
2696 | const vec<edge> &exits) |
2697 | { |
2698 | im_mem_ref *ref; |
2699 | unsigned i; |
2700 | bitmap_iterator bi; |
2701 | |
2702 | /* There's a special case we can use ordered re-materialization for |
2703 | conditionally executed stores which is when all stores in the loop |
2704 | happen in the same basic-block. In that case we know we'll reach |
2705 | all stores and thus can simply process that BB and emit a single |
2706 | conditional block of ordered materializations. See PR102436. */ |
2707 | basic_block single_store_bb = NULL; |
2708 | EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num], |
2709 | 0, i, bi) |
2710 | { |
2711 | bool fail = false; |
2712 | ref = memory_accesses.refs_list[i]; |
2713 | for (auto loc : ref->accesses_in_loop) |
2714 | if (!gimple_vdef (g: loc.stmt)) |
2715 | ; |
2716 | else if (!single_store_bb) |
2717 | { |
2718 | single_store_bb = gimple_bb (g: loc.stmt); |
2719 | bool conditional = false; |
2720 | for (edge e : exits) |
2721 | if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb)) |
2722 | { |
2723 | /* Conditional as seen from e. */ |
2724 | conditional = true; |
2725 | break; |
2726 | } |
2727 | if (!conditional) |
2728 | { |
2729 | fail = true; |
2730 | break; |
2731 | } |
2732 | } |
2733 | else if (single_store_bb != gimple_bb (g: loc.stmt)) |
2734 | { |
2735 | fail = true; |
2736 | break; |
2737 | } |
2738 | if (fail) |
2739 | { |
2740 | single_store_bb = NULL; |
2741 | break; |
2742 | } |
2743 | } |
2744 | if (single_store_bb) |
2745 | { |
2746 | /* Analyze the single block with stores. */ |
2747 | auto_bitmap fully_visited; |
2748 | auto_bitmap refs_not_supported; |
2749 | auto_bitmap refs_not_in_seq; |
2750 | auto_vec<seq_entry> seq; |
2751 | bitmap_copy (refs_not_in_seq, mem_refs); |
2752 | int res = sm_seq_valid_bb (loop, bb: single_store_bb, NULL_TREE, |
2753 | seq, refs_not_in_seq, refs_not_supported, |
2754 | forked: false, fully_visited); |
2755 | if (res != 1) |
2756 | { |
2757 | /* Unhandled refs can still fail this. */ |
2758 | bitmap_clear (mem_refs); |
2759 | return; |
2760 | } |
2761 | |
2762 | /* We cannot handle sm_other since we neither remember the |
2763 | stored location nor the value at the point we execute them. */ |
2764 | for (unsigned i = 0; i < seq.length (); ++i) |
2765 | { |
2766 | unsigned new_i; |
2767 | if (seq[i].second == sm_other |
2768 | && seq[i].from != NULL_TREE) |
2769 | seq[i].from = NULL_TREE; |
2770 | else if ((seq[i].second == sm_ord |
2771 | || (seq[i].second == sm_other |
2772 | && seq[i].from != NULL_TREE)) |
2773 | && !sm_seq_push_down (seq, ptr: i, at: &new_i)) |
2774 | { |
2775 | bitmap_set_bit (refs_not_supported, seq[new_i].first); |
2776 | seq[new_i].second = sm_other; |
2777 | seq[new_i].from = NULL_TREE; |
2778 | } |
2779 | } |
2780 | bitmap_and_compl_into (mem_refs, refs_not_supported); |
2781 | if (bitmap_empty_p (map: mem_refs)) |
2782 | return; |
2783 | |
2784 | /* Prune seq. */ |
2785 | while (seq.last ().second == sm_other |
2786 | && seq.last ().from == NULL_TREE) |
2787 | seq.pop (); |
2788 | |
2789 | hash_map<im_mem_ref *, sm_aux *> aux_map; |
2790 | |
2791 | /* Execute SM but delay the store materialization for ordered |
2792 | sequences on exit. */ |
2793 | bool first_p = true; |
2794 | EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi) |
2795 | { |
2796 | ref = memory_accesses.refs_list[i]; |
2797 | execute_sm (loop, ref, aux_map, true, !first_p); |
2798 | first_p = false; |
2799 | } |
2800 | |
2801 | /* Get at the single flag variable we eventually produced. */ |
2802 | im_mem_ref *ref |
2803 | = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)]; |
2804 | sm_aux *aux = *aux_map.get (k: ref); |
2805 | |
2806 | /* Materialize ordered store sequences on exits. */ |
2807 | edge e; |
2808 | FOR_EACH_VEC_ELT (exits, i, e) |
2809 | { |
2810 | edge append_cond_position = NULL; |
2811 | edge last_cond_fallthru = NULL; |
2812 | edge insert_e = e; |
2813 | /* Construct the single flag variable control flow and insert |
2814 | the ordered seq of stores in the then block. With |
2815 | -fstore-data-races we can do the stores unconditionally. */ |
2816 | if (aux->store_flag) |
2817 | insert_e |
2818 | = single_pred_edge |
2819 | (bb: execute_sm_if_changed (ex: e, NULL_TREE, NULL_TREE, |
2820 | flag: aux->store_flag, |
2821 | preheader: loop_preheader_edge (loop), |
2822 | flag_bbs: &aux->flag_bbs, append_cond_position, |
2823 | last_cond_fallthru)); |
2824 | execute_sm_exit (loop, ex: insert_e, seq, aux_map, kind: sm_ord, |
2825 | append_cond_position, last_cond_fallthru); |
2826 | gsi_commit_one_edge_insert (insert_e, NULL); |
2827 | } |
2828 | |
2829 | for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin (); |
2830 | iter != aux_map.end (); ++iter) |
2831 | delete (*iter).second; |
2832 | |
2833 | return; |
2834 | } |
2835 | |
2836 | /* To address PR57359, before actually applying store motion check |
2837 | the candidates found for validity with regard to reordering |
2838 | relative to other stores, which we have until here disambiguated |
2839 | using TBAA, which isn't valid for this purpose. |
2840 | What matters is the order of the last stores to the mem_refs |
2841 | with respect to the other stores of the loop at the point of the |
2842 | loop exits. */ |
2843 | |
2844 | /* For each exit compute the store order, pruning from mem_refs |
2845 | on the fly. */ |
2846 | /* The complexity of this is at least |
2847 | O(number of exits * number of SM refs) but more approaching |
2848 | O(number of exits * number of SM refs * number of stores). */ |
2849 | /* ??? Somehow do this in a single sweep over the loop body. */ |
2850 | auto_vec<std::pair<edge, vec<seq_entry> > > sms; |
2851 | auto_bitmap refs_not_supported (&lim_bitmap_obstack); |
2852 | edge e; |
2853 | FOR_EACH_VEC_ELT (exits, i, e) |
2854 | { |
2855 | vec<seq_entry> seq; |
2856 | seq.create (nelems: 4); |
2857 | auto_bitmap refs_not_in_seq (&lim_bitmap_obstack); |
2858 | bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported); |
2859 | if (bitmap_empty_p (map: refs_not_in_seq)) |
2860 | { |
2861 | seq.release (); |
2862 | break; |
2863 | } |
2864 | auto_bitmap fully_visited; |
2865 | int res = sm_seq_valid_bb (loop, bb: e->src, NULL_TREE, |
2866 | seq, refs_not_in_seq, |
2867 | refs_not_supported, forked: false, |
2868 | fully_visited); |
2869 | if (res != 1) |
2870 | { |
2871 | bitmap_copy (refs_not_supported, mem_refs); |
2872 | seq.release (); |
2873 | break; |
2874 | } |
2875 | sms.safe_push (obj: std::make_pair (x&: e, y&: seq)); |
2876 | } |
2877 | |
2878 | /* Prune refs that got marked unsupported from the sequences of earlier processed exits. */ |
2879 | bool changed = !bitmap_empty_p (map: refs_not_supported); |
2880 | while (changed) |
2881 | { |
2882 | changed = false; |
2883 | std::pair<edge, vec<seq_entry> > *seq; |
2884 | FOR_EACH_VEC_ELT (sms, i, seq) |
2885 | { |
2886 | bool need_to_push = false; |
2887 | for (unsigned i = 0; i < seq->second.length (); ++i) |
2888 | { |
2889 | sm_kind kind = seq->second[i].second; |
2890 | if (kind == sm_other && seq->second[i].from == NULL_TREE) |
2891 | break; |
2892 | unsigned id = seq->second[i].first; |
2893 | unsigned new_idx; |
2894 | if (kind == sm_ord |
2895 | && bitmap_bit_p (refs_not_supported, id)) |
2896 | { |
2897 | seq->second[i].second = sm_other; |
2898 | gcc_assert (seq->second[i].from == NULL_TREE); |
2899 | need_to_push = true; |
2900 | } |
2901 | else if (need_to_push |
2902 | && !sm_seq_push_down (seq&: seq->second, ptr: i, at: &new_idx)) |
2903 | { |
2904 | /* We need to push down both sm_ord and sm_other |
2905 | but for the latter we need to disqualify all |
2906 | following refs. */ |
2907 | if (kind == sm_ord) |
2908 | { |
2909 | if (bitmap_set_bit (refs_not_supported, id)) |
2910 | changed = true; |
2911 | seq->second[new_idx].second = sm_other; |
2912 | } |
2913 | else |
2914 | { |
2915 | for (unsigned j = seq->second.length () - 1; |
2916 | j > new_idx; --j) |
2917 | if (seq->second[j].second == sm_ord |
2918 | && bitmap_set_bit (refs_not_supported, |
2919 | seq->second[j].first)) |
2920 | changed = true; |
2921 | seq->second.truncate (size: new_idx); |
2922 | break; |
2923 | } |
2924 | } |
2925 | } |
2926 | } |
2927 | } |
2928 | std::pair<edge, vec<seq_entry> > *seq; |
2929 | FOR_EACH_VEC_ELT (sms, i, seq) |
2930 | { |
2931 | /* Prune sm_other from the end. */ |
2932 | while (!seq->second.is_empty () |
2933 | && seq->second.last ().second == sm_other) |
2934 | seq->second.pop (); |
2935 | /* Prune duplicates from the start. */ |
2936 | auto_bitmap seen (&lim_bitmap_obstack); |
2937 | unsigned j, k; |
2938 | for (j = k = 0; j < seq->second.length (); ++j) |
2939 | if (bitmap_set_bit (seen, seq->second[j].first)) |
2940 | { |
2941 | if (k != j) |
2942 | seq->second[k] = seq->second[j]; |
2943 | ++k; |
2944 | } |
2945 | seq->second.truncate (size: k); |
2946 | /* And verify. */ |
2947 | seq_entry *e; |
2948 | FOR_EACH_VEC_ELT (seq->second, j, e) |
2949 | gcc_assert (e->second == sm_ord |
2950 | || (e->second == sm_other && e->from != NULL_TREE)); |
2951 | } |
2952 | |
2953 | /* Verify dependence for refs we cannot handle with the order preserving |
2954 | code (refs_not_supported) or prune them from mem_refs. */ |
2955 | auto_vec<seq_entry> unord_refs; |
2956 | EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi) |
2957 | { |
2958 | ref = memory_accesses.refs_list[i]; |
2959 | if (!ref_indep_loop_p (loop, ref, sm_waw)) |
2960 | bitmap_clear_bit (mem_refs, i); |
2961 | /* We've now verified that the store order of REF with respect to all |
2962 | other stores in the loop does not matter. */ |
2963 | else |
2964 | unord_refs.safe_push (seq_entry (i, sm_unord)); |
2965 | } |
2966 | |
2967 | hash_map<im_mem_ref *, sm_aux *> aux_map; |
2968 | |
2969 | /* Execute SM but delay the store materialization for ordered |
2970 | sequences on exit. */ |
2971 | EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi) |
2972 | { |
2973 | ref = memory_accesses.refs_list[i]; |
2974 | execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i), |
2975 | false); |
2976 | } |
2977 | |
2978 | /* Materialize ordered store sequences on exits. */ |
2979 | FOR_EACH_VEC_ELT (exits, i, e) |
2980 | { |
2981 | edge append_cond_position = NULL; |
2982 | edge last_cond_fallthru = NULL; |
2983 | if (i < sms.length ()) |
2984 | { |
2985 | gcc_assert (sms[i].first == e); |
2986 | execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord, |
2987 | append_cond_position, last_cond_fallthru); |
2988 | sms[i].second.release (); |
2989 | } |
2990 | if (!unord_refs.is_empty ()) |
2991 | execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord, |
2992 | append_cond_position, last_cond_fallthru); |
2993 | /* Commit edge inserts here to preserve the order of stores |
2994 | when an exit exits multiple loops. */ |
2995 | gsi_commit_one_edge_insert (e, NULL); |
2996 | } |
2997 | |
2998 | for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin (); |
2999 | iter != aux_map.end (); ++iter) |
3000 | delete (*iter).second; |
3001 | } |
3002 | |
3003 | class ref_always_accessed |
3004 | { |
3005 | public: |
3006 | ref_always_accessed (class loop *loop_, bool stored_p_) |
3007 | : loop (loop_), stored_p (stored_p_) {} |
3008 | bool operator () (mem_ref_loc *loc); |
3009 | class loop *loop; |
3010 | bool stored_p; |
3011 | }; |
3012 | |
3013 | bool |
3014 | ref_always_accessed::operator () (mem_ref_loc *loc) |
3015 | { |
3016 | class loop *must_exec; |
3017 | |
3018 | struct lim_aux_data *lim_data = get_lim_data (loc->stmt); |
3019 | if (!lim_data) |
3020 | return false; |
3021 | |
3022 | /* If we require an always executed store make sure the statement |
3023 | is a store. */ |
3024 | if (stored_p) |
3025 | { |
3026 | tree lhs = gimple_get_lhs (loc->stmt); |
3027 | if (!lhs |
3028 | || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs))) |
3029 | return false; |
3030 | } |
3031 | |
3032 | must_exec = lim_data->always_executed_in; |
3033 | if (!must_exec) |
3034 | return false; |
3035 | |
3036 | if (must_exec == loop |
3037 | || flow_loop_nested_p (must_exec, loop)) |
3038 | return true; |
3039 | |
3040 | return false; |
3041 | } |
3042 | |
3043 | /* Returns true if REF is always accessed in LOOP. If STORED_P is true |
3044 | make sure REF is always stored to in LOOP. */ |
3045 | |
3046 | static bool |
3047 | ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p) |
3048 | { |
3049 | return for_all_locs_in_loop (loop, ref, |
3050 | ref_always_accessed (loop, stored_p)); |
3051 | } |
3052 | |
3053 | /* Returns true if REF1 and REF2 are independent. */ |
3054 | |
3055 | static bool |
3056 | refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p) |
3057 | { |
3058 | if (ref1 == ref2) |
3059 | return true; |
3060 | |
3061 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3062 | fprintf (stream: dump_file, format: "Querying dependency of refs %u and %u: " , |
3063 | ref1->id, ref2->id); |
3064 | |
3065 | if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p)) |
3066 | { |
3067 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3068 | fprintf (dump_file, "dependent.\n"); |
3069 | return false; |
3070 | } |
3071 | else |
3072 | { |
3073 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3074 | fprintf (dump_file, "independent.\n"); |
3075 | return true; |
3076 | } |
3077 | } |
3078 | |
3079 | /* Returns true if REF is independent of all other accesses in LOOP. |
3080 | KIND specifies the kind of dependence to consider. |
3081 | lim_raw assumes REF is not stored in LOOP and disambiguates RAW |
3082 | dependences so if true REF can be hoisted out of LOOP |
3083 | sm_war disambiguates a store REF against all other loads to see |
3084 | whether the store can be sunk across loads out of LOOP |
3085 | sm_waw disambiguates a store REF against all other stores to see |
3086 | whether the store can be sunk across stores out of LOOP. */ |
3087 | |
3088 | static bool |
3089 | ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind) |
3090 | { |
3091 | bool indep_p = true; |
3092 | bitmap refs_to_check; |
3093 | |
3094 | if (kind == sm_war) |
3095 | refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num]; |
3096 | else |
3097 | refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num]; |
3098 | |
3099 | if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID) |
3100 | || ref->mem.ref == error_mark_node) |
3101 | indep_p = false; |
3102 | else |
3103 | { |
3104 | /* tri-state, { unknown, independent, dependent } */ |
3105 | dep_state state = query_loop_dependence (loop, ref, kind); |
3106 | if (state != dep_unknown) |
3107 | return state == dep_independent ? true : false; |
3108 | |
3109 | class loop *inner = loop->inner; |
3110 | while (inner) |
3111 | { |
	  if (!ref_indep_loop_p (inner, ref, kind))
3113 | { |
3114 | indep_p = false; |
3115 | break; |
3116 | } |
3117 | inner = inner->next; |
3118 | } |
3119 | |
3120 | if (indep_p) |
3121 | { |
3122 | unsigned i; |
3123 | bitmap_iterator bi; |
3124 | EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) |
3125 | { |
3126 | im_mem_ref *aref = memory_accesses.refs_list[i]; |
3127 | if (aref->mem.ref == error_mark_node) |
3128 | { |
3129 | gimple *stmt = aref->accesses_in_loop[0].stmt; |
3130 | if ((kind == sm_war |
3131 | && ref_maybe_used_by_stmt_p (stmt, &ref->mem, |
3132 | kind != sm_waw)) |
3133 | || stmt_may_clobber_ref_p_1 (stmt, &ref->mem, |
3134 | kind != sm_waw)) |
3135 | { |
3136 | indep_p = false; |
3137 | break; |
3138 | } |
3139 | } |
	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
3141 | { |
3142 | indep_p = false; |
3143 | break; |
3144 | } |
3145 | } |
3146 | } |
3147 | } |
3148 | |
3149 | if (dump_file && (dump_flags & TDF_DETAILS)) |
    fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");
3153 | |
3154 | /* Record the computed result in the cache. */ |
3155 | record_loop_dependence (loop, ref, kind, |
			  indep_p ? dep_independent : dep_dependent);
3157 | |
3158 | return indep_p; |
3159 | } |
3160 | |
3161 | class ref_in_loop_hot_body |
3162 | { |
3163 | public: |
3164 | ref_in_loop_hot_body (class loop *loop_) : l (loop_) {} |
3165 | bool operator () (mem_ref_loc *loc); |
3166 | class loop *l; |
3167 | }; |
3168 | |
/* Check for the coldest loop between loop L and the innermost loop.  If there
   is a cold loop between L and INNER_LOOP, store motion can be performed;
   with no cold loop there is no store motion.  get_coldest_out_loop also
   handles the case when L is INNER_LOOP itself.  */
3173 | bool |
3174 | ref_in_loop_hot_body::operator () (mem_ref_loc *loc) |
3175 | { |
  basic_block curr_bb = gimple_bb (loc->stmt);
3177 | class loop *inner_loop = curr_bb->loop_father; |
  return get_coldest_out_loop (l, inner_loop, curr_bb);
3179 | } |
3180 | |
3181 | |
3182 | /* Returns true if we can perform store motion of REF from LOOP. */ |
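/* The rewrite performed when store motion succeeds (see
   hoist_memory_references) is, roughly and with made-up names,

     while (cond)
       *p = f ();

   becoming

     tmp = *p;
     while (cond)
       tmp = f ();
     *p = tmp;

   where the initial load is emitted only when it is needed and the
   final store is placed on the loop exits.  The checks below reject
   references for which this rewrite would be invalid or unprofitable.  */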
3183 | |
3184 | static bool |
3185 | can_sm_ref_p (class loop *loop, im_mem_ref *ref) |
3186 | { |
3187 | tree base; |
3188 | |
3189 | /* Can't hoist unanalyzable refs. */ |
3190 | if (!MEM_ANALYZABLE (ref)) |
3191 | return false; |
3192 | |
3193 | /* Can't hoist/sink aggregate copies. */ |
3194 | if (ref->mem.ref == error_mark_node) |
3195 | return false; |
3196 | |
3197 | /* It should be movable. */ |
3198 | if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref)) |
3199 | || TREE_THIS_VOLATILE (ref->mem.ref) |
3200 | || !for_each_index (&ref->mem.ref, may_move_till, loop)) |
3201 | return false; |
3202 | |
  /* If it can throw, fail; we do not properly update EH info.  */
3204 | if (tree_could_throw_p (ref->mem.ref)) |
3205 | return false; |
3206 | |
3207 | /* If it can trap, it must be always executed in LOOP. |
3208 | Readonly memory locations may trap when storing to them, but |
3209 | tree_could_trap_p is a predicate for rvalues, so check that |
3210 | explicitly. */ |
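  /* For instance (a made-up sketch), in

       static const char c;
       for (i = 0; i < n; i++)
	 if (0)
	   *(char *) &c = 1;

     the conditional store never executes, so emitting it unconditionally
     on the loop exits would trap on read-only memory; hence the
     requirement below that such a ref is always stored to in LOOP.  */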
  base = get_base_address (ref->mem.ref);
3212 | if ((tree_could_trap_p (ref->mem.ref) |
3213 | || (DECL_P (base) && TREE_READONLY (base))) |
3214 | /* ??? We can at least use false here, allowing loads? We |
3215 | are forcing conditional stores if the ref is not always |
3216 | stored to later anyway. So this would only guard |
3217 | the load we need to emit. Thus when the ref is not |
3218 | loaded we can elide this completely? */ |
      && !ref_always_accessed_p (loop, ref, true))
3220 | return false; |
3221 | |
3222 | /* Verify all loads of ref can be hoisted. */ |
3223 | if (ref->loaded |
3224 | && bitmap_bit_p (ref->loaded, loop->num) |
      && !ref_indep_loop_p (loop, ref, lim_raw))
3226 | return false; |
3227 | |
3228 | /* Verify the candidate can be disambiguated against all loads, |
3229 | that is, we can elide all in-loop stores. Disambiguation |
3230 | against stores is done later when we cannot guarantee preserving |
3231 | the order of stores. */ |
  if (!ref_indep_loop_p (loop, ref, sm_war))
3233 | return false; |
3234 | |
  /* Verify whether the candidate is hot for LOOP.  Only do store motion if
     the candidate's profile count is hot.  Statements in a cold BB shouldn't
     be moved out of their loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3239 | return false; |
3240 | |
3241 | return true; |
3242 | } |
3243 | |
/* Marks in REFS_TO_SM the references in LOOP for which store motion should
   be performed.  SM_EXECUTED is the set of references for which store motion
   was performed in one of the outer loops.  */
3247 | |
3248 | static void |
3249 | find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm) |
3250 | { |
3251 | bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num]; |
3252 | unsigned i; |
3253 | bitmap_iterator bi; |
3254 | im_mem_ref *ref; |
3255 | |
3256 | EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi) |
3257 | { |
3258 | ref = memory_accesses.refs_list[i]; |
      if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3260 | bitmap_set_bit (refs_to_sm, i); |
3261 | } |
3262 | } |
3263 | |
/* Checks whether LOOP (with exits stored in the EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */
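/* Store motion needs to emit the moved store on every exit edge; abnormal
   and exception exit edges cannot have such insertions placed on them, so
   loops exiting through them are rejected (a simplified summary of the
   check below).  */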
3267 | |
3268 | static bool |
3269 | loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED, |
3270 | const vec<edge> &exits) |
3271 | { |
3272 | unsigned i; |
3273 | edge ex; |
3274 | |
3275 | FOR_EACH_VEC_ELT (exits, i, ex) |
3276 | if (ex->flags & (EDGE_ABNORMAL | EDGE_EH)) |
3277 | return false; |
3278 | |
3279 | return true; |
3280 | } |
3281 | |
/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was executed in one of the outer loops.  */
3285 | |
3286 | static void |
3287 | store_motion_loop (class loop *loop, bitmap sm_executed) |
3288 | { |
3289 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
3290 | class loop *subloop; |
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3292 | |
3293 | if (loop_suitable_for_sm (loop, exits)) |
3294 | { |
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      if (!bitmap_empty_p (sm_in_loop))
	hoist_memory_references (loop, sm_in_loop, exits);
3298 | } |
3299 | |
3300 | bitmap_ior_into (sm_executed, sm_in_loop); |
3301 | for (subloop = loop->inner; subloop != NULL; subloop = subloop->next) |
    store_motion_loop (subloop, sm_executed);
3303 | bitmap_and_compl_into (sm_executed, sm_in_loop); |
3304 | BITMAP_FREE (sm_in_loop); |
3305 | } |
3306 | |
3307 | /* Try to perform store motion for all memory references modified inside |
3308 | loops. */ |
3309 | |
3310 | static void |
3311 | do_store_motion (void) |
3312 | { |
3313 | class loop *loop; |
  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3315 | |
3316 | for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) |
3317 | store_motion_loop (loop, sm_executed); |
3318 | |
3319 | BITMAP_FREE (sm_executed); |
3320 | } |
3321 | |
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block BB it records the outermost loop for which
   execution of that loop's header implies execution of BB.  CONTAINS_CALL
   is the bitmap of blocks that contain a nonpure call.  */
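/* For example, in

     for (;;)
       {
	 a = x + y;
	 if (c)
	   b = x * y;
       }

   the block computing A executes whenever the loop's header does, so it
   gets ALWAYS_EXECUTED_IN set to the loop, whereas the conditionally
   executed block computing B does not (names are illustrative only).  */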
3326 | |
3327 | static void |
3328 | fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) |
3329 | { |
3330 | basic_block bb = NULL, last = NULL; |
3331 | edge e; |
3332 | class loop *inn_loop = loop; |
3333 | |
3334 | if (ALWAYS_EXECUTED_IN (loop->header) == NULL) |
3335 | { |
3336 | auto_vec<basic_block, 64> worklist; |
      worklist.reserve_exact (loop->num_nodes);
      worklist.quick_push (loop->header);
3339 | do |
3340 | { |
3341 | edge_iterator ei; |
3342 | bb = worklist.pop (); |
3343 | |
3344 | if (!flow_bb_inside_loop_p (inn_loop, bb)) |
3345 | { |
3346 | /* When we are leaving a possibly infinite inner loop |
3347 | we have to stop processing. */ |
3348 | if (!finite_loop_p (inn_loop)) |
3349 | break; |
3350 | /* If the loop was finite we can continue with processing |
3351 | the loop we exited to. */ |
3352 | inn_loop = bb->loop_father; |
3353 | } |
3354 | |
3355 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) |
3356 | last = bb; |
3357 | |
	  if (bitmap_bit_p (contains_call, bb->index))
3359 | break; |
3360 | |
3361 | /* If LOOP exits from this BB stop processing. */ |
3362 | FOR_EACH_EDGE (e, ei, bb->succs) |
3363 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
3364 | break; |
3365 | if (e) |
3366 | break; |
3367 | |
3368 | /* A loop might be infinite (TODO use simple loop analysis |
3369 | to disprove this if possible). */ |
3370 | if (bb->flags & BB_IRREDUCIBLE_LOOP) |
3371 | break; |
3372 | |
3373 | if (bb->loop_father->header == bb) |
3374 | /* Record that we enter into a subloop since it might not |
3375 | be finite. */ |
3376 | /* ??? Entering into a not always executed subloop makes |
3377 | fill_always_executed_in quadratic in loop depth since |
3378 | we walk those loops N times. This is not a problem |
3379 | in practice though, see PR102253 for a worst-case testcase. */ |
3380 | inn_loop = bb->loop_father; |
3381 | |
3382 | /* Walk the body of LOOP sorted by dominance relation. Additionally, |
3383 | if a basic block S dominates the latch, then only blocks dominated |
3384 | by S are after it. |
3385 | This is get_loop_body_in_dom_order using a worklist algorithm and |
3386 | stopping once we are no longer interested in visiting further |
3387 | blocks. */ |
3388 | unsigned old_len = worklist.length (); |
3389 | unsigned postpone = 0; |
3390 | for (basic_block son = first_dom_son (CDI_DOMINATORS, bb); |
3391 | son; |
3392 | son = next_dom_son (CDI_DOMINATORS, son)) |
3393 | { |
3394 | if (!flow_bb_inside_loop_p (loop, son)) |
3395 | continue; |
3396 | if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) |
3397 | postpone = worklist.length (); |
	      worklist.quick_push (son);
3399 | } |
3400 | if (postpone) |
3401 | /* Postponing the block that dominates the latch means |
3402 | processing it last and thus putting it earliest in the |
3403 | worklist. */ |
	    std::swap (worklist[old_len], worklist[postpone]);
3405 | } |
3406 | while (!worklist.is_empty ()); |
3407 | |
3408 | while (1) |
3409 | { |
3410 | if (dump_enabled_p ()) |
	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3412 | last->index, loop->num); |
3413 | SET_ALWAYS_EXECUTED_IN (last, loop); |
3414 | if (last == loop->header) |
3415 | break; |
3416 | last = get_immediate_dominator (CDI_DOMINATORS, last); |
3417 | } |
3418 | } |
3419 | |
3420 | for (loop = loop->inner; loop; loop = loop->next) |
3421 | fill_always_executed_in_1 (loop, contains_call); |
3422 | } |
3423 | |
/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block BB it records the outermost loop for which
   execution of that loop's header implies execution of BB.  */
3427 | |
3428 | static void |
3429 | fill_always_executed_in (void) |
3430 | { |
3431 | basic_block bb; |
3432 | class loop *loop; |
3433 | |
3434 | auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); |
3435 | bitmap_clear (contains_call); |
3436 | FOR_EACH_BB_FN (bb, cfun) |
3437 | { |
3438 | gimple_stmt_iterator gsi; |
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3440 | { |
	  if (nonpure_call_p (gsi_stmt (gsi)))
3442 | break; |
3443 | } |
3444 | |
      if (!gsi_end_p (gsi))
	bitmap_set_bit (contains_call, bb->index);
3447 | } |
3448 | |
3449 | for (loop = current_loops->tree_root->inner; loop; loop = loop->next) |
3450 | fill_always_executed_in_1 (loop, contains_call); |
3451 | } |
3452 | |
/* Find the loop with the coldest preheader for LOOP, and also the nearest
   hotter loop to LOOP.  Then recursively process each inner loop.  */
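/* For example (an illustrative sketch with made-up profile counts), given

     loop A   (preheader count 1000)
       loop B (preheader count 10)
	 loop C (preheader count 500)

   the coldest outermost loop recorded for C is B, since B's preheader is
   colder than both A's and C's.  Later queries (see get_coldest_out_loop)
   consult this to find a cold enough place for code moved out of C,
   rather than always using the outermost preheader.  */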
3455 | |
3456 | void |
3457 | fill_coldest_and_hotter_out_loop (class loop *coldest_loop, |
3458 | class loop *hotter_loop, class loop *loop) |
3459 | { |
  if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
				     coldest_loop))
3462 | coldest_loop = loop; |
3463 | |
3464 | coldest_outermost_loop[loop->num] = coldest_loop; |
3465 | |
3466 | hotter_than_inner_loop[loop->num] = NULL; |
3467 | class loop *outer_loop = loop_outer (loop); |
3468 | if (hotter_loop |
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
					hotter_loop))
3471 | hotter_than_inner_loop[loop->num] = hotter_loop; |
3472 | |
3473 | if (outer_loop && outer_loop != current_loops->tree_root |
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
					outer_loop))
3476 | hotter_than_inner_loop[loop->num] = outer_loop; |
3477 | |
3478 | if (dump_enabled_p ()) |
3479 | { |
      dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3481 | loop->num, coldest_loop->num); |
3482 | if (hotter_than_inner_loop[loop->num]) |
	dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3484 | hotter_than_inner_loop[loop->num]->num); |
3485 | else |
	dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3487 | } |
3488 | |
3489 | class loop *inner_loop; |
3490 | for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next) |
3491 | fill_coldest_and_hotter_out_loop (coldest_loop, |
				      hotter_than_inner_loop[loop->num],
				      inner_loop);
3494 | } |
3495 | |
3496 | /* Compute the global information needed by the loop invariant motion pass. */ |
3497 | |
3498 | static void |
3499 | tree_ssa_lim_initialize (bool store_motion) |
3500 | { |
3501 | unsigned i; |
3502 | |
3503 | bitmap_obstack_initialize (&lim_bitmap_obstack); |
3504 | gcc_obstack_init (&mem_ref_obstack); |
3505 | lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>; |
3506 | |
3507 | if (flag_tm) |
3508 | compute_transaction_bits (); |
3509 | |
3510 | memory_accesses.refs = new hash_table<mem_ref_hasher> (100); |
  memory_accesses.refs_list.create (100);
3512 | /* Allocate a special, unanalyzable mem-ref with ID zero. */ |
3513 | memory_accesses.refs_list.quick_push |
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3515 | |
  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
3520 | if (store_motion) |
3521 | { |
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
	(number_of_loops (cfun));
3525 | } |
3526 | |
3527 | for (i = 0; i < number_of_loops (cfun); i++) |
3528 | { |
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
			 &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
			 &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
			   &lim_bitmap_obstack);
3536 | } |
3537 | |
3538 | memory_accesses.ttae_cache = NULL; |
3539 | |
3540 | /* Initialize bb_loop_postorder with a mapping from loop->num to |
3541 | its postorder index. */ |
3542 | i = 0; |
3543 | bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun)); |
3544 | for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) |
3545 | bb_loop_postorder[loop->num] = i++; |
3546 | } |
3547 | |
3548 | /* Cleans up after the invariant motion pass. */ |
3549 | |
3550 | static void |
3551 | tree_ssa_lim_finalize (void) |
3552 | { |
3553 | basic_block bb; |
3554 | unsigned i; |
3555 | im_mem_ref *ref; |
3556 | |
3557 | FOR_EACH_BB_FN (bb, cfun) |
3558 | SET_ALWAYS_EXECUTED_IN (bb, NULL); |
3559 | |
3560 | bitmap_obstack_release (&lim_bitmap_obstack); |
3561 | delete lim_aux_data_map; |
3562 | |
3563 | delete memory_accesses.refs; |
3564 | memory_accesses.refs = NULL; |
3565 | |
3566 | FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) |
    memref_free (ref);
3568 | memory_accesses.refs_list.release (); |
3569 | obstack_free (&mem_ref_obstack, NULL); |
3570 | |
3571 | memory_accesses.refs_loaded_in_loop.release (); |
3572 | memory_accesses.refs_stored_in_loop.release (); |
3573 | memory_accesses.all_refs_stored_in_loop.release (); |
3574 | |
3575 | if (memory_accesses.ttae_cache) |
3576 | free_affine_expand_cache (&memory_accesses.ttae_cache); |
3577 | |
  free (bb_loop_postorder);
3579 | |
3580 | coldest_outermost_loop.release (); |
3581 | hotter_than_inner_loop.release (); |
3582 | } |
3583 | |
/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of register pressure.
   Only perform store motion if STORE_MOTION is true.  */
3587 | |
3588 | unsigned int |
3589 | loop_invariant_motion_in_fun (function *fun, bool store_motion) |
3590 | { |
3591 | unsigned int todo = 0; |
3592 | |
3593 | tree_ssa_lim_initialize (store_motion); |
3594 | |
3595 | mark_ssa_maybe_undefs (); |
3596 | |
3597 | /* Gathers information about memory accesses in the loops. */ |
3598 | analyze_memory_references (store_motion); |
3599 | |
3600 | /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ |
3601 | fill_always_executed_in (); |
3602 | |
3603 | /* Pre-compute coldest outermost loop and nearest hotter loop of each loop. |
3604 | */ |
3605 | class loop *loop; |
  coldest_outermost_loop.create (number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
  hotter_than_inner_loop.create (number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3610 | for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) |
    fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3612 | |
3613 | int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); |
3614 | int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); |
3615 | |
  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
3618 | for (int i = 0; i < n; ++i) |
3619 | compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i])); |
3620 | |
3621 | /* Execute store motion. Force the necessary invariants to be moved |
3622 | out of the loops as well. */ |
3623 | if (store_motion) |
3624 | do_store_motion (); |
3625 | |
  free (rpo);
3627 | rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); |
3628 | n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); |
3629 | |
3630 | /* Move the expressions that are expensive enough. */ |
3631 | for (int i = 0; i < n; ++i) |
3632 | todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i])); |
3633 | |
  free (rpo);
3635 | |
3636 | gsi_commit_edge_inserts (); |
3637 | if (need_ssa_update_p (fun)) |
3638 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
3639 | |
3640 | tree_ssa_lim_finalize (); |
3641 | |
3642 | return todo; |
3643 | } |
3644 | |
3645 | /* Loop invariant motion pass. */ |
3646 | |
3647 | namespace { |
3648 | |
3649 | const pass_data pass_data_lim = |
3650 | { |
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
3660 | }; |
3661 | |
3662 | class pass_lim : public gimple_opt_pass |
3663 | { |
3664 | public: |
3665 | pass_lim (gcc::context *ctxt) |
3666 | : gimple_opt_pass (pass_data_lim, ctxt) |
3667 | {} |
3668 | |
3669 | /* opt_pass methods: */ |
3670 | opt_pass * clone () final override { return new pass_lim (m_ctxt); } |
3671 | bool gate (function *) final override { return flag_tree_loop_im != 0; } |
3672 | unsigned int execute (function *) final override; |
3673 | |
3674 | }; // class pass_lim |
3675 | |
3676 | unsigned int |
3677 | pass_lim::execute (function *fun) |
3678 | { |
3679 | bool in_loop_pipeline = scev_initialized_p (); |
3680 | if (!in_loop_pipeline) |
3681 | loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
3682 | |
  if (number_of_loops (fun) <= 1)
3684 | return 0; |
3685 | unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores); |
3686 | |
3687 | if (!in_loop_pipeline) |
3688 | loop_optimizer_finalize (); |
3689 | else |
3690 | scev_reset (); |
3691 | return todo; |
3692 | } |
3693 | |
3694 | } // anon namespace |
3695 | |
3696 | gimple_opt_pass * |
3697 | make_pass_lim (gcc::context *ctxt) |
3698 | { |
3699 | return new pass_lim (ctxt); |
3700 | } |
3701 | |
3702 | |
3703 | |