1/* Loop invariant motion.
2 Copyright (C) 2003-2025 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "tree.h"
25#include "gimple.h"
26#include "cfghooks.h"
27#include "tree-pass.h"
28#include "ssa.h"
29#include "gimple-pretty-print.h"
30#include "fold-const.h"
31#include "cfganal.h"
32#include "tree-eh.h"
33#include "gimplify.h"
34#include "gimple-iterator.h"
35#include "tree-cfg.h"
36#include "tree-ssa-loop-manip.h"
37#include "tree-ssa-loop.h"
38#include "tree-into-ssa.h"
39#include "cfgloop.h"
40#include "tree-affine.h"
41#include "tree-ssa-propagate.h"
42#include "trans-mem.h"
43#include "gimple-fold.h"
44#include "tree-scalar-evolution.h"
45#include "tree-ssa-loop-niter.h"
46#include "alias.h"
47#include "builtins.h"
48#include "tree-dfa.h"
49#include "tree-ssa.h"
50#include "dbgcnt.h"
51#include "insn-codes.h"
52#include "optabs-tree.h"
53
54/* TODO: Support for predicated code motion. I.e.
55
56 while (1)
57 {
58 if (cond)
59 {
60 a = inv;
61 something;
62 }
63 }
64
   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to
67
68 if (cond)
69 a = inv;
70 while (1)
71 {
72 if (cond)
73 something;
74 } */
75
76/* The auxiliary data kept for each statement. */
77
78struct lim_aux_data
79{
  class loop *max_loop;		/* The outermost loop in which the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of which we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
101};
102
103/* Maps statements to their lim_aux_data. */
104
105static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
106
107/* Description of a memory reference location. */
108
109struct mem_ref_loc
110{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in which it occurs.  */
113};
114
115
116/* Description of a memory reference. */
117
118class im_mem_ref
119{
120public:
121 unsigned id : 30; /* ID assigned to the memory reference
122 (its index in memory_accesses.refs_list) */
123 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
124 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
125 hashval_t hash; /* Its hash value. */
126
127 /* The memory access itself and associated caching of alias-oracle
128 query meta-data. We are using mem.ref == error_mark_node for the
129 case the reference is represented by its single access stmt
130 in accesses_in_loop[0]. */
131 ao_ref mem;
132
  bitmap stored;		/* The set of loops in which this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in which this memory location
				   is loaded from.  */
137 vec<mem_ref_loc> accesses_in_loop;
138 /* The locations of the accesses. */
139
140 /* The following set is computed on demand. */
  bitmap_head dep_loop;		/* The set of loops in which the memory
				   reference is {in,}dependent in
				   different modes.  */
144};
145
146static bool in_loop_pipeline;
147
148/* We use six bits per loop in the ref->dep_loop bitmap to record
149 the dep_kind x dep_state combinations. */
150
151enum dep_kind { lim_raw, sm_war, sm_waw };
152enum dep_state { dep_unknown, dep_independent, dep_dependent };
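
/* Concretely, for the loop with number N and dependence kind KIND, bit
   6 * N + 2 * KIND being set records dep_independent and bit
   6 * N + 2 * KIND + 1 records dep_dependent; when neither bit is set the
   state is dep_unknown.  See record_loop_dependence and
   query_loop_dependence below.  */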
153
/* The coldest outermost loop for a given loop.  */
vec<class loop *> coldest_outermost_loop;
/* The hotter outer loop nearest to a given loop.  */
vec<class loop *> hotter_than_inner_loop;
158
159/* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
160
161static void
162record_loop_dependence (class loop *loop, im_mem_ref *ref,
163 dep_kind kind, dep_state state)
164{
165 gcc_assert (state != dep_unknown);
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
167 bitmap_set_bit (&ref->dep_loop, bit);
168}
169
170/* Query the loop dependence cache of REF for LOOP, KIND. */
171
172static dep_state
173query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
174{
175 unsigned first_bit = 6 * loop->num + kind * 2;
176 if (bitmap_bit_p (&ref->dep_loop, first_bit))
177 return dep_independent;
178 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
179 return dep_dependent;
180 return dep_unknown;
181}
182
183/* Mem_ref hashtable helpers. */
184
185struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
186{
187 typedef ao_ref *compare_type;
188 static inline hashval_t hash (const im_mem_ref *);
189 static inline bool equal (const im_mem_ref *, const ao_ref *);
190};
191
192/* A hash function for class im_mem_ref object OBJ. */
193
194inline hashval_t
195mem_ref_hasher::hash (const im_mem_ref *mem)
196{
197 return mem->hash;
198}
199
200/* An equality function for class im_mem_ref object MEM1 with
201 memory reference OBJ2. */
202
203inline bool
204mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
205{
206 if (obj2->max_size_known_p ())
207 return (mem1->ref_decomposed
208 && ((TREE_CODE (mem1->mem.base) == MEM_REF
209 && TREE_CODE (obj2->base) == MEM_REF
210 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
211 TREE_OPERAND (obj2->base, 0), flags: 0)
212 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
213 mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
214 || (operand_equal_p (mem1->mem.base, obj2->base, flags: 0)
215 && known_eq (mem1->mem.offset, obj2->offset)))
216 && known_eq (mem1->mem.size, obj2->size)
217 && known_eq (mem1->mem.max_size, obj2->max_size)
218 && mem1->mem.volatile_p == obj2->volatile_p
219 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		  /* We are not canonicalizing alias-sets but for the
		     special case where we didn't canonicalize yet and the
		     incoming ref is an alias-set zero MEM we pick
		     the correct one already.  */
224 || (!mem1->ref_canonical
225 && (TREE_CODE (obj2->ref) == MEM_REF
226 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
227 && obj2->ref_alias_set == 0)
228 /* Likewise if there's a canonical ref with alias-set zero. */
229 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
230 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
231 TREE_TYPE (obj2->ref)));
232 else
233 return operand_equal_p (mem1->mem.ref, obj2->ref, flags: 0);
234}
235
236
237/* Description of memory accesses in loops. */
238
239static struct
240{
241 /* The hash table of memory references accessed in loops. */
242 hash_table<mem_ref_hasher> *refs;
243
244 /* The list of memory references. */
245 vec<im_mem_ref *> refs_list;
246
  /* The set of memory references loaded in each loop.  */
  vec<bitmap_head> refs_loaded_in_loop;
249
250 /* The set of memory references stored in each loop. */
251 vec<bitmap_head> refs_stored_in_loop;
252
  /* The set of memory references stored in each loop, including subloops.  */
254 vec<bitmap_head> all_refs_stored_in_loop;
255
256 /* Cache for expanding memory addresses. */
257 hash_map<tree, name_expansion *> *ttae_cache;
258} memory_accesses;
259
260/* Obstack for the bitmaps in the above data structures. */
261static bitmap_obstack lim_bitmap_obstack;
262static obstack mem_ref_obstack;
263
264static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
265static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
266static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
267
268/* Minimum cost of an expensive expression. */
269#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
270
271/* The outermost loop for which execution of the header guarantees that the
272 block will be executed. */
273#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
274#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
275
276/* ID of the shared unanalyzable mem. */
277#define UNANALYZABLE_MEM_ID 0
278
279/* Whether the reference was analyzable. */
280#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
281
static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}
320
321
322/* The possibilities of statement movement. */
323enum move_pos
324 {
325 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause a statement that was not
				   executed to become executed -- memory
				   accesses, ...  */
328 MOVE_POSSIBLE /* Unlimited movement. */
329 };
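
/* For instance, a potentially trapping load is only MOVE_PRESERVE_EXECUTION:
   it may be hoisted, but only to a place where it is still guaranteed to be
   executed, whereas a non-trapping computation on loop-invariant operands is
   MOVE_POSSIBLE and may be hoisted unconditionally.  */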
330
331
332/* If it is possible to hoist the statement STMT unconditionally,
333 returns MOVE_POSSIBLE.
334 If it is possible to hoist the statement STMT, but we must avoid making
335 it executed if it would not be executed in the original program (e.g.
336 because it may trap), return MOVE_PRESERVE_EXECUTION.
337 Otherwise return MOVE_IMPOSSIBLE. */
338
339static enum move_pos
340movement_possibility_1 (gimple *stmt)
341{
342 tree lhs;
343 enum move_pos ret = MOVE_POSSIBLE;
344
345 if (flag_unswitch_loops
346 && gimple_code (g: stmt) == GIMPLE_COND)
347 {
348 /* If we perform unswitching, force the operands of the invariant
349 condition to be moved out of the loop. */
350 return MOVE_POSSIBLE;
351 }
352
353 if (gimple_code (g: stmt) == GIMPLE_PHI
354 && gimple_phi_num_args (gs: stmt) <= 2
355 && !virtual_operand_p (op: gimple_phi_result (gs: stmt))
356 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
357 return MOVE_POSSIBLE;
358
359 if (gimple_get_lhs (stmt) == NULL_TREE)
360 return MOVE_IMPOSSIBLE;
361
362 if (gimple_vdef (g: stmt))
363 return MOVE_IMPOSSIBLE;
364
365 if (stmt_ends_bb_p (stmt)
366 || gimple_has_volatile_ops (stmt)
367 || gimple_has_side_effects (stmt)
368 || stmt_could_throw_p (cfun, stmt))
369 return MOVE_IMPOSSIBLE;
370
371 if (is_gimple_call (gs: stmt))
372 {
373 /* While pure or const call is guaranteed to have no side effects, we
374 cannot move it arbitrarily. Consider code like
375
376 char *s = something ();
377
378 while (1)
379 {
380 if (s)
381 t = strlen (s);
382 else
383 t = 0;
384 }
385
386 Here the strlen call cannot be moved out of the loop, even though
387 s is invariant. In addition to possibly creating a call with
388 invalid arguments, moving out a function call that is not executed
389 may cause performance regressions in case the call is costly and
390 not executed at all. */
391 ret = MOVE_PRESERVE_EXECUTION;
392 lhs = gimple_call_lhs (gs: stmt);
393 }
394 else if (is_gimple_assign (gs: stmt))
395 lhs = gimple_assign_lhs (gs: stmt);
396 else
397 return MOVE_IMPOSSIBLE;
398
399 if (TREE_CODE (lhs) == SSA_NAME
400 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
401 return MOVE_IMPOSSIBLE;
402
403 if (TREE_CODE (lhs) != SSA_NAME
404 || gimple_could_trap_p (stmt))
405 return MOVE_PRESERVE_EXECUTION;
406
407 if (is_gimple_assign (gs: stmt))
408 {
409 auto code = gimple_assign_rhs_code (gs: stmt);
410 tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
411 /* For shifts and rotates and possibly out-of-bound shift operands
412 we currently cannot rewrite them into something unconditionally
413 well-defined. */
414 if ((code == LSHIFT_EXPR
415 || code == RSHIFT_EXPR
416 || code == LROTATE_EXPR
417 || code == RROTATE_EXPR)
418 && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
419 /* We cannot use ranges at 'stmt' here. */
420 || wi::ltu_p (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)),
421 y: element_precision (type))))
422 ret = MOVE_PRESERVE_EXECUTION;
423 }
424
425 /* Non local loads in a transaction cannot be hoisted out. Well,
426 unless the load happens on every path out of the loop, but we
427 don't take this into account yet. */
428 if (flag_tm
429 && gimple_in_transaction (stmt)
430 && gimple_assign_single_p (gs: stmt))
431 {
432 tree rhs = gimple_assign_rhs1 (gs: stmt);
433 if (DECL_P (rhs) && is_global_var (t: rhs))
434 {
435 if (dump_file)
436 {
437 fprintf (stream: dump_file, format: "Cannot hoist conditional load of ");
438 print_generic_expr (dump_file, rhs, TDF_SLIM);
439 fprintf (stream: dump_file, format: " because it is in a transaction.\n");
440 }
441 return MOVE_IMPOSSIBLE;
442 }
443 }
444
445 return ret;
446}
447
448static enum move_pos
449movement_possibility (gimple *stmt)
450{
451 enum move_pos pos = movement_possibility_1 (stmt);
452 if (pos == MOVE_POSSIBLE)
453 {
454 use_operand_p use_p;
455 ssa_op_iter ssa_iter;
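      /* A use of a maybe-undefined SSA name must not become unconditionally
	 executed, so downgrade the movement kind in that case.  */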
456 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
457 if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
458 && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
459 return MOVE_PRESERVE_EXECUTION;
460 }
461 return pos;
462}
463
464
/* Compare the profile count of BB with that of LOOP's preheader.  The
   comparison is three-state as stated in profile-count.h; FALSE is returned
   if the inequality cannot be decided.  */
468bool
469bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
470{
471 gcc_assert (bb && loop);
472 return bb->count < loop_preheader_edge (loop)->src->count;
473}
474
/* Find the coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
   counts.
   It performs a three-step check:
   1) Check whether CURR_BB is cold in its own loop_father; if it is cold,
      return NULL, which means it should not be moved out at all;
   2) CURR_BB is NOT cold; check whether the pre-computed COLDEST_LOOP is
      inside OUTERMOST_LOOP, and if so return COLDEST_LOOP;
   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
      hotter loop between OUTERMOST_LOOP and LOOP in the pre-computed
      HOTTER_THAN_INNER_LOOP, and return its nested inner loop; otherwise
      return OUTERMOST_LOOP.
   In the remaining case the coldest loop is inside OUTERMOST_LOOP, so return
   it as the hoist target.  */
488
489static class loop *
490get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
491 basic_block curr_bb)
492{
493 gcc_assert (outermost_loop == loop
494 || flow_loop_nested_p (outermost_loop, loop));
495
  /* If bb_colder_than_loop_preheader returns false due to the three-state
     comparison, OUTERMOST_LOOP is returned in the end to preserve the
     behavior.  Otherwise, return the coldest loop between OUTERMOST_LOOP
     and LOOP.  */
499 if (curr_bb && bb_colder_than_loop_preheader (bb: curr_bb, loop))
500 return NULL;
501
502 class loop *coldest_loop = coldest_outermost_loop[loop->num];
503 if (loop_depth (loop: coldest_loop) < loop_depth (loop: outermost_loop))
504 {
505 class loop *hotter_loop = hotter_than_inner_loop[loop->num];
506 if (!hotter_loop
507 || loop_depth (loop: hotter_loop) < loop_depth (loop: outermost_loop))
508 return outermost_loop;
509
510 /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
511 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
512 hotter_loop, second_coldest_loop, ..., loop]
513 return second_coldest_loop to be the hoist target. */
514 class loop *aloop;
515 for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
516 if (aloop == loop || flow_loop_nested_p (aloop, loop))
517 return aloop;
518 }
519 return coldest_loop;
520}
521
/* Suppose that operand DEF is used inside LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */
526
527static class loop *
528outermost_invariant_loop (tree def, class loop *loop)
529{
530 gimple *def_stmt;
531 basic_block def_bb;
532 class loop *max_loop;
533 struct lim_aux_data *lim_data;
534
535 if (!def)
536 return superloop_at_depth (loop, 1);
537
538 if (TREE_CODE (def) != SSA_NAME)
539 {
540 gcc_assert (is_gimple_min_invariant (def));
541 return superloop_at_depth (loop, 1);
542 }
543
544 def_stmt = SSA_NAME_DEF_STMT (def);
545 def_bb = gimple_bb (g: def_stmt);
546 if (!def_bb)
547 return superloop_at_depth (loop, 1);
548
549 max_loop = find_common_loop (loop, def_bb->loop_father);
550
551 lim_data = get_lim_data (stmt: def_stmt);
552 if (lim_data != NULL && lim_data->max_loop != NULL)
553 max_loop = find_common_loop (max_loop,
554 loop_outer (loop: lim_data->max_loop));
555 if (max_loop == loop)
556 return NULL;
557 max_loop = superloop_at_depth (loop, loop_depth (loop: max_loop) + 1);
558
559 return max_loop;
560}
561
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally,
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */
573
574static bool
575add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
576 bool add_cost)
577{
578 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
579 basic_block def_bb = gimple_bb (g: def_stmt);
580 class loop *max_loop;
581 struct lim_aux_data *def_data;
582
583 if (!def_bb)
584 return true;
585
586 max_loop = outermost_invariant_loop (def, loop);
587 if (!max_loop)
588 return false;
589
590 if (flow_loop_nested_p (data->max_loop, max_loop))
591 data->max_loop = max_loop;
592
593 def_data = get_lim_data (stmt: def_stmt);
594 if (!def_data)
595 return true;
596
597 if (add_cost
598 /* Only add the cost if the statement defining DEF is inside LOOP,
599 i.e. if it is likely that by moving the invariants dependent
600 on it, we will be able to avoid creating a new register for
601 it (since it will be only used in these dependent invariants). */
602 && def_bb->loop_father == loop)
603 data->cost += def_data->cost;
604
605 data->depends.safe_push (obj: def_stmt);
606
607 return true;
608}
609
610/* Returns an estimate for a cost of statement STMT. The values here
611 are just ad-hoc constants, similar to costs for inlining. */
612
613static unsigned
614stmt_cost (gimple *stmt)
615{
616 /* Always try to create possibilities for unswitching. */
617 if (gimple_code (g: stmt) == GIMPLE_COND
618 || gimple_code (g: stmt) == GIMPLE_PHI)
619 return LIM_EXPENSIVE;
620
621 /* We should be hoisting calls if possible. */
622 if (is_gimple_call (gs: stmt))
623 {
624 tree fndecl;
625
626 /* Unless the call is a builtin_constant_p; this always folds to a
627 constant, so moving it is useless. */
628 fndecl = gimple_call_fndecl (gs: stmt);
629 if (fndecl && fndecl_built_in_p (node: fndecl, name1: BUILT_IN_CONSTANT_P))
630 return 0;
631
632 return LIM_EXPENSIVE;
633 }
634
635 /* Hoisting memory references out should almost surely be a win. */
636 if (gimple_references_memory_p (stmt))
637 return LIM_EXPENSIVE;
638
639 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
640 return 1;
641
642 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
643 switch (code)
644 {
645 case MULT_EXPR:
646 case WIDEN_MULT_EXPR:
647 case WIDEN_MULT_PLUS_EXPR:
648 case WIDEN_MULT_MINUS_EXPR:
649 case DOT_PROD_EXPR:
650 case TRUNC_DIV_EXPR:
651 case CEIL_DIV_EXPR:
652 case FLOOR_DIV_EXPR:
653 case ROUND_DIV_EXPR:
654 case EXACT_DIV_EXPR:
655 case CEIL_MOD_EXPR:
656 case FLOOR_MOD_EXPR:
657 case ROUND_MOD_EXPR:
658 case TRUNC_MOD_EXPR:
659 case RDIV_EXPR:
660 /* Division and multiplication are usually expensive. */
661 return LIM_EXPENSIVE;
662
663 case LSHIFT_EXPR:
664 case RSHIFT_EXPR:
665 case WIDEN_LSHIFT_EXPR:
666 case LROTATE_EXPR:
667 case RROTATE_EXPR:
668 /* Shifts and rotates are usually expensive. */
669 return LIM_EXPENSIVE;
670
671 case COND_EXPR:
672 case VEC_COND_EXPR:
673 /* Conditionals are expensive. */
674 return LIM_EXPENSIVE;
675
676 case CONSTRUCTOR:
677 /* Make vector construction cost proportional to the number
678 of elements. */
679 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
680
681 case SSA_NAME:
682 case PAREN_EXPR:
683 /* Whether or not something is wrapped inside a PAREN_EXPR
684 should not change move cost. Nor should an intermediate
685 unpropagated SSA name copy. */
686 return 0;
687
688 default:
689 /* Comparisons are usually expensive. */
690 if (TREE_CODE_CLASS (code) == tcc_comparison)
691 return LIM_EXPENSIVE;
692 return 1;
693 }
694}
695
/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL is
   returned instead.  */
699
700static class loop *
701outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
702{
703 class loop *aloop;
704
705 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
706 return NULL;
707
708 for (aloop = outer;
709 aloop != loop;
710 aloop = superloop_at_depth (loop, loop_depth (loop: aloop) + 1))
711 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
712 && ref_indep_loop_p (aloop, ref, lim_raw))
713 return aloop;
714
715 if (ref_indep_loop_p (loop, ref, lim_raw))
716 return loop;
717 else
718 return NULL;
719}
720
721/* If there is a simple load or store to a memory reference in STMT, returns
722 the location of the memory reference, and sets IS_STORE according to whether
723 it is a store or load. Otherwise, returns NULL. */
724
725static tree *
726simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
727{
728 tree *lhs, *rhs;
729
730 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
731 if (!gimple_assign_single_p (gs: stmt))
732 return NULL;
733
734 lhs = gimple_assign_lhs_ptr (gs: stmt);
735 rhs = gimple_assign_rhs1_ptr (gs: stmt);
736
737 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (g: stmt))
738 {
739 *is_store = false;
740 return rhs;
741 }
742 else if (gimple_vdef (g: stmt)
743 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
744 {
745 *is_store = true;
746 return lhs;
747 }
748 else
749 return NULL;
750}
751
752/* From a controlling predicate in DOM determine the arguments from
753 the PHI node PHI that are chosen if the predicate evaluates to
754 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
755 they are non-NULL. Returns true if the arguments can be determined,
756 else return false. */
757
758static bool
759extract_true_false_args_from_phi (basic_block dom, gphi *phi,
760 tree *true_arg_p, tree *false_arg_p)
761{
762 edge te, fe;
763 if (! extract_true_false_controlled_edges (dom, gimple_bb (g: phi),
764 &te, &fe))
765 return false;
766
767 if (true_arg_p)
768 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
769 if (false_arg_p)
770 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
771
772 return true;
773}
774
/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose the loop so that we
   preserve whether STMT is executed.  It also fills other related
   information into LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */
784
785static bool
786determine_max_movement (gimple *stmt, bool must_preserve_exec)
787{
788 basic_block bb = gimple_bb (g: stmt);
789 class loop *loop = bb->loop_father;
790 class loop *level;
791 struct lim_aux_data *lim_data = get_lim_data (stmt);
792 tree val;
793 ssa_op_iter iter;
794
795 if (must_preserve_exec)
796 level = ALWAYS_EXECUTED_IN (bb);
797 else
798 level = superloop_at_depth (loop, 1);
799 lim_data->max_loop = get_coldest_out_loop (outermost_loop: level, loop, curr_bb: bb);
800 if (!lim_data->max_loop)
801 return false;
802
803 if (gphi *phi = dyn_cast <gphi *> (p: stmt))
804 {
805 use_operand_p use_p;
806 unsigned min_cost = UINT_MAX;
807 unsigned total_cost = 0;
808 struct lim_aux_data *def_data;
809
810 /* We will end up promoting dependencies to be unconditionally
811 evaluated. For this reason the PHI cost (and thus the
812 cost we remove from the loop by doing the invariant motion)
813 is that of the cheapest PHI argument dependency chain. */
814 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
815 {
816 val = USE_FROM_PTR (use_p);
817
818 if (TREE_CODE (val) != SSA_NAME)
819 {
820 /* Assign const 1 to constants. */
821 min_cost = MIN (min_cost, 1);
822 total_cost += 1;
823 continue;
824 }
825 if (!add_dependency (def: val, data: lim_data, loop, add_cost: false))
826 return false;
827
828 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
829 if (gimple_bb (g: def_stmt)
830 && gimple_bb (g: def_stmt)->loop_father == loop)
831 {
832 def_data = get_lim_data (stmt: def_stmt);
833 if (def_data)
834 {
835 min_cost = MIN (min_cost, def_data->cost);
836 total_cost += def_data->cost;
837 }
838 }
839 }
840
841 min_cost = MIN (min_cost, total_cost);
842 lim_data->cost += min_cost;
843
844 if (gimple_phi_num_args (gs: phi) > 1)
845 {
846 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
847 gimple *cond;
848 if (gsi_end_p (i: gsi_last_bb (bb: dom)))
849 return false;
850 cond = gsi_stmt (i: gsi_last_bb (bb: dom));
851 if (gimple_code (g: cond) != GIMPLE_COND)
852 return false;
853 /* Verify that this is an extended form of a diamond and
854 the PHI arguments are completely controlled by the
855 predicate in DOM. */
856 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
857 return false;
858
	  /* If one of the dependent statements is a vector compare, check
	     whether the target supports it; otherwise it is invalid to hoist
	     it out of the gcond it belongs to.  */
862 if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
863 {
864 tree type = TREE_TYPE (gimple_cond_lhs (cond));
865 auto code = gimple_cond_code (gs: cond);
866 if (!target_supports_op_p (type, code, optab_vector))
867 return false;
868 }
869
870 /* Fold in dependencies and cost of the condition. */
871 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
872 {
873 if (!add_dependency (def: val, data: lim_data, loop, add_cost: false))
874 return false;
875 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
876 if (def_data)
877 lim_data->cost += def_data->cost;
878 }
879
	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
885 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
886 && !(min_cost != 0
887 && total_cost / min_cost <= 2))
888 return false;
889
890 /* Assume that the control-flow in the loop will vanish.
891 ??? We should verify this and not artificially increase
892 the cost if that is not the case. */
893 lim_data->cost += stmt_cost (stmt);
894 }
895
896 return true;
897 }
898
899 /* A stmt that receives abnormal edges cannot be hoisted. */
900 if (is_a <gcall *> (p: stmt)
901 && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
902 return false;
903
904 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
905 if (!add_dependency (def: val, data: lim_data, loop, add_cost: true))
906 return false;
907
908 if (gimple_vuse (g: stmt))
909 {
910 im_mem_ref *ref
911 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
912 if (ref
913 && MEM_ANALYZABLE (ref))
914 {
915 lim_data->max_loop = outermost_indep_loop (outer: lim_data->max_loop,
916 loop, ref);
917 if (!lim_data->max_loop)
918 return false;
919 }
920 else if (! add_dependency (def: gimple_vuse (g: stmt), data: lim_data, loop, add_cost: false))
921 return false;
922 }
923
924 lim_data->cost += stmt_cost (stmt);
925
926 return true;
927}
928
929/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
930 and that one of the operands of this statement is computed by STMT.
931 Ensure that STMT (together with all the statements that define its
932 operands) is hoisted at least out of the loop LEVEL. */
933
934static void
935set_level (gimple *stmt, class loop *orig_loop, class loop *level)
936{
937 class loop *stmt_loop = gimple_bb (g: stmt)->loop_father;
938 struct lim_aux_data *lim_data;
939 gimple *dep_stmt;
940 unsigned i;
941
942 stmt_loop = find_common_loop (orig_loop, stmt_loop);
943 lim_data = get_lim_data (stmt);
944 if (lim_data != NULL && lim_data->tgt_loop != NULL)
945 stmt_loop = find_common_loop (stmt_loop,
946 loop_outer (loop: lim_data->tgt_loop));
947 if (flow_loop_nested_p (stmt_loop, level))
948 return;
949
950 gcc_assert (level == lim_data->max_loop
951 || flow_loop_nested_p (lim_data->max_loop, level));
952
953 lim_data->tgt_loop = level;
954 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
955 set_level (stmt: dep_stmt, orig_loop, level);
956}
957
/* Determines an outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */
961
962static void
963set_profitable_level (gimple *stmt)
964{
965 set_level (stmt, orig_loop: gimple_bb (g: stmt)->loop_father, level: get_lim_data (stmt)->max_loop);
966}
967
968/* Returns true if STMT is a call that has side effects. */
969
970static bool
971nonpure_call_p (gimple *stmt)
972{
973 if (gimple_code (g: stmt) != GIMPLE_CALL)
974 return false;
975
976 return gimple_has_side_effects (stmt);
977}
978
979/* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
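
/* When the divisor is loop invariant, the reciprocal 1/b is invariant as
   well and can be hoisted out of the loop, so the division executed on
   every iteration is replaced by a cheaper multiplication.  */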
980
981static gimple *
982rewrite_reciprocal (gimple_stmt_iterator *bsi)
983{
984 gassign *stmt, *stmt1, *stmt2;
985 tree name, lhs, type;
986 tree real_one;
987 gimple_stmt_iterator gsi;
988
989 stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi));
990 lhs = gimple_assign_lhs (gs: stmt);
991 type = TREE_TYPE (lhs);
992
993 real_one = build_one_cst (type);
994
995 name = make_temp_ssa_name (type, NULL, name: "reciptmp");
996 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
997 gimple_assign_rhs2 (gs: stmt));
998 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
999 gimple_assign_rhs1 (gs: stmt));
1000
1001 /* Replace division stmt with reciprocal and multiply stmts.
1002 The multiply stmt is not invariant, so update iterator
1003 and avoid rescanning. */
1004 gsi = *bsi;
1005 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1006 gsi_replace (&gsi, stmt2, true);
1007
1008 /* Continue processing with invariant reciprocal statement. */
1009 return stmt1;
1010}
1011
1012/* Check if the pattern at *BSI is a bittest of the form
1013 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
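
/* With B loop invariant but A not, the mask (1 << B) is itself loop
   invariant and can be hoisted out of the loop, saving a shift executed on
   every iteration.  */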
1014
1015static gimple *
1016rewrite_bittest (gimple_stmt_iterator *bsi)
1017{
1018 gassign *stmt;
1019 gimple *stmt1;
1020 gassign *stmt2;
1021 gimple *use_stmt;
1022 gcond *cond_stmt;
1023 tree lhs, name, t, a, b;
1024 use_operand_p use;
1025
1026 stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi));
1027 lhs = gimple_assign_lhs (gs: stmt);
1028
1029 /* Verify that the single use of lhs is a comparison against zero. */
1030 if (TREE_CODE (lhs) != SSA_NAME
1031 || !single_imm_use (var: lhs, use_p: &use, stmt: &use_stmt))
1032 return stmt;
1033 cond_stmt = dyn_cast <gcond *> (p: use_stmt);
1034 if (!cond_stmt)
1035 return stmt;
1036 if (gimple_cond_lhs (gs: cond_stmt) != lhs
1037 || (gimple_cond_code (gs: cond_stmt) != NE_EXPR
1038 && gimple_cond_code (gs: cond_stmt) != EQ_EXPR)
1039 || !integer_zerop (gimple_cond_rhs (gs: cond_stmt)))
1040 return stmt;
1041
1042 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
1043 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1044 if (gimple_code (g: stmt1) != GIMPLE_ASSIGN)
1045 return stmt;
1046
1047 /* There is a conversion in between possibly inserted by fold. */
1048 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1049 {
1050 t = gimple_assign_rhs1 (gs: stmt1);
1051 if (TREE_CODE (t) != SSA_NAME
1052 || !has_single_use (var: t))
1053 return stmt;
1054 stmt1 = SSA_NAME_DEF_STMT (t);
1055 if (gimple_code (g: stmt1) != GIMPLE_ASSIGN)
1056 return stmt;
1057 }
1058
1059 /* Verify that B is loop invariant but A is not. Verify that with
1060 all the stmt walking we are still in the same loop. */
1061 if (gimple_assign_rhs_code (gs: stmt1) != RSHIFT_EXPR
1062 || loop_containing_stmt (stmt: stmt1) != loop_containing_stmt (stmt))
1063 return stmt;
1064
1065 a = gimple_assign_rhs1 (gs: stmt1);
1066 b = gimple_assign_rhs2 (gs: stmt1);
1067
1068 if (outermost_invariant_loop (def: b, loop: loop_containing_stmt (stmt: stmt1)) != NULL
1069 && outermost_invariant_loop (def: a, loop: loop_containing_stmt (stmt: stmt1)) == NULL)
1070 {
1071 gimple_stmt_iterator rsi;
1072
1073 /* 1 << B */
1074 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1075 build_int_cst (TREE_TYPE (a), 1), b);
1076 name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp");
1077 stmt1 = gimple_build_assign (name, t);
1078
1079 /* A & (1 << B) */
1080 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1081 name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp");
1082 stmt2 = gimple_build_assign (name, t);
1083
1084 /* Replace the SSA_NAME we compare against zero. Adjust
1085 the type of zero accordingly. */
1086 SET_USE (use, name);
1087 gimple_cond_set_rhs (gs: cond_stmt,
1088 rhs: build_int_cst_type (TREE_TYPE (name),
1089 0));
1090
1091 /* Don't use gsi_replace here, none of the new assignments sets
1092 the variable originally set in stmt. Move bsi to stmt1, and
1093 then remove the original stmt, so that we get a chance to
1094 retain debug info for it. */
1095 rsi = *bsi;
1096 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1097 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1098 gimple *to_release = gsi_stmt (i: rsi);
1099 gsi_remove (&rsi, true);
1100 release_defs (to_release);
1101
1102 return stmt1;
1103 }
1104
1105 return stmt;
1106}
1107
/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them in the LIM_DATA associated with the
   statements.  */
1111
1112static void
1113compute_invariantness (basic_block bb)
1114{
1115 enum move_pos pos;
1116 gimple_stmt_iterator bsi;
1117 gimple *stmt;
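  /* Whether the statements processed so far may not be executed on every
     entry to the loop, and the outermost loop in which they are always
     executed (NULL if there is no such loop).  */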
1118 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1119 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1120 struct lim_aux_data *lim_data;
1121
1122 if (!loop_outer (loop: bb->loop_father))
1123 return;
1124
1125 if (dump_file && (dump_flags & TDF_DETAILS))
1126 fprintf (stream: dump_file, format: "Basic block %d (loop %d -- depth %d):\n\n",
1127 bb->index, bb->loop_father->num, loop_depth (loop: bb->loop_father));
1128
  /* Look at PHI nodes, but only if there are at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
1133 bsi = gsi_start_phis (bb);
1134 if (!gsi_end_p (i: bsi)
1135 && ((gsi_next (i: &bsi), gsi_end_p (i: bsi))
1136 || (gsi_next (i: &bsi), gsi_end_p (i: bsi))))
1137 for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1138 {
1139 stmt = gsi_stmt (i: bsi);
1140
1141 pos = movement_possibility (stmt);
1142 if (pos == MOVE_IMPOSSIBLE)
1143 continue;
1144
1145 lim_data = get_lim_data (stmt);
1146 if (! lim_data)
1147 lim_data = init_lim_data (stmt);
1148 lim_data->always_executed_in = outermost;
1149
1150 if (!determine_max_movement (stmt, must_preserve_exec: false))
1151 {
1152 lim_data->max_loop = NULL;
1153 continue;
1154 }
1155
1156 if (dump_file && (dump_flags & TDF_DETAILS))
1157 {
1158 print_gimple_stmt (dump_file, stmt, 2);
1159 fprintf (stream: dump_file, format: " invariant up to level %d, cost %d.\n\n",
1160 loop_depth (loop: lim_data->max_loop),
1161 lim_data->cost);
1162 }
1163
1164 if (lim_data->cost >= LIM_EXPENSIVE)
1165 set_profitable_level (stmt);
1166 }
1167
1168 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1169 {
1170 stmt = gsi_stmt (i: bsi);
1171
1172 pos = movement_possibility (stmt);
1173 if (pos == MOVE_IMPOSSIBLE)
1174 {
1175 if (nonpure_call_p (stmt))
1176 {
1177 maybe_never = true;
1178 outermost = NULL;
1179 }
1180 /* Make sure to note always_executed_in for stores to make
1181 store-motion work. */
1182 else if (stmt_makes_single_store (stmt))
1183 {
1184 struct lim_aux_data *lim_data = get_lim_data (stmt);
1185 if (! lim_data)
1186 lim_data = init_lim_data (stmt);
1187 lim_data->always_executed_in = outermost;
1188 }
1189 continue;
1190 }
1191
1192 if (is_gimple_assign (gs: stmt)
1193 && (get_gimple_rhs_class (code: gimple_assign_rhs_code (gs: stmt))
1194 == GIMPLE_BINARY_RHS))
1195 {
1196 tree op0 = gimple_assign_rhs1 (gs: stmt);
1197 tree op1 = gimple_assign_rhs2 (gs: stmt);
1198 class loop *ol1 = outermost_invariant_loop (def: op1,
1199 loop: loop_containing_stmt (stmt));
1200
1201 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1202 to be hoisted out of loop, saving expensive divide. */
1203 if (pos == MOVE_POSSIBLE
1204 && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR
1205 && flag_unsafe_math_optimizations
1206 && !flag_trapping_math
1207 && ol1 != NULL
1208 && outermost_invariant_loop (def: op0, loop: ol1) == NULL)
1209 stmt = rewrite_reciprocal (bsi: &bsi);
1210
1211 /* If the shift count is invariant, convert (A >> B) & 1 to
1212 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1213 saving an expensive shift. */
1214 if (pos == MOVE_POSSIBLE
1215 && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR
1216 && integer_onep (op1)
1217 && TREE_CODE (op0) == SSA_NAME
1218 && has_single_use (var: op0))
1219 stmt = rewrite_bittest (bsi: &bsi);
1220 }
1221
1222 lim_data = get_lim_data (stmt);
1223 if (! lim_data)
1224 lim_data = init_lim_data (stmt);
1225 lim_data->always_executed_in = outermost;
1226
1227 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1228 continue;
1229
1230 if (!determine_max_movement (stmt, must_preserve_exec: pos == MOVE_PRESERVE_EXECUTION))
1231 {
1232 lim_data->max_loop = NULL;
1233 continue;
1234 }
1235
1236 if (dump_file && (dump_flags & TDF_DETAILS))
1237 {
1238 print_gimple_stmt (dump_file, stmt, 2);
1239 fprintf (stream: dump_file, format: " invariant up to level %d, cost %d.\n\n",
1240 loop_depth (loop: lim_data->max_loop),
1241 lim_data->cost);
1242 }
1243
1244 if (lim_data->cost >= LIM_EXPENSIVE)
1245 set_profitable_level (stmt);
1246 /* When we run before PRE and PRE is active hoist all expressions
1247 to the always executed loop since PRE would do so anyway
1248 and we can preserve range info while PRE cannot. */
1249 else if (flag_tree_pre && !in_loop_pipeline
1250 && outermost)
1251 {
1252 class loop *mloop = lim_data->max_loop;
1253 if (loop_depth (loop: outermost) > loop_depth (loop: mloop))
1254 {
1255 mloop = outermost;
1256 if (dump_file && (dump_flags & TDF_DETAILS))
1257 fprintf (stream: dump_file, format: " constraining to loop depth %d\n\n\n",
1258 loop_depth (loop: mloop));
1259 }
1260 set_level (stmt, orig_loop: bb->loop_father, level: mloop);
1261 }
1262 }
1263}
1264
1265/* Hoist the statements in basic block BB out of the loops prescribed by
1266 data stored in LIM_DATA structures associated with each statement. Callback
1267 for walk_dominator_tree. */
1268
1269unsigned int
1270move_computations_worker (basic_block bb)
1271{
1272 class loop *level;
1273 unsigned cost = 0;
1274 struct lim_aux_data *lim_data;
1275 unsigned int todo = 0;
1276
1277 if (!loop_outer (loop: bb->loop_father))
1278 return todo;
1279
1280 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); )
1281 {
1282 gassign *new_stmt;
1283 gphi *stmt = bsi.phi ();
1284
1285 lim_data = get_lim_data (stmt);
1286 if (lim_data == NULL)
1287 {
1288 gsi_next (i: &bsi);
1289 continue;
1290 }
1291
1292 cost = lim_data->cost;
1293 level = lim_data->tgt_loop;
1294 clear_lim_data (stmt);
1295
1296 if (!level)
1297 {
1298 gsi_next (i: &bsi);
1299 continue;
1300 }
1301
1302 if (dump_file && (dump_flags & TDF_DETAILS))
1303 {
1304 fprintf (stream: dump_file, format: "Moving PHI node\n");
1305 print_gimple_stmt (dump_file, stmt, 0);
1306 fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n",
1307 cost, level->num);
1308 }
1309
1310 if (gimple_phi_num_args (gs: stmt) == 1)
1311 {
1312 tree arg = PHI_ARG_DEF (stmt, 0);
1313 new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt),
1314 TREE_CODE (arg), arg);
1315 }
1316 else
1317 {
1318 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1319 gimple *cond = gsi_stmt (i: gsi_last_bb (bb: dom));
1320 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1321 /* Get the PHI arguments corresponding to the true and false
1322 edges of COND. */
1323 extract_true_false_args_from_phi (dom, phi: stmt, true_arg_p: &arg0, false_arg_p: &arg1);
1324 gcc_assert (arg0 && arg1);
1325 /* For `bool_val != 0`, reuse bool_val. */
1326 if (gimple_cond_code (gs: cond) == NE_EXPR
1327 && integer_zerop (gimple_cond_rhs (gs: cond))
1328 && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
1329 boolean_type_node))
1330 {
1331 t = gimple_cond_lhs (gs: cond);
1332 }
1333 else
1334 {
1335 t = make_ssa_name (boolean_type_node);
1336 new_stmt = gimple_build_assign (t, gimple_cond_code (gs: cond),
1337 gimple_cond_lhs (gs: cond),
1338 gimple_cond_rhs (gs: cond));
1339 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1340 }
1341 new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt),
1342 COND_EXPR, t, arg0, arg1);
1343 todo |= TODO_cleanup_cfg;
1344 }
1345 if (!ALWAYS_EXECUTED_IN (bb)
1346 || (ALWAYS_EXECUTED_IN (bb) != level
1347 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1348 reset_flow_sensitive_info (gimple_assign_lhs (gs: new_stmt));
1349 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1350 remove_phi_node (&bsi, false);
1351 }
1352
1353 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); )
1354 {
1355 edge e;
1356
1357 gimple *stmt = gsi_stmt (i: bsi);
1358
1359 lim_data = get_lim_data (stmt);
1360 if (lim_data == NULL)
1361 {
1362 gsi_next (i: &bsi);
1363 continue;
1364 }
1365
1366 cost = lim_data->cost;
1367 level = lim_data->tgt_loop;
1368 clear_lim_data (stmt);
1369
1370 if (!level)
1371 {
1372 gsi_next (i: &bsi);
1373 continue;
1374 }
1375
1376 /* We do not really want to move conditionals out of the loop; we just
1377 placed it here to force its operands to be moved if necessary. */
1378 if (gimple_code (g: stmt) == GIMPLE_COND)
1379 {
1380 gsi_next (i: &bsi);
1381 continue;
1382 }
1383
1384 if (dump_file && (dump_flags & TDF_DETAILS))
1385 {
1386 fprintf (stream: dump_file, format: "Moving statement\n");
1387 print_gimple_stmt (dump_file, stmt, 0);
1388 fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n",
1389 cost, level->num);
1390 }
1391
1392 e = loop_preheader_edge (level);
1393 gcc_assert (!gimple_vdef (stmt));
1394 if (gimple_vuse (g: stmt))
1395 {
1396 /* The new VUSE is the one from the virtual PHI in the loop
1397 header or the one already present. */
1398 gphi_iterator gsi2;
1399 for (gsi2 = gsi_start_phis (e->dest);
1400 !gsi_end_p (i: gsi2); gsi_next (i: &gsi2))
1401 {
1402 gphi *phi = gsi2.phi ();
1403 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
1404 {
1405 SET_USE (gimple_vuse_op (stmt),
1406 PHI_ARG_DEF_FROM_EDGE (phi, e));
1407 break;
1408 }
1409 }
1410 }
1411 gsi_remove (&bsi, false);
1412 if (gimple_has_lhs (stmt)
1413 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1414 && (!ALWAYS_EXECUTED_IN (bb)
1415 || !(ALWAYS_EXECUTED_IN (bb) == level
1416 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1417 reset_flow_sensitive_info (gimple_get_lhs (stmt));
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow, rewrite it to
	 unsigned arithmetic.  */
1422 if (gimple_needing_rewrite_undefined (stmt)
1423 && (!ALWAYS_EXECUTED_IN (bb)
1424 || !(ALWAYS_EXECUTED_IN (bb) == level
1425 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1426 gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
1427 else
1428 gsi_insert_on_edge (e, stmt);
1429 }
1430
1431 return todo;
1432}
1433
1434/* Checks whether the statement defining variable *INDEX can be hoisted
1435 out of the loop passed in DATA. Callback for for_each_index. */
1436
1437static bool
1438may_move_till (tree ref, tree *index, void *data)
1439{
1440 class loop *loop = (class loop *) data, *max_loop;
1441
  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
1444 if (TREE_CODE (ref) == ARRAY_REF)
1445 {
1446 tree step = TREE_OPERAND (ref, 3);
1447 tree lbound = TREE_OPERAND (ref, 2);
1448
1449 max_loop = outermost_invariant_loop (def: step, loop);
1450 if (!max_loop)
1451 return false;
1452
1453 max_loop = outermost_invariant_loop (def: lbound, loop);
1454 if (!max_loop)
1455 return false;
1456 }
1457
1458 max_loop = outermost_invariant_loop (def: *index, loop);
1459 if (!max_loop)
1460 return false;
1461
1462 return true;
1463}
1464
/* If OP is an SSA name, force the statement that defines it to be
   moved out of LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1467
1468static void
1469force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1470{
1471 gimple *stmt;
1472
1473 if (!op
1474 || is_gimple_min_invariant (op))
1475 return;
1476
1477 gcc_assert (TREE_CODE (op) == SSA_NAME);
1478
1479 stmt = SSA_NAME_DEF_STMT (op);
1480 if (gimple_nop_p (g: stmt))
1481 return;
1482
1483 set_level (stmt, orig_loop, level: loop);
1484}
1485
/* Forces the statements defining invariants in REF (and *INDEX) to be moved
   out of the LOOP.  The reference REF is used in the loop ORIG_LOOP.
   Callback for for_each_index.  */
1489
1490struct fmt_data
1491{
1492 class loop *loop;
1493 class loop *orig_loop;
1494};
1495
1496static bool
1497force_move_till (tree ref, tree *index, void *data)
1498{
1499 struct fmt_data *fmt_data = (struct fmt_data *) data;
1500
1501 if (TREE_CODE (ref) == ARRAY_REF)
1502 {
1503 tree step = TREE_OPERAND (ref, 3);
1504 tree lbound = TREE_OPERAND (ref, 2);
1505
1506 force_move_till_op (op: step, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1507 force_move_till_op (op: lbound, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1508 }
1509
1510 force_move_till_op (op: *index, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1511
1512 return true;
1513}
1514
/* A function to free the mem_ref object MEM.  */
1516
1517static void
1518memref_free (class im_mem_ref *mem)
1519{
1520 mem->accesses_in_loop.release ();
1521}
1522
1523/* Allocates and returns a memory reference description for MEM whose hash
1524 value is HASH and id is ID. */
1525
1526static im_mem_ref *
1527mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1528{
1529 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1530 if (mem)
1531 ref->mem = *mem;
1532 else
1533 ao_ref_init (&ref->mem, error_mark_node);
1534 ref->id = id;
1535 ref->ref_canonical = false;
1536 ref->ref_decomposed = false;
1537 ref->hash = hash;
1538 ref->stored = NULL;
1539 ref->loaded = NULL;
1540 bitmap_initialize (head: &ref->dep_loop, obstack: &lim_bitmap_obstack);
1541 ref->accesses_in_loop.create (nelems: 1);
1542
1543 return ref;
1544}
1545
/* Records memory reference location *LOC to the memory reference
   description REF.  The reference occurs in statement STMT.  */
1548
1549static void
1550record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1551{
1552 mem_ref_loc aref;
1553 aref.stmt = stmt;
1554 aref.ref = loc;
1555 ref->accesses_in_loop.safe_push (obj: aref);
1556}
1557
1558/* Set the LOOP bit in REF stored bitmap and allocate that if
1559 necessary. Return whether a bit was changed. */
1560
1561static bool
1562set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1563{
1564 if (!ref->stored)
1565 ref->stored = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
1566 return bitmap_set_bit (ref->stored, loop->num);
1567}
1568
1569/* Marks reference REF as stored in LOOP. */
1570
1571static void
1572mark_ref_stored (im_mem_ref *ref, class loop *loop)
1573{
1574 while (loop != current_loops->tree_root
1575 && set_ref_stored_in_loop (ref, loop))
1576 loop = loop_outer (loop);
1577}
1578
1579/* Set the LOOP bit in REF loaded bitmap and allocate that if
1580 necessary. Return whether a bit was changed. */
1581
1582static bool
1583set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1584{
1585 if (!ref->loaded)
1586 ref->loaded = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
1587 return bitmap_set_bit (ref->loaded, loop->num);
1588}
1589
1590/* Marks reference REF as loaded in LOOP. */
1591
1592static void
1593mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1594{
1595 while (loop != current_loops->tree_root
1596 && set_ref_loaded_in_loop (ref, loop))
1597 loop = loop_outer (loop);
1598}
1599
1600/* Gathers memory references in statement STMT in LOOP, storing the
1601 information about them in the memory_accesses structure. Marks
1602 the vops accessed through unrecognized statements there as
1603 well. */
1604
1605static void
1606gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1607{
1608 tree *mem = NULL;
1609 hashval_t hash;
1610 im_mem_ref **slot;
1611 im_mem_ref *ref;
1612 bool is_stored;
1613 unsigned id;
1614
1615 if (!gimple_vuse (g: stmt))
1616 return;
1617
1618 mem = simple_mem_ref_in_stmt (stmt, is_store: &is_stored);
1619 if (!mem && is_gimple_assign (gs: stmt))
1620 {
1621 /* For aggregate copies record distinct references but use them
1622 only for disambiguation purposes. */
1623 id = memory_accesses.refs_list.length ();
1624 ref = mem_ref_alloc (NULL, hash: 0, id);
1625 memory_accesses.refs_list.safe_push (obj: ref);
1626 if (dump_file && (dump_flags & TDF_DETAILS))
1627 {
1628 fprintf (stream: dump_file, format: "Unhandled memory reference %u: ", id);
1629 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1630 }
1631 record_mem_ref_loc (ref, stmt, loc: mem);
1632 is_stored = gimple_vdef (g: stmt);
1633 }
1634 else if (!mem)
1635 {
1636 /* We use the shared mem_ref for all unanalyzable refs. */
1637 id = UNANALYZABLE_MEM_ID;
1638 ref = memory_accesses.refs_list[id];
1639 if (dump_file && (dump_flags & TDF_DETAILS))
1640 {
1641 fprintf (stream: dump_file, format: "Unanalyzed memory reference %u: ", id);
1642 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1643 }
1644 is_stored = gimple_vdef (g: stmt);
1645 }
1646 else
1647 {
1648 /* We are looking for equal refs that might differ in structure
1649 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1650 make sure we can canonicalize the ref in the hashtable if
1651 non-operand_equal_p refs are found. For the lookup we mark
1652 the case we want strict equality with aor.max_size == -1. */
1653 ao_ref aor;
1654 ao_ref_init (&aor, *mem);
1655 ao_ref_base (&aor);
1656 ao_ref_alias_set (&aor);
1657 HOST_WIDE_INT offset, size, max_size;
1658 poly_int64 saved_maxsize = aor.max_size, mem_off;
1659 tree mem_base;
1660 bool ref_decomposed;
1661 if (aor.max_size_known_p ()
1662 && aor.offset.is_constant (const_value: &offset)
1663 && aor.size.is_constant (const_value: &size)
1664 && aor.max_size.is_constant (const_value: &max_size)
1665 && size == max_size
1666 && (size % BITS_PER_UNIT) == 0
1667 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1668 size. Make sure this is consistent with the extraction. */
1669 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1670 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1671 aor.size)
1672 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1673 {
1674 ref_decomposed = true;
1675 tree base = ao_ref_base (&aor);
1676 poly_int64 moffset;
1677 HOST_WIDE_INT mcoffset;
1678 if (TREE_CODE (base) == MEM_REF
1679 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (r: &moffset)
1680 && moffset.is_constant (const_value: &mcoffset))
1681 {
1682 hash = iterative_hash_expr (TREE_OPERAND (base, 0), seed: 0);
1683 hash = iterative_hash_host_wide_int (val: mcoffset, val2: hash);
1684 }
1685 else
1686 {
1687 hash = iterative_hash_expr (tree: base, seed: 0);
1688 hash = iterative_hash_host_wide_int (val: offset, val2: hash);
1689 }
1690 hash = iterative_hash_host_wide_int (val: size, val2: hash);
1691 }
1692 else
1693 {
1694 ref_decomposed = false;
1695 hash = iterative_hash_expr (tree: aor.ref, seed: 0);
1696 aor.max_size = -1;
1697 }
1698 slot = memory_accesses.refs->find_slot_with_hash (comparable: &aor, hash, insert: INSERT);
1699 aor.max_size = saved_maxsize;
1700 if (*slot)
1701 {
1702 if (!(*slot)->ref_canonical
1703 && !operand_equal_p (*mem, (*slot)->mem.ref, flags: 0))
1704 {
1705 /* If we didn't yet canonicalize the hashtable ref (which
1706 we'll end up using for code insertion) and hit a second
1707 equal ref that is not structurally equivalent create
1708 a canonical ref which is a bare MEM_REF. */
1709 if (TREE_CODE (*mem) == MEM_REF
1710 || TREE_CODE (*mem) == TARGET_MEM_REF)
1711 {
1712 (*slot)->mem.ref = *mem;
1713 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1714 }
1715 else
1716 {
1717 tree ref_alias_type = reference_alias_ptr_type (*mem);
1718 unsigned int ref_align = get_object_alignment (*mem);
1719 tree ref_type = TREE_TYPE (*mem);
1720 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1721 unshare_expr (mem_base));
1722 if (TYPE_ALIGN (ref_type) != ref_align)
1723 ref_type = build_aligned_type (ref_type, ref_align);
1724 tree new_ref
1725 = fold_build2 (MEM_REF, ref_type, tmp,
1726 build_int_cst (ref_alias_type, mem_off));
1727 if ((*slot)->mem.volatile_p)
1728 TREE_THIS_VOLATILE (new_ref) = 1;
1729 (*slot)->mem.ref = new_ref;
1730 /* Make sure the recorded base and offset are consistent
1731 with the newly built ref. */
1732 if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
1733 ;
1734 else
1735 {
1736 (*slot)->mem.base = new_ref;
1737 (*slot)->mem.offset = 0;
1738 }
1739 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1740 && is_gimple_mem_ref_addr
1741 (TREE_OPERAND ((*slot)->mem.ref,
1742 0)));
1743 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1744 }
1745 (*slot)->ref_canonical = true;
1746 }
1747 ref = *slot;
1748 id = ref->id;
1749 }
1750 else
1751 {
1752 id = memory_accesses.refs_list.length ();
1753 ref = mem_ref_alloc (mem: &aor, hash, id);
1754 ref->ref_decomposed = ref_decomposed;
1755 memory_accesses.refs_list.safe_push (obj: ref);
1756 *slot = ref;
1757
1758 if (dump_file && (dump_flags & TDF_DETAILS))
1759 {
1760 fprintf (stream: dump_file, format: "Memory reference %u: ", id);
1761 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1762 fprintf (stream: dump_file, format: "\n");
1763 }
1764 }
1765
1766 record_mem_ref_loc (ref, stmt, loc: mem);
1767 }
1768 if (is_stored)
1769 {
1770 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1771 mark_ref_stored (ref, loop);
1772 }
  /* A non-simple memory op is also a read when it is a write.  */
1774 if (!is_stored || id == UNANALYZABLE_MEM_ID
1775 || ref->mem.ref == error_mark_node)
1776 {
1777 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1778 mark_ref_loaded (ref, loop);
1779 }
1780 init_lim_data (stmt)->ref = ref->id;
1781 return;
1782}
1783
1784static unsigned *bb_loop_postorder;
1785
1786 /* qsort comparison function to sort blocks after their loop father's postorder. */
1787
1788static int
1789sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1790 void *bb_loop_postorder_)
1791{
1792 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1793 basic_block bb1 = *(const basic_block *)bb1_;
1794 basic_block bb2 = *(const basic_block *)bb2_;
1795 class loop *loop1 = bb1->loop_father;
1796 class loop *loop2 = bb2->loop_father;
1797 if (loop1->num == loop2->num)
1798 return bb1->index - bb2->index;
1799 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1800}
1801
1802 /* qsort comparison function to sort ref locs after their loop father's postorder. */
1803
1804static int
1805sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1806 void *bb_loop_postorder_)
1807{
1808 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1809 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1810 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1811 class loop *loop1 = gimple_bb (g: loc1->stmt)->loop_father;
1812 class loop *loop2 = gimple_bb (g: loc2->stmt)->loop_father;
1813 if (loop1->num == loop2->num)
1814 return 0;
1815 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1816}
1817
1818/* Gathers memory references in loops. */
1819
1820static void
1821analyze_memory_references (bool store_motion)
1822{
1823 gimple_stmt_iterator bsi;
1824 basic_block bb, *bbs;
1825 class loop *outer;
1826 unsigned i, n;
1827
1828 /* Collect all basic-blocks in loops and sort them after their
1829 loops' postorder. */
1830 i = 0;
1831 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1832 FOR_EACH_BB_FN (bb, cfun)
1833 if (bb->loop_father != current_loops->tree_root)
1834 bbs[i++] = bb;
1835 n = i;
1836 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1837 bb_loop_postorder);
1838
1839 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1840 That results in better locality for all the bitmaps. It also
1841 automatically sorts the location list of gathered memory references
1842 after their loop postorder number, allowing it to be binary-searched. */
1843 for (i = 0; i < n; ++i)
1844 {
1845 basic_block bb = bbs[i];
1846 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1847 gather_mem_refs_stmt (loop: bb->loop_father, stmt: gsi_stmt (i: bsi));
1848 }
1849
1850 /* Verify the list of gathered memory references is sorted after their
1851 loop postorder number. */
1852 if (flag_checking)
1853 {
1854 im_mem_ref *ref;
1855 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1856 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1857 gcc_assert (sort_locs_in_loop_postorder_cmp
1858 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1859 bb_loop_postorder) <= 0);
1860 }
1861
1862 free (ptr: bbs);
1863
1864 if (!store_motion)
1865 return;
1866
1867 /* Propagate the information about accessed memory references up
1868 the loop hierarchy. */
1869 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1870 {
1871 /* Finalize the overall touched references (including subloops). */
1872 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1873 &memory_accesses.refs_stored_in_loop[loop->num]);
1874
1875 /* Propagate the information about accessed memory references up
1876 the loop hierarchy. */
1877 outer = loop_outer (loop);
1878 if (outer == current_loops->tree_root)
1879 continue;
1880
1881 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1882 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1883 }
1884}
1885
1886/* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1887 tree_to_aff_combination_expand. */
1888
1889static bool
1890mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1891 hash_map<tree, name_expansion *> **ttae_cache,
1892 bool tbaa_p)
1893{
1894 gcc_checking_assert (mem1->mem.ref != error_mark_node
1895 && mem2->mem.ref != error_mark_node);
1896
1897 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1898 object and their offsets differ in such a way that the locations cannot
1899 overlap, then they cannot alias. */
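  /* As an illustrative example (not taken from the sources): for two
     accesses a[i] and a[i + 2] with 4-byte elements the expanded affine
     offsets computed below differ by 8 bytes while both access sizes are
     4 bytes, so aff_comb_cannot_overlap_p can prove the locations are
     disjoint even when TBAA alone cannot distinguish them.  */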
1900 poly_widest_int size1, size2;
1901 aff_tree off1, off2;
1902
1903 /* Perform basic offset and type-based disambiguation. */
1904 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1905 return false;
1906
1907 /* The expansion of addresses may be a bit expensive, thus we only do
1908 the check at -O2 and higher optimization levels. */
1909 if (optimize < 2)
1910 return true;
1911
1912 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1913 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1914 aff_combination_expand (&off1, ttae_cache);
1915 aff_combination_expand (&off2, ttae_cache);
1916 aff_combination_scale (&off1, -1);
1917 aff_combination_add (&off2, &off1);
1918
1919 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1920 return false;
1921
1922 return true;
1923}
1924
1925/* Compare function for bsearch searching for reference locations
1926 in a loop. */
1927
1928static int
1929find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1930 void *bb_loop_postorder_)
1931{
1932 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1933 class loop *loop = (class loop *)const_cast<void *>(loop_);
1934 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1935 class loop *loc_loop = gimple_bb (g: loc->stmt)->loop_father;
1936 if (loop->num == loc_loop->num
1937 || flow_loop_nested_p (loop, loc_loop))
1938 return 0;
1939 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1940 ? -1 : 1);
1941}
1942
1943/* Iterates over all locations of REF in LOOP and its subloops calling
1944 fn.operator() with the location as argument. When that operator
1945 returns true the iteration is stopped and true is returned.
1946 Otherwise false is returned. */
1947
1948template <typename FN>
1949static bool
1950for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1951{
1952 unsigned i;
1953 mem_ref_loc *loc;
1954
1955 /* Search for the cluster of locs in the accesses_in_loop vector
1956 which is sorted after postorder index of the loop father. */
1957 loc = ref->accesses_in_loop.bsearch (key: loop, cmp: find_ref_loc_in_loop_cmp,
1958 data: bb_loop_postorder);
1959 if (!loc)
1960 return false;
1961
1962 /* We have found one location inside loop or its sub-loops. Iterate
1963 both forward and backward to cover the whole cluster. */
1964 i = loc - ref->accesses_in_loop.address ();
1965 while (i > 0)
1966 {
1967 --i;
1968 mem_ref_loc *l = &ref->accesses_in_loop[i];
1969 if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt)))
1970 break;
1971 if (fn (l))
1972 return true;
1973 }
1974 for (i = loc - ref->accesses_in_loop.address ();
1975 i < ref->accesses_in_loop.length (); ++i)
1976 {
1977 mem_ref_loc *l = &ref->accesses_in_loop[i];
1978 if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt)))
1979 break;
1980 if (fn (l))
1981 return true;
1982 }
1983
1984 return false;
1985}
1986
1987/* Rewrites location LOC by TMP_VAR. */
1988
1989class rewrite_mem_ref_loc
1990{
1991public:
1992 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1993 bool operator () (mem_ref_loc *loc);
1994 tree tmp_var;
1995};
1996
1997bool
1998rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1999{
2000 *loc->ref = tmp_var;
2001 update_stmt (s: loc->stmt);
2002 return false;
2003}
2004
2005/* Rewrites all references to REF in LOOP by variable TMP_VAR. */
2006
2007static void
2008rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
2009{
2010 for_all_locs_in_loop (loop, ref, fn: rewrite_mem_ref_loc (tmp_var));
2011}
2012
2013/* Stores the first reference location in LOCP. */
2014
2015class first_mem_ref_loc_1
2016{
2017public:
2018 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
2019 bool operator () (mem_ref_loc *loc);
2020 mem_ref_loc **locp;
2021};
2022
2023bool
2024first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
2025{
2026 *locp = loc;
2027 return true;
2028}
2029
2030/* Returns the first reference location to REF in LOOP. */
2031
2032static mem_ref_loc *
2033first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
2034{
2035 mem_ref_loc *locp = NULL;
2036 for_all_locs_in_loop (loop, ref, fn: first_mem_ref_loc_1 (&locp));
2037 return locp;
2038}
2039
2040/* Helper function for execute_sm. Emit code to store TMP_VAR into
2041 MEM along edge EX.
2042
2043 The store is only done if MEM has changed. We do this so no
2044 changes to MEM occur on code paths that did not originally store
2045 into it.
2046
2047 The common case for execute_sm will transform:
2048
2049 for (...) {
2050 if (foo)
2051 stuff;
2052 else
2053 MEM = TMP_VAR;
2054 }
2055
2056 into:
2057
2058 lsm = MEM;
2059 for (...) {
2060 if (foo)
2061 stuff;
2062 else
2063 lsm = TMP_VAR;
2064 }
2065 MEM = lsm;
2066
2067 This function will generate:
2068
2069 lsm = MEM;
2070
2071 lsm_flag = false;
2072 ...
2073 for (...) {
2074 if (foo)
2075 stuff;
2076 else {
2077 lsm = TMP_VAR;
2078 lsm_flag = true;
2079 }
2080 }
2081 if (lsm_flag) <--
2082 MEM = lsm; <-- (X)
2083
2084 In case MEM and TMP_VAR are NULL the function will return the then
2085 block so the caller can insert (X) and other related stmts.
2086*/
2087
2088static basic_block
2089execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2090 edge preheader, hash_set <basic_block> *flag_bbs,
2091 edge &append_cond_position, edge &last_cond_fallthru)
2092{
2093 basic_block new_bb, then_bb, old_dest;
2094 bool loop_has_only_one_exit;
2095 edge then_old_edge;
2096 gimple_stmt_iterator gsi;
2097 gimple *stmt;
2098 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2099
2100 profile_count count_sum = profile_count::zero ();
2101 int nbbs = 0, ncount = 0;
2102 profile_probability flag_probability = profile_probability::uninitialized ();
2103
2104 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2105 at loop exit.
2106
2107 This code may look fancy, but it cannot update the profile very
2108 realistically because we do not know the probability that the flag will
2109 be true at a given loop exit.
2110 
2111 We look for two interesting extremes
2112 - when the exit is dominated by a block setting the flag, we know it will
2113 always be true. This is a common case.
2114 - when all blocks setting the flag have very low frequency we know
2115 it will likely be false.
2116 In all other cases we default to 2/3 for the flag being true. */
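  /* As a worked example (hypothetical numbers): if the blocks setting the
     flag have a combined count of 30 and the preheader count is 100, the
     flag probability is estimated as 30/100 = 0.3; a combined count of 90
     would give 0.9, which the cap below reduces to the 2/3 default.  */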
2117
2118 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2119 it != flag_bbs->end (); ++it)
2120 {
2121 if ((*it)->count.initialized_p ())
2122 count_sum += (*it)->count, ncount ++;
2123 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2124 flag_probability = profile_probability::always ();
2125 nbbs++;
2126 }
2127
2128 profile_probability cap
2129 = profile_probability::guessed_always ().apply_scale (num: 2, den: 3);
2130
2131 if (flag_probability.initialized_p ())
2132 ;
2133 else if (ncount == nbbs
2134 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2135 {
2136 flag_probability = count_sum.probability_in (overall: preheader->count ());
2137 if (flag_probability > cap)
2138 flag_probability = cap;
2139 }
2140
2141 if (!flag_probability.initialized_p ())
2142 flag_probability = cap;
2143
2144 /* ?? Insert store after previous store if applicable. See note
2145 below. */
2146 if (append_cond_position)
2147 ex = append_cond_position;
2148
2149 loop_has_only_one_exit = single_pred_p (bb: ex->dest);
2150
2151 if (loop_has_only_one_exit)
2152 ex = split_block_after_labels (ex->dest);
2153 else
2154 {
2155 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2156 !gsi_end_p (i: gpi); gsi_next (i: &gpi))
2157 {
2158 gphi *phi = gpi.phi ();
2159 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
2160 continue;
2161
2162 /* When the destination has a non-virtual PHI node with multiple
2163 predecessors make sure we preserve the PHI structure by
2164 forcing a forwarder block so that hoisting of that PHI will
2165 still work. */
2166 split_edge (ex);
2167 break;
2168 }
2169 }
2170
2171 old_dest = ex->dest;
2172 new_bb = split_edge (ex);
2173 if (append_cond_position)
2174 new_bb->count += last_cond_fallthru->count ();
2175 then_bb = create_empty_bb (new_bb);
2176 then_bb->count = new_bb->count.apply_probability (prob: flag_probability);
2177 if (irr)
2178 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2179 add_bb_to_loop (then_bb, new_bb->loop_father);
2180
2181 gsi = gsi_start_bb (bb: new_bb);
2182 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2183 NULL_TREE, NULL_TREE);
2184 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2185
2186 /* Insert actual store. */
2187 if (mem)
2188 {
2189 gsi = gsi_start_bb (bb: then_bb);
2190 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2191 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2192 }
2193
2194 edge e1 = single_succ_edge (bb: new_bb);
2195 edge e2 = make_edge (new_bb, then_bb,
2196 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2197 e2->probability = flag_probability;
2198
2199 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2200 e1->flags &= ~EDGE_FALLTHRU;
2201
2202 e1->probability = flag_probability.invert ();
2203
2204 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2205 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2206
2207 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2208
2209 if (append_cond_position)
2210 {
2211 basic_block prevbb = last_cond_fallthru->src;
2212 redirect_edge_succ (last_cond_fallthru, new_bb);
2213 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2214 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2215 recompute_dominator (CDI_DOMINATORS, old_dest));
2216 }
2217
2218 /* ?? Because stores may alias, they must happen in the exact
2219 sequence they originally happened. Save the position right after
2220 the (_lsm) store we just created so we can continue appending after
2221 it and maintain the original order. */
2222 append_cond_position = then_old_edge;
2223 last_cond_fallthru = find_edge (new_bb, old_dest);
2224
2225 if (!loop_has_only_one_exit)
2226 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2227 !gsi_end_p (i: gpi); gsi_next (i: &gpi))
2228 {
2229 gphi *phi = gpi.phi ();
2230 unsigned i;
2231
2232 for (i = 0; i < gimple_phi_num_args (gs: phi); i++)
2233 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2234 {
2235 tree arg = gimple_phi_arg_def (gs: phi, index: i);
2236 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2237 update_stmt (s: phi);
2238 }
2239 }
2240
2241 return then_bb;
2242}
2243
2244 /* When REF is stored to at the location, set a flag indicating the store. */
2245
2246class sm_set_flag_if_changed
2247{
2248public:
2249 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2250 : flag (flag_), bbs (bbs_) {}
2251 bool operator () (mem_ref_loc *loc);
2252 tree flag;
2253 hash_set <basic_block> *bbs;
2254};
2255
2256bool
2257sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2258{
2259 /* Only set the flag for writes. */
2260 if (is_gimple_assign (gs: loc->stmt)
2261 && gimple_assign_lhs_ptr (gs: loc->stmt) == loc->ref)
2262 {
2263 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2264 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2265 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2266 bbs->add (k: gimple_bb (g: stmt));
2267 }
2268 return false;
2269}
2270
2271/* Helper function for execute_sm. On every location where REF is
2272 set, set an appropriate flag indicating the store. */
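/* For example (illustrative): for every in-loop store

     MEM = x;

   this inserts

     MEM = x;
     lsm_flag = true;

   where lsm_flag is the boolean temporary created below.  */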
2273
2274static tree
2275execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2276 hash_set <basic_block> *bbs)
2277{
2278 tree flag;
2279 char *str = get_lsm_tmp_name (ref: ref->mem.ref, n: ~0, suffix: "_flag");
2280 flag = create_tmp_reg (boolean_type_node, str);
2281 for_all_locs_in_loop (loop, ref, fn: sm_set_flag_if_changed (flag, bbs));
2282 return flag;
2283}
2284
2285struct sm_aux
2286{
2287 tree tmp_var;
2288 tree store_flag;
2289 hash_set <basic_block> flag_bbs;
2290};
2291
2292 /* Executes store motion of memory reference REF from LOOP.
2293 The initialization of the temporary variable is put in the preheader
2294 of the loop, and assignments to the reference from the temporary
2295 variable are emitted on the loop exits later, see execute_sm_exit. */
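/* As a schematic illustration only (not taken from a testcase), for a
   candidate reference g store motion amounts to

     for (...)
       g = ...;

   =>

     lsm = g;
     for (...)
       lsm = ...;
     g = lsm;   <-- emitted on the exits, see execute_sm_exit

   possibly with the store guarded by a flag as in execute_sm_if_changed.  */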
2296
2297static sm_aux *
2298execute_sm (class loop *loop, im_mem_ref *ref,
2299 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2300 bool use_other_flag_var)
2301{
2302 gassign *load;
2303 struct fmt_data fmt_data;
2304 struct lim_aux_data *lim_data;
2305 bool multi_threaded_model_p = false;
2306 gimple_stmt_iterator gsi;
2307 sm_aux *aux = new sm_aux;
2308
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 {
2311 fprintf (stream: dump_file, format: "Executing store motion of ");
2312 print_generic_expr (dump_file, ref->mem.ref);
2313 fprintf (stream: dump_file, format: " from loop %d\n", loop->num);
2314 }
2315
2316 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2317 get_lsm_tmp_name (ref: ref->mem.ref, n: ~0));
2318
2319 fmt_data.loop = loop;
2320 fmt_data.orig_loop = loop;
2321 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2322
2323 bool always_stored = ref_always_accessed_p (loop, ref, true);
2324 if (maybe_mt
2325 && (bb_in_transaction (bb: loop_preheader_edge (loop)->src)
2326 || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)))
2327 multi_threaded_model_p = true;
2328
2329 if (multi_threaded_model_p && !use_other_flag_var)
2330 aux->store_flag
2331 = execute_sm_if_changed_flag_set (loop, ref, bbs: &aux->flag_bbs);
2332 else
2333 aux->store_flag = NULL_TREE;
2334
2335 /* Remember variable setup. */
2336 aux_map.put (k: ref, v: aux);
2337
2338 rewrite_mem_refs (loop, ref, tmp_var: aux->tmp_var);
2339
2340 /* Emit the load code at the position of the first reference location
2341 in the loop, so that we are sure it will be processed
2342 by move_computations after all dependencies. */
2343 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2344
2345 /* Avoid doing a load if there was no load of the ref in the loop.
2346 Especially when the ref is not always stored we cannot optimize it
2347 away later. But when it is not always stored we must then use a
2348 conditional store. */
2349 if ((!always_stored && !multi_threaded_model_p)
2350 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2351 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2352 else
2353 {
2354 /* If not emitting a load mark the uninitialized state on the
2355 loop entry as not to be warned for. */
2356 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2357 suppress_warning (uninit, OPT_Wuninitialized);
2358 load = gimple_build_assign (aux->tmp_var, uninit);
2359 }
2360 lim_data = init_lim_data (stmt: load);
2361 lim_data->max_loop = loop;
2362 lim_data->tgt_loop = loop;
2363 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2364
2365 if (aux->store_flag)
2366 {
2367 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2368 lim_data = init_lim_data (stmt: load);
2369 lim_data->max_loop = loop;
2370 lim_data->tgt_loop = loop;
2371 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2372 }
2373
2374 return aux;
2375}
2376
2377 /* sm_ord is used for ordinary stores for which we can retain the order
2378 with respect to other stores.
2379 sm_unord is used for conditionally executed stores which need to be
2380 able to execute in arbitrary order with respect to other stores.
2381 sm_other is used for stores we do not try to apply store motion to. */
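/* A rough, hypothetical illustration of the three kinds:

     for (...)
       {
	 a = ...;	// sm_ord: unconditional store whose order relative
	 b = ...;	// to other sm_ord stores is preserved when the
			// stores are materialized on the exits
	 if (cond)
	   c = ...;	// sm_unord: only conditionally executed, so it is
			// materialized under a store flag and may not keep
			// its order relative to other stores
       }

   sm_other entries are left in place in the loop; when their stored value
   is known (seq_entry::from) they may still be re-issued on an exit to
   keep the ordered sequence intact.  */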
2382enum sm_kind { sm_ord, sm_unord, sm_other };
2383struct seq_entry
2384{
2385 seq_entry () = default;
2386 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2387 : first (f), second (k), from (fr) {}
2388 unsigned first;
2389 sm_kind second;
2390 tree from;
2391};
2392
2393static void
2394execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2395 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2396 edge &append_cond_position, edge &last_cond_fallthru,
2397 bitmap clobbers_to_prune)
2398{
2399 /* Sink the stores to exit from the loop. */
2400 for (unsigned i = seq.length (); i > 0; --i)
2401 {
2402 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2403 if (seq[i-1].second == sm_other)
2404 {
2405 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2406 gassign *store;
2407 if (ref->mem.ref == error_mark_node)
2408 {
2409 tree lhs = gimple_assign_lhs (gs: ref->accesses_in_loop[0].stmt);
2410 if (dump_file && (dump_flags & TDF_DETAILS))
2411 {
2412 fprintf (stream: dump_file, format: "Re-issuing dependent ");
2413 print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
2414 fprintf (stream: dump_file, format: " of ");
2415 print_generic_expr (dump_file, lhs);
2416 fprintf (stream: dump_file, format: " from loop %d on exit %d -> %d\n",
2417 loop->num, ex->src->index, ex->dest->index);
2418 }
2419 store = gimple_build_assign (unshare_expr (lhs),
2420 unshare_expr (seq[i-1].from));
2421 bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
2422 }
2423 else
2424 {
2425 if (dump_file && (dump_flags & TDF_DETAILS))
2426 {
2427 fprintf (stream: dump_file, format: "Re-issuing dependent store of ");
2428 print_generic_expr (dump_file, ref->mem.ref);
2429 fprintf (stream: dump_file, format: " from loop %d on exit %d -> %d\n",
2430 loop->num, ex->src->index, ex->dest->index);
2431 }
2432 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2433 seq[i-1].from);
2434 }
2435 gsi_insert_on_edge (ex, store);
2436 }
2437 else
2438 {
2439 sm_aux *aux = *aux_map.get (k: ref);
2440 if (!aux->store_flag || kind == sm_ord)
2441 {
2442 gassign *store;
2443 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2444 aux->tmp_var);
2445 gsi_insert_on_edge (ex, store);
2446 }
2447 else
2448 execute_sm_if_changed (ex, mem: ref->mem.ref, tmp_var: aux->tmp_var,
2449 flag: aux->store_flag,
2450 preheader: loop_preheader_edge (loop), flag_bbs: &aux->flag_bbs,
2451 append_cond_position, last_cond_fallthru);
2452 }
2453 }
2454}
2455
2456/* Push the SM candidate at index PTR in the sequence SEQ down until
2457 we hit the next SM candidate. Return true if that went OK and
2458 false if we could not disambiguate against another unrelated ref.
2459 Update *AT to the index where the candidate now resides. */
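/* For example (illustrative): with SEQ = { A:sm_ord, B:sm_other(from=NULL),
   C } and PTR = 2 pointing at C, C is swapped past B provided
   refs_independent_p (C, B) holds, and the walk stops at A because an
   sm_ord entry marks the tail of the ordered sequence; *AT then becomes 1.
   If C cannot be disambiguated against B, false is returned.  */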
2460
2461static bool
2462sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2463{
2464 *at = ptr;
2465 for (; ptr > 0; --ptr)
2466 {
2467 seq_entry &new_cand = seq[ptr];
2468 seq_entry &against = seq[ptr-1];
2469 if (against.second == sm_ord
2470 || (against.second == sm_other && against.from != NULL_TREE))
2471 /* Found the tail of the sequence. */
2472 break;
2473 /* We may not ignore self-dependences here. */
2474 if (new_cand.first == against.first
2475 /* ??? We could actually handle clobbers here, but not easily
2476 with LIMs dependence analysis. */
2477 || (memory_accesses.refs_list[new_cand.first]->mem.ref
2478 == error_mark_node)
2479 || (memory_accesses.refs_list[against.first]->mem.ref
2480 == error_mark_node)
2481 || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2482 memory_accesses.refs_list[against.first],
2483 false))
2484 /* ??? Prune new_cand from the list of refs to apply SM to. */
2485 return false;
2486 std::swap (a&: new_cand, b&: against);
2487 *at = ptr - 1;
2488 }
2489 return true;
2490}
2491
2492/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2493 walking backwards from VDEF (or the end of BB if VDEF is NULL). */
2494
2495static int
2496sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2497 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2498 bitmap refs_not_supported, bool forked,
2499 bitmap fully_visited)
2500{
2501 if (!vdef)
2502 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (i: gsi);
2503 gsi_prev (i: &gsi))
2504 {
2505 vdef = gimple_vdef (g: gsi_stmt (i: gsi));
2506 if (vdef)
2507 break;
2508 }
2509 if (!vdef)
2510 {
2511 gphi *vphi = get_virtual_phi (bb);
2512 if (vphi)
2513 vdef = gimple_phi_result (gs: vphi);
2514 }
2515 if (!vdef)
2516 {
2517 if (single_pred_p (bb))
2518 /* This handles the perfect nest case. */
2519 return sm_seq_valid_bb (loop, bb: single_pred (bb), vdef,
2520 seq, refs_not_in_seq, refs_not_supported,
2521 forked, fully_visited);
2522 return 0;
2523 }
2524 do
2525 {
2526 gimple *def = SSA_NAME_DEF_STMT (vdef);
2527 if (gimple_bb (g: def) != bb)
2528 {
2529 /* If we forked by processing a PHI do not allow our walk to
2530 merge again until we handle that robustly. */
2531 if (forked)
2532 {
2533 /* Mark refs_not_in_seq as unsupported. */
2534 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2535 return 1;
2536 }
2537 /* Otherwise it doesn't really matter if we end up in different
2538 BBs. */
2539 bb = gimple_bb (g: def);
2540 }
2541 if (gphi *phi = dyn_cast <gphi *> (p: def))
2542 {
2543 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2544 this is still linear.
2545 Eventually we want to cache intermediate results per BB
2546 (but we can't easily cache for different exits?). */
2547 /* Stop at PHIs with possible backedges. */
2548 if (bb == bb->loop_father->header
2549 || bb->flags & BB_IRREDUCIBLE_LOOP)
2550 {
2551 /* Mark refs_not_in_seq as unsupported. */
2552 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2553 return 1;
2554 }
2555 if (gimple_phi_num_args (gs: phi) == 1)
2556 return sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src,
2557 vdef: gimple_phi_arg_def (gs: phi, index: 0), seq,
2558 refs_not_in_seq, refs_not_supported,
2559 forked: false, fully_visited);
2560 if (bitmap_bit_p (fully_visited,
2561 SSA_NAME_VERSION (gimple_phi_result (phi))))
2562 return 1;
2563 auto_vec<seq_entry> first_edge_seq;
2564 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2565 int eret;
2566 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2567 eret = sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src,
2568 vdef: gimple_phi_arg_def (gs: phi, index: 0),
2569 seq&: first_edge_seq,
2570 refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported,
2571 forked: true, fully_visited);
2572 if (eret != 1)
2573 return -1;
2574 /* Simplify our lives by pruning the sequence of !sm_ord. */
2575 while (!first_edge_seq.is_empty ()
2576 && first_edge_seq.last ().second != sm_ord)
2577 first_edge_seq.pop ();
2578 for (unsigned int i = 1; i < gimple_phi_num_args (gs: phi); ++i)
2579 {
2580 tree vuse = gimple_phi_arg_def (gs: phi, index: i);
2581 edge e = gimple_phi_arg_edge (phi, i);
2582 auto_vec<seq_entry> edge_seq;
2583 bitmap_and_compl (tem_refs_not_in_seq,
2584 refs_not_in_seq, refs_not_supported);
2585 /* If we've marked all refs we search for as unsupported
2586 we can stop processing and use the sequence as before
2587 the PHI. */
2588 if (bitmap_empty_p (map: tem_refs_not_in_seq))
2589 return 1;
2590 eret = sm_seq_valid_bb (loop, bb: e->src, vdef: vuse, seq&: edge_seq,
2591 refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported,
2592 forked: true, fully_visited);
2593 if (eret != 1)
2594 return -1;
2595 /* Simplify our lives by pruning the sequence of !sm_ord. */
2596 while (!edge_seq.is_empty ()
2597 && edge_seq.last ().second != sm_ord)
2598 edge_seq.pop ();
2599 unsigned min_len = MIN(first_edge_seq.length (),
2600 edge_seq.length ());
2601 /* Incrementally merge seqs into first_edge_seq. */
2602 int first_uneq = -1;
2603 auto_vec<seq_entry, 2> extra_refs;
2604 for (unsigned int i = 0; i < min_len; ++i)
2605 {
2606 /* ??? We could merge more intelligently when we face a different
2607 order by performing additional sinking operations in one sequence.
2608 For now we simply mark them as to be processed by the
2609 non-order-preserving SM code. */
2610 if (first_edge_seq[i].first != edge_seq[i].first)
2611 {
2612 if (first_edge_seq[i].second == sm_ord)
2613 bitmap_set_bit (refs_not_supported,
2614 first_edge_seq[i].first);
2615 if (edge_seq[i].second == sm_ord)
2616 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2617 first_edge_seq[i].second = sm_other;
2618 first_edge_seq[i].from = NULL_TREE;
2619 /* Record the dropped refs for later processing. */
2620 if (first_uneq == -1)
2621 first_uneq = i;
2622 extra_refs.safe_push (obj: seq_entry (edge_seq[i].first,
2623 sm_other, NULL_TREE));
2624 }
2625 /* sm_other prevails. */
2626 else if (first_edge_seq[i].second != edge_seq[i].second)
2627 {
2628 /* Make sure the ref is marked as not supported. */
2629 bitmap_set_bit (refs_not_supported,
2630 first_edge_seq[i].first);
2631 first_edge_seq[i].second = sm_other;
2632 first_edge_seq[i].from = NULL_TREE;
2633 }
2634 else if (first_edge_seq[i].second == sm_other
2635 && first_edge_seq[i].from != NULL_TREE
2636 && (edge_seq[i].from == NULL_TREE
2637 || !operand_equal_p (first_edge_seq[i].from,
2638 edge_seq[i].from, flags: 0)))
2639 first_edge_seq[i].from = NULL_TREE;
2640 }
2641 /* Any excess elements become sm_other since they are now
2642 conditionally executed. */
2643 if (first_edge_seq.length () > edge_seq.length ())
2644 {
2645 for (unsigned i = edge_seq.length ();
2646 i < first_edge_seq.length (); ++i)
2647 {
2648 if (first_edge_seq[i].second == sm_ord)
2649 bitmap_set_bit (refs_not_supported,
2650 first_edge_seq[i].first);
2651 first_edge_seq[i].second = sm_other;
2652 }
2653 }
2654 else if (edge_seq.length () > first_edge_seq.length ())
2655 {
2656 if (first_uneq == -1)
2657 first_uneq = first_edge_seq.length ();
2658 for (unsigned i = first_edge_seq.length ();
2659 i < edge_seq.length (); ++i)
2660 {
2661 if (edge_seq[i].second == sm_ord)
2662 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2663 extra_refs.safe_push (obj: seq_entry (edge_seq[i].first,
2664 sm_other, NULL_TREE));
2665 }
2666 }
2667 /* Put unmerged refs at first_uneq to force dependence checking
2668 on them. */
2669 if (first_uneq != -1)
2670 {
2671 /* Missing ordered_splice_at. */
2672 if ((unsigned)first_uneq == first_edge_seq.length ())
2673 first_edge_seq.safe_splice (src: extra_refs);
2674 else
2675 {
2676 unsigned fes_length = first_edge_seq.length ();
2677 first_edge_seq.safe_grow (len: fes_length
2678 + extra_refs.length ());
2679 memmove (dest: &first_edge_seq[first_uneq + extra_refs.length ()],
2680 src: &first_edge_seq[first_uneq],
2681 n: (fes_length - first_uneq) * sizeof (seq_entry));
2682 memcpy (dest: &first_edge_seq[first_uneq],
2683 src: extra_refs.address (),
2684 n: extra_refs.length () * sizeof (seq_entry));
2685 }
2686 }
2687 }
2688 /* Use the sequence from the first edge and push SMs down. */
2689 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2690 {
2691 unsigned id = first_edge_seq[i].first;
2692 seq.safe_push (obj: first_edge_seq[i]);
2693 unsigned new_idx;
2694 if ((first_edge_seq[i].second == sm_ord
2695 || (first_edge_seq[i].second == sm_other
2696 && first_edge_seq[i].from != NULL_TREE))
2697 && !sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx))
2698 {
2699 if (first_edge_seq[i].second == sm_ord)
2700 bitmap_set_bit (refs_not_supported, id);
2701 /* Mark it sm_other. */
2702 seq[new_idx].second = sm_other;
2703 seq[new_idx].from = NULL_TREE;
2704 }
2705 }
2706 bitmap_set_bit (fully_visited,
2707 SSA_NAME_VERSION (gimple_phi_result (phi)));
2708 return 1;
2709 }
2710 lim_aux_data *data = get_lim_data (stmt: def);
2711 im_mem_ref *ref = memory_accesses.refs_list[data->ref];
2712 if (data->ref == UNANALYZABLE_MEM_ID)
2713 return -1;
2714 /* Stop at memory references which we can't move. */
2715 else if ((ref->mem.ref == error_mark_node
2716 /* We can move end-of-storage/object down. */
2717 && !gimple_clobber_p (s: ref->accesses_in_loop[0].stmt,
2718 kind: CLOBBER_STORAGE_END)
2719 && !gimple_clobber_p (s: ref->accesses_in_loop[0].stmt,
2720 kind: CLOBBER_OBJECT_END))
2721 || TREE_THIS_VOLATILE (ref->mem.ref))
2722 {
2723 /* Mark refs_not_in_seq as unsupported. */
2724 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2725 return 1;
2726 }
2727 /* One of the stores we want to apply SM to and we've not yet seen. */
2728 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2729 {
2730 seq.safe_push (obj: seq_entry (data->ref, sm_ord));
2731
2732 /* 1) push it down the queue until a SMed
2733 and not ignored ref is reached, skipping all not SMed refs
2734 and ignored refs via non-TBAA disambiguation. */
2735 unsigned new_idx;
2736 if (!sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx)
2737 /* If that fails but we did not fork yet continue, we'll see
2738 to re-materialize all of the stores in the sequence then.
2739 Further stores will only be pushed up to this one. */
2740 && forked)
2741 {
2742 bitmap_set_bit (refs_not_supported, data->ref);
2743 /* Mark it sm_other. */
2744 seq[new_idx].second = sm_other;
2745 }
2746
2747 /* 2) check whether we've seen all refs we want to SM and if so
2748 declare success for the active exit */
2749 if (bitmap_empty_p (map: refs_not_in_seq))
2750 return 1;
2751 }
2752 else
2753 /* Another store not part of the final sequence. Simply push it. */
2754 seq.safe_push (obj: seq_entry (data->ref, sm_other,
2755 gimple_assign_rhs1 (gs: def)));
2756
2757 vdef = gimple_vuse (g: def);
2758 }
2759 while (1);
2760}
2761
2762/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2763 edges of the LOOP. */
2764
2765static void
2766hoist_memory_references (class loop *loop, bitmap mem_refs,
2767 const vec<edge> &exits)
2768{
2769 im_mem_ref *ref;
2770 unsigned i;
2771 bitmap_iterator bi;
2772
2773 /* There's a special case where we can use ordered re-materialization for
2774 conditionally executed stores, which is when all stores in the loop
2775 happen in the same basic-block. In that case we know we'll reach
2776 all stores and thus can simply process that BB and emit a single
2777 conditional block of ordered materializations. See PR102436. */
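  /* For instance (illustrative only): in

       for (...)
	 if (cond)
	   {
	     a = ...;
	     b = ...;
	   }

     both stores are in the same conditional basic-block, so a single
     flag can guard one ordered block of materializations of a and b
     on each exit.  */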
2778 basic_block single_store_bb = NULL;
2779 EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2780 0, i, bi)
2781 {
2782 bool fail = false;
2783 ref = memory_accesses.refs_list[i];
2784 for (auto loc : ref->accesses_in_loop)
2785 if (!gimple_vdef (g: loc.stmt))
2786 ;
2787 else if (!single_store_bb)
2788 {
2789 single_store_bb = gimple_bb (g: loc.stmt);
2790 bool conditional = false;
2791 for (edge e : exits)
2792 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2793 {
2794 /* Conditional as seen from e. */
2795 conditional = true;
2796 break;
2797 }
2798 if (!conditional)
2799 {
2800 fail = true;
2801 break;
2802 }
2803 }
2804 else if (single_store_bb != gimple_bb (g: loc.stmt))
2805 {
2806 fail = true;
2807 break;
2808 }
2809 if (fail)
2810 {
2811 single_store_bb = NULL;
2812 break;
2813 }
2814 }
2815 if (single_store_bb)
2816 {
2817 /* Analyze the single block with stores. */
2818 auto_bitmap fully_visited;
2819 auto_bitmap refs_not_supported;
2820 auto_bitmap refs_not_in_seq;
2821 auto_vec<seq_entry> seq;
2822 bitmap_copy (refs_not_in_seq, mem_refs);
2823 int res = sm_seq_valid_bb (loop, bb: single_store_bb, NULL_TREE,
2824 seq, refs_not_in_seq, refs_not_supported,
2825 forked: false, fully_visited);
2826 if (res != 1)
2827 {
2828 /* Unhandled refs can still fail this. */
2829 bitmap_clear (mem_refs);
2830 return;
2831 }
2832
2833 /* We cannot handle sm_other since we neither remember the
2834 stored location nor the value at the point we execute them. */
2835 for (unsigned i = 0; i < seq.length (); ++i)
2836 {
2837 unsigned new_i;
2838 if (seq[i].second == sm_other
2839 && seq[i].from != NULL_TREE)
2840 seq[i].from = NULL_TREE;
2841 else if ((seq[i].second == sm_ord
2842 || (seq[i].second == sm_other
2843 && seq[i].from != NULL_TREE))
2844 && !sm_seq_push_down (seq, ptr: i, at: &new_i))
2845 {
2846 bitmap_set_bit (refs_not_supported, seq[new_i].first);
2847 seq[new_i].second = sm_other;
2848 seq[new_i].from = NULL_TREE;
2849 }
2850 }
2851 bitmap_and_compl_into (mem_refs, refs_not_supported);
2852 if (bitmap_empty_p (map: mem_refs))
2853 return;
2854
2855 /* Prune seq. */
2856 while (seq.last ().second == sm_other
2857 && seq.last ().from == NULL_TREE)
2858 seq.pop ();
2859
2860 hash_map<im_mem_ref *, sm_aux *> aux_map;
2861
2862 /* Execute SM but delay the store materialization for ordered
2863 sequences on exit. Remember a created flag var and make
2864 sure to re-use it. */
2865 sm_aux *flag_var_aux = nullptr;
2866 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2867 {
2868 ref = memory_accesses.refs_list[i];
2869 sm_aux *aux = execute_sm (loop, ref, aux_map, maybe_mt: true,
2870 use_other_flag_var: flag_var_aux != nullptr);
2871 if (aux->store_flag)
2872 flag_var_aux = aux;
2873 }
2874
2875 /* Materialize ordered store sequences on exits. */
2876 edge e;
2877 auto_bitmap clobbers_to_prune;
2878 FOR_EACH_VEC_ELT (exits, i, e)
2879 {
2880 edge append_cond_position = NULL;
2881 edge last_cond_fallthru = NULL;
2882 edge insert_e = e;
2883 /* Construct the single flag variable control flow and insert
2884 the ordered seq of stores in the then block. With
2885 -fstore-data-races we can do the stores unconditionally. */
2886 if (flag_var_aux)
2887 insert_e
2888 = single_pred_edge
2889 (bb: execute_sm_if_changed (ex: e, NULL_TREE, NULL_TREE,
2890 flag: flag_var_aux->store_flag,
2891 preheader: loop_preheader_edge (loop),
2892 flag_bbs: &flag_var_aux->flag_bbs,
2893 append_cond_position,
2894 last_cond_fallthru));
2895 execute_sm_exit (loop, ex: insert_e, seq, aux_map, kind: sm_ord,
2896 append_cond_position, last_cond_fallthru,
2897 clobbers_to_prune);
2898 gsi_commit_one_edge_insert (insert_e, NULL);
2899 }
2900
2901 /* Remove clobbers inside the loop we re-materialized on exits. */
2902 EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
2903 {
2904 gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
2905 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2906 unlink_stmt_vdef (stmt);
2907 release_defs (stmt);
2908 gimple_set_vdef (g: stmt, NULL_TREE);
2909 gsi_remove (&gsi, true);
2910 }
2911
2912 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2913 iter != aux_map.end (); ++iter)
2914 delete (*iter).second;
2915
2916 return;
2917 }
2918
2919 /* To address PR57359, before actually applying store motion check
2920 the candidates found for validity with regard to reordering
2921 relative to other stores, which up to this point we disambiguated
2922 using TBAA, which is not valid for that purpose.
2923 What matters is the order of the last stores to the mem_refs
2924 with respect to the other stores of the loop at the point of the
2925 loop exits. */
2926
2927 /* For each exit compute the store order, pruning from mem_refs
2928 on the fly. */
2929 /* The complexity of this is at least
2930 O(number of exits * number of SM refs) but more approaching
2931 O(number of exits * number of SM refs * number of stores). */
2932 /* ??? Somehow do this in a single sweep over the loop body. */
2933 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2934 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2935 edge e;
2936 FOR_EACH_VEC_ELT (exits, i, e)
2937 {
2938 vec<seq_entry> seq;
2939 seq.create (nelems: 4);
2940 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2941 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2942 if (bitmap_empty_p (map: refs_not_in_seq))
2943 {
2944 seq.release ();
2945 break;
2946 }
2947 auto_bitmap fully_visited;
2948 int res = sm_seq_valid_bb (loop, bb: e->src, NULL_TREE,
2949 seq, refs_not_in_seq,
2950 refs_not_supported, forked: false,
2951 fully_visited);
2952 if (res != 1)
2953 {
2954 bitmap_copy (refs_not_supported, mem_refs);
2955 seq.release ();
2956 break;
2957 }
2958 sms.safe_push (obj: std::make_pair (x&: e, y&: seq));
2959 }
2960
2961 /* Prune pruned mem_refs from earlier processed exits. */
2962 bool changed = !bitmap_empty_p (map: refs_not_supported);
2963 while (changed)
2964 {
2965 changed = false;
2966 std::pair<edge, vec<seq_entry> > *seq;
2967 FOR_EACH_VEC_ELT (sms, i, seq)
2968 {
2969 bool need_to_push = false;
2970 for (unsigned i = 0; i < seq->second.length (); ++i)
2971 {
2972 sm_kind kind = seq->second[i].second;
2973 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2974 break;
2975 unsigned id = seq->second[i].first;
2976 unsigned new_idx;
2977 if (kind == sm_ord
2978 && bitmap_bit_p (refs_not_supported, id))
2979 {
2980 seq->second[i].second = sm_other;
2981 gcc_assert (seq->second[i].from == NULL_TREE);
2982 need_to_push = true;
2983 }
2984 else if (need_to_push
2985 && !sm_seq_push_down (seq&: seq->second, ptr: i, at: &new_idx))
2986 {
2987 /* We need to push down both sm_ord and sm_other
2988 but for the latter we need to disqualify all
2989 following refs. */
2990 if (kind == sm_ord)
2991 {
2992 if (bitmap_set_bit (refs_not_supported, id))
2993 changed = true;
2994 seq->second[new_idx].second = sm_other;
2995 }
2996 else
2997 {
2998 for (unsigned j = seq->second.length () - 1;
2999 j > new_idx; --j)
3000 if (seq->second[j].second == sm_ord
3001 && bitmap_set_bit (refs_not_supported,
3002 seq->second[j].first))
3003 changed = true;
3004 seq->second.truncate (size: new_idx);
3005 break;
3006 }
3007 }
3008 }
3009 }
3010 }
3011 std::pair<edge, vec<seq_entry> > *seq;
3012 FOR_EACH_VEC_ELT (sms, i, seq)
3013 {
3014 /* Prune sm_other from the end. */
3015 while (!seq->second.is_empty ()
3016 && seq->second.last ().second == sm_other)
3017 seq->second.pop ();
3018 /* Prune duplicates from the start. */
3019 auto_bitmap seen (&lim_bitmap_obstack);
3020 unsigned j, k;
3021 for (j = k = 0; j < seq->second.length (); ++j)
3022 if (bitmap_set_bit (seen, seq->second[j].first))
3023 {
3024 if (k != j)
3025 seq->second[k] = seq->second[j];
3026 ++k;
3027 }
3028 seq->second.truncate (size: k);
3029 /* And verify. */
3030 seq_entry *e;
3031 FOR_EACH_VEC_ELT (seq->second, j, e)
3032 gcc_assert (e->second == sm_ord
3033 || (e->second == sm_other && e->from != NULL_TREE));
3034 }
3035
3036 /* Verify dependence for refs we cannot handle with the order preserving
3037 code (refs_not_supported) or prune them from mem_refs. */
3038 auto_vec<seq_entry> unord_refs;
3039 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
3040 {
3041 ref = memory_accesses.refs_list[i];
3042 if (!ref_indep_loop_p (loop, ref, sm_waw))
3043 bitmap_clear_bit (mem_refs, i);
3044 /* We've now verified that the store order for ref with respect to all
3045 other stores in the loop does not matter. */
3046 else
3047 unord_refs.safe_push (obj: seq_entry (i, sm_unord));
3048 }
3049
3050 hash_map<im_mem_ref *, sm_aux *> aux_map;
3051
3052 /* Execute SM but delay the store materialization for ordered
3053 sequences on exit. */
3054 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
3055 {
3056 ref = memory_accesses.refs_list[i];
3057 execute_sm (loop, ref, aux_map, maybe_mt: bitmap_bit_p (refs_not_supported, i),
3058 use_other_flag_var: false);
3059 }
3060
3061 /* Materialize ordered store sequences on exits. */
3062 auto_bitmap clobbers_to_prune;
3063 FOR_EACH_VEC_ELT (exits, i, e)
3064 {
3065 edge append_cond_position = NULL;
3066 edge last_cond_fallthru = NULL;
3067 if (i < sms.length ())
3068 {
3069 gcc_assert (sms[i].first == e);
3070 execute_sm_exit (loop, ex: e, seq&: sms[i].second, aux_map, kind: sm_ord,
3071 append_cond_position, last_cond_fallthru,
3072 clobbers_to_prune);
3073 sms[i].second.release ();
3074 }
3075 if (!unord_refs.is_empty ())
3076 execute_sm_exit (loop, ex: e, seq&: unord_refs, aux_map, kind: sm_unord,
3077 append_cond_position, last_cond_fallthru,
3078 clobbers_to_prune);
3079 /* Commit edge inserts here to preserve the order of stores
3080 when an exit exits multiple loops. */
3081 gsi_commit_one_edge_insert (e, NULL);
3082 }
3083
3084 /* Remove clobbers inside the loop we re-materialized on exits. */
3085 EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
3086 {
3087 gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
3088 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3089 unlink_stmt_vdef (stmt);
3090 release_defs (stmt);
3091 gimple_set_vdef (g: stmt, NULL_TREE);
3092 gsi_remove (&gsi, true);
3093 }
3094
3095 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
3096 iter != aux_map.end (); ++iter)
3097 delete (*iter).second;
3098}
3099
3100class ref_always_accessed
3101{
3102public:
3103 ref_always_accessed (class loop *loop_, bool stored_p_)
3104 : loop (loop_), stored_p (stored_p_) {}
3105 bool operator () (mem_ref_loc *loc);
3106 class loop *loop;
3107 bool stored_p;
3108};
3109
3110bool
3111ref_always_accessed::operator () (mem_ref_loc *loc)
3112{
3113 class loop *must_exec;
3114
3115 struct lim_aux_data *lim_data = get_lim_data (stmt: loc->stmt);
3116 if (!lim_data)
3117 return false;
3118
3119 /* If we require an always executed store make sure the statement
3120 is a store. */
3121 if (stored_p)
3122 {
3123 tree lhs = gimple_get_lhs (loc->stmt);
3124 if (!lhs
3125 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
3126 return false;
3127 }
3128
3129 must_exec = lim_data->always_executed_in;
3130 if (!must_exec)
3131 return false;
3132
3133 if (must_exec == loop
3134 || flow_loop_nested_p (must_exec, loop))
3135 return true;
3136
3137 return false;
3138}
3139
3140/* Returns true if REF is always accessed in LOOP. If STORED_P is true
3141 make sure REF is always stored to in LOOP. */
3142
3143static bool
3144ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3145{
3146 return for_all_locs_in_loop (loop, ref,
3147 fn: ref_always_accessed (loop, stored_p));
3148}
3149
3150/* Returns true if REF1 and REF2 are independent. */
3151
3152static bool
3153refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3154{
3155 if (ref1 == ref2)
3156 return true;
3157
3158 if (dump_file && (dump_flags & TDF_DETAILS))
3159 fprintf (stream: dump_file, format: "Querying dependency of refs %u and %u: ",
3160 ref1->id, ref2->id);
3161
3162 if (mem_refs_may_alias_p (mem1: ref1, mem2: ref2, ttae_cache: &memory_accesses.ttae_cache, tbaa_p))
3163 {
3164 if (dump_file && (dump_flags & TDF_DETAILS))
3165 fprintf (stream: dump_file, format: "dependent.\n");
3166 return false;
3167 }
3168 else
3169 {
3170 if (dump_file && (dump_flags & TDF_DETAILS))
3171 fprintf (stream: dump_file, format: "independent.\n");
3172 return true;
3173 }
3174}
3175
3176 /* Returns true if REF is independent of all other accesses in LOOP.
3177 KIND specifies the kind of dependence to consider.
3178 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3179 dependences so if true REF can be hoisted out of LOOP
3180 sm_war disambiguates a store REF against all other loads to see
3181 whether the store can be sunk across loads out of LOOP
3182 sm_waw disambiguates a store REF against all other stores to see
3183 whether the store can be sunk across stores out of LOOP. */
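/* As an illustrative (hypothetical) example:

     for (...)
       {
	 tem = *q;	// load
	 *p = tem + 1;	// store considered for store motion
	 *r = 0;	// another store
       }

   lim_raw asks whether the load of *q is clobbered by any store in the
   loop (so the load can be hoisted), sm_war whether the store to *p can
   be sunk out of the loop across the load of *q, and sm_waw whether it
   can be sunk across the store to *r.  */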
3184
3185static bool
3186ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3187{
3188 bool indep_p = true;
3189 bitmap refs_to_check;
3190
3191 if (kind == sm_war)
3192 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3193 else
3194 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3195
3196 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3197 || ref->mem.ref == error_mark_node)
3198 indep_p = false;
3199 else
3200 {
3201 /* tri-state, { unknown, independent, dependent } */
3202 dep_state state = query_loop_dependence (loop, ref, kind);
3203 if (state != dep_unknown)
3204 return state == dep_independent ? true : false;
3205
3206 class loop *inner = loop->inner;
3207 while (inner)
3208 {
3209 if (!ref_indep_loop_p (loop: inner, ref, kind))
3210 {
3211 indep_p = false;
3212 break;
3213 }
3214 inner = inner->next;
3215 }
3216
3217 if (indep_p)
3218 {
3219 unsigned i;
3220 bitmap_iterator bi;
3221 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3222 {
3223 im_mem_ref *aref = memory_accesses.refs_list[i];
3224 if (aref->mem.ref == error_mark_node)
3225 {
3226 gimple *stmt = aref->accesses_in_loop[0].stmt;
3227 if ((kind == sm_war
3228 && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3229 kind != sm_waw))
3230 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3231 kind != sm_waw))
3232 {
3233 indep_p = false;
3234 break;
3235 }
3236 }
3237 else if (!refs_independent_p (ref1: ref, ref2: aref, tbaa_p: kind != sm_waw))
3238 {
3239 indep_p = false;
3240 break;
3241 }
3242 }
3243 }
3244 }
3245
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (stream: dump_file, format: "Querying %s dependencies of ref %u in loop %d: %s\n",
3248 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3249 ref->id, loop->num, indep_p ? "independent" : "dependent");
3250
3251 /* Record the computed result in the cache. */
3252 record_loop_dependence (loop, ref, kind,
3253 state: indep_p ? dep_independent : dep_dependent);
3254
3255 return indep_p;
3256}
3257
3258class ref_in_loop_hot_body
3259{
3260public:
3261 ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3262 bool operator () (mem_ref_loc *loc);
3263 class loop *l;
3264};
3265
3266 /* Check the coldest loop between loop L and the innermost loop. If there is
3267 a cold loop between L and INNER_LOOP, store motion can be performed;
3268 otherwise, with no cold loop, no store motion is done. get_coldest_out_loop
3269 also handles the case when L is INNER_LOOP. */
3270bool
3271ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3272{
3273 basic_block curr_bb = gimple_bb (g: loc->stmt);
3274 class loop *inner_loop = curr_bb->loop_father;
3275 return get_coldest_out_loop (outermost_loop: l, loop: inner_loop, curr_bb);
3276}
3277
3278
3279/* Returns true if we can perform store motion of REF from LOOP. */
3280
3281static bool
3282can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3283{
3284 tree base;
3285
3286 /* Can't hoist unanalyzable refs. */
3287 if (!MEM_ANALYZABLE (ref))
3288 return false;
3289
3290 /* Can't hoist/sink aggregate copies. */
3291 if (ref->mem.ref == error_mark_node)
3292 return false;
3293
3294 /* It should be movable. */
3295 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3296 || TREE_THIS_VOLATILE (ref->mem.ref)
3297 || !for_each_index (&ref->mem.ref, may_move_till, loop))
3298 return false;
3299
3300 /* If it can throw, fail; we do not properly update EH info. */
3301 if (tree_could_throw_p (ref->mem.ref))
3302 return false;
3303
3304 /* If it can trap, it must be always executed in LOOP.
3305 Readonly memory locations may trap when storing to them, but
3306 tree_could_trap_p is a predicate for rvalues, so check that
3307 explicitly. */
3308 base = get_base_address (t: ref->mem.ref);
3309 if ((tree_could_trap_p (ref->mem.ref)
3310 || (DECL_P (base) && TREE_READONLY (base))
3311 || TREE_CODE (base) == STRING_CST)
3312 /* ??? We can at least use false here, allowing loads? We
3313 are forcing conditional stores if the ref is not always
3314 stored to later anyway. So this would only guard
3315 the load we need to emit. Thus when the ref is not
3316 loaded we can elide this completely? */
3317 && !ref_always_accessed_p (loop, ref, stored_p: true))
3318 return false;
3319
3320 /* Verify all loads of ref can be hoisted. */
3321 if (ref->loaded
3322 && bitmap_bit_p (ref->loaded, loop->num)
3323 && !ref_indep_loop_p (loop, ref, kind: lim_raw))
3324 return false;
3325
3326 /* Verify the candidate can be disambiguated against all loads,
3327 that is, we can elide all in-loop stores. Disambiguation
3328 against stores is done later when we cannot guarantee preserving
3329 the order of stores. */
3330 if (!ref_indep_loop_p (loop, ref, kind: sm_war))
3331 return false;
3332
3333 /* Verify whether the candidate is hot for LOOP. Only do store motion if the
3334 candidate's profile count is hot. A statement in a cold BB shouldn't be
3335 moved out of its loop_father. */
3336 if (!for_all_locs_in_loop (loop, ref, fn: ref_in_loop_hot_body (loop)))
3337 return false;
3338
3339 return true;
3340}
3341
3342 /* Marks in REFS_TO_SM the references in LOOP for which store motion
3343 should be performed. SM_EXECUTED is the set of references for which store
3344 motion was performed in one of the outer loops. */
3345
3346static void
3347find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3348{
3349 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3350 unsigned i;
3351 bitmap_iterator bi;
3352 im_mem_ref *ref;
3353
3354 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3355 {
3356 ref = memory_accesses.refs_list[i];
3357 if (can_sm_ref_p (loop, ref) && dbg_cnt (index: lim))
3358 bitmap_set_bit (refs_to_sm, i);
3359 }
3360}
3361
3362 /* Checks whether LOOP (with exits stored in the EXITS array) is suitable
3363 for a store motion optimization (i.e. whether we can insert statements
3364 on its exits). */
3365
3366static bool
3367loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3368 const vec<edge> &exits)
3369{
3370 unsigned i;
3371 edge ex;
3372
3373 FOR_EACH_VEC_ELT (exits, i, ex)
3374 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3375 return false;
3376
3377 return true;
3378}
3379
3380/* Try to perform store motion for all memory references modified inside
3381 LOOP. SM_EXECUTED is the bitmap of the memory references for which
3382 store motion was executed in one of the outer loops. */
3383
3384static void
3385store_motion_loop (class loop *loop, bitmap sm_executed)
3386{
3387 auto_vec<edge> exits = get_loop_exit_edges (loop);
3388 class loop *subloop;
3389 bitmap sm_in_loop = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
3390
3391 if (loop_suitable_for_sm (loop, exits))
3392 {
3393 find_refs_for_sm (loop, sm_executed, refs_to_sm: sm_in_loop);
3394 if (!bitmap_empty_p (map: sm_in_loop))
3395 hoist_memory_references (loop, mem_refs: sm_in_loop, exits);
3396 }
3397
3398 bitmap_ior_into (sm_executed, sm_in_loop);
3399 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3400 store_motion_loop (loop: subloop, sm_executed);
3401 bitmap_and_compl_into (sm_executed, sm_in_loop);
3402 BITMAP_FREE (sm_in_loop);
3403}
3404
3405/* Try to perform store motion for all memory references modified inside
3406 loops. */
3407
3408static void
3409do_store_motion (void)
3410{
3411 class loop *loop;
3412 bitmap sm_executed = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
3413
3414 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3415 store_motion_loop (loop, sm_executed);
3416
3417 BITMAP_FREE (sm_executed);
3418}
3419
3420/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3421 for each such basic block bb records the outermost loop for which execution
3422 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3423 blocks that contain a nonpure call. */
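/* For instance (illustrative only):

     while (1)		// loop 1
       {
	 x = a;		// header block: always executed in loop 1
	 if (cond)
	   break;
	 y = b;		// skipped when the loop exits on the first
			// iteration, so not always executed in loop 1
       }  */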

static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, last = NULL;
  edge e;
  class loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      auto_vec<basic_block, 64> worklist;
      worklist.reserve_exact (loop->num_nodes);
      worklist.quick_push (loop->header);
      do
        {
          edge_iterator ei;
          bb = worklist.pop ();

          if (!flow_bb_inside_loop_p (inn_loop, bb))
            {
              /* When we are leaving a possibly infinite inner loop
                 we have to stop processing.  */
              if (!finite_loop_p (inn_loop))
                break;
              /* If the loop was finite we can continue with processing
                 the loop we exited to.  */
              inn_loop = bb->loop_father;
            }

          if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
            last = bb;

          if (bitmap_bit_p (contains_call, bb->index))
            break;

          /* If LOOP exits from this BB stop processing.  */
          FOR_EACH_EDGE (e, ei, bb->succs)
            if (!flow_bb_inside_loop_p (loop, e->dest))
              break;
          if (e)
            break;

          /* A loop might be infinite (TODO use simple loop analysis
             to disprove this if possible).  */
          if (bb->flags & BB_IRREDUCIBLE_LOOP)
            break;

          if (bb->loop_father->header == bb)
            /* Record that we enter into a subloop since it might not
               be finite.  */
            /* ???  Entering into a not always executed subloop makes
               fill_always_executed_in quadratic in loop depth since
               we walk those loops N times.  This is not a problem
               in practice though, see PR102253 for a worst-case testcase.  */
            inn_loop = bb->loop_father;

          /* Walk the body of LOOP sorted by dominance relation.  Additionally,
             if a basic block S dominates the latch, then only blocks dominated
             by S are after it.
             This is get_loop_body_in_dom_order using a worklist algorithm and
             stopping once we are no longer interested in visiting further
             blocks.  */
          unsigned old_len = worklist.length ();
          unsigned postpone = 0;
          for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
               son;
               son = next_dom_son (CDI_DOMINATORS, son))
            {
              if (!flow_bb_inside_loop_p (loop, son))
                continue;
              if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
                postpone = worklist.length ();
              worklist.quick_push (son);
            }
          if (postpone)
            /* Postponing the block that dominates the latch means
               processing it last and thus putting it earliest in the
               worklist.  */
            std::swap (worklist[old_len], worklist[postpone]);
        }
      while (!worklist.is_empty ());

      while (1)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
                         last->index, loop->num);
          SET_ALWAYS_EXECUTED_IN (last, loop);
          if (last == loop->header)
            break;
          last = get_immediate_dominator (CDI_DOMINATORS, last);
        }
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}

/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */

static void
fill_always_executed_in (void)
{
  basic_block bb;
  class loop *loop;

  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
  bitmap_clear (contains_call);
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          if (nonpure_call_p (gsi_stmt (gsi)))
            break;
        }

      if (!gsi_end_p (gsi))
        bitmap_set_bit (contains_call, bb->index);
    }

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}

/* Find the coldest loop preheader for LOOP, and also find the nearest hotter
   loop to LOOP.  Then recursively process each inner loop.  */
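
/* A made-up example (the counts are invented, and it is assumed that
   bb_colder_than_loop_preheader compares the profile count of the given
   block with that of the given loop's preheader): for the nest

     loop 1 (preheader count 10)
       loop 2 (preheader count 100)
         loop 3 (preheader count 20)

   loop 3 gets coldest_outermost_loop == loop 1, because no preheader between
   loop 1 and loop 3 is colder than loop 1's, and hotter_than_inner_loop ==
   loop 2, the nearest enclosing loop whose preheader is hotter than
   loop 3's.  */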

void
fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
                                  class loop *hotter_loop, class loop *loop)
{
  if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
                                     coldest_loop))
    coldest_loop = loop;

  coldest_outermost_loop[loop->num] = coldest_loop;

  hotter_than_inner_loop[loop->num] = NULL;
  class loop *outer_loop = loop_outer (loop);
  if (hotter_loop
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
                                        hotter_loop))
    hotter_than_inner_loop[loop->num] = hotter_loop;

  if (outer_loop && outer_loop != current_loops->tree_root
      && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
                                        outer_loop))
    hotter_than_inner_loop[loop->num] = outer_loop;

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
                   loop->num, coldest_loop->num);
      if (hotter_than_inner_loop[loop->num])
        dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
                     hotter_than_inner_loop[loop->num]->num);
      else
        dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
    }

  class loop *inner_loop;
  for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
    fill_coldest_and_hotter_out_loop (coldest_loop,
                                      hotter_than_inner_loop[loop->num],
                                      inner_loop);
}

/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (bool store_motion)
{
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow_cleared (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow_cleared (number_of_loops (cfun));
  if (store_motion)
    {
      memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
        (number_of_loops (cfun));
    }

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
      if (store_motion)
        bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
                           &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
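  /* For example (illustration only), with loop 2 nested inside loop 1,
     LI_FROM_INNERMOST visits loop 2 before loop 1, so bb_loop_postorder[2]
     ends up smaller than bb_loop_postorder[1].  */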
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    bb_loop_postorder[loop->num] = i++;
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_loaded_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);

  coldest_outermost_loop.release ();
  hotter_than_inner_loop.release ();
}

/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of register pressure.
   Only perform store motion if STORE_MOTION is true.  */
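
/* For example (an illustration only), in

     for (i = 0; i < n; i++)
       a[i] = x * y;

   the invariant multiplication x * y is costly enough to be hoisted to the
   loop preheader, whereas a cheap invariant such as a plain copy is normally
   left in place unless a statement that is being hoisted depends on it.  */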

unsigned int
loop_invariant_motion_in_fun (function *fun, bool store_motion)
{
  unsigned int todo = 0;

  tree_ssa_lim_initialize (store_motion);

  mark_ssa_maybe_undefs ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references (store_motion);

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* Pre-compute coldest outermost loop and nearest hotter loop of each
     loop.  */
  class loop *loop;
  coldest_outermost_loop.create (number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
  hotter_than_inner_loop.create (number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    fill_coldest_and_hotter_out_loop (loop, NULL, loop);

  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  for (int i = 0; i < n; ++i)
    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  if (store_motion)
    do_store_motion ();

  free (rpo);
  rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* Move the expressions that are expensive enough.  */
  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (fun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  tree_ssa_lim_finalize ();

  return todo;
}

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_lim (m_ctxt); }
  bool gate (function *) final override { return flag_tree_loop_im != 0; }
  unsigned int execute (function *) final override;

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();
  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}