1/* Predictive commoning.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
168
169 It can be replaced with:
170
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
182
183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
210#include "backend.h"
211#include "rtl.h"
212#include "tree.h"
213#include "gimple.h"
214#include "predict.h"
215#include "tree-pass.h"
216#include "ssa.h"
217#include "gimple-pretty-print.h"
218#include "alias.h"
219#include "fold-const.h"
220#include "cfgloop.h"
221#include "tree-eh.h"
222#include "gimplify.h"
223#include "gimple-iterator.h"
224#include "gimplify-me.h"
225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
231#include "tree-ssa.h"
232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
234#include "tree-affine.h"
235#include "builtins.h"
236#include "opts.h"
237
/* The maximum number of iterations between the considered memory
   references.  A smaller bound is used when the target has few available
   registers, since carrying a value across each extra iteration of
   distance requires another temporary variable (see step 5 in the
   overview above).  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242
243/* Data references (or phi nodes that carry data reference values across
244 loop iterations). */
245
typedef class dref_d
{
public:
  /* The reference itself; NULL when this dref describes a looparound phi
     node or a combination reference (see dump_dref).  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple *stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
274
275
276/* Type of the chain of the references. */
277
enum chain_type
{
  /* The addresses of the references in the chain are constant
     (loop-invariant).  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* There are only stores in the chain (store elimination).  */
  CT_STORE_STORE,

  /* A combination of two chains (see combine_chains).  */
  CT_COMBINATION
};
295
296/* Chains of data references. */
297
typedef struct chain
{
  chain (chain_type t) : type (t), op (ERROR_MARK), rslt_type (NULL_TREE),
    ch1 (NULL), ch2 (NULL), length (0), init_seq (NULL), fini_seq (NULL),
    has_max_use_after (false), all_always_accessed (false), combined (false),
    inv_store_elimination (false) {}

  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  auto_vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  auto_vec<tree> vars;

  /* Initializers for the variables.  */
  auto_vec<tree> inits;

  /* Finalizers for the eliminated stores.  */
  auto_vec<tree> finis;

  /* gimple stmts initializing the initial variables of the chain.  */
  gimple_seq init_seq;

  /* gimple stmts finalizing the eliminated stores of the chain.  */
  gimple_seq fini_seq;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;

  /* True if this is store elimination chain and eliminated stores store
     loop invariant value into memory.  */
  unsigned inv_store_elimination : 1;
} *chain_p;
349
350
351/* Describes the knowledge about the step of the memory references in
352 the component. */
353
enum ref_step_type
{
  /* The step is zero (the address is loop invariant).  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
365
366/* Components of the data dependence graph. */
367
struct component
{
  component (bool es) : comp_step (RS_ANY), eliminate_store_p (es),
    next (NULL) {}

  /* The references in the component.  */
  auto_vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* True if all references in component are stores and we try to do
     intra/inter loop iteration dead store elimination.  */
  bool eliminate_store_p;

  /* Next component in the (singly linked) list.  */
  struct component *next;
};
386
387/* A class to encapsulate the global states used for predictive
388 commoning work on top of one given LOOP. */
389
class pcom_worker
{
public:
  pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {}

  /* Releases all per-loop resources: data references, dependence
     relations, the affine expansion cache and the chains.  */
  ~pcom_worker ()
  {
    free_data_refs (m_datarefs);
    free_dependence_relations (m_dependences);
    free_affine_expand_cache (&m_cache);
    release_chains ();
  }

  /* Non-copyable: the destructor frees owned data, so copies would lead
     to double frees.  */
  pcom_worker (const pcom_worker &) = delete;
  pcom_worker &operator= (const pcom_worker &) = delete;

  /* Performs predictive commoning.  */
  unsigned tree_predictive_commoning_loop (bool allow_unroll_p);

  /* Perform the predictive commoning optimization for chains, make this
     public for being called in callback execute_pred_commoning_cbck.  */
  void execute_pred_commoning (bitmap tmp_vars);

private:
  /* The pointer to the given loop.  */
  loop_p m_loop;

  /* All data references.  */
  auto_vec<data_reference_p, 10> m_datarefs;

  /* All data dependences.  */
  auto_vec<ddr_p, 10> m_dependences;

  /* All chains.  */
  auto_vec<chain_p> m_chains;

  /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
  auto_bitmap m_looparound_phis;

  typedef hash_map<tree, name_expansion *> tree_expand_map_t;
  /* Cache used by tree_to_aff_combination_expand.  */
  tree_expand_map_t *m_cache;

  /* Splits dependence graph to components.  */
  struct component *split_data_refs_to_components ();

  /* Check the conditions on references inside each of components COMPS,
     and remove the unsuitable components from the list.  */
  struct component *filter_suitable_components (struct component *comps);

  /* Find roots of the values and determine distances in components COMPS,
     and separates the references to chains.  */
  void determine_roots (struct component *comps);

  /* Prepare initializers for chains, and free chains that cannot
     be used because the initializers might trap.  */
  void prepare_initializers ();

  /* Generates finalizer memory reference for chains.  Returns true if
     finalizer code generation for chains breaks loop closed ssa form.  */
  bool prepare_finalizers ();

  /* Try to combine the chains.  */
  void try_combine_chains ();

  /* Frees CHAINS.  */
  void release_chains ();

  /* Frees a chain CHAIN.  */
  void release_chain (chain_p chain);

  /* Prepare initializers for CHAIN.  Returns false if this is impossible
     because one of these initializers may trap, true otherwise.  */
  bool prepare_initializers_chain (chain_p chain);

  /* Generates finalizer memory references for CHAIN.  Returns true
     if finalizer code for CHAIN can be generated, otherwise false.  */
  bool prepare_finalizers_chain (chain_p chain);

  /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
  void aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset);

  /* Determines number of iterations of the innermost enclosing loop before
     B refers to exactly the same location as A and stores it to OFF.  */
  bool determine_offset (struct data_reference *a, struct data_reference *b,
			 poly_widest_int *off);

  /* Returns true if the component COMP satisfies the conditions
     described in 2) at the beginning of this file.  */
  bool suitable_component_p (struct component *comp);

  /* Returns true if REF is a valid initializer for ROOT with given
     DISTANCE (in iterations of the innermost enclosing loop).  */
  bool valid_initializer_p (struct data_reference *ref, unsigned distance,
			    struct data_reference *root);

  /* Finds looparound phi node of loop that copies the value of REF.  */
  gphi *find_looparound_phi (dref ref, dref root);

  /* Find roots of the values and determine distances in the component
     COMP.  The references are redistributed into chains.  */
  void determine_roots_comp (struct component *comp);

  /* For references in CHAIN that are copied around the loop, add the
     results of such copies to the chain.  */
  void add_looparound_copies (chain_p chain);

  /* Returns the single statement in that NAME is used, excepting
     the looparound phi nodes contained in one of the chains.  */
  gimple *single_nonlooparound_use (tree name);

  /* Remove statement STMT, as well as the chain of assignments in that
     it is used.  */
  void remove_stmt (gimple *stmt);

  /* Perform the predictive commoning optimization for a chain CHAIN.  */
  void execute_pred_commoning_chain (chain_p chain, bitmap tmp_vars);

  /* Returns the modify statement that uses NAME.  */
  gimple *find_use_stmt (tree *name);

  /* If the operation used in STMT is associative and commutative, go
     through the tree of the same operations and returns its root.  */
  gimple *find_associative_operation_root (gimple *stmt, unsigned *distance);

  /* Returns the common statement in that NAME1 and NAME2 have a use.  */
  gimple *find_common_use_stmt (tree *name1, tree *name2);

  /* Checks whether R1 and R2 are combined together using CODE, with the
     result in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order
     R2 CODE R1 if it is true.  */
  bool combinable_refs_p (dref r1, dref r2, enum tree_code *code, bool *swap,
			  tree *rslt_type);

  /* Reassociates the expression in that NAME1 and NAME2 are used so that
     they are combined in a single statement, and returns this statement.  */
  gimple *reassociate_to_the_same_stmt (tree name1, tree name2);

  /* Returns the statement that combines references R1 and R2.  */
  gimple *stmt_combining_refs (dref r1, dref r2);

  /* Tries to combine chains CH1 and CH2 together.  */
  chain_p combine_chains (chain_p ch1, chain_p ch2);
};
534
535/* Dumps data reference REF to FILE. */
536
537extern void dump_dref (FILE *, dref);
538void
539dump_dref (FILE *file, dref ref)
540{
541 if (ref->ref)
542 {
543 fprintf (stream: file, format: " ");
544 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
545 fprintf (stream: file, format: " (id %u%s)\n", ref->pos,
546 DR_IS_READ (ref->ref) ? "" : ", write");
547
548 fprintf (stream: file, format: " offset ");
549 print_decs (wi: ref->offset, file);
550 fprintf (stream: file, format: "\n");
551
552 fprintf (stream: file, format: " distance %u\n", ref->distance);
553 }
554 else
555 {
556 if (gimple_code (g: ref->stmt) == GIMPLE_PHI)
557 fprintf (stream: file, format: " looparound ref\n");
558 else
559 fprintf (stream: file, format: " combination ref\n");
560 fprintf (stream: file, format: " in statement ");
561 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
562 fprintf (stream: file, format: "\n");
563 fprintf (stream: file, format: " distance %u\n", ref->distance);
564 }
565
566}
567
568/* Dumps CHAIN to FILE. */
569
570extern void dump_chain (FILE *, chain_p);
571void
572dump_chain (FILE *file, chain_p chain)
573{
574 dref a;
575 const char *chain_type;
576 unsigned i;
577 tree var;
578
579 switch (chain->type)
580 {
581 case CT_INVARIANT:
582 chain_type = "Load motion";
583 break;
584
585 case CT_LOAD:
586 chain_type = "Loads-only";
587 break;
588
589 case CT_STORE_LOAD:
590 chain_type = "Store-loads";
591 break;
592
593 case CT_STORE_STORE:
594 chain_type = "Store-stores";
595 break;
596
597 case CT_COMBINATION:
598 chain_type = "Combination";
599 break;
600
601 default:
602 gcc_unreachable ();
603 }
604
605 fprintf (stream: file, format: "%s chain %p%s\n", chain_type, (void *) chain,
606 chain->combined ? " (combined)" : "");
607 if (chain->type != CT_INVARIANT)
608 fprintf (stream: file, format: " max distance %u%s\n", chain->length,
609 chain->has_max_use_after ? "" : ", may reuse first");
610
611 if (chain->type == CT_COMBINATION)
612 {
613 fprintf (stream: file, format: " equal to %p %s %p in type ",
614 (void *) chain->ch1, op_symbol_code (chain->op),
615 (void *) chain->ch2);
616 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
617 fprintf (stream: file, format: "\n");
618 }
619
620 if (chain->vars.exists ())
621 {
622 fprintf (stream: file, format: " vars");
623 FOR_EACH_VEC_ELT (chain->vars, i, var)
624 {
625 fprintf (stream: file, format: " ");
626 print_generic_expr (file, var, TDF_SLIM);
627 }
628 fprintf (stream: file, format: "\n");
629 }
630
631 if (chain->inits.exists ())
632 {
633 fprintf (stream: file, format: " inits");
634 FOR_EACH_VEC_ELT (chain->inits, i, var)
635 {
636 fprintf (stream: file, format: " ");
637 print_generic_expr (file, var, TDF_SLIM);
638 }
639 fprintf (stream: file, format: "\n");
640 }
641
642 fprintf (stream: file, format: " references:\n");
643 FOR_EACH_VEC_ELT (chain->refs, i, a)
644 dump_dref (file, ref: a);
645
646 fprintf (stream: file, format: "\n");
647}
648
649/* Dumps CHAINS to FILE. */
650
651void
652dump_chains (FILE *file, const vec<chain_p> &chains)
653{
654 chain_p chain;
655 unsigned i;
656
657 FOR_EACH_VEC_ELT (chains, i, chain)
658 dump_chain (file, chain);
659}
660
661/* Dumps COMP to FILE. */
662
663extern void dump_component (FILE *, struct component *);
664void
665dump_component (FILE *file, struct component *comp)
666{
667 dref a;
668 unsigned i;
669
670 fprintf (stream: file, format: "Component%s:\n",
671 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
672 FOR_EACH_VEC_ELT (comp->refs, i, a)
673 dump_dref (file, ref: a);
674 fprintf (stream: file, format: "\n");
675}
676
677/* Dumps COMPS to FILE. */
678
679extern void dump_components (FILE *, struct component *);
680void
681dump_components (FILE *file, struct component *comps)
682{
683 struct component *comp;
684
685 for (comp = comps; comp; comp = comp->next)
686 dump_component (file, comp);
687}
688
689/* Frees a chain CHAIN. */
690
691void
692pcom_worker::release_chain (chain_p chain)
693{
694 dref ref;
695 unsigned i;
696
697 if (chain == NULL)
698 return;
699
700 FOR_EACH_VEC_ELT (chain->refs, i, ref)
701 free (ptr: ref);
702
703 if (chain->init_seq)
704 gimple_seq_discard (chain->init_seq);
705
706 if (chain->fini_seq)
707 gimple_seq_discard (chain->fini_seq);
708
709 delete chain;
710}
711
712/* Frees CHAINS. */
713
714void
715pcom_worker::release_chains ()
716{
717 unsigned i;
718 chain_p chain;
719
720 FOR_EACH_VEC_ELT (m_chains, i, chain)
721 release_chain (chain);
722}
723
724/* Frees list of components COMPS. */
725
726static void
727release_components (struct component *comps)
728{
729 struct component *act, *next;
730
731 for (act = comps; act; act = next)
732 {
733 next = act->next;
734 delete act;
735 }
736}
737
738/* Finds a root of tree given by FATHERS containing A, and performs path
739 shortening. */
740
741static unsigned
742component_of (vec<unsigned> &fathers, unsigned a)
743{
744 unsigned root, n;
745
746 for (root = a; root != fathers[root]; root = fathers[root])
747 continue;
748
749 for (; a != root; a = n)
750 {
751 n = fathers[a];
752 fathers[a] = root;
753 }
754
755 return root;
756}
757
758/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
759 components, A and B are components to merge. */
760
761static void
762merge_comps (vec<unsigned> &fathers, vec<unsigned> &sizes,
763 unsigned a, unsigned b)
764{
765 unsigned ca = component_of (fathers, a);
766 unsigned cb = component_of (fathers, a: b);
767
768 if (ca == cb)
769 return;
770
771 if (sizes[ca] < sizes[cb])
772 {
773 sizes[cb] += sizes[ca];
774 fathers[ca] = cb;
775 }
776 else
777 {
778 sizes[ca] += sizes[cb];
779 fathers[cb] = ca;
780 }
781}
782
783/* Returns true if A is a reference that is suitable for predictive commoning
784 in the innermost loop that contains it. REF_STEP is set according to the
785 step of the reference A. */
786
787static bool
788suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
789{
790 tree ref = DR_REF (a), step = DR_STEP (a);
791
792 if (!step
793 || TREE_THIS_VOLATILE (ref)
794 || !is_gimple_reg_type (TREE_TYPE (ref))
795 || tree_could_throw_p (ref))
796 return false;
797
798 if (integer_zerop (step))
799 *ref_step = RS_INVARIANT;
800 else if (integer_nonzerop (step))
801 *ref_step = RS_NONZERO;
802 else
803 *ref_step = RS_ANY;
804
805 return true;
806}
807
808/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
809
void
pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
					aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  /* Expand DR_OFFSET into an affine combination, sharing the worker-wide
     expansion cache M_CACHE.  */
  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache);
  /* Then add the constant DR_INIT part.  */
  aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
821
822/* Determines number of iterations of the innermost enclosing loop before B
823 refers to exactly the same location as A and stores it to OFF. If A and
824 B do not have the same step, they never meet, or anything else fails,
825 returns false, otherwise returns true. Both A and B are assumed to
826 satisfy suitable_reference_p. */
827
828bool
829pcom_worker::determine_offset (struct data_reference *a,
830 struct data_reference *b, poly_widest_int *off)
831{
832 aff_tree diff, baseb, step;
833 tree typea, typeb;
834
835 /* Check that both the references access the location in the same type. */
836 typea = TREE_TYPE (DR_REF (a));
837 typeb = TREE_TYPE (DR_REF (b));
838 if (!useless_type_conversion_p (typeb, typea))
839 return false;
840
841 /* Check whether the base address and the step of both references is the
842 same. */
843 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), flags: 0)
844 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), flags: 0))
845 return false;
846
847 if (integer_zerop (DR_STEP (a)))
848 {
849 /* If the references have loop invariant address, check that they access
850 exactly the same location. */
851 *off = 0;
852 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), flags: 0)
853 && operand_equal_p (DR_INIT (a), DR_INIT (b), flags: 0));
854 }
855
856 /* Compare the offsets of the addresses, and check whether the difference
857 is a multiple of step. */
858 aff_combination_dr_offset (dr: a, offset: &diff);
859 aff_combination_dr_offset (dr: b, offset: &baseb);
860 aff_combination_scale (&baseb, -1);
861 aff_combination_add (&diff, &baseb);
862
863 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
864 &step, &m_cache);
865 return aff_combination_constant_multiple_p (&diff, &step, off);
866}
867
868/* Returns the last basic block in LOOP for that we are sure that
869 it is executed whenever the loop is entered. */
870
871static basic_block
872last_always_executed_block (class loop *loop)
873{
874 unsigned i;
875 auto_vec<edge> exits = get_loop_exit_edges (loop);
876 edge ex;
877 basic_block last = loop->latch;
878
879 FOR_EACH_VEC_ELT (exits, i, ex)
880 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
881
882 return last;
883}
884
885/* Splits dependence graph on DATAREFS described by DEPENDENCES to
886 components. */
887
888struct component *
889pcom_worker::split_data_refs_to_components ()
890{
891 unsigned i, n = m_datarefs.length ();
892 unsigned ca, ia, ib, bad;
893 struct data_reference *dr, *dra, *drb;
894 struct data_dependence_relation *ddr;
895 struct component *comp_list = NULL, *comp;
896 dref dataref;
897 /* Don't do store elimination if loop has multiple exit edges. */
898 bool eliminate_store_p = single_exit (m_loop) != NULL;
899 basic_block last_always_executed = last_always_executed_block (loop: m_loop);
900 auto_bitmap no_store_store_comps;
901 auto_vec<unsigned> comp_father (n + 1);
902 auto_vec<unsigned> comp_size (n + 1);
903 comp_father.quick_grow (len: n + 1);
904 comp_size.quick_grow (len: n + 1);
905
906 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
907 {
908 if (!DR_REF (dr))
909 /* A fake reference for call or asm_expr that may clobber memory;
910 just fail. */
911 return NULL;
912 /* predcom pass isn't prepared to handle calls with data references. */
913 if (is_gimple_call (DR_STMT (dr)))
914 return NULL;
915 dr->aux = (void *) (size_t) i;
916 comp_father[i] = i;
917 comp_size[i] = 1;
918 }
919
920 /* A component reserved for the "bad" data references. */
921 comp_father[n] = n;
922 comp_size[n] = 1;
923
924 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
925 {
926 enum ref_step_type dummy;
927
928 if (!suitable_reference_p (a: dr, ref_step: &dummy))
929 {
930 ia = (unsigned) (size_t) dr->aux;
931 merge_comps (fathers&: comp_father, sizes&: comp_size, a: n, b: ia);
932 }
933 }
934
935 FOR_EACH_VEC_ELT (m_dependences, i, ddr)
936 {
937 poly_widest_int dummy_off;
938
939 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
940 continue;
941
942 dra = DDR_A (ddr);
943 drb = DDR_B (ddr);
944
945 /* Don't do store elimination if there is any unknown dependence for
946 any store data reference. */
947 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
948 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
949 || DDR_NUM_DIST_VECTS (ddr) == 0))
950 eliminate_store_p = false;
951
952 ia = component_of (fathers&: comp_father, a: (unsigned) (size_t) dra->aux);
953 ib = component_of (fathers&: comp_father, a: (unsigned) (size_t) drb->aux);
954 if (ia == ib)
955 continue;
956
957 bad = component_of (fathers&: comp_father, a: n);
958
959 /* If both A and B are reads, we may ignore unsuitable dependences. */
960 if (DR_IS_READ (dra) && DR_IS_READ (drb))
961 {
962 if (ia == bad || ib == bad
963 || !determine_offset (a: dra, b: drb, off: &dummy_off))
964 continue;
965 }
966 /* If A is read and B write or vice versa and there is unsuitable
967 dependence, instead of merging both components into a component
968 that will certainly not pass suitable_component_p, just put the
969 read into bad component, perhaps at least the write together with
970 all the other data refs in it's component will be optimizable. */
971 else if (DR_IS_READ (dra) && ib != bad)
972 {
973 if (ia == bad)
974 {
975 bitmap_set_bit (no_store_store_comps, ib);
976 continue;
977 }
978 else if (!determine_offset (a: dra, b: drb, off: &dummy_off))
979 {
980 bitmap_set_bit (no_store_store_comps, ib);
981 merge_comps (fathers&: comp_father, sizes&: comp_size, a: bad, b: ia);
982 continue;
983 }
984 }
985 else if (DR_IS_READ (drb) && ia != bad)
986 {
987 if (ib == bad)
988 {
989 bitmap_set_bit (no_store_store_comps, ia);
990 continue;
991 }
992 else if (!determine_offset (a: dra, b: drb, off: &dummy_off))
993 {
994 bitmap_set_bit (no_store_store_comps, ia);
995 merge_comps (fathers&: comp_father, sizes&: comp_size, a: bad, b: ib);
996 continue;
997 }
998 }
999 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
1000 && ia != bad && ib != bad
1001 && !determine_offset (a: dra, b: drb, off: &dummy_off))
1002 {
1003 merge_comps (fathers&: comp_father, sizes&: comp_size, a: bad, b: ia);
1004 merge_comps (fathers&: comp_father, sizes&: comp_size, a: bad, b: ib);
1005 continue;
1006 }
1007
1008 merge_comps (fathers&: comp_father, sizes&: comp_size, a: ia, b: ib);
1009 }
1010
1011 if (eliminate_store_p)
1012 {
1013 tree niters = number_of_latch_executions (m_loop);
1014
1015 /* Don't do store elimination if niters info is unknown because stores
1016 in the last iteration can't be eliminated and we need to recover it
1017 after loop. */
1018 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
1019 }
1020
1021 auto_vec<struct component *> comps;
1022 comps.safe_grow_cleared (len: n, exact: true);
1023 bad = component_of (fathers&: comp_father, a: n);
1024 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
1025 {
1026 ia = (unsigned) (size_t) dr->aux;
1027 ca = component_of (fathers&: comp_father, a: ia);
1028 if (ca == bad)
1029 continue;
1030
1031 comp = comps[ca];
1032 if (!comp)
1033 {
1034 comp = new component (eliminate_store_p);
1035 comp->refs.reserve_exact (nelems: comp_size[ca]);
1036 comps[ca] = comp;
1037 }
1038
1039 dataref = XCNEW (class dref_d);
1040 dataref->ref = dr;
1041 dataref->stmt = DR_STMT (dr);
1042 dataref->offset = 0;
1043 dataref->distance = 0;
1044
1045 dataref->always_accessed
1046 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
1047 gimple_bb (g: dataref->stmt));
1048 dataref->pos = comp->refs.length ();
1049 comp->refs.quick_push (obj: dataref);
1050 }
1051
1052 if (eliminate_store_p)
1053 {
1054 bitmap_iterator bi;
1055 EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
1056 {
1057 ca = component_of (fathers&: comp_father, a: ia);
1058 if (ca != bad)
1059 comps[ca]->eliminate_store_p = false;
1060 }
1061 }
1062
1063 for (i = 0; i < n; i++)
1064 {
1065 comp = comps[i];
1066 if (comp)
1067 {
1068 comp->next = comp_list;
1069 comp_list = comp;
1070 }
1071 }
1072 return comp_list;
1073}
1074
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  */

bool
pcom_worker::suitable_component_p (struct component *comp)
{
  unsigned i;
  dref a, first;
  /* BP tracks the basic block of the previously checked reference,
     starting at the loop header.  */
  basic_block ba, bp = m_loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (g: a->stmt);

      /* Every reference must be executed exactly once per iteration.  */
      if (!just_once_each_iteration_p (m_loop, ba))
	return false;

      /* The references were collected in dominance order, so each block
	 must be dominated by the block of the previous reference.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  /* The first reference determines the step classification of the whole
     component; all members are expected to agree (checked below).  */
  first = comp->refs[0];
  ok = suitable_reference_p (a: first->ref, ref_step: &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      if (has_write && comp->comp_step == RS_NONZERO)
	{
	  /* Punt for non-invariant references where step isn't a multiple
	     of reference size.  If step is smaller than reference size,
	     it overlaps the access in next iteration, if step is larger,
	     but not multiple of the access size, there could be overlap
	     in some later iteration.  There might be more latent issues
	     about this in predcom or data reference analysis.  If the
	     reference is a COMPONENT_REF, also check if step isn't a
	     multiple of the containing aggregate size.  See PR111683.  */
	  tree ref = DR_REF (a->ref);
	  tree step = DR_STEP (a->ref);
	  if (TREE_CODE (ref) == COMPONENT_REF
	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
	    ref = TREE_OPERAND (ref, 0);
	  do
	    {
	      tree sz = TYPE_SIZE_UNIT (TREE_TYPE (ref));
	      /* Variable-sized access -- cannot prove non-overlap.  */
	      if (TREE_CODE (sz) != INTEGER_CST)
		return false;
	      if (wi::multiple_of_p (x: wi::to_offset (t: step),
				     y: wi::to_offset (t: sz), sgn: SIGNED))
		break;
	      /* Not a multiple of this level's size; retry with the
		 enclosing aggregate, if there is one.  */
	      if (TREE_CODE (ref) != COMPONENT_REF)
		return false;
	      ref = TREE_OPERAND (ref, 0);
	    }
	  while (1);
	}
      if (i == 0)
	continue;
      /* Polynomial offsets are no use, since we need to know the
	 gap between iteration numbers at compile time.  */
      poly_widest_int offset;
      if (!determine_offset (a: first->ref, b: a->ref, off: &offset)
	  || !offset.is_constant (const_value: &a->offset))
	return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
			   && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
1159
1160/* Check the conditions on references inside each of components COMPS,
1161 and remove the unsuitable components from the list. The new list
1162 of components is returned. The conditions are described in 2) at
1163 the beginning of this file. */
1164
1165struct component *
1166pcom_worker::filter_suitable_components (struct component *comps)
1167{
1168 struct component **comp, *act;
1169
1170 for (comp = &comps; *comp; )
1171 {
1172 act = *comp;
1173 if (suitable_component_p (comp: act))
1174 comp = &act->next;
1175 else
1176 {
1177 dref ref;
1178 unsigned i;
1179
1180 *comp = act->next;
1181 FOR_EACH_VEC_ELT (act->refs, i, ref)
1182 free (ptr: ref);
1183 delete act;
1184 }
1185 }
1186
1187 return comps;
1188}
1189
1190/* Compares two drefs A and B by their offset and position. Callback for
1191 qsort. */
1192
1193static int
1194order_drefs (const void *a, const void *b)
1195{
1196 const dref *const da = (const dref *) a;
1197 const dref *const db = (const dref *) b;
1198 int offcmp = wi::cmps (x: (*da)->offset, y: (*db)->offset);
1199
1200 if (offcmp != 0)
1201 return offcmp;
1202
1203 return (*da)->pos - (*db)->pos;
1204}
1205
1206/* Compares two drefs A and B by their position. Callback for qsort. */
1207
1208static int
1209order_drefs_by_pos (const void *a, const void *b)
1210{
1211 const dref *const da = (const dref *) a;
1212 const dref *const db = (const dref *) b;
1213
1214 return (*da)->pos - (*db)->pos;
1215}
1216
1217/* Returns root of the CHAIN. */
1218
1219static inline dref
1220get_chain_root (chain_p chain)
1221{
1222 return chain->refs[0];
1223}
1224
1225/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1226 exist. */
1227
1228static inline dref
1229get_chain_last_write_at (chain_p chain, unsigned distance)
1230{
1231 for (unsigned i = chain->refs.length (); i > 0; i--)
1232 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1233 && distance == chain->refs[i - 1]->distance)
1234 return chain->refs[i - 1];
1235
1236 return NULL;
1237}
1238
1239/* Given CHAIN, returns the last write ref with the same distance before load
1240 at index LOAD_IDX, or NULL if it doesn't exist. */
1241
1242static inline dref
1243get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1244{
1245 gcc_assert (load_idx < chain->refs.length ());
1246
1247 unsigned distance = chain->refs[load_idx]->distance;
1248
1249 for (unsigned i = load_idx; i > 0; i--)
1250 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1251 && distance == chain->refs[i - 1]->distance)
1252 return chain->refs[i - 1];
1253
1254 return NULL;
1255}
1256
/* Adds REF to the chain CHAIN.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* The root has the smallest offset of the chain, and the distance of
     REF from it must fit in an unsigned HOST_WIDE_INT.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (obj: ref);

  ref->distance = dist.to_uhwi ();

  /* A reference at or past the current chain length extends the chain;
     reset the flag first -- whether the new maximal-distance reference
     is a use after the root is re-evaluated below.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
  if (DR_IS_WRITE (ref->ref))
    chain->type = CT_STORE_STORE;

  /* Don't set the flag for store-store chain since there is no use.  */
  if (chain->type != CT_STORE_STORE
      && ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
1290
1291/* Returns the chain for invariant component COMP. */
1292
1293static chain_p
1294make_invariant_chain (struct component *comp)
1295{
1296 chain_p chain = new struct chain (CT_INVARIANT);
1297 unsigned i;
1298 dref ref;
1299
1300 chain->all_always_accessed = true;
1301
1302 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1303 {
1304 chain->refs.safe_push (obj: ref);
1305 chain->all_always_accessed &= ref->always_accessed;
1306 }
1307
1308 chain->inits = vNULL;
1309 chain->finis = vNULL;
1310
1311 return chain;
1312}
1313
1314/* Make a new chain of type TYPE rooted at REF. */
1315
1316static chain_p
1317make_rooted_chain (dref ref, enum chain_type type)
1318{
1319 chain_p chain = new struct chain (type);
1320
1321 chain->refs.safe_push (obj: ref);
1322 chain->all_always_accessed = ref->always_accessed;
1323 ref->distance = 0;
1324
1325 chain->inits = vNULL;
1326 chain->finis = vNULL;
1327
1328 return chain;
1329}
1330
1331/* Returns true if CHAIN is not trivial. */
1332
1333static bool
1334nontrivial_chain_p (chain_p chain)
1335{
1336 return chain != NULL && chain->refs.length () > 1;
1337}
1338
1339/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1340 is no such name. */
1341
1342static tree
1343name_for_ref (dref ref)
1344{
1345 tree name;
1346
1347 if (is_gimple_assign (gs: ref->stmt))
1348 {
1349 if (!ref->ref || DR_IS_READ (ref->ref))
1350 name = gimple_assign_lhs (gs: ref->stmt);
1351 else
1352 name = gimple_assign_rhs1 (gs: ref->stmt);
1353 }
1354 else
1355 name = PHI_RESULT (ref->stmt);
1356
1357 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1358}
1359
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

bool
pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
				  struct data_reference *root)
{
  aff_tree diff, base, step;
  poly_widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), flags: 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), flags: 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), flags: 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (dr: root, offset: &diff);
  aff_combination_dr_offset (dr: ref, offset: &base);
  /* DIFF becomes ROOT's offset minus REF's offset.  */
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  /* The difference must be a constant multiple OFF of the step ...  */
  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &m_cache);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* ... and that multiple must equal DISTANCE exactly.  */
  if (maybe_ne (a: off, b: distance))
    return false;

  return true;
}
1401
/* Finds looparound phi node of loop that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

gphi *
pcom_worker::find_looparound_phi (dref ref, dref root)
{
  tree name, init, init_ref;
  gimple *init_stmt;
  edge latch = loop_latch_edge (m_loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the SSA name holding REF's value (lhs of a load, rhs of
     a store, or a PHI result).  */
  if (is_gimple_assign (gs: ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (gs: ref->stmt);
      else
	name = gimple_assign_rhs1 (gs: ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Find both the candidate looparound PHI (one whose latch argument is
     NAME) and the virtual operand live on the preheader edge; the latter
     is needed for the clobber check below.  */
  tree entry_vuse = NULL_TREE;
  gphi *phi = NULL;
  for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
    {
      gphi *p = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
	phi = p;
      else if (virtual_operand_p (op: gimple_phi_result (gs: p)))
	entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (m_loop));
      if (phi && entry_vuse)
	break;
    }
  if (!phi || !entry_vuse)
    return NULL;

  /* The initial value must come from a load of a memory reference (or a
     decl) before the loop.  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (g: init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (gs: init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (s: &init_dr, c: 0, n: sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop,
			     init_stmt))
    return NULL;

  /* The initializer must access the location REF reads one iteration
     further back than REF's own distance.  */
  if (!valid_initializer_p (ref: &init_dr, distance: ref->distance + 1, root: root->ref))
    return NULL;

  /* Make sure nothing clobbers the location we re-use the initial value
     from.  */
  if (entry_vuse != gimple_vuse (g: init_stmt))
    {
      ao_ref ref;
      ao_ref_init (&ref, init_ref);
      /* Walk the virtual use-def chain from the loop entry back to the
	 initializer, bounded by the alias-oracle query budget.  */
      unsigned limit = param_sccvn_max_alias_queries_per_access;
      tree vdef = entry_vuse;
      do
	{
	  gimple *def = SSA_NAME_DEF_STMT (vdef);
	  if (limit-- == 0 || gimple_code (g: def) == GIMPLE_PHI)
	    return NULL;
	  if (stmt_may_clobber_ref_p_1 (def, &ref))
	    /* When the stmt is an assign to init_ref we could in theory
	       use its RHS for the initial value of the looparound PHI
	       we replace in prepare_initializers_chain, but we have
	       no convenient place to store this info at the moment.  */
	    return NULL;
	  vdef = gimple_vuse (g: def);
	}
      while (vdef != gimple_vuse (g: init_stmt));
    }

  return phi;
}
1494
1495/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1496
1497static void
1498insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1499{
1500 dref nw = XCNEW (class dref_d), aref;
1501 unsigned i;
1502
1503 nw->stmt = phi;
1504 nw->distance = ref->distance + 1;
1505 nw->always_accessed = 1;
1506
1507 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1508 if (aref->distance >= nw->distance)
1509 break;
1510 chain->refs.safe_insert (ix: i, obj: nw);
1511
1512 if (nw->distance > chain->length)
1513 {
1514 chain->length = nw->distance;
1515 chain->has_max_use_after = false;
1516 }
1517}
1518
1519/* For references in CHAIN that are copied around the loop (created previously
1520 by PRE, or by user), add the results of such copies to the chain. This
1521 enables us to remove the copies by unrolling, and may need less registers
1522 (also, it may allow us to combine chains together). */
1523
1524void
1525pcom_worker::add_looparound_copies (chain_p chain)
1526{
1527 unsigned i;
1528 dref ref, root = get_chain_root (chain);
1529 gphi *phi;
1530
1531 if (chain->type == CT_STORE_STORE)
1532 return;
1533
1534 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1535 {
1536 phi = find_looparound_phi (ref, root);
1537 if (!phi)
1538 continue;
1539
1540 bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1541 insert_looparound_copy (chain, ref, phi);
1542 }
1543}
1544
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into chains.  */

void
pcom_worker::determine_roots_comp (struct component *comp)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  /* Offset of the root of the chain currently being built.  */
  widest_int last_ofs = 0;
  enum chain_type type;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      m_chains.safe_push (obj: chain);
      return;
    }

  /* Trivial component.  */
  if (comp->refs.length () <= 1)
    {
      if (comp->refs.length () == 1)
	{
	  free (ptr: comp->refs[0]);
	  comp->refs.truncate (size: 0);
	}
      return;
    }

  /* Sort by offset, then by original position, so that each chain is a
     contiguous run and a load at the same offset follows the store that
     dominates it.  */
  comp->refs.qsort (order_drefs);

  /* For Store-Store chain, we only support load if it is dominated by a
     store statement in the same iteration of loop.  */
  if (comp->eliminate_store_p)
    for (a = NULL, i = 0; i < comp->refs.length (); i++)
      {
	if (DR_IS_WRITE (comp->refs[i]->ref))
	  a = comp->refs[i];
	else if (a == NULL || a->offset != comp->refs[i]->offset)
	  {
	    /* If there is load that is not dominated by a store in the
	       same iteration of loop, clear the flag so no Store-Store
	       chain is generated for this component.  */
	    comp->eliminate_store_p = false;
	    break;
	  }
      }

  /* Determine roots and create chains for components.  */
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain when there is none yet, when a write cannot be
	 added to the current one (a write may not join a CT_LOAD chain,
	 nor any chain if store elimination is disabled for COMP), or when
	 the distance from the current root exceeds MAX_DISTANCE.  */
      if (!chain
	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
	  || wi::leu_p (MAX_DISTANCE, y: a->offset - last_ofs))
	{
	  /* Finish the previous chain: keep it only if it combines at
	     least two references.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (chain);
	      m_chains.safe_push (obj: chain);
	    }
	  else
	    release_chain (chain);

	  /* Determine type of the chain.  If the root reference is a load,
	     this can only be a CT_LOAD chain; other chains are intialized
	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
	     new reference is added.  */
	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
	  chain = make_rooted_chain (ref: a, type);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, ref: a);
    }

  /* Finish the last chain the same way.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (chain);
      m_chains.safe_push (obj: chain);
    }
  else
    release_chain (chain);
}
1632
1633/* Find roots of the values and determine distances in components COMPS, and
1634 separates the references to chains. */
1635
1636void
1637pcom_worker::determine_roots (struct component *comps)
1638{
1639 struct component *comp;
1640
1641 for (comp = comps; comp; comp = comp->next)
1642 determine_roots_comp (comp);
1643}
1644
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      /* A PHI can only be a use of NEW_TREE, never its initialization.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      /* Insert the replacement assignment after the labels of the block
	 that contained the PHI.  */
      bsi = gsi_after_labels (bb: gimple_bb (g: stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (i: bsi);
      update_stmt (s: stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (gs: stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (gs: stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber on the lhs has no meaningful value to copy; use the
	     default definition of the variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (gs: stmt);
    }

  /* Append the copy into NEW_TREE right after STMT.  */
  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1735
/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
   of the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter,
		  gimple_seq *stmts, tree niters = NULL_TREE)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  tree ref = DR_REF (dr);
  enum tree_code ref_code = ERROR_MARK;
  tree ref_type = NULL_TREE;
  tree ref_op1 = NULL_TREE;
  tree ref_op2 = NULL_TREE;
  tree new_offset;

  /* Shift the address by ITER steps; fold constant adjustments into the
     constant offset COFF, variable ones into OFF.  */
  if (iter != 0)
    {
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
      if (TREE_CODE (new_offset) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  /* Likewise shift by NITERS steps if requested.  */
  if (niters != NULL_TREE)
    {
      niters = fold_convert (ssizetype, niters);
      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
      if (TREE_CODE (niters) == INTEGER_CST)
	coff = size_binop (PLUS_EXPR, coff, new_offset);
      else
	off = size_binop (PLUS_EXPR, off, new_offset);
    }

  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     if the bitfield object also starts at a byte-boundary we can simply
     replicate the COMPONENT_REF, but we have to subtract the component's
     byte-offset from the MEM_REF address first.
     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      unsigned HOST_WIDE_INT boff;
      tree field = TREE_OPERAND (ref, 1);
      tree offset = component_ref_field_offset (ref);
      ref_type = TREE_TYPE (ref);
      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
      /* This can occur in Ada.  See the comment in get_bit_range.  */
      if (boff % BITS_PER_UNIT != 0
	  || !tree_fits_uhwi_p (offset))
	{
	  ref_code = BIT_FIELD_REF;
	  ref_op1 = DECL_SIZE (field);
	  ref_op2 = bitsize_zero_node;
	}
      else
	{
	  /* Byte-aligned bitfield: rebuild the COMPONENT_REF on top of a
	     MEM_REF whose address is moved back by the field's byte
	     offset.  */
	  boff >>= LOG2_BITS_PER_UNIT;
	  boff += tree_to_uhwi (offset);
	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
	  ref_code = COMPONENT_REF;
	  ref_op1 = field;
	  ref_op2 = TREE_OPERAND (ref, 2);
	  ref = TREE_OPERAND (ref, 0);
	}
    }
  /* We may not associate the constant offset across the pointer plus
     expression because that might form a pointer to before the object
     then.  But for some cases we can retain that to allow tree_could_trap_p
     to return false - see gcc.dg/tree-ssa/predcom-1.c  */
  tree addr, alias_ptr;
  if (integer_zerop (off))
    {
      alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
      addr = DR_BASE_ADDRESS (dr);
    }
  else
    {
      alias_ptr = build_zero_cst (reference_alias_ptr_type (ref));
      off = size_binop (PLUS_EXPR, off, coff);
      addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
    }
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree type = build_aligned_type (TREE_TYPE (ref),
				  get_object_alignment (ref));
  ref = build2 (MEM_REF, type, addr, alias_ptr);
  /* Re-wrap the bitfield access around the MEM_REF if needed.  */
  if (ref_type)
    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
  return ref;
}
1830
1831/* Get the initialization expression for the INDEX-th temporary variable
1832 of CHAIN. */
1833
1834static tree
1835get_init_expr (chain_p chain, unsigned index)
1836{
1837 if (chain->type == CT_COMBINATION)
1838 {
1839 tree e1 = get_init_expr (chain: chain->ch1, index);
1840 tree e2 = get_init_expr (chain: chain->ch2, index);
1841
1842 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1843 }
1844 else
1845 return chain->inits[index];
1846}
1847
1848/* Returns a new temporary variable used for the I-th variable carrying
1849 value of REF. The variable's uid is marked in TMP_VARS. */
1850
1851static tree
1852predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1853{
1854 tree type = TREE_TYPE (ref);
1855 /* We never access the components of the temporary variable in predictive
1856 commoning. */
1857 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, n: i));
1858 bitmap_set_bit (tmp_vars, DECL_UID (var));
1859 return var;
1860}
1861
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use appears at the maximal distance, the variable carrying the
     oldest value can be reused as the newest one.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  /* N + 1 variables are needed: vars[i] holds the value from I iterations
     back (vars[n] may alias vars[0] when reuse_first).  */
  chain->vars.create (nelems: n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (gs: root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (obj: var);
    }
  if (reuse_first)
    chain->vars.quick_push (obj: chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Create a PHI for each variable: initialized on the preheader edge
     from the corresponding init expression, rotated to the next variable
     along the latch edge.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, index: i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1915
/* For inter-iteration store elimination CHAIN in LOOP, returns true if
   all stores to be eliminated store loop invariant values into memory.
   In this case, we can use these invariant values directly after LOOP.  */

static bool
is_inv_store_elimination_chain (class loop *loop, chain_p chain)
{
  if (chain->length == 0 || chain->type != CT_STORE_STORE)
    return false;

  gcc_assert (!chain->has_max_use_after);

  /* If loop iterates for unknown times or fewer times than chain->length,
     we still need to setup root variable and propagate it with PHI node.  */
  tree niters = number_of_latch_executions (loop);
  if (TREE_CODE (niters) != INTEGER_CST
      || wi::leu_p (x: wi::to_wide (t: niters), y: chain->length))
    return false;

  /* Check stores in chain for elimination if they only store loop invariant
     values.  */
  for (unsigned i = 0; i < chain->length; i++)
    {
      dref a = get_chain_last_write_at (chain, distance: i);
      /* No store at this distance -- nothing to check.  */
      if (a == NULL)
	continue;

      gimple *def_stmt, *stmt = a->stmt;
      if (!gimple_assign_single_p (gs: stmt))
	return false;

      tree val = gimple_assign_rhs1 (gs: stmt);
      /* A clobber has no usable value.  */
      if (TREE_CLOBBER_P (val))
	return false;

      /* Constants are trivially loop invariant.  */
      if (CONSTANT_CLASS_P (val))
	continue;

      if (TREE_CODE (val) != SSA_NAME)
	return false;

      /* An SSA name is invariant iff it is not defined inside LOOP.  */
      def_stmt = SSA_NAME_DEF_STMT (val);
      if (gimple_nop_p (g: def_stmt))
	continue;

      if (flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt)))
	return false;
    }
  return true;
}
1966
/* Creates root variables for store elimination CHAIN in which stores for
   elimination only store loop invariant values.  In this case, we neither
   need to load root variables before loop nor propagate it with PHI nodes.  */

static void
initialize_root_vars_store_elim_1 (chain_p chain)
{
  tree var;
  unsigned i, n = chain->length;

  chain->vars.create (nelems: n);
  chain->vars.safe_grow_cleared (len: n, exact: true);

  /* Initialize root value for eliminated stores at each distance.  */
  for (i = 0; i < n; i++)
    {
      dref a = get_chain_last_write_at (chain, distance: i);
      if (a == NULL)
	continue;

      /* The invariant rhs of the store is the value to use after loop.  */
      var = gimple_assign_rhs1 (gs: a->stmt);
      chain->vars[a->distance] = var;
    }

  /* We don't propagate values with PHI nodes, so manually propagate value
     to bubble positions.  */
  var = chain->vars[0];
  for (i = 1; i < n; i++)
    {
      if (chain->vars[i] != NULL_TREE)
	{
	  var = chain->vars[i];
	  continue;
	}
      /* Fill a hole with the closest preceding value.  */
      chain->vars[i] = var;
    }

  /* Reverse the vector, so that vars[0] corresponds to the largest
     distance.  */
  for (i = 0; i < n / 2; i++)
    std::swap (a&: chain->vars[i], b&: chain->vars[n - i - 1]);
}
2008
/* Creates root variables for store elimination CHAIN in which stores for
   elimination store loop variant values.  In this case, we may need to
   load root variables before LOOP and propagate it with PHI nodes.  Uids
   of the newly created root variables are marked in TMP_VARS.  */

static void
initialize_root_vars_store_elim_2 (class loop *loop,
				   chain_p chain, bitmap tmp_vars)
{
  unsigned i, n = chain->length;
  tree ref, init, var, next, val, phi_result;
  gimple *stmt;
  gimple_seq stmts;

  chain->vars.create (nelems: n);

  ref = DR_REF (get_chain_root (chain)->ref);
  for (i = 0; i < n; i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (obj: var);
    }

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* Root values are either rhs operand of stores to be eliminated, or
     loaded from memory before loop.  */
  auto_vec<tree> vtemps;
  vtemps.safe_grow_cleared (len: n, exact: true);
  for (i = 0; i < n; i++)
    {
      init = get_init_expr (chain, index: i);
      if (init == NULL_TREE)
	{
	  /* Root value is rhs operand of the store to be eliminated if
	     it isn't loaded from memory before loop.  */
	  dref a = get_chain_last_write_at (chain, distance: i);
	  val = gimple_assign_rhs1 (gs: a->stmt);
	  /* Replace a clobber by the default definition of the variable,
	     both as root value and in the store itself.  */
	  if (TREE_CLOBBER_P (val))
	    {
	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
	      gimple_assign_set_rhs1 (gs: a->stmt, rhs: val);
	    }

	  vtemps[n - i - 1] = val;
	}
      else
	{
	  edge latch = loop_latch_edge (loop);
	  edge entry = loop_preheader_edge (loop);

	  /* Root value is loaded from memory before loop, we also need
	     to add PHI nodes to propagate the value across iterations.  */
	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (entry, stmts);

	  next = chain->vars[n - i];
	  phi_result = copy_ssa_name (var: next);
	  gphi *phi = create_phi_node (phi_result, loop->header);
	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
	  vtemps[n - i - 1] = phi_result;
	}
    }

  /* Find the insertion position: after the last reference of the chain
     in the original statement order.  */
  dref last = get_chain_root (chain);
  for (i = 0; i < chain->refs.length (); i++)
    {
      if (chain->refs[i]->pos > last->pos)
	last = chain->refs[i];
    }

  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);

  /* Insert statements copying root value to root variable.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      val = vtemps[i];
      stmt = gimple_build_assign (var, val);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
}
2095
2096/* Generates stores for CHAIN's eliminated stores in LOOP's last
2097 (CHAIN->length - 1) iterations. */
2098
static void
finalize_eliminated_stores (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;

  /* Build one store per distance; note that FINIS is indexed in the
     opposite order from VARS.  */
  for (i = 0; i < n; i++)
    {
      tree var = chain->vars[i];
      tree fini = chain->finis[n - i - 1];
      gimple *stmt = gimple_build_assign (fini, var);

      gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
    }

  /* Emit the accumulated stores on the (single) exit edge of the loop.  */
  if (chain->fini_seq)
    {
      gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
      chain->fini_seq = NULL;
    }
}
2119
2120/* Initializes a variable for load motion for ROOT and prepares phi nodes and
2121 initialization on entry to LOOP if necessary. The ssa name for the variable
2122 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
2123 around the loop is created. Uid of the newly created temporary variable
2124 is marked in TMP_VARS. INITS is the list containing the (single)
2125 initializer. */
2126
static void
initialize_root_vars_lm (class loop *loop, dref root, bool written,
			 vec<tree> *vars, const vec<tree> &inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* With a write in the loop we need two SSA names (phi result and the
     value from the latch); a pure read needs just one.  */
  vars->create (nelems: written ? 2 : 1);
  var = predcom_tmp_var (ref, i: 0, tmp_vars);
  vars->quick_push (obj: var);
  if (written)
    vars->quick_push (obj: (*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Propagate the (possibly updated) value around the loop.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Read-only: a single initialization before the loop suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
2170
2171
2172/* Execute load motion for references in chain CHAIN. Uids of the newly
2173 created temporary variables are marked in TMP_VARS. */
2174
static void
execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, root: get_chain_root (chain), written: n_writes > 0,
			   vars: &vars, inits: chain->inits, tmp_vars);

  /* Replace each reference with the appropriate SSA name: vars[0] is the
     phi result, vars[1] (used once all writes are past) the latch value.  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      /* More writes follow; give this one a fresh name so later
		 reads see the latest stored value.  */
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (stmt: a->stmt, new_tree: vars[ridx],
			set: !is_read, in_lhs: !is_read);
    }
}
2218
2219/* Returns the single statement in that NAME is used, excepting
2220 the looparound phi nodes contained in one of the chains. If there is no
2221 such statement, or more statements, NULL is returned. */
2222
gimple *
pcom_worker::single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple *stmt, *ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (g: stmt) == GIMPLE_PHI)
	{
	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
	     could not be processed anyway, so just fail for them.  */
	  if (bitmap_bit_p (m_looparound_phis,
			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
	    continue;

	  return NULL;
	}
      else if (is_gimple_debug (gs: stmt))
	/* Debug statements do not count as real uses.  */
	continue;
      else if (ret != NULL)
	/* Second real use found -- not single.  */
	return NULL;
      else
	ret = stmt;
    }

  return ret;
}
2254
2255/* Remove statement STMT, as well as the chain of assignments in that it is
2256 used. */
2257
void
pcom_worker::remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (g: stmt) == GIMPLE_PHI)
    {
      /* Remove the phi first, then continue into the chain of ssa-name
	 copies fed by its result (if any).  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (gs: next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use ssa-name copies, removing each.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (gs: stmt);
      if (TREE_CODE (name) == SSA_NAME)
	{
	  next = single_nonlooparound_use (name);
	  reset_debug_uses (stmt);
	}
      else
	{
	  /* This is a store to be eliminated.  */
	  gcc_assert (gimple_vdef (stmt) != NULL);
	  next = NULL;
	}

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      /* Stop unless the single use is another plain copy of NAME.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (gs: next) != name)
	return;

      stmt = next;
    }
}
2312
2313/* Perform the predictive commoning optimization for a chain CHAIN.
2314 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2315
void
pcom_worker::execute_pred_commoning_chain (chain_p chain,
					   bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;
  bool in_lhs;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).
	 We delay this until after all chains are processed.  */
    }
  else if (chain->type == CT_STORE_STORE)
    {
      if (chain->length > 0)
	{
	  if (chain->inv_store_elimination)
	    {
	      /* If dead stores in this chain only store loop invariant
		 values, we can simply record the invariant value and use
		 it directly after loop.  */
	      initialize_root_vars_store_elim_1 (chain);
	    }
	  else
	    {
	      /* If dead stores in this chain store loop variant values,
		 we need to set up the variables by loading from memory
		 before loop and propagating it with PHI nodes.  */
	      initialize_root_vars_store_elim_2 (loop: m_loop, chain, tmp_vars);
	    }

	  /* For inter-iteration store elimination chain, stores at each
	     distance in loop's last (chain->length - 1) iterations can't
	     be eliminated, because there is no following killing store.
	     We need to generate these stores after loop.  */
	  finalize_eliminated_stores (loop: m_loop, chain);
	}

      /* Walk the references backwards so that the last store of the
	 chain is seen first and preserved.  */
      bool last_store_p = true;
      for (i = chain->refs.length (); i > 0; i--)
	{
	  a = chain->refs[i - 1];
	  /* Preserve the last store of the chain.  Eliminate other stores
	     which are killed by the last one.  */
	  if (DR_IS_WRITE (a->ref))
	    {
	      if (last_store_p)
		last_store_p = false;
	      else
		remove_stmt (stmt: a->stmt);

	      continue;
	    }

	  /* Any load in Store-Store chain must be dominated by a previous
	     store, we replace the load reference with rhs of the store.  */
	  dref b = get_chain_last_write_before_load (chain, load_idx: i - 1);
	  gcc_assert (b != NULL);
	  var = gimple_assign_rhs1 (gs: b->stmt);
	  replace_ref_with (stmt: a->stmt, new_tree: var, set: false, in_lhs: false);
	}
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value.  */
      initialize_root_vars (loop: m_loop, chain, tmp_vars);
      a = get_chain_root (chain);
      in_lhs = (chain->type == CT_STORE_LOAD
		|| chain->type == CT_COMBINATION);
      replace_ref_with (stmt: a->stmt, new_tree: chain->vars[chain->length], set: true, in_lhs);

      /* Replace the uses of the original references by these variables.  */
      for (i = 1; chain->refs.iterate (ix: i, ptr: &a); i++)
	{
	  var = chain->vars[chain->length - a->distance];
	  replace_ref_with (stmt: a->stmt, new_tree: var, set: false, in_lhs: false);
	}
    }
}
2398
2399/* Determines the unroll factor necessary to remove as many temporary variable
2400 copies as possible. CHAINS is the list of chains that will be
2401 optimized. */
2402
2403static unsigned
2404determine_unroll_factor (const vec<chain_p> &chains)
2405{
2406 chain_p chain;
2407 unsigned factor = 1, af, nfactor, i;
2408 unsigned max = param_max_unroll_times;
2409
2410 FOR_EACH_VEC_ELT (chains, i, chain)
2411 {
2412 if (chain->type == CT_INVARIANT)
2413 continue;
2414 /* For now we can't handle unrolling when eliminating stores. */
2415 else if (chain->type == CT_STORE_STORE)
2416 return 1;
2417
2418 if (chain->combined)
2419 {
2420 /* For combined chains, we can't handle unrolling if we replace
2421 looparound PHIs. */
2422 dref a;
2423 unsigned j;
2424 for (j = 1; chain->refs.iterate (ix: j, ptr: &a); j++)
2425 if (gimple_code (g: a->stmt) == GIMPLE_PHI)
2426 return 1;
2427 continue;
2428 }
2429
2430 /* The best unroll factor for this chain is equal to the number of
2431 temporary variables that we create for it. */
2432 af = chain->length;
2433 if (chain->has_max_use_after)
2434 af++;
2435
2436 nfactor = factor * af / gcd (factor, af);
2437 if (nfactor <= max)
2438 factor = nfactor;
2439 }
2440
2441 return factor;
2442}
2443
2444/* Perform the predictive commoning optimization for chains.
2445 Uids of the newly created temporary variables are marked in TMP_VARS. */
2446
void
pcom_worker::execute_pred_commoning (bitmap tmp_vars)
{
  chain_p chain;
  unsigned i;

  /* First pass: transform every chain.  Statement removal for combined
     chains is deferred so that their references stay valid meanwhile.  */
  FOR_EACH_VEC_ELT (m_chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	execute_load_motion (loop: m_loop, chain, tmp_vars);
      else
	execute_pred_commoning_chain (chain, tmp_vars);
    }

  /* Second pass: clean up statements of combined chains.  */
  FOR_EACH_VEC_ELT (m_chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	;
      else if (chain->combined)
	{
	  /* For combined chains, just remove the statements that are used to
	     compute the values of the expression (except for the root one).  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (ix: j, ptr: &a); j++)
	    remove_stmt (stmt: a->stmt);
	}
    }
}
2476
2477/* For each reference in CHAINS, if its defining statement is
2478 phi node, record the ssa name that is defined by it. */
2479
2480static void
2481replace_phis_by_defined_names (vec<chain_p> &chains)
2482{
2483 chain_p chain;
2484 dref a;
2485 unsigned i, j;
2486
2487 FOR_EACH_VEC_ELT (chains, i, chain)
2488 FOR_EACH_VEC_ELT (chain->refs, j, a)
2489 {
2490 if (gimple_code (g: a->stmt) == GIMPLE_PHI)
2491 {
2492 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2493 a->stmt = NULL;
2494 }
2495 }
2496}
2497
2498/* For each reference in CHAINS, if name_defined_by_phi is not
2499 NULL, use it to set the stmt field. */
2500
2501static void
2502replace_names_by_phis (vec<chain_p> chains)
2503{
2504 chain_p chain;
2505 dref a;
2506 unsigned i, j;
2507
2508 FOR_EACH_VEC_ELT (chains, i, chain)
2509 FOR_EACH_VEC_ELT (chain->refs, j, a)
2510 if (a->stmt == NULL)
2511 {
2512 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2513 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2514 a->name_defined_by_phi = NULL_TREE;
2515 }
2516}
2517
2518/* Wrapper over execute_pred_commoning, to pass it as a callback
2519 to tree_transform_and_unroll_loop. */
2520
struct epcc_data
{
  vec<chain_p> chains;	/* Chains to re-link and process in the callback.  */
  bitmap tmp_vars;	/* Uids of the temporary variables created so far.  */
  pcom_worker *worker;	/* Worker whose execute_pred_commoning to invoke.  */
};
2527
/* DATA points to an epcc_data describing the chains and worker to use.  */
static void
execute_pred_commoning_cbck (class loop *loop ATTRIBUTE_UNUSED, void *data)
{
  struct epcc_data *const dta = (struct epcc_data *) data;
  pcom_worker *worker = dta->worker;

  /* Restore phi nodes that were replaced by ssa names before
     tree_transform_and_unroll_loop (see detailed description in
     tree_predictive_commoning_loop).  */
  replace_names_by_phis (chains: dta->chains);
  worker->execute_pred_commoning (tmp_vars: dta->tmp_vars);
}
2540
2541/* Base NAME and all the names in the chain of phi nodes that use it
2542 on variable VAR. The phi nodes are recognized by being in the copies of
2543 the header of the LOOP. */
2544
static void
base_names_in_chain_on (class loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  /* Follow the chain of phi uses inside the loop, rebasing each phi
     result on VAR, until no further phi use is found.  */
  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (g: stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (g: stmt)))
	    {
	      phi = stmt;
	      break;
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
2572
2573/* Given an unrolled LOOP after predictive commoning, remove the
2574 register copies arising from phi nodes by changing the base
2575 variables of SSA names. TMP_VARS is the set of the temporary variables
2576 for those we want to perform this. */
2577
static void
eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only handle phis for the temporaries this pass created.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (g: stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (g: stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, name: use, var);
    }
}
2616
2617/* Returns true if CHAIN is suitable to be combined. */
2618
2619static bool
2620chain_can_be_combined_p (chain_p chain)
2621{
2622 return (!chain->combined
2623 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2624}
2625
2626/* Returns the modify statement that uses NAME. Skips over assignment
2627 statements, NAME is replaced with the actual name used in the returned
2628 statement. */
2629
gimple *
pcom_worker::find_use_stmt (tree *name)
{
  gimple *stmt;
  tree rhs, lhs;

  /* Skip over assignments.  */
  while (1)
    {
      stmt = single_nonlooparound_use (name: *name);
      if (!stmt)
	return NULL;

      if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
	return NULL;

      lhs = gimple_assign_lhs (gs: stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return NULL;

      if (gimple_assign_copy_p (stmt))
	{
	  /* Transparent copy: continue the search from its lhs, and
	     report the new name back to the caller through *NAME.  */
	  rhs = gimple_assign_rhs1 (gs: stmt);
	  if (rhs != *name)
	    return NULL;

	  *name = lhs;
	}
      else if (get_gimple_rhs_class (code: gimple_assign_rhs_code (gs: stmt))
	       == GIMPLE_BINARY_RHS)
	/* Found the binary operation using NAME.  */
	return stmt;
      else
	return NULL;
    }
}
2665
2666/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2667
2668static bool
2669may_reassociate_p (tree type, enum tree_code code)
2670{
2671 if (FLOAT_TYPE_P (type)
2672 && !flag_unsafe_math_optimizations)
2673 return false;
2674
2675 return (commutative_tree_code (code)
2676 && associative_tree_code (code));
2677}
2678
2679/* If the operation used in STMT is associative and commutative, go through the
2680 tree of the same operations and returns its root. Distance to the root
2681 is stored in DISTANCE. */
2682
gimple *
pcom_worker::find_associative_operation_root (gimple *stmt, unsigned *distance)
{
  tree lhs;
  gimple *next;
  enum tree_code code = gimple_assign_rhs_code (gs: stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  /* Climb the single-use chain while each use applies the same
     operation, counting the number of steps in DIST.  */
  while (1)
    {
      lhs = gimple_assign_lhs (gs: stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (name: &lhs);
      if (!next
	  || gimple_assign_rhs_code (gs: next) != code)
	break;

      stmt = next;
      dist++;
    }

  if (distance)
    *distance = dist;
  return stmt;
}
2713
2714/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2715 is no such statement, returns NULL_TREE. In case the operation used on
2716 NAME1 and NAME2 is associative and commutative, returns the root of the
2717 tree formed by this operation instead of the statement that uses NAME1 or
2718 NAME2. */
2719
2720gimple *
2721pcom_worker::find_common_use_stmt (tree *name1, tree *name2)
2722{
2723 gimple *stmt1, *stmt2;
2724
2725 stmt1 = find_use_stmt (name: name1);
2726 if (!stmt1)
2727 return NULL;
2728
2729 stmt2 = find_use_stmt (name: name2);
2730 if (!stmt2)
2731 return NULL;
2732
2733 if (stmt1 == stmt2)
2734 return stmt1;
2735
2736 stmt1 = find_associative_operation_root (stmt: stmt1, NULL);
2737 if (!stmt1)
2738 return NULL;
2739 stmt2 = find_associative_operation_root (stmt: stmt2, NULL);
2740 if (!stmt2)
2741 return NULL;
2742
2743 return (stmt1 == stmt2 ? stmt1 : NULL);
2744}
2745
2746/* Checks whether R1 and R2 are combined together using CODE, with the result
2747 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2748 if it is true. If CODE is ERROR_MARK, set these values instead. */
2749
bool
pcom_worker::combinable_refs_p (dref r1, dref r2,
				enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple *stmt;

  name1 = name_for_ref (ref: r1);
  name2 = name_for_ref (ref: r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (name1: &name1, name2: &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
	 is executed under the same condition as the references.  */
      || (gimple_bb (g: stmt) != gimple_bb (g: r1->stmt)
	  && gimple_bb (g: stmt) != gimple_bb (g: r2->stmt)))
    return false;

  /* Operand order only matters for non-commutative codes.  */
  acode = gimple_assign_rhs_code (gs: stmt);
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (gs: stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  if (*code == ERROR_MARK)
    {
      /* First pair seen: record the operation for later comparisons.  */
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2790
2791/* Remove OP from the operation on rhs of STMT, and replace STMT with
2792 an assignment of the remaining operand. */
2793
2794static void
2795remove_name_from_operation (gimple *stmt, tree op)
2796{
2797 tree other_op;
2798 gimple_stmt_iterator si;
2799
2800 gcc_assert (is_gimple_assign (stmt));
2801
2802 if (gimple_assign_rhs1 (gs: stmt) == op)
2803 other_op = gimple_assign_rhs2 (gs: stmt);
2804 else
2805 other_op = gimple_assign_rhs1 (gs: stmt);
2806
2807 si = gsi_for_stmt (stmt);
2808 gimple_assign_set_rhs_from_tree (&si, other_op);
2809
2810 /* We should not have reallocated STMT. */
2811 gcc_assert (gsi_stmt (si) == stmt);
2812
2813 update_stmt (s: stmt);
2814}
2815
2816/* Reassociates the expression in that NAME1 and NAME2 are used so that they
2817 are combined in a single statement, and returns this statement. */
2818
gimple *
pcom_worker::reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (name: &name1);
  stmt2 = find_use_stmt (name: &name2);
  root1 = find_associative_operation_root (stmt: stmt1, distance: &dist1);
  root2 = find_associative_operation_root (stmt: stmt2, distance: &dist2);
  code = gimple_assign_rhs_code (gs: stmt1);

  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* Balance the distances to the common root, then climb both chains in
     lockstep until they meet.  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (name: &r1);
      r1 = gimple_assign_lhs (gs: s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (name: &r2);
      r2 = gimple_assign_lhs (gs: s2);
      dist2--;
    }

  while (s1 != s2)
    {
      s1 = find_use_stmt (name: &r1);
      r1 = gimple_assign_lhs (gs: s1);
      s2 = find_use_stmt (name: &r2);
      r2 = gimple_assign_lhs (gs: s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt: stmt1, op: name1);
  remove_name_from_operation (stmt: stmt2, op: name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (gs: s1),
				  gimple_assign_rhs1 (gs: s1),
				  gimple_assign_rhs2 (gs: s1));

  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (gsi: &bsi, code, op1: new_name, op2: tmp_name);
  s1 = gsi_stmt (i: bsi);
  update_stmt (s: s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2898
2899/* Returns the statement that combines references R1 and R2. In case R1
2900 and R2 are not used in the same statement, but they are used with an
2901 associative and commutative operation in the same expression, reassociate
2902 the expression so that they are used in the same statement. */
2903
2904gimple *
2905pcom_worker::stmt_combining_refs (dref r1, dref r2)
2906{
2907 gimple *stmt1, *stmt2;
2908 tree name1 = name_for_ref (ref: r1);
2909 tree name2 = name_for_ref (ref: r2);
2910
2911 stmt1 = find_use_stmt (name: &name1);
2912 stmt2 = find_use_stmt (name: &name2);
2913 if (stmt1 == stmt2)
2914 return stmt1;
2915
2916 return reassociate_to_the_same_stmt (name1, name2);
2917}
2918
2919/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2920 description of the new chain is returned, otherwise we return NULL. */
2921
chain_p
pcom_worker::combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  tree rslt_type = NULL_TREE;

  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* All reference pairs must match in distance and combine with one and
     the same operation (OP/SWAP/RSLT_TYPE are set by the first pair).  */
  for (i = 0; (ch1->refs.iterate (ix: i, ptr: &r1)
	       && ch2->refs.iterate (ix: i, ptr: &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, code: &op, swap: &swap, rslt_type: &rslt_type))
	return NULL;
    }

  if (swap)
    std::swap (a&: ch1, b&: ch2);

  new_chain = new struct chain (CT_COMBINATION);
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  /* Build the references of the combined chain from the paired ones.  */
  for (i = 0; (ch1->refs.iterate (ix: i, ptr: &r1)
	       && ch2->refs.iterate (ix: i, ptr: &r2)); i++)
    {
      nw = XCNEW (class dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (obj: nw);
    }

  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2974
2975/* Recursively update position information of all offspring chains to ROOT
2976 chain's position information. */
2977
2978static void
2979update_pos_for_combined_chains (chain_p root)
2980{
2981 chain_p ch1 = root->ch1, ch2 = root->ch2;
2982 dref ref, ref1, ref2;
2983 for (unsigned j = 0; (root->refs.iterate (ix: j, ptr: &ref)
2984 && ch1->refs.iterate (ix: j, ptr: &ref1)
2985 && ch2->refs.iterate (ix: j, ptr: &ref2)); ++j)
2986 ref1->pos = ref2->pos = ref->pos;
2987
2988 if (ch1->type == CT_COMBINATION)
2989 update_pos_for_combined_chains (root: ch1);
2990 if (ch2->type == CT_COMBINATION)
2991 update_pos_for_combined_chains (root: ch2);
2992}
2993
2994/* Returns true if statement S1 dominates statement S2. */
2995
2996static bool
2997pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2998{
2999 basic_block bb1 = gimple_bb (g: s1), bb2 = gimple_bb (g: s2);
3000
3001 if (!bb1 || s1 == s2)
3002 return true;
3003
3004 if (bb1 == bb2)
3005 return gimple_uid (g: s1) < gimple_uid (g: s2);
3006
3007 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
3008}
3009
3010/* Try to combine the chains. */
3011
void
pcom_worker::try_combine_chains ()
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;
  bool combined_p = false;

  FOR_EACH_VEC_ELT (m_chains, i, ch1)
    if (chain_can_be_combined_p (chain: ch1))
      worklist.safe_push (obj: ch1);

  /* Greedily merge chains; a newly combined chain is pushed back so it
     can itself be combined further.  */
  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (chain: ch1))
	continue;

      FOR_EACH_VEC_ELT (m_chains, j, ch2)
	{
	  if (!chain_can_be_combined_p (chain: ch2))
	    continue;

	  cch = combine_chains (ch1, ch2);
	  if (cch)
	    {
	      worklist.safe_push (obj: cch);
	      m_chains.safe_push (obj: cch);
	      combined_p = true;
	      break;
	    }
	}
    }
  if (!combined_p)
    return;

  /* Setup UID for all statements in dominance order.  */
  basic_block *bbs = get_loop_body_in_dom_order (m_loop);
  renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes);
  free (ptr: bbs);

  /* Re-association in combined chains may generate statements different to
     order of references of the original chain.  We need to keep references
     of combined chain in dominance order so that all uses will be inserted
     after definitions.  Note:
       A) This is necessary for all combined chains.
       B) This is only necessary for ZERO distance references because other
	  references inherit value from loop carried PHIs.

     We first update position information for all combined chains.  */
  dref ref;
  for (i = 0; m_chains.iterate (ix: i, ptr: &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION || ch1->combined)
	continue;

      for (j = 0; ch1->refs.iterate (ix: j, ptr: &ref); ++j)
	ref->pos = gimple_uid (g: ref->stmt);

      update_pos_for_combined_chains (root: ch1);
    }
  /* Then sort references according to newly updated position information.  */
  for (i = 0; m_chains.iterate (ix: i, ptr: &ch1); ++i)
    {
      if (ch1->type != CT_COMBINATION && !ch1->combined)
	continue;

      /* Find the first reference with non-ZERO distance.  */
      if (ch1->length == 0)
	j = ch1->refs.length();
      else
	{
	  for (j = 0; ch1->refs.iterate (ix: j, ptr: &ref); ++j)
	    if (ref->distance != 0)
	      break;
	}

      /* Sort all ZERO distance references by position.  */
      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);

      if (ch1->combined)
	continue;

      /* For ZERO length chain, has_max_use_after must be true since root
	 combined stmt must dominates others.  */
      if (ch1->length == 0)
	{
	  ch1->has_max_use_after = true;
	  continue;
	}
      /* Check if there is use at max distance after root for combined chains
	 and set flag accordingly.  */
      ch1->has_max_use_after = false;
      gimple *root_stmt = get_chain_root (chain: ch1)->stmt;
      for (j = 1; ch1->refs.iterate (ix: j, ptr: &ref); ++j)
	{
	  if (ref->distance == ch1->length
	      && !pcom_stmt_dominates_stmt_p (s1: ref->stmt, s2: root_stmt))
	    {
	      ch1->has_max_use_after = true;
	      break;
	    }
	}
    }
}
3117
3118/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
3119 if this is impossible because one of these initializers may trap, true
3120 otherwise. */
3121
static bool
prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
{
  unsigned i, n = chain->length;

  /* For now we can't eliminate stores if some of them are conditional
     executed.  */
  if (!chain->all_always_accessed)
    return false;

  /* Nothing to initialize for intra-iteration store elimination.  */
  if (n == 0 && chain->type == CT_STORE_STORE)
    return true;

  /* For store elimination chain, there is nothing to initialize if stores
     to be eliminated only store loop invariant values into memory.  */
  if (chain->type == CT_STORE_STORE
      && is_inv_store_elimination_chain (loop, chain))
    {
      chain->inv_store_elimination = true;
      return true;
    }

  chain->inits.create (nelems: n);
  chain->inits.safe_grow_cleared (len: n, exact: true);

  /* For store elimination chain like below:

     for (i = 0; i < len; i++)
       {
	 a[i] = 1;
	 // a[i + 1] = ...
	 a[i + 2] = 3;
       }

     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
     content of a[i + 1] remain the same if the loop iterates fewer times
     than chain->length.  We need to set up root variables for such stores
     by loading from memory before loop.  Note we only need to load bubble
     elements because loop body is guaranteed to be executed at least once
     after loop's preheader edge.  */
  auto_vec<bool> bubbles;
  bubbles.safe_grow_cleared (len: n + 1, exact: true);
  for (i = 0; i < chain->refs.length (); i++)
    bubbles[chain->refs[i]->distance] = true;

  struct data_reference *dr = get_chain_root (chain)->ref;
  for (i = 0; i < n; i++)
    {
      /* Distances covered by an actual store need no preload.  */
      if (bubbles[i])
	continue;

      gimple_seq stmts = NULL;

      tree init = ref_at_iteration (dr, iter: (int) 0 - i, stmts: &stmts);
      if (stmts)
	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);

      chain->inits[i] = init;
    }

  return true;
}
3185
3186/* Prepare initializers for CHAIN. Returns false if this is impossible
3187 because one of these initializers may trap, true otherwise. */
3188
3189bool
3190pcom_worker::prepare_initializers_chain (chain_p chain)
3191{
3192 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3193 struct data_reference *dr = get_chain_root (chain)->ref;
3194 tree init;
3195 dref laref;
3196 edge entry = loop_preheader_edge (m_loop);
3197
3198 if (chain->type == CT_STORE_STORE)
3199 return prepare_initializers_chain_store_elim (loop: m_loop, chain);
3200
3201 /* Find the initializers for the variables, and check that they cannot
3202 trap. */
3203 chain->inits.create (nelems: n);
3204 for (i = 0; i < n; i++)
3205 chain->inits.quick_push (NULL_TREE);
3206
3207 /* If we have replaced some looparound phi nodes, use their initializers
3208 instead of creating our own. */
3209 FOR_EACH_VEC_ELT (chain->refs, i, laref)
3210 {
3211 if (gimple_code (g: laref->stmt) != GIMPLE_PHI)
3212 continue;
3213
3214 gcc_assert (laref->distance > 0);
3215 chain->inits[n - laref->distance]
3216 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3217 }
3218
3219 for (i = 0; i < n; i++)
3220 {
3221 gimple_seq stmts = NULL;
3222
3223 if (chain->inits[i] != NULL_TREE)
3224 continue;
3225
3226 init = ref_at_iteration (dr, iter: (int) i - n, stmts: &stmts);
3227 if (!chain->all_always_accessed && tree_could_trap_p (init))
3228 {
3229 gimple_seq_discard (stmts);
3230 return false;
3231 }
3232
3233 if (stmts)
3234 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3235
3236 chain->inits[i] = init;
3237 }
3238
3239 return true;
3240}
3241
3242/* Prepare initializers for chains, and free chains that cannot
3243 be used because the initializers might trap. */
3244
3245void
3246pcom_worker::prepare_initializers ()
3247{
3248 chain_p chain;
3249 unsigned i;
3250
3251 for (i = 0; i < m_chains.length (); )
3252 {
3253 chain = m_chains[i];
3254 if (prepare_initializers_chain (chain))
3255 i++;
3256 else
3257 {
3258 release_chain (chain);
3259 m_chains.unordered_remove (ix: i);
3260 }
3261 }
3262}
3263
3264/* Generates finalizer memory references for CHAIN. Returns true
3265 if finalizer code for CHAIN can be generated, otherwise false. */
3266
3267bool
3268pcom_worker::prepare_finalizers_chain (chain_p chain)
3269{
3270 unsigned i, n = chain->length;
3271 struct data_reference *dr = get_chain_root (chain)->ref;
3272 tree fini, niters = number_of_latch_executions (m_loop);
3273
3274 /* For now we can't eliminate stores if some of them are conditional
3275 executed. */
3276 if (!chain->all_always_accessed)
3277 return false;
3278
3279 chain->finis.create (nelems: n);
3280 for (i = 0; i < n; i++)
3281 chain->finis.quick_push (NULL_TREE);
3282
3283 /* We never use looparound phi node for store elimination chains. */
3284
3285 /* Find the finalizers for the variables, and check that they cannot
3286 trap. */
3287 for (i = 0; i < n; i++)
3288 {
3289 gimple_seq stmts = NULL;
3290 gcc_assert (chain->finis[i] == NULL_TREE);
3291
3292 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3293 {
3294 niters = unshare_expr (niters);
3295 niters = force_gimple_operand (niters, &stmts, true, NULL);
3296 if (stmts)
3297 {
3298 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3299 stmts = NULL;
3300 }
3301 }
3302 fini = ref_at_iteration (dr, iter: (int) 0 - i, stmts: &stmts, niters);
3303 if (stmts)
3304 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3305
3306 chain->finis[i] = fini;
3307 }
3308
3309 return true;
3310}
3311
3312/* Generates finalizer memory reference for chains. Returns true if
3313 finalizer code generation for chains breaks loop closed ssa form. */
3314
3315bool
3316pcom_worker::prepare_finalizers ()
3317{
3318 chain_p chain;
3319 unsigned i;
3320 bool loop_closed_ssa = false;
3321
3322 for (i = 0; i < m_chains.length ();)
3323 {
3324 chain = m_chains[i];
3325
3326 /* Finalizer is only necessary for inter-iteration store elimination
3327 chains. */
3328 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3329 {
3330 i++;
3331 continue;
3332 }
3333
3334 if (prepare_finalizers_chain (chain))
3335 {
3336 i++;
3337 /* Be conservative, assume loop closed ssa form is corrupted
3338 by store-store chain. Though it's not always the case if
3339 eliminated stores only store loop invariant values into
3340 memory. */
3341 loop_closed_ssa = true;
3342 }
3343 else
3344 {
3345 release_chain (chain);
3346 m_chains.unordered_remove (ix: i);
3347 }
3348 }
3349 return loop_closed_ssa;
3350}
3351
3352/* Insert all initializing gimple stmts into LOOP's entry edge. */
3353
3354static void
3355insert_init_seqs (class loop *loop, vec<chain_p> &chains)
3356{
3357 unsigned i;
3358 edge entry = loop_preheader_edge (loop);
3359
3360 for (i = 0; i < chains.length (); ++i)
3361 if (chains[i]->init_seq)
3362 {
3363 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3364 chains[i]->init_seq = NULL;
3365 }
3366}
3367
3368/* Performs predictive commoning for LOOP. Sets bit 1<<1 of return value
3369 if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
3370 form was corrupted. Non-zero return value indicates some changes were
3371 applied to this loop. */
3372
3373unsigned
3374pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
3375{
3376 struct component *components;
3377 unsigned unroll_factor = 0;
3378 class tree_niter_desc desc;
3379 bool unroll = false, loop_closed_ssa = false;
3380
3381 if (dump_file && (dump_flags & TDF_DETAILS))
3382 fprintf (stream: dump_file, format: "Processing loop %d\n", m_loop->num);
3383
3384 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3385 if (get_max_loop_iterations_int (m_loop) == 0)
3386 {
3387 if (dump_file && (dump_flags & TDF_DETAILS))
3388 fprintf (stream: dump_file, format: "Loop iterates only 1 time, nothing to do.\n");
3389
3390 return 0;
3391 }
3392
3393 /* Find the data references and split them into components according to their
3394 dependence relations. */
3395 auto_vec<loop_p, 3> loop_nest;
3396 if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs,
3397 &m_dependences))
3398 {
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (stream: dump_file, format: "Cannot analyze data dependencies\n");
3401 return 0;
3402 }
3403
3404 if (dump_file && (dump_flags & TDF_DETAILS))
3405 dump_data_dependence_relations (dump_file, m_dependences);
3406
3407 components = split_data_refs_to_components ();
3408
3409 loop_nest.release ();
3410 if (!components)
3411 return 0;
3412
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3414 {
3415 fprintf (stream: dump_file, format: "Initial state:\n\n");
3416 dump_components (file: dump_file, comps: components);
3417 }
3418
3419 /* Find the suitable components and split them into chains. */
3420 components = filter_suitable_components (comps: components);
3421
3422 auto_bitmap tmp_vars;
3423 determine_roots (comps: components);
3424 release_components (comps: components);
3425
3426 if (!m_chains.exists ())
3427 {
3428 if (dump_file && (dump_flags & TDF_DETAILS))
3429 fprintf (stream: dump_file,
3430 format: "Predictive commoning failed: no suitable chains\n");
3431 return 0;
3432 }
3433
3434 prepare_initializers ();
3435 loop_closed_ssa = prepare_finalizers ();
3436
3437 /* Try to combine the chains that are always worked with together. */
3438 try_combine_chains ();
3439
3440 insert_init_seqs (loop: m_loop, chains&: m_chains);
3441
3442 if (dump_file && (dump_flags & TDF_DETAILS))
3443 {
3444 fprintf (stream: dump_file, format: "Before commoning:\n\n");
3445 dump_chains (file: dump_file, chains: m_chains);
3446 }
3447
3448 if (allow_unroll_p)
3449 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3450 that its number of iterations is divisible by the factor. */
3451 unroll_factor = determine_unroll_factor (chains: m_chains);
3452
3453 if (unroll_factor > 1)
3454 unroll = can_unroll_loop_p (loop: m_loop, factor: unroll_factor, niter: &desc);
3455
3456 /* Execute the predictive commoning transformations, and possibly unroll the
3457 loop. */
3458 if (unroll)
3459 {
3460 struct epcc_data dta;
3461
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (stream: dump_file, format: "Unrolling %u times.\n", unroll_factor);
3464
3465 dta.tmp_vars = tmp_vars;
3466 dta.chains = m_chains.to_vec_legacy ();
3467 dta.worker = this;
3468
3469 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3470 execute_pred_commoning_cbck is called may cause phi nodes to be
3471 reallocated, which is a problem since CHAINS may point to these
3472 statements. To fix this, we store the ssa names defined by the
3473 phi nodes here instead of the phi nodes themselves, and restore
3474 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3475 replace_phis_by_defined_names (chains&: m_chains);
3476
3477 tree_transform_and_unroll_loop (m_loop, unroll_factor, &desc,
3478 execute_pred_commoning_cbck, &dta);
3479 eliminate_temp_copies (loop: m_loop, tmp_vars);
3480 }
3481 else
3482 {
3483 if (dump_file && (dump_flags & TDF_DETAILS))
3484 fprintf (stream: dump_file,
3485 format: "Executing predictive commoning without unrolling.\n");
3486 execute_pred_commoning (tmp_vars);
3487 }
3488
3489 return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
3490}
3491
3492/* Runs predictive commoning. */
3493
3494unsigned
3495tree_predictive_commoning (bool allow_unroll_p)
3496{
3497 unsigned ret = 0, changed = 0;
3498
3499 initialize_original_copy_tables ();
3500 for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
3501 if (optimize_loop_for_speed_p (loop))
3502 {
3503 pcom_worker w(loop);
3504 changed |= w.tree_predictive_commoning_loop (allow_unroll_p);
3505 }
3506 free_original_copy_tables ();
3507
3508 if (changed > 0)
3509 {
3510 ret = TODO_update_ssa_only_virtuals;
3511
3512 /* Some loop(s) got unrolled. */
3513 if (changed > 1)
3514 {
3515 scev_reset ();
3516
3517 /* Need to fix up loop closed SSA. */
3518 if (changed >= 4)
3519 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3520
3521 ret |= TODO_cleanup_cfg;
3522 }
3523 }
3524
3525 return ret;
3526}
3527
3528/* Predictive commoning Pass. */
3529
3530static unsigned
3531run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
3532{
3533 if (number_of_loops (fn: fun) <= 1)
3534 return 0;
3535
3536 return tree_predictive_commoning (allow_unroll_p);
3537}
3538
3539namespace {
3540
3541const pass_data pass_data_predcom =
3542{
3543 .type: GIMPLE_PASS, /* type */
3544 .name: "pcom", /* name */
3545 .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */
3546 .tv_id: TV_PREDCOM, /* tv_id */
3547 PROP_cfg, /* properties_required */
3548 .properties_provided: 0, /* properties_provided */
3549 .properties_destroyed: 0, /* properties_destroyed */
3550 .todo_flags_start: 0, /* todo_flags_start */
3551 .todo_flags_finish: 0, /* todo_flags_finish */
3552};
3553
3554class pass_predcom : public gimple_opt_pass
3555{
3556public:
3557 pass_predcom (gcc::context *ctxt)
3558 : gimple_opt_pass (pass_data_predcom, ctxt)
3559 {}
3560
3561 /* opt_pass methods: */
3562 bool
3563 gate (function *) final override
3564 {
3565 if (flag_predictive_commoning != 0)
3566 return true;
3567 /* Loop vectorization enables predictive commoning implicitly
3568 only if predictive commoning isn't set explicitly, and it
3569 doesn't allow unrolling. */
3570 if (flag_tree_loop_vectorize
3571 && !OPTION_SET_P (flag_predictive_commoning))
3572 return true;
3573
3574 return false;
3575 }
3576
3577 unsigned int
3578 execute (function *fun) final override
3579 {
3580 bool allow_unroll_p = flag_predictive_commoning != 0;
3581 return run_tree_predictive_commoning (fun, allow_unroll_p);
3582 }
3583
3584}; // class pass_predcom
3585
3586} // anon namespace
3587
/* Pass-manager entry point: allocate a new pass_predcom instance;
   ownership is transferred to the caller (the pass manager).  */
gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}
3593

/* End of source code of gcc/tree-predcom.cc.  */