/* Data references and dependences detectors.
   Copyright (C) 2003-2025 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   two vectors x and y from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).
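
   As an illustration (using the {base, +, step}_loopnum chain of
   recurrences notation), in

   | for (x = 0; x < N; x++)
   |   A[x + 1] = A[x] + 1;

   the write has access function chrec1 = {1, +, 1}_1 and the read has
   chrec2 = {0, +, 1}_1, so chrec1 (x) == chrec2 (y) exactly when
   y == x + 1: iteration y reads the element that iteration x wrote.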

   The goals of this analysis are:

   - to determine independence: the relation between two independent
     accesses is qualified with chrec_known (this information allows
     loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence

     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.

   - "Practical Dependence Testing" by Gina Goff, Ken Kennedy and
   Chau-Wen Tseng.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.


*/

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"
#include "calls.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
          || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
        return true;
    }
  return false;
}



/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}
/* Unified dump of all the data references in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, "    +");
          break;
        case dir_negative:
          fprintf (outf, "    -");
          break;
        case dir_equal:
          fprintf (outf, "    =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, "   +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, "   +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, "   -=");
          break;
        case dir_star:
          fprintf (outf, "    *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, "    (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, "    (nil)\n");
        }
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
        {
          fprintf (outf, "  access_fn_A: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
          fprintf (outf, "  access_fn_B: ");
          print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
          dump_subscript (outf, sub);
        }

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, "  distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, "  direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; each dependence vector has one entry per
   loop in the considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        for (lambda_vector v : DDR_DIST_VECTS (ddr))
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        for (lambda_vector v : DDR_DIR_VECTS (ddr))
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
             - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
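
/* A worked example of the DELTA computation above (an illustration,
   not from the sources): with TYPE = unsigned char, OP0 = 200,
   OP1 = 100 and CODE = PLUS_EXPR, (sizetype) OP0 + (sizetype) OP1
   is 300 while (sizetype) (OP0 + OP1) wraps to 44, so DELTA is 256
   and 256 is subtracted from *OFF.  */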

static bool
compute_distributive_range (tree type, irange &op0_range,
                            tree_code code, irange &op1_range,
                            tree *off, irange *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
        result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  int_range_max wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */
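
/* For instance (illustrative, assuming 32-bit int and 64-bit sizetype):
   a widening int -> long conversion always satisfies this, because
   TO_TYPE is no narrower than sizetype, whereas a narrowing
   long -> int conversion does so only when RANGE fits in int.  */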

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
              && INTEGRAL_TYPE_P (from_type)
              && !TYPE_OVERFLOW_TRAPS (to_type)
              && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
                            TYPE_SIGN (to_type));
}

static void
split_constant_offset (tree type, tree *var, tree *off,
                       irange *result_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off, irange *result_range,
                         hash_map<tree, std::pair<tree, tree> > &cache,
                         unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  int_range_max op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
        {
          wide_int w = wi::to_wide (op0);
          result_range->set (TREE_TYPE (op0), w, w);
        }
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
                                       off, result_range))
        return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
                          fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1, nullptr,
                                   cache, limit);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            base = fold_build_pointer_plus (base, poffset);
          }

        var0 = fold_convert (type, base);

        /* If variable-length types are involved, punt; otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute such an ARRAY_REF's element size, TYPE_SIZE_UNIT,
           which possibly no longer appears in the current GIMPLE, might
           resurface.  This perhaps could be handled by running
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in the element size of any ARRAY_REF
               // found within var0 from the corresponding ARRAY_REF
               // embedded in op0; if unsuccessful, just punt.
             } */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                /* The caller sets the range in this case.  */
                return true;
              }
            e = std::make_pair (op0, ssize_int (0));
          }

        if (*limit == 0)
          return false;
        --*limit;

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, nullptr, cache, limit);
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        /* The caller sets the range in this case.  */
        return res;
      }
    CASE_CONVERT:
      {
        /* We can only handle the following conversions:

           - Conversions from one pointer type to another pointer type.

           - Conversions from one non-trapping integral type to another
             non-trapping integral type.  In this case, the recursive
             call makes sure that:

               (sizetype) OP0

             can be expressed as a sizetype operation involving VAR and OFF,
             and all we need to do is check whether:

               (sizetype) OP0 == (sizetype) (TYPE) OP0

           - Conversions from a non-trapping sizetype-size integral type to
             a like-sized pointer type.  In this case, the recursive call
             makes sure that:

               (sizetype) OP0 == *VAR + (sizetype) *OFF

             and we can convert that to:

               POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

           - Conversions from a sizetype-sized pointer type to a like-sized
             non-trapping integral type.  In this case, the recursive call
             makes sure that:

               OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

             where the POINTER_PLUS and *VAR have the same precision as
             TYPE (and the same precision as sizetype).  Then:

               (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && (POINTER_TYPE_P (type)
                || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
            && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
                || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
                    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
          {
            if (POINTER_TYPE_P (type))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (type, *var);
              }
            else if (POINTER_TYPE_P (itype))
              {
                split_constant_offset (op0, var, off, nullptr, cache, limit);
                *var = fold_convert (sizetype, *var);
              }
            else
              {
                split_constant_offset (op0, var, off, &op0_range,
                                       cache, limit);
                if (!nop_conversion_for_offset_p (type, itype, op0_range))
                  return false;
                if (result_range)
                  {
                    *result_range = op0_range;
                    range_cast (*result_range, type);
                  }
              }
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
                       hash_map<tree, std::pair<tree, tree> > &cache,
                       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      exp_range->set_varying (type);
      if (code == SSA_NAME)
        {
          int_range_max vr;
          get_range_query (cfun)->range_of_expr (vr, exp);
          if (vr.undefined_p ())
            vr.set_varying (TREE_TYPE (exp));
          tree vr_min, vr_max;
          value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
          wide_int var_min = wi::to_wide (vr_min);
          wide_int var_max = wi::to_wide (vr_max);
          wide_int var_nonzero = get_nonzero_bits (exp);
          vr_kind = intersect_range_with_nonzero_bits (vr_kind,
                                                       &var_min, &var_max,
                                                       var_nonzero,
                                                       TYPE_SIGN (type));
          /* This check for VR_VARYING is here because the old code
             using get_range_info would return VR_RANGE for the entire
             domain, instead of VR_VARYING.  The new code normalizes
             full-domain ranges to VR_VARYING.  */
          if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
            exp_range->set (type, var_min, var_max);
        }
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
                                   exp_range, cache, limit))
        return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  int_range_max r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */
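
/* For instance (an illustrative case): for int a[N], EXP = &a[4]
   would be expected to split into *VAR = &a and *OFF = 16, the byte
   offset of the constant array index.  */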

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
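
/* As an illustration (not from the sources): loop analysis of a[i + 4]
   for int a[N], in a loop running i upwards from 0, would be expected
   to produce base_address = &a, offset = 0, init = 16 and step = 4,
   with the address in iteration i being BASE_ADDRESS + OFFSET + INIT
   + i * STEP.  */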

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    {
      if (may_be_nonaddressable_p (base))
        return opt_result::failure_at (stmt,
                                       "failed: base not addressable.\n");
      base = build_fold_addr_expr (base);
    }

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether BASE can be the base of an access_fn_component_p
   component reference.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   REF, analyzed in LOOP and instantiated before NEST, storing them in
   DRI.  */
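
/* For example (illustrative): for a reference a[i][j] in a loop nest
   with outer IV i and inner IV j, the recorded access functions would
   be the scalar evolutions of j and i, pushed innermost subscript
   first, with the array a left as the base object.  */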

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          tree op = TREE_OPERAND (ref, 1);
          tree access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (nest, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree memoff = TREE_OPERAND (ref, 1);
          tree base = initial_condition (access_fn);
          tree orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          tree off;
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolutions initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).  */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc
              (wi::to_wide (off),
               wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
               SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = wi::to_wide (off);
          off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
            (access_fn, fold_convert (orig_type, off));
          /* ??? This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          dri->unconstrained_base = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
                        nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
               DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
               DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* A helper function that computes the order between two tree
   expressions T1 and T2.  This is used in comparator functions when
   sorting objects based on the order of tree expressions.  The
   function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
        return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
                     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
        return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
        return compare_sizes_for_sort (wi::to_poly_widest (t1),
                                       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
        {
          if (DECL_UID (t1) != DECL_UID (t2))
            return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
          break;
        }
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
        {
          for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
            {
              cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
                                           TREE_OPERAND (t2, i));
              if (cmp != 0)
                return cmp;
            }
        }
      else
        gcc_unreachable ();
    }

  return 0;
}

/* Return true if it's possible to resolve data dependence DDR by a
   runtime alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "consider run-time aliasing test between %T and %T\n",
                 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported when"
                                   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check not supported for"
                                   " outer loop.\n");

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
                                   "runtime alias check between different "
                                   "address spaces not supported.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same, so that we will consider combining the aliasing
   checks of the two pairs of data-dependent data refs they
   belong to.  */

static bool
operator == (const dr_with_seg_len& d1,
             const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
                           DR_BASE_ADDRESS (d2.dr), 0)
          && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
          && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
          && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
          && known_eq (d1.access_size, d2.access_size)
          && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same base address and step.  Therefore, if either a & c
     or b & d don't have the same address and step, we don't care about
     the order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
                                         DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
                                         DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
                                         DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
                                         DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
                                         DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
                                         DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
                                         DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
                                         DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
               DR_REF (alias_pair->first.dr),
               DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
               alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
                        alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
               alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

1777/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1778 FACTOR is number of iterations that each data reference is accessed.
1779
1780 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1781 we create an expression:
1782
     ((store_ptr_0 + store_segment_length_0 <= load_ptr_0)
      || (load_ptr_0 + load_segment_length_0 <= store_ptr_0))
1785
1786 for aliasing checks. However, in some cases we can decrease the number
1787 of checks by combining two checks into one. For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1; if the following
   condition is satisfied:
1790
1791 load_ptr_0 < load_ptr_1 &&
1792 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1793
   (this condition means that, in each iteration of the vectorized loop, the
   memory accessed through store_ptr_0 cannot lie between the memory accessed
   through load_ptr_0 and load_ptr_1.)
1797
   we can then use only the following expression to finish the aliasing checks
1799 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1800
     ((store_ptr_0 + store_segment_length_0 <= load_ptr_0)
      || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1803
   Note that we only consider the case in which load_ptr_0 and load_ptr_1
   have the same base address.  */
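
/* E.g. (an illustrative sketch, assuming 4-byte ints): for

     for (i = 0; i < n; i++)
       a[i] = b[i] + b[i + 1];

   the loads of b[i] and b[i + 1] start 4 bytes apart and each covers a
   segment longer than 4 bytes, so the condition above holds for n >= 1
   and the two checks against the store to a[i] collapse into a single
   check of the store segment against the combined load segment starting
   at &b[0] and ending just after &b[n].  */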
1806
1807void
1808prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1809 poly_uint64)
1810{
1811 if (alias_pairs->is_empty ())
1812 return;
1813
1814 /* Canonicalize each pair so that the base components are ordered wrt
1815 data_ref_compare_tree. This allows the loop below to merge more
1816 cases. */
1817 unsigned int i;
1818 dr_with_seg_len_pair_t *alias_pair;
1819 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1820 {
1821 data_reference_p dr_a = alias_pair->first.dr;
1822 data_reference_p dr_b = alias_pair->second.dr;
1823 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1824 DR_BASE_ADDRESS (dr_b));
1825 if (comp_res == 0)
1826 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1827 if (comp_res == 0)
1828 comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1829 if (comp_res > 0)
1830 {
	  std::swap (alias_pair->first, alias_pair->second);
1832 alias_pair->flags |= DR_ALIAS_SWAPPED;
1833 }
1834 else
1835 alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1836 }
1837
1838 /* Sort the collected data ref pairs so that we can scan them once to
1839 combine all possible aliasing checks. */
1840 alias_pairs->qsort (comp_dr_with_seg_len_pair);
1841
1842 /* Scan the sorted dr pairs and check if we can combine alias checks
1843 of two neighboring dr pairs. */
1844 unsigned int last = 0;
1845 for (i = 1; i < alias_pairs->length (); ++i)
1846 {
1847 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1848 dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1849 dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1850
1851 dr_with_seg_len *dr_a1 = &alias_pair1->first;
1852 dr_with_seg_len *dr_b1 = &alias_pair1->second;
1853 dr_with_seg_len *dr_a2 = &alias_pair2->first;
1854 dr_with_seg_len *dr_b2 = &alias_pair2->second;
1855
1856 /* Remove duplicate data ref pairs. */
1857 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1858 {
1859 if (dump_enabled_p ())
1860 dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1861 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1862 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1863 alias_pair1->flags |= alias_pair2->flags;
1864 continue;
1865 }
1866
1867 /* Assume that we won't be able to merge the pairs, then correct
1868 if we do. */
1869 last += 1;
1870 if (last != i)
1871 (*alias_pairs)[last] = (*alias_pairs)[i];
1872
1873 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1874 {
	  /* We consider the case in which DR_B1 and DR_B2 are the same
	     memref, and DR_A1 and DR_A2 are two consecutive memrefs.  */
1877 if (*dr_a1 == *dr_a2)
1878 {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
1881 }
1882
1883 poly_int64 init_a1, init_a2;
1884 /* Only consider cases in which the distance between the initial
1885 DR_A1 and the initial DR_A2 is known at compile time. */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1892 continue;
1893
1894 /* Don't combine if we can't tell which one comes first. */
	  if (!ordered_p (init_a1, init_a2))
1896 continue;
1897
1898 /* Work out what the segment length would be if we did combine
1899 DR_A1 and DR_A2:
1900
1901 - If DR_A1 and DR_A2 have equal lengths, that length is
1902 also the combined length.
1903
1904 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1905 length is the lower bound on those lengths.
1906
1907 - If DR_A1 and DR_A2 both have positive lengths, the combined
1908 length is the upper bound on those lengths.
1909
1910 Other cases are unlikely to give a useful combination.
1911
1912 The lengths both have sizetype, so the sign is taken from
1913 the step instead. */
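	  /* For instance (illustrative numbers): if DR_A1 and DR_A2 both
	     have positive steps and segment lengths of 16 and 32 bytes,
	     the combined segment length is upper_bound (16, 32) = 32.  */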
1914 poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
1917 if (new_seg_len_p)
1918 {
1919 poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1922 continue;
1923
1924 tree indicator_a = dr_direction_indicator (dr_a1->dr);
1925 if (TREE_CODE (indicator_a) != INTEGER_CST)
1926 continue;
1927
1928 tree indicator_b = dr_direction_indicator (dr_a2->dr);
1929 if (TREE_CODE (indicator_b) != INTEGER_CST)
1930 continue;
1931
1932 int sign_a = tree_int_cst_sgn (indicator_a);
1933 int sign_b = tree_int_cst_sgn (indicator_b);
1934
1935 if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1937 else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1939 else
1940 continue;
1941 }
1942 /* At this point we're committed to merging the refs. */
1943
1944 /* Make sure dr_a1 starts left of dr_a2. */
1945 if (maybe_gt (init_a1, init_a2))
1946 {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
1949 }
1950
1951 /* The DR_Bs are equal, so only the DR_As can introduce
1952 mixed steps. */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1954 alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1955
1956 if (new_seg_len_p)
1957 {
1958 dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1959 new_seg_len);
1960 dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1961 }
1962
1963 /* This is always positive due to the swap above. */
1964 poly_uint64 diff = init_a2 - init_a1;
1965
1966 /* The new check will start at DR_A1. Make sure that its access
1967 size encompasses the initial DR_A2. */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1969 {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
1973 dr_a1->align = MIN (dr_a1->align, new_align);
1974 }
1975 if (dump_enabled_p ())
1976 dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1977 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1978 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1979 alias_pair1->flags |= alias_pair2->flags;
1980 last -= 1;
1981 }
1982 }
  alias_pairs->truncate (last + 1);
1984
1985 /* Try to restore the original dr_with_seg_len order within each
1986 dr_with_seg_len_pair_t. If we ended up combining swapped and
1987 unswapped pairs into the same check, we have to invalidate any
1988 RAW, WAR and WAW information for it. */
1989 if (dump_enabled_p ())
1990 dump_printf (MSG_NOTE, "merged alias checks:\n");
1991 FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1992 {
1993 unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1994 unsigned int swapped = (alias_pair->flags & swap_mask);
1995 if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
1997 else if (swapped != DR_ALIAS_UNSWAPPED)
1998 alias_pair->flags |= DR_ALIAS_ARBITRARY;
1999 alias_pair->flags &= ~swap_mask;
2000 if (dump_enabled_p ())
	dump_alias_pair (alias_pair, "  ");
2002 }
2003}
2004
2005/* A subroutine of create_intersect_range_checks, with a subset of the
2006 same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
2007 to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */
2009
2010static bool
2011create_ifn_alias_checks (tree *cond_expr,
2012 const dr_with_seg_len_pair_t &alias_pair)
2013{
2014 const dr_with_seg_len& dr_a = alias_pair.first;
2015 const dr_with_seg_len& dr_b = alias_pair.second;
2016
2017 /* Check for cases in which:
2018
     (a) we have a known RAW, WAR or WAW dependence;
2020 (b) the accesses are well-ordered in both the original and new code
2021 (see the comment above the DR_ALIAS_* flags for details); and
2022 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2023 if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2024 return false;
2025
2026 /* Make sure that both DRs access the same pattern of bytes,
2027 with a constant length and step. */
2028 poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2034 return false;
2035
2036 unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2037 tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2038 tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2039
  /* See whether the target supports what we want to do.  WAW checks are
2041 equivalent to WAR checks here. */
2042 internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2043 ? IFN_CHECK_RAW_PTRS
2044 : IFN_CHECK_WAR_PTRS);
2045 unsigned int align = MIN (dr_a.align, dr_b.align);
2046 poly_uint64 full_length = seg_len + bytes;
2047 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2048 full_length, align))
2049 {
2050 full_length = seg_len + dr_a.access_size;
2051 if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2052 full_length, align))
2053 return false;
2054 }
2055
2056 /* Commit to using this form of test. */
2057 addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2058 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2059
2060 addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2061 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2062
2063 *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2064 ifn, boolean_type_node,
2065 4, addr_a, addr_b,
2066 size_int (full_length),
2067 size_int (align));
2068
2069 if (dump_enabled_p ())
2070 {
2071 if (ifn == IFN_CHECK_RAW_PTRS)
2072 dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2073 else
2074 dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2075 }
2076 return true;
2077}
2078
2079/* Try to generate a runtime condition that is true if ALIAS_PAIR is
2080 free of aliases, using a condition based on index values instead
2081 of a condition based on addresses. Return true on success,
2082 storing the condition in *COND_EXPR.
2083
2084 This can only be done if the two data references in ALIAS_PAIR access
2085 the same array object and the index is the only difference. For example,
2086 if the two data references are DR_A and DR_B:
2087
2088 DR_A DR_B
2089 data-ref arr[i] arr[j]
2090 base_object arr arr
2091 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2092
   The addresses and their indices look like:
2094
2095 |<- ADDR_A ->| |<- ADDR_B ->|
2096 ------------------------------------------------------->
2097 | | | | | | | | | |
2098 ------------------------------------------------------->
2099 i_0 ... i_0+4 j_0 ... j_0+4
2100
   We can create an expression based on the index rather than the address:
2102
2103 (unsigned) (i_0 - j_0 + 3) <= 6
2104
2105 i.e. the indices are less than 4 apart.
2106
   Note that the evolution step of the index needs to be considered in the
   comparison.  */
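
/* For instance, with i_0 = 0 and j_0 = 2 the expression above evaluates
   to (unsigned) (0 - 2 + 3) = 1 <= 6, so a runtime overlap is possible;
   with j_0 = 10 it evaluates to (unsigned) (-7), which is far above 6,
   so the accesses are known not to overlap.  */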
2108
2109static bool
2110create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2111 const dr_with_seg_len_pair_t &alias_pair)
2112{
2113 const dr_with_seg_len &dr_a = alias_pair.first;
2114 const dr_with_seg_len &dr_b = alias_pair.second;
2115 if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2116 || integer_zerop (DR_STEP (dr_a.dr))
2117 || integer_zerop (DR_STEP (dr_b.dr))
2118 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2119 return false;
2120
2121 poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2124 return false;
2125
2126 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2127 return false;
2128
  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2130 return false;
2131
  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2133 return false;
2134
2135 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2136
2137 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2138 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2139 if (neg_step)
2140 {
2141 abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2144 }
2145
  /* Infer the number of iterations with which the memory segment is accessed
     by each DR.  In other words, we check whether the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed by
     DR_B in the same number of iterations.
     Note that the segment length is a linear function of the number of
     iterations, with DR_STEP as the coefficient.  */
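  /* E.g. (illustrative): a 16-byte segment walked with abs_step == 4 is
     accessed in (16 + 4 - 1) / 4 = 4 iterations, so niter_len below
     would be 4.  */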
2152 poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2155 return false;
2156
2157 /* Divide each access size by the byte step, rounding up. */
2158 poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
2163 return false;
2164
2165 bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2166
2167 int found = -1;
2168 for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2169 {
2170 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2171 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* The two indices must be the same if they are not SCEVs, or are
	 not SCEVs with respect to the current loop being vectorized.  */
2174 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2175 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2176 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2177 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2178 {
	  if (operand_equal_p (access1, access2, 0))
2180 continue;
2181
2182 return false;
2183 }
2184 if (found >= 0)
2185 return false;
2186 found = i;
2187 }
2188
2189 /* Ought not to happen in practice, since if all accesses are equal then the
2190 alias should be decidable at compile time. */
2191 if (found < 0)
2192 return false;
2193
2194 /* The two indices must have the same step. */
2195 tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2196 tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2198 return false;
2199
2200 tree idx_step = CHREC_RIGHT (access1);
2201 /* Index must have const step, otherwise DR_STEP won't be constant. */
2202 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2203 /* Index must evaluate in the same direction as DR. */
2204 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2205
2206 tree min1 = CHREC_LEFT (access1);
2207 tree min2 = CHREC_LEFT (access2);
2208 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2209 return false;
2210
  /* Ideally, the alias could be checked against the loop's control IV, but
     we would need to prove a linear mapping between the control IV and the
     reference index.  Although that should be true, we instead check against
     the (array) index of the data reference.  Like the segment length, the
     index length is a linear function of the number of iterations, with
     idx_step as the coefficient, i.e. niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
2219 if (neg_step)
2220 abs_idx_step = -abs_idx_step;
2221 poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2222 poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2223 poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2224 poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2225
2226 gcc_assert (known_ge (idx_len1, 0)
2227 && known_ge (idx_len2, 0)
2228 && known_ge (idx_access1, 0)
2229 && known_ge (idx_access2, 0));
2230
2231 /* Each access has the following pattern, with lengths measured
2232 in units of INDEX:
2233
2234 <-- idx_len -->
2235 <--- A: -ve step --->
2236 +-----+-------+-----+-------+-----+
2237 | n-1 | ..... | 0 | ..... | n-1 |
2238 +-----+-------+-----+-------+-----+
2239 <--- B: +ve step --->
2240 <-- idx_len -->
2241 |
2242 min
2243
2244 where "n" is the number of scalar iterations covered by the segment
2245 and where each access spans idx_access units.
2246
     A is the range of indices accessed when the step is negative,
     B is the range when the step is positive.
2249
2250 When checking for general overlap, we need to test whether
2251 the range:
2252
2253 [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2254
2255 overlaps:
2256
2257 [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2258
2259 where:
2260
2261 low_offsetN = +ve step ? 0 : -idx_lenN;
2262 high_offsetN = +ve step ? idx_lenN : 0;
2263
2264 This is equivalent to testing whether:
2265
2266 min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2267 && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2268
2269 Converting this into a single test, there is an overlap if:
2270
2271 0 <= min2 - min1 + bias <= limit
2272
2273 where bias = high_offset2 + idx_access2 - 1 - low_offset1
2274 limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2275 + (high_offset2 - low_offset2 + idx_access2 - 1)
2276 i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2277
2278 Combining the tests requires limit to be computable in an unsigned
2279 form of the index type; if it isn't, we fall back to the usual
2280 pointer-based checks.
2281
2282 We can do better if DR_B is a write and if DR_A and DR_B are
2283 well-ordered in both the original and the new code (see the
2284 comment above the DR_ALIAS_* flags for details). In this case
2285 we know that for each i in [0, n-1], the write performed by
2286 access i of DR_B occurs after access numbers j<=i of DR_A in
2287 both the original and the new code. Any write or anti
2288 dependencies wrt those DR_A accesses are therefore maintained.
2289
2290 We just need to make sure that each individual write in DR_B does not
2291 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2292 after the DR_B access in the original code but happen before it in
2293 the new code.
2294
2295 We know the steps for both accesses are equal, so by induction, we
2296 just need to test whether the first write of DR_B overlaps a later
2297 access of DR_A. In other words, we need to move min1 along by
2298 one iteration:
2299
2300 min1' = min1 + idx_step
2301
2302 and use the ranges:
2303
2304 [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2305
2306 and:
2307
2308 [min2, min2 + idx_access2 - 1]
2309
2310 where:
2311
2312 low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
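  /* As a sanity check of the formulas above (illustrative numbers only,
     positive step, general overlap case): with idx_len1 = idx_len2 = 4
     and idx_access1 = idx_access2 = 1 we get bias = 4 and limit = 8,
     so 0 <= min2 - min1 + 4 <= 8 holds exactly when the inclusive
     ranges [min1, min1 + 4] and [min2, min2 + 4] intersect.  */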
2314 if (waw_or_war_p)
2315 idx_len1 -= abs_idx_step;
2316
2317 poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2318 if (!waw_or_war_p)
2319 limit += idx_len2;
2320
2321 tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
2323 return false;
2324
2325 poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2326 poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2327 poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2328 /* Equivalent to adding IDX_STEP to MIN1. */
2329 if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);
2331
2332 tree subject = fold_build2 (MINUS_EXPR, utype,
2333 fold_convert (utype, min2),
2334 fold_convert (utype, min1));
2335 subject = fold_build2 (PLUS_EXPR, utype, subject,
2336 wide_int_to_tree (utype, bias));
2337 tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2338 wide_int_to_tree (utype, limit));
2339 if (*cond_expr)
2340 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 *cond_expr, part_cond_expr);
2342 else
2343 *cond_expr = part_cond_expr;
2344 if (dump_enabled_p ())
2345 {
2346 if (waw_or_war_p)
2347 dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2348 else
2349 dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2350 }
2351 return true;
2352}
2353
2354/* A subroutine of create_intersect_range_checks, with a subset of the
2355 same arguments. Try to optimize cases in which the second access
2356 is a write and in which some overlap is valid. */
2357
2358static bool
2359create_waw_or_war_checks (tree *cond_expr,
2360 const dr_with_seg_len_pair_t &alias_pair)
2361{
2362 const dr_with_seg_len& dr_a = alias_pair.first;
2363 const dr_with_seg_len& dr_b = alias_pair.second;
2364
2365 /* Check for cases in which:
2366
2367 (a) DR_B is always a write;
2368 (b) the accesses are well-ordered in both the original and new code
2369 (see the comment above the DR_ALIAS_* flags for details); and
2370 (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
2371 if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2372 return false;
2373
2374 /* Check for equal (but possibly variable) steps. */
2375 tree step = DR_STEP (dr_a.dr);
2376 if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2377 return false;
2378
2379 /* Make sure that we can operate on sizetype without loss of precision. */
2380 tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2381 if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2382 return false;
2383
2384 /* All addresses involved are known to have a common alignment ALIGN.
2385 We can therefore subtract ALIGN from an exclusive endpoint to get
2386 an inclusive endpoint. In the best (and common) case, ALIGN is the
2387 same as the access sizes of both DRs, and so subtracting ALIGN
2388 cancels out the addition of an access size. */
2389 unsigned int align = MIN (dr_a.align, dr_b.align);
2390 poly_uint64 last_chunk_a = dr_a.access_size - align;
2391 poly_uint64 last_chunk_b = dr_b.access_size - align;
2392
2393 /* Get a boolean expression that is true when the step is negative. */
2394 tree indicator = dr_direction_indicator (dr_a.dr);
2395 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2396 fold_convert (ssizetype, indicator),
2397 ssize_int (0));
2398
2399 /* Get lengths in sizetype. */
2400 tree seg_len_a
2401 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2402 step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2403
2404 /* Each access has the following pattern:
2405
2406 <- |seg_len| ->
2407 <--- A: -ve step --->
2408 +-----+-------+-----+-------+-----+
2409 | n-1 | ..... | 0 | ..... | n-1 |
2410 +-----+-------+-----+-------+-----+
2411 <--- B: +ve step --->
2412 <- |seg_len| ->
2413 |
2414 base address
2415
2416 where "n" is the number of scalar iterations covered by the segment.
2417
2418 A is the range of bytes accessed when the step is negative,
2419 B is the range when the step is positive.
2420
2421 We know that DR_B is a write. We also know (from checking that
2422 DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2423 the write performed by access i of DR_B occurs after access numbers
2424 j<=i of DR_A in both the original and the new code. Any write or
2425 anti dependencies wrt those DR_A accesses are therefore maintained.
2426
2427 We just need to make sure that each individual write in DR_B does not
2428 overlap any higher-indexed access in DR_A; such DR_A accesses happen
2429 after the DR_B access in the original code but happen before it in
2430 the new code.
2431
2432 We know the steps for both accesses are equal, so by induction, we
2433 just need to test whether the first write of DR_B overlaps a later
2434 access of DR_A. In other words, we need to move addr_a along by
2435 one iteration:
2436
2437 addr_a' = addr_a + step
2438
2439 and check whether:
2440
2441 [addr_b, addr_b + last_chunk_b]
2442
2443 overlaps:
2444
2445 [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2446
2447 where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
2448
2449 low_offset_a = +ve step ? 0 : seg_len_a - step
2450 high_offset_a = +ve step ? seg_len_a - step : 0
2451
2452 This is equivalent to testing whether:
2453
2454 addr_a' + low_offset_a <= addr_b + last_chunk_b
2455 && addr_b <= addr_a' + high_offset_a + last_chunk_a
2456
2457 Converting this into a single test, there is an overlap if:
2458
2459 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2460
2461 where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2462
2463 If DR_A is performed, limit + |step| - last_chunk_b is known to be
2464 less than the size of the object underlying DR_A. We also know
2465 that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2466 guaranteed at compile time. There can therefore be no overflow if
2467 "limit" is calculated in an unsigned type with pointer precision. */
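  /* For example (illustrative numbers, positive step): with step = 4,
     seg_len_a = 16 and last_chunk_a = last_chunk_b = 0, we get bias = 0
     and limit = 12, so a conflict is reported iff addr_b lies within
     [addr_a + 4, addr_a + 16], i.e. iff the write overlaps one of the
     later accesses of DR_A.  */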
2468 tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2469 DR_OFFSET (dr_a.dr));
2470 addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2471
2472 tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2473 DR_OFFSET (dr_b.dr));
2474 addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2475
2476 /* Advance ADDR_A by one iteration and adjust the length to compensate. */
2477 addr_a = fold_build_pointer_plus (addr_a, step);
2478 tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2479 seg_len_a, step);
2480 if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2481 seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2482
2483 tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2484 seg_len_a_minus_step, size_zero_node);
2485 if (!CONSTANT_CLASS_P (low_offset_a))
2486 low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2487
2488 /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2489 but it's usually more efficient to reuse the LOW_OFFSET_A result. */
2490 tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2491 low_offset_a);
2492
2493 /* The amount added to addr_b - addr_a'. */
2494 tree bias = fold_build2 (MINUS_EXPR, sizetype,
2495 size_int (last_chunk_b), low_offset_a);
2496
2497 tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2498 limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2499 size_int (last_chunk_a + last_chunk_b));
2500
2501 tree subject = fold_build2 (MINUS_EXPR, sizetype,
2502 fold_convert (sizetype, addr_b),
2503 fold_convert (sizetype, addr_a));
2504 subject = fold_build2 (PLUS_EXPR, sizetype, subject, bias);
2505
2506 *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2507 if (dump_enabled_p ())
2508 dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2509 return true;
2510}
2511
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
2523
2524static void
2525get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2526 tree *seg_max_out, HOST_WIDE_INT align)
2527{
2528 /* Each access has the following pattern:
2529
2530 <- |seg_len| ->
2531 <--- A: -ve step --->
2532 +-----+-------+-----+-------+-----+
	| n-1 | ..... | 0 | ..... | n-1 |
2534 +-----+-------+-----+-------+-----+
2535 <--- B: +ve step --->
2536 <- |seg_len| ->
2537 |
2538 base address
2539
2540 where "n" is the number of scalar iterations covered by the segment.
2541 (This should be VF for a particular pair if we know that both steps
2542 are the same, otherwise it will be the full number of scalar loop
2543 iterations.)
2544
2545 A is the range of bytes accessed when the step is negative,
2546 B is the range when the step is positive.
2547
2548 If the access size is "access_size" bytes, the lowest addressed byte is:
2549
2550 base + (step < 0 ? seg_len : 0) [LB]
2551
2552 and the highest addressed byte is always below:
2553
2554 base + (step < 0 ? 0 : seg_len) + access_size [UB]
2555
2556 Thus:
2557
2558 LB <= ADDR < UB
2559
2560 If ALIGN is nonzero, all three values are aligned to at least ALIGN
2561 bytes, so:
2562
2563 LB <= ADDR <= UB - ALIGN
2564
2565 where "- ALIGN" folds naturally with the "+ access_size" and often
2566 cancels it out.
2567
2568 We don't try to simplify LB and UB beyond this (e.g. by using
2569 MIN and MAX based on whether seg_len rather than the stride is
2570 negative) because it is possible for the absolute size of the
2571 segment to overflow the range of a ssize_t.
2572
2573 Keeping the pointer_plus outside of the cond_expr should allow
2574 the cond_exprs to be shared with other alias checks. */
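  /* E.g. (illustrative): for a positive step with seg_len = 12,
     access_size = 4 and ALIGN = 4, LB is the base address itself and
     the inclusive upper bound is base + 12 + 4 - 4 = base + 12.  */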
2575 tree indicator = dr_direction_indicator (d.dr);
2576 tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2577 fold_convert (ssizetype, indicator),
2578 ssize_int (0));
2579 tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2580 DR_OFFSET (d.dr));
2581 addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2582 tree seg_len
2583 = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2584
2585 tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2586 seg_len, size_zero_node);
2587 tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2588 size_zero_node, seg_len);
2589 max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2590 size_int (d.access_size - align));
2591
2592 *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2593 *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2594}
2595
2596/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate
   a test that the two accesses do not overlap:
2599
2600 end_a <= start_b || end_b <= start_a. */
2601
2602static void
2603create_intersect_range_checks (class loop *loop, tree *cond_expr,
2604 const dr_with_seg_len_pair_t &alias_pair)
2605{
2606 const dr_with_seg_len& dr_a = alias_pair.first;
2607 const dr_with_seg_len& dr_b = alias_pair.second;
2608 *cond_expr = NULL_TREE;
2609 if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2610 return;
2611
2612 if (create_ifn_alias_checks (cond_expr, alias_pair))
2613 return;
2614
2615 if (create_waw_or_war_checks (cond_expr, alias_pair))
2616 return;
2617
2618 unsigned HOST_WIDE_INT min_align;
2619 tree_code cmp_code;
2620 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2621 are equivalent. This is just an optimization heuristic. */
2622 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2623 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2624 {
2625 /* In this case adding access_size to seg_len is likely to give
2626 a simple X * step, where X is either the number of scalar
2627 iterations or the vectorization factor. We're better off
2628 keeping that, rather than subtracting an alignment from it.
2629
2630 In this case the maximum values are exclusive and so there is
2631 no alias if the maximum of one segment equals the minimum
2632 of another. */
2633 min_align = 0;
2634 cmp_code = LE_EXPR;
2635 }
2636 else
2637 {
2638 /* Calculate the minimum alignment shared by all four pointers,
2639 then arrange for this alignment to be subtracted from the
2640 exclusive maximum values to get inclusive maximum values.
2641 This "- min_align" is cumulative with a "+ access_size"
2642 in the calculation of the maximum values. In the best
2643 (and common) case, the two cancel each other out, leaving
2644 us with an inclusive bound based only on seg_len. In the
2645 worst case we're simply adding a smaller number than before.
2646
2647 Because the maximum values are inclusive, there is an alias
2648 if the maximum value of one segment is equal to the minimum
2649 value of the other. */
      min_align = std::min (dr_a.align, dr_b.align);
2651 cmp_code = LT_EXPR;
2652 }
2653
2654 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2657
2658 *cond_expr
2659 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2660 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2661 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2662 if (dump_enabled_p ())
2663 dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2664}
2665
2666/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2669 COND_EXPR is the conditional expression to be used in the if statement
2670 that controls which version of the loop gets executed at runtime. */
2671
2672void
2673create_runtime_alias_checks (class loop *loop,
2674 const vec<dr_with_seg_len_pair_t> *alias_pairs,
2675 tree * cond_expr)
2676{
2677 tree part_cond_expr;
2678
2679 fold_defer_overflow_warnings ();
2680 for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2681 {
2682 gcc_assert (alias_pair.flags);
2683 if (dump_enabled_p ())
2684 dump_printf (MSG_NOTE,
2685 "create runtime check for data references %T and %T\n",
2686 DR_REF (alias_pair.first.dr),
2687 DR_REF (alias_pair.second.dr));
2688
      /* Create a condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2691 if (*cond_expr)
2692 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2693 *cond_expr, part_cond_expr);
2694 else
2695 *cond_expr = part_cond_expr;
2696 }
2697 fold_undefer_and_ignore_overflow_warnings ();
2698}
2699
2700/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2701 expressions. */
2702static bool
2703dr_equal_offsets_p1 (tree offset1, tree offset2)
2704{
2705 bool res;
2706
2707 STRIP_NOPS (offset1);
2708 STRIP_NOPS (offset2);
2709
2710 if (offset1 == offset2)
2711 return true;
2712
2713 if (TREE_CODE (offset1) != TREE_CODE (offset2)
2714 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2715 return false;
2716
2717 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2718 TREE_OPERAND (offset2, 0));
2719
2720 if (!res || !BINARY_CLASS_P (offset1))
2721 return res;
2722
2723 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2724 TREE_OPERAND (offset2, 1));
2725
2726 return res;
2727}
2728
2729/* Check if DRA and DRB have equal offsets. */
2730bool
2731dr_equal_offsets_p (struct data_reference *dra,
2732 struct data_reference *drb)
2733{
2734 tree offset1, offset2;
2735
2736 offset1 = DR_OFFSET (dra);
2737 offset2 = DR_OFFSET (drb);
2738
2739 return dr_equal_offsets_p1 (offset1, offset2);
2740}
2741
2742/* Returns true if FNA == FNB. */
2743
2744static bool
2745affine_function_equal_p (affine_fn fna, affine_fn fnb)
2746{
2747 unsigned i, n = fna.length ();
2748
2749 if (n != fnb.length ())
2750 return false;
2751
2752 for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
2754 return false;
2755
2756 return true;
2757}
2758
2759/* If all the functions in CF are the same, returns one of them,
2760 otherwise returns NULL. */
2761
2762static affine_fn
2763common_affine_function (conflict_function *cf)
2764{
2765 unsigned i;
2766 affine_fn comm;
2767
2768 if (!CF_NONTRIVIAL_P (cf))
2769 return affine_fn ();
2770
2771 comm = cf->fns[0];
2772
2773 for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
2775 return affine_fn ();
2776
2777 return comm;
2778}
2779
2780/* Returns the base of the affine function FN. */
2781
2782static tree
2783affine_function_base (affine_fn fn)
2784{
2785 return fn[0];
2786}
2787
2788/* Returns true if FN is a constant. */
2789
2790static bool
2791affine_function_constant_p (affine_fn fn)
2792{
2793 unsigned i;
2794 tree coef;
2795
  for (i = 1; fn.iterate (i, &coef); i++)
2797 if (!integer_zerop (coef))
2798 return false;
2799
2800 return true;
2801}
2802
2803/* Returns true if FN is the zero constant function. */
2804
2805static bool
2806affine_function_zero_p (affine_fn fn)
2807{
2808 return (integer_zerop (affine_function_base (fn))
2809 && affine_function_constant_p (fn));
2810}
2811
2812/* Returns a signed integer type with the largest precision from TA
2813 and TB. */
2814
2815static tree
2816signed_type_for_types (tree ta, tree tb)
2817{
2818 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2819 return signed_type_for (ta);
2820 else
2821 return signed_type_for (tb);
2822}
2823
2824/* Applies operation OP on affine functions FNA and FNB, and returns the
2825 result. */
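
/* E.g. (illustrative): treating an affine_fn as a coefficient vector
   whose element 0 is the invariant base, FNA = {1, 2} (i.e. 1 + 2*x1)
   and FNB = {3} (the constant 3) combine under PLUS_EXPR into {4, 2},
   i.e. 4 + 2*x1.  */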
2826
2827static affine_fn
2828affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2829{
2830 unsigned i, n, m;
2831 affine_fn ret;
2832 tree coef;
2833
2834 if (fnb.length () > fna.length ())
2835 {
2836 n = fna.length ();
2837 m = fnb.length ();
2838 }
2839 else
2840 {
2841 n = fnb.length ();
2842 m = fna.length ();
2843 }
2844
  ret.create (m);
2846 for (i = 0; i < n; i++)
2847 {
2848 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2849 TREE_TYPE (fnb[i]));
2850 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2851 }
2852
  for (; fna.iterate (i, &coef); i++)
2854 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2855 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
2857 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2858 integer_zero_node, coef));
2859
2860 return ret;
2861}
2862
2863/* Returns the sum of affine functions FNA and FNB. */
2864
2865static affine_fn
2866affine_fn_plus (affine_fn fna, affine_fn fnb)
2867{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
2869}
2870
2871/* Returns the difference of affine functions FNA and FNB. */
2872
2873static affine_fn
2874affine_fn_minus (affine_fn fna, affine_fn fnb)
2875{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
2877}
2878
2879/* Frees affine function FN. */
2880
2881static void
2882affine_fn_free (affine_fn fn)
2883{
2884 fn.release ();
2885}
2886
/* Determine the distance for each subscript in the data dependence
   relation DDR.  */
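
/* E.g. if a subscript's conflicting accesses are described by the affine
   functions x + 4 (in A) and x (in B), their difference is the constant
   4 and SUB_DISTANCE is set to 4; a non-constant difference yields
   chrec_dont_know.  */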
2889
2890static void
2891compute_subscript_distance (struct data_dependence_relation *ddr)
2892{
2893 conflict_function *cf_a, *cf_b;
2894 affine_fn fn_a, fn_b, diff;
2895
2896 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2897 {
2898 unsigned int i;
2899
2900 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2901 {
2902 struct subscript *subscript;
2903
2904 subscript = DDR_SUBSCRIPT (ddr, i);
2905 cf_a = SUB_CONFLICTS_IN_A (subscript);
2906 cf_b = SUB_CONFLICTS_IN_B (subscript);
2907
	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
2910 if (!fn_a.exists () || !fn_b.exists ())
2911 {
2912 SUB_DISTANCE (subscript) = chrec_dont_know;
2913 return;
2914 }
	  diff = affine_fn_minus (fn_a, fn_b);
2916
	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2919 else
2920 SUB_DISTANCE (subscript) = chrec_dont_know;
2921
	  affine_fn_free (diff);
2923 }
2924 }
2925}
2926
2927/* Returns the conflict function for "unknown". */
2928
2929static conflict_function *
2930conflict_fn_not_known (void)
2931{
2932 conflict_function *fn = XCNEW (conflict_function);
2933 fn->n = NOT_KNOWN;
2934
2935 return fn;
2936}
2937
2938/* Returns the conflict function for "independent". */
2939
2940static conflict_function *
2941conflict_fn_no_dependence (void)
2942{
2943 conflict_function *fn = XCNEW (conflict_function);
2944 fn->n = NO_DEPENDENCE;
2945
2946 return fn;
2947}
2948
2949/* Returns true if the address of OBJ is invariant in LOOP. */
2950
2951static bool
2952object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2953{
  while (handled_component_p (obj))
2955 {
2956 if (TREE_CODE (obj) == ARRAY_REF)
2957 {
2958 for (int i = 1; i < 4; ++i)
2959 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2960 loop->num))
2961 return false;
2962 }
2963 else if (TREE_CODE (obj) == COMPONENT_REF)
2964 {
2965 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2966 loop->num))
2967 return false;
2968 }
2969 obj = TREE_OPERAND (obj, 0);
2970 }
2971
2972 if (!INDIRECT_REF_P (obj)
2973 && TREE_CODE (obj) != MEM_REF)
2974 return true;
2975
2976 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2977 loop->num);
2978}
2979
2980/* Helper for contains_ssa_ref_p. */
2981
2982static bool
2983contains_ssa_ref_p_1 (tree, tree *idx, void *data)
2984{
2985 if (TREE_CODE (*idx) == SSA_NAME)
2986 {
2987 *(bool *)data = true;
2988 return false;
2989 }
2990 return true;
2991}
2992
/* Returns true if the reference REF contains an SSA index.  */
2994
2995static bool
2996contains_ssa_ref_p (tree ref)
2997{
2998 bool res = false;
2999 for_each_index (&ref, contains_ssa_ref_p_1, &res);
3000 return res;
3001}
3002
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is NULL, no cross-iteration aliases are
   considered.  */
3006
3007bool
3008dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
3009 class loop *loop_nest)
3010{
3011 tree addr_a = DR_BASE_OBJECT (a);
3012 tree addr_b = DR_BASE_OBJECT (b);
3013
3014 /* If we are not processing a loop nest but scalar code we
3015 do not need to care about possible cross-iteration dependences
3016 and thus can process the full original reference. Do so,
3017 similar to how loop invariant motion applies extra offset-based
3018 disambiguation. */
3019 if (!loop_nest)
3020 {
3021 tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
3022 tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
3023
3024 if (DR_BASE_ADDRESS (a)
3025 && DR_BASE_ADDRESS (b)
3026 && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
3027 && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
3028 && tree_size_a
3029 && tree_size_b
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
3036 {
3037 gcc_assert (integer_zerop (DR_STEP (a))
3038 && integer_zerop (DR_STEP (b)));
3039 return false;
3040 }
3041
3042 aff_tree off1, off2;
3043 poly_widest_int size1, size2;
3044 get_inner_reference_aff (DR_REF (a), &off1, &size1);
3045 get_inner_reference_aff (DR_REF (b), &off2, &size2);
3046 aff_combination_scale (&off1, -1);
3047 aff_combination_add (&off2, &off1);
3048 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
3049 return false;
3050 }
3051
3052 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
3053 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
3054 /* For cross-iteration dependences the cliques must be valid for the
3055 whole loop, not just individual iterations. */
3056 && (!loop_nest
3057 || MR_DEPENDENCE_CLIQUE (addr_a) == 1
3058 || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
3059 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
3060 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3061 return false;
3062
3063 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3064 do not know the size of the base-object. So we cannot do any
3065 offset/overlap based analysis but have to rely on points-to
3066 information only. */
3067 if (TREE_CODE (addr_a) == MEM_REF
3068 && (DR_UNCONSTRAINED_BASE (a)
3069 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3070 {
3071 /* For true dependences we can apply TBAA. */
3072 if (flag_strict_aliasing
3073 && DR_IS_WRITE (a) && DR_IS_READ (b)
3074 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3075 get_alias_set (DR_REF (b))))
3076 return false;
3077 if (TREE_CODE (addr_b) == MEM_REF)
3078 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3079 TREE_OPERAND (addr_b, 0));
3080 else
3081 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3082 build_fold_addr_expr (addr_b));
3083 }
3084 else if (TREE_CODE (addr_b) == MEM_REF
3085 && (DR_UNCONSTRAINED_BASE (b)
3086 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3087 {
3088 /* For true dependences we can apply TBAA. */
3089 if (flag_strict_aliasing
3090 && DR_IS_WRITE (a) && DR_IS_READ (b)
3091 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3092 get_alias_set (DR_REF (b))))
3093 return false;
3094 if (TREE_CODE (addr_a) == MEM_REF)
3095 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3096 TREE_OPERAND (addr_b, 0));
3097 else
3098 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3099 TREE_OPERAND (addr_b, 0));
3100 }
  /* If dr_analyze_innermost failed to handle a component we are
     possibly left with a non-base, in which case we didn't analyze
     a possible evolution of the base when analyzing the loop.  */
3104 else if (loop_nest
	   && ((handled_component_p (addr_a) && contains_ssa_ref_p (addr_a))
	       || (handled_component_p (addr_b)
		   && contains_ssa_ref_p (addr_b))))
3107 {
3108 /* For true dependences we can apply TBAA. */
3109 if (flag_strict_aliasing
3110 && DR_IS_WRITE (a) && DR_IS_READ (b)
3111 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3112 get_alias_set (DR_REF (b))))
3113 return false;
3114 if (TREE_CODE (addr_a) == MEM_REF)
3115 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3116 build_fold_addr_expr (addr_b));
3117 else if (TREE_CODE (addr_b) == MEM_REF)
3118 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3119 TREE_OPERAND (addr_b, 0));
3120 else
3121 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3122 build_fold_addr_expr (addr_b));
3123 }
3124
3125 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3126 that is being subsetted in the loop nest. */
3127 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3128 return refs_output_dependent_p (addr_a, addr_b);
3129 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3130 return refs_anti_dependent_p (addr_a, addr_b);
3131 return refs_may_alias_p (addr_a, addr_b);
3132}
3133
3134/* REF_A and REF_B both satisfy access_fn_component_p. Return true
3135 if it is meaningful to compare their associated access functions
3136 when checking for dependencies. */
3137
3138static bool
3139access_fn_components_comparable_p (tree ref_a, tree ref_b)
3140{
3141 /* Allow pairs of component refs from the following sets:
3142
3143 { REALPART_EXPR, IMAGPART_EXPR }
3144 { COMPONENT_REF }
3145 { ARRAY_REF }. */
3146 tree_code code_a = TREE_CODE (ref_a);
3147 tree_code code_b = TREE_CODE (ref_b);
3148 if (code_a == IMAGPART_EXPR)
3149 code_a = REALPART_EXPR;
3150 if (code_b == IMAGPART_EXPR)
3151 code_b = REALPART_EXPR;
3152 if (code_a != code_b)
3153 return false;
3154
3155 if (TREE_CODE (ref_a) == COMPONENT_REF)
3156 /* ??? We cannot simply use the type of operand #0 of the refs here as
3157 the Fortran compiler smuggles type punning into COMPONENT_REFs.
3158 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
3159 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3160 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3161
3162 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3163 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3164}
3165
3166/* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES
3167 is true when the main indices of A and B were not comparable so we try again
3168 with alternate indices computed on an indirect reference. */
3169
3170struct data_dependence_relation *
3171initialize_data_dependence_relation (struct data_dependence_relation *res,
3172 vec<loop_p> loop_nest,
3173 bool use_alt_indices)
3174{
3175 struct data_reference *a = DDR_A (res);
3176 struct data_reference *b = DDR_B (res);
3177 unsigned int i;
3178
3179 struct indices *indices_a = &a->indices;
3180 struct indices *indices_b = &b->indices;
3181 if (use_alt_indices)
3182 {
3183 if (TREE_CODE (DR_REF (a)) != MEM_REF)
3184 indices_a = &a->alt_indices;
3185 if (TREE_CODE (DR_REF (b)) != MEM_REF)
3186 indices_b = &b->alt_indices;
3187 }
3188 unsigned int num_dimensions_a = indices_a->access_fns.length ();
3189 unsigned int num_dimensions_b = indices_b->access_fns.length ();
3190 if (num_dimensions_a == 0 || num_dimensions_b == 0)
3191 {
3192 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3193 return res;
3194 }
3195
3196 /* For unconstrained bases, the root (highest-indexed) subscript
3197 describes a variation in the base of the original DR_REF rather
3198 than a component access. We have no type that accurately describes
3199 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3200 applying this subscript) so limit the search to the last real
3201 component access.
3202
3203 E.g. for:
3204
3205 void
3206 f (int a[][8], int b[][8])
3207 {
3208 for (int i = 0; i < 8; ++i)
3209 a[i * 2][0] = b[i][0];
3210 }
3211
3212 the a and b accesses have a single ARRAY_REF component reference [0]
3213 but have two subscripts. */
3214 if (indices_a->unconstrained_base)
3215 num_dimensions_a -= 1;
3216 if (indices_b->unconstrained_base)
3217 num_dimensions_b -= 1;
3218
3219 /* These structures describe sequences of component references in
3220 DR_REF (A) and DR_REF (B). Each component reference is tied to a
3221 specific access function. */
3222 struct {
3223 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3224 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3225 indices. In C notation, these are the indices of the rightmost
3226 component references; e.g. for a sequence .b.c.d, the start
3227 index is for .d. */
3228 unsigned int start_a;
3229 unsigned int start_b;
3230
3231 /* The sequence contains LENGTH consecutive access functions from
3232 each DR. */
3233 unsigned int length;
3234
3235 /* The enclosing objects for the A and B sequences respectively,
3236 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3237 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
3238 tree object_a;
3239 tree object_b;
3240 } full_seq = {}, struct_seq = {};
3241
3242 /* Before each iteration of the loop:
3243
3244 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3245 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
3246 unsigned int index_a = 0;
3247 unsigned int index_b = 0;
3248 tree ref_a = DR_REF (a);
3249 tree ref_b = DR_REF (b);
3250
3251 /* Now walk the component references from the final DR_REFs back up to
3252 the enclosing base objects. Each component reference corresponds
3253 to one access function in the DR, with access function 0 being for
3254 the final DR_REF and the highest-indexed access function being the
3255 one that is applied to the base of the DR.
3256
3257 Look for a sequence of component references whose access functions
3258 are comparable (see access_fn_components_comparable_p). If more
3259 than one such sequence exists, pick the one nearest the base
3260 (which is the leftmost sequence in C notation). Store this sequence
3261 in FULL_SEQ.
3262
3263 For example, if we have:
3264
3265 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3266
3267 A: a[0][i].s.c.d
3268 B: __real b[0][i].s.e[i].f
3269
3270 (where d is the same type as the real component of f) then the access
3271 functions would be:
3272
3273 0 1 2 3
3274 A: .d .c .s [i]
3275
3276 0 1 2 3 4 5
3277 B: __real .f [i] .e .s [i]
3278
3279 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3280 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
3281 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
3282 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3283 so is comparable. The A3/B5 column contains two ARRAY_REFs that
3284 index foo[10] arrays, so is again comparable. The sequence is
3285 therefore:
3286
3287 A: [1, 3] (i.e. [i].s.c)
3288 B: [3, 5] (i.e. [i].s.e)
3289
3290 Also look for sequences of component references whose access
3291 functions are comparable and whose enclosing objects have the same
3292 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
3293 example, STRUCT_SEQ would be:
3294
3295 A: [1, 2] (i.e. s.c)
3296 B: [3, 4] (i.e. s.e) */
3297 while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3298 {
3299 /* The alternate indices form always has a single dimension
3300 with unconstrained base. */
3301 gcc_assert (!use_alt_indices);
3302
3303 /* REF_A and REF_B must be one of the component access types
3304 allowed by dr_analyze_indices. */
3305 gcc_checking_assert (access_fn_component_p (ref_a));
3306 gcc_checking_assert (access_fn_component_p (ref_b));
3307
3308 /* Get the immediately-enclosing objects for REF_A and REF_B,
3309 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3310 and DR_ACCESS_FN (B, INDEX_B). */
3311 tree object_a = TREE_OPERAND (ref_a, 0);
3312 tree object_b = TREE_OPERAND (ref_b, 0);
3313
3314 tree type_a = TREE_TYPE (object_a);
3315 tree type_b = TREE_TYPE (object_b);
3316 if (access_fn_components_comparable_p (ref_a, ref_b))
3317 {
3318 /* This pair of component accesses is comparable for dependence
3319 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3320 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
3321 if (full_seq.start_a + full_seq.length != index_a
3322 || full_seq.start_b + full_seq.length != index_b)
3323 {
3324 /* The accesses don't extend the current sequence,
3325 so start a new one here. */
3326 full_seq.start_a = index_a;
3327 full_seq.start_b = index_b;
3328 full_seq.length = 0;
3329 }
3330
3331 /* Add this pair of references to the sequence. */
3332 full_seq.length += 1;
3333 full_seq.object_a = object_a;
3334 full_seq.object_b = object_b;
3335
3336 /* If the enclosing objects are structures (and thus have the
3337 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
3338 if (TREE_CODE (type_a) == RECORD_TYPE)
3339 struct_seq = full_seq;
3340
3341 /* Move to the next containing reference for both A and B. */
3342 ref_a = object_a;
3343 ref_b = object_b;
3344 index_a += 1;
3345 index_b += 1;
3346 continue;
3347 }
3348
      /* The components aren't comparable here, so try to approach equal
	 type sizes by stripping from whichever access has the smaller
	 enclosing type.  */
3350 if (!COMPLETE_TYPE_P (type_a)
3351 || !COMPLETE_TYPE_P (type_b)
3352 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3353 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3354 break;
3355
3356 unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3357 unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3358 if (size_a <= size_b)
3359 {
3360 index_a += 1;
3361 ref_a = object_a;
3362 }
3363 if (size_b <= size_a)
3364 {
3365 index_b += 1;
3366 ref_b = object_b;
3367 }
3368 }
3369
3370 /* See whether FULL_SEQ ends at the base and whether the two bases
3371 are equal. We do not care about TBAA or alignment info so we can
3372 use OEP_ADDRESS_OF to avoid false negatives. */
3373 tree base_a = indices_a->base_object;
3374 tree base_b = indices_b->base_object;
3375 bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3376 && full_seq.start_b + full_seq.length == num_dimensions_b
3377 && (indices_a->unconstrained_base
3378 == indices_b->unconstrained_base)
3379 && operand_equal_p (base_a, base_b, flags: OEP_ADDRESS_OF)
3380 && (types_compatible_p (TREE_TYPE (base_a),
3381 TREE_TYPE (base_b))
3382 || (!base_supports_access_fn_components_p (base: base_a)
3383 && !base_supports_access_fn_components_p (base: base_b)
3384 && operand_equal_p
3385 (TYPE_SIZE (TREE_TYPE (base_a)),
3386 TYPE_SIZE (TREE_TYPE (base_b)), flags: 0)))
3387 && (!loop_nest.exists ()
3388 || (object_address_invariant_in_loop_p
3389 (loop: loop_nest[0], obj: base_a))));
3390
3391 /* If the bases are the same, we can include the base variation too.
3392 E.g. the b accesses in:
3393
3394 for (int i = 0; i < n; ++i)
3395 b[i + 4][0] = b[i][0];
3396
3397 have a definite dependence distance of 4, while for:
3398
3399 for (int i = 0; i < n; ++i)
3400 a[i + 4][0] = b[i][0];
3401
3402 the dependence distance depends on the gap between a and b.
3403
3404 If the bases are different then we can only rely on the sequence
3405 rooted at a structure access, since arrays are allowed to overlap
3406 arbitrarily and change shape arbitrarily. E.g. we treat this as
3407 valid code:
3408
3409 int a[256];
3410 ...
3411 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3412
3413 where two lvalues with the same int[4][3] type overlap, and where
3414 both lvalues are distinct from the object's declared type. */
3415 if (same_base_p)
3416 {
3417 if (indices_a->unconstrained_base)
3418 full_seq.length += 1;
3419 }
3420 else
3421 full_seq = struct_seq;
3422
3423 /* Punt if we didn't find a suitable sequence. */
3424 if (full_seq.length == 0)
3425 {
3426 if (use_alt_indices
3427 || (TREE_CODE (DR_REF (a)) == MEM_REF
3428 && TREE_CODE (DR_REF (b)) == MEM_REF)
3429 || may_be_nonaddressable_p (DR_REF (a))
3430 || may_be_nonaddressable_p (DR_REF (b)))
3431 {
3432 /* Fully exhausted possibilities. */
3433 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3434 return res;
3435 }
3436
3437 /* Try evaluating both DRs as dereferences of pointers. */
3438 if (!a->alt_indices.base_object
3439 && TREE_CODE (DR_REF (a)) != MEM_REF)
3440 {
3441 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3442 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3443 build_int_cst
3444 (reference_alias_ptr_type (DR_REF (a)), 0));
3445 dr_analyze_indices (dri: &a->alt_indices, ref: alt_ref,
3446 nest: loop_preheader_edge (loop_nest[0]),
3447 loop: loop_containing_stmt (DR_STMT (a)));
3448 }
3449 if (!b->alt_indices.base_object
3450 && TREE_CODE (DR_REF (b)) != MEM_REF)
3451 {
3452 tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3453 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3454 build_int_cst
3455 (reference_alias_ptr_type (DR_REF (b)), 0));
3456 dr_analyze_indices (dri: &b->alt_indices, ref: alt_ref,
3457 nest: loop_preheader_edge (loop_nest[0]),
3458 loop: loop_containing_stmt (DR_STMT (b)));
3459 }
3460 return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: true);
3461 }
3462
3463 if (!same_base_p)
3464 {
3465 /* Partial overlap is possible for different bases when strict aliasing
3466 is not in effect. It's also possible if either base involves a union
3467 access; e.g. for:
3468
3469 struct s1 { int a[2]; };
3470 struct s2 { struct s1 b; int c; };
3471 struct s3 { int d; struct s1 e; };
3472 union u { struct s2 f; struct s3 g; } *p, *q;
3473
3474 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3475 "p->g.e" (base "p->g") and might partially overlap the s1 at
3476 "q->g.e" (base "q->g"). */
3477 if (!flag_strict_aliasing
3478 || ref_contains_union_access_p (ref: full_seq.object_a)
3479 || ref_contains_union_access_p (ref: full_seq.object_b))
3480 {
3481 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3482 return res;
3483 }
3484
3485 DDR_COULD_BE_INDEPENDENT_P (res) = true;
3486 if (!loop_nest.exists ()
3487 || (object_address_invariant_in_loop_p (loop: loop_nest[0],
3488 obj: full_seq.object_a)
3489 && object_address_invariant_in_loop_p (loop: loop_nest[0],
3490 obj: full_seq.object_b)))
3491 {
3492 DDR_OBJECT_A (res) = full_seq.object_a;
3493 DDR_OBJECT_B (res) = full_seq.object_b;
3494 }
3495 }
3496
3497 DDR_AFFINE_P (res) = true;
3498 DDR_ARE_DEPENDENT (res) = NULL_TREE;
3499 DDR_SUBSCRIPTS (res).create (nelems: full_seq.length);
3500 DDR_LOOP_NEST (res) = loop_nest;
3501 DDR_SELF_REFERENCE (res) = false;
3502
3503 for (i = 0; i < full_seq.length; ++i)
3504 {
3505 struct subscript *subscript;
3506
3507 subscript = XNEW (struct subscript);
3508 SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3509 SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3510 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3511 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3512 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3513 SUB_DISTANCE (subscript) = chrec_dont_know;
3514 DDR_SUBSCRIPTS (res).safe_push (obj: subscript);
3515 }
3516
3517 return res;
3518}
3519
3520/* Initialize a data dependence relation between data accesses A and
3521 B. NB_LOOPS is the number of loops surrounding the references: the
3522 size of the classic distance/direction vectors. */
3523
3524struct data_dependence_relation *
3525initialize_data_dependence_relation (struct data_reference *a,
3526 struct data_reference *b,
3527 vec<loop_p> loop_nest)
3528{
3529 data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3530 DDR_A (res) = a;
3531 DDR_B (res) = b;
3532 DDR_LOOP_NEST (res).create (nelems: 0);
3533 DDR_SUBSCRIPTS (res).create (nelems: 0);
3534 DDR_DIR_VECTS (res).create (nelems: 0);
3535 DDR_DIST_VECTS (res).create (nelems: 0);
3536
3537 if (a == NULL || b == NULL)
3538 {
3539 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3540 return res;
3541 }
3542
3543 /* If the data references do not alias, then they are independent. */
3544 if (!dr_may_alias_p (a, b, loop_nest: loop_nest.exists () ? loop_nest[0] : NULL))
3545 {
3546 DDR_ARE_DEPENDENT (res) = chrec_known;
3547 return res;
3548 }
3549
3550 return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: false);
3551}
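
/* A minimal usage sketch, for illustration only: it assumes the usual
   entry points declared in tree-data-ref.h (find_loop_nest,
   compute_affine_dependence, free_dependence_relation) and two data
   references DR_A and DR_B collected beforehand:

   | auto_vec<loop_p, 3> nest;
   | if (find_loop_nest (loop, &nest))
   |   {
   |     data_dependence_relation *ddr
   |       = initialize_data_dependence_relation (dr_a, dr_b, nest);
   |     compute_affine_dependence (ddr, nest[0]);
   |     if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   |       mark_independent (dr_a, dr_b);
   |     free_dependence_relation (ddr);
   |   }

   where mark_independent stands for whatever the client does with a
   proved independence.  */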


/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
        affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
                        tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}



/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
          && evolution_function_is_constant_p (chrec_b));
}

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
          && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
        {
        case POLYNOMIAL_CHREC:
          switch (TREE_CODE (chrec_b))
            {
            case POLYNOMIAL_CHREC:
              if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
                return false;
              /* FALLTHRU */

            default:
              return true;
            }

        default:
          return true;
        }
    }

  return false;
}
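
/* For instance, (3, {0, +, 1}_1) and ({2, +, 1}_1, {0, +, 1}_1) are SIV
   subscripts, whereas ({0, +, 1}_1, {0, +, 1}_2) is not: both chrecs are
   univariate but evolve in different loops, so the pair is handled as
   MIV instead.  */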

/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
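
/* An affine_fn is just the vector of its coefficients: the constant
   term first, then one coefficient per dimension.  For example, with
   illustrative operands,

   | affine_fn_univar (build_int_cst (integer_type_node, 3), 2,
   |                   build_int_cst (integer_type_node, 5))

   yields the vector [3, 0, 5], i.e. the function 3 + 0 * x_1 + 5 * x_2.  */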

/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
                       tree chrec_b,
                       conflict_function **overlaps_a,
                       conflict_function **overlaps_b,
                       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
        {
          /* The difference is equal to zero: the accessed index
             overlaps for each iteration in the loop.  */
          *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
          *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
          *last_conflicts = chrec_dont_know;
          dependence_stats.num_ziv_dependent++;
        }
      else
        {
          /* The accesses do not overlap.  */
          *overlaps_a = conflict_fn_no_dependence ();
          *overlaps_b = conflict_fn_no_dependence ();
          *last_conflicts = integer_zero_node;
          dependence_stats.num_ziv_independent++;
        }
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
         conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
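
/* For example, for the pair of ZIV subscripts (5, 5) the difference
   folds to 0 and the accesses conflict on every iteration, while for
   (5, 7) it folds to -2 and the accesses never conflict.  A symbolic
   difference such as "n - m" falls into the default case and the test
   conservatively answers "don't know".  */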

/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits the unsigned int type.  If this is not the case,
   or the bound on the number of iterations of LOOP could not be derived,
   returns chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}

/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
          || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
        return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
        {
          *value = value0;
          return true;
        }

      /* Otherwise the chrec has the form "{-197, +, 2}_1", and the
         proof consists in showing that the sign never changes during
         the execution of the loop, from 0 to loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
        return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
        return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
         iterations by one.  */
      if (after_exit)
        nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
        return false;

      *value = value0;
      return value0 == value2;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
        {
        case -1:
          *value = false;
          break;
        case 1:
          *value = true;
          break;
        default:
          return false;
        }
      return true;

    default:
      return false;
    }
}
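
/* Two illustrative cases: for {2, +, 3}_1 the initial value and the step
   are both positive, so the chrec is known to be positive.  For
   {-197, +, 2}_1 in a loop with 100 latch executions, the initial value
   is negative but the end value -197 + 2 * 100 = 3 is positive: the sign
   changes inside the loop, so the function answers false.  */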


/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
                                  tree chrec_b,
                                  conflict_function **overlaps_a,
                                  conflict_function **overlaps_b,
                                  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
        {
          if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
              || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "siv test failed: chrec not positive.\n");

              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
              dependence_stats.num_siv_unimplemented++;
              return;
            }
          else
            {
              if (value1 == true)
                {
                  /* Example:
                     chrec_a = 12
                     chrec_b = {10, +, 1}
                  */

                  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
                    {
                      HOST_WIDE_INT numiter;
                      class loop *loop = get_chrec_loop (chrec_b);

                      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
                      tmp = fold_build2 (EXACT_DIV_EXPR, type,
                                         fold_build1 (ABS_EXPR, type, difference),
                                         CHREC_RIGHT (chrec_b));
                      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
                      *last_conflicts = integer_one_node;

                      /* Perform weak-zero siv test to see if overlap is
                         outside the loop bounds.  */
                      numiter = max_stmt_executions_int (loop);

                      if (numiter >= 0
                          && compare_tree_int (tmp, numiter) > 0)
                        {
                          free_conflict_function (*overlaps_a);
                          free_conflict_function (*overlaps_b);
                          *overlaps_a = conflict_fn_no_dependence ();
                          *overlaps_b = conflict_fn_no_dependence ();
                          *last_conflicts = integer_zero_node;
                          dependence_stats.num_siv_independent++;
                          return;
                        }
                      dependence_stats.num_siv_dependent++;
                      return;
                    }

                  /* When the step does not divide the difference, there are
                     no overlaps.  */
                  else
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      dependence_stats.num_siv_independent++;
                      return;
                    }
                }

              else
                {
                  /* Example:
                     chrec_a = 12
                     chrec_b = {10, +, -1}

                     In this case, chrec_a will not overlap with chrec_b.  */
                  *overlaps_a = conflict_fn_no_dependence ();
                  *overlaps_b = conflict_fn_no_dependence ();
                  *last_conflicts = integer_zero_node;
                  dependence_stats.num_siv_independent++;
                  return;
                }
            }
        }
      else
        {
          if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
              || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "siv test failed: chrec not positive.\n");

              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
              dependence_stats.num_siv_unimplemented++;
              return;
            }
          else
            {
              if (value2 == false)
                {
                  /* Example:
                     chrec_a = 3
                     chrec_b = {10, +, -1}
                  */
                  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
                    {
                      HOST_WIDE_INT numiter;
                      class loop *loop = get_chrec_loop (chrec_b);

                      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
                      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
                                         CHREC_RIGHT (chrec_b));
                      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
                      *last_conflicts = integer_one_node;

                      /* Perform weak-zero siv test to see if overlap is
                         outside the loop bounds.  */
                      numiter = max_stmt_executions_int (loop);

                      if (numiter >= 0
                          && compare_tree_int (tmp, numiter) > 0)
                        {
                          free_conflict_function (*overlaps_a);
                          free_conflict_function (*overlaps_b);
                          *overlaps_a = conflict_fn_no_dependence ();
                          *overlaps_b = conflict_fn_no_dependence ();
                          *last_conflicts = integer_zero_node;
                          dependence_stats.num_siv_independent++;
                          return;
                        }
                      dependence_stats.num_siv_dependent++;
                      return;
                    }

                  /* When the step does not divide the difference, there
                     are no overlaps.  */
                  else
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      dependence_stats.num_siv_independent++;
                      return;
                    }
                }
              else
                {
                  /* Example:
                     chrec_a = 3
                     chrec_b = {4, +, 1}

                     In this case, chrec_a will not overlap with chrec_b.  */
                  *overlaps_a = conflict_fn_no_dependence ();
                  *overlaps_b = conflict_fn_no_dependence ();
                  *last_conflicts = integer_zero_node;
                  dependence_stats.num_siv_independent++;
                  return;
                }
            }
        }
    }
}
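
/* A worked instance of the "value0 == false, value1 == true" case above,
   with illustrative numbers: for chrec_a = 12 and chrec_b = {10, +, 2}_1
   the difference is 10 - 12 = -2 and the step 2 divides |-2|, so the
   single conflict is at iteration |-2| / 2 = 1 of chrec_b, where both
   sides evaluate to 12.  The weak-zero test then discards even that
   conflict when iteration 1 provably lies beyond the loop bound.  */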

/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
        return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
        return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
        tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
        tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

        return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
        tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
        return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
        /* Handle ~X as -1 - X.  */
        tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
        return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
                              build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return cst_and_fits_in_hwi (chrec) ? chrec : chrec_dont_know;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
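
/* For example, applied to {{3, +, 5}_1, +, 7}_2 with INDEX 0 and MULT 1,
   this stores the steps A[0][0] = 7 and A[1][0] = 5 (outermost chrec
   first) and returns the initial value 3.  */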

#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
                                         HOST_WIDE_INT step_a,
                                         HOST_WIDE_INT step_b,
                                         affine_fn *overlaps_a,
                                         affine_fn *overlaps_b,
                                         tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
        {
          tau2 = FLOOR_DIV (niter, step_overlaps_a);
          tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
          last_conflict = tau2;
          *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
        }
      else
        *last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
                                      build_int_cst (NULL_TREE,
                                                     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
                                      build_int_cst (NULL_TREE,
                                                     step_overlaps_b));
    }

  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
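
/* A small worked instance: for STEP_A = 4 and STEP_B = 6 the gcd is 2,
   giving *OVERLAPS_A = {0, +, 3}_DIM and *OVERLAPS_B = {0, +, 2}_DIM;
   indeed {0, +, 4}_x (3 * t) = 12 * t = {0, +, 6}_y (2 * t) for every
   t >= 0.  With NITER = 20 the last conflict is
   MIN (20 / 3, 20 / 2) = 6.  */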

/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
                                      conflict_function **overlaps_a,
                                      conflict_function **overlaps_b,
                                      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
                                           &overlaps_a_xz,
                                           &overlaps_b_xz,
                                           &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
                                           &overlaps_a_yz,
                                           &overlaps_b_yz,
                                           &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
                                           &overlaps_a_xyz,
                                           &overlaps_b_xyz,
                                           &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
        {
          affine_fn t0 = ova1;
          affine_fn t2 = ovb;

          ova1 = affine_fn_plus (ova1, overlaps_a_xz);
          ovb = affine_fn_plus (ovb, overlaps_b_xz);
          affine_fn_free (t0);
          affine_fn_free (t2);
          *last_conflicts = last_conflicts_xz;
        }
      if (yz_p)
        {
          affine_fn t0 = ova2;
          affine_fn t2 = ovb;

          ova2 = affine_fn_plus (ova2, overlaps_a_yz);
          ovb = affine_fn_plus (ovb, overlaps_b_yz);
          affine_fn_free (t0);
          affine_fn_free (t2);
          *last_conflicts = last_conflicts_yz;
        }
      if (xyz_p)
        {
          affine_fn t0 = ova1;
          affine_fn t2 = ova2;
          affine_fn t4 = ovb;

          ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
          ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
          ovb = affine_fn_plus (ovb, overlaps_b_xyz);
          affine_fn_free (t0);
          affine_fn_free (t2);
          affine_fn_free (t4);
          *last_conflicts = last_conflicts_xyz;
        }
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}

/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
                    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
                    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the SIZE x SIZE identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}

/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
                       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
        return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
        return false;
      mat[r2][i] = tem2;
    }

  return true;
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
                          int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
                      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;
  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}

/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
                             lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
        {
          ++i0;
          for (i = m - 1; i >= i0; i--)
            {
              while (S[i][j] != 0)
                {
                  lambda_int factor, a, b;

                  a = S[i-1][j];
                  b = S[i][j];
                  gcc_assert (a != HOST_WIDE_INT_MIN);
                  factor = a / b;

                  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
                    return false;
                  std::swap (S[i], S[i-1]);

                  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
                    return false;
                  std::swap (U[i], U[i-1]);
                }
            }
        }
    }

  return true;
}
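
/* A small numeric instance of the 2 x 1 shape used by the dependence
   tester below: for A = [[4], [-6]], one valid decomposition is

   | U = [[2, 1],      S = [[2],
   |      [3, 2]]           [0]]

   since 2 * 4 + 1 * -6 = 2 = gcd (4, 6), 3 * 4 + 2 * -6 = 0, and
   det (U) = 2 * 2 - 1 * 3 = 1, so U is unimodular.  */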

/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, which are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when the initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
                                 tree chrec_b,
                                 conflict_function **overlaps_a,
                                 conflict_function **overlaps_b,
                                 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
         loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     To answer the question "Is there a dependence?" we have to prove
     that there exists a solution to the Diophantine equation, that the
     solution is in the iteration domain, i.e. the solution is positive
     or zero, and that the solution happens before the upper bound
     loop.nb_iterations.  Otherwise there is no dependence.  This
     function outputs a description of the iterations that hold the
     intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "affine-affine test failed: "
                 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
        {
          HOST_WIDE_INT step_a, step_b;
          HOST_WIDE_INT niter, niter_a, niter_b;
          affine_fn ova, ovb;

          niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
          niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
          niter = MIN (niter_a, niter_b);
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));

          compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
                                                   &ova, &ovb,
                                                   last_conflicts, 1);
          *overlaps_a = conflict_fn (1, ova);
          *overlaps_b = conflict_fn (1, ovb);
        }

      else if (nb_vars_a == 2 && nb_vars_b == 1)
        compute_overlap_steps_for_affine_1_2
          (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
        compute_overlap_steps_for_affine_1_2
          (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: too many variables.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is quite a strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
         solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
           || (A[0][0] < 0 && -A[1][0] < 0)))
        {
          /* The solutions are given by:
             |
             | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
             |                           [u21 u22]    [y0]

             For a given integer t.  Using the following variables,

             | i0 = u11 * gamma / gcd_alpha_beta
             | j0 = u12 * gamma / gcd_alpha_beta
             | i1 = u21
             | j1 = u22

             the solutions are:

             | x0 = i0 + i1 * t,
             | y0 = j0 + j1 * t.  */
          HOST_WIDE_INT i0, j0, i1, j1;

          i0 = U[0][0] * gamma / gcd_alpha_beta;
          j0 = U[0][1] * gamma / gcd_alpha_beta;
          i1 = U[1][0];
          j1 = U[1][1];

          if ((i1 == 0 && i0 < 0)
              || (j1 == 0 && j0 < 0))
            {
              /* There is no solution.
                 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
                 falls in here, but for the moment we don't look at the
                 upper bound of the iteration domain.  */
              *overlaps_a = conflict_fn_no_dependence ();
              *overlaps_b = conflict_fn_no_dependence ();
              *last_conflicts = integer_zero_node;
              goto end_analyze_subs_aa;
            }

          if (i1 > 0 && j1 > 0)
            {
              HOST_WIDE_INT niter_a
                = max_stmt_executions_int (get_chrec_loop (chrec_a));
              HOST_WIDE_INT niter_b
                = max_stmt_executions_int (get_chrec_loop (chrec_b));
              HOST_WIDE_INT niter = MIN (niter_a, niter_b);

              /* (X0, Y0) is a solution of the Diophantine equation:
                 "chrec_a (X0) = chrec_b (Y0)".  */
              HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
                                        CEIL (-j0, j1));
              HOST_WIDE_INT x0 = i1 * tau1 + i0;
              HOST_WIDE_INT y0 = j1 * tau1 + j0;

              /* (X1, Y1) is the smallest positive solution of the eq
                 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
                 first conflict occurs.  */
              HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
              HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
              HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

              if (niter > 0)
                {
                  /* If the overlap occurs outside of the bounds of the
                     loop, there is no dependence.  */
                  if (x1 >= niter_a || y1 >= niter_b)
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      goto end_analyze_subs_aa;
                    }

                  /* max stmt executions can get quite large, avoid
                     overflows by using wide ints here.  */
                  widest_int tau2
                    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
                                wi::sdiv_floor (wi::sub (niter_b, j0), j1));
                  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
                  if (wi::min_precision (last_conflict, SIGNED)
                      <= TYPE_PRECISION (integer_type_node))
                    *last_conflicts
                      = build_int_cst (integer_type_node,
                                       last_conflict.to_shwi ());
                  else
                    *last_conflicts = chrec_dont_know;
                }
              else
                *last_conflicts = chrec_dont_know;

              *overlaps_a
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, x1),
                                                 1,
                                                 build_int_cst (NULL_TREE, i1)));
              *overlaps_b
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, y1),
                                                 1,
                                                 build_int_cst (NULL_TREE, j1)));
            }
          else
            {
              /* FIXME: For the moment, the upper bound of the
                 iteration domain for i and j is not checked.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
            }
        }
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
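
/* Putting the pieces together on illustrative values: for
   chrec_a = {0, +, 4}_1 versus chrec_b = {2, +, 6}_1 we get
   gamma = 2 - 0 = 2 and, from the right Hermite decomposition of
   A = [[4], [-6]] sketched above, gcd_alpha_beta = 2 with
   U = [[2, 1], [3, 2]].  Since 2 divides gamma the gcd test cannot
   disprove the dependence; the solutions of 4 x = 2 + 6 y are
   x = 2 + 3 t, y = 1 + 2 t (both sides evaluate to 8 + 12 t), and
   the first conflict occurs at x = 2, y = 1.  */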

/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produces
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
                                     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
                                     build_int_cst (type, 0),
                                     right_b);
  return true;
}

/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
                       tree chrec_b,
                       conflict_function **overlaps_a,
                       conflict_function **overlaps_b,
                       tree *last_conflicts,
                       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
                                      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
           && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
                                      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
           && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
          && !chrec_contains_symbols (chrec_b))
        {
          analyze_subscript_affine_affine (chrec_a, chrec_b,
                                           overlaps_a, overlaps_b,
                                           last_conflicts);

          if (CF_NOT_KNOWN_P (*overlaps_a)
              || CF_NOT_KNOWN_P (*overlaps_b))
            dependence_stats.num_siv_unimplemented++;
          else if (CF_NO_DEPENDENCE_P (*overlaps_a)
                   || CF_NO_DEPENDENCE_P (*overlaps_b))
            dependence_stats.num_siv_independent++;
          else
            dependence_stats.num_siv_dependent++;
        }
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
                                                        &chrec_b))
        {
          analyze_subscript_affine_affine (chrec_a, chrec_b,
                                           overlaps_a, overlaps_b,
                                           last_conflicts);

          if (CF_NOT_KNOWN_P (*overlaps_a)
              || CF_NOT_KNOWN_P (*overlaps_b))
            dependence_stats.num_siv_unimplemented++;
          else if (CF_NO_DEPENDENCE_P (*overlaps_a)
                   || CF_NO_DEPENDENCE_P (*overlaps_b))
            dependence_stats.num_siv_independent++;
          else
            dependence_stats.num_siv_dependent++;
        }
      else
        goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
        return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
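
/* For example, for CHREC = {{20, +, 4}_1, +, 6}_2 and CST = 3 the gcd of
   the steps is gcd (4, 6) = 2, and since 3 is not a multiple of 2 no
   combination of the steps can make up the difference: the function
   returns false.  */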

/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
                       tree chrec_b,
                       conflict_function **overlaps_a,
                       conflict_function **overlaps_b,
                       tree *last_conflicts,
                       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
         in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
           && evolution_function_is_affine_multivariate_p (chrec_a,
                                                           loop_nest->num)
           && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
         {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

         The difference is 1, and all the evolution steps are multiples
         of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
           && !chrec_contains_symbols (chrec_a, loop_nest)
           && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
           && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
         {0, +, 1}_2  vs.  {0, +, 1}_3
         the overlapping elements are respectively located at iterations:
         {0, +, 1}_x and {0, +, 1}_x,
         in other words, we have the equality:
         {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

         Other examples:
         {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
         {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

         {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
         {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
                                       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
          || CF_NOT_KNOWN_P (*overlaps_b))
        dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
               || CF_NO_DEPENDENCE_P (*overlaps_b))
        dependence_stats.num_miv_independent++;
      else
        dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Determines the iterations for which CHREC_A is equal to CHREC_B with
   respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and OVERLAP_ITERATIONS_B
   are initialized with two functions that describe the iterations that
   contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
                                tree chrec_b,
                                conflict_function **overlap_iterations_a,
                                conflict_function **overlap_iterations_b,
                                tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
           && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
               || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
            || chrec_contains_symbols (chrec_b))
           && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
               || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
                           overlap_iterations_a, overlap_iterations_b,
                           last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
                           overlap_iterations_a, overlap_iterations_b,
                           last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
                           overlap_iterations_a, overlap_iterations_b,
                           last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
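
/* To summarize the dispatch on a few sample pairs: (3, 5) goes to the
   ZIV analyzer, (3, {0, +, 1}_1) to the SIV analyzer, and
   ({0, +, 1}_1, {0, +, 1}_2) to the MIV analyzer, since the two chrecs
   evolve in different loops.  Identical affine chrecs are answered
   directly: they conflict on every iteration.  */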

/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   | loop_2
   |   A[10]
   | endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */
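
/* A minimal trace of the loop below (an illustration, not code from
   this file): with DDR_NB_LOOPS == 3, DIST_V == (0, 0, 1) and
   INDEX == 2, the first iteration copies DIST_V and sets component 1,
   saving (0, 1, 1); the second sets component 0, saving (1, 0, 1).  */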

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}

/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */
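
/* For instance (a hypothetical illustration): for the accesses
   A[i + 1] and A[i] in a single loop, the access functions are
   {1, +, 1}_1 and {0, +, 1}_1, the subscript distance is the
   constant 1 in absolute value, and the function fills in the
   one-component distance vector (1) or (-1).  */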

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested within that
	     loop, there can be more access functions than loops.
	     Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}

/* Return true when the DDR contains only access functions that are
   invariant with respect to the loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}

/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  */
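
/* To see where the distance vector computed below comes from (a worked
   derivation, not code from this file): for the access function
   {{0, +, 2}_1, +, 3}_2 the subscript value is 2*i1 + 3*i2, where i1
   and i2 are the iteration counts of loop_1 and loop_2.  Two
   iterations touch the same element when 2*di1 + 3*di2 == 0; after
   dividing by gcd (2, 3) == 1, the smallest solution with a positive
   outer component is (di1, di2) = (3, -2), the distance vector saved
   below.  */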

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}

/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in a loop nest nested within that
	     loop, there can be more access functions than loops.
	     Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}

/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}

/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |	 t = T[j+1][i-1];  // A
	 |	 T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), which
	 means that the access A occurs later than B with respect to
	 the outer loop, i.e. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |    for (j = 1; j < 100; j++)
	     |      {
	     |	 t = T[j+1][i-1];  // A
	     |	 T[j][i] = t + 2;  // B
	     |      }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  return true;
}

/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */
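
/* For instance, the distance vector (1, 0, -2) maps component-wise to
   the direction vector (dir_positive, dir_equal, dir_negative).  */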

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}

/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}

/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  unsigned i;

	  fprintf (dump_file, "(build_classic_dist_vector\n");
	  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	    {
	      fprintf (dump_file, "  dist_vector = (");
	      print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
				   DDR_NB_LOOPS (ddr));
	      fprintf (dump_file, "  )\n");
	    }
	  fprintf (dump_file, ")\n");
	}

      build_classic_dir_vector (ddr);
    }
}

/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}

/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}

/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */
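
/* A minimal usage sketch (an illustration only; compare the driver
   compute_data_dependences_for_loop further down in this file):

   | auto_vec<loop_p> nest;
   | auto_vec<data_reference_p> refs;
   | auto_vec<ddr_p> ddrs;
   | if (find_loop_nest (loop, &nest)
   |     && find_data_references_in_loop (loop, &refs) != chrec_dont_know)
   |   compute_all_dependences (refs, &ddrs, nest, false);
   | free_dependence_relations (ddrs);
   | free_data_refs (refs);
*/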

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}

/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};


/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */
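
/* For instance (a hypothetical example): for the gimple assignment
   _1 = B[i] the reference B[i] is pushed with is_read set, while for
   A[i] = _2 the reference A[i] is pushed as a write; neither
   statement clobbers memory, so false is returned.  */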

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data references for accesses that are not
     spelled out explicitly, give up if such accesses may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
	clobbers_memory = false;
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  case IFN_MASK_CALL:
	    i = 1;
	    gcc_fallthrough ();
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}


/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were an array access, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}

/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */
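
/* A worked instance of the computation below (an illustration only):
   with base_alignment == 16, base_misalignment == 4 and init == 8, the
   combined misalignment is 12, whose lowest set bit gives
   12 & -12 == 4, so the guaranteed alignment drops to
   MIN (16, 4) == 4 bytes, before being further capped by the
   alignments of OFFSET and STEP.  */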

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}

/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */
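
/* For instance (a hypothetical illustration): if the scalar evolution
   of BASE is {&a[1], +, 16}_1 for an array `a' of 4-byte ints, the
   peeled step preserves 16-byte alignment, but the byte position of
   &a[1] within `a' is 4, so the function would return `a' with
   *ALIGNMENT_OUT == 4.  */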

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}

/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}

/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Returns true when the data dependences have been computed, false
   otherwise.  Given a loop nest LOOP, the following vectors are
   returned: DATAREFS is initialized to all the array elements
   contained in this loop, DEPENDENCE_RELATIONS contains the relations
   between the data references.  Compute read-read and self relations
   if COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}

/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}

/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */
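
/* For instance (a hypothetical illustration): if DR_STEP is n * 4 and
   the range of n is known to be [1, 100], then with USEFUL_MIN == 1
   the unscaled value n is at least the useful minimum and the
   multiplication by 4 cannot change its sign, so ssize_int (1) is
   returned, proving a nonzero forward step.  */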

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      int_range_max vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}

/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}