/* Data references and dependences detectors.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   | chrec1 (x) == chrec2 (y).
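
   For example (an illustrative case), in the loop

   | for (x = 0; x < 100; x++)
   |   A[x + 1] = A[x];

   the write has access function chrec1 = {1, +, 1}_1 and the read has
   chrec2 = {0, +, 1}_1, so the same element is accessed twice whenever
   x + 1 == y, i.e. at dependence distance 1.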

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   | 3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.

*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"
#include "calls.h"

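/* Counters for the dependence tests.  ZIV, SIV and MIV stand for Zero,
   Single and Multiple Index Variable subscript tests, following the
   classification used in practical dependence testing (Goff, Kennedy
   and Tseng).  */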
static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}


/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump of all the data references in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "# stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "# ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "# base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "# Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, " +");
	  break;
	case dir_negative:
	  fprintf (outf, " -");
	  break;
	case dir_equal:
	  fprintf (outf, " =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, " +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, " +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, " -=");
	  break;
	case dir_star:
	  fprintf (outf, " *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the length of each vector is the number
   of loops in the considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
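
/* As a worked example (purely illustrative): for an 8-bit unsigned TYPE
   with OP0 = 200, OP1 = 100 and CODE = PLUS_EXPR,
   (sizetype) OP0 CODE (sizetype) OP1 is 300 whereas
   (sizetype) (OP0 CODE OP1) is (sizetype) 44, so DELTA is 256 and 256
   is subtracted from *OFF.  */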

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
	result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}

/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */
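
/* For instance, a widening conversion from unsigned short to int never
   changes the value, so (sizetype) OP == (sizetype) (int) OP holds for
   any unsigned short OP, regardless of RANGE.  */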

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}

static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
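
/* A small illustrative case: for TYPE = int (with undefined signed
   overflow), OP0 = i_1 (an SSA name), OP1 = 16 and CODE = PLUS_EXPR,
   this sets *VAR = (sizetype) i_1 and *OFF = 16.  */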

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size, TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in the element size of any ARRAY_REF
	       // found within var0 from the corresponding ARRAY_REF
	       // embedded in op0; if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-sized integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
	  wide_int var_min = wi::to_wide (vr_min);
	  wide_int var_max = wi::to_wide (vr_max);
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */
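
/* For example (illustrative): for a pointer-typed EXP such as p_1 p+ 8
   (a POINTER_PLUS_EXPR of SSA name p_1 and constant 8), this sets
   *VAR to p_1 and *OFF to 8.  */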

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
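
/* For instance (an illustrative sketch): for a global "int a[100]"
   accessed as a[i] in a loop whose counter i starts at 0 and is
   incremented by 1, loop analysis yields base_address = &a,
   offset = 0, init = 0 and step = 4 (assuming 4-byte int).  */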

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Return whether BASE can be the base object of a component reference
   that access_fn_component_p accepts.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   REF, storing them in DRI.  The reference is analyzed in LOOP and
   instantiated before NEST.  */
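
/* For example (illustrative): for a[i][j] inside a loop nest, the access
   functions are pushed innermost dimension first, i.e. the evolution of j,
   then the evolution of i, and the remaining base object is the array a
   (later canonicalized to MEM_REF form).  */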

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function, stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}

/* A helper function that computes the order between two tree expressions
   T1 and T2, used by comparator functions when sorting objects based on
   the order of tree expressions.  Returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  return opt_result::success ();
}

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to check whether two data refs are the
   same, so that we consider combining the aliasing checks of the two
   pairs of data-dependent data refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same base address and step.  Therefore, if either a&c or
     b&d don't have the same address and step, we don't care about the
     order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

1759/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1760 FACTOR is number of iterations that each data reference is accessed.
1761
1762 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1763 we create an expression:
1764
1765 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1766 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1767
1768 for aliasing checks. However, in some cases we can decrease the number
1769 of checks by combining two checks into one. For example, suppose we have
1770 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1771 condition is satisfied:
1772
1773 load_ptr_0 < load_ptr_1 &&
1774 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1775
1776 (this condition means, in each iteration of vectorized loop, the accessed
1777 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1778 load_ptr_1.)
1779
1780 we then can use only the following expression to finish the alising checks
1781 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1782
1783 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
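
   For instance (illustrative values only): if load_ptr_1 = load_ptr_0 + 16,
   load_segment_length_0 = 32 and store_segment_length_0 = 48, then
   load_ptr_1 - load_ptr_0 - load_segment_length_0 = -16 < 48, so the
   check against load_ptr_0 and the check against load_ptr_1 collapse
   into the single combined check above.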

   Note that we only consider the case in which load_ptr_0 and load_ptr_1
   have the same base address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case in which DR_B1 and DR_B2 are the same
	     memref and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  last -= 1;
	}
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, "  ");
    }
}

/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}

/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

		       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |   |
     ------------------------------------------------------->
        i_0   ...   i_0+4          j_0   ...   j_0+4

   We can create an expression based on the index rather than the address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.
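
   For instance (illustrative values only): with i_0 = 5 and j_0 = 3 the
   indices are 2 apart and (unsigned) (5 - 3 + 3) = 5 <= 6, so an overlap
   is possible; with i_0 = 8 and j_0 = 3 they are 5 apart and
   (unsigned) (8 - 3 + 3) = 8 > 6, so the accesses cannot overlap.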

   Note that the evolution step of the index needs to be considered in the
   comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by each DR.  In other words, an alias is checked if the memory segment
     accessed by DR_A in some iterations intersects with the memory segment
     accessed by DR_B in the same number of iterations.
     Note that the segment length is a linear function of the number of
     iterations, with DR_STEP as the coefficient.  */
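  /* For instance (illustrative values only): seg_len1 = 33 and abs_step = 8
     give niter_len1 = (33 + 8 - 1) / 8 = 5 below, i.e. the segment spans
     five iterations' worth of index values.  */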
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* Two indices must be the same if they are not SCEVs, or are not
	 SCEVs with respect to the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* The index must have a constant step, otherwise DR_STEP won't be
     constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* The index must evaluate in the same direction as the DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, the alias could be checked against the loop's control IV, but
     we would need to prove a linear mapping between the control IV and the
     reference index.  Although that should be true, we instead check against
     the (array) index of the data reference.  Like the segment length, the
     index length is a linear function of the number of iterations, with
     idx_step as the coefficient, i.e. niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

       <-- idx_len -->
       <--- A: -ve step --->
       +-----+-------+-----+-------+-----+
       | n-1 | ..... |  0  | ..... | n-1 |
       +-----+-------+-----+-------+-----+
                     <--- B: +ve step --->
                     <-- idx_len -->
                     |
                     min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

       low_offsetN = +ve step ? 0 : -idx_lenN;
       high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
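
     For instance (illustrative values only): with a positive step,
     idx_len1 = idx_len2 = 8 and idx_access1 = idx_access2 = 1, we get
     low_offset1 = 0 and high_offset2 = 8, so bias = 8 and limit = 16,
     and the test reports a possible overlap exactly when min2 - min1
     lies in [-8, 8].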

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}

/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

       <- |seg_len| ->
       <--- A: -ve step --->
       +-----+-------+-----+-------+-----+
       | n-1 | ..... |  0  | ..... | n-1 |
       +-----+-------+-----+-------+-----+
                     <--- B: +ve step --->
                     <- |seg_len| ->
                     |
                     base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where  limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
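
     For instance (illustrative values only): with a positive step,
     seg_len_a = 64, step = 16 and last_chunk_a = last_chunk_b = 0,
     we get low_offset_a = 0, high_offset_a = 48 and limit = 48, so
     there is an overlap exactly when addr_b - addr_a' lies in [0, 48].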

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}

/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

       <- |seg_len| ->
       <--- A: -ve step --->
       +-----+-------+-----+-------+-----+
       | n-1 | ..... |  0  | ..... | n-1 |
       +-----+-------+-----+-------+-----+
                     <--- B: +ve step --->
                     <- |seg_len| ->
                     |
                     base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.
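
     For instance (illustrative values only): with access_size = 4 and
     ALIGN = 4, the "+ access_size" and "- ALIGN" cancel out, and the
     inclusive maximum is simply base + (step < 0 ? 0 : seg_len).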

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}

/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}

/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree *cond_expr)
{
  tree part_cond_expr;

  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create a condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}

/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */
static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
			     TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
			     TREE_OPERAND (offset2, 1));

  return res;
}

/* Check if DRA and DRB have equal offsets.  */
bool
dr_equal_offsets_p (struct data_reference *dra,
		    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}

/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}

/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.  */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}

/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
	  && affine_function_constant_p (fn));
}

/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}

/* Applies operation OP on affine functions FNA and FNB, and returns the
   result; e.g. applying PLUS_EXPR to the coefficient vectors {1, 2} and
   {3, 4} yields {4, 6}.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}

/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}

/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}

/* Determine the distance for each subscript in the data dependence
   relation DDR; e.g. for the subscript pair (a[i + 4], a[i]) the
   distance is the constant 4.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}

/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}

/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}

/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is NULL no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}

/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   if it is meaningful to compare their associated access functions
   when checking for dependencies.  */

static bool
access_fn_components_comparable_p (tree ref_a, tree ref_b)
{
  /* Allow pairs of component refs from the following sets:

       { REALPART_EXPR, IMAGPART_EXPR }
       { COMPONENT_REF }
       { ARRAY_REF }.  */
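
  /* For instance (illustrative): a REALPART_EXPR and an IMAGPART_EXPR of
     compatible complex values are comparable, since IMAGPART_EXPR is
     canonicalized to REALPART_EXPR below, whereas a COMPONENT_REF is
     never comparable with an ARRAY_REF.  */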
  tree_code code_a = TREE_CODE (ref_a);
  tree_code code_b = TREE_CODE (ref_b);
  if (code_a == IMAGPART_EXPR)
    code_a = REALPART_EXPR;
  if (code_b == IMAGPART_EXPR)
    code_b = REALPART_EXPR;
  if (code_a != code_b)
    return false;

  if (TREE_CODE (ref_a) == COMPONENT_REF)
    /* ??? We cannot simply use the type of operand #0 of the refs here as
       the Fortran compiler smuggles type punning into COMPONENT_REFs.
       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));

  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}

/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try
   again with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

       struct foo { struct bar s; ... } (*a)[10], (*b)[10];

       A: a[0][i].s.c.d
       B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

		   0   1   2   3
       A:         .d  .c  .s  [i]

		   0   1   2   3   4   5
       B:  __real .f  [i] .e  .s  [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

       A: [1, 3]  (i.e. [i].s.c)
       B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

       A: [1, 2]  (i.e. s.c)
       B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}

/* Initialize a data dependence relation between data accesses A and
   B.  LOOP_NEST is the vector of loops surrounding the references;
   its length gives the size of the classic distance/direction
   vectors.  */
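
/* On return, DDR_ARE_DEPENDENT is chrec_known when the accesses are
   provably independent, chrec_dont_know when the analysis gave up, and
   NULL_TREE when subscripts were set up and still have to be analyzed.
   For instance, two data references that provably do not alias yield
   chrec_known without any subscript being created.  */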

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  return initialize_data_dependence_relation (res, loop_nest, false);
}


/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
	affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.)\n");

  DDR_AFFINE_P (ddr) = false;
}



/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}
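
/* For instance, the accesses A[3] and A[5] give the constant access
   functions 3 and 5: neither depends on an index variable, so the
   subscript (3, 5) is classified as ZIV whatever loops surround the
   accesses.  */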

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
	  && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
	{
	case POLYNOMIAL_CHREC:
	  switch (TREE_CODE (chrec_b))
	    {
	    case POLYNOMIAL_CHREC:
	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
		return false;
	      /* FALLTHRU */

	    default:
	      return true;
	    }

	default:
	  return true;
	}
    }

  return false;
}
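
/* For instance, (5, {0, +, 1}_1) is SIV because one access function is
   constant and the other univariate, and ({0, +, 1}_1, {2, +, 1}_1) is
   SIV because both evolve in the same loop_1.  A pair such as
   ({0, +, 1}_1, {0, +, 1}_2), evolving in two different loops, is
   rejected here and left to the MIV handling instead.  */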

/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
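
/* The vector encodes the constant term at index 0 and the coefficient
   of x_i at index i.  For instance, with CST = 3, DIM = 2 and COEF = 5
   the function builds the vector [3, 0, 5], representing
   3 + 5 * x_2.  */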

/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
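
/* For instance, for A[3] vs. A[5] the difference folds to the integer
   constant -2, so the accesses never overlap and the subscript is
   independent; for A[3] vs. A[3] the difference is 0 and the accesses
   conflict on every iteration.  A symbolic difference such as p - q
   falls into the default case and conservatively answers "don't
   know".  */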

/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits the unsigned int type.  If this is not the case,
   or the bound on the number of iterations of LOOP could not be
   derived, returns chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}

/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is of the form "{-197, +, 2}_1", and the
	 proof consists in showing that the sign never changes during
	 the execution of the loop, from 0 to loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      *value = value0;
      return value0 == value1;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
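
/* For instance, {2, +, 3}_1 has a positive base and a positive step,
   so *VALUE is set to true.  For {-197, +, 2}_1 the base and the step
   have different signs; the code above then evaluates the chrec at the
   last iteration, but because it finally returns VALUE0 == VALUE1 it
   still answers false whenever the two signs differ.  */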


/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
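
/* For instance, with CHREC_A = 12 and CHREC_B = {10, +, 1}_1 the
   difference of the initial conditions is -2 and the step 1 divides
   it: the constant location 12 is touched by CHREC_B at iteration 2,
   so there is exactly one conflict.  With CHREC_B = {10, +, 5}_1 the
   step 5 does not divide 2 and the accesses never overlap.  */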

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}

/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
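
/* For instance, for CHREC = {{7, +, 3}_1, +, 5}_2 with INDEX = 0 and
   MULT = 1, the function stores 5 in A[0][0] and 3 in A[1][0], and
   returns the initial value 7.  With MULT = -1 the steps are negated
   before being stored, which is how the second access function is
   entered into the same matrix below.  */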

#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
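
/* For instance, with STEP_A = 6 and STEP_B = 4 the gcd is 2, so the
   conflicting iterations are x (t) = {0, +, 2}_DIM and
   y (t) = {0, +, 3}_DIM: at those iterations both access functions
   walk through the common values 0, 12, 24, ...  */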

/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}

/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
		    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
		    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}

/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}
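
/* For instance, with MAT = [[2, 0], [5, 1]], R1 = 0, R2 = 1 and
   CONST1 = -2, the second row becomes [1, 1].  This is the elementary
   row operation used by the echelon reduction below; the overflow
   checks let the reduction fail safely instead of computing with a
   wrapped-around matrix.  */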

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
			  int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;
  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}

/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
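
/* For instance, for the 2 x 1 matrix A = [[4], [6]] the elimination
   runs Euclid's algorithm on the two entries, leaving their gcd in
   S[0][0]: one possible result is S = [[2], [0]] with the unimodular
   U = [[-1, 1], [3, -2]], for which U.A = S indeed holds.  */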

/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     To answer the question "Is there a dependence?" we have to prove
     that there exists a solution to the Diophantine equation, and that
     the solution is in the iteration domain, i.e. the solution is
     positive or zero, and that the solution happens before the upper
     bound loop.nb_iterations.  Otherwise there is no dependence.  This
     function outputs a description of the iterations that hold the
     intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
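  /* For instance, 9 * x - 6 * y = 4 has no integer solution because
     gcd (9, 6) = 3 does not divide 4, so accesses with steps 9 and 6
     whose initial values differ by 4 can never touch the same
     element.  */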
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t] . [u11 u12] = [x0]
	     |                            [u21 u22]   [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}

/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produces
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}

/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
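
/* For instance, for CHREC = {{a, +, 4}_1, +, 6}_2 and CST = 3, the gcd
   of the steps is gcd (4, 6) = 2 and 3 % 2 != 0, so no combination of
   steps can ever sum up to CST and the function returns false.  */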

/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2 vs. {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, " (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}

/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |   loop_2
   |     A[10]
   |   endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
5095
5096/* Return false when fail to represent the data dependence as a
5097 distance vector. A_INDEX is the index of the first reference
5098 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5099 second reference. INIT_B is set to true when a component has been
5100 added to the distance vector DIST_V. INDEX_CARRY is then set to
5101 the index in DIST_V that carries the dependence. */
static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
                             unsigned int a_index, unsigned int b_index,
                             lambda_vector dist_v, bool *init_b,
                             int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
        {
          non_affine_dependence_relation (ddr);
          return false;
        }

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
          && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
        {
          HOST_WIDE_INT dist;
          int index;
          int var_a = CHREC_VARIABLE (access_fn_a);
          int var_b = CHREC_VARIABLE (access_fn_b);

          if (var_a != var_b
              || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
            {
              non_affine_dependence_relation (ddr);
              return false;
            }

          /* When data references are collected in a loop while data
             dependences are analyzed in a loop nest nested within that
             loop, there are more access functions than loops.  Skip the
             access functions of loops that are not in the loop nest.

             See PR89725 for more information.  */
          if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
            continue;

          dist = int_cst_value (SUB_DISTANCE (subscript));
          index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
          *index_carry = MIN (index, *index_carry);

          /* This is the subscript coupling test.  If we have already
             recorded a distance for this loop (a distance coming from
             another subscript), it should be the same.  For example,
             in the following code, there is no dependence:

             | loop i = 0, N, 1
             |   T[i+1][i] = ...
             |   ... = T[i][i]
             | endloop
          */
          if (init_v[index] != 0 && dist_v[index] != dist)
            {
              finalize_ddr_dependent (ddr, chrec_known);
              return false;
            }

          dist_v[index] = dist;
          init_v[index] = 1;
          *init_b = true;
        }
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
        {
          /* This can be for example an affine vs. constant dependence
             (T[i] vs. T[3]) that is not an affine dependence and is
             not representable as a distance vector.  */
          non_affine_dependence_relation (ddr);
          return false;
        }
      else
        *init_b = true;
    }

  return true;
}

/* Return true when the DDR contains only access functions that are
   invariant with respect to the loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
                            int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
        || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}

/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  */

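/* As an informal illustration of the (3, -2) example used below: for
   the self-dependence of {{0, +, 2}_1, +, 3}_2, the element accessed
   at iteration (x_1, x_2) is 2*x_1 + 3*x_2.  Two iterations conflict
   when 2*x_1 + 3*x_2 == 2*y_1 + 3*y_2, i.e. when
   2*(x_1 - y_1) == 3*(y_2 - x_2), whose smallest nonzero solution is
   x_1 - y_1 = 3 and x_2 - y_2 = -2, hence the distance vector (3, -2).  */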
static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}

/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
        {
          if (!evolution_function_is_univariate_p (access_fun, loop->num))
            {
              if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
                {
                  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
                  return;
                }

              access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

              if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
                add_multivariate_self_dist (ddr, access_fun);
              else
                /* The evolution step is not constant: it varies in
                   the outer loop, so this cannot be represented by a
                   distance vector.  For example in pr34635.c the
                   evolution is {0, +, {0, +, 4}_1}_2.  */
                DDR_AFFINE_P (ddr) = false;

              return;
            }

          /* When data references are collected in a loop while data
             dependences are analyzed in a loop nest nested within that
             loop, there are more access functions than loops.  Skip the
             access functions of loops that are not in the loop nest.

             See PR89725 for more information.  */
          if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
                                  loop))
            continue;

          index_carry = MIN (index_carry,
                             index_in_loop_nest (CHREC_VARIABLE (access_fun),
                                                 DDR_LOOP_NEST (ddr)));
        }
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
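/* Helper function: push into DDR a distance vector with a distance of 1
   in its first component and 0 elsewhere.  */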
static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}

/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
        if (affine_function_zero_p (ca->fns[j]))
          {
            insert_innermost_unit_dist_vector (ddr);
            return;
          }

      for (j = 0; j < cb->n; j++)
        if (affine_function_zero_p (cb->fns[j]))
          {
            insert_innermost_unit_dist_vector (ddr);
            return;
          }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
                          SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}

/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
                           class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
        add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
        add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
         always be lexicographically positive.

         Data references are collected in the order of execution of
         the program, thus for the following loop

         | for (i = 1; i < 100; i++)
         |   for (j = 1; j < 100; j++)
         |     {
         |       t = T[j+1][i-1];  // A
         |       T[j][i] = t + 2;  // B
         |     }

         references are collected following the direction of the wind:
         A then B.  The data dependence tests are also performed in
         this order, such that we're looking at the distance separating
         the elements accessed by A from the elements later accessed
         by B.  But in this example, the distance returned by
         test_dep (A, B) is lexicographically negative (-1, 1), which
         means that access A occurs later than B with respect to the
         outer loop, i.e. we're actually looking upwind.  In this case
         we solve test_dep (B, A) looking downwind to the
         lexicographically positive solution, which returns the
         distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
        {
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
            return false;
          compute_subscript_distance (ddr);
          if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
                                            &index_carry))
            return false;
          save_dist_v (ddr, save_v);
          DDR_REVERSED_P (ddr) = true;

          /* In this case there is a dependence forward for all the
             outer loops:

             | for (k = 1; k < 100; k++)
             |  for (i = 1; i < 100; i++)
             |   for (j = 1; j < 100; j++)
             |     {
             |       t = T[j+1][i-1];  // A
             |       T[j][i] = t + 2;  // B
             |     }

             the vectors are:
             (0,  1, -1)
             (1,  1, -1)
             (1, -1,  1)
          */
          if (DDR_NB_LOOPS (ddr) > 1)
            {
              add_outer_distances (ddr, save_v, index_carry);
              add_outer_distances (ddr, dist_v, index_carry);
            }
        }
      else
        {
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

          if (DDR_NB_LOOPS (ddr) > 1)
            {
              lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

              if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
                return false;
              compute_subscript_distance (ddr);
              if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
                                                &index_carry))
                return false;

              save_dist_v (ddr, save_v);
              add_outer_distances (ddr, dist_v, index_carry);
              add_outer_distances (ddr, opposite_v, index_carry);
            }
          else
            save_dist_v (ddr, save_v);
        }
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
         there is a dependence of distance 1 on loop_1 for the array A.

         | loop_1
         |   A[5] = ...
         | endloop
      */
      add_outer_distances (ddr, dist_v,
                           lambda_vector_first_nz (dist_v,
                                                   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (dump_file, "  dist_vector = (");
          print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
          fprintf (dump_file, "  )\n");
        }
      fprintf (dump_file, ")\n");
    }

  return true;
}

/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

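/* For instance (informal example), the distance vector (1, -1, 0) maps
   to the direction vector (dir_positive, dir_negative, dir_equal).  */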
static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
        dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}

/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
                               unsigned int a_index, unsigned int b_index,
                               class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
                                      SUB_ACCESS_FN (subscript, b_index),
                                      &overlaps_a, &overlaps_b,
                                      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
        free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
         give a conservative answer in case we cannot prove that
         no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
          || CF_NOT_KNOWN_P (overlaps_b))
        {
          res = chrec_dont_know;
          continue;
        }

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
               || CF_NO_DEPENDENCE_P (overlaps_b))
        {
          res = chrec_known;
          break;
        }
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}

/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
                             class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}

/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
                                           const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
        && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}

/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
                           class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
          && access_functions_are_affine_or_constant_p (drb, loop_nest))
        subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
         the dependence is considered too difficult to determine, answer
         "don't know".  */
      else
        {
          dependence_stats.num_dependence_undetermined++;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Data ref a:\n");
              dump_data_reference (dump_file, dra);
              fprintf (dump_file, "Data ref b:\n");
              dump_data_reference (dump_file, drb);
              fprintf (dump_file,
                       "affine dependence test not usable: access function not affine or constant.\n");
            }
          finalize_ddr_dependent (ddr, chrec_dont_know);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        fprintf (dump_file, ") -> dependence analysis failed\n");
      else
        fprintf (dump_file, ")\n");
    }
}

/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. when the number of
   data references is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
                         vec<ddr_p> *dependence_relations,
                         const vec<loop_p> &loop_nest,
                         bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
         chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
        {
          ddr = initialize_data_dependence_relation (a, b, loop_nest);
          dependence_relations->safe_push (ddr);
          if (loop_nest.exists ())
            compute_affine_dependence (ddr, loop_nest[0]);
        }

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
        ddr = initialize_data_dependence_relation (a, a, loop_nest);
        dependence_relations->safe_push (ddr);
        if (loop_nest.exists ())
          compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}

/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};


/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     Since we cannot model data references for accesses that are not
     spelled out, give up if such accesses may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              class loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          case IFN_MASK_CALL:
            {
              tree orig_fndecl
                = gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
              if (!orig_fndecl
                  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
                clobbers_memory = true;
            }
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          ref.is_conditional_in_stmt = false;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
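            /* Model the masked access as a MEM_REF of the pointer
               argument, typed as the loaded/stored value re-aligned to
               the alignment given by argument 1.  */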
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.is_conditional_in_stmt = true;
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          case IFN_MASK_CALL:
            i = 1;
            gcc_fallthrough ();
          default:
            break;
          }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              ref.is_conditional_in_stmt = false;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}


/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          get_references_in_stmt (stmt, &references);
          if (references.length ())
            {
              free (bbs);
              return true;
            }
        }
    }
  free (bbs);
  return false;
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns a failure result, otherwise returns
   success.  NEST is the outermost loop of the loop nest in which the
   references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
                              vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
                                   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
                            loop_containing_stmt (stmt), ref.ref,
                            stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
                                       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
                            ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

/* Search the data references in basic block BB of LOOP, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
                            vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        {
          struct data_reference *res;
          res = XCNEW (struct data_reference);
          datarefs->safe_push (res);

          return chrec_dont_know;
        }
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
                              vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
        {
          free (bbs);
          return chrec_dont_know;
        }
    }
  free (bbs);

  return NULL_TREE;
}

/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
                               + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);
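
  /* Informal example: with base_alignment 16, base_misalignment 2 and
     INIT 6, the misalignment is 8 and 8 & -8 == 8, so the guaranteed
     alignment drops to MIN (16, 8) == 8 bytes.  */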

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}

/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }
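
  /* For instance (informal example): a base {p_0, +, 8}_1, with p_0 a
     hypothetical pointer, steps by 8 bytes per iteration, so the steps
     preserve at most 8-byte alignment; the loop above caps ALIGNMENT
     at 8 and the analysis continues on p_0.  */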

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                              &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}

/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}

/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}

/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}

/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
          && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
        step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
                         / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
                         / wi::to_widest (factor));
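
      /* Informal example: with FACTOR == 4, [minv, maxv] is the range
         of ssizetype divided by 4; any unscaled step within this
         window can be multiplied back by 4 without overflowing
         ssizetype, so the scaling preserves its sign.  */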

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
          || !get_range_query (cfun)->range_of_expr (vr, step)
          || vr.undefined_p ())
        {
          step_min = wi::to_wide (TYPE_MIN_VALUE (type));
          step_max = wi::to_wide (TYPE_MAX_VALUE (type));
        }
      else
        {
          step_min = vr.lower_bound ();
          step_max = vr.upper_bound ();
        }

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
          && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
        {
          if (wi::ge_p (step_min, useful_min, sgn))
            return ssize_int (useful_min);
          else if (wi::lt_p (step_max, 0, sgn))
            return ssize_int (-1);
          else
            return fold_convert (ssizetype, step);
        }
    }
  return DR_STEP (dr);
}

/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}

/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}

/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

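/* For instance (informal example), when the indicator folds to the
   constant 2, fold_binary below reduces "2 < 0" to a zero constant,
   integer_zerop holds, and the stride is known to be forward.  */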
bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
                                   fold_convert (ssizetype, indicator),
                                   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}