1 | /* Data references and dependences detectors. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Sebastian Pop <pop@cri.ensmp.fr> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* This pass walks a given loop structure searching for array |
22 | references. The information about the array accesses is recorded |
23 | in DATA_REFERENCE structures. |
24 | |
25 | The basic test for determining the dependences is: |
   given two access functions chrec1 and chrec2 for the same array, and
27 | x and y two vectors from the iteration domain, the same element of |
28 | the array is accessed twice at iterations x and y if and only if: |
29 | | chrec1 (x) == chrec2 (y). |
30 | |
31 | The goals of this analysis are: |
32 | |
33 | - to determine the independence: the relation between two |
34 | independent accesses is qualified with the chrec_known (this |
35 | information allows a loop parallelization), |
36 | |
37 | - when two data references access the same data, to qualify the |
38 | dependence relation with classic dependence representations: |
39 | |
40 | - distance vectors |
41 | - direction vectors |
42 | - loop carried level dependence |
43 | - polyhedron dependence |
44 | or with the chains of recurrences based representation, |
45 | |
46 | - to define a knowledge base for storing the data dependence |
47 | information, |
48 | |
49 | - to define an interface to access this data. |
50 | |
51 | |
52 | Definitions: |
53 | |
54 | - subscript: given two array accesses a subscript is the tuple |
55 | composed of the access functions for a given dimension. Example: |
56 | Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts: |
57 | (f1, g1), (f2, g2), (f3, g3). |
58 | |
59 | - Diophantine equation: an equation whose coefficients and |
60 | solutions are integer constants, for example the equation |
61 | | 3*x + 2*y = 1 |
62 | has an integer solution x = 1 and y = -1. |
63 | |
64 | References: |
65 | |
   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.

   - "Practical Dependence Testing" by Goff, Kennedy and Tseng:
   http://citeseer.ist.psu.edu/goff91practical.html
69 | |
70 | - "Loop Transformations for Restructuring Compilers - The Foundations" |
71 | by Utpal Banerjee. |
72 | |
73 | |
74 | */ |
75 | |
76 | #include "config.h" |
77 | #include "system.h" |
78 | #include "coretypes.h" |
79 | #include "backend.h" |
80 | #include "rtl.h" |
81 | #include "tree.h" |
82 | #include "gimple.h" |
83 | #include "gimple-pretty-print.h" |
84 | #include "alias.h" |
85 | #include "fold-const.h" |
86 | #include "expr.h" |
87 | #include "gimple-iterator.h" |
88 | #include "tree-ssa-loop-niter.h" |
89 | #include "tree-ssa-loop.h" |
90 | #include "tree-ssa.h" |
91 | #include "cfgloop.h" |
92 | #include "tree-data-ref.h" |
93 | #include "tree-scalar-evolution.h" |
94 | #include "dumpfile.h" |
95 | #include "tree-affine.h" |
96 | #include "builtins.h" |
97 | #include "tree-eh.h" |
98 | #include "ssa.h" |
99 | #include "internal-fn.h" |
100 | #include "vr-values.h" |
101 | #include "range-op.h" |
102 | #include "tree-ssa-loop-ivopts.h" |
103 | #include "calls.h" |
104 | |
/* Statistics collected while running the dependence tests, used to
   evaluate how often each tester succeeds, fails or punts.  */
static struct datadep_stats
{
  /* Outcomes of whole-relation dependence tests.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Outcomes of per-subscript tests.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variables) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

/* Forward declaration; defined later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
135 | /* Returns true iff A divides B. */ |
136 | |
137 | static inline bool |
138 | tree_fold_divides_p (const_tree a, const_tree b) |
139 | { |
140 | gcc_assert (TREE_CODE (a) == INTEGER_CST); |
141 | gcc_assert (TREE_CODE (b) == INTEGER_CST); |
142 | return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a)); |
143 | } |
144 | |
145 | /* Returns true iff A divides B. */ |
146 | |
147 | static inline bool |
148 | int_divides_p (lambda_int a, lambda_int b) |
149 | { |
150 | return ((b % a) == 0); |
151 | } |
152 | |
153 | /* Return true if reference REF contains a union access. */ |
154 | |
155 | static bool |
156 | ref_contains_union_access_p (tree ref) |
157 | { |
158 | while (handled_component_p (t: ref)) |
159 | { |
160 | ref = TREE_OPERAND (ref, 0); |
161 | if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE |
162 | || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE) |
163 | return true; |
164 | } |
165 | return false; |
166 | } |
167 | |
168 | |
169 | |
170 | /* Dump into FILE all the data references from DATAREFS. */ |
171 | |
172 | static void |
173 | dump_data_references (FILE *file, vec<data_reference_p> datarefs) |
174 | { |
175 | for (data_reference *dr : datarefs) |
176 | dump_data_reference (file, dr); |
177 | } |
178 | |
179 | /* Unified dump into FILE all the data references from DATAREFS. */ |
180 | |
181 | DEBUG_FUNCTION void |
182 | debug (vec<data_reference_p> &ref) |
183 | { |
184 | dump_data_references (stderr, datarefs: ref); |
185 | } |
186 | |
187 | DEBUG_FUNCTION void |
188 | debug (vec<data_reference_p> *ptr) |
189 | { |
190 | if (ptr) |
191 | debug (ref&: *ptr); |
192 | else |
193 | fprintf (stderr, format: "<nil>\n" ); |
194 | } |
195 | |
196 | |
197 | /* Dump into STDERR all the data references from DATAREFS. */ |
198 | |
199 | DEBUG_FUNCTION void |
200 | debug_data_references (vec<data_reference_p> datarefs) |
201 | { |
202 | dump_data_references (stderr, datarefs); |
203 | } |
204 | |
205 | /* Print to STDERR the data_reference DR. */ |
206 | |
207 | DEBUG_FUNCTION void |
208 | debug_data_reference (struct data_reference *dr) |
209 | { |
210 | dump_data_reference (stderr, dr); |
211 | } |
212 | |
213 | /* Dump function for a DATA_REFERENCE structure. */ |
214 | |
215 | void |
216 | dump_data_reference (FILE *outf, |
217 | struct data_reference *dr) |
218 | { |
219 | unsigned int i; |
220 | |
221 | fprintf (stream: outf, format: "#(Data Ref: \n" ); |
222 | fprintf (stream: outf, format: "# bb: %d \n" , gimple_bb (DR_STMT (dr))->index); |
223 | fprintf (stream: outf, format: "# stmt: " ); |
224 | print_gimple_stmt (outf, DR_STMT (dr), 0); |
225 | fprintf (stream: outf, format: "# ref: " ); |
226 | print_generic_stmt (outf, DR_REF (dr)); |
227 | fprintf (stream: outf, format: "# base_object: " ); |
228 | print_generic_stmt (outf, DR_BASE_OBJECT (dr)); |
229 | |
230 | for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) |
231 | { |
232 | fprintf (stream: outf, format: "# Access function %d: " , i); |
233 | print_generic_stmt (outf, DR_ACCESS_FN (dr, i)); |
234 | } |
235 | fprintf (stream: outf, format: "#)\n" ); |
236 | } |
237 | |
238 | /* Unified dump function for a DATA_REFERENCE structure. */ |
239 | |
240 | DEBUG_FUNCTION void |
241 | debug (data_reference &ref) |
242 | { |
243 | dump_data_reference (stderr, dr: &ref); |
244 | } |
245 | |
246 | DEBUG_FUNCTION void |
247 | debug (data_reference *ptr) |
248 | { |
249 | if (ptr) |
250 | debug (ref&: *ptr); |
251 | else |
252 | fprintf (stderr, format: "<nil>\n" ); |
253 | } |
254 | |
255 | |
256 | /* Dumps the affine function described by FN to the file OUTF. */ |
257 | |
258 | DEBUG_FUNCTION void |
259 | dump_affine_function (FILE *outf, affine_fn fn) |
260 | { |
261 | unsigned i; |
262 | tree coef; |
263 | |
264 | print_generic_expr (outf, fn[0], TDF_SLIM); |
265 | for (i = 1; fn.iterate (ix: i, ptr: &coef); i++) |
266 | { |
267 | fprintf (stream: outf, format: " + " ); |
268 | print_generic_expr (outf, coef, TDF_SLIM); |
269 | fprintf (stream: outf, format: " * x_%u" , i); |
270 | } |
271 | } |
272 | |
273 | /* Dumps the conflict function CF to the file OUTF. */ |
274 | |
275 | DEBUG_FUNCTION void |
276 | dump_conflict_function (FILE *outf, conflict_function *cf) |
277 | { |
278 | unsigned i; |
279 | |
280 | if (cf->n == NO_DEPENDENCE) |
281 | fprintf (stream: outf, format: "no dependence" ); |
282 | else if (cf->n == NOT_KNOWN) |
283 | fprintf (stream: outf, format: "not known" ); |
284 | else |
285 | { |
286 | for (i = 0; i < cf->n; i++) |
287 | { |
288 | if (i != 0) |
289 | fprintf (stream: outf, format: " " ); |
290 | fprintf (stream: outf, format: "[" ); |
291 | dump_affine_function (outf, fn: cf->fns[i]); |
292 | fprintf (stream: outf, format: "]" ); |
293 | } |
294 | } |
295 | } |
296 | |
297 | /* Dump function for a SUBSCRIPT structure. */ |
298 | |
299 | DEBUG_FUNCTION void |
300 | dump_subscript (FILE *outf, struct subscript *subscript) |
301 | { |
302 | conflict_function *cf = SUB_CONFLICTS_IN_A (subscript); |
303 | |
304 | fprintf (stream: outf, format: "\n (subscript \n" ); |
305 | fprintf (stream: outf, format: " iterations_that_access_an_element_twice_in_A: " ); |
306 | dump_conflict_function (outf, cf); |
307 | if (CF_NONTRIVIAL_P (cf)) |
308 | { |
309 | tree last_iteration = SUB_LAST_CONFLICT (subscript); |
310 | fprintf (stream: outf, format: "\n last_conflict: " ); |
311 | print_generic_expr (outf, last_iteration); |
312 | } |
313 | |
314 | cf = SUB_CONFLICTS_IN_B (subscript); |
315 | fprintf (stream: outf, format: "\n iterations_that_access_an_element_twice_in_B: " ); |
316 | dump_conflict_function (outf, cf); |
317 | if (CF_NONTRIVIAL_P (cf)) |
318 | { |
319 | tree last_iteration = SUB_LAST_CONFLICT (subscript); |
320 | fprintf (stream: outf, format: "\n last_conflict: " ); |
321 | print_generic_expr (outf, last_iteration); |
322 | } |
323 | |
324 | fprintf (stream: outf, format: "\n (Subscript distance: " ); |
325 | print_generic_expr (outf, SUB_DISTANCE (subscript)); |
326 | fprintf (stream: outf, format: " ))\n" ); |
327 | } |
328 | |
329 | /* Print the classic direction vector DIRV to OUTF. */ |
330 | |
331 | DEBUG_FUNCTION void |
332 | print_direction_vector (FILE *outf, |
333 | lambda_vector dirv, |
334 | int length) |
335 | { |
336 | int eq; |
337 | |
338 | for (eq = 0; eq < length; eq++) |
339 | { |
340 | enum data_dependence_direction dir = ((enum data_dependence_direction) |
341 | dirv[eq]); |
342 | |
343 | switch (dir) |
344 | { |
345 | case dir_positive: |
346 | fprintf (stream: outf, format: " +" ); |
347 | break; |
348 | case dir_negative: |
349 | fprintf (stream: outf, format: " -" ); |
350 | break; |
351 | case dir_equal: |
352 | fprintf (stream: outf, format: " =" ); |
353 | break; |
354 | case dir_positive_or_equal: |
355 | fprintf (stream: outf, format: " +=" ); |
356 | break; |
357 | case dir_positive_or_negative: |
358 | fprintf (stream: outf, format: " +-" ); |
359 | break; |
360 | case dir_negative_or_equal: |
361 | fprintf (stream: outf, format: " -=" ); |
362 | break; |
363 | case dir_star: |
364 | fprintf (stream: outf, format: " *" ); |
365 | break; |
366 | default: |
367 | fprintf (stream: outf, format: "indep" ); |
368 | break; |
369 | } |
370 | } |
371 | fprintf (stream: outf, format: "\n" ); |
372 | } |
373 | |
374 | /* Print a vector of direction vectors. */ |
375 | |
376 | DEBUG_FUNCTION void |
377 | print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, |
378 | int length) |
379 | { |
380 | for (lambda_vector v : dir_vects) |
381 | print_direction_vector (outf, dirv: v, length); |
382 | } |
383 | |
384 | /* Print out a vector VEC of length N to OUTFILE. */ |
385 | |
386 | DEBUG_FUNCTION void |
387 | print_lambda_vector (FILE * outfile, lambda_vector vector, int n) |
388 | { |
389 | int i; |
390 | |
391 | for (i = 0; i < n; i++) |
392 | fprintf (stream: outfile, HOST_WIDE_INT_PRINT_DEC " " , vector[i]); |
393 | fprintf (stream: outfile, format: "\n" ); |
394 | } |
395 | |
396 | /* Print a vector of distance vectors. */ |
397 | |
398 | DEBUG_FUNCTION void |
399 | print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, |
400 | int length) |
401 | { |
402 | for (lambda_vector v : dist_vects) |
403 | print_lambda_vector (outfile: outf, vector: v, n: length); |
404 | } |
405 | |
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Prints both
   data references and then one of three states: "don't know" (analysis
   failed), "no dependence" (proved independent), or the full subscript,
   loop-nest, distance- and direction-vector information.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* A null DDR, or one whose status is chrec_dont_know, represents an
     undetermined dependence: print what is available and stop.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, " (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, " (nil)\n");
	}
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  /* chrec_known means the analysis proved independence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  /* A NULL_TREE status means there is a dependence: dump the subscripts,
     the loop nest, and the classic distance/direction vectors.  */
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, " access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, " access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, " distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, " direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
479 | |
480 | /* Debug version. */ |
481 | |
482 | DEBUG_FUNCTION void |
483 | debug_data_dependence_relation (const struct data_dependence_relation *ddr) |
484 | { |
485 | dump_data_dependence_relation (stderr, ddr); |
486 | } |
487 | |
488 | /* Dump into FILE all the dependence relations from DDRS. */ |
489 | |
490 | DEBUG_FUNCTION void |
491 | dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs) |
492 | { |
493 | for (auto ddr : ddrs) |
494 | dump_data_dependence_relation (outf: file, ddr); |
495 | } |
496 | |
497 | DEBUG_FUNCTION void |
498 | debug (vec<ddr_p> &ref) |
499 | { |
500 | dump_data_dependence_relations (stderr, ddrs: ref); |
501 | } |
502 | |
503 | DEBUG_FUNCTION void |
504 | debug (vec<ddr_p> *ptr) |
505 | { |
506 | if (ptr) |
507 | debug (ref&: *ptr); |
508 | else |
509 | fprintf (stderr, format: "<nil>\n" ); |
510 | } |
511 | |
512 | |
513 | /* Dump to STDERR all the dependence relations from DDRS. */ |
514 | |
515 | DEBUG_FUNCTION void |
516 | debug_data_dependence_relations (vec<ddr_p> ddrs) |
517 | { |
518 | dump_data_dependence_relations (stderr, ddrs); |
519 | } |
520 | |
521 | /* Dumps the distance and direction vectors in FILE. DDRS contains |
522 | the dependence relations, and VECT_SIZE is the size of the |
523 | dependence vectors, or in other words the number of loops in the |
524 | considered nest. */ |
525 | |
526 | DEBUG_FUNCTION void |
527 | dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) |
528 | { |
529 | for (data_dependence_relation *ddr : ddrs) |
530 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) |
531 | { |
532 | for (lambda_vector v : DDR_DIST_VECTS (ddr)) |
533 | { |
534 | fprintf (stream: file, format: "DISTANCE_V (" ); |
535 | print_lambda_vector (outfile: file, vector: v, DDR_NB_LOOPS (ddr)); |
536 | fprintf (stream: file, format: ")\n" ); |
537 | } |
538 | |
539 | for (lambda_vector v : DDR_DIR_VECTS (ddr)) |
540 | { |
541 | fprintf (stream: file, format: "DIRECTION_V (" ); |
542 | print_direction_vector (outf: file, dirv: v, DDR_NB_LOOPS (ddr)); |
543 | fprintf (stream: file, format: ")\n" ); |
544 | } |
545 | } |
546 | |
547 | fprintf (stream: file, format: "\n\n" ); |
548 | } |
549 | |
550 | /* Dumps the data dependence relations DDRS in FILE. */ |
551 | |
552 | DEBUG_FUNCTION void |
553 | dump_ddrs (FILE *file, vec<ddr_p> ddrs) |
554 | { |
555 | for (data_dependence_relation *ddr : ddrs) |
556 | dump_data_dependence_relation (outf: file, ddr); |
557 | |
558 | fprintf (stream: file, format: "\n\n" ); |
559 | } |
560 | |
561 | DEBUG_FUNCTION void |
562 | debug_ddrs (vec<ddr_p> ddrs) |
563 | { |
564 | dump_ddrs (stderr, ddrs); |
565 | } |
566 | |
567 | /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of |
568 | OP0 CODE OP1, where: |
569 | |
570 | - OP0 CODE OP1 has integral type TYPE |
571 | - the range of OP0 is given by OP0_RANGE and |
572 | - the range of OP1 is given by OP1_RANGE. |
573 | |
574 | Independently of RESULT_RANGE, try to compute: |
575 | |
576 | DELTA = ((sizetype) OP0 CODE (sizetype) OP1) |
577 | - (sizetype) (OP0 CODE OP1) |
578 | |
579 | as a constant and subtract DELTA from the ssizetype constant in *OFF. |
580 | Return true on success, or false if DELTA is not known at compile time. |
581 | |
582 | Truncation and sign changes are known to distribute over CODE, i.e. |
583 | |
584 | (itype) (A CODE B) == (itype) A CODE (itype) B |
585 | |
586 | for any integral type ITYPE whose precision is no greater than the |
587 | precision of A and B. */ |
588 | |
589 | static bool |
590 | compute_distributive_range (tree type, value_range &op0_range, |
591 | tree_code code, value_range &op1_range, |
592 | tree *off, value_range *result_range) |
593 | { |
594 | gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)); |
595 | if (result_range) |
596 | { |
597 | range_op_handler op (code); |
598 | if (!op.fold_range (r&: *result_range, type, lh: op0_range, rh: op1_range)) |
599 | result_range->set_varying (type); |
600 | } |
601 | |
602 | /* The distributive property guarantees that if TYPE is no narrower |
603 | than SIZETYPE, |
604 | |
605 | (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1 |
606 | |
607 | and so we can treat DELTA as zero. */ |
608 | if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype)) |
609 | return true; |
610 | |
611 | /* If overflow is undefined, we can assume that: |
612 | |
613 | X == (ssizetype) OP0 CODE (ssizetype) OP1 |
614 | |
615 | is within the range of TYPE, i.e.: |
616 | |
617 | X == (ssizetype) (TYPE) X |
618 | |
619 | Distributing the (TYPE) truncation over X gives: |
620 | |
621 | X == (ssizetype) (OP0 CODE OP1) |
622 | |
623 | Casting both sides to sizetype and distributing the sizetype cast |
624 | over X gives: |
625 | |
626 | (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1) |
627 | |
628 | and so we can treat DELTA as zero. */ |
629 | if (TYPE_OVERFLOW_UNDEFINED (type)) |
630 | return true; |
631 | |
632 | /* Compute the range of: |
633 | |
634 | (ssizetype) OP0 CODE (ssizetype) OP1 |
635 | |
636 | The distributive property guarantees that this has the same bitpattern as: |
637 | |
638 | (sizetype) OP0 CODE (sizetype) OP1 |
639 | |
640 | but its range is more conducive to analysis. */ |
641 | range_cast (r&: op0_range, ssizetype); |
642 | range_cast (r&: op1_range, ssizetype); |
643 | value_range wide_range; |
644 | range_op_handler op (code); |
645 | bool saved_flag_wrapv = flag_wrapv; |
646 | flag_wrapv = 1; |
647 | if (!op.fold_range (r&: wide_range, ssizetype, lh: op0_range, rh: op1_range)) |
648 | wide_range.set_varying (ssizetype);; |
649 | flag_wrapv = saved_flag_wrapv; |
650 | if (wide_range.num_pairs () != 1 |
651 | || wide_range.varying_p () || wide_range.undefined_p ()) |
652 | return false; |
653 | |
654 | wide_int lb = wide_range.lower_bound (); |
655 | wide_int ub = wide_range.upper_bound (); |
656 | |
657 | /* Calculate the number of times that each end of the range overflows or |
658 | underflows TYPE. We can only calculate DELTA if the numbers match. */ |
659 | unsigned int precision = TYPE_PRECISION (type); |
660 | if (!TYPE_UNSIGNED (type)) |
661 | { |
662 | wide_int type_min = wi::mask (width: precision - 1, negate_p: true, precision: lb.get_precision ()); |
663 | lb -= type_min; |
664 | ub -= type_min; |
665 | } |
666 | wide_int upper_bits = wi::mask (width: precision, negate_p: true, precision: lb.get_precision ()); |
667 | lb &= upper_bits; |
668 | ub &= upper_bits; |
669 | if (lb != ub) |
670 | return false; |
671 | |
672 | /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with |
673 | negative values indicating underflow. The low PRECISION bits of LB |
674 | are clear, so DELTA is therefore LB (== UB). */ |
675 | *off = wide_int_to_tree (ssizetype, cst: wi::to_wide (t: *off) - lb); |
676 | return true; |
677 | } |
678 | |
679 | /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP, |
680 | given that OP has type FROM_TYPE and range RANGE. Both TO_TYPE and |
681 | FROM_TYPE are integral types. */ |
682 | |
683 | static bool |
684 | nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range) |
685 | { |
686 | gcc_assert (INTEGRAL_TYPE_P (to_type) |
687 | && INTEGRAL_TYPE_P (from_type) |
688 | && !TYPE_OVERFLOW_TRAPS (to_type) |
689 | && !TYPE_OVERFLOW_TRAPS (from_type)); |
690 | |
691 | /* Converting to something no narrower than sizetype and then to sizetype |
692 | is equivalent to converting directly to sizetype. */ |
693 | if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype)) |
694 | return true; |
695 | |
696 | /* Check whether TO_TYPE can represent all values that FROM_TYPE can. */ |
697 | if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type) |
698 | && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type))) |
699 | return true; |
700 | |
701 | /* For narrowing conversions, we could in principle test whether |
702 | the bits in FROM_TYPE but not in TO_TYPE have a fixed value |
703 | and apply a constant adjustment. |
704 | |
705 | For other conversions (which involve a sign change) we could |
706 | check that the signs are always equal, and apply a constant |
707 | adjustment if the signs are negative. |
708 | |
709 | However, both cases should be rare. */ |
710 | return range_fits_type_p (vr: &range, TYPE_PRECISION (to_type), |
711 | TYPE_SIGN (to_type)); |
712 | } |
713 | |
/* Forward declaration; the contract is documented at the definition
   later in the file.  Splits an expression into a variable part *VAR and
   a constant ssizetype offset *OFF, caching per-SSA-name results in
   CACHE and bounding the amount of work via LIMIT.  */
static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
719 | |
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  /* Trapping overflow invalidates the distributive-range reasoning.  */
  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      /* The whole value is a compile-time constant offset.  */
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      /* Split both operands and recombine; pointer offsets need no
	 overflow compensation.  */
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Compute *OFF before the range check: compute_distributive_range
	 may adjust it to compensate for truncation overflow.  */
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      /* Only a constant multiplier distributes over the split.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	/* Decompose &REF into a base plus bit position and variable
	   offset.  */
	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* A bit position that is not a whole number of bytes cannot be
	   represented in the byte offset *OFF.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A zero offset entry marks a name we failed on before, or
		   one we are in the middle of processing (cycle guard).  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	/* Bound the total amount of SSA-name chasing.  */
	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		/* Pointer-to-pointer: just re-cast the variable part.  */
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		/* Pointer-to-integer: reinterpret the pointer part as
		   sizetype.  */
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		/* Integer-to-integer: valid only when the conversion does
		   not change the value seen through sizetype.  */
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
989 | |
990 | /* If EXP has pointer type, try to express it as: |
991 | |
992 | POINTER_PLUS <*VAR, (sizetype) *OFF> |
993 | |
994 | where: |
995 | |
996 | - *VAR has the same type as EXP |
997 | - *OFF is a constant of type ssizetype. |
998 | |
999 | If EXP has an integral type, try to express (sizetype) EXP as: |
1000 | |
1001 | *VAR + (sizetype) *OFF |
1002 | |
1003 | where: |
1004 | |
1005 | - *VAR has type sizetype |
1006 | - *OFF is a constant of type ssizetype. |
1007 | |
1008 | If EXP_RANGE is nonnull, set it to the range of EXP. |
1009 | |
1010 | CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously |
1011 | visited. LIMIT counts down the number of SSA names that we are |
1012 | allowed to process before giving up. */ |
1013 | |
static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  /* If the caller asked for a range, seed it with the full range of
     EXP's type and refine it below for SSA names.  */
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (r&: vr, expr: exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, min&: vr_min, max&: vr_max);
	  wide_int var_min = wi::to_wide (t: vr_min);
	  wide_int var_max = wi::to_wide (t: vr_max);
	  /* Tighten the endpoints using the known nonzero bits of EXP.  */
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  /* Try to decompose EXP itself; chrecs and ternary rhs codes are not
     handled by split_constant_offset_1.  */
  if (!tree_is_chrec (expr: exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (expr: exp, code: &code, op0: &op0, op1: &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   result_range: exp_range, cache, limit))
	return;
    }

  /* Fall back to the trivial decomposition EXP + 0.  */
  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  /* SSA names already had their range computed above; for any other
     expression ask the range query directly.  */
  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, expr: exp)
      && !r.undefined_p ())
    *exp_range = r;
}
1070 | |
1071 | /* Expresses EXP as VAR + OFF, where OFF is a constant. VAR has the same |
1072 | type as EXP while OFF has type ssizetype. */ |
1073 | |
1074 | void |
1075 | split_constant_offset (tree exp, tree *var, tree *off) |
1076 | { |
1077 | unsigned limit = param_ssa_name_def_chain_limit; |
1078 | static hash_map<tree, std::pair<tree, tree> > *cache; |
1079 | if (!cache) |
1080 | cache = new hash_map<tree, std::pair<tree, tree> > (37); |
1081 | split_constant_offset (exp, var, off, exp_range: nullptr, cache&: *cache, limit: &limit); |
1082 | *var = fold_convert (TREE_TYPE (exp), *var); |
1083 | cache->empty (); |
1084 | } |
1085 | |
1086 | /* Returns the address ADDR of an object in a canonical shape (without nop |
1087 | casts, and with type of pointer to the object). */ |
1088 | |
1089 | static tree |
1090 | canonicalize_base_object_address (tree addr) |
1091 | { |
1092 | tree orig = addr; |
1093 | |
1094 | STRIP_NOPS (addr); |
1095 | |
1096 | /* The base address may be obtained by casting from integer, in that case |
1097 | keep the cast. */ |
1098 | if (!POINTER_TYPE_P (TREE_TYPE (addr))) |
1099 | return orig; |
1100 | |
1101 | if (TREE_CODE (addr) != ADDR_EXPR) |
1102 | return addr; |
1103 | |
1104 | return build_fold_addr_expr (TREE_OPERAND (addr, 0)); |
1105 | } |
1106 | |
1107 | /* Analyze the behavior of memory reference REF within STMT. |
1108 | There are two modes: |
1109 | |
1110 | - BB analysis. In this case we simply split the address into base, |
1111 | init and offset components, without reference to any containing loop. |
1112 | The resulting base and offset are general expressions and they can |
1113 | vary arbitrarily from one iteration of the containing loop to the next. |
1114 | The step is always zero. |
1115 | |
1116 | - loop analysis. In this case we analyze the reference both wrt LOOP |
1117 | and on the basis that the reference occurs (is "used") in LOOP; |
1118 | see the comment above analyze_scalar_evolution_in_loop for more |
1119 | information about this distinction. The base, init, offset and |
1120 | step fields are all invariant in LOOP. |
1121 | |
1122 | Perform BB analysis if LOOP is null, or if LOOP is the function's |
1123 | dummy outermost loop. In other cases perform loop analysis. |
1124 | |
1125 | Return true if the analysis succeeded and store the results in DRB if so. |
1126 | BB analysis can only fail for bitfield or reversed-storage accesses. */ |
1127 | |
opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "analyze_innermost: " );

  /* Split REF into a base object plus constant bit position/size and a
     variable offset.  */
  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  /* The access must start on a byte boundary.  */
  poly_int64 pbytepos;
  if (!multiple_p (a: pbitpos, BITS_PER_UNIT, multiple: &pbytepos))
    return opt_result::failure_at (loc: stmt,
				   fmt: "failed: bit offset alignment.\n" );

  if (preversep)
    return opt_result::failure_at (loc: stmt,
				   fmt: "failed: reverse storage order.\n" );

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned int HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  /* Turn BASE into an address: either the (offset-adjusted) pointer
     operand of a MEM_REF or the address of the object itself.  */
  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, cst: moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  /* In loop mode the base address must evolve affinely wrt LOOP; in BB
     mode the base is used as-is with a zero step.  */
  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (loc: stmt, fmt: "failed: evolution of base is not affine.\n" );
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  /* Likewise for the variable offset part, if any.  */
  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (loc: stmt, fmt: "failed: evolution of offset is not affine.\n" );
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (exp: base_iv.base, var: &base_iv.base, off: &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (exp: offset_iv.base, var: &offset_iv.base, off: &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  /* The total step is the sum of the base and offset evolutions.  */
  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (addr: base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned int HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  /* Commit the results to the behavior descriptor.  */
  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  /* If the misalignment cannot be expressed wrt BASE_ALIGNMENT, fall
     back to the alignment implied by the misalignment value itself.  */
  if (known_misalignment (value: base_misalignment, align: base_alignment,
			 misalign: &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (a: base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "success.\n" );

  return opt_result::success ();
}
1272 | |
1273 | /* Return true if OP is a valid component reference for a DR access |
1274 | function. This accepts a subset of what handled_component_p accepts. */ |
1275 | |
1276 | static bool |
1277 | access_fn_component_p (tree op) |
1278 | { |
1279 | switch (TREE_CODE (op)) |
1280 | { |
1281 | case REALPART_EXPR: |
1282 | case IMAGPART_EXPR: |
1283 | case ARRAY_REF: |
1284 | return true; |
1285 | |
1286 | case COMPONENT_REF: |
1287 | return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE; |
1288 | |
1289 | default: |
1290 | return false; |
1291 | } |
1292 | } |
1293 | |
1294 | /* Returns whether BASE can have a access_fn_component_p with BASE |
1295 | as base. */ |
1296 | |
1297 | static bool |
1298 | base_supports_access_fn_components_p (tree base) |
1299 | { |
1300 | switch (TREE_CODE (TREE_TYPE (base))) |
1301 | { |
1302 | case COMPLEX_TYPE: |
1303 | case ARRAY_TYPE: |
1304 | case RECORD_TYPE: |
1305 | return true; |
1306 | default: |
1307 | return false; |
1308 | } |
1309 | } |
1310 | |
1311 | /* Determines the base object and the list of indices of memory reference |
1312 | DR, analyzed in LOOP and instantiated before NEST. */ |
1313 | |
static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (nelems: 0);
      return;
    }

  /* Access functions collected for each dimension we can analyze.  */
  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (t: ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The index becomes an access function: its scalar evolution
	     instantiated before NEST.  */
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (obj: access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (obj: off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (exp: base, var: &base, off: &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (x: wi::to_wide (t: off),
	       y: wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       sgn: SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (t: off);
	  off = wide_int_to_tree (ssizetype, cst: wi::to_wide (t: off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), cst: rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	    (access_fn, fold_convert (orig_type, off));
	  /* ??? This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (obj: access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  /* REF is now the base object of the analyzed access.  */
  dri->base_object = ref;
  dri->access_fns = access_fns;
}
1451 | |
1452 | /* Extracts the alias analysis information from the memory reference DR. */ |
1453 | |
1454 | static void |
1455 | dr_analyze_alias (struct data_reference *dr) |
1456 | { |
1457 | tree ref = DR_REF (dr); |
1458 | tree base = get_base_address (t: ref), addr; |
1459 | |
1460 | if (INDIRECT_REF_P (base) |
1461 | || TREE_CODE (base) == MEM_REF) |
1462 | { |
1463 | addr = TREE_OPERAND (base, 0); |
1464 | if (TREE_CODE (addr) == SSA_NAME) |
1465 | DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); |
1466 | } |
1467 | } |
1468 | |
1469 | /* Frees data reference DR. */ |
1470 | |
1471 | void |
1472 | free_data_ref (data_reference_p dr) |
1473 | { |
1474 | DR_ACCESS_FNS (dr).release (); |
1475 | if (dr->alt_indices.base_object) |
1476 | dr->alt_indices.access_fns.release (); |
1477 | free (ptr: dr); |
1478 | } |
1479 | |
1480 | /* Analyze memory reference MEMREF, which is accessed in STMT. |
1481 | The reference is a read if IS_READ is true, otherwise it is a write. |
1482 | IS_CONDITIONAL_IN_STMT indicates that the reference is conditional |
1483 | within STMT, i.e. that it might not occur even if STMT is executed |
1484 | and runs to completion. |
1485 | |
1486 | Return the data_reference description of MEMREF. NEST is the outermost |
1487 | loop in which the reference should be instantiated, LOOP is the loop |
1488 | in which the data reference should be analyzed. */ |
1489 | |
1490 | struct data_reference * |
1491 | create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, |
1492 | bool is_read, bool is_conditional_in_stmt) |
1493 | { |
1494 | struct data_reference *dr; |
1495 | |
1496 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1497 | { |
1498 | fprintf (stream: dump_file, format: "Creating dr for " ); |
1499 | print_generic_expr (dump_file, memref, TDF_SLIM); |
1500 | fprintf (stream: dump_file, format: "\n" ); |
1501 | } |
1502 | |
1503 | dr = XCNEW (struct data_reference); |
1504 | DR_STMT (dr) = stmt; |
1505 | DR_REF (dr) = memref; |
1506 | DR_IS_READ (dr) = is_read; |
1507 | DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt; |
1508 | |
1509 | dr_analyze_innermost (drb: &DR_INNERMOST (dr), ref: memref, |
1510 | loop: nest != NULL ? loop : NULL, stmt); |
1511 | dr_analyze_indices (dri: &dr->indices, DR_REF (dr), nest, loop); |
1512 | dr_analyze_alias (dr); |
1513 | |
1514 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1515 | { |
1516 | unsigned i; |
1517 | fprintf (stream: dump_file, format: "\tbase_address: " ); |
1518 | print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM); |
1519 | fprintf (stream: dump_file, format: "\n\toffset from base address: " ); |
1520 | print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM); |
1521 | fprintf (stream: dump_file, format: "\n\tconstant offset from base address: " ); |
1522 | print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); |
1523 | fprintf (stream: dump_file, format: "\n\tstep: " ); |
1524 | print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); |
1525 | fprintf (stream: dump_file, format: "\n\tbase alignment: %d" , DR_BASE_ALIGNMENT (dr)); |
1526 | fprintf (stream: dump_file, format: "\n\tbase misalignment: %d" , |
1527 | DR_BASE_MISALIGNMENT (dr)); |
1528 | fprintf (stream: dump_file, format: "\n\toffset alignment: %d" , |
1529 | DR_OFFSET_ALIGNMENT (dr)); |
1530 | fprintf (stream: dump_file, format: "\n\tstep alignment: %d" , DR_STEP_ALIGNMENT (dr)); |
1531 | fprintf (stream: dump_file, format: "\n\tbase_object: " ); |
1532 | print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM); |
1533 | fprintf (stream: dump_file, format: "\n" ); |
1534 | for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) |
1535 | { |
1536 | fprintf (stream: dump_file, format: "\tAccess function %d: " , i); |
1537 | print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM); |
1538 | } |
1539 | } |
1540 | |
1541 | return dr; |
1542 | } |
1543 | |
1544 | /* A helper function computes order between two tree expressions T1 and T2. |
1545 | This is used in comparator functions sorting objects based on the order |
1546 | of tree expressions. The function returns -1, 0, or 1. */ |
1547 | |
int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* Identical nodes compare equal; a NULL tree sorts before anything
     else.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  /* Strip conversions that do not change the semantics before
     comparing structurally.  */
  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Order by tree code first, but treat any pair of conversions as
     having equal codes so their operands get compared instead.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Shorter strings sort first; equal-length strings compare
	 bytewise.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      /* The codes matched above, so T2 is a POLY_INT_CST as well.  */
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (a: wi::to_poly_widest (t: t1),
				       b: wi::to_poly_widest (t: t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}
1619 | |
1620 | /* Return TRUE it's possible to resolve data dependence DDR by runtime alias |
1621 | check. */ |
1622 | |
1623 | opt_result |
1624 | runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p) |
1625 | { |
1626 | if (dump_enabled_p ()) |
1627 | dump_printf (MSG_NOTE, |
1628 | "consider run-time aliasing test between %T and %T\n" , |
1629 | DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr))); |
1630 | |
1631 | if (!speed_p) |
1632 | return opt_result::failure_at (DR_STMT (DDR_A (ddr)), |
1633 | fmt: "runtime alias check not supported when" |
1634 | " optimizing for size.\n" ); |
1635 | |
1636 | /* FORNOW: We don't support versioning with outer-loop in either |
1637 | vectorization or loop distribution. */ |
1638 | if (loop != NULL && loop->inner != NULL) |
1639 | return opt_result::failure_at (DR_STMT (DDR_A (ddr)), |
1640 | fmt: "runtime alias check not supported for" |
1641 | " outer loop.\n" ); |
1642 | |
1643 | return opt_result::success (); |
1644 | } |
1645 | |
1646 | /* Operator == between two dr_with_seg_len objects. |
1647 | |
1648 | This equality operator is used to make sure two data refs |
1649 | are the same one so that we will consider to combine the |
1650 | aliasing checks of those two pairs of data dependent data |
1651 | refs. */ |
1652 | |
1653 | static bool |
1654 | operator == (const dr_with_seg_len& d1, |
1655 | const dr_with_seg_len& d2) |
1656 | { |
1657 | return (operand_equal_p (DR_BASE_ADDRESS (d1.dr), |
1658 | DR_BASE_ADDRESS (d2.dr), flags: 0) |
1659 | && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 |
1660 | && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 |
1661 | && data_ref_compare_tree (t1: d1.seg_len, t2: d2.seg_len) == 0 |
1662 | && known_eq (d1.access_size, d2.access_size) |
1663 | && d1.align == d2.align); |
1664 | } |
1665 | |
1666 | /* Comparison function for sorting objects of dr_with_seg_len_pair_t |
1667 | so that we can combine aliasing checks in one scan. */ |
1668 | |
1669 | static int |
1670 | comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) |
1671 | { |
1672 | const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_; |
1673 | const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_; |
1674 | const dr_with_seg_len &a1 = pa->first, &a2 = pa->second; |
1675 | const dr_with_seg_len &b1 = pb->first, &b2 = pb->second; |
1676 | |
1677 | /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks |
1678 | if a and c have the same basic address snd step, and b and d have the same |
1679 | address and step. Therefore, if any a&c or b&d don't have the same address |
1680 | and step, we don't care the order of those two pairs after sorting. */ |
1681 | int comp_res; |
1682 | |
1683 | if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr), |
1684 | DR_BASE_ADDRESS (b1.dr))) != 0) |
1685 | return comp_res; |
1686 | if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr), |
1687 | DR_BASE_ADDRESS (b2.dr))) != 0) |
1688 | return comp_res; |
1689 | if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr), |
1690 | DR_STEP (b1.dr))) != 0) |
1691 | return comp_res; |
1692 | if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr), |
1693 | DR_STEP (b2.dr))) != 0) |
1694 | return comp_res; |
1695 | if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr), |
1696 | DR_OFFSET (b1.dr))) != 0) |
1697 | return comp_res; |
1698 | if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr), |
1699 | DR_INIT (b1.dr))) != 0) |
1700 | return comp_res; |
1701 | if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr), |
1702 | DR_OFFSET (b2.dr))) != 0) |
1703 | return comp_res; |
1704 | if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr), |
1705 | DR_INIT (b2.dr))) != 0) |
1706 | return comp_res; |
1707 | |
1708 | return 0; |
1709 | } |
1710 | |
1711 | /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ |
1712 | |
1713 | static void |
1714 | dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) |
1715 | { |
1716 | dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n" , indent, |
1717 | DR_REF (alias_pair->first.dr), |
1718 | DR_REF (alias_pair->second.dr)); |
1719 | |
1720 | dump_printf (MSG_NOTE, "%ssegment length: %T" , indent, |
1721 | alias_pair->first.seg_len); |
1722 | if (!operand_equal_p (alias_pair->first.seg_len, |
1723 | alias_pair->second.seg_len, flags: 0)) |
1724 | dump_printf (MSG_NOTE, " vs. %T" , alias_pair->second.seg_len); |
1725 | |
1726 | dump_printf (MSG_NOTE, "\n%saccess size: " , indent); |
1727 | dump_dec (MSG_NOTE, alias_pair->first.access_size); |
1728 | if (maybe_ne (a: alias_pair->first.access_size, b: alias_pair->second.access_size)) |
1729 | { |
1730 | dump_printf (MSG_NOTE, " vs. " ); |
1731 | dump_dec (MSG_NOTE, alias_pair->second.access_size); |
1732 | } |
1733 | |
1734 | dump_printf (MSG_NOTE, "\n%salignment: %d" , indent, |
1735 | alias_pair->first.align); |
1736 | if (alias_pair->first.align != alias_pair->second.align) |
1737 | dump_printf (MSG_NOTE, " vs. %d" , alias_pair->second.align); |
1738 | |
1739 | dump_printf (MSG_NOTE, "\n%sflags: " , indent); |
1740 | if (alias_pair->flags & DR_ALIAS_RAW) |
1741 | dump_printf (MSG_NOTE, " RAW" ); |
1742 | if (alias_pair->flags & DR_ALIAS_WAR) |
1743 | dump_printf (MSG_NOTE, " WAR" ); |
1744 | if (alias_pair->flags & DR_ALIAS_WAW) |
1745 | dump_printf (MSG_NOTE, " WAW" ); |
1746 | if (alias_pair->flags & DR_ALIAS_ARBITRARY) |
1747 | dump_printf (MSG_NOTE, " ARBITRARY" ); |
1748 | if (alias_pair->flags & DR_ALIAS_SWAPPED) |
1749 | dump_printf (MSG_NOTE, " SWAPPED" ); |
1750 | if (alias_pair->flags & DR_ALIAS_UNSWAPPED) |
1751 | dump_printf (MSG_NOTE, " UNSWAPPED" ); |
1752 | if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) |
1753 | dump_printf (MSG_NOTE, " MIXED_STEPS" ); |
1754 | if (alias_pair->flags == 0) |
1755 | dump_printf (MSG_NOTE, " <none>" ); |
1756 | dump_printf (MSG_NOTE, "\n" ); |
1757 | } |
1758 | |
1759 | /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. |
1760 | FACTOR is number of iterations that each data reference is accessed. |
1761 | |
1762 | Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0, |
1763 | we create an expression: |
1764 | |
1765 | ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) |
1766 | || (load_ptr_0 + load_segment_length_0) <= store_ptr_0)) |
1767 | |
1768 | for aliasing checks. However, in some cases we can decrease the number |
1769 | of checks by combining two checks into one. For example, suppose we have |
1770 | another pair of data refs store_ptr_0 & load_ptr_1, and if the following |
1771 | condition is satisfied: |
1772 | |
1773 | load_ptr_0 < load_ptr_1 && |
1774 | load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0 |
1775 | |
1776 | (this condition means, in each iteration of vectorized loop, the accessed |
1777 | memory of store_ptr_0 cannot be between the memory of load_ptr_0 and |
1778 | load_ptr_1.) |
1779 | |
   we then can use only the following expression to finish the aliasing checks
1781 | between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1: |
1782 | |
1783 | ((store_ptr_0 + store_segment_length_0) <= load_ptr_0) |
1784 | || (load_ptr_1 + load_segment_length_1 <= store_ptr_0)) |
1785 | |
1786 | Note that we only consider that load_ptr_0 and load_ptr_1 have the same |
1787 | basic address. */ |
1788 | |
1789 | void |
1790 | prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, |
1791 | poly_uint64) |
1792 | { |
1793 | if (alias_pairs->is_empty ()) |
1794 | return; |
1795 | |
1796 | /* Canonicalize each pair so that the base components are ordered wrt |
1797 | data_ref_compare_tree. This allows the loop below to merge more |
1798 | cases. */ |
1799 | unsigned int i; |
1800 | dr_with_seg_len_pair_t *alias_pair; |
1801 | FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) |
1802 | { |
1803 | data_reference_p dr_a = alias_pair->first.dr; |
1804 | data_reference_p dr_b = alias_pair->second.dr; |
1805 | int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), |
1806 | DR_BASE_ADDRESS (dr_b)); |
1807 | if (comp_res == 0) |
1808 | comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); |
1809 | if (comp_res == 0) |
1810 | comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); |
1811 | if (comp_res > 0) |
1812 | { |
1813 | std::swap (a&: alias_pair->first, b&: alias_pair->second); |
1814 | alias_pair->flags |= DR_ALIAS_SWAPPED; |
1815 | } |
1816 | else |
1817 | alias_pair->flags |= DR_ALIAS_UNSWAPPED; |
1818 | } |
1819 | |
1820 | /* Sort the collected data ref pairs so that we can scan them once to |
1821 | combine all possible aliasing checks. */ |
1822 | alias_pairs->qsort (comp_dr_with_seg_len_pair); |
1823 | |
1824 | /* Scan the sorted dr pairs and check if we can combine alias checks |
1825 | of two neighboring dr pairs. */ |
1826 | unsigned int last = 0; |
1827 | for (i = 1; i < alias_pairs->length (); ++i) |
1828 | { |
1829 | /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ |
1830 | dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last]; |
1831 | dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; |
1832 | |
1833 | dr_with_seg_len *dr_a1 = &alias_pair1->first; |
1834 | dr_with_seg_len *dr_b1 = &alias_pair1->second; |
1835 | dr_with_seg_len *dr_a2 = &alias_pair2->first; |
1836 | dr_with_seg_len *dr_b2 = &alias_pair2->second; |
1837 | |
1838 | /* Remove duplicate data ref pairs. */ |
1839 | if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2) |
1840 | { |
1841 | if (dump_enabled_p ()) |
1842 | dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n" , |
1843 | DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), |
1844 | DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); |
1845 | alias_pair1->flags |= alias_pair2->flags; |
1846 | continue; |
1847 | } |
1848 | |
1849 | /* Assume that we won't be able to merge the pairs, then correct |
1850 | if we do. */ |
1851 | last += 1; |
1852 | if (last != i) |
1853 | (*alias_pairs)[last] = (*alias_pairs)[i]; |
1854 | |
1855 | if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2) |
1856 | { |
1857 | /* We consider the case that DR_B1 and DR_B2 are same memrefs, |
1858 | and DR_A1 and DR_A2 are two consecutive memrefs. */ |
1859 | if (*dr_a1 == *dr_a2) |
1860 | { |
1861 | std::swap (a&: dr_a1, b&: dr_b1); |
1862 | std::swap (a&: dr_a2, b&: dr_b2); |
1863 | } |
1864 | |
1865 | poly_int64 init_a1, init_a2; |
1866 | /* Only consider cases in which the distance between the initial |
1867 | DR_A1 and the initial DR_A2 is known at compile time. */ |
1868 | if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr), |
1869 | DR_BASE_ADDRESS (dr_a2->dr), flags: 0) |
1870 | || !operand_equal_p (DR_OFFSET (dr_a1->dr), |
1871 | DR_OFFSET (dr_a2->dr), flags: 0) |
1872 | || !poly_int_tree_p (DR_INIT (dr_a1->dr), value: &init_a1) |
1873 | || !poly_int_tree_p (DR_INIT (dr_a2->dr), value: &init_a2)) |
1874 | continue; |
1875 | |
1876 | /* Don't combine if we can't tell which one comes first. */ |
1877 | if (!ordered_p (a: init_a1, b: init_a2)) |
1878 | continue; |
1879 | |
1880 | /* Work out what the segment length would be if we did combine |
1881 | DR_A1 and DR_A2: |
1882 | |
1883 | - If DR_A1 and DR_A2 have equal lengths, that length is |
1884 | also the combined length. |
1885 | |
1886 | - If DR_A1 and DR_A2 both have negative "lengths", the combined |
1887 | length is the lower bound on those lengths. |
1888 | |
1889 | - If DR_A1 and DR_A2 both have positive lengths, the combined |
1890 | length is the upper bound on those lengths. |
1891 | |
1892 | Other cases are unlikely to give a useful combination. |
1893 | |
1894 | The lengths both have sizetype, so the sign is taken from |
1895 | the step instead. */ |
1896 | poly_uint64 new_seg_len = 0; |
1897 | bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len, |
1898 | dr_a2->seg_len, flags: 0); |
1899 | if (new_seg_len_p) |
1900 | { |
1901 | poly_uint64 seg_len_a1, seg_len_a2; |
1902 | if (!poly_int_tree_p (t: dr_a1->seg_len, value: &seg_len_a1) |
1903 | || !poly_int_tree_p (t: dr_a2->seg_len, value: &seg_len_a2)) |
1904 | continue; |
1905 | |
1906 | tree indicator_a = dr_direction_indicator (dr_a1->dr); |
1907 | if (TREE_CODE (indicator_a) != INTEGER_CST) |
1908 | continue; |
1909 | |
1910 | tree indicator_b = dr_direction_indicator (dr_a2->dr); |
1911 | if (TREE_CODE (indicator_b) != INTEGER_CST) |
1912 | continue; |
1913 | |
1914 | int sign_a = tree_int_cst_sgn (indicator_a); |
1915 | int sign_b = tree_int_cst_sgn (indicator_b); |
1916 | |
1917 | if (sign_a <= 0 && sign_b <= 0) |
1918 | new_seg_len = lower_bound (a: seg_len_a1, b: seg_len_a2); |
1919 | else if (sign_a >= 0 && sign_b >= 0) |
1920 | new_seg_len = upper_bound (a: seg_len_a1, b: seg_len_a2); |
1921 | else |
1922 | continue; |
1923 | } |
1924 | /* At this point we're committed to merging the refs. */ |
1925 | |
1926 | /* Make sure dr_a1 starts left of dr_a2. */ |
1927 | if (maybe_gt (init_a1, init_a2)) |
1928 | { |
1929 | std::swap (a&: *dr_a1, b&: *dr_a2); |
1930 | std::swap (a&: init_a1, b&: init_a2); |
1931 | } |
1932 | |
1933 | /* The DR_Bs are equal, so only the DR_As can introduce |
1934 | mixed steps. */ |
1935 | if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), flags: 0)) |
1936 | alias_pair1->flags |= DR_ALIAS_MIXED_STEPS; |
1937 | |
1938 | if (new_seg_len_p) |
1939 | { |
1940 | dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), |
1941 | new_seg_len); |
1942 | dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); |
1943 | } |
1944 | |
1945 | /* This is always positive due to the swap above. */ |
1946 | poly_uint64 diff = init_a2 - init_a1; |
1947 | |
1948 | /* The new check will start at DR_A1. Make sure that its access |
1949 | size encompasses the initial DR_A2. */ |
1950 | if (maybe_lt (a: dr_a1->access_size, b: diff + dr_a2->access_size)) |
1951 | { |
1952 | dr_a1->access_size = upper_bound (a: dr_a1->access_size, |
1953 | b: diff + dr_a2->access_size); |
1954 | unsigned int new_align = known_alignment (a: dr_a1->access_size); |
1955 | dr_a1->align = MIN (dr_a1->align, new_align); |
1956 | } |
1957 | if (dump_enabled_p ()) |
1958 | dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n" , |
1959 | DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), |
1960 | DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); |
1961 | alias_pair1->flags |= alias_pair2->flags; |
1962 | last -= 1; |
1963 | } |
1964 | } |
1965 | alias_pairs->truncate (size: last + 1); |
1966 | |
1967 | /* Try to restore the original dr_with_seg_len order within each |
1968 | dr_with_seg_len_pair_t. If we ended up combining swapped and |
1969 | unswapped pairs into the same check, we have to invalidate any |
1970 | RAW, WAR and WAW information for it. */ |
1971 | if (dump_enabled_p ()) |
1972 | dump_printf (MSG_NOTE, "merged alias checks:\n" ); |
1973 | FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) |
1974 | { |
1975 | unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); |
1976 | unsigned int swapped = (alias_pair->flags & swap_mask); |
1977 | if (swapped == DR_ALIAS_SWAPPED) |
1978 | std::swap (a&: alias_pair->first, b&: alias_pair->second); |
1979 | else if (swapped != DR_ALIAS_UNSWAPPED) |
1980 | alias_pair->flags |= DR_ALIAS_ARBITRARY; |
1981 | alias_pair->flags &= ~swap_mask; |
1982 | if (dump_enabled_p ()) |
1983 | dump_alias_pair (alias_pair, indent: " " ); |
1984 | } |
1985 | } |
1986 | |
1987 | /* A subroutine of create_intersect_range_checks, with a subset of the |
1988 | same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS |
1989 | to optimize cases in which the references form a simple RAW, WAR or |
   WAW dependence.  */
1991 | |
1992 | static bool |
1993 | create_ifn_alias_checks (tree *cond_expr, |
1994 | const dr_with_seg_len_pair_t &alias_pair) |
1995 | { |
1996 | const dr_with_seg_len& dr_a = alias_pair.first; |
1997 | const dr_with_seg_len& dr_b = alias_pair.second; |
1998 | |
1999 | /* Check for cases in which: |
2000 | |
2001 | (a) we have a known RAW, WAR or WAR dependence |
2002 | (b) the accesses are well-ordered in both the original and new code |
2003 | (see the comment above the DR_ALIAS_* flags for details); and |
2004 | (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ |
2005 | if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW)) |
2006 | return false; |
2007 | |
2008 | /* Make sure that both DRs access the same pattern of bytes, |
2009 | with a constant length and step. */ |
2010 | poly_uint64 seg_len; |
2011 | if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, flags: 0) |
2012 | || !poly_int_tree_p (t: dr_a.seg_len, value: &seg_len) |
2013 | || maybe_ne (a: dr_a.access_size, b: dr_b.access_size) |
2014 | || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), flags: 0) |
2015 | || !tree_fits_uhwi_p (DR_STEP (dr_a.dr))) |
2016 | return false; |
2017 | |
2018 | unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr)); |
2019 | tree addr_a = DR_BASE_ADDRESS (dr_a.dr); |
2020 | tree addr_b = DR_BASE_ADDRESS (dr_b.dr); |
2021 | |
2022 | /* See whether the target suports what we want to do. WAW checks are |
2023 | equivalent to WAR checks here. */ |
2024 | internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW |
2025 | ? IFN_CHECK_RAW_PTRS |
2026 | : IFN_CHECK_WAR_PTRS); |
2027 | unsigned int align = MIN (dr_a.align, dr_b.align); |
2028 | poly_uint64 full_length = seg_len + bytes; |
2029 | if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), |
2030 | full_length, align)) |
2031 | { |
2032 | full_length = seg_len + dr_a.access_size; |
2033 | if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), |
2034 | full_length, align)) |
2035 | return false; |
2036 | } |
2037 | |
2038 | /* Commit to using this form of test. */ |
2039 | addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); |
2040 | addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); |
2041 | |
2042 | addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); |
2043 | addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); |
2044 | |
2045 | *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, |
2046 | ifn, boolean_type_node, |
2047 | 4, addr_a, addr_b, |
2048 | size_int (full_length), |
2049 | size_int (align)); |
2050 | |
2051 | if (dump_enabled_p ()) |
2052 | { |
2053 | if (ifn == IFN_CHECK_RAW_PTRS) |
2054 | dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n" ); |
2055 | else |
2056 | dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n" ); |
2057 | } |
2058 | return true; |
2059 | } |
2060 | |
2061 | /* Try to generate a runtime condition that is true if ALIAS_PAIR is |
2062 | free of aliases, using a condition based on index values instead |
2063 | of a condition based on addresses. Return true on success, |
2064 | storing the condition in *COND_EXPR. |
2065 | |
2066 | This can only be done if the two data references in ALIAS_PAIR access |
2067 | the same array object and the index is the only difference. For example, |
2068 | if the two data references are DR_A and DR_B: |
2069 | |
2070 | DR_A DR_B |
2071 | data-ref arr[i] arr[j] |
2072 | base_object arr arr |
2073 | index {i_0, +, 1}_loop {j_0, +, 1}_loop |
2074 | |
2075 | The addresses and their index are like: |
2076 | |
2077 | |<- ADDR_A ->| |<- ADDR_B ->| |
2078 | -------------------------------------------------------> |
2079 | | | | | | | | | | | |
2080 | -------------------------------------------------------> |
2081 | i_0 ... i_0+4 j_0 ... j_0+4 |
2082 | |
2083 | We can create expression based on index rather than address: |
2084 | |
2085 | (unsigned) (i_0 - j_0 + 3) <= 6 |
2086 | |
2087 | i.e. the indices are less than 4 apart. |
2088 | |
2089 | Note evolution step of index needs to be considered in comparison. */ |
2090 | |
2091 | static bool |
2092 | create_intersect_range_checks_index (class loop *loop, tree *cond_expr, |
2093 | const dr_with_seg_len_pair_t &alias_pair) |
2094 | { |
2095 | const dr_with_seg_len &dr_a = alias_pair.first; |
2096 | const dr_with_seg_len &dr_b = alias_pair.second; |
2097 | if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS) |
2098 | || integer_zerop (DR_STEP (dr_a.dr)) |
2099 | || integer_zerop (DR_STEP (dr_b.dr)) |
2100 | || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) |
2101 | return false; |
2102 | |
2103 | poly_uint64 seg_len1, seg_len2; |
2104 | if (!poly_int_tree_p (t: dr_a.seg_len, value: &seg_len1) |
2105 | || !poly_int_tree_p (t: dr_b.seg_len, value: &seg_len2)) |
2106 | return false; |
2107 | |
2108 | if (!tree_fits_shwi_p (DR_STEP (dr_a.dr))) |
2109 | return false; |
2110 | |
2111 | if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), flags: 0)) |
2112 | return false; |
2113 | |
2114 | if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), flags: 0)) |
2115 | return false; |
2116 | |
2117 | gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST); |
2118 | |
2119 | bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0; |
2120 | unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr)); |
2121 | if (neg_step) |
2122 | { |
2123 | abs_step = -abs_step; |
2124 | seg_len1 = (-wi::to_poly_wide (t: dr_a.seg_len)).force_uhwi (); |
2125 | seg_len2 = (-wi::to_poly_wide (t: dr_b.seg_len)).force_uhwi (); |
2126 | } |
2127 | |
2128 | /* Infer the number of iterations with which the memory segment is accessed |
2129 | by DR. In other words, alias is checked if memory segment accessed by |
2130 | DR_A in some iterations intersect with memory segment accessed by DR_B |
2131 | in the same amount iterations. |
2132 | Note segnment length is a linear function of number of iterations with |
2133 | DR_STEP as the coefficient. */ |
2134 | poly_uint64 niter_len1, niter_len2; |
2135 | if (!can_div_trunc_p (a: seg_len1 + abs_step - 1, b: abs_step, quotient: &niter_len1) |
2136 | || !can_div_trunc_p (a: seg_len2 + abs_step - 1, b: abs_step, quotient: &niter_len2)) |
2137 | return false; |
2138 | |
2139 | /* Divide each access size by the byte step, rounding up. */ |
2140 | poly_uint64 niter_access1, niter_access2; |
2141 | if (!can_div_trunc_p (a: dr_a.access_size + abs_step - 1, |
2142 | b: abs_step, quotient: &niter_access1) |
2143 | || !can_div_trunc_p (a: dr_b.access_size + abs_step - 1, |
2144 | b: abs_step, quotient: &niter_access2)) |
2145 | return false; |
2146 | |
2147 | bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0; |
2148 | |
2149 | int found = -1; |
2150 | for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) |
2151 | { |
2152 | tree access1 = DR_ACCESS_FN (dr_a.dr, i); |
2153 | tree access2 = DR_ACCESS_FN (dr_b.dr, i); |
2154 | /* Two indices must be the same if they are not scev, or not scev wrto |
2155 | current loop being vecorized. */ |
2156 | if (TREE_CODE (access1) != POLYNOMIAL_CHREC |
2157 | || TREE_CODE (access2) != POLYNOMIAL_CHREC |
2158 | || CHREC_VARIABLE (access1) != (unsigned)loop->num |
2159 | || CHREC_VARIABLE (access2) != (unsigned)loop->num) |
2160 | { |
2161 | if (operand_equal_p (access1, access2, flags: 0)) |
2162 | continue; |
2163 | |
2164 | return false; |
2165 | } |
2166 | if (found >= 0) |
2167 | return false; |
2168 | found = i; |
2169 | } |
2170 | |
2171 | /* Ought not to happen in practice, since if all accesses are equal then the |
2172 | alias should be decidable at compile time. */ |
2173 | if (found < 0) |
2174 | return false; |
2175 | |
2176 | /* The two indices must have the same step. */ |
2177 | tree access1 = DR_ACCESS_FN (dr_a.dr, found); |
2178 | tree access2 = DR_ACCESS_FN (dr_b.dr, found); |
2179 | if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), flags: 0)) |
2180 | return false; |
2181 | |
2182 | tree idx_step = CHREC_RIGHT (access1); |
2183 | /* Index must have const step, otherwise DR_STEP won't be constant. */ |
2184 | gcc_assert (TREE_CODE (idx_step) == INTEGER_CST); |
2185 | /* Index must evaluate in the same direction as DR. */ |
2186 | gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1); |
2187 | |
2188 | tree min1 = CHREC_LEFT (access1); |
2189 | tree min2 = CHREC_LEFT (access2); |
2190 | if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2))) |
2191 | return false; |
2192 | |
2193 | /* Ideally, alias can be checked against loop's control IV, but we |
2194 | need to prove linear mapping between control IV and reference |
2195 | index. Although that should be true, we check against (array) |
2196 | index of data reference. Like segment length, index length is |
2197 | linear function of the number of iterations with index_step as |
2198 | the coefficient, i.e, niter_len * idx_step. */ |
2199 | offset_int abs_idx_step = offset_int::from (x: wi::to_wide (t: idx_step), |
2200 | sgn: SIGNED); |
2201 | if (neg_step) |
2202 | abs_idx_step = -abs_idx_step; |
2203 | poly_offset_int idx_len1 = abs_idx_step * niter_len1; |
2204 | poly_offset_int idx_len2 = abs_idx_step * niter_len2; |
2205 | poly_offset_int idx_access1 = abs_idx_step * niter_access1; |
2206 | poly_offset_int idx_access2 = abs_idx_step * niter_access2; |
2207 | |
2208 | gcc_assert (known_ge (idx_len1, 0) |
2209 | && known_ge (idx_len2, 0) |
2210 | && known_ge (idx_access1, 0) |
2211 | && known_ge (idx_access2, 0)); |
2212 | |
2213 | /* Each access has the following pattern, with lengths measured |
2214 | in units of INDEX: |
2215 | |
2216 | <-- idx_len --> |
2217 | <--- A: -ve step ---> |
2218 | +-----+-------+-----+-------+-----+ |
2219 | | n-1 | ..... | 0 | ..... | n-1 | |
2220 | +-----+-------+-----+-------+-----+ |
2221 | <--- B: +ve step ---> |
2222 | <-- idx_len --> |
2223 | | |
2224 | min |
2225 | |
2226 | where "n" is the number of scalar iterations covered by the segment |
2227 | and where each access spans idx_access units. |
2228 | |
2229 | A is the range of bytes accessed when the step is negative, |
2230 | B is the range when the step is positive. |
2231 | |
2232 | When checking for general overlap, we need to test whether |
2233 | the range: |
2234 | |
2235 | [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1] |
2236 | |
2237 | overlaps: |
2238 | |
2239 | [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] |
2240 | |
2241 | where: |
2242 | |
2243 | low_offsetN = +ve step ? 0 : -idx_lenN; |
2244 | high_offsetN = +ve step ? idx_lenN : 0; |
2245 | |
2246 | This is equivalent to testing whether: |
2247 | |
2248 | min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 |
2249 | && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 |
2250 | |
2251 | Converting this into a single test, there is an overlap if: |
2252 | |
2253 | 0 <= min2 - min1 + bias <= limit |
2254 | |
2255 | where bias = high_offset2 + idx_access2 - 1 - low_offset1 |
2256 | limit = (high_offset1 - low_offset1 + idx_access1 - 1) |
2257 | + (high_offset2 - low_offset2 + idx_access2 - 1) |
2258 | i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 |
2259 | |
2260 | Combining the tests requires limit to be computable in an unsigned |
2261 | form of the index type; if it isn't, we fall back to the usual |
2262 | pointer-based checks. |
2263 | |
2264 | We can do better if DR_B is a write and if DR_A and DR_B are |
2265 | well-ordered in both the original and the new code (see the |
2266 | comment above the DR_ALIAS_* flags for details). In this case |
2267 | we know that for each i in [0, n-1], the write performed by |
2268 | access i of DR_B occurs after access numbers j<=i of DR_A in |
2269 | both the original and the new code. Any write or anti |
2270 | dependencies wrt those DR_A accesses are therefore maintained. |
2271 | |
2272 | We just need to make sure that each individual write in DR_B does not |
2273 | overlap any higher-indexed access in DR_A; such DR_A accesses happen |
2274 | after the DR_B access in the original code but happen before it in |
2275 | the new code. |
2276 | |
2277 | We know the steps for both accesses are equal, so by induction, we |
2278 | just need to test whether the first write of DR_B overlaps a later |
2279 | access of DR_A. In other words, we need to move min1 along by |
2280 | one iteration: |
2281 | |
2282 | min1' = min1 + idx_step |
2283 | |
2284 | and use the ranges: |
2285 | |
2286 | [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1] |
2287 | |
2288 | and: |
2289 | |
2290 | [min2, min2 + idx_access2 - 1] |
2291 | |
2292 | where: |
2293 | |
2294 | low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|) |
2295 | high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */ |
2296 | if (waw_or_war_p) |
2297 | idx_len1 -= abs_idx_step; |
2298 | |
2299 | poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1; |
2300 | if (!waw_or_war_p) |
2301 | limit += idx_len2; |
2302 | |
2303 | tree utype = unsigned_type_for (TREE_TYPE (min1)); |
2304 | if (!wi::fits_to_tree_p (x: limit, type: utype)) |
2305 | return false; |
2306 | |
2307 | poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; |
2308 | poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2; |
2309 | poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; |
2310 | /* Equivalent to adding IDX_STEP to MIN1. */ |
2311 | if (waw_or_war_p) |
2312 | bias -= wi::to_offset (t: idx_step); |
2313 | |
2314 | tree subject = fold_build2 (MINUS_EXPR, utype, |
2315 | fold_convert (utype, min2), |
2316 | fold_convert (utype, min1)); |
2317 | subject = fold_build2 (PLUS_EXPR, utype, subject, |
2318 | wide_int_to_tree (utype, bias)); |
2319 | tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, |
2320 | wide_int_to_tree (utype, limit)); |
2321 | if (*cond_expr) |
2322 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
2323 | *cond_expr, part_cond_expr); |
2324 | else |
2325 | *cond_expr = part_cond_expr; |
2326 | if (dump_enabled_p ()) |
2327 | { |
2328 | if (waw_or_war_p) |
2329 | dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n" ); |
2330 | else |
2331 | dump_printf (MSG_NOTE, "using an index-based overlap test\n" ); |
2332 | } |
2333 | return true; |
2334 | } |
2335 | |
2336 | /* A subroutine of create_intersect_range_checks, with a subset of the |
2337 | same arguments. Try to optimize cases in which the second access |
2338 | is a write and in which some overlap is valid. */ |
2339 | |
static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.
     This is evaluated at runtime; the direction need not be known at
     compile time.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
	   base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

       low_offset_a = +ve step ? 0 : seg_len_a - step
       high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where  limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  /* Wrap in SAVE_EXPR so the value is computed once and shared by the
     several uses below.  */
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  /* SUBJECT is unsigned, so "SUBJECT > LIMIT" is the single-comparison
     form of "the biased difference lies outside [0, limit]", i.e. the
     accesses do not overlap.  */
  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
2492 | |
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
2504 | |
static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
	   base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)	[LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size	[UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  /* NEG_STEP is a runtime boolean tree; the direction of D need not be
     known at compile time.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  /* Full start address of the reference: base + offset + init.  */
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  /* MIN_REACH implements [LB] above: for a negative step, SEG_LEN is
     itself negative (in the wrapping sizetype sense), so adding it
     reaches below the base address.  MAX_REACH implements [UB - ALIGN].  */
  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
2576 | |
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate
   a test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len& dr_a = alias_pair.first;
  const dr_with_seg_len& dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  /* Try the cheaper/more precise strategies first; each returns true
     (with *COND_EXPR set) when it applies.  */
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = MIN (dr_a.align, dr_b.align);
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (d: dr_a, seg_min_out: &seg_a_min, seg_max_out: &seg_a_max, align: min_align);
  get_segment_min_max (d: dr_b, seg_min_out: &seg_b_min, seg_max_out: &seg_b_max, align: min_align);

  /* No alias when segment A is entirely below B or B entirely below A.  */
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n" );
}
2646 | |
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.

   *COND_EXPR may be NULL_TREE on entry; otherwise the per-pair checks
   are ANDed onto the existing condition.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree * cond_expr)
{
  tree part_cond_expr;

  /* The checks are folded speculatively; suppress overflow warnings
     for expressions that may never execute.  */
  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n" ,
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create condition expression for each pair data references.  */
      create_intersect_range_checks (loop, cond_expr: &part_cond_expr, alias_pair);
      /* Conjoin with any condition built so far: the loop version is
	 only safe when every pair is alias-free.  */
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
2680 | |
2681 | /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical |
2682 | expressions. */ |
2683 | static bool |
2684 | dr_equal_offsets_p1 (tree offset1, tree offset2) |
2685 | { |
2686 | bool res; |
2687 | |
2688 | STRIP_NOPS (offset1); |
2689 | STRIP_NOPS (offset2); |
2690 | |
2691 | if (offset1 == offset2) |
2692 | return true; |
2693 | |
2694 | if (TREE_CODE (offset1) != TREE_CODE (offset2) |
2695 | || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) |
2696 | return false; |
2697 | |
2698 | res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), |
2699 | TREE_OPERAND (offset2, 0)); |
2700 | |
2701 | if (!res || !BINARY_CLASS_P (offset1)) |
2702 | return res; |
2703 | |
2704 | res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), |
2705 | TREE_OPERAND (offset2, 1)); |
2706 | |
2707 | return res; |
2708 | } |
2709 | |
2710 | /* Check if DRA and DRB have equal offsets. */ |
2711 | bool |
2712 | dr_equal_offsets_p (struct data_reference *dra, |
2713 | struct data_reference *drb) |
2714 | { |
2715 | tree offset1, offset2; |
2716 | |
2717 | offset1 = DR_OFFSET (dra); |
2718 | offset2 = DR_OFFSET (drb); |
2719 | |
2720 | return dr_equal_offsets_p1 (offset1, offset2); |
2721 | } |
2722 | |
2723 | /* Returns true if FNA == FNB. */ |
2724 | |
2725 | static bool |
2726 | affine_function_equal_p (affine_fn fna, affine_fn fnb) |
2727 | { |
2728 | unsigned i, n = fna.length (); |
2729 | |
2730 | if (n != fnb.length ()) |
2731 | return false; |
2732 | |
2733 | for (i = 0; i < n; i++) |
2734 | if (!operand_equal_p (fna[i], fnb[i], flags: 0)) |
2735 | return false; |
2736 | |
2737 | return true; |
2738 | } |
2739 | |
2740 | /* If all the functions in CF are the same, returns one of them, |
2741 | otherwise returns NULL. */ |
2742 | |
2743 | static affine_fn |
2744 | common_affine_function (conflict_function *cf) |
2745 | { |
2746 | unsigned i; |
2747 | affine_fn comm; |
2748 | |
2749 | if (!CF_NONTRIVIAL_P (cf)) |
2750 | return affine_fn (); |
2751 | |
2752 | comm = cf->fns[0]; |
2753 | |
2754 | for (i = 1; i < cf->n; i++) |
2755 | if (!affine_function_equal_p (fna: comm, fnb: cf->fns[i])) |
2756 | return affine_fn (); |
2757 | |
2758 | return comm; |
2759 | } |
2760 | |
/* Returns the base (constant term, element 0) of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2768 | |
2769 | /* Returns true if FN is a constant. */ |
2770 | |
2771 | static bool |
2772 | affine_function_constant_p (affine_fn fn) |
2773 | { |
2774 | unsigned i; |
2775 | tree coef; |
2776 | |
2777 | for (i = 1; fn.iterate (ix: i, ptr: &coef); i++) |
2778 | if (!integer_zerop (coef)) |
2779 | return false; |
2780 | |
2781 | return true; |
2782 | } |
2783 | |
2784 | /* Returns true if FN is the zero constant function. */ |
2785 | |
2786 | static bool |
2787 | affine_function_zero_p (affine_fn fn) |
2788 | { |
2789 | return (integer_zerop (affine_function_base (fn)) |
2790 | && affine_function_constant_p (fn)); |
2791 | } |
2792 | |
2793 | /* Returns a signed integer type with the largest precision from TA |
2794 | and TB. */ |
2795 | |
2796 | static tree |
2797 | signed_type_for_types (tree ta, tree tb) |
2798 | { |
2799 | if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) |
2800 | return signed_type_for (ta); |
2801 | else |
2802 | return signed_type_for (tb); |
2803 | } |
2804 | |
2805 | /* Applies operation OP on affine functions FNA and FNB, and returns the |
2806 | result. */ |
2807 | |
2808 | static affine_fn |
2809 | affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) |
2810 | { |
2811 | unsigned i, n, m; |
2812 | affine_fn ret; |
2813 | tree coef; |
2814 | |
2815 | if (fnb.length () > fna.length ()) |
2816 | { |
2817 | n = fna.length (); |
2818 | m = fnb.length (); |
2819 | } |
2820 | else |
2821 | { |
2822 | n = fnb.length (); |
2823 | m = fna.length (); |
2824 | } |
2825 | |
2826 | ret.create (nelems: m); |
2827 | for (i = 0; i < n; i++) |
2828 | { |
2829 | tree type = signed_type_for_types (TREE_TYPE (fna[i]), |
2830 | TREE_TYPE (fnb[i])); |
2831 | ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); |
2832 | } |
2833 | |
2834 | for (; fna.iterate (ix: i, ptr: &coef); i++) |
2835 | ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), |
2836 | coef, integer_zero_node)); |
2837 | for (; fnb.iterate (ix: i, ptr: &coef); i++) |
2838 | ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), |
2839 | integer_zero_node, coef)); |
2840 | |
2841 | return ret; |
2842 | } |
2843 | |
/* Returns the sum of affine functions FNA and FNB as a freshly
   allocated affine function.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (op: PLUS_EXPR, fna, fnb);
}
2851 | |
/* Returns the difference of affine functions FNA and FNB as a freshly
   allocated affine function.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (op: MINUS_EXPR, fna, fnb);
}
2859 | |
/* Frees the coefficient vector backing affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2867 | |
2868 | /* Determine for each subscript in the data dependence relation DDR |
2869 | the distance. */ |
2870 | |
2871 | static void |
2872 | compute_subscript_distance (struct data_dependence_relation *ddr) |
2873 | { |
2874 | conflict_function *cf_a, *cf_b; |
2875 | affine_fn fn_a, fn_b, diff; |
2876 | |
2877 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) |
2878 | { |
2879 | unsigned int i; |
2880 | |
2881 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
2882 | { |
2883 | struct subscript *subscript; |
2884 | |
2885 | subscript = DDR_SUBSCRIPT (ddr, i); |
2886 | cf_a = SUB_CONFLICTS_IN_A (subscript); |
2887 | cf_b = SUB_CONFLICTS_IN_B (subscript); |
2888 | |
2889 | fn_a = common_affine_function (cf: cf_a); |
2890 | fn_b = common_affine_function (cf: cf_b); |
2891 | if (!fn_a.exists () || !fn_b.exists ()) |
2892 | { |
2893 | SUB_DISTANCE (subscript) = chrec_dont_know; |
2894 | return; |
2895 | } |
2896 | diff = affine_fn_minus (fna: fn_a, fnb: fn_b); |
2897 | |
2898 | if (affine_function_constant_p (fn: diff)) |
2899 | SUB_DISTANCE (subscript) = affine_function_base (fn: diff); |
2900 | else |
2901 | SUB_DISTANCE (subscript) = chrec_dont_know; |
2902 | |
2903 | affine_fn_free (fn: diff); |
2904 | } |
2905 | } |
2906 | } |
2907 | |
/* Returns a freshly allocated conflict function representing
   "unknown" (the dependence could not be analyzed).  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}
2918 | |
/* Returns a freshly allocated conflict function representing
   "independent" (no dependence between the accesses).  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
2929 | |
2930 | /* Returns true if the address of OBJ is invariant in LOOP. */ |
2931 | |
2932 | static bool |
2933 | object_address_invariant_in_loop_p (const class loop *loop, const_tree obj) |
2934 | { |
2935 | while (handled_component_p (t: obj)) |
2936 | { |
2937 | if (TREE_CODE (obj) == ARRAY_REF) |
2938 | { |
2939 | for (int i = 1; i < 4; ++i) |
2940 | if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i), |
2941 | loop->num)) |
2942 | return false; |
2943 | } |
2944 | else if (TREE_CODE (obj) == COMPONENT_REF) |
2945 | { |
2946 | if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), |
2947 | loop->num)) |
2948 | return false; |
2949 | } |
2950 | obj = TREE_OPERAND (obj, 0); |
2951 | } |
2952 | |
2953 | if (!INDIRECT_REF_P (obj) |
2954 | && TREE_CODE (obj) != MEM_REF) |
2955 | return true; |
2956 | |
2957 | return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), |
2958 | loop->num); |
2959 | } |
2960 | |
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      /* Same base and offset: disambiguate purely on the constant
	 DR_INIT byte ranges.  */
      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (t: tree_size_a)
	  && poly_int_tree_p (t: tree_size_b)
	  && !ranges_maybe_overlap_p (pos1: wi::to_poly_widest (DR_INIT (a)),
				      size1: wi::to_poly_widest (t: tree_size_a),
				      pos2: wi::to_poly_widest (DR_INIT (b)),
				      size2: wi::to_poly_widest (t: tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      /* Otherwise compare the full references as affine offsets.  */
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-pointer cliques: same clique but different bases means
     the pointers were asserted not to alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
3066 | |
3067 | /* REF_A and REF_B both satisfy access_fn_component_p. Return true |
3068 | if it is meaningful to compare their associated access functions |
3069 | when checking for dependencies. */ |
3070 | |
3071 | static bool |
3072 | access_fn_components_comparable_p (tree ref_a, tree ref_b) |
3073 | { |
3074 | /* Allow pairs of component refs from the following sets: |
3075 | |
3076 | { REALPART_EXPR, IMAGPART_EXPR } |
3077 | { COMPONENT_REF } |
3078 | { ARRAY_REF }. */ |
3079 | tree_code code_a = TREE_CODE (ref_a); |
3080 | tree_code code_b = TREE_CODE (ref_b); |
3081 | if (code_a == IMAGPART_EXPR) |
3082 | code_a = REALPART_EXPR; |
3083 | if (code_b == IMAGPART_EXPR) |
3084 | code_b = REALPART_EXPR; |
3085 | if (code_a != code_b) |
3086 | return false; |
3087 | |
3088 | if (TREE_CODE (ref_a) == COMPONENT_REF) |
3089 | /* ??? We cannot simply use the type of operand #0 of the refs here as |
3090 | the Fortran compiler smuggles type punning into COMPONENT_REFs. |
3091 | Use the DECL_CONTEXT of the FIELD_DECLs instead. */ |
3092 | return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1)) |
3093 | == DECL_CONTEXT (TREE_OPERAND (ref_b, 1))); |
3094 | |
3095 | return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)), |
3096 | TREE_TYPE (TREE_OPERAND (ref_b, 0))); |
3097 | } |
3098 | |
3099 | /* Initialize a data dependence relation RES in LOOP_NEST. USE_ALT_INDICES |
3100 | is true when the main indices of A and B were not comparable so we try again |
3101 | with alternate indices computed on an indirect reference. */ |
3102 | |
3103 | struct data_dependence_relation * |
3104 | initialize_data_dependence_relation (struct data_dependence_relation *res, |
3105 | vec<loop_p> loop_nest, |
3106 | bool use_alt_indices) |
3107 | { |
3108 | struct data_reference *a = DDR_A (res); |
3109 | struct data_reference *b = DDR_B (res); |
3110 | unsigned int i; |
3111 | |
3112 | struct indices *indices_a = &a->indices; |
3113 | struct indices *indices_b = &b->indices; |
3114 | if (use_alt_indices) |
3115 | { |
3116 | if (TREE_CODE (DR_REF (a)) != MEM_REF) |
3117 | indices_a = &a->alt_indices; |
3118 | if (TREE_CODE (DR_REF (b)) != MEM_REF) |
3119 | indices_b = &b->alt_indices; |
3120 | } |
3121 | unsigned int num_dimensions_a = indices_a->access_fns.length (); |
3122 | unsigned int num_dimensions_b = indices_b->access_fns.length (); |
3123 | if (num_dimensions_a == 0 || num_dimensions_b == 0) |
3124 | { |
3125 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3126 | return res; |
3127 | } |
3128 | |
3129 | /* For unconstrained bases, the root (highest-indexed) subscript |
3130 | describes a variation in the base of the original DR_REF rather |
3131 | than a component access. We have no type that accurately describes |
3132 | the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after* |
3133 | applying this subscript) so limit the search to the last real |
3134 | component access. |
3135 | |
3136 | E.g. for: |
3137 | |
3138 | void |
3139 | f (int a[][8], int b[][8]) |
3140 | { |
3141 | for (int i = 0; i < 8; ++i) |
3142 | a[i * 2][0] = b[i][0]; |
3143 | } |
3144 | |
3145 | the a and b accesses have a single ARRAY_REF component reference [0] |
3146 | but have two subscripts. */ |
3147 | if (indices_a->unconstrained_base) |
3148 | num_dimensions_a -= 1; |
3149 | if (indices_b->unconstrained_base) |
3150 | num_dimensions_b -= 1; |
3151 | |
3152 | /* These structures describe sequences of component references in |
3153 | DR_REF (A) and DR_REF (B). Each component reference is tied to a |
3154 | specific access function. */ |
3155 | struct { |
3156 | /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and |
3157 | DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher |
3158 | indices. In C notation, these are the indices of the rightmost |
3159 | component references; e.g. for a sequence .b.c.d, the start |
3160 | index is for .d. */ |
3161 | unsigned int start_a; |
3162 | unsigned int start_b; |
3163 | |
3164 | /* The sequence contains LENGTH consecutive access functions from |
3165 | each DR. */ |
3166 | unsigned int length; |
3167 | |
3168 | /* The enclosing objects for the A and B sequences respectively, |
3169 | i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1) |
3170 | and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */ |
3171 | tree object_a; |
3172 | tree object_b; |
3173 | } full_seq = {}, struct_seq = {}; |
3174 | |
3175 | /* Before each iteration of the loop: |
3176 | |
3177 | - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and |
3178 | - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */ |
3179 | unsigned int index_a = 0; |
3180 | unsigned int index_b = 0; |
3181 | tree ref_a = DR_REF (a); |
3182 | tree ref_b = DR_REF (b); |
3183 | |
3184 | /* Now walk the component references from the final DR_REFs back up to |
3185 | the enclosing base objects. Each component reference corresponds |
3186 | to one access function in the DR, with access function 0 being for |
3187 | the final DR_REF and the highest-indexed access function being the |
3188 | one that is applied to the base of the DR. |
3189 | |
3190 | Look for a sequence of component references whose access functions |
3191 | are comparable (see access_fn_components_comparable_p). If more |
3192 | than one such sequence exists, pick the one nearest the base |
3193 | (which is the leftmost sequence in C notation). Store this sequence |
3194 | in FULL_SEQ. |
3195 | |
3196 | For example, if we have: |
3197 | |
3198 | struct foo { struct bar s; ... } (*a)[10], (*b)[10]; |
3199 | |
3200 | A: a[0][i].s.c.d |
3201 | B: __real b[0][i].s.e[i].f |
3202 | |
3203 | (where d is the same type as the real component of f) then the access |
3204 | functions would be: |
3205 | |
3206 | 0 1 2 3 |
3207 | A: .d .c .s [i] |
3208 | |
3209 | 0 1 2 3 4 5 |
3210 | B: __real .f [i] .e .s [i] |
3211 | |
3212 | The A0/B2 column isn't comparable, since .d is a COMPONENT_REF |
3213 | and [i] is an ARRAY_REF. However, the A1/B3 column contains two |
3214 | COMPONENT_REF accesses for struct bar, so is comparable. Likewise |
3215 | the A2/B4 column contains two COMPONENT_REF accesses for struct foo, |
3216 | so is comparable. The A3/B5 column contains two ARRAY_REFs that |
3217 | index foo[10] arrays, so is again comparable. The sequence is |
3218 | therefore: |
3219 | |
3220 | A: [1, 3] (i.e. [i].s.c) |
3221 | B: [3, 5] (i.e. [i].s.e) |
3222 | |
3223 | Also look for sequences of component references whose access |
3224 | functions are comparable and whose enclosing objects have the same |
3225 | RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above |
3226 | example, STRUCT_SEQ would be: |
3227 | |
3228 | A: [1, 2] (i.e. s.c) |
3229 | B: [3, 4] (i.e. s.e) */ |
3230 | while (index_a < num_dimensions_a && index_b < num_dimensions_b) |
3231 | { |
3232 | /* The alternate indices form always has a single dimension |
3233 | with unconstrained base. */ |
3234 | gcc_assert (!use_alt_indices); |
3235 | |
3236 | /* REF_A and REF_B must be one of the component access types |
3237 | allowed by dr_analyze_indices. */ |
3238 | gcc_checking_assert (access_fn_component_p (ref_a)); |
3239 | gcc_checking_assert (access_fn_component_p (ref_b)); |
3240 | |
3241 | /* Get the immediately-enclosing objects for REF_A and REF_B, |
3242 | i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A) |
3243 | and DR_ACCESS_FN (B, INDEX_B). */ |
3244 | tree object_a = TREE_OPERAND (ref_a, 0); |
3245 | tree object_b = TREE_OPERAND (ref_b, 0); |
3246 | |
3247 | tree type_a = TREE_TYPE (object_a); |
3248 | tree type_b = TREE_TYPE (object_b); |
3249 | if (access_fn_components_comparable_p (ref_a, ref_b)) |
3250 | { |
3251 | /* This pair of component accesses is comparable for dependence |
3252 | analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and |
3253 | DR_ACCESS_FN (B, INDEX_B) in the sequence. */ |
3254 | if (full_seq.start_a + full_seq.length != index_a |
3255 | || full_seq.start_b + full_seq.length != index_b) |
3256 | { |
3257 | /* The accesses don't extend the current sequence, |
3258 | so start a new one here. */ |
3259 | full_seq.start_a = index_a; |
3260 | full_seq.start_b = index_b; |
3261 | full_seq.length = 0; |
3262 | } |
3263 | |
3264 | /* Add this pair of references to the sequence. */ |
3265 | full_seq.length += 1; |
3266 | full_seq.object_a = object_a; |
3267 | full_seq.object_b = object_b; |
3268 | |
3269 | /* If the enclosing objects are structures (and thus have the |
3270 | same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */ |
3271 | if (TREE_CODE (type_a) == RECORD_TYPE) |
3272 | struct_seq = full_seq; |
3273 | |
3274 | /* Move to the next containing reference for both A and B. */ |
3275 | ref_a = object_a; |
3276 | ref_b = object_b; |
3277 | index_a += 1; |
3278 | index_b += 1; |
3279 | continue; |
3280 | } |
3281 | |
3282 | /* Try to approach equal type sizes. */ |
3283 | if (!COMPLETE_TYPE_P (type_a) |
3284 | || !COMPLETE_TYPE_P (type_b) |
3285 | || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a)) |
3286 | || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b))) |
3287 | break; |
3288 | |
3289 | unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a)); |
3290 | unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b)); |
3291 | if (size_a <= size_b) |
3292 | { |
3293 | index_a += 1; |
3294 | ref_a = object_a; |
3295 | } |
3296 | if (size_b <= size_a) |
3297 | { |
3298 | index_b += 1; |
3299 | ref_b = object_b; |
3300 | } |
3301 | } |
3302 | |
3303 | /* See whether FULL_SEQ ends at the base and whether the two bases |
3304 | are equal. We do not care about TBAA or alignment info so we can |
3305 | use OEP_ADDRESS_OF to avoid false negatives. */ |
3306 | tree base_a = indices_a->base_object; |
3307 | tree base_b = indices_b->base_object; |
3308 | bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a |
3309 | && full_seq.start_b + full_seq.length == num_dimensions_b |
3310 | && (indices_a->unconstrained_base |
3311 | == indices_b->unconstrained_base) |
3312 | && operand_equal_p (base_a, base_b, flags: OEP_ADDRESS_OF) |
3313 | && (types_compatible_p (TREE_TYPE (base_a), |
3314 | TREE_TYPE (base_b)) |
3315 | || (!base_supports_access_fn_components_p (base: base_a) |
3316 | && !base_supports_access_fn_components_p (base: base_b) |
3317 | && operand_equal_p |
3318 | (TYPE_SIZE (TREE_TYPE (base_a)), |
3319 | TYPE_SIZE (TREE_TYPE (base_b)), flags: 0))) |
3320 | && (!loop_nest.exists () |
3321 | || (object_address_invariant_in_loop_p |
3322 | (loop: loop_nest[0], obj: base_a)))); |
3323 | |
3324 | /* If the bases are the same, we can include the base variation too. |
3325 | E.g. the b accesses in: |
3326 | |
3327 | for (int i = 0; i < n; ++i) |
3328 | b[i + 4][0] = b[i][0]; |
3329 | |
3330 | have a definite dependence distance of 4, while for: |
3331 | |
3332 | for (int i = 0; i < n; ++i) |
3333 | a[i + 4][0] = b[i][0]; |
3334 | |
3335 | the dependence distance depends on the gap between a and b. |
3336 | |
3337 | If the bases are different then we can only rely on the sequence |
3338 | rooted at a structure access, since arrays are allowed to overlap |
3339 | arbitrarily and change shape arbitrarily. E.g. we treat this as |
3340 | valid code: |
3341 | |
3342 | int a[256]; |
3343 | ... |
3344 | ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0]; |
3345 | |
3346 | where two lvalues with the same int[4][3] type overlap, and where |
3347 | both lvalues are distinct from the object's declared type. */ |
3348 | if (same_base_p) |
3349 | { |
3350 | if (indices_a->unconstrained_base) |
3351 | full_seq.length += 1; |
3352 | } |
3353 | else |
3354 | full_seq = struct_seq; |
3355 | |
3356 | /* Punt if we didn't find a suitable sequence. */ |
3357 | if (full_seq.length == 0) |
3358 | { |
3359 | if (use_alt_indices |
3360 | || (TREE_CODE (DR_REF (a)) == MEM_REF |
3361 | && TREE_CODE (DR_REF (b)) == MEM_REF) |
3362 | || may_be_nonaddressable_p (DR_REF (a)) |
3363 | || may_be_nonaddressable_p (DR_REF (b))) |
3364 | { |
3365 | /* Fully exhausted possibilities. */ |
3366 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3367 | return res; |
3368 | } |
3369 | |
3370 | /* Try evaluating both DRs as dereferences of pointers. */ |
3371 | if (!a->alt_indices.base_object |
3372 | && TREE_CODE (DR_REF (a)) != MEM_REF) |
3373 | { |
3374 | tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)), |
3375 | build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)), |
3376 | build_int_cst |
3377 | (reference_alias_ptr_type (DR_REF (a)), 0)); |
3378 | dr_analyze_indices (dri: &a->alt_indices, ref: alt_ref, |
3379 | nest: loop_preheader_edge (loop_nest[0]), |
3380 | loop: loop_containing_stmt (DR_STMT (a))); |
3381 | } |
3382 | if (!b->alt_indices.base_object |
3383 | && TREE_CODE (DR_REF (b)) != MEM_REF) |
3384 | { |
3385 | tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)), |
3386 | build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)), |
3387 | build_int_cst |
3388 | (reference_alias_ptr_type (DR_REF (b)), 0)); |
3389 | dr_analyze_indices (dri: &b->alt_indices, ref: alt_ref, |
3390 | nest: loop_preheader_edge (loop_nest[0]), |
3391 | loop: loop_containing_stmt (DR_STMT (b))); |
3392 | } |
3393 | return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: true); |
3394 | } |
3395 | |
3396 | if (!same_base_p) |
3397 | { |
3398 | /* Partial overlap is possible for different bases when strict aliasing |
3399 | is not in effect. It's also possible if either base involves a union |
3400 | access; e.g. for: |
3401 | |
3402 | struct s1 { int a[2]; }; |
3403 | struct s2 { struct s1 b; int c; }; |
3404 | struct s3 { int d; struct s1 e; }; |
3405 | union u { struct s2 f; struct s3 g; } *p, *q; |
3406 | |
3407 | the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at |
3408 | "p->g.e" (base "p->g") and might partially overlap the s1 at |
3409 | "q->g.e" (base "q->g"). */ |
3410 | if (!flag_strict_aliasing |
3411 | || ref_contains_union_access_p (ref: full_seq.object_a) |
3412 | || ref_contains_union_access_p (ref: full_seq.object_b)) |
3413 | { |
3414 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3415 | return res; |
3416 | } |
3417 | |
3418 | DDR_COULD_BE_INDEPENDENT_P (res) = true; |
3419 | if (!loop_nest.exists () |
3420 | || (object_address_invariant_in_loop_p (loop: loop_nest[0], |
3421 | obj: full_seq.object_a) |
3422 | && object_address_invariant_in_loop_p (loop: loop_nest[0], |
3423 | obj: full_seq.object_b))) |
3424 | { |
3425 | DDR_OBJECT_A (res) = full_seq.object_a; |
3426 | DDR_OBJECT_B (res) = full_seq.object_b; |
3427 | } |
3428 | } |
3429 | |
3430 | DDR_AFFINE_P (res) = true; |
3431 | DDR_ARE_DEPENDENT (res) = NULL_TREE; |
3432 | DDR_SUBSCRIPTS (res).create (nelems: full_seq.length); |
3433 | DDR_LOOP_NEST (res) = loop_nest; |
3434 | DDR_SELF_REFERENCE (res) = false; |
3435 | |
3436 | for (i = 0; i < full_seq.length; ++i) |
3437 | { |
3438 | struct subscript *subscript; |
3439 | |
3440 | subscript = XNEW (struct subscript); |
3441 | SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i]; |
3442 | SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i]; |
3443 | SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); |
3444 | SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); |
3445 | SUB_LAST_CONFLICT (subscript) = chrec_dont_know; |
3446 | SUB_DISTANCE (subscript) = chrec_dont_know; |
3447 | DDR_SUBSCRIPTS (res).safe_push (obj: subscript); |
3448 | } |
3449 | |
3450 | return res; |
3451 | } |
3452 | |
3453 | /* Initialize a data dependence relation between data accesses A and |
3454 | B. NB_LOOPS is the number of loops surrounding the references: the |
3455 | size of the classic distance/direction vectors. */ |
3456 | |
3457 | struct data_dependence_relation * |
3458 | initialize_data_dependence_relation (struct data_reference *a, |
3459 | struct data_reference *b, |
3460 | vec<loop_p> loop_nest) |
3461 | { |
3462 | data_dependence_relation *res = XCNEW (struct data_dependence_relation); |
3463 | DDR_A (res) = a; |
3464 | DDR_B (res) = b; |
3465 | DDR_LOOP_NEST (res).create (nelems: 0); |
3466 | DDR_SUBSCRIPTS (res).create (nelems: 0); |
3467 | DDR_DIR_VECTS (res).create (nelems: 0); |
3468 | DDR_DIST_VECTS (res).create (nelems: 0); |
3469 | |
3470 | if (a == NULL || b == NULL) |
3471 | { |
3472 | DDR_ARE_DEPENDENT (res) = chrec_dont_know; |
3473 | return res; |
3474 | } |
3475 | |
3476 | /* If the data references do not alias, then they are independent. */ |
3477 | if (!dr_may_alias_p (a, b, loop_nest: loop_nest.exists () ? loop_nest[0] : NULL)) |
3478 | { |
3479 | DDR_ARE_DEPENDENT (res) = chrec_known; |
3480 | return res; |
3481 | } |
3482 | |
3483 | return initialize_data_dependence_relation (res, loop_nest, use_alt_indices: false); |
3484 | } |
3485 | |
3486 | |
3487 | /* Frees memory used by the conflict function F. */ |
3488 | |
3489 | static void |
3490 | free_conflict_function (conflict_function *f) |
3491 | { |
3492 | unsigned i; |
3493 | |
3494 | if (CF_NONTRIVIAL_P (f)) |
3495 | { |
3496 | for (i = 0; i < f->n; i++) |
3497 | affine_fn_free (fn: f->fns[i]); |
3498 | } |
3499 | free (ptr: f); |
3500 | } |
3501 | |
3502 | /* Frees memory used by SUBSCRIPTS. */ |
3503 | |
3504 | static void |
3505 | free_subscripts (vec<subscript_p> subscripts) |
3506 | { |
3507 | for (subscript_p s : subscripts) |
3508 | { |
3509 | free_conflict_function (f: s->conflicting_iterations_in_a); |
3510 | free_conflict_function (f: s->conflicting_iterations_in_b); |
3511 | free (ptr: s); |
3512 | } |
3513 | subscripts.release (); |
3514 | } |
3515 | |
3516 | /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap |
3517 | description. */ |
3518 | |
3519 | static inline void |
3520 | finalize_ddr_dependent (struct data_dependence_relation *ddr, |
3521 | tree chrec) |
3522 | { |
3523 | DDR_ARE_DEPENDENT (ddr) = chrec; |
3524 | free_subscripts (DDR_SUBSCRIPTS (ddr)); |
3525 | DDR_SUBSCRIPTS (ddr).create (nelems: 0); |
3526 | } |
3527 | |
3528 | /* The dependence relation DDR cannot be represented by a distance |
3529 | vector. */ |
3530 | |
3531 | static inline void |
3532 | non_affine_dependence_relation (struct data_dependence_relation *ddr) |
3533 | { |
3534 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3535 | fprintf (stream: dump_file, format: "(Dependence relation cannot be represented by distance vector.) \n" ); |
3536 | |
3537 | DDR_AFFINE_P (ddr) = false; |
3538 | } |
3539 | |
3540 | |
3541 | |
3542 | /* This section contains the classic Banerjee tests. */ |
3543 | |
3544 | /* Returns true iff CHREC_A and CHREC_B are not dependent on any index |
3545 | variables, i.e., if the ZIV (Zero Index Variable) test is true. */ |
3546 | |
3547 | static inline bool |
3548 | ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) |
3549 | { |
3550 | return (evolution_function_is_constant_p (chrec: chrec_a) |
3551 | && evolution_function_is_constant_p (chrec: chrec_b)); |
3552 | } |
3553 | |
3554 | /* Returns true iff CHREC_A and CHREC_B are dependent on an index |
3555 | variable, i.e., if the SIV (Single Index Variable) test is true. */ |
3556 | |
3557 | static bool |
3558 | siv_subscript_p (const_tree chrec_a, const_tree chrec_b) |
3559 | { |
3560 | if ((evolution_function_is_constant_p (chrec: chrec_a) |
3561 | && evolution_function_is_univariate_p (chrec_b)) |
3562 | || (evolution_function_is_constant_p (chrec: chrec_b) |
3563 | && evolution_function_is_univariate_p (chrec_a))) |
3564 | return true; |
3565 | |
3566 | if (evolution_function_is_univariate_p (chrec_a) |
3567 | && evolution_function_is_univariate_p (chrec_b)) |
3568 | { |
3569 | switch (TREE_CODE (chrec_a)) |
3570 | { |
3571 | case POLYNOMIAL_CHREC: |
3572 | switch (TREE_CODE (chrec_b)) |
3573 | { |
3574 | case POLYNOMIAL_CHREC: |
3575 | if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) |
3576 | return false; |
3577 | /* FALLTHRU */ |
3578 | |
3579 | default: |
3580 | return true; |
3581 | } |
3582 | |
3583 | default: |
3584 | return true; |
3585 | } |
3586 | } |
3587 | |
3588 | return false; |
3589 | } |
3590 | |
3591 | /* Creates a conflict function with N dimensions. The affine functions |
3592 | in each dimension follow. */ |
3593 | |
3594 | static conflict_function * |
3595 | conflict_fn (unsigned n, ...) |
3596 | { |
3597 | unsigned i; |
3598 | conflict_function *ret = XCNEW (conflict_function); |
3599 | va_list ap; |
3600 | |
3601 | gcc_assert (n > 0 && n <= MAX_DIM); |
3602 | va_start (ap, n); |
3603 | |
3604 | ret->n = n; |
3605 | for (i = 0; i < n; i++) |
3606 | ret->fns[i] = va_arg (ap, affine_fn); |
3607 | va_end (ap); |
3608 | |
3609 | return ret; |
3610 | } |
3611 | |
3612 | /* Returns constant affine function with value CST. */ |
3613 | |
3614 | static affine_fn |
3615 | affine_fn_cst (tree cst) |
3616 | { |
3617 | affine_fn fn; |
3618 | fn.create (nelems: 1); |
3619 | fn.quick_push (obj: cst); |
3620 | return fn; |
3621 | } |
3622 | |
3623 | /* Returns affine function with single variable, CST + COEF * x_DIM. */ |
3624 | |
3625 | static affine_fn |
3626 | affine_fn_univar (tree cst, unsigned dim, tree coef) |
3627 | { |
3628 | affine_fn fn; |
3629 | fn.create (nelems: dim + 1); |
3630 | unsigned i; |
3631 | |
3632 | gcc_assert (dim > 0); |
3633 | fn.quick_push (obj: cst); |
3634 | for (i = 1; i < dim; i++) |
3635 | fn.quick_push (integer_zero_node); |
3636 | fn.quick_push (obj: coef); |
3637 | return fn; |
3638 | } |
3639 | |
3640 | /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and |
3641 | *OVERLAPS_B are initialized to the functions that describe the |
3642 | relation between the elements accessed twice by CHREC_A and |
3643 | CHREC_B. For k >= 0, the following property is verified: |
3644 | |
3645 | CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ |
3646 | |
3647 | static void |
3648 | analyze_ziv_subscript (tree chrec_a, |
3649 | tree chrec_b, |
3650 | conflict_function **overlaps_a, |
3651 | conflict_function **overlaps_b, |
3652 | tree *last_conflicts) |
3653 | { |
3654 | tree type, difference; |
3655 | dependence_stats.num_ziv++; |
3656 | |
3657 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3658 | fprintf (stream: dump_file, format: "(analyze_ziv_subscript \n" ); |
3659 | |
3660 | type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); |
3661 | chrec_a = chrec_convert (type, chrec_a, NULL); |
3662 | chrec_b = chrec_convert (type, chrec_b, NULL); |
3663 | difference = chrec_fold_minus (type, chrec_a, chrec_b); |
3664 | |
3665 | switch (TREE_CODE (difference)) |
3666 | { |
3667 | case INTEGER_CST: |
3668 | if (integer_zerop (difference)) |
3669 | { |
3670 | /* The difference is equal to zero: the accessed index |
3671 | overlaps for each iteration in the loop. */ |
3672 | *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node)); |
3673 | *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node)); |
3674 | *last_conflicts = chrec_dont_know; |
3675 | dependence_stats.num_ziv_dependent++; |
3676 | } |
3677 | else |
3678 | { |
3679 | /* The accesses do not overlap. */ |
3680 | *overlaps_a = conflict_fn_no_dependence (); |
3681 | *overlaps_b = conflict_fn_no_dependence (); |
3682 | *last_conflicts = integer_zero_node; |
3683 | dependence_stats.num_ziv_independent++; |
3684 | } |
3685 | break; |
3686 | |
3687 | default: |
3688 | /* We're not sure whether the indexes overlap. For the moment, |
3689 | conservatively answer "don't know". */ |
3690 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3691 | fprintf (stream: dump_file, format: "ziv test failed: difference is non-integer.\n" ); |
3692 | |
3693 | *overlaps_a = conflict_fn_not_known (); |
3694 | *overlaps_b = conflict_fn_not_known (); |
3695 | *last_conflicts = chrec_dont_know; |
3696 | dependence_stats.num_ziv_unimplemented++; |
3697 | break; |
3698 | } |
3699 | |
3700 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3701 | fprintf (stream: dump_file, format: ")\n" ); |
3702 | } |
3703 | |
3704 | /* Similar to max_stmt_executions_int, but returns the bound as a tree, |
3705 | and only if it fits to the int type. If this is not the case, or the |
3706 | bound on the number of iterations of LOOP could not be derived, returns |
3707 | chrec_dont_know. */ |
3708 | |
3709 | static tree |
3710 | max_stmt_executions_tree (class loop *loop) |
3711 | { |
3712 | widest_int nit; |
3713 | |
3714 | if (!max_stmt_executions (loop, &nit)) |
3715 | return chrec_dont_know; |
3716 | |
3717 | if (!wi::fits_to_tree_p (x: nit, unsigned_type_node)) |
3718 | return chrec_dont_know; |
3719 | |
3720 | return wide_int_to_tree (unsigned_type_node, cst: nit); |
3721 | } |
3722 | |
3723 | /* Determine whether the CHREC is always positive/negative. If the expression |
3724 | cannot be statically analyzed, return false, otherwise set the answer into |
3725 | VALUE. */ |
3726 | |
3727 | static bool |
3728 | chrec_is_positive (tree chrec, bool *value) |
3729 | { |
3730 | bool value0, value1, value2; |
3731 | tree end_value, nb_iter; |
3732 | |
3733 | switch (TREE_CODE (chrec)) |
3734 | { |
3735 | case POLYNOMIAL_CHREC: |
3736 | if (!chrec_is_positive (CHREC_LEFT (chrec), value: &value0) |
3737 | || !chrec_is_positive (CHREC_RIGHT (chrec), value: &value1)) |
3738 | return false; |
3739 | |
3740 | /* FIXME -- overflows. */ |
3741 | if (value0 == value1) |
3742 | { |
3743 | *value = value0; |
3744 | return true; |
3745 | } |
3746 | |
3747 | /* Otherwise the chrec is under the form: "{-197, +, 2}_1", |
3748 | and the proof consists in showing that the sign never |
3749 | changes during the execution of the loop, from 0 to |
3750 | loop->nb_iterations. */ |
3751 | if (!evolution_function_is_affine_p (chrec)) |
3752 | return false; |
3753 | |
3754 | nb_iter = number_of_latch_executions (get_chrec_loop (chrec)); |
3755 | if (chrec_contains_undetermined (nb_iter)) |
3756 | return false; |
3757 | |
3758 | #if 0 |
3759 | /* TODO -- If the test is after the exit, we may decrease the number of |
3760 | iterations by one. */ |
3761 | if (after_exit) |
3762 | nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1)); |
3763 | #endif |
3764 | |
3765 | end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter); |
3766 | |
3767 | if (!chrec_is_positive (chrec: end_value, value: &value2)) |
3768 | return false; |
3769 | |
3770 | *value = value0; |
3771 | return value0 == value1; |
3772 | |
3773 | case INTEGER_CST: |
3774 | switch (tree_int_cst_sgn (chrec)) |
3775 | { |
3776 | case -1: |
3777 | *value = false; |
3778 | break; |
3779 | case 1: |
3780 | *value = true; |
3781 | break; |
3782 | default: |
3783 | return false; |
3784 | } |
3785 | return true; |
3786 | |
3787 | default: |
3788 | return false; |
3789 | } |
3790 | } |
3791 | |
3792 | |
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Work in a signed type common to both chrecs so that the sign of
     DIFFERENCE below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  /* DIFFERENCE = (initial value of CHREC_B) - CHREC_A.  */
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  /* VALUE0 is the sign of DIFFERENCE: true when CHREC_B starts above
     CHREC_A.  If the sign cannot be determined, give up.  */
  if (!chrec_is_positive (chrec: initial_condition (difference), value: &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "siv test failed: chrec is not positive.\n" );

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_A is above CHREC_B's initial value, so an overlap
	     requires CHREC_B to increase; VALUE1 is the sign of its
	     step.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), value: &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "siv test failed: chrec not positive.\n" );

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), b: difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec: chrec_b);

		      /* CHREC_B hits CHREC_A exactly once, at iteration
			 |DIFFERENCE| / step.  */
		      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (cst: tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (f: *overlaps_a);
			  free_conflict_function (f: *overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* CHREC_B starts above CHREC_A, so an overlap requires
	     CHREC_B to decrease; VALUE2 is the sign of its step.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), value: &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "siv test failed: chrec not positive.\n" );

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), b: difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec: chrec_b);

		      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (cst: tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (f: *overlaps_a);
			  free_conflict_function (f: *overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
4000 | |
/* Helper recursive function for initializing the matrix A.  Walks the
   chrec CHREC, storing MULT times the step of each POLYNOMIAL_CHREC
   level into column 0 of A, starting at row INDEX and moving one row
   deeper per chrec level.  Returns the remaining initial value of
   CHREC (or chrec_dont_know when a step is not a suitable host-wide
   constant).  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  gcc_assert (chrec);

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      /* Recurse on the initial value, one row deeper.  */
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index: index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	/* Fold the initial values of both operands with the same
	   operation.  */
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), type: chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	/* Strip the conversion, then convert the folded result back to
	   the chrec's type.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (code: MINUS_EXPR, type: chrec_type (chrec),
			      op0: build_int_cst (TREE_TYPE (chrec), -1), op1: op);
      }

    case INTEGER_CST:
      /* Base case: a constant is its own initial value.  */
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
4054 | |
4055 | #define FLOOR_DIV(x,y) ((x) / (y)) |
4056 | |
/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  /* Conflicts can only occur when both steps move in the same
     direction.  */
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      /* The solutions of STEP_A * i = STEP_B * j are i = (STEP_B/g) t,
	 j = (STEP_A/g) t with g = gcd (STEP_A, STEP_B).  */
      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  /* Bound the last conflicting solution by the known trip
	     count of both loops.  */
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	/* Unknown trip count: the conflicts cannot be bounded.  */
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      coef: build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      coef: build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      /* Steps of opposite sign (or a zero step): record zero
	 conflicting iterations.  */
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
4107 | |
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* CHREC_A is {{., +, STEP_X}_x, +, STEP_Y}_y and CHREC_B is
     {., +, STEP_Z}_z.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));

  /* A negative count means the bound is unknown: give up.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "overlap steps test failed: no iteration counts.\n" );

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve the three univariate subproblems: step X against Z, step Y
     against Z, and the combined step X+Y against Z.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_x, step_b: step_z,
					   overlaps_a: &overlaps_a_xz,
					   overlaps_b: &overlaps_b_xz,
					   last_conflicts: &last_conflicts_xz, dim: 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_y, step_b: step_z,
					   overlaps_a: &overlaps_a_yz,
					   overlaps_b: &overlaps_b_yz,
					   last_conflicts: &last_conflicts_yz, dim: 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_a: step_x + step_y, step_b: step_z,
					   overlaps_a: &overlaps_a_xyz,
					   overlaps_b: &overlaps_b_xyz,
					   last_conflicts: &last_conflicts_xyz, dim: 3);

  /* A subproblem contributes iff it has a nonzero number of
     conflicting iterations.  */
  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the contributing overlap functions; the temporaries hold
	 the previous values so they can be freed after each sum.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (fna: ova1, fnb: overlaps_a_xz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_xz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (fna: ova2, fnb: overlaps_a_yz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_yz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (fna: ova1, fnb: overlaps_a_xyz);
	  ova2 = affine_fn_plus (fna: ova2, fnb: overlaps_a_xyz);
	  ovb = affine_fn_plus (fna: ovb, fnb: overlaps_b_xyz);
	  affine_fn_free (fn: t0);
	  affine_fn_free (fn: t2);
	  affine_fn_free (fn: t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      /* CHREC_A is bivariate, so its conflict function has two
	 dimensions; CHREC_B's has one.  */
      *overlaps_a = conflict_fn (n: 2, ova1, ova2);
      *overlaps_b = conflict_fn (n: 1, ovb);
    }
  else
    {
      /* No subproblem conflicts: only the origin is recorded, with
	 zero conflicting iterations.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* Release the per-subproblem functions; their sums live on in the
     returned conflict functions.  */
  affine_fn_free (fn: overlaps_a_xz);
  affine_fn_free (fn: overlaps_b_xz);
  affine_fn_free (fn: overlaps_a_yz);
  affine_fn_free (fn: overlaps_b_yz);
  affine_fn_free (fn: overlaps_a_xyz);
  affine_fn_free (fn: overlaps_b_xyz);
}
4236 | |
4237 | /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ |
4238 | |
4239 | static void |
4240 | lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, |
4241 | int size) |
4242 | { |
4243 | memcpy (dest: vec2, src: vec1, n: size * sizeof (*vec1)); |
4244 | } |
4245 | |
4246 | /* Copy the elements of M x N matrix MAT1 to MAT2. */ |
4247 | |
4248 | static void |
4249 | lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, |
4250 | int m, int n) |
4251 | { |
4252 | int i; |
4253 | |
4254 | for (i = 0; i < m; i++) |
4255 | lambda_vector_copy (vec1: mat1[i], vec2: mat2[i], size: n); |
4256 | } |
4257 | |
4258 | /* Store the N x N identity matrix in MAT. */ |
4259 | |
4260 | static void |
4261 | lambda_matrix_id (lambda_matrix mat, int size) |
4262 | { |
4263 | int i, j; |
4264 | |
4265 | for (i = 0; i < size; i++) |
4266 | for (j = 0; j < size; j++) |
4267 | mat[i][j] = (i == j) ? 1 : 0; |
4268 | } |
4269 | |
4270 | /* Return the index of the first nonzero element of vector VEC1 between |
4271 | START and N. We must have START <= N. |
4272 | Returns N if VEC1 is the zero vector. */ |
4273 | |
4274 | static int |
4275 | lambda_vector_first_nz (lambda_vector vec1, int n, int start) |
4276 | { |
4277 | int j = start; |
4278 | while (j < n && vec1[j] == 0) |
4279 | j++; |
4280 | return j; |
4281 | } |
4282 | |
/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns false (leaving R2 partially
   updated) if any intermediate product or sum overflows, or if a
   resulting element would be HOST_WIDE_INT_MIN, which could not later
   be negated without overflow; returns true on success.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  /* Adding zero times a row is a no-op.  */
  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      /* Overflow-checked multiply and add.  */
      lambda_int tem = mul_hwi (a: mat[r1][i], b: const1, overflow: &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (a: mat[r2][i], b: tem, overflow: &ovf);
      /* Also reject HOST_WIDE_INT_MIN so the result stays negatable.  */
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}
4309 | |
4310 | /* Multiply vector VEC1 of length SIZE by a constant CONST1, |
4311 | and store the result in VEC2. */ |
4312 | |
4313 | static void |
4314 | lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, |
4315 | int size, lambda_int const1) |
4316 | { |
4317 | int i; |
4318 | |
4319 | if (const1 == 0) |
4320 | lambda_vector_clear (vec1: vec2, size); |
4321 | else |
4322 | for (i = 0; i < size; i++) |
4323 | vec2[i] = const1 * vec1[i]; |
4324 | } |
4325 | |
4326 | /* Negate vector VEC1 with length SIZE and store it in VEC2. */ |
4327 | |
4328 | static void |
4329 | lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, |
4330 | int size) |
4331 | { |
4332 | lambda_vector_mult_const (vec1, vec2, size, const1: -1); |
4333 | } |
4334 | |
4335 | /* Negate row R1 of matrix MAT which has N columns. */ |
4336 | |
4337 | static void |
4338 | lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) |
4339 | { |
4340 | lambda_vector_negate (vec1: mat[r1], vec2: mat[r1], size: n); |
4341 | } |
4342 | |
4343 | /* Return true if two vectors are equal. */ |
4344 | |
4345 | static bool |
4346 | lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) |
4347 | { |
4348 | int i; |
4349 | for (i = 0; i < size; i++) |
4350 | if (vec1[i] != vec2[i]) |
4351 | return false; |
4352 | return true; |
4353 | } |
4354 | |
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.

   Returns false when an intermediate row operation would overflow,
   in which case the contents of S and U are unusable.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start with S = A and U = identity; every row operation below is
     applied to both matrices, maintaining the invariant U.A = S.  */
  lambda_matrix_copy (mat1: A, mat2: S, m, n);
  lambda_matrix_id (mat: U, size: m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (vec1: S[j], n: m, start: i0) < m)
	{
	  ++i0;
	  /* Reduce the entries below the pivot with a Euclid-style
	     remainder sequence on pairs of adjacent rows: each step
	     subtracts FACTOR times row I from row I-1 and swaps the
	     two rows, shrinking |S[i][j]| until it reaches zero.  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* HOST_WIDE_INT_MIN / -1 would overflow; row
		     operations guarantee this value never appears.  */
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  /* Bail out if the row update overflows.  */
		  if (!lambda_matrix_row_add (mat: S, n, r1: i, r2: i-1, const1: -factor))
		    return false;
		  std::swap (a&: S[i], b&: S[i-1]);

		  /* Mirror the same operation on U to keep U.A = S.  */
		  if (!lambda_matrix_row_add (mat: U, n: m, r1: i, r2: i-1, const1: -factor))
		    return false;
		  std::swap (a&: U[i], b&: U[i-1]);
		}
	    }
	}
    }

  return true;
}
4401 | |
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.

   On return, *OVERLAPS_A and *OVERLAPS_B describe the conflicting
   iterations and *LAST_CONFLICTS the number of conflicts, or the
   conservative answers conflict_fn_not_known/chrec_dont_know.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_subscript_affine_affine \n" );

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All the matrices live on a scratch obstack freed at the end.  */
  gcc_obstack_init (&scratch_obstack);

  /* A is the (DIM x 1) column of the evolution coefficients of
     CHREC_A followed by the negated coefficients of CHREC_B, so that
     A.x = 0 expresses "same element accessed".  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (m: dim, n: dim, lambda_obstack: &scratch_obstack);
  A = lambda_matrix_new (m: dim, n: 1, lambda_obstack: &scratch_obstack);
  S = lambda_matrix_new (m: dim, n: 1, lambda_obstack: &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec: chrec_a, index: 0, mult: 1);
  tree init_b = initialize_matrix_A (A, chrec: chrec_b, index: nb_vars_a, mult: -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "affine-affine test failed: "
		 "representation issue.\n" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the difference of the initial values: the Diophantine
     equation to solve is A.x = GAMMA.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  /* Bound the search by the smaller of the two maximum trip
	     counts (-1 when unknown).  */
	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   overlaps_a: &ova, overlaps_b: &ovb,
						   last_conflicts, dim: 1);
	  *overlaps_a = conflict_fn (n: 1, ova);
	  *overlaps_b = conflict_fn (n: 1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      /* Symmetric case: swap the roles of CHREC_A and CHREC_B.  */
      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a: chrec_b, chrec_b: chrec_a, overlaps_a: overlaps_b, overlaps_b: overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "affine-affine test failed: too many variables.\n" );
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, m: dim, n: 1, S, U))
    {
      /* Overflow during the decomposition: answer conservatively.  */
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* Normalize so that the leading entry of S is non-negative,
     negating the corresponding row of U to keep U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (mat: U, n: dim, r1: 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (a: gcd_alpha_beta, b: gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec: chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec: chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (x: wi::sdiv_floor (x: wi::sub (x: niter_a, y: i0), y: i1),
				y: wi::sdiv_floor (x: wi::sub (x: niter_b, y: j0), y: j1));
		  widest_int last_conflict = wi::sub (x: tau2, y: (x1 - i0)/i1);
		  /* Only emit a constant when it fits the node type.  */
		  if (wi::min_precision (x: last_conflict, sgn: SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      /* The conflicting iterations are X1 + I1*k in loop A and
		 Y1 + J1*k in loop B, for k >= 0.  */
	      *overlaps_a
		= conflict_fn (n: 1,
			       affine_fn_univar (cst: build_int_cst (NULL_TREE, x1),
						 dim: 1,
						 coef: build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (n: 1,
			       affine_fn_univar (cst: build_int_cst (NULL_TREE, y1),
						 dim: 1,
						 coef: build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "affine-affine test failed: unimplemented.\n" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  /* All exit paths converge here to release the scratch matrices.  */
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "  (overlaps_a = " );
      dump_conflict_function (outf: dump_file, cf: *overlaps_a);
      fprintf (stream: dump_file, format: ")\n  (overlaps_b = " );
      dump_conflict_function (outf: dump_file, cf: *overlaps_b);
      fprintf (stream: dump_file, format: "))\n" );
    }
}
4697 | |
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  /* The rewrite below only touches the initial values; symbolic steps
     would still defeat the integer analyzer.  */
  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  /* The transformation is valid only when the difference of the two
     initial values folds to an integer constant: the symbols cancel
     out.  */
  type = chrec_type (chrec: *chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (chrec: diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "can_use_subscript_aff_aff_for_symbolic \n" );

  /* Shift both initial values by -LEFT_B: *CHREC_A starts at DIFF and
     *CHREC_B at zero; the evolution steps are kept unchanged.  */
  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     left: diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     left: build_int_cst (type, 0),
				     right: right_b);
  return true;
}
4741 | |
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   LOOP_NEST_NUM identifies the loop with respect to which affineness
   is checked.  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_siv_subscript \n" );

  /* Constant vs. affine: the dedicated cst/affine solver applies.  */
  if (evolution_function_is_constant_p (chrec: chrec_a)
      && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  /* Affine vs. constant: same solver with the roles swapped.  */
  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest_num)
	   && evolution_function_is_constant_p (chrec: chrec_b))
    analyze_siv_subscript_cst_affine (chrec_a: chrec_b, chrec_b: chrec_a,
				      overlaps_a: overlaps_b, overlaps_b: overlaps_a, last_conflicts);

  /* Affine vs. affine: solve the Diophantine equation, either
     directly or after eliminating common symbols.  */
  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* This may rewrite CHREC_A and CHREC_B in place to strip the
	 symbols when their difference is constant.  */
      else if (can_use_analyze_subscript_affine_affine (chrec_a: &chrec_a,
							chrec_b: &chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	/* Jumps into the "don't know" handling of the else branch
	   below.  */
	goto siv_subscript_dontknow;
    }

  else
    {
      siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "  siv test failed: unimplemented" );
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: ")\n" );
}
4825 | |
4826 | /* Returns false if we can prove that the greatest common divisor of the steps |
4827 | of CHREC does not divide CST, false otherwise. */ |
4828 | |
4829 | static bool |
4830 | gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst) |
4831 | { |
4832 | HOST_WIDE_INT cd = 0, val; |
4833 | tree step; |
4834 | |
4835 | if (!tree_fits_shwi_p (cst)) |
4836 | return true; |
4837 | val = tree_to_shwi (cst); |
4838 | |
4839 | while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) |
4840 | { |
4841 | step = CHREC_RIGHT (chrec); |
4842 | if (!tree_fits_shwi_p (step)) |
4843 | return true; |
4844 | cd = gcd (cd, tree_to_shwi (step)); |
4845 | chrec = CHREC_LEFT (chrec); |
4846 | } |
4847 | |
4848 | return val % cd == 0; |
4849 | } |
4850 | |
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "(analyze_miv_subscript \n" );

  /* Work in a common signed type so the subtraction below is
     meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (loop: get_chrec_loop (chrec: chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  /* GCD test on the constant difference: if the gcd of the steps
     provably does not divide it, the accesses never meet.  */
  else if (evolution_function_is_constant_p (chrec: difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec: chrec_a, cst: difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  /* Both affine and symbol free: the full Diophantine analysis
     applies.  */
  else if (evolution_function_is_affine_in_loop (chrec: chrec_a, loopnum: loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec: chrec_b, loopnum: loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "analyze_miv_subscript test failed: unimplemented.\n" );

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: ")\n" );
}
4951 | |
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).

   This is the dispatcher over the ZIV/SIV/MIV subscript analyzers.  */

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "(analyze_overlapping_iterations \n" );
      fprintf (stream: dump_file, format: "  (chrec_a = " );
      print_generic_expr (dump_file, chrec_a);
      fprintf (stream: dump_file, format: ")\n  (chrec_b = " );
      print_generic_expr (dump_file, chrec_b);
      fprintf (stream: dump_file, format: ")\n" );
    }

  /* Missing or undetermined access functions: give up.  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, flags: 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (n: 1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* ZIV: neither subscript varies in the nest.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts);

  /* SIV: a single index variable is involved.  */
  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts, loop_nest_num: lnn);

  /* MIV: the general multiple-index case.  */
  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlaps_a: overlap_iterations_a, overlaps_b: overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "  (overlap_iterations_a = " );
      dump_conflict_function (outf: dump_file, cf: *overlap_iterations_a);
      fprintf (stream: dump_file, format: ")\n  (overlap_iterations_b = " );
      dump_conflict_function (outf: dump_file, cf: *overlap_iterations_b);
      fprintf (stream: dump_file, format: "))\n" );
    }
}
5042 | |
5043 | /* Helper function for uniquely inserting distance vectors. */ |
5044 | |
5045 | static void |
5046 | save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) |
5047 | { |
5048 | for (lambda_vector v : DDR_DIST_VECTS (ddr)) |
5049 | if (lambda_vector_equal (vec1: v, vec2: dist_v, DDR_NB_LOOPS (ddr))) |
5050 | return; |
5051 | |
5052 | DDR_DIST_VECTS (ddr).safe_push (obj: dist_v); |
5053 | } |
5054 | |
5055 | /* Helper function for uniquely inserting direction vectors. */ |
5056 | |
5057 | static void |
5058 | save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) |
5059 | { |
5060 | for (lambda_vector v : DDR_DIR_VECTS (ddr)) |
5061 | if (lambda_vector_equal (vec1: v, vec2: dir_v, DDR_NB_LOOPS (ddr))) |
5062 | return; |
5063 | |
5064 | DDR_DIR_VECTS (ddr).safe_push (obj: dir_v); |
5065 | } |
5066 | |
5067 | /* Add a distance of 1 on all the loops outer than INDEX. If we |
5068 | haven't yet determined a distance for this outer loop, push a new |
5069 | distance vector composed of the previous distance, and a distance |
5070 | of 1 for this outer loop. Example: |
5071 | |
5072 | | loop_1 |
5073 | | loop_2 |
5074 | | A[10] |
5075 | | endloop_2 |
5076 | | endloop_1 |
5077 | |
5078 | Saved vectors are of the form (dist_in_1, dist_in_2). First, we |
5079 | save (0, 1), then we have to save (1, 0). */ |
5080 | |
5081 | static void |
5082 | add_outer_distances (struct data_dependence_relation *ddr, |
5083 | lambda_vector dist_v, int index) |
5084 | { |
5085 | /* For each outer loop where init_v is not set, the accesses are |
5086 | in dependence of distance 1 in the loop. */ |
5087 | while (--index >= 0) |
5088 | { |
5089 | lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); |
5090 | lambda_vector_copy (vec1: dist_v, vec2: save_v, DDR_NB_LOOPS (ddr)); |
5091 | save_v[index] = 1; |
5092 | save_dist_v (ddr, dist_v: save_v); |
5093 | } |
5094 | } |
5095 | |
5096 | /* Return false when fail to represent the data dependence as a |
5097 | distance vector. A_INDEX is the index of the first reference |
5098 | (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the |
5099 | second reference. INIT_B is set to true when a component has been |
5100 | added to the distance vector DIST_V. INDEX_CARRY is then set to |
5101 | the index in DIST_V that carries the dependence. */ |
5102 | |
static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V[K] is nonzero once some subscript has fixed the distance
     for loop K; it drives the subscript coupling test below.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  /* Fill DIST_V one subscript at a time; each polynomial subscript
     contributes the distance for the loop its chrec varies in.  */
  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Evolutions along different loops (or an unknown subscript
	     distance) cannot be represented as a classic distance.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, num: var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var: var_a, DDR_LOOP_NEST (ddr));
	  /* Track the outermost loop (smallest index) carrying any
	     distance, for the caller's use.  */
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, flags: 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
      else
	/* Equal non-polynomial access functions: this subscript
	   contributes a zero distance; just record that DIST_V is
	   initialized.  */
	*init_b = true;
    }

  return true;
}
5189 | |
5190 | /* Return true when the DDR contains only invariant access functions wrto. loop |
5191 | number LNUM. */ |
5192 | |
5193 | static bool |
5194 | invariant_access_functions (const struct data_dependence_relation *ddr, |
5195 | int lnum) |
5196 | { |
5197 | for (subscript *sub : DDR_SUBSCRIPTS (ddr)) |
5198 | if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) |
5199 | || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) |
5200 | return false; |
5201 | |
5202 | return true; |
5203 | } |
5204 | |
5205 | /* Helper function for the case where DDR_A and DDR_B are the same |
5206 | multivariate access function with a constant step. For an example |
5207 | see pr34635-1.c. */ |
5208 | |
static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  /* C_2 is the outer chrec {{c_0, +, step_1}_1, +, step_2}_2; peel off
     its left parts to reach the inner chrec C_1 and the base C_0.  */
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions in the loop nest of the two chrec variables.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  /* Reduce the pair of steps by their gcd so the recorded distance is
     the smallest integral solution.  */
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  /* Canonicalize signs so that the component stored at X_1 is
     non-negative.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, index: x_1);
}
5252 | |
5253 | /* Helper function for the case where DDR_A and DDR_B are the same |
5254 | access functions. */ |
5255 | |
static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Outermost loop index carrying a dependence; starts past the end of
     the nest and is lowered as polynomial subscripts are found.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* Both sides have the same access function, so inspecting side 0
	 is sufficient.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate evolutions are only handled for a single
		 subscript; otherwise give up on the relation.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, c_2: access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record a distance of 1 for every loop outer than INDEX_CARRY,
     starting from the zero vector.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index: index_carry);
}
5312 | |
/* Add to DDR a distance vector that is zero in every component except
   component 0, which is set to 1.  */

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  /* lambda_vector_new yields a zero vector (see its use as "the 0
     vector" in build_classic_dist_vector).  */
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}
5321 | |
5322 | /* Adds a unit distance vector to DDR when there is a 0 overlap. This |
5323 | is the case for example when access functions are the same and |
5324 | equal to a constant, as in: |
5325 | |
5326 | | loop_1 |
5327 | | A[3] = ... |
5328 | | ... = A[3] |
5329 | | endloop_1 |
5330 | |
5331 | in which case the distance vectors are (0) and (1). */ |
5332 | |
5333 | static void |
5334 | add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) |
5335 | { |
5336 | unsigned i, j; |
5337 | |
5338 | for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) |
5339 | { |
5340 | subscript_p sub = DDR_SUBSCRIPT (ddr, i); |
5341 | conflict_function *ca = SUB_CONFLICTS_IN_A (sub); |
5342 | conflict_function *cb = SUB_CONFLICTS_IN_B (sub); |
5343 | |
5344 | for (j = 0; j < ca->n; j++) |
5345 | if (affine_function_zero_p (fn: ca->fns[j])) |
5346 | { |
5347 | insert_innermost_unit_dist_vector (ddr); |
5348 | return; |
5349 | } |
5350 | |
5351 | for (j = 0; j < cb->n; j++) |
5352 | if (affine_function_zero_p (fn: cb->fns[j])) |
5353 | { |
5354 | insert_innermost_unit_dist_vector (ddr); |
5355 | return; |
5356 | } |
5357 | } |
5358 | } |
5359 | |
5360 | /* Return true when the DDR contains two data references that have the |
5361 | same access functions. */ |
5362 | |
5363 | static inline bool |
5364 | same_access_functions (const struct data_dependence_relation *ddr) |
5365 | { |
5366 | for (subscript *sub : DDR_SUBSCRIPTS (ddr)) |
5367 | if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0), |
5368 | SUB_ACCESS_FN (sub, 1))) |
5369 | return false; |
5370 | |
5371 | return true; |
5372 | } |
5373 | |
5374 | /* Compute the classic per loop distance vector. DDR is the data |
5375 | dependence relation to build a vector from. Return false when fail |
5376 | to represent the data dependence as a distance vector. */ |
5377 | |
static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation is already resolved (known
     independent or unknown).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, lnum: loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, a_index: 0, b_index: 1, dist_v, init_b: &init_b, index_carry: &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (v: dist_v, DDR_NB_LOOPS (ddr)))
	{
	  /* Redo the analysis with the references swapped and record
	     the resulting (lexicographically positive) vector.  */
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, a_index: 1, b_index: 0, dist_v: save_v, init_b: &init_b,
					    index_carry: &index_carry))
	    return false;
	  save_dist_v (ddr, dist_v: save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, dist_v: save_v, index: index_carry);
	      add_outer_distances (ddr, dist_v, index: index_carry);
	    }
	}
      else
	{
	  /* The vector is already lexicographically positive; when
	     there are outer loops, also compute and record the
	     distances of the opposite direction (B to A).  */
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (vec1: dist_v, vec2: save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, a_index: 1, b_index: 0, dist_v: opposite_v, init_b: &init_b,
						index_carry: &index_carry))
		return false;

	      save_dist_v (ddr, dist_v: save_v);
	      add_outer_distances (ddr, dist_v, index: index_carry);
	      add_outer_distances (ddr, dist_v: opposite_v, index: index_carry);
	    }
	  else
	    save_dist_v (ddr, dist_v: save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   index: lambda_vector_first_nz (vec1: dist_v,
						    DDR_NB_LOOPS (ddr), start: 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (stream: dump_file, format: "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (stream: dump_file, format: "  dist_vector = (");
	  print_lambda_vector (outfile: dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (stream: dump_file, format: "  )\n");
	}
      fprintf (stream: dump_file, format: ")\n");
    }

  return true;
}
5524 | |
5525 | /* Return the direction for a given distance. |
5526 | FIXME: Computing dir this way is suboptimal, since dir can catch |
5527 | cases that dist is unable to represent. */ |
5528 | |
5529 | static inline enum data_dependence_direction |
5530 | dir_from_dist (int dist) |
5531 | { |
5532 | if (dist > 0) |
5533 | return dir_positive; |
5534 | else if (dist < 0) |
5535 | return dir_negative; |
5536 | else |
5537 | return dir_equal; |
5538 | } |
5539 | |
5540 | /* Compute the classic per loop direction vector. DDR is the data |
5541 | dependence relation to build a vector from. */ |
5542 | |
5543 | static void |
5544 | build_classic_dir_vector (struct data_dependence_relation *ddr) |
5545 | { |
5546 | unsigned i, j; |
5547 | lambda_vector dist_v; |
5548 | |
5549 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
5550 | { |
5551 | lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); |
5552 | |
5553 | for (j = 0; j < DDR_NB_LOOPS (ddr); j++) |
5554 | dir_v[j] = dir_from_dist (dist: dist_v[j]); |
5555 | |
5556 | save_dir_v (ddr, dir_v); |
5557 | } |
5558 | } |
5559 | |
5560 | /* Helper function. Returns true when there is a dependence between the |
5561 | data references. A_INDEX is the index of the first reference (0 for |
5562 | DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */ |
5563 | |
static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE means "dependent so far"; may become chrec_dont_know
     (undetermined) or chrec_known (independent) while scanning.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (ix: i, ptr: &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      overlap_iterations_a: &overlaps_a, overlap_iterations_b: &overlaps_b,
				      last_conflicts: &last_conflicts, loop_nest);

      /* Release the conflict functions of a previous analysis before
	 installing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* All subscripts may conflict: the references are dependent.  */
  if (res == NULL_TREE)
    return true;

  /* Otherwise finalize the relation as independent (chrec_known) or
     undetermined (chrec_dont_know) and report no dependence found.  */
  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, chrec: res);
  return false;
}
5621 | |
5622 | /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ |
5623 | |
static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* A true result means the references are dependent.  */
  if (subscript_dependence_tester_1 (ddr, a_index: 0, b_index: 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Summarize the dependence with classic distance and, when the
     distance vectors could be built, direction vectors.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
5635 | |
5636 | /* Returns true when all the access functions of A are affine or |
5637 | constant with respect to LOOP_NEST. */ |
5638 | |
5639 | static bool |
5640 | access_functions_are_affine_or_constant_p (const struct data_reference *a, |
5641 | const class loop *loop_nest) |
5642 | { |
5643 | vec<tree> fns = DR_ACCESS_FNS (a); |
5644 | for (tree t : fns) |
5645 | if (!evolution_function_is_invariant_p (t, loop_nest->num) |
5646 | && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) |
5647 | return false; |
5648 | |
5649 | return true; |
5650 | } |
5651 | |
5652 | /* This computes the affine dependence relation between A and B with |
5653 | respect to LOOP_NEST. CHREC_KNOWN is used for representing the |
5654 | independence between two accesses, while CHREC_DONT_KNOW is used |
5655 | for representing the unknown relation. |
5656 | |
5657 | Note that it is possible to stop the computation of the dependence |
5658 | relation the first time we detect a CHREC_KNOWN element for a given |
5659 | subscript. */ |
5660 | |
void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "(compute_affine_dependence\n");
      fprintf (stream: dump_file, format: "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (stream: dump_file, format: ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (stream: dump_file, format: "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (stream: dump_file, format: ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tests only apply when both references' access
	 functions are affine or constant in the nest.  */
      if (access_functions_are_affine_or_constant_p (a: dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (a: drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (stream: dump_file, format: "Data ref a:\n");
	      dump_data_reference (outf: dump_file, dr: dra);
	      fprintf (stream: dump_file, format: "Data ref b:\n");
	      dump_data_reference (outf: dump_file, dr: drb);
	      fprintf (stream: dump_file, format: "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (stream: dump_file, format: ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (stream: dump_file, format: ") -> dependence analysis failed\n");
      else
	fprintf (stream: dump_file, format: ")\n");
    }
}
5719 | |
5720 | /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all |
5721 | the data references in DATAREFS, in the LOOP_NEST. When |
5722 | COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self |
5723 | relations. Return true when successful, i.e. data references number |
5724 | is small enough to be handled. */ |
5725 | |
bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  /* Bail out when the quadratic number of pairs would be too large;
     the single dummy relation conservatively means "don't know".  */
  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (obj: ddr);
      return false;
    }

  /* Build one relation for every unordered pair (A, B); read-read
     pairs are skipped unless COMPUTE_SELF_AND_RR.  */
  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (ix: j, ptr: &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (obj: ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest: loop_nest[0]);
	}

  /* Optionally add the self relations (A, A).  */
  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, b: a, loop_nest);
	dependence_relations->safe_push (obj: ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest: loop_nest[0]);
      }

  return true;
}
5769 | |
5770 | /* Describes a location of a memory reference. */ |
5771 | |
struct data_ref_loc
{
  /* The tree of the memory reference found in a statement.  */
  tree ref;

  /* True if the memory reference is read; false for a write.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
5785 | |
5786 | |
5787 | /* Stores the locations of memory references in STMT to REFERENCES. Returns |
5788 | true if STMT clobbers memory, false otherwise. */ |
5789 | |
static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (g: stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (gs: stmt))
	switch (gimple_call_internal_fn (gs: stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (g: stmt)->loop_father;
	      tree uid = gimple_call_arg (gs: stmt, index: 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only tolerate the call when it belongs to the loop it
		 was created for (matching simduid).  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Their accesses are modeled explicitly below.  */
	    break;
	  case IFN_MASK_CALL:
	    {
	      /* A masked call is acceptable only when the called
		 function is known and const.  */
	      tree orig_fndecl
		= gimple_call_addr_fndecl (fn: gimple_call_arg (gs: stmt, index: 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (asm_stmt: as_a <gasm *> (p: stmt))
	       || gimple_vuse (g: stmt)))
    clobbers_memory = true;

  /* No virtual use means the statement touches no memory.  */
  if (!gimple_vuse (g: stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (gs: stmt);
      op1 = gimple_assign_rhs1 (gs: stmt);

      /* Record the RHS as a read when it is a declaration or a
	 reference whose base is a real memory object.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (t: op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (obj: ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i = 0, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (gs: stmt))
	switch (gimple_call_internal_fn (gs: stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (gs: stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Model the masked access as a MEM_REF of the accessed
	       type (re-aligned to the declared alignment) at the
	       pointer argument; the access is conditional on the
	       mask.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (gs: stmt, index: 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (obj: ref);
	    return false;
	  case IFN_MASK_CALL:
	    /* Skip argument 0, the address of the real callee.  */
	    i = 1;
	    gcc_fallthrough ();
	  default:
	    break;
	  }

      /* Scan the call arguments for by-reference reads.  */
      op0 = gimple_call_lhs (gs: stmt);
      n = gimple_call_num_args (gs: stmt);
      for (; i < n; i++)
	{
	  op1 = gimple_call_arg (gs: stmt, index: i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (t: op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (obj: ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (assignment or call result) as a write when it is
     a memory reference.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (t: op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (obj: ref);
    }
  return clobbers_memory;
}
5929 | |
5930 | |
5931 | /* Returns true if the loop-nest has any data reference. */ |
5932 | |
5933 | bool |
5934 | loop_nest_has_data_refs (loop_p loop) |
5935 | { |
5936 | basic_block *bbs = get_loop_body (loop); |
5937 | auto_vec<data_ref_loc, 3> references; |
5938 | |
5939 | for (unsigned i = 0; i < loop->num_nodes; i++) |
5940 | { |
5941 | basic_block bb = bbs[i]; |
5942 | gimple_stmt_iterator bsi; |
5943 | |
5944 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
5945 | { |
5946 | gimple *stmt = gsi_stmt (i: bsi); |
5947 | get_references_in_stmt (stmt, references: &references); |
5948 | if (references.length ()) |
5949 | { |
5950 | free (ptr: bbs); |
5951 | return true; |
5952 | } |
5953 | } |
5954 | } |
5955 | free (ptr: bbs); |
5956 | return false; |
5957 | } |
5958 | |
5959 | /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable |
5960 | reference, returns false, otherwise returns true. NEST is the outermost |
5961 | loop of the loop nest in which the references should be analyzed. */ |
5962 | |
opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  /* A true return from get_references_in_stmt means STMT may clobber
     memory in ways we cannot model.  */
  if (get_references_in_stmt (stmt, references: &references))
    return opt_result::failure_at (loc: stmt, fmt: "statement clobbers memory: %G",
				   stmt);

  /* Turn each reference location into a full data_reference analyzed
     relative to NEST's preheader (when a nest is given).  */
  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest: nest ? loop_preheader_edge (nest) : NULL,
			    loop: loop_containing_stmt (stmt), memref: ref.ref,
			    stmt, is_read: ref.is_read, is_conditional_in_stmt: ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (obj: dr);
    }

  return opt_result::success ();
}
5985 | |
5986 | /* Stores the data references in STMT to DATAREFS. If there is an |
5987 | unanalyzable reference, returns false, otherwise returns true. |
5988 | NEST is the outermost loop of the loop nest in which the references |
5989 | should be instantiated, LOOP is the loop in which the references |
5990 | should be analyzed. */ |
5991 | |
5992 | bool |
5993 | graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt, |
5994 | vec<data_reference_p> *datarefs) |
5995 | { |
5996 | auto_vec<data_ref_loc, 2> references; |
5997 | bool ret = true; |
5998 | data_reference_p dr; |
5999 | |
6000 | if (get_references_in_stmt (stmt, references: &references)) |
6001 | return false; |
6002 | |
6003 | for (const data_ref_loc &ref : references) |
6004 | { |
6005 | dr = create_data_ref (nest, loop, memref: ref.ref, stmt, is_read: ref.is_read, |
6006 | is_conditional_in_stmt: ref.is_conditional_in_stmt); |
6007 | gcc_assert (dr != NULL); |
6008 | datarefs->safe_push (obj: dr); |
6009 | } |
6010 | |
6011 | return ret; |
6012 | } |
6013 | |
6014 | /* Search the data references in LOOP, and record the information into |
6015 | DATAREFS. Returns chrec_dont_know when failing to analyze a |
6016 | difficult case, returns NULL_TREE otherwise. */ |
6017 | |
6018 | tree |
6019 | find_data_references_in_bb (class loop *loop, basic_block bb, |
6020 | vec<data_reference_p> *datarefs) |
6021 | { |
6022 | gimple_stmt_iterator bsi; |
6023 | |
6024 | for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi)) |
6025 | { |
6026 | gimple *stmt = gsi_stmt (i: bsi); |
6027 | |
6028 | if (!find_data_references_in_stmt (nest: loop, stmt, datarefs)) |
6029 | { |
6030 | struct data_reference *res; |
6031 | res = XCNEW (struct data_reference); |
6032 | datarefs->safe_push (obj: res); |
6033 | |
6034 | return chrec_dont_know; |
6035 | } |
6036 | } |
6037 | |
6038 | return NULL_TREE; |
6039 | } |
6040 | |
6041 | /* Search the data references in LOOP, and record the information into |
6042 | DATAREFS. Returns chrec_dont_know when failing to analyze a |
6043 | difficult case, returns NULL_TREE otherwise. |
6044 | |
6045 | TODO: This function should be made smarter so that it can handle address |
6046 | arithmetic as if they were array accesses, etc. */ |
6047 | |
6048 | tree |
6049 | find_data_references_in_loop (class loop *loop, |
6050 | vec<data_reference_p> *datarefs) |
6051 | { |
6052 | basic_block bb, *bbs; |
6053 | unsigned int i; |
6054 | |
6055 | bbs = get_loop_body_in_dom_order (loop); |
6056 | |
6057 | for (i = 0; i < loop->num_nodes; i++) |
6058 | { |
6059 | bb = bbs[i]; |
6060 | |
6061 | if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) |
6062 | { |
6063 | free (ptr: bbs); |
6064 | return chrec_dont_know; |
6065 | } |
6066 | } |
6067 | free (ptr: bbs); |
6068 | |
6069 | return NULL_TREE; |
6070 | } |
6071 | |
6072 | /* Return the alignment in bytes that DRB is guaranteed to have at all |
6073 | times. */ |
6074 | |
6075 | unsigned int |
6076 | dr_alignment (innermost_loop_behavior *drb) |
6077 | { |
6078 | /* Get the alignment of BASE_ADDRESS + INIT. */ |
6079 | unsigned int alignment = drb->base_alignment; |
6080 | unsigned int misalignment = (drb->base_misalignment |
6081 | + TREE_INT_CST_LOW (drb->init)); |
6082 | if (misalignment != 0) |
6083 | alignment = MIN (alignment, misalignment & -misalignment); |
6084 | |
6085 | /* Cap it to the alignment of OFFSET. */ |
6086 | if (!integer_zerop (drb->offset)) |
6087 | alignment = MIN (alignment, drb->offset_alignment); |
6088 | |
6089 | /* Cap it to the alignment of STEP. */ |
6090 | if (!integer_zerop (drb->step)) |
6091 | alignment = MIN (alignment, drb->step_alignment); |
6092 | |
6093 | return alignment; |
6094 | } |
6095 | |
6096 | /* If BASE is a pointer-typed SSA name, try to find the object that it |
6097 | is based on. Return this object X on success and store the alignment |
6098 | in bytes of BASE - &X in *ALIGNMENT_OUT. */ |
6099 | |
static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names can be traced back to an object.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* Express the pointer as a scalar evolution so that loop-varying
     components appear as chrecs.  */
  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (stmt: def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* A bit position that is not a whole number of bytes defeats
     byte-level alignment tracking.  */
  if (!base || !multiple_p (a: bitpos, BITS_PER_UNIT, multiple: &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (a: bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  /* Success: BASE is the underlying object, *ALIGNMENT_OUT the byte
     alignment of the original pointer relative to it.  */
  *alignment_out = alignment;
  return base;
}
6153 | |
6154 | /* Return the object whose alignment would need to be changed in order |
6155 | to increase the alignment of ADDR. Store the maximum achievable |
6156 | alignment in *MAX_ALIGNMENT. */ |
6157 | |
6158 | tree |
6159 | get_base_for_alignment (tree addr, unsigned int *max_alignment) |
6160 | { |
6161 | tree base = get_base_for_alignment_1 (base: addr, alignment_out: max_alignment); |
6162 | if (base) |
6163 | return base; |
6164 | |
6165 | if (TREE_CODE (addr) == ADDR_EXPR) |
6166 | addr = TREE_OPERAND (addr, 0); |
6167 | *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT; |
6168 | return addr; |
6169 | } |
6170 | |
/* Helper for find_loop_nest: record LOOP and its chain of inner loops
   in LOOP_NEST, failing if any loop below the outermost one has a
   sibling.  */
6172 | |
6173 | static bool |
6174 | find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest) |
6175 | { |
6176 | /* Inner loops of the nest should not contain siblings. Example: |
6177 | when there are two consecutive loops, |
6178 | |
6179 | | loop_0 |
6180 | | loop_1 |
6181 | | A[{0, +, 1}_1] |
6182 | | endloop_1 |
6183 | | loop_2 |
6184 | | A[{0, +, 1}_2] |
6185 | | endloop_2 |
6186 | | endloop_0 |
6187 | |
6188 | the dependence relation cannot be captured by the distance |
6189 | abstraction. */ |
6190 | if (loop->next) |
6191 | return false; |
6192 | |
6193 | loop_nest->safe_push (obj: loop); |
6194 | if (loop->inner) |
6195 | return find_loop_nest_1 (loop: loop->inner, loop_nest); |
6196 | return true; |
6197 | } |
6198 | |
6199 | /* Return false when the LOOP is not well nested. Otherwise return |
6200 | true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will |
6201 | contain the loops from the outermost to the innermost, as they will |
6202 | appear in the classic distance vector. */ |
6203 | |
6204 | bool |
6205 | find_loop_nest (class loop *loop, vec<loop_p> *loop_nest) |
6206 | { |
6207 | loop_nest->safe_push (obj: loop); |
6208 | if (loop->inner) |
6209 | return find_loop_nest_1 (loop: loop->inner, loop_nest); |
6210 | return true; |
6211 | } |
6212 | |
6213 | /* Returns true when the data dependences have been computed, false otherwise. |
6214 | Given a loop nest LOOP, the following vectors are returned: |
6215 | DATAREFS is initialized to all the array elements contained in this loop, |
6216 | DEPENDENCE_RELATIONS contains the relations between the data references. |
6217 | Compute read-read and self relations if |
6218 | COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ |
6219 | |
bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  /* Reset the global counters that the individual dependence tests
     update as they run.  */
  memset (s: &dependence_stats, c: 0, n: sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (datarefs: *datarefs, dependence_relations, loop_nest: *loop_nest,
				   compute_self_and_rr: compute_self_and_read_read_dependences))
    res = false;

  /* Dump the accumulated tester statistics when requested.  Note that
     they are printed even if the analysis above failed, so partial
     counts are still reported.  */
  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (stream: dump_file, format: "Dependence tester statistics:\n" );

      fprintf (stream: dump_file, format: "Number of dependence tests: %d\n" ,
	       dependence_stats.num_dependence_tests);
      fprintf (stream: dump_file, format: "Number of dependence tests classified dependent: %d\n" ,
	       dependence_stats.num_dependence_dependent);
      fprintf (stream: dump_file, format: "Number of dependence tests classified independent: %d\n" ,
	       dependence_stats.num_dependence_independent);
      fprintf (stream: dump_file, format: "Number of undetermined dependence tests: %d\n" ,
	       dependence_stats.num_dependence_undetermined);

      fprintf (stream: dump_file, format: "Number of subscript tests: %d\n" ,
	       dependence_stats.num_subscript_tests);
      fprintf (stream: dump_file, format: "Number of undetermined subscript tests: %d\n" ,
	       dependence_stats.num_subscript_undetermined);
      fprintf (stream: dump_file, format: "Number of same subscript function: %d\n" ,
	       dependence_stats.num_same_subscript_function);

      /* ZIV = zero index variable tests.  */
      fprintf (stream: dump_file, format: "Number of ziv tests: %d\n" ,
	       dependence_stats.num_ziv);
      fprintf (stream: dump_file, format: "Number of ziv tests returning dependent: %d\n" ,
	       dependence_stats.num_ziv_dependent);
      fprintf (stream: dump_file, format: "Number of ziv tests returning independent: %d\n" ,
	       dependence_stats.num_ziv_independent);
      fprintf (stream: dump_file, format: "Number of ziv tests unimplemented: %d\n" ,
	       dependence_stats.num_ziv_unimplemented);

      /* SIV = single index variable tests.  */
      fprintf (stream: dump_file, format: "Number of siv tests: %d\n" ,
	       dependence_stats.num_siv);
      fprintf (stream: dump_file, format: "Number of siv tests returning dependent: %d\n" ,
	       dependence_stats.num_siv_dependent);
      fprintf (stream: dump_file, format: "Number of siv tests returning independent: %d\n" ,
	       dependence_stats.num_siv_independent);
      fprintf (stream: dump_file, format: "Number of siv tests unimplemented: %d\n" ,
	       dependence_stats.num_siv_unimplemented);

      /* MIV = multiple index variable tests.  */
      fprintf (stream: dump_file, format: "Number of miv tests: %d\n" ,
	       dependence_stats.num_miv);
      fprintf (stream: dump_file, format: "Number of miv tests returning dependent: %d\n" ,
	       dependence_stats.num_miv_dependent);
      fprintf (stream: dump_file, format: "Number of miv tests returning independent: %d\n" ,
	       dependence_stats.num_miv_independent);
      fprintf (stream: dump_file, format: "Number of miv tests unimplemented: %d\n" ,
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
6291 | |
6292 | /* Free the memory used by a data dependence relation DDR. */ |
6293 | |
6294 | void |
6295 | free_dependence_relation (struct data_dependence_relation *ddr) |
6296 | { |
6297 | if (ddr == NULL) |
6298 | return; |
6299 | |
6300 | if (DDR_SUBSCRIPTS (ddr).exists ()) |
6301 | free_subscripts (DDR_SUBSCRIPTS (ddr)); |
6302 | DDR_DIST_VECTS (ddr).release (); |
6303 | DDR_DIR_VECTS (ddr).release (); |
6304 | |
6305 | free (ptr: ddr); |
6306 | } |
6307 | |
6308 | /* Free the memory used by the data dependence relations from |
6309 | DEPENDENCE_RELATIONS. */ |
6310 | |
6311 | void |
6312 | free_dependence_relations (vec<ddr_p>& dependence_relations) |
6313 | { |
6314 | for (data_dependence_relation *ddr : dependence_relations) |
6315 | if (ddr) |
6316 | free_dependence_relation (ddr); |
6317 | |
6318 | dependence_relations.release (); |
6319 | } |
6320 | |
6321 | /* Free the memory used by the data references from DATAREFS. */ |
6322 | |
6323 | void |
6324 | free_data_refs (vec<data_reference_p>& datarefs) |
6325 | { |
6326 | for (data_reference *dr : datarefs) |
6327 | free_data_ref (dr); |
6328 | datarefs.release (); |
6329 | } |
6330 | |
6331 | /* Common routine implementing both dr_direction_indicator and |
6332 | dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known |
6333 | to be >= USEFUL_MIN and -1 if the indicator is known to be negative. |
6334 | Return the step as the indicator otherwise. */ |
6335 | |
static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  /* NOTE(review): a DR with no recorded step yields NULL_TREE here;
     callers must be prepared for that.  */
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (t: factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (t: factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (r&: vr, expr: step)
	  || vr.undefined_p ())
	{
	  /* No usable range information: fall back conservatively to
	     the full range of the step's type.  */
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (x: minv, y: widest_int::from (x: step_min, sgn))
	  && wi::ges_p (x: maxv, y: widest_int::from (x: step_max, sgn)))
	{
	  /* Known to be >= USEFUL_MIN, known to be negative, or
	     undetermined: report accordingly.  */
	  if (wi::ge_p (x: step_min, y: useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (x: step_max, y: 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No simplification applies: return the raw step as the indicator.  */
  return DR_STEP (dr);
}
6397 | |
6398 | /* Return a value that is negative iff DR has a negative step. */ |
6399 | |
6400 | tree |
6401 | dr_direction_indicator (struct data_reference *dr) |
6402 | { |
6403 | return dr_step_indicator (dr, useful_min: 0); |
6404 | } |
6405 | |
6406 | /* Return a value that is zero iff DR has a zero step. */ |
6407 | |
6408 | tree |
6409 | dr_zero_step_indicator (struct data_reference *dr) |
6410 | { |
6411 | return dr_step_indicator (dr, useful_min: 1); |
6412 | } |
6413 | |
6414 | /* Return true if DR is known to have a nonnegative (but possibly zero) |
6415 | step. */ |
6416 | |
6417 | bool |
6418 | dr_known_forward_stride_p (struct data_reference *dr) |
6419 | { |
6420 | tree indicator = dr_direction_indicator (dr); |
6421 | tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node, |
6422 | fold_convert (ssizetype, indicator), |
6423 | ssize_int (0)); |
6424 | return neg_step_val && integer_zerop (neg_step_val); |
6425 | } |
6426 | |