1 | /* If-conversion for vectorizer. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | Contributed by Devang Patel <dpatel@apple.com> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* This pass implements a tree level if-conversion of loops. Its |
22 | initial goal is to help the vectorizer to vectorize loops with |
23 | conditions. |
24 | |
25 | A short description of if-conversion: |
26 | |
27 | o Decide if a loop is if-convertible or not. |
28 | o Walk all loop basic blocks in breadth first order (BFS order). |
29 | o Remove conditional statements (at the end of basic block) |
30 | and propagate condition into destination basic blocks' |
31 | predicate list. |
32 | o Replace modify expression with conditional modify expression |
33 | using current basic block's condition. |
34 | o Merge all basic blocks |
35 | o Replace phi nodes with conditional modify expr |
36 | o Merge all basic blocks into header |
37 | |
38 | Sample transformation: |
39 | |
40 | INPUT |
41 | ----- |
42 | |
43 | # i_23 = PHI <0(0), i_18(10)>; |
44 | <L0>:; |
45 | j_15 = A[i_23]; |
46 | if (j_15 > 41) goto <L1>; else goto <L17>; |
47 | |
48 | <L17>:; |
49 | goto <bb 3> (<L3>); |
50 | |
51 | <L1>:; |
52 | |
53 | # iftmp.2_4 = PHI <0(8), 42(2)>; |
54 | <L3>:; |
55 | A[i_23] = iftmp.2_4; |
56 | i_18 = i_23 + 1; |
57 | if (i_18 <= 15) goto <L19>; else goto <L18>; |
58 | |
59 | <L19>:; |
60 | goto <bb 1> (<L0>); |
61 | |
62 | <L18>:; |
63 | |
64 | OUTPUT |
65 | ------ |
66 | |
67 | # i_23 = PHI <0(0), i_18(10)>; |
68 | <L0>:; |
69 | j_15 = A[i_23]; |
70 | |
71 | <L3>:; |
72 | iftmp.2_4 = j_15 > 41 ? 42 : 0; |
73 | A[i_23] = iftmp.2_4; |
74 | i_18 = i_23 + 1; |
75 | if (i_18 <= 15) goto <L19>; else goto <L18>; |
76 | |
77 | <L19>:; |
78 | goto <bb 1> (<L0>); |
79 | |
80 | <L18>:; |
81 | */ |
82 | |
83 | #include "config.h" |
84 | #include "system.h" |
85 | #include "coretypes.h" |
86 | #include "backend.h" |
87 | #include "rtl.h" |
88 | #include "tree.h" |
89 | #include "gimple.h" |
90 | #include "cfghooks.h" |
91 | #include "tree-pass.h" |
92 | #include "ssa.h" |
93 | #include "expmed.h" |
94 | #include "expr.h" |
95 | #include "optabs-tree.h" |
96 | #include "gimple-pretty-print.h" |
97 | #include "alias.h" |
98 | #include "fold-const.h" |
99 | #include "stor-layout.h" |
100 | #include "gimple-iterator.h" |
101 | #include "gimple-fold.h" |
102 | #include "gimplify.h" |
103 | #include "gimplify-me.h" |
104 | #include "tree-cfg.h" |
105 | #include "tree-into-ssa.h" |
106 | #include "tree-ssa.h" |
107 | #include "cfgloop.h" |
108 | #include "tree-data-ref.h" |
109 | #include "tree-scalar-evolution.h" |
110 | #include "tree-ssa-loop.h" |
111 | #include "tree-ssa-loop-niter.h" |
112 | #include "tree-ssa-loop-ivopts.h" |
113 | #include "tree-ssa-address.h" |
114 | #include "dbgcnt.h" |
115 | #include "tree-hash-traits.h" |
116 | #include "varasm.h" |
117 | #include "builtins.h" |
118 | #include "cfganal.h" |
119 | #include "internal-fn.h" |
120 | #include "fold-const.h" |
121 | #include "tree-ssa-sccvn.h" |
122 | #include "tree-cfgcleanup.h" |
123 | #include "tree-ssa-dse.h" |
124 | #include "tree-vectorizer.h" |
125 | #include "tree-eh.h" |
126 | #include "cgraph.h" |
127 | |
128 | /* For lang_hooks.types.type_for_mode. */ |
129 | #include "langhooks.h" |
130 | |
/* Only handle PHIs with no more than MAX_PHI_ARG_NUM arguments unless
   we are asked to by simd pragma.  */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  See more information in comment
   before phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any if-conversion work to be done in the loop.  */
static bool need_to_ifcvt;
158 | |
/* Hash for struct innermost_loop_behavior.  It depends on the user to
   free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Hash a behavior from its base address, offset, init and step.  */
  static inline hashval_t hash (const value_type &);
  /* Structural, field-wise equality of two behaviors.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
168 | |
169 | inline hashval_t |
170 | innermost_loop_behavior_hash::hash (const value_type &e) |
171 | { |
172 | hashval_t hash; |
173 | |
174 | hash = iterative_hash_expr (tree: e->base_address, seed: 0); |
175 | hash = iterative_hash_expr (tree: e->offset, seed: hash); |
176 | hash = iterative_hash_expr (tree: e->init, seed: hash); |
177 | return iterative_hash_expr (tree: e->step, seed: hash); |
178 | } |
179 | |
180 | inline bool |
181 | innermost_loop_behavior_hash::equal (const value_type &e1, |
182 | const compare_type &e2) |
183 | { |
184 | if ((e1->base_address && !e2->base_address) |
185 | || (!e1->base_address && e2->base_address) |
186 | || (!e1->offset && e2->offset) |
187 | || (e1->offset && !e2->offset) |
188 | || (!e1->init && e2->init) |
189 | || (e1->init && !e2->init) |
190 | || (!e1->step && e2->step) |
191 | || (e1->step && !e2->step)) |
192 | return false; |
193 | |
194 | if (e1->base_address && e2->base_address |
195 | && !operand_equal_p (e1->base_address, e2->base_address, flags: 0)) |
196 | return false; |
197 | if (e1->offset && e2->offset |
198 | && !operand_equal_p (e1->offset, e2->offset, flags: 0)) |
199 | return false; |
200 | if (e1->init && e2->init |
201 | && !operand_equal_p (e1->init, e2->init, flags: 0)) |
202 | return false; |
203 | if (e1->step && e2->step |
204 | && !operand_equal_p (e1->step, e2->step, flags: 0)) |
205 | return false; |
206 | |
207 | return true; |
208 | } |
209 | |
/* List of basic blocks in if-conversion-suitable order.  Allocated by
   the pass; blocks are predicated and merged in this order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
222 | |
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;

  /* Records the number of statements recorded into
     PREDICATE_GIMPLIFIED_STMTS.  Used as a cost proxy for how
     expensive this block's predicate is to compute.  */
  unsigned no_predicate_stmts;
};
239 | |
240 | /* Returns true when the basic block BB has a predicate. */ |
241 | |
242 | static inline bool |
243 | bb_has_predicate (basic_block bb) |
244 | { |
245 | return bb->aux != NULL; |
246 | } |
247 | |
248 | /* Returns the gimplified predicate for basic block BB. */ |
249 | |
250 | static inline tree |
251 | bb_predicate (basic_block bb) |
252 | { |
253 | return ((struct bb_predicate *) bb->aux)->predicate; |
254 | } |
255 | |
256 | /* Sets the gimplified predicate COND for basic block BB. */ |
257 | |
258 | static inline void |
259 | set_bb_predicate (basic_block bb, tree cond) |
260 | { |
261 | auto aux = (struct bb_predicate *) bb->aux; |
262 | gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR |
263 | && is_gimple_val (TREE_OPERAND (cond, 0))) |
264 | || is_gimple_val (cond)); |
265 | aux->predicate = cond; |
266 | aux->no_predicate_stmts++; |
267 | |
268 | if (dump_file && (dump_flags & TDF_DETAILS)) |
269 | fprintf (stream: dump_file, format: "Recording block %d value %d\n" , bb->index, |
270 | aux->no_predicate_stmts); |
271 | } |
272 | |
273 | /* Returns the sequence of statements of the gimplification of the |
274 | predicate for basic block BB. */ |
275 | |
276 | static inline gimple_seq |
277 | bb_predicate_gimplified_stmts (basic_block bb) |
278 | { |
279 | return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts; |
280 | } |
281 | |
282 | /* Sets the sequence of statements STMTS of the gimplification of the |
283 | predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate |
284 | counts. */ |
285 | |
286 | static inline void |
287 | set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts, |
288 | bool preserve_counts) |
289 | { |
290 | ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts; |
291 | if (stmts == NULL && !preserve_counts) |
292 | ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0; |
293 | } |
294 | |
/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB, and bumps the predicate
   statement count accordingly.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (seq&: stmts);
       !gsi_end_p (i: gsi); gsi_next (i: &gsi))
    {
      gimple *stmt = gsi_stmt (i: gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (s: stmt, modifiedp: true);
      /* Keep no_predicate_stmts in sync with the sequence contents;
	 see get_bb_num_predicate_stmts.  */
      ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
    }
  /* Append without updating stmt operands: the stmts are not in a BB
     yet and will be inserted later.  */
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}
318 | |
319 | /* Return the number of statements the predicate of the basic block consists |
320 | of. */ |
321 | |
322 | static inline unsigned |
323 | get_bb_num_predicate_stmts (basic_block bb) |
324 | { |
325 | return ((struct bb_predicate *) bb->aux)->no_predicate_stmts; |
326 | } |
327 | |
328 | /* Initializes to TRUE the predicate of basic block BB. */ |
329 | |
330 | static inline void |
331 | init_bb_predicate (basic_block bb) |
332 | { |
333 | bb->aux = XNEW (struct bb_predicate); |
334 | set_bb_predicate_gimplified_stmts (bb, NULL, preserve_counts: false); |
335 | set_bb_predicate (bb, boolean_true_node); |
336 | } |
337 | |
/* Release the SSA_NAMEs associated with the predicate of basic block BB.  */

static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.
	 Discarding a statement that already lives in a BB would
	 corrupt that BB's sequence.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (seq&: stmts);
	     !gsi_end_p (i); gsi_next (i: &i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL, preserve_counts: false);
    }
}
357 | |
358 | /* Free the predicate of basic block BB. */ |
359 | |
360 | static inline void |
361 | free_bb_predicate (basic_block bb) |
362 | { |
363 | if (!bb_has_predicate (bb)) |
364 | return; |
365 | |
366 | release_bb_predicate (bb); |
367 | free (ptr: bb->aux); |
368 | bb->aux = NULL; |
369 | } |
370 | |
371 | /* Reinitialize predicate of BB with the true predicate. */ |
372 | |
373 | static inline void |
374 | reset_bb_predicate (basic_block bb) |
375 | { |
376 | if (!bb_has_predicate (bb)) |
377 | init_bb_predicate (bb); |
378 | else |
379 | { |
380 | release_bb_predicate (bb); |
381 | set_bb_predicate (bb, boolean_true_node); |
382 | } |
383 | } |
384 | |
/* Returns a new SSA_NAME of type TYPE that is assigned the value of
   the expression EXPR.  Inserts the statement created for this
   computation before GSI and leaves the iterator GSI at the same
   statement.  */

static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, name: "_ifc_" );
  gimple *stmt = gimple_build_assign (new_name, expr);
  /* Copy the virtual use from the statement at GSI so EXPR may contain
     memory loads without breaking the virtual SSA web.  */
  gimple_set_vuse (g: stmt, vuse: gimple_vuse (g: gsi_stmt (i: *gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}
399 | |
400 | /* Return true when COND is a false predicate. */ |
401 | |
402 | static inline bool |
403 | is_false_predicate (tree cond) |
404 | { |
405 | return (cond != NULL_TREE |
406 | && (cond == boolean_false_node |
407 | || integer_zerop (cond))); |
408 | } |
409 | |
410 | /* Return true when COND is a true predicate. */ |
411 | |
412 | static inline bool |
413 | is_true_predicate (tree cond) |
414 | { |
415 | return (cond == NULL_TREE |
416 | || cond == boolean_true_node |
417 | || integer_onep (cond)); |
418 | } |
419 | |
420 | /* Returns true when BB has a predicate that is not trivial: true or |
421 | NULL_TREE. */ |
422 | |
423 | static inline bool |
424 | is_predicated (basic_block bb) |
425 | { |
426 | return !is_true_predicate (cond: bb_predicate (bb)); |
427 | } |
428 | |
/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  Returns ERROR_MARK when COND cannot be
   decomposed into a single comparison.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  /* For an SSA name, look through its defining assignment.  */
  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (gs: s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (gs: s);
	  *op1 = gimple_assign_rhs2 (gs: s);
	  return gimple_assign_rhs_code (gs: s);
	}

      else if (gimple_assign_rhs_code (gs: s) == TRUTH_NOT_EXPR)
	{
	  /* Recurse on the negated operand, then invert the comparison
	     code; HONOR_NANS keeps the inversion correct for floats.  */
	  tree op = gimple_assign_rhs1 (gs: s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (cond: op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison expression.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
469 | |
/* Returns the fold of predicate C1 OR C2 at location LOC.  When both
   predicates parse to comparisons, try to combine them into a single
   simpler comparison first; otherwise build a plain TRUTH_OR_EXPR.  */

static tree
fold_or_predicates (location_t loc, tree c1, tree c2)
{
  tree op1a, op1b, op2a, op2b;
  enum tree_code code1 = parse_predicate (cond: c1, op0: &op1a, op1: &op1b);
  enum tree_code code2 = parse_predicate (cond: c2, op0: &op2a, op1: &op2b);

  if (code1 != ERROR_MARK && code2 != ERROR_MARK)
    {
      tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
					  code2, op2a, op2b);
      if (t)
	return t;
    }

  return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
}
489 | |
/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME. */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  /* Let the match.pd machinery simplify COND ? RHS : LHS; when the
     result is a plain value, an ABS, a MIN or a MAX, return that
     directly instead of wrapping it in a COND_EXPR.  */
  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (op: &cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
524 | |
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* A trivially true condition adds no information.  */
  if (is_true_predicate (cond: nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  BB is cd-equivalent to its immediate
     dominator when BB post-dominates it.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (bb: dom_bb);
      if (!is_true_predicate (cond: bc))
	set_bb_predicate (bb, cond: bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Use predicate of bb#%d for bb#%d\n" ,
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition into any predicate already recorded for BB.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), c1: nc, c2: bc);
      if (is_true_predicate (cond: bc))
	{
	  /* The disjunction folded to true: BB is always executed.  */
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the predicate if needed, stashing the generated
     statements on BB for later insertion.  */
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, cond: bc);
}
590 | |
591 | /* Add the condition COND to the previous condition PREV_COND, and add |
592 | this to the predicate list of the destination of edge E. LOOP is |
593 | the loop to be if-converted. */ |
594 | |
595 | static void |
596 | add_to_dst_predicate_list (class loop *loop, edge e, |
597 | tree prev_cond, tree cond) |
598 | { |
599 | if (!flow_bb_inside_loop_p (loop, e->dest)) |
600 | return; |
601 | |
602 | if (!is_true_predicate (cond: prev_cond)) |
603 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
604 | prev_cond, cond); |
605 | |
606 | if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest)) |
607 | add_to_predicate_list (loop, bb: e->dest, nc: cond); |
608 | } |
609 | |
610 | /* Return true if one of the successor edges of BB exits LOOP. */ |
611 | |
612 | static bool |
613 | bb_with_exit_edge_p (const class loop *loop, basic_block bb) |
614 | { |
615 | edge e; |
616 | edge_iterator ei; |
617 | |
618 | FOR_EACH_EDGE (e, ei, bb->succs) |
619 | if (loop_exit_edge_p (loop, e)) |
620 | return true; |
621 | |
622 | return false; |
623 | } |
624 | |
/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   below two conditions are satisfied:

     1) Number of PHI arguments with different values equals to 2 and one
	argument has the only occurrence.
     2) The edge corresponding to the unique argument isn't critical edge.

   Such PHI can be handled as PHIs have only two arguments.  For example,
   below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (gs: phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values T1 and T2,
     counting occurrences (N1/N2) and remembering the last index of
     each (I1/I2).  A third distinct value disqualifies the PHI.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (gs: phi, index: i);
      if (t1 == NULL || operand_equal_p (t1, arg, flags: 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, flags: 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  /* One of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, i: (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
683 | |
684 | /* Return true when PHI is if-convertible. PHI is part of loop LOOP |
685 | and it belongs to basic block BB. Note at this point, it is sure |
686 | that PHI is if-convertible. This function updates global variable |
687 | ANY_COMPLICATED_PHI if PHI is complicated. */ |
688 | |
689 | static bool |
690 | if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi) |
691 | { |
692 | if (dump_file && (dump_flags & TDF_DETAILS)) |
693 | { |
694 | fprintf (stream: dump_file, format: "-------------------------\n" ); |
695 | print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); |
696 | } |
697 | |
698 | if (bb != loop->header |
699 | && gimple_phi_num_args (gs: phi) > 2 |
700 | && !phi_convertible_by_degenerating_args (phi)) |
701 | any_complicated_phi = true; |
702 | |
703 | return true; |
704 | } |
705 | |
/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  /* True if this DR is read or written on every loop iteration.  */
  bool rw_unconditionally;
  /* True if this DR is written on every loop iteration.  */
  bool w_unconditionally;
  /* True if the base object of this DR is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* OR of the predicates under which this DR is read or written.  */
  tree rw_predicate;
  /* OR of the predicates under which this DR is written.  */
  tree w_predicate;
  /* OR of the predicates under which the base object is written.  */
  tree base_w_predicate;
};

/* Accessors for the ifc_dr attached to a data reference's aux field.  */
#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
723 | |
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* The predicate of the block containing the access.  */
  tree ca = bb_predicate (bb: gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for an innermost behavior becomes the master for
     all DRs with that behavior.  */
  master_dr = &innermost_DR_map->get_or_insert (k: innermost, existed: &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      /* Accumulate write predicates; when they OR up to true, the DR
	 is written on every iteration.  */
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, c1: ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  /* Likewise for read-or-write predicates.  */
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, c1: ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      /* Track, per base object, whether it is written unconditionally
	 at least once.  */
      base_master_dr = &baseref_DR_map->get_or_insert (k: base_ref, existed: &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, c1: ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
774 | |
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  DTA is the loop being
   analyzed (callback data for for_each_index).  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not extend over their declared size.  */
  if (array_ref_flexible_size_p (ref))
    return false;

  /* Describe the index as INIT + ITERATION * STEP via scalar
     evolution.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, chrec: ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial value of IDX is within bound.  */
  if (wi::to_widest (t: init) < wi::to_widest (t: low)
      || wi::to_widest (t: init) > wi::to_widest (t: high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* Compute the distance from INIT to the bound being approached, and
     the magnitude of each step towards it.  */
  if (wi::to_widest (t: step) < 0)
    {
      delta = wi::to_widest (t: init) - wi::to_widest (t: low);
      wi_step = -wi::to_widest (t: step);
    }
  else
    {
      delta = wi::to_widest (t: high) - wi::to_widest (t: init);
      wi_step = wi::to_widest (t: step);
    }

  valid_niter = wi::div_floor (x: delta, y: wi_step, sgn: SIGNED, overflow: &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
844 | |
845 | /* Return TRUE if ref is a within bound array reference. */ |
846 | |
847 | static bool |
848 | ref_within_array_bound (gimple *stmt, tree ref) |
849 | { |
850 | class loop *loop = loop_containing_stmt (stmt); |
851 | |
852 | gcc_assert (loop != NULL); |
853 | return for_each_index (&ref, idx_within_array_bound, loop); |
854 | } |
855 | |
856 | |
857 | /* Given a memory reference expression T, return TRUE if base object |
858 | it refers to is writable. The base object of a memory reference |
859 | is the main object being referenced, which is returned by function |
860 | get_base_address. */ |
861 | |
862 | static bool |
863 | base_object_writable (tree ref) |
864 | { |
865 | tree base_tree = get_base_address (t: ref); |
866 | |
867 | return (base_tree |
868 | && DECL_P (base_tree) |
869 | && decl_binds_to_current_def_p (base_tree) |
870 | && !TREE_READONLY (base_tree)); |
871 | } |
872 | |
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (g: stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  /* gimple_uid holds the 1-based index of STMT's DR in DRS.  */
  data_reference_p a = drs[gimple_uid (g: stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (k: innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (k: base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
         to unconditionally.  Introducing the write may still create a
         store data race, hence the flag_store_data_races gate.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
949 | |
950 | /* Return true if STMT could be converted into a masked load or store |
951 | (conditional load or store based on a mask computed from bb predicate). */ |
952 | |
953 | static bool |
954 | ifcvt_can_use_mask_load_store (gimple *stmt) |
955 | { |
956 | /* Check whether this is a load or store. */ |
957 | tree lhs = gimple_assign_lhs (gs: stmt); |
958 | bool is_load; |
959 | tree ref; |
960 | if (gimple_store_p (gs: stmt)) |
961 | { |
962 | if (!is_gimple_val (gimple_assign_rhs1 (gs: stmt))) |
963 | return false; |
964 | is_load = false; |
965 | ref = lhs; |
966 | } |
967 | else if (gimple_assign_load_p (stmt)) |
968 | { |
969 | is_load = true; |
970 | ref = gimple_assign_rhs1 (gs: stmt); |
971 | } |
972 | else |
973 | return false; |
974 | |
975 | if (may_be_nonaddressable_p (expr: ref)) |
976 | return false; |
977 | |
978 | /* Mask should be integer mode of the same size as the load/store |
979 | mode. */ |
980 | machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
981 | if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode)) |
982 | return false; |
983 | |
984 | if (can_vec_mask_load_store_p (mode, VOIDmode, is_load)) |
985 | return true; |
986 | |
987 | return false; |
988 | } |
989 | |
990 | /* Return true if STMT could be converted from an operation that is |
991 | unconditional to one that is conditional on a bb predicate mask. */ |
992 | |
993 | static bool |
994 | ifcvt_can_predicate (gimple *stmt) |
995 | { |
996 | basic_block bb = gimple_bb (g: stmt); |
997 | |
998 | if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize) |
999 | || bb->loop_father->dont_vectorize |
1000 | || gimple_has_volatile_ops (stmt)) |
1001 | return false; |
1002 | |
1003 | if (gimple_assign_single_p (gs: stmt)) |
1004 | return ifcvt_can_use_mask_load_store (stmt); |
1005 | |
1006 | tree_code code = gimple_assign_rhs_code (gs: stmt); |
1007 | tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt)); |
1008 | tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |
1009 | if (!types_compatible_p (type1: lhs_type, type2: rhs_type)) |
1010 | return false; |
1011 | internal_fn cond_fn = get_conditional_internal_fn (code); |
1012 | return (cond_fn != IFN_LAST |
1013 | && vectorized_internal_fn_supported_p (cond_fn, lhs_type)); |
1014 | } |
1015 | |
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.

   As a side effect, sets the GF_PLF_2 flag on STMT (true when the
   statement must be predicated with a mask) and may set the globals
   need_to_predicate and need_to_rewrite_undefined.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (gs: stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "-------------------------\n" );
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  /* Only register-type values can be turned into conditional form.  */
  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (stream: dump_file, format: "stmt not suitable for ifcvt\n" );
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: false);

  /* A statement that could trap is still acceptable when either the
     memory accesses provably won't fault (ifcvt_memrefs_wont_trap) or
     the statement can be masked (ifcvt_can_predicate).  */
  if ((! gimple_vuse (g: stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, drs: refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  /* Remember via GF_PLF_2 that this stmt needs masking.  */
	  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "tree could trap...\n" );
      return false;
    }
  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	    || POINTER_TYPE_P (TREE_TYPE (lhs)))
	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
	   && arith_code_with_undefined_signed_overflow
				(gimple_assign_rhs_code (gs: stmt)))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (g: stmt))
    need_to_predicate = true;

  return true;
}
1085 | |
/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is builtins call,
   - it is a call to a function with a SIMD clone.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (g: stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      /* These carry no value that would need predication.  */
      return true;

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	/* There are some IFN_s that are used to replace builtins but have the
	   same semantics.  Even if MASK_CALL cannot handle them vectorable_call
	   will insert the proper selection, so do not block conversion.  */
	int flags = gimple_call_flags (stmt);
	if ((flags & ECF_CONST)
	    && !(flags & ECF_LOOPING_CONST_OR_PURE)
	    && gimple_call_combined_fn (stmt) != CFN_LAST)
	  return true;

	tree fndecl = gimple_call_fndecl (gs: stmt);
	if (fndecl)
	  {
	    /* We can vectorize some builtins and functions with SIMD
	       "inbranch" clones.  */
	    struct cgraph_node *node = cgraph_node::get (decl: fndecl);
	    if (node && node->simd_clones != NULL)
	      /* Ensure that at least one clone can be "inbranch".  */
	      for (struct cgraph_node *n = node->simd_clones; n != NULL;
		   n = n->simdclone->next_clone)
		if (n->simdclone->inbranch)
		  {
		    /* Mark the call for masked execution and request
		       loop versioning.  */
		    gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
		    need_to_predicate = true;
		    return true;
		  }
	  }

	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "don't know what to do\n" );
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}
1149 | |
1150 | /* Assumes that BB has more than 1 predecessors. |
1151 | Returns false if at least one successor is not on critical edge |
1152 | and true otherwise. */ |
1153 | |
1154 | static inline bool |
1155 | all_preds_critical_p (basic_block bb) |
1156 | { |
1157 | edge e; |
1158 | edge_iterator ei; |
1159 | |
1160 | FOR_EACH_EDGE (e, ei, bb->preds) |
1161 | if (EDGE_COUNT (e->src->succs) == 1) |
1162 | return false; |
1163 | return true; |
1164 | } |
1165 | |
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "----------[%d]-------------\n" , bb->index);

  /* Support at most a two-way conditional at the end of the block.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* Calls that alter control flow cannot be predicated.  */
  if (gcall *call = safe_dyn_cast <gcall *> (p: *gsi_last_bb (bb)))
    if (gimple_call_ctrl_altering_p (gs: call))
      return false;

  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "basic block after exit bb but before latch\n" );
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "non empty basic block after exit bb\n" );
	  return false;
	}
      /* NOTE: at this point BB == LOOP->latch (the first branch above
	 returned otherwise), so the bb == loop->latch test below is
	 always true and only the dominance check is decisive.  */
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (stream: dump_file, format: "latch is not dominated by exit_block\n" );
	    return false;
	  }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (stream: dump_file, format: "Difficult to handle edges\n" );
	return false;
      }

  return true;
}
1228 | |
1229 | /* Return true when all predecessor blocks of BB are visited. The |
1230 | VISITED bitmap keeps track of the visited blocks. */ |
1231 | |
1232 | static bool |
1233 | pred_blocks_visited_p (basic_block bb, bitmap *visited) |
1234 | { |
1235 | edge e; |
1236 | edge_iterator ei; |
1237 | FOR_EACH_EDGE (e, ei, bb->preds) |
1238 | if (!bitmap_bit_p (*visited, e->src->index)) |
1239 | return false; |
1240 | |
1241 | return true; |
1242 | } |
1243 | |
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.

   Returns NULL when the loop contains an irreducible region or a
   statement kind that if-conversion cannot handle.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be predicated; give up.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (ptr: blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (ptr: blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once all its predecessors were selected
	     (the header is always selectable).  */
	  if (pred_blocks_visited_p (bb, visited: &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (ptr: blocks_in_bfs_order);
  BITMAP_FREE (visited);

  /* Go through loop and reject if-conversion or lowering of bitfields if we
     encounter statements we do not believe the vectorizer will be able to
     handle.  If adding a new type of statement here, make sure
     'ifcvt_local_dce' is also able to handle it properly.  */
  for (index = 0; index < loop->num_nodes; index++)
    {
      basic_block bb = blocks[index];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
	switch (gimple_code (g: gsi_stmt (i: gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (gs: as_a <glabel *> (p: gsi_stmt (i: gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  {
		    free (ptr: blocks);
		    return NULL;
		  }
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    /* Clear the uid; data-reference analysis assigns fresh
	       DR indices later (see if_convertible_loop_p_1).  */
	    gimple_set_uid (g: gsi_stmt (i: gsi), uid: 0);
	    break;
	  default:
	    free (ptr: blocks);
	    return NULL;
	  }
    }
  return blocks;
}
1340 | |
/* Computes the predicates for all the basic blocks in LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (bb: ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      if (gcond *stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)))
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (g: stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (gs: stmt),
				 boolean_type_node,
				 gimple_cond_lhs (gs: stmt),
				 gimple_cond_rhs (gs: stmt));
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, code: (tree_code)cexpr.code, boolean_type_node,
			    arg0: cexpr.ops[0], arg1: cexpr.ops[1]);
	  else
	    c = build2_loc (loc, code: gimple_cond_code (gs: stmt),
			    boolean_type_node,
			    arg0: gimple_cond_lhs (gs: stmt),
			    arg1: gimple_cond_rhs (gs: stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (g: stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, e: true_edge, prev_cond: unshare_expr (cond),
				     cond: unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, code: TRUTH_NOT_EXPR, boolean_type_node,
			   arg1: unshare_expr (c));
	  add_to_dst_predicate_list (loop, e: false_edge,
				     prev_cond: unshare_expr (cond), cond: c2);

	  /* The condition has been consumed by the two edges.  */
	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb: bb_n, nc: cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (bb: loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1451 | |
1452 | /* Build region by adding loop pre-header and post-header blocks. */ |
1453 | |
1454 | static vec<basic_block> |
1455 | build_region (class loop *loop) |
1456 | { |
1457 | vec<basic_block> region = vNULL; |
1458 | basic_block exit_bb = NULL; |
1459 | |
1460 | gcc_assert (ifc_bbs); |
1461 | /* The first element is loop pre-header. */ |
1462 | region.safe_push (obj: loop_preheader_edge (loop)->src); |
1463 | |
1464 | for (unsigned int i = 0; i < loop->num_nodes; i++) |
1465 | { |
1466 | basic_block bb = ifc_bbs[i]; |
1467 | region.safe_push (obj: bb); |
1468 | /* Find loop postheader. */ |
1469 | edge e; |
1470 | edge_iterator ei; |
1471 | FOR_EACH_EDGE (e, ei, bb->succs) |
1472 | if (loop_exit_edge_p (loop, e)) |
1473 | { |
1474 | exit_bb = e->dest; |
1475 | break; |
1476 | } |
1477 | } |
1478 | /* The last element is loop post-header. */ |
1479 | gcc_assert (exit_bb); |
1480 | region.safe_push (obj: exit_bb); |
1481 | return region; |
1482 | } |
1483 | |
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Check every block for if-convertibility, tracking the (single)
     exit block once it is seen.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  data_reference_p dr;

  /* Maps consumed by ifcvt_memrefs_wont_trap; deleted by the caller
     (if_convertible_loop_p).  */
  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  for (i = 0; refs->iterate (ix: i, ptr: &dr); i++)
    {
      tree ref = DR_REF (dr);

      /* Attach per-DR if-conversion bookkeeping and give the stmt a
	 uid of DR index + 1 (0 means "no DR seen").  */
      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), uid: i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (s: &DR_INNERMOST (dr), c: 0, n: sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (a: dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (i: itr); gsi_next (i: &itr))
	  if (!if_convertible_stmt_p (stmt: gsi_stmt (i: itr), refs: *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (i: itr); gsi_next (i: &itr))
	if (!if_convertible_phi_p (loop, bb, phi: itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (stream: dump_file, format: "Applying if-conversion\n" );

  return true;
}
1587 | |
1588 | /* Return true when LOOP is if-convertible. |
1589 | LOOP is if-convertible if: |
1590 | - it is innermost, |
1591 | - it has two or more basic blocks, |
1592 | - it has only one exit, |
1593 | - loop header is not the exit edge, |
1594 | - if its basic blocks and phi nodes are if convertible. */ |
1595 | |
1596 | static bool |
1597 | if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs) |
1598 | { |
1599 | edge e; |
1600 | edge_iterator ei; |
1601 | bool res = false; |
1602 | |
1603 | /* Handle only innermost loop. */ |
1604 | if (!loop || loop->inner) |
1605 | { |
1606 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1607 | fprintf (stream: dump_file, format: "not innermost loop\n" ); |
1608 | return false; |
1609 | } |
1610 | |
1611 | /* If only one block, no need for if-conversion. */ |
1612 | if (loop->num_nodes <= 2) |
1613 | { |
1614 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1615 | fprintf (stream: dump_file, format: "less than 2 basic blocks\n" ); |
1616 | return false; |
1617 | } |
1618 | |
1619 | /* If one of the loop header's edge is an exit edge then do not |
1620 | apply if-conversion. */ |
1621 | FOR_EACH_EDGE (e, ei, loop->header->succs) |
1622 | if (loop_exit_edge_p (loop, e)) |
1623 | return false; |
1624 | |
1625 | res = if_convertible_loop_p_1 (loop, refs); |
1626 | |
1627 | delete innermost_DR_map; |
1628 | innermost_DR_map = NULL; |
1629 | |
1630 | delete baseref_DR_map; |
1631 | baseref_DR_map = NULL; |
1632 | |
1633 | return res; |
1634 | } |
1635 | |
1636 | /* Return reduc_1 if has_nop. |
1637 | |
1638 | if (...) |
1639 | tmp1 = (unsigned type) reduc_1; |
1640 | tmp2 = tmp1 + rhs2; |
1641 | reduc_3 = (signed type) tmp2. */ |
1642 | static tree |
1643 | strip_nop_cond_scalar_reduction (bool has_nop, tree op) |
1644 | { |
1645 | if (!has_nop) |
1646 | return op; |
1647 | |
1648 | if (TREE_CODE (op) != SSA_NAME) |
1649 | return NULL_TREE; |
1650 | |
1651 | gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op)); |
1652 | if (!stmt |
1653 | || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) |
1654 | || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE |
1655 | (gimple_assign_rhs1 (stmt)))) |
1656 | return NULL_TREE; |
1657 | |
1658 | return gimple_assign_rhs1 (gs: stmt); |
1659 | } |
1660 | |
1661 | /* Returns true if def-stmt for phi argument ARG is simple increment/decrement |
1662 | which is in predicated basic block. |
1663 | In fact, the following PHI pattern is searching: |
1664 | loop-header: |
1665 | reduc_1 = PHI <..., reduc_2> |
1666 | ... |
1667 | if (...) |
1668 | reduc_3 = ... |
1669 | reduc_2 = PHI <reduc_1, reduc_3> |
1670 | |
1671 | ARG_0 and ARG_1 are correspondent PHI arguments. |
1672 | REDUC, OP0 and OP1 contain reduction stmt and its operands. |
1673 | EXTENDED is true if PHI has > 2 arguments. */ |
1674 | |
1675 | static bool |
1676 | is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, |
1677 | tree *op0, tree *op1, bool extended, bool* has_nop, |
1678 | gimple **nop_reduc) |
1679 | { |
1680 | tree lhs, r_op1, r_op2, r_nop1, r_nop2; |
1681 | gimple *stmt; |
1682 | gimple * = NULL; |
1683 | enum tree_code reduction_op; |
1684 | basic_block bb = gimple_bb (g: phi); |
1685 | class loop *loop = bb->loop_father; |
1686 | edge latch_e = loop_latch_edge (loop); |
1687 | imm_use_iterator imm_iter; |
1688 | use_operand_p use_p; |
1689 | edge e; |
1690 | edge_iterator ei; |
1691 | bool result = *has_nop = false; |
1692 | if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME) |
1693 | return false; |
1694 | |
1695 | if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI) |
1696 | { |
1697 | lhs = arg_1; |
1698 | header_phi = SSA_NAME_DEF_STMT (arg_0); |
1699 | stmt = SSA_NAME_DEF_STMT (arg_1); |
1700 | } |
1701 | else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI) |
1702 | { |
1703 | lhs = arg_0; |
1704 | header_phi = SSA_NAME_DEF_STMT (arg_1); |
1705 | stmt = SSA_NAME_DEF_STMT (arg_0); |
1706 | } |
1707 | else |
1708 | return false; |
1709 | if (gimple_bb (g: header_phi) != loop->header) |
1710 | return false; |
1711 | |
1712 | if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi)) |
1713 | return false; |
1714 | |
1715 | if (gimple_code (g: stmt) != GIMPLE_ASSIGN |
1716 | || gimple_has_volatile_ops (stmt)) |
1717 | return false; |
1718 | |
1719 | if (!flow_bb_inside_loop_p (loop, gimple_bb (g: stmt))) |
1720 | return false; |
1721 | |
1722 | if (!is_predicated (bb: gimple_bb (g: stmt))) |
1723 | return false; |
1724 | |
1725 | /* Check that stmt-block is predecessor of phi-block. */ |
1726 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs) |
1727 | if (e->dest == bb) |
1728 | { |
1729 | result = true; |
1730 | break; |
1731 | } |
1732 | if (!result) |
1733 | return false; |
1734 | |
1735 | if (!has_single_use (var: lhs)) |
1736 | return false; |
1737 | |
1738 | reduction_op = gimple_assign_rhs_code (gs: stmt); |
1739 | |
1740 | /* Catch something like below |
1741 | |
1742 | loop-header: |
1743 | reduc_1 = PHI <..., reduc_2> |
1744 | ... |
1745 | if (...) |
1746 | tmp1 = (unsigned type) reduc_1; |
1747 | tmp2 = tmp1 + rhs2; |
1748 | reduc_3 = (signed type) tmp2; |
1749 | |
1750 | reduc_2 = PHI <reduc_1, reduc_3> |
1751 | |
1752 | and convert to |
1753 | |
1754 | reduc_2 = PHI <0, reduc_1> |
1755 | tmp1 = (unsigned type)reduc_1; |
1756 | ifcvt = cond_expr ? rhs2 : 0 |
1757 | tmp2 = tmp1 +/- ifcvt; |
1758 | reduc_1 = (signed type)tmp2; */ |
1759 | |
1760 | if (CONVERT_EXPR_CODE_P (reduction_op)) |
1761 | { |
1762 | lhs = gimple_assign_rhs1 (gs: stmt); |
1763 | if (TREE_CODE (lhs) != SSA_NAME |
1764 | || !has_single_use (var: lhs)) |
1765 | return false; |
1766 | |
1767 | *nop_reduc = stmt; |
1768 | stmt = SSA_NAME_DEF_STMT (lhs); |
1769 | if (gimple_bb (g: stmt) != gimple_bb (g: *nop_reduc) |
1770 | || !is_gimple_assign (gs: stmt)) |
1771 | return false; |
1772 | |
1773 | *has_nop = true; |
1774 | reduction_op = gimple_assign_rhs_code (gs: stmt); |
1775 | } |
1776 | |
1777 | if (reduction_op != PLUS_EXPR |
1778 | && reduction_op != MINUS_EXPR |
1779 | && reduction_op != MULT_EXPR |
1780 | && reduction_op != BIT_IOR_EXPR |
1781 | && reduction_op != BIT_XOR_EXPR |
1782 | && reduction_op != BIT_AND_EXPR) |
1783 | return false; |
1784 | r_op1 = gimple_assign_rhs1 (gs: stmt); |
1785 | r_op2 = gimple_assign_rhs2 (gs: stmt); |
1786 | |
1787 | r_nop1 = strip_nop_cond_scalar_reduction (has_nop: *has_nop, op: r_op1); |
1788 | r_nop2 = strip_nop_cond_scalar_reduction (has_nop: *has_nop, op: r_op2); |
1789 | |
1790 | /* Make R_OP1 to hold reduction variable. */ |
1791 | if (r_nop2 == PHI_RESULT (header_phi) |
1792 | && commutative_tree_code (reduction_op)) |
1793 | { |
1794 | std::swap (a&: r_op1, b&: r_op2); |
1795 | std::swap (a&: r_nop1, b&: r_nop2); |
1796 | } |
1797 | else if (r_nop1 != PHI_RESULT (header_phi)) |
1798 | return false; |
1799 | |
1800 | if (*has_nop) |
1801 | { |
1802 | /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ |
1803 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) |
1804 | { |
1805 | gimple *use_stmt = USE_STMT (use_p); |
1806 | if (is_gimple_debug (gs: use_stmt)) |
1807 | continue; |
1808 | if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) |
1809 | continue; |
1810 | if (use_stmt != phi) |
1811 | return false; |
1812 | } |
1813 | } |
1814 | |
1815 | /* Check that R_OP1 is used in reduction stmt or in PHI only. */ |
1816 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1) |
1817 | { |
1818 | gimple *use_stmt = USE_STMT (use_p); |
1819 | if (is_gimple_debug (gs: use_stmt)) |
1820 | continue; |
1821 | if (use_stmt == stmt) |
1822 | continue; |
1823 | if (gimple_code (g: use_stmt) != GIMPLE_PHI) |
1824 | return false; |
1825 | } |
1826 | |
1827 | *op0 = r_op1; *op1 = r_op2; |
1828 | *reduc = stmt; |
1829 | return true; |
1830 | } |
1831 | |
1832 | /* Converts conditional scalar reduction into unconditional form, e.g. |
1833 | bb_4 |
1834 | if (_5 != 0) goto bb_5 else goto bb_6 |
1835 | end_bb_4 |
1836 | bb_5 |
1837 | res_6 = res_13 + 1; |
1838 | end_bb_5 |
1839 | bb_6 |
1840 | # res_2 = PHI <res_13(4), res_6(5)> |
1841 | end_bb_6 |
1842 | |
1843 | will be converted into sequence |
1844 | _ifc__1 = _5 != 0 ? 1 : 0; |
1845 | res_2 = res_13 + _ifc__1; |
1846 | Argument SWAP tells that arguments of conditional expression should be |
1847 | swapped. |
1848 | If LOOP_VERSIONED is true if we assume that we versioned the loop for |
1849 | vectorization. In that case we can create a COND_OP. |
1850 | Returns rhs of resulting PHI assignment. */ |
1851 | |
static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap,
			       bool has_nop, gimple* nop_reduc,
			       bool loop_versioned)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (gs: reduc);
  tree lhs = gimple_assign_lhs (gs: reduc);
  /* Temporary that holds either the COND_OP result or the masked
     second operand of the reduction.  */
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, name: "_ifc_" );
  tree c;
  enum tree_code reduction_op = gimple_assign_rhs_code (gs: reduc);
  /* Neutral element for REDUCTION_OP; selecting it on the inactive path
     leaves the reduction value unchanged.  */
  tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
					       NULL, false);
  gimple_seq stmts = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Found cond scalar reduction.\n" );
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
     The COND_OP will have a neutral_op else value.  */
  internal_fn ifn;
  ifn = get_conditional_internal_fn (reduction_op);
  if (loop_versioned && ifn != IFN_LAST
      && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
      && !swap)
    {
      /* TMP = COND ? OP0 <op> OP1 : OP0.  */
      gcall *cond_call = gimple_build_call_internal (ifn, 4,
						     unshare_expr (cond),
						     op0, op1, op0);
      gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
      gimple_call_set_lhs (gs: cond_call, lhs: tmp);
      rhs = tmp;
    }
  else
    {
      /* Build cond expression using COND and constant operand
	 of reduction rhs.  */
      c = fold_build_cond_expr (TREE_TYPE (rhs1),
				cond: unshare_expr (cond),
				rhs: swap ? op_nochange : op1,
				lhs: swap ? op1 : op_nochange);
      /* Create assignment stmt and insert it at GSI.  */
      new_assign = gimple_build_assign (tmp, c);
      gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
      /* Build rhs for unconditional increment/decrement/logic_operation.  */
      rhs = gimple_build (seq: &stmts, code: reduction_op,
			  TREE_TYPE (rhs1), ops: op0, ops: tmp);
    }

  if (has_nop)
    {
      /* The reduction was computed in a converted type (see the *has_nop
	 handling in is_cond_scalar_reduction); convert the result back and
	 remove the now-dead conversion statement.  */
      rhs = gimple_convert (seq: &stmts,
			    TREE_TYPE (gimple_assign_lhs (nop_reduc)), op: rhs);
      stmt_it = gsi_for_stmt (nop_reduc);
      gsi_remove (&stmt_it, true);
      release_defs (nop_reduc);
    }
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
1923 | |
1924 | /* Generate a simplified conditional. */ |
1925 | |
1926 | static tree |
1927 | gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set) |
1928 | { |
1929 | /* Check if the value is already live in a previous branch. This resolves |
1930 | nested conditionals from diamond PHI reductions. */ |
1931 | if (TREE_CODE (cond) == SSA_NAME) |
1932 | { |
1933 | gimple *stmt = SSA_NAME_DEF_STMT (cond); |
1934 | gassign *assign = NULL; |
1935 | if ((assign = as_a <gassign *> (p: stmt)) |
1936 | && gimple_assign_rhs_code (gs: assign) == BIT_AND_EXPR) |
1937 | { |
1938 | tree arg1 = gimple_assign_rhs1 (gs: assign); |
1939 | tree arg2 = gimple_assign_rhs2 (gs: assign); |
1940 | if (cond_set.contains (k: { arg1, 1 })) |
1941 | arg1 = boolean_true_node; |
1942 | else |
1943 | arg1 = gen_simplified_condition (cond: arg1, cond_set); |
1944 | |
1945 | if (cond_set.contains (k: { arg2, 1 })) |
1946 | arg2 = boolean_true_node; |
1947 | else |
1948 | arg2 = gen_simplified_condition (cond: arg2, cond_set); |
1949 | |
1950 | cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2); |
1951 | } |
1952 | } |
1953 | return cond; |
1954 | } |
1955 | |
/* Structure used to track meta-data on PHI arguments used to generate
   most efficient comparison sequence to flatten a PHI node.  */
1958 | |
typedef struct ifcvt_arg_entry
{
  /* The PHI node argument value.  */
  tree arg;

  /* The number of compares required to reach this PHI node from start of the
     BB being if-converted.  */
  unsigned num_compares;

  /* The number of times this PHI node argument appears in the current PHI
     node.  */
  unsigned occurs;

  /* The indices at which this PHI arg occurs inside the PHI node.  Not
     owned; points into the per-PHI argument map built by the caller
     (see predicate_scalar_phi) — TODO confirm lifetime when reusing.  */
  vec <int> *indexes;
} ifcvt_arg_entry_t;
1975 | |
1976 | /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT |
1977 | as to whether the condition is inverted. */ |
1978 | |
static tree
gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
		       gimple_stmt_iterator *gsi,
		       scalar_cond_masked_set_type &cond_set, bool *invert)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  *invert = false;
  len = arg.indexes->length ();
  gcc_assert (len > 0);
  /* OR together the predicates of every incoming edge that carries this
     PHI argument.  */
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, i: (*arg.indexes)[i]);
      c = bb_predicate (bb: e->src);
      /* A true predicate subsumes all others: the combined condition
	 is simply true.  */
      if (is_true_predicate (cond: c))
	{
	  cond = c;
	  break;
	}
      /* If we have just a single inverted predicate, signal that and
	 instead invert the COND_EXPR arms.  */
      if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
	{
	  c = TREE_OPERAND (c, 0);
	  *invert = true;
	}

      c = gen_simplified_condition (cond: c, cond_set);
      c = force_gimple_operand_gsi (gsi, unshare_expr (c),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c1: c, c2: cond);
	  cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
					   NULL_TREE, true, GSI_SAME_STMT);
	}
      else
	cond = c;

      /* Register the new possibly simplified conditional.  When more than 2
	 entries in a phi node we chain entries in the false branch, so the
	 inverted condition is active.  */
      scalar_cond_masked_key pred_cond ({ cond, 1 });
      if (!*invert)
	pred_cond.inverted_p = !pred_cond.inverted_p;
      cond_set.add (k: pred_cond);
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
2034 | |
2035 | /* Create the smallest nested conditional possible. On pre-order we record |
2036 | which conditionals are live, and on post-order rewrite the chain by removing |
2037 | already active conditions. |
2038 | |
2039 | As an example we simplify: |
2040 | |
2041 | _7 = a_10 < 0; |
2042 | _21 = a_10 >= 0; |
2043 | _22 = a_10 < e_11(D); |
2044 | _23 = _21 & _22; |
2045 | _ifc__42 = _23 ? t_13 : 0; |
2046 | t_6 = _7 ? 1 : _ifc__42 |
2047 | |
2048 | into |
2049 | |
2050 | _7 = a_10 < 0; |
2051 | _22 = a_10 < e_11(D); |
2052 | _ifc__42 = _22 ? t_13 : 0; |
2053 | t_6 = _7 ? 1 : _ifc__42; |
2054 | |
2055 | which produces better code. */ |
2056 | |
static tree
gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
			scalar_cond_masked_set_type &cond_set, tree type,
			gimple **res_stmt, tree lhs0,
			vec<struct ifcvt_arg_entry> &args, unsigned idx)
{
  /* Recursion base: past the last argument, the final entry becomes the
     unconditional "else" value of the innermost COND_EXPR.  */
  if (idx == args.length ())
    return args[idx - 1].arg;

  bool invert;
  tree cond = gen_phi_arg_condition (phi, arg&: args[idx - 1], gsi, cond_set,
				     invert: &invert);
  /* Build the inner (false-branch) chain first, so its SSA temporary is
     available as the else arm here.  */
  tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
				      args, idx: idx + 1);

  unsigned prev = idx;
  unsigned curr = prev - 1;
  tree arg0 = args[curr].arg;
  tree rhs, lhs;
  /* The outermost assignment (idx == 1) defines LHS0, the PHI result;
     inner links of the chain get fresh temporaries.  */
  if (idx > 1)
    lhs = make_temp_ssa_name (type, NULL, name: "_ifc_" );
  else
    lhs = lhs0;

  /* INVERT set by gen_phi_arg_condition means the single predicate was a
     TRUTH_NOT_EXPR, so swap the COND_EXPR arms instead of negating.  */
  if (invert)
    rhs = fold_build_cond_expr (type, cond: unshare_expr (cond),
				rhs: arg1, lhs: arg0);
  else
    rhs = fold_build_cond_expr (type, cond: unshare_expr (cond),
				rhs: arg0, lhs: arg1);
  gassign *new_stmt = gimple_build_assign (lhs, rhs);
  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
  update_stmt (s: new_stmt);
  *res_stmt = new_stmt;
  return lhs;
}
2093 | |
2094 | /* When flattening a PHI node we have a choice of which conditions to test to |
2095 | for all the paths from the start of the dominator block of the BB with the |
2096 | PHI node. If the PHI node has X arguments we have to only test X - 1 |
2097 | conditions as the last one is implicit. It does matter which conditions we |
   test first.  We should test the shortest condition first (distance here is
   measured in the number of logical operators in the condition) and the
   longest one last.  This allows us to skip testing the most expensive
2101 | condition. To accomplish this we need to sort the conditions. P1 and P2 |
2102 | are sorted first based on the number of logical operations (num_compares) |
2103 | and then by how often they occur in the PHI node. */ |
2104 | |
2105 | static int |
2106 | cmp_arg_entry (const void *p1, const void *p2, void * /* data. */) |
2107 | { |
2108 | const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1; |
2109 | const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2; |
2110 | |
2111 | if (sval1.num_compares < sval2.num_compares) |
2112 | return -1; |
2113 | else if (sval1.num_compares > sval2.num_compares) |
2114 | return 1; |
2115 | |
2116 | if (sval1.occurs < sval2.occurs) |
2117 | return -1; |
2118 | else if (sval1.occurs > sval2.occurs) |
2119 | return 1; |
2120 | |
2121 | return 0; |
2122 | } |
2123 | |
2124 | /* Replace a scalar PHI node with a COND_EXPR using COND as condition. |
2125 | This routine can handle PHI nodes with more than two arguments. |
2126 | |
2127 | For example, |
2128 | S1: A = PHI <x1(1), x2(5)> |
2129 | is converted into, |
2130 | S2: A = cond ? x1 : x2; |
2131 | |
2132 | The generated code is inserted at GSI that points to the top of |
2133 | basic block's statement list. |
2134 | If PHI node has more than two arguments a chain of conditional |
2135 | expression is produced. |
2136 | LOOP_VERSIONED should be true if we know that the loop was versioned for |
2137 | vectorization. */ |
2138 | |
2139 | |
static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
{
  gimple *new_stmt = NULL, *reduc, *nop_reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  edge e;
  basic_block bb;
  unsigned int i;
  bool has_nop;

  res = gimple_phi_result (gs: phi);
  /* Virtual operands are handled by the caller; nothing to predicate.  */
  if (virtual_operand_p (op: res))
    return;

  /* A PHI whose result is degenerate (all args equal) or fully described
     by scalar evolution collapses to a plain copy of its argument.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (g: phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (gs: phi, index: 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Degenerate phi!\n" );
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (s: new_stmt);
      return;
    }

  bb = gimple_bb (g: phi);
  /* Keep track of conditionals already seen.  */
  scalar_cond_masked_set_type cond_set;
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (bb: first_edge->src);
      cond_set.add (k: { cond, 1 });
      /* Prefer a non-inverted predicate as FIRST_EDGE's condition.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (a&: first_edge, b&: second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (bb: second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (bb: first_edge->src);

      /* Gimplify the condition to a valid cond-expr conditional operand.  */
      cond = gen_simplified_condition (cond, cond_set);
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* Order the PHI arguments so ARG0 is the value flowing from
	 TRUE_BB.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (gs: phi, index: 1);
	  arg1 = gimple_phi_arg_def (gs: phi, index: 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (gs: phi, index: 0);
	  arg1 = gimple_phi_arg_def (gs: phi, index: 1);
	}
      if (is_cond_scalar_reduction (phi, reduc: &reduc, arg_0: arg0, arg_1: arg1,
				    op0: &op0, op1: &op1, extended: false, has_nop: &has_nop,
				    nop_reduc: &nop_reduc))
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       swap: true_bb != gimple_bb (g: reduc),
					       has_nop, nop_reduc,
					       loop_versioned);
	  redundant_ssa_names.safe_push (obj: std::make_pair (x&: res, y&: rhs));
	}
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), cond: unshare_expr (cond),
				    rhs: arg0, lhs: arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, follow_all_ssa_edges))
	{
	  new_stmt = gsi_stmt (i: new_gsi);
	  update_stmt (s: new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "new phi replacement stmt\n" );
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (gs: phi);
  /* Vector of different PHI argument values.  */
  auto_vec<ifcvt_arg_entry_t> args;

  /* Compute phi_arg_map, determine the list of unique PHI args and the indices
     where they are in the PHI node.  The indices will be used to determine
     the conditions to apply and their complexity.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (gs: phi, index: i);
      if (!phi_arg_map.get (k: arg))
	args.safe_push (obj: { .arg: arg, .num_compares: 0, .occurs: 0, NULL });
      phi_arg_map.get_or_insert (k: arg).safe_push (obj: i);
    }

  /* Determine element with max number of occurrences and complexity.  Looking
     at only number of occurrences as a measure for complexity isn't enough as
     all usages can be unique but the comparisons to reach the PHI node differ
     per branch.  */
  for (unsigned i = 0; i < args.length (); i++)
    {
      unsigned int len = 0;
      vec<int> *indices = phi_arg_map.get (k: args[i].arg);
      /* Complexity = total predicate statements over all edges carrying
	 this argument.  */
      for (int index : *indices)
	{
	  edge e = gimple_phi_arg_edge (phi, i: index);
	  len += get_bb_num_predicate_stmts (bb: e->src);
	}

      unsigned occur = indices->length ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Ranking %d as len=%d, idx=%d\n" , i, len, occur);
      args[i].num_compares = len;
      args[i].occurs = occur;
      args[i].indexes = indices;
    }

  /* Sort elements based on rankings ARGS.  */
  args.stablesort (cmp: cmp_arg_entry, NULL);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args.length () == 2
      && args[0].indexes->length () == 1)
    {
      index0 = (*args[0].indexes)[0];
      arg0 = args[0].arg;
      arg1 = args[1].arg;
      e = gimple_phi_arg_edge (phi, i: index0);
      cond = bb_predicate (bb: e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditional operand.  */
      cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
				       NULL_TREE, true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, reduc: &reduc, arg_0: arg0 , arg_1: arg1,
				      op0: &op0, op1: &op1, extended: true, has_nop: &has_nop, nop_reduc: &nop_reduc)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), cond: unshare_expr (cond),
				    rhs: swap ? arg1 : arg0,
				    lhs: swap ? arg0 : arg1);
      else
	{
	  /* Convert reduction stmt into vectorizable form.  */
	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					       swap, has_nop, nop_reduc,
					       loop_versioned);
	  redundant_ssa_names.safe_push (obj: std::make_pair (x&: res, y&: rhs));
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (s: new_stmt);
    }
  else
    {
      /* Common case: emit a chain of nested COND_EXPRs, shortest
	 condition tested first (see cmp_arg_entry).  */
      tree type = TREE_TYPE (gimple_phi_result (phi));
      gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt: &new_stmt, lhs0: res,
			      args, idx: 1);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "new extended phi replacement stmt\n" );
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
2342 | |
2343 | /* Replaces in LOOP all the scalar phi nodes other than those in the |
2344 | LOOP->header block with conditional modify expressions. |
2345 | LOOP_VERSIONED should be true if we know that the loop was versioned for |
2346 | vectorization. */ |
2347 | |
2348 | static void |
2349 | predicate_all_scalar_phis (class loop *loop, bool loop_versioned) |
2350 | { |
2351 | basic_block bb; |
2352 | unsigned int orig_loop_num_nodes = loop->num_nodes; |
2353 | unsigned int i; |
2354 | |
2355 | for (i = 1; i < orig_loop_num_nodes; i++) |
2356 | { |
2357 | gphi *phi; |
2358 | gimple_stmt_iterator gsi; |
2359 | gphi_iterator phi_gsi; |
2360 | bb = ifc_bbs[i]; |
2361 | |
2362 | if (bb == loop->header) |
2363 | continue; |
2364 | |
2365 | phi_gsi = gsi_start_phis (bb); |
2366 | if (gsi_end_p (i: phi_gsi)) |
2367 | continue; |
2368 | |
2369 | gsi = gsi_after_labels (bb); |
2370 | while (!gsi_end_p (i: phi_gsi)) |
2371 | { |
2372 | phi = phi_gsi.phi (); |
2373 | if (virtual_operand_p (op: gimple_phi_result (gs: phi))) |
2374 | gsi_next (i: &phi_gsi); |
2375 | else |
2376 | { |
2377 | predicate_scalar_phi (phi, gsi: &gsi, loop_versioned); |
2378 | remove_phi_node (&phi_gsi, false); |
2379 | } |
2380 | } |
2381 | } |
2382 | } |
2383 | |
2384 | /* Insert in each basic block of LOOP the statements produced by the |
2385 | gimplification of the predicates. */ |
2386 | |
2387 | static void |
2388 | insert_gimplified_predicates (loop_p loop) |
2389 | { |
2390 | unsigned int i; |
2391 | |
2392 | for (i = 0; i < loop->num_nodes; i++) |
2393 | { |
2394 | basic_block bb = ifc_bbs[i]; |
2395 | gimple_seq stmts; |
2396 | if (!is_predicated (bb)) |
2397 | gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL); |
2398 | if (!is_predicated (bb)) |
2399 | { |
2400 | /* Do not insert statements for a basic block that is not |
2401 | predicated. Also make sure that the predicate of the |
2402 | basic block is set to true. */ |
2403 | reset_bb_predicate (bb); |
2404 | continue; |
2405 | } |
2406 | |
2407 | stmts = bb_predicate_gimplified_stmts (bb); |
2408 | if (stmts) |
2409 | { |
2410 | if (need_to_predicate) |
2411 | { |
2412 | /* Insert the predicate of the BB just after the label, |
2413 | as the if-conversion of memory writes will use this |
2414 | predicate. */ |
2415 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
2416 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
2417 | } |
2418 | else |
2419 | { |
2420 | /* Insert the predicate of the BB at the end of the BB |
2421 | as this would reduce the register pressure: the only |
2422 | use of this predicate will be in successor BBs. */ |
2423 | gimple_stmt_iterator gsi = gsi_last_bb (bb); |
2424 | |
2425 | if (gsi_end_p (i: gsi) |
2426 | || stmt_ends_bb_p (gsi_stmt (i: gsi))) |
2427 | gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); |
2428 | else |
2429 | gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT); |
2430 | } |
2431 | |
2432 | /* Once the sequence is code generated, set it to NULL. */ |
2433 | set_bb_predicate_gimplified_stmts (bb, NULL, preserve_counts: true); |
2434 | } |
2435 | } |
2436 | } |
2437 | |
2438 | /* Helper function for predicate_statements. Returns index of existent |
2439 | mask if it was created for given SIZE and -1 otherwise. */ |
2440 | |
2441 | static int |
2442 | mask_exists (int size, const vec<int> &vec) |
2443 | { |
2444 | unsigned int ix; |
2445 | int v; |
2446 | FOR_EACH_VEC_ELT (vec, ix, v) |
2447 | if (v == size) |
2448 | return (int) ix; |
2449 | return -1; |
2450 | } |
2451 | |
2452 | /* Helper function for predicate_statements. STMT is a memory read or |
2453 | write and it needs to be predicated by MASK. Return a statement |
2454 | that does so. */ |
2455 | |
static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (gs: stmt);
  tree rhs = gimple_assign_rhs1 (gs: stmt);
  /* For a load the memory reference is the RHS; for a store, the LHS.  */
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
					true, NULL_TREE, true, GSI_SAME_STMT);
  /* Alias-pointer constant whose value encodes the object alignment,
     as expected by IFN_MASK_LOAD/STORE.  */
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
			    get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
		   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      /* Load: LHS = .MASK_LOAD (ADDR, PTR, MASK).  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
				      ptr, mask);
      gimple_call_set_lhs (gs: new_stmt, lhs);
      gimple_set_vuse (g: new_stmt, vuse: gimple_vuse (g: stmt));
    }
  else
    {
      /* Store: .MASK_STORE (ADDR, PTR, MASK, RHS).  */
      new_stmt
	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
				      mask, rhs);
      /* A store carries both vdef and vuse; move them to the call.  */
      gimple_move_vops (new_stmt, stmt);
    }
  /* Mark the masked access as not throwing.  */
  gimple_call_set_nothrow (s: new_stmt, nothrow_p: true);
  return new_stmt;
}
2491 | |
2492 | /* STMT uses OP_LHS. Check whether it is equivalent to: |
2493 | |
2494 | ... = OP_MASK ? OP_LHS : X; |
2495 | |
2496 | Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is |
2497 | known to have value OP_COND. */ |
2498 | |
2499 | static tree |
2500 | check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond, |
2501 | tree op_lhs) |
2502 | { |
2503 | gassign *assign = dyn_cast <gassign *> (p: stmt); |
2504 | if (!assign || gimple_assign_rhs_code (gs: assign) != COND_EXPR) |
2505 | return NULL_TREE; |
2506 | |
2507 | tree use_cond = gimple_assign_rhs1 (gs: assign); |
2508 | tree if_true = gimple_assign_rhs2 (gs: assign); |
2509 | tree if_false = gimple_assign_rhs3 (gs: assign); |
2510 | |
2511 | if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, flags: 0)) |
2512 | && if_true == op_lhs) |
2513 | return if_false; |
2514 | |
2515 | if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs) |
2516 | return if_true; |
2517 | |
2518 | return NULL_TREE; |
2519 | } |
2520 | |
2521 | /* Return true if VALUE is available for use at STMT. SSA_NAMES is |
2522 | the set of SSA names defined earlier in STMT's block. */ |
2523 | |
2524 | static bool |
2525 | value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names, |
2526 | tree value) |
2527 | { |
2528 | if (is_gimple_min_invariant (value)) |
2529 | return true; |
2530 | |
2531 | if (TREE_CODE (value) == SSA_NAME) |
2532 | { |
2533 | if (SSA_NAME_IS_DEFAULT_DEF (value)) |
2534 | return true; |
2535 | |
2536 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value)); |
2537 | basic_block use_bb = gimple_bb (g: stmt); |
2538 | return (def_bb == use_bb |
2539 | ? ssa_names->contains (k: value) |
2540 | : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb)); |
2541 | } |
2542 | |
2543 | return false; |
2544 | } |
2545 | |
2546 | /* Helper function for predicate_statements. STMT is a potentially-trapping |
2547 | arithmetic operation that needs to be predicated by MASK, an SSA_NAME that |
2548 | has value COND. Return a statement that does so. SSA_NAMES is the set of |
2549 | SSA names defined earlier in STMT's block. */ |
2550 | |
static gimple *
predicate_rhs_code (gassign *stmt, tree mask, tree cond,
		    hash_set<tree_ssa_name_hash> *ssa_names)
{
  tree lhs = gimple_assign_lhs (gs: stmt);
  tree_code code = gimple_assign_rhs_code (gs: stmt);
  unsigned int nops = gimple_num_ops (gs: stmt);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  /* Construct the arguments to the conditional internal function.  */
  auto_vec<tree, 8> args;
  args.safe_grow (len: nops + 1, exact: true);
  args[0] = mask;
  for (unsigned int i = 1; i < nops; ++i)
    args[i] = gimple_op (gs: stmt, i);
  /* args[nops] is the "else" value of the conditional call; filled in
     below.  */
  args[nops] = NULL_TREE;

  /* Look for uses of the result to see whether they are COND_EXPRs that can
     be folded into the conditional call.  */
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
    {
      tree new_else = check_redundant_cond_expr (stmt: use_stmt, op_mask: mask, op_cond: cond, op_lhs: lhs);
      if (new_else && value_available_p (stmt, ssa_names, value: new_else))
	{
	  if (!args[nops])
	    args[nops] = new_else;
	  if (operand_equal_p (new_else, args[nops], flags: 0))
	    {
	      /* We have:

		 LHS = IFN_COND (MASK, ..., ELSE);
		 X = MASK ? LHS : ELSE;

		 which makes X equivalent to LHS.  */
	      tree use_lhs = gimple_assign_lhs (gs: use_stmt);
	      redundant_ssa_names.safe_push (obj: std::make_pair (x&: use_lhs, y&: lhs));
	    }
	}
    }
  /* No foldable use supplied an else value: ask the target for its
     preferred one.  */
  if (!args[nops])
    args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
					       nops - 1, &args[1]);

  /* Create and insert the call.  */
  gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
  gimple_call_set_lhs (gs: new_stmt, lhs);
  gimple_call_set_nothrow (s: new_stmt, nothrow_p: true);

  return new_stmt;
}
2603 | |
2604 | /* Predicate each write to memory in LOOP. |
2605 | |
2606 | This function transforms control flow constructs containing memory |
2607 | writes of the form: |
2608 | |
2609 | | for (i = 0; i < N; i++) |
2610 | | if (cond) |
2611 | | A[i] = expr; |
2612 | |
2613 | into the following form that does not contain control flow: |
2614 | |
2615 | | for (i = 0; i < N; i++) |
2616 | | A[i] = cond ? expr : A[i]; |
2617 | |
2618 | The original CFG looks like this: |
2619 | |
2620 | | bb_0 |
2621 | | i = 0 |
2622 | | end_bb_0 |
2623 | | |
2624 | | bb_1 |
2625 | | if (i < N) goto bb_5 else goto bb_2 |
2626 | | end_bb_1 |
2627 | | |
2628 | | bb_2 |
2629 | | cond = some_computation; |
2630 | | if (cond) goto bb_3 else goto bb_4 |
2631 | | end_bb_2 |
2632 | | |
2633 | | bb_3 |
2634 | | A[i] = expr; |
2635 | | goto bb_4 |
2636 | | end_bb_3 |
2637 | | |
2638 | | bb_4 |
2639 | | goto bb_1 |
2640 | | end_bb_4 |
2641 | |
2642 | insert_gimplified_predicates inserts the computation of the COND |
2643 | expression at the beginning of the destination basic block: |
2644 | |
2645 | | bb_0 |
2646 | | i = 0 |
2647 | | end_bb_0 |
2648 | | |
2649 | | bb_1 |
2650 | | if (i < N) goto bb_5 else goto bb_2 |
2651 | | end_bb_1 |
2652 | | |
2653 | | bb_2 |
2654 | | cond = some_computation; |
2655 | | if (cond) goto bb_3 else goto bb_4 |
2656 | | end_bb_2 |
2657 | | |
2658 | | bb_3 |
2659 | | cond = some_computation; |
2660 | | A[i] = expr; |
2661 | | goto bb_4 |
2662 | | end_bb_3 |
2663 | | |
2664 | | bb_4 |
2665 | | goto bb_1 |
2666 | | end_bb_4 |
2667 | |
2668 | predicate_statements is then predicating the memory write as follows: |
2669 | |
2670 | | bb_0 |
2671 | | i = 0 |
2672 | | end_bb_0 |
2673 | | |
2674 | | bb_1 |
2675 | | if (i < N) goto bb_5 else goto bb_2 |
2676 | | end_bb_1 |
2677 | | |
2678 | | bb_2 |
2679 | | if (cond) goto bb_3 else goto bb_4 |
2680 | | end_bb_2 |
2681 | | |
2682 | | bb_3 |
2683 | | cond = some_computation; |
2684 | | A[i] = cond ? expr : A[i]; |
2685 | | goto bb_4 |
2686 | | end_bb_3 |
2687 | | |
2688 | | bb_4 |
2689 | | goto bb_1 |
2690 | | end_bb_4 |
2691 | |
2692 | and finally combine_blocks removes the basic block boundaries making |
2693 | the loop vectorizable: |
2694 | |
2695 | | bb_0 |
2696 | | i = 0 |
2697 | | if (i < N) goto bb_5 else goto bb_1 |
2698 | | end_bb_0 |
2699 | | |
2700 | | bb_1 |
2701 | | cond = some_computation; |
2702 | | A[i] = cond ? expr : A[i]; |
2703 | | if (i < N) goto bb_5 else goto bb_4 |
2704 | | end_bb_1 |
2705 | | |
2706 | | bb_4 |
2707 | | goto bb_1 |
2708 | | end_bb_4 |
2709 | */ |
2710 | |
static void
predicate_statements (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;
  hash_set<tree_ssa_name_hash> ssa_names;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      /* COND is the predicate under which every stmt in BB executes.  */
      tree cond = bb_predicate (bb);
      bool swap;
      int index;

      /* An unconditionally executed block needs no predication.  */
      if (is_true_predicate (cond))
	continue;

      /* Strip a top-level negation and remember to swap the arms of
	 any COND_EXPR we build instead.  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* Masks are cached per bit-size, separately for each BB.  */
      vect_sizes.truncate (size: 0);
      vect_masks.truncate (size: 0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);)
	{
	  gassign *stmt = dyn_cast <gassign *> (p: gsi_stmt (i: gsi));
	  tree lhs;
	  if (!stmt)
	    ;
	  else if (is_false_predicate (cond)
		   && gimple_vdef (g: stmt))
	    {
	      /* A store guarded by a false predicate can never execute;
		 remove it outright.  */
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      continue;
	    }
	  else if (gimple_plf (stmt, plf: GF_PLF_2)
		   && is_gimple_assign (gs: stmt))
	    {
	      /* GF_PLF_2 marks assignments that need explicit masking
		 (masked loads/stores or possibly-trapping rhs codes).  */
	      tree lhs = gimple_assign_lhs (gs: stmt);
	      tree mask;
	      gimple *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (size: bitsize, vec: vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (seq: &stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      /* Invert the mask instead of swapping operands;
			 XOR with true is a logical NOT.  */
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (seq: &stmts, code: BIT_XOR_EXPR,
					   TREE_TYPE (mask), ops: mask, ops: true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (obj: bitsize);
		  vect_masks.safe_push (obj: mask);
		}
	      if (gimple_assign_single_p (gs: stmt))
		new_stmt = predicate_load_or_store (gsi: &gsi, stmt, mask);
	      else
		new_stmt = predicate_rhs_code (stmt, mask, cond, ssa_names: &ssa_names);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  else if (((lhs = gimple_assign_lhs (gs: stmt)), true)
		   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
		       || POINTER_TYPE_P (TREE_TYPE (lhs)))
		   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
		   && arith_code_with_undefined_signed_overflow
		   (gimple_assign_rhs_code (gs: stmt)))
	    /* If-conversion makes conditionally executed arithmetic run
	       unconditionally; rewrite ops with undefined signed overflow
	       to a well-defined (unsigned) form.  */
	    rewrite_to_defined_overflow (&gsi);
	  else if (gimple_vdef (g: stmt))
	    {
	      /* Predicate a plain store as a read-modify-write:
		 lhs = cond ? rhs : lhs.  */
	      tree lhs = gimple_assign_lhs (gs: stmt);
	      tree rhs = gimple_assign_rhs1 (gs: stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, expr: unshare_expr (lhs), gsi: &gsi);
	      rhs = ifc_temp_var (type, expr: unshare_expr (rhs), gsi: &gsi);
	      if (swap)
		std::swap (a&: lhs, b&: rhs);
	      cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
					      NULL_TREE, true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, cond: unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (gs: stmt, rhs: ifc_temp_var (type, expr: rhs, gsi: &gsi));
	      update_stmt (s: stmt);
	    }

	  if (gimple_plf (stmt: gsi_stmt (i: gsi), plf: GF_PLF_2)
	      && is_gimple_call (gs: gsi_stmt (i: gsi)))
	    {
	      /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
		 This will cause the vectorizer to match the "in branch"
		 clone variants, and serves to build the mask vector
		 in a natural way.  */
	      gcall *call = dyn_cast <gcall *> (p: gsi_stmt (i: gsi));
	      tree orig_fn = gimple_call_fn (gs: call);
	      int orig_nargs = gimple_call_num_args (gs: call);
	      auto_vec<tree> args;
	      args.safe_push (obj: orig_fn);
	      for (int i = 0; i < orig_nargs; i++)
		args.safe_push (obj: gimple_call_arg (gs: call, index: i));
	      args.safe_push (obj: cond);

	      /* Replace the call with a IFN_MASK_CALL that has the extra
		 condition parameter.  */
	      gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
								args);
	      gimple_call_set_lhs (gs: new_call, lhs: gimple_call_lhs (gs: call));
	      gsi_replace (&gsi, new_call, true);
	    }

	  /* Record SSA defs created in this predicated block; consulted
	     by predicate_rhs_code via SSA_NAMES.  */
	  lhs = gimple_get_lhs (gsi_stmt (i: gsi));
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    ssa_names.add (k: lhs);
	  gsi_next (i: &gsi);
	}
      /* The SSA name set is per-BB.  */
      ssa_names.empty ();
    }
}
2855 | |
2856 | /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks |
2857 | other than the exit and latch of the LOOP. Also resets the |
2858 | GIMPLE_DEBUG information. */ |
2859 | |
2860 | static void |
2861 | remove_conditions_and_labels (loop_p loop) |
2862 | { |
2863 | gimple_stmt_iterator gsi; |
2864 | unsigned int i; |
2865 | |
2866 | for (i = 0; i < loop->num_nodes; i++) |
2867 | { |
2868 | basic_block bb = ifc_bbs[i]; |
2869 | |
2870 | if (bb_with_exit_edge_p (loop, bb) |
2871 | || bb == loop->latch) |
2872 | continue; |
2873 | |
2874 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); ) |
2875 | switch (gimple_code (g: gsi_stmt (i: gsi))) |
2876 | { |
2877 | case GIMPLE_COND: |
2878 | case GIMPLE_LABEL: |
2879 | gsi_remove (&gsi, true); |
2880 | break; |
2881 | |
2882 | case GIMPLE_DEBUG: |
2883 | /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */ |
2884 | if (gimple_debug_bind_p (s: gsi_stmt (i: gsi))) |
2885 | { |
2886 | gimple_debug_bind_reset_value (dbg: gsi_stmt (i: gsi)); |
2887 | update_stmt (s: gsi_stmt (i: gsi)); |
2888 | } |
2889 | gsi_next (i: &gsi); |
2890 | break; |
2891 | |
2892 | default: |
2893 | gsi_next (i: &gsi); |
2894 | } |
2895 | } |
2896 | } |
2897 | |
2898 | /* Combine all the basic blocks from LOOP into one or two super basic |
2899 | blocks. Replace PHI nodes with conditional modify expressions. |
2900 | LOOP_VERSIONED should be true if we know that the loop was versioned for |
2901 | vectorization. */ |
2902 | |
static void
combine_blocks (class loop *loop, bool loop_versioned)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  /* First strip control flow, materialize predicates and predicate
     PHIs and statements.  */
  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop, loop_versioned);

  if (need_to_predicate || need_to_rewrite_undefined)
    predicate_statements (loop);

  /* Merge basic blocks.  Record which blocks were conditionally executed
     before releasing their predicates, and find the single block with
     the loop exit edge.  */
  exit_bb = NULL;
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      predicated[i] = !is_true_predicate (cond: bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (gs: vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb: loop->header);
	   ! gsi_end_p (i: gsi); gsi_next (i: &gsi))
	if (gimple_vdef (g: gsi_stmt (i: gsi)))
	  last_vdef = gimple_vdef (g: gsi_stmt (i: gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  /* When there's just loads inside the loop a stray virtual
	     PHI merging the uses can appear, update last_vdef from
	     it.  */
	  if (!last_vdef)
	    last_vdef = gimple_phi_arg_def (gs: vphi, index: 0);
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
	{
	  gimple *stmt = gsi_stmt (i: gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (g: stmt))
		last_vdef = gimple_vdef (g: stmt);
	    }
	  else
	    /* If this is the first load we arrive at update last_vdef
	       so we handle stray PHIs correctly.  */
	    last_vdef = gimple_vuse (g: stmt);
	  if (predicated[i])
	    {
	      /* Range info derived under the predicate no longer holds
		 once the stmt executes unconditionally.  */
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (bb: merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);
    }

  /* Fixup virtual operands in the exit block.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  /* When there's just loads inside the loop a stray virtual
	     PHI merging the uses can appear, update last_vdef from
	     it.  */
	  if (!last_vdef)
	    last_vdef = gimple_phi_arg_def (gs: vphi, index: 0);
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}
    }

  /* Now remove all the edges in the loop, except for those from the exit
     block and delete the blocks we elided.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (i: ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (i: &ei);
	  else
	    remove_edge (e);
	}
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      delete_basic_block (bb);
    }

  /* Re-connect the exit block.  */
  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ptr: ifc_bbs);
  ifc_bbs = NULL;
  free (ptr: predicated);
}
3114 | |
3115 | /* Version LOOP before if-converting it; the original loop |
3116 | will be if-converted, the new copy of the loop will not, |
3117 | and the LOOP_VECTORIZED internal call will be guarding which |
3118 | loop to execute. The vectorizer pass will fold this |
3119 | internal call into either true or false. |
3120 | |
3121 | Note that this function intentionally invalidates profile. Both edges |
3122 | out of LOOP_VECTORIZED must have 100% probability so the profile remains |
3123 | consistent after the condition is folded in the vectorizer. */ |
3124 | |
static class loop *
version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  class loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length = 0;

  /* Build the IFN_LOOP_VECTORIZED call guarding the two versions; the
     second argument (the copied loop's number) is patched in below once
     the copy exists.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (gs: g, lhs: cond);

  void **saved_preds = NULL;
  if (any_complicated_phi || need_to_predicate)
    {
      /* Save BB->aux around loop_version as that uses the same field.  */
      save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
      saved_preds = XALLOCAVEC (void *, save_length);
      for (unsigned i = 0; i < save_length; i++)
	saved_preds[i] = ifc_bbs[i]->aux;
    }

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
     is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  if (any_complicated_phi || need_to_predicate)
    for (unsigned i = 0; i < save_length; i++)
      ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The copy stays in its original form; make sure later passes do not
     try to vectorize it.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (bb: cond_bb);
  gimple_call_set_arg (gs: g, index: 1, arg: build_int_cst (integer_type_node, new_loop->num));
  if (preds)
    preds->safe_push (obj: g);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa_no_phi);
  return new_loop;
}
3177 | |
3178 | /* Return true when LOOP satisfies the follow conditions that will |
3179 | allow it to be recognized by the vectorizer for outer-loop |
3180 | vectorization: |
3181 | - The loop is not the root node of the loop tree. |
3182 | - The loop has exactly one inner loop. |
3183 | - The loop has a single exit. |
3184 | - The loop header has a single successor, which is the inner |
3185 | loop header. |
3186 | - Each of the inner and outer loop latches have a single |
3187 | predecessor. |
3188 | - The loop exit block has a single predecessor, which is the |
3189 | inner loop's exit block. */ |
3190 | |
3191 | static bool |
3192 | versionable_outer_loop_p (class loop *loop) |
3193 | { |
3194 | if (!loop_outer (loop) |
3195 | || loop->dont_vectorize |
3196 | || !loop->inner |
3197 | || loop->inner->next |
3198 | || !single_exit (loop) |
3199 | || !single_succ_p (bb: loop->header) |
3200 | || single_succ (bb: loop->header) != loop->inner->header |
3201 | || !single_pred_p (bb: loop->latch) |
3202 | || !single_pred_p (bb: loop->inner->latch)) |
3203 | return false; |
3204 | |
3205 | basic_block outer_exit = single_pred (bb: loop->latch); |
3206 | basic_block inner_exit = single_pred (bb: loop->inner->latch); |
3207 | |
3208 | if (!single_pred_p (bb: outer_exit) || single_pred (bb: outer_exit) != inner_exit) |
3209 | return false; |
3210 | |
3211 | if (dump_file) |
3212 | fprintf (stream: dump_file, format: "Found vectorizable outer loop for versioning\n" ); |
3213 | |
3214 | return true; |
3215 | } |
3216 | |
3217 | /* Performs splitting of critical edges. Skip splitting and return false |
3218 | if LOOP will not be converted because: |
3219 | |
3220 | - LOOP is not well formed. |
3221 | - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments. |
3222 | |
3223 | Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */ |
3224 | |
3225 | static bool |
3226 | ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv) |
3227 | { |
3228 | basic_block *body; |
3229 | basic_block bb; |
3230 | unsigned int num = loop->num_nodes; |
3231 | unsigned int i; |
3232 | edge e; |
3233 | edge_iterator ei; |
3234 | auto_vec<edge> critical_edges; |
3235 | |
3236 | /* Loop is not well formed. */ |
3237 | if (loop->inner) |
3238 | return false; |
3239 | |
3240 | body = get_loop_body (loop); |
3241 | for (i = 0; i < num; i++) |
3242 | { |
3243 | bb = body[i]; |
3244 | if (!aggressive_if_conv |
3245 | && phi_nodes (bb) |
3246 | && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM) |
3247 | { |
3248 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3249 | fprintf (stream: dump_file, |
3250 | format: "BB %d has complicated PHI with more than %u args.\n" , |
3251 | bb->index, MAX_PHI_ARG_NUM); |
3252 | |
3253 | free (ptr: body); |
3254 | return false; |
3255 | } |
3256 | if (bb == loop->latch || bb_with_exit_edge_p (loop, bb)) |
3257 | continue; |
3258 | |
3259 | /* Skip basic blocks not ending with conditional branch. */ |
3260 | if (!safe_is_a <gcond *> (p: *gsi_last_bb (bb))) |
3261 | continue; |
3262 | |
3263 | FOR_EACH_EDGE (e, ei, bb->succs) |
3264 | if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop) |
3265 | critical_edges.safe_push (obj: e); |
3266 | } |
3267 | free (ptr: body); |
3268 | |
3269 | while (critical_edges.length () > 0) |
3270 | { |
3271 | e = critical_edges.pop (); |
3272 | /* Don't split if bb can be predicated along non-critical edge. */ |
3273 | if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (bb: e->dest)) |
3274 | split_edge (e); |
3275 | } |
3276 | |
3277 | return true; |
3278 | } |
3279 | |
3280 | /* Delete redundant statements produced by predication which prevents |
3281 | loop vectorization. */ |
3282 | |
static void
ifcvt_local_dce (class loop *loop)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* The loop has a single BB only.  */
  basic_block bb = loop->header;
  tree latch_vdef = NULL_TREE;

  worklist.create (nelems: 64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
    {
      phi = gsi_stmt (i: gsi);
      gimple_set_plf (stmt: phi, plf: GF_PLF_2, val_p: true);
      worklist.safe_push (obj: phi);
      /* Remember the VDEF flowing into the latch; it bounds the DSE
	 query for stores below.  */
      if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
    {
      stmt = gsi_stmt (i: gsi);
      if (is_gimple_debug (gs: stmt))
	{
	  /* Debug stmts are kept but never seed the worklist.  */
	  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
	  continue;
	}
      if (gimple_store_p (gs: stmt) || gimple_assign_load_p (stmt))
	{
	  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
	  worklist.safe_push (obj: stmt);
	  continue;
	}
      code = gimple_code (g: stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
	  worklist.safe_push (obj: stmt);
	  continue;
	}
      /* Everything else starts out dead ...  */
      gimple_set_plf (stmt, plf: GF_PLF_2, val_p: false);

      if (code == GIMPLE_ASSIGN)
	{
	  /* ... unless its result is used outside of BB.  */
	  tree lhs = gimple_assign_lhs (gs: stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (!is_gimple_debug (gs: stmt1) && gimple_bb (g: stmt1) != bb)
		{
		  gimple_set_plf (stmt, plf: GF_PLF_2, val_p: true);
		  worklist.safe_push (obj: stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Defs from outside BB or already-live stmts need no work.  */
	  if (gimple_bb (g: stmt1) != bb || gimple_plf (stmt: stmt1, plf: GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt: stmt1, plf: GF_PLF_2, val_p: true);
	  worklist.safe_push (obj: stmt1);
	}
    }
  /* Delete dead statements.  Walk backwards so uses are removed before
     their defs.  */
  gsi = gsi_last_bb (bb);
  while (!gsi_end_p (i: gsi))
    {
      gimple_stmt_iterator gsiprev = gsi;
      gsi_prev (i: &gsiprev);
      stmt = gsi_stmt (i: gsi);
      if (gimple_store_p (gs: stmt) && gimple_vdef (g: stmt))
	{
	  /* Stores are handled by a DSE query instead of the liveness
	     mark; a store overwritten before the latch VDEF is dead.  */
	  tree lhs = gimple_get_lhs (stmt);
	  ao_ref write;
	  ao_ref_init (&write, lhs);

	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
	      == DSE_STORE_DEAD)
	    delete_dead_or_redundant_assignment (&gsi, "dead" );
	  gsi = gsiprev;
	  continue;
	}

      if (gimple_plf (stmt, plf: GF_PLF_2))
	{
	  gsi = gsiprev;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Delete dead stmt in bb#%d\n" , bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
      gsi = gsiprev;
    }
}
3403 | |
3404 | /* Return true if VALUE is already available on edge PE. */ |
3405 | |
3406 | static bool |
3407 | ifcvt_available_on_edge_p (edge pe, tree value) |
3408 | { |
3409 | if (is_gimple_min_invariant (value)) |
3410 | return true; |
3411 | |
3412 | if (TREE_CODE (value) == SSA_NAME) |
3413 | { |
3414 | basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value)); |
3415 | if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb)) |
3416 | return true; |
3417 | } |
3418 | |
3419 | return false; |
3420 | } |
3421 | |
3422 | /* Return true if STMT can be hoisted from if-converted loop LOOP to |
3423 | edge PE. */ |
3424 | |
3425 | static bool |
3426 | ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt) |
3427 | { |
3428 | if (auto *call = dyn_cast<gcall *> (p: stmt)) |
3429 | { |
3430 | if (gimple_call_internal_p (gs: call) |
3431 | && internal_fn_mask_index (gimple_call_internal_fn (gs: call)) >= 0) |
3432 | return false; |
3433 | } |
3434 | else if (auto *assign = dyn_cast<gassign *> (p: stmt)) |
3435 | { |
3436 | if (gimple_assign_rhs_code (gs: assign) == COND_EXPR) |
3437 | return false; |
3438 | } |
3439 | else |
3440 | return false; |
3441 | |
3442 | if (gimple_has_side_effects (stmt) |
3443 | || gimple_could_trap_p (stmt) |
3444 | || stmt_could_throw_p (cfun, stmt) |
3445 | || gimple_vdef (g: stmt) |
3446 | || gimple_vuse (g: stmt)) |
3447 | return false; |
3448 | |
3449 | int num_args = gimple_num_args (gs: stmt); |
3450 | if (pe != loop_preheader_edge (loop)) |
3451 | { |
3452 | for (int i = 0; i < num_args; ++i) |
3453 | if (!ifcvt_available_on_edge_p (pe, value: gimple_arg (gs: stmt, i))) |
3454 | return false; |
3455 | } |
3456 | else |
3457 | { |
3458 | for (int i = 0; i < num_args; ++i) |
3459 | if (!expr_invariant_in_loop_p (loop, gimple_arg (gs: stmt, i))) |
3460 | return false; |
3461 | } |
3462 | |
3463 | return true; |
3464 | } |
3465 | |
3466 | /* Hoist invariant statements from LOOP to edge PE. */ |
3467 | |
3468 | static void |
3469 | ifcvt_hoist_invariants (class loop *loop, edge pe) |
3470 | { |
3471 | gimple_stmt_iterator hoist_gsi = {}; |
3472 | unsigned int num_blocks = loop->num_nodes; |
3473 | basic_block *body = get_loop_body (loop); |
3474 | for (unsigned int i = 0; i < num_blocks; ++i) |
3475 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: gsi);) |
3476 | { |
3477 | gimple *stmt = gsi_stmt (i: gsi); |
3478 | if (ifcvt_can_hoist (loop, pe, stmt)) |
3479 | { |
3480 | /* Once we've hoisted one statement, insert other statements |
3481 | after it. */ |
3482 | gsi_remove (&gsi, false); |
3483 | if (hoist_gsi.ptr) |
3484 | gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT); |
3485 | else |
3486 | { |
3487 | gsi_insert_on_edge_immediate (pe, stmt); |
3488 | hoist_gsi = gsi_for_stmt (stmt); |
3489 | } |
3490 | continue; |
3491 | } |
3492 | gsi_next (i: &gsi); |
3493 | } |
3494 | free (ptr: body); |
3495 | } |
3496 | |
/* Returns the DECL_BIT_FIELD_REPRESENTATIVE of the bitfield access in STMT
   iff the representative's type mode is not BLKmode.  If BITPOS is not NULL
   it will hold the value of the DECL_FIELD_BIT_OFFSET of the bitfield access
   relative to the representative and STRUCT_EXPR, if not NULL, will hold the
   tree representing the base struct of this bitfield.  */
3502 | |
static tree
get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
		  tree *struct_expr)
{
  /* For a write the bitfield reference is the lhs, for a read the rhs.  */
  tree comp_ref = write ? gimple_assign_lhs (gs: stmt)
			: gimple_assign_rhs1 (gs: stmt);

  tree field_decl = TREE_OPERAND (comp_ref, 1);
  tree ref_offset = component_ref_field_offset (comp_ref);
  tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);

  /* Bail out if the representative is not a suitable type for a scalar
     register variable.  */
  if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
    return NULL_TREE;

  /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
     precision.  */
  unsigned HOST_WIDE_INT bf_prec
    = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
  if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
    return NULL_TREE;

  /* Both offsets must be compile-time constants for the bit position
     arithmetic below to fold.  */
  if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
      || TREE_CODE (ref_offset) != INTEGER_CST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "\t Bitfield NOT OK to lower,"
			" offset is non-constant.\n" );
      return NULL_TREE;
    }

  if (struct_expr)
    *struct_expr = TREE_OPERAND (comp_ref, 0);

  if (bitpos)
    {
      /* To calculate the bitposition of the BITFIELD_REF we have to determine
	 where our bitfield starts in relation to the container REP_DECL. The
	 DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
	 us how many bytes from the start of the structure there are until the
	 start of the group of bitfield members the FIELD_DECL belongs to,
	 whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
	 position our actual bitfield member starts.  For the container
	 REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
	 us the distance between the start of the structure and the start of
	 the container, though the first is in bytes and the later other in
	 bits.  With this in mind we calculate the bit position of our new
	 BITFIELD_REF by subtracting the number of bits between the start of
	 the structure and the container from the number of bits from the start
	 of the structure and the actual bitfield member. */
      tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
				 ref_offset,
				 build_int_cst (bitsizetype, BITS_PER_UNIT));
      bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
			    DECL_FIELD_BIT_OFFSET (field_decl));
      tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
				  DECL_FIELD_OFFSET (rep_decl),
				  build_int_cst (bitsizetype, BITS_PER_UNIT));
      rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
			     DECL_FIELD_BIT_OFFSET (rep_decl));

      *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
    }

  return rep_decl;

}
3571 | |
3572 | /* Lowers the bitfield described by DATA. |
3573 | For a write like: |
3574 | |
3575 | struct.bf = _1; |
3576 | |
3577 | lower to: |
3578 | |
3579 | __ifc_1 = struct.<representative>; |
3580 | __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos); |
3581 | struct.<representative> = __ifc_2; |
3582 | |
3583 | For a read: |
3584 | |
3585 | _1 = struct.bf; |
3586 | |
3587 | lower to: |
3588 | |
3589 | __ifc_1 = struct.<representative>; |
3590 | _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos); |
3591 | |
3592 | where representative is a legal load that contains the bitfield value, |
3593 | bitsize is the size of the bitfield and bitpos the offset to the start of |
3594 | the bitfield within the representative. */ |
3595 | |
static void
lower_bitfield (gassign *stmt, bool write)
{
  tree struct_expr;
  tree bitpos;
  /* Find the representative FIELD_DECL covering the bitfield, the
     enclosing structure reference and the bit position of the bitfield
     within that representative.  */
  tree rep_decl = get_bitfield_rep (stmt, write, bitpos: &bitpos, struct_expr: &struct_expr);
  tree rep_type = TREE_TYPE (rep_decl);
  tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Lowering:\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (stream: dump_file, format: "to:\n");
    }

  /* REP_COMP_REF is a COMPONENT_REF for the representative.  NEW_VAL is its
     defining SSA_NAME.  */
  tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
			      NULL_TREE);
  /* Load the whole representative into a temporary:
     __ifc_1 = struct.<representative>;  */
  tree new_val = ifc_temp_var (type: rep_type, expr: rep_comp_ref, gsi: &gsi);

  if (dump_file && (dump_flags & TDF_DETAILS))
    print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

  if (write)
    {
      /* Insert the written value into the representative at BITPOS and
	 store the combined value back:
	 __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
	 struct.<representative> = __ifc_2;  */
      new_val = ifc_temp_var (type: rep_type,
			      expr: build3 (BIT_INSERT_EXPR, rep_type, new_val,
				     unshare_expr (gimple_assign_rhs1 (gs: stmt)),
				     bitpos), gsi: &gsi);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);

      gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
					      new_val);
      /* The new store takes over the virtual operands of the original
	 bitfield store.  */
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
  else
    {
      /* Extract the bitfield value out of the loaded representative:
	 _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);  */
      tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
			 build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
			 bitpos);
      new_val = ifc_temp_var (type: bf_type, expr: bfr, gsi: &gsi);

      gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (gs: stmt),
					      new_val);
      gimple_move_vops (new_stmt, stmt);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }

  /* The original bitfield access has been replaced; remove it.  */
  gsi_remove (&gsi, true);
}
3658 | |
/* Return TRUE if there are bitfields to lower in this LOOP.  Fill
   READS_TO_LOWER and WRITES_TO_LOWER with the bitfield accesses that
   need lowering.  */
3661 | |
3662 | static bool |
3663 | bitfields_to_lower_p (class loop *loop, |
3664 | vec <gassign *> &reads_to_lower, |
3665 | vec <gassign *> &writes_to_lower) |
3666 | { |
3667 | gimple_stmt_iterator gsi; |
3668 | |
3669 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3670 | { |
3671 | fprintf (stream: dump_file, format: "Analyzing loop %d for bitfields:\n" , loop->num); |
3672 | } |
3673 | |
3674 | for (unsigned i = 0; i < loop->num_nodes; ++i) |
3675 | { |
3676 | basic_block bb = ifc_bbs[i]; |
3677 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
3678 | { |
3679 | gassign *stmt = dyn_cast<gassign*> (p: gsi_stmt (i: gsi)); |
3680 | if (!stmt) |
3681 | continue; |
3682 | |
3683 | tree op = gimple_assign_lhs (gs: stmt); |
3684 | bool write = TREE_CODE (op) == COMPONENT_REF; |
3685 | |
3686 | if (!write) |
3687 | op = gimple_assign_rhs1 (gs: stmt); |
3688 | |
3689 | if (TREE_CODE (op) != COMPONENT_REF) |
3690 | continue; |
3691 | |
3692 | if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1))) |
3693 | { |
3694 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3695 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
3696 | |
3697 | if (!INTEGRAL_TYPE_P (TREE_TYPE (op))) |
3698 | { |
3699 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3700 | fprintf (stream: dump_file, format: "\t Bitfield NO OK to lower," |
3701 | " field type is not Integral.\n" ); |
3702 | return false; |
3703 | } |
3704 | |
3705 | if (!get_bitfield_rep (stmt, write, NULL, NULL)) |
3706 | { |
3707 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3708 | fprintf (stream: dump_file, format: "\t Bitfield NOT OK to lower," |
3709 | " representative is BLKmode.\n" ); |
3710 | return false; |
3711 | } |
3712 | |
3713 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3714 | fprintf (stream: dump_file, format: "\tBitfield OK to lower.\n" ); |
3715 | if (write) |
3716 | writes_to_lower.safe_push (obj: stmt); |
3717 | else |
3718 | reads_to_lower.safe_push (obj: stmt); |
3719 | } |
3720 | } |
3721 | } |
3722 | return !reads_to_lower.is_empty () || !writes_to_lower.is_empty (); |
3723 | } |
3724 | |
3725 | |
3726 | /* If-convert LOOP when it is legal. For the moment this pass has no |
3727 | profitability analysis. Returns non-zero todo flags when something |
3728 | changed. */ |
3729 | |
unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  /* When we version an outer loop, RLOOP is set to the inner loop of the
     non-vectorized copy which still needs if-conversion; we then loop
     back to AGAIN to process it.  */
  class loop *rloop;
  auto_vec <gassign *, 4> reads_to_lower;
  auto_vec <gassign *, 4> writes_to_lower;
  bitmap exit_bbs;
  /* The edge on which invariant statements are inserted.  */
  edge pe;
  auto_vec<data_reference_p, 10> refs;
  bool loop_versioned;

 again:
  /* Reset per-loop state for (re-)entry.  */
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_lower_bitfields = false;
  need_to_ifcvt = false;
  need_to_predicate = false;
  need_to_rewrite_undefined = false;
  any_complicated_phi = false;
  loop_versioned = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  /* If there are more than two BBs in the loop then there is at least one if
     to convert.  */
  if (loop->num_nodes > 2
      && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (stream: dump_file, format: "Irreducible loop\n");
      goto cleanup;
    }

  if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
    goto cleanup;

  if (loop->num_nodes > 2)
    {
      /* More than one loop exit is too much to handle.  */
      if (!single_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file, format: "Can not ifcvt due to multiple exits\n");
	}
      else
	{
	  need_to_ifcvt = true;

	  if (!if_convertible_loop_p (loop, refs: &refs)
	      || !dbg_cnt (index: if_conversion_tree))
	    goto cleanup;

	  /* Bail out if predication or complicated PHIs would be needed
	     but the loop is not going to be vectorized anyway.  */
	  if ((need_to_predicate || any_complicated_phi)
	      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
		  || loop->dont_vectorize))
	    goto cleanup;
	}
    }

  /* Only consider bitfield lowering when the loop is a vectorization
     candidate.  */
  if ((flag_tree_loop_vectorize || loop->force_vectorize)
      && !loop->dont_vectorize)
    need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
						    writes_to_lower);

  if (!need_to_ifcvt && !need_to_lower_bitfields)
    goto cleanup;

  /* The edge to insert invariant stmts on.  */
  pe = loop_preheader_edge (loop);

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_lower_bitfields
      || need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop: loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (loop: vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
      else
	/* If we versioned loop then make sure to insert invariant
	   stmts before the .LOOP_VECTORIZED check since the vectorizer
	   will re-use that for things like runtime alias versioning
	   whose condition can end up using those invariants.  */
	pe = single_pred_edge (bb: gimple_bb (g: preds->last ()));

      loop_versioned = true;
    }

  if (need_to_lower_bitfields)
    {
      /* Lower all recorded bitfield reads first, then the writes.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "-------------------------\n");
	  fprintf (stream: dump_file, format: "Start lowering bitfields\n");
	}
      while (!reads_to_lower.is_empty ())
	lower_bitfield (stmt: reads_to_lower.pop (), write: false);
      while (!writes_to_lower.is_empty ())
	lower_bitfield (stmt: writes_to_lower.pop (), write: true);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Done lowering bitfields\n");
	  fprintf (stream: dump_file, format: "-------------------------\n");
	}
    }
  if (need_to_ifcvt)
    {
      /* Before we rewrite edges we'll record their original position in the
	 edge map such that we can map the edges between the ifcvt and the
	 non-ifcvt loop during peeling.  */
      uintptr_t idx = 0;
      for (edge exit : get_loop_exit_edges (loop))
	exit->aux = (void*)idx++;

      /* Now all statements are if-convertible.  Combine all the basic
	 blocks into one huge basic block doing the if-conversion
	 on-the-fly.  */
      combine_blocks (loop, loop_versioned);
    }

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  for (edge exit : get_loop_exit_edges (loop))
    bitmap_set_bit (exit_bbs, exit->dest->index);
  bitmap_set_bit (exit_bbs, loop->latch->index);

  /* Replace uses of SSA names recorded as redundant by their recorded
     replacement before value numbering.  */
  std::pair <tree, tree> *name_pair;
  unsigned ssa_names_idx;
  FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  /* Hoist invariants onto PE, which is the preheader edge, or, when the
     loop was versioned, the edge before the .LOOP_VECTORIZED check.  */
  ifcvt_hoist_invariants (loop, pe);

  todo |= TODO_cleanup_cfg;

 cleanup:
  /* Free the collected data references together with their ->aux data.  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (ix: i, ptr: &dr); i++)
    {
      free (ptr: dr->aux);
      free_data_ref (dr);
    }
  refs.truncate (size: 0);

  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (bb: ifc_bbs[i]);

      free (ptr: ifc_bbs);
      ifc_bbs = NULL;
    }
  /* When the outer loop was versioned, also process the inner loop of
     the non-vectorized copy.  */
  if (rloop != NULL)
    {
      loop = rloop;
      reads_to_lower.truncate (size: 0);
      writes_to_lower.truncate (size: 0);
      goto again;
    }

  return todo;
}
3951 | |
3952 | /* Tree if-conversion pass management. */ |
3953 | |
3954 | namespace { |
3955 | |
/* Pass metadata for the GIMPLE if-conversion pass ("ifcvt"); it requires
   CFG and SSA form and requests no TODO flags itself.  */
const pass_data pass_data_if_conversion =
{
  .type: GIMPLE_PASS, /* type */
  .name: "ifcvt", /* name */
  .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
  .tv_id: TV_TREE_LOOP_IFCVT, /* tv_id */
  .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */
  .properties_provided: 0, /* properties_provided */
  .properties_destroyed: 0, /* properties_destroyed */
  .todo_flags_start: 0, /* todo_flags_start */
  .todo_flags_finish: 0, /* todo_flags_finish */
};
3968 | |
/* The if-conversion pass object; gate and execute are defined
   out of line below.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  /* Decide whether the pass should run for FUN at all.  */
  bool gate (function *) final override;
  /* Run if-conversion over the candidate loops of FUN.  */
  unsigned int execute (function *) final override;

}; // class pass_if_conversion
3981 | |
3982 | bool |
3983 | pass_if_conversion::gate (function *fun) |
3984 | { |
3985 | return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops) |
3986 | && flag_tree_loop_if_convert != 0) |
3987 | || flag_tree_loop_if_convert == 1); |
3988 | } |
3989 | |
unsigned int
pass_if_conversion::execute (function *fun)
{
  unsigned todo = 0;

  /* Nothing to do for functions without loops.  */
  if (number_of_loops (fn: fun) <= 1)
    return 0;

  /* PREDS collects the internal calls created by loop versioning in
     tree_if_conversion; their arguments are inspected below.  */
  auto_vec<gimple *> preds;
  for (auto loop : loops_list (cfun, 0))
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, preds: &preds);

  if (todo)
    {
      /* The IL changed; drop cached niter estimates and SCEV data.  */
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      /* Verify no stale bb->aux pointers were left behind.  */
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      if (!gimple_bb (g))
	continue;
      /* Arguments 0 and 1 of the call carry the loop numbers of the
	 if-converted and the original loop respectively.  */
      unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (gs: g, index: 0));
      unsigned orig_loop = tree_to_uhwi (gimple_call_arg (gs: g, index: 1));
      if (!get_loop (fn: fun, num: ifcvt_loop) || !get_loop (fn: fun, num: orig_loop))
	{
	  if (dump_file)
	    fprintf (stream: dump_file, format: "If-converted loop vanished\n");
	  /* Resolve the check to false so the original loop is kept.  */
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
4046 | |
4047 | } // anon namespace |
4048 | |
4049 | gimple_opt_pass * |
4050 | make_pass_if_conversion (gcc::context *ctxt) |
4051 | { |
4052 | return new pass_if_conversion (ctxt); |
4053 | } |
4054 | |