1 | /* Optimization of PHI nodes by converting them into straightline code. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "backend.h" |
24 | #include "insn-codes.h" |
25 | #include "rtl.h" |
26 | #include "tree.h" |
27 | #include "gimple.h" |
28 | #include "cfghooks.h" |
29 | #include "tree-pass.h" |
30 | #include "ssa.h" |
31 | #include "tree-ssa.h" |
32 | #include "optabs-tree.h" |
33 | #include "insn-config.h" |
34 | #include "gimple-pretty-print.h" |
35 | #include "fold-const.h" |
36 | #include "stor-layout.h" |
37 | #include "cfganal.h" |
38 | #include "gimplify.h" |
39 | #include "gimple-iterator.h" |
40 | #include "gimplify-me.h" |
41 | #include "tree-cfg.h" |
42 | #include "tree-dfa.h" |
43 | #include "domwalk.h" |
44 | #include "cfgloop.h" |
45 | #include "tree-data-ref.h" |
46 | #include "tree-scalar-evolution.h" |
47 | #include "tree-inline.h" |
48 | #include "case-cfn-macros.h" |
49 | #include "tree-eh.h" |
50 | #include "gimple-fold.h" |
51 | #include "internal-fn.h" |
52 | #include "gimple-range.h" |
53 | #include "gimple-match.h" |
54 | #include "dbgcnt.h" |
55 | #include "tree-ssa-propagate.h" |
56 | #include "tree-ssa-dce.h" |
57 | |
58 | /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */ |
59 | |
60 | static gphi * |
61 | single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1) |
62 | { |
63 | gimple_stmt_iterator i; |
64 | gphi *phi = NULL; |
65 | if (gimple_seq_singleton_p (seq)) |
66 | { |
67 | phi = as_a <gphi *> (p: gsi_stmt (i: gsi_start (seq))); |
68 | /* Never return virtual phis. */ |
69 | if (virtual_operand_p (op: gimple_phi_result (gs: phi))) |
70 | return NULL; |
71 | return phi; |
72 | } |
73 | for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (i: &i)) |
74 | { |
75 | gphi *p = as_a <gphi *> (p: gsi_stmt (i)); |
76 | /* If the PHI arguments are equal then we can skip this PHI. */ |
77 | if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (gs: p, index: e0->dest_idx), |
78 | gimple_phi_arg_def (gs: p, index: e1->dest_idx))) |
79 | continue; |
80 | |
81 | /* Punt on virtual phis with different arguments from the edges. */ |
82 | if (virtual_operand_p (op: gimple_phi_result (gs: p))) |
83 | return NULL; |
84 | |
85 | /* If we already have a PHI that has the two edge arguments are |
86 | different, then return it is not a singleton for these PHIs. */ |
87 | if (phi) |
88 | return NULL; |
89 | |
90 | phi = p; |
91 | } |
92 | return phi; |
93 | } |
94 | |
95 | /* Replace PHI node element whose edge is E in block BB with variable NEW. |
96 | Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK |
97 | is known to have two edges, one of which must reach BB). */ |
98 | |
99 | static void |
100 | replace_phi_edge_with_variable (basic_block cond_block, |
101 | edge e, gphi *phi, tree new_tree, |
102 | bitmap dce_ssa_names = nullptr) |
103 | { |
104 | basic_block bb = gimple_bb (g: phi); |
105 | gimple_stmt_iterator gsi; |
106 | tree phi_result = PHI_RESULT (phi); |
107 | bool deleteboth = false; |
108 | |
109 | /* Duplicate range info if they are the only things setting the target PHI. |
110 | This is needed as later on, the new_tree will be replacing |
111 | The assignement of the PHI. |
112 | For an example: |
113 | bb1: |
114 | _4 = min<a_1, 255> |
115 | goto bb2 |
116 | |
117 | # RANGE [-INF, 255] |
118 | a_3 = PHI<_4(1)> |
119 | bb3: |
120 | |
121 | use(a_3) |
122 | And _4 gets propagated into the use of a_3 and losing the range info. |
123 | This can't be done for more than 2 incoming edges as the propagation |
124 | won't happen. |
125 | The new_tree needs to be defined in the same basic block as the conditional. */ |
126 | if (TREE_CODE (new_tree) == SSA_NAME |
127 | && EDGE_COUNT (gimple_bb (phi)->preds) == 2 |
128 | && INTEGRAL_TYPE_P (TREE_TYPE (phi_result)) |
129 | && !SSA_NAME_RANGE_INFO (new_tree) |
130 | && SSA_NAME_RANGE_INFO (phi_result) |
131 | && gimple_bb (SSA_NAME_DEF_STMT (new_tree)) == cond_block |
132 | && dbg_cnt (index: phiopt_edge_range)) |
133 | duplicate_ssa_name_range_info (dest: new_tree, src: phi_result); |
134 | |
135 | /* Change the PHI argument to new. */ |
136 | SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); |
137 | |
138 | /* Remove the empty basic block. */ |
139 | edge edge_to_remove = NULL, keep_edge = NULL; |
140 | if (EDGE_SUCC (cond_block, 0)->dest == bb) |
141 | { |
142 | edge_to_remove = EDGE_SUCC (cond_block, 1); |
143 | keep_edge = EDGE_SUCC (cond_block, 0); |
144 | } |
145 | else if (EDGE_SUCC (cond_block, 1)->dest == bb) |
146 | { |
147 | edge_to_remove = EDGE_SUCC (cond_block, 0); |
148 | keep_edge = EDGE_SUCC (cond_block, 1); |
149 | } |
150 | else if ((keep_edge = find_edge (cond_block, e->src))) |
151 | { |
152 | basic_block bb1 = EDGE_SUCC (cond_block, 0)->dest; |
153 | basic_block bb2 = EDGE_SUCC (cond_block, 1)->dest; |
154 | if (single_pred_p (bb: bb1) && single_pred_p (bb: bb2) |
155 | && single_succ_p (bb: bb1) && single_succ_p (bb: bb2) |
156 | && empty_block_p (bb1) && empty_block_p (bb2)) |
157 | deleteboth = true; |
158 | } |
159 | else |
160 | gcc_unreachable (); |
161 | |
162 | if (edge_to_remove && EDGE_COUNT (edge_to_remove->dest->preds) == 1) |
163 | { |
164 | e->flags |= EDGE_FALLTHRU; |
165 | e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); |
166 | e->probability = profile_probability::always (); |
167 | delete_basic_block (edge_to_remove->dest); |
168 | |
169 | /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ |
170 | gsi = gsi_last_bb (bb: cond_block); |
171 | gsi_remove (&gsi, true); |
172 | } |
173 | else if (deleteboth) |
174 | { |
175 | basic_block bb1 = EDGE_SUCC (cond_block, 0)->dest; |
176 | basic_block bb2 = EDGE_SUCC (cond_block, 1)->dest; |
177 | |
178 | edge newedge = redirect_edge_and_branch (keep_edge, bb); |
179 | |
180 | /* The new edge should be the same. */ |
181 | gcc_assert (newedge == keep_edge); |
182 | |
183 | keep_edge->flags |= EDGE_FALLTHRU; |
184 | keep_edge->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); |
185 | keep_edge->probability = profile_probability::always (); |
186 | |
187 | /* Copy the edge's phi entry from the old one. */ |
188 | copy_phi_arg_into_existing_phi (e, keep_edge); |
189 | |
190 | /* Delete the old 2 empty basic blocks */ |
191 | delete_basic_block (bb1); |
192 | delete_basic_block (bb2); |
193 | |
194 | /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ |
195 | gsi = gsi_last_bb (bb: cond_block); |
196 | gsi_remove (&gsi, true); |
197 | } |
198 | else |
199 | { |
200 | /* If there are other edges into the middle block make |
201 | CFG cleanup deal with the edge removal to avoid |
202 | updating dominators here in a non-trivial way. */ |
203 | gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: cond_block)); |
204 | if (keep_edge->flags & EDGE_FALSE_VALUE) |
205 | gimple_cond_make_false (gs: cond); |
206 | else if (keep_edge->flags & EDGE_TRUE_VALUE) |
207 | gimple_cond_make_true (gs: cond); |
208 | } |
209 | |
210 | if (dce_ssa_names) |
211 | simple_dce_from_worklist (dce_ssa_names); |
212 | |
213 | statistics_counter_event (cfun, "Replace PHI with variable" , 1); |
214 | |
215 | if (dump_file && (dump_flags & TDF_DETAILS)) |
216 | fprintf (stream: dump_file, |
217 | format: "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n" , |
218 | cond_block->index, |
219 | bb->index); |
220 | } |
221 | |
222 | /* PR66726: Factor operations out of COND_EXPR. If the arguments of the PHI |
223 | stmt are CONVERT_STMT, factor out the conversion and perform the conversion |
224 | to the result of PHI stmt. COND_STMT is the controlling predicate. |
225 | Return the newly-created PHI, if any. */ |
226 | |
227 | static gphi * |
228 | factor_out_conditional_operation (edge e0, edge e1, gphi *phi, |
229 | tree arg0, tree arg1, gimple *cond_stmt) |
230 | { |
231 | gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt; |
232 | tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE; |
233 | tree temp, result; |
234 | gphi *newphi; |
235 | gimple_stmt_iterator gsi, gsi_for_def; |
236 | location_t locus = gimple_location (g: phi); |
237 | enum tree_code op_code; |
238 | |
239 | /* Handle only PHI statements with two arguments. TODO: If all |
240 | other arguments to PHI are INTEGER_CST or if their defining |
241 | statement have the same unary operation, we can handle more |
242 | than two arguments too. */ |
243 | if (gimple_phi_num_args (gs: phi) != 2) |
244 | return NULL; |
245 | |
246 | /* First canonicalize to simplify tests. */ |
247 | if (TREE_CODE (arg0) != SSA_NAME) |
248 | { |
249 | std::swap (a&: arg0, b&: arg1); |
250 | std::swap (a&: e0, b&: e1); |
251 | } |
252 | |
253 | if (TREE_CODE (arg0) != SSA_NAME |
254 | || (TREE_CODE (arg1) != SSA_NAME |
255 | && TREE_CODE (arg1) != INTEGER_CST)) |
256 | return NULL; |
257 | |
258 | /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is |
259 | an unary operation. */ |
260 | arg0_def_stmt = SSA_NAME_DEF_STMT (arg0); |
261 | if (!is_gimple_assign (gs: arg0_def_stmt) |
262 | || (gimple_assign_rhs_class (gs: arg0_def_stmt) != GIMPLE_UNARY_RHS |
263 | && gimple_assign_rhs_code (gs: arg0_def_stmt) != VIEW_CONVERT_EXPR)) |
264 | return NULL; |
265 | |
266 | /* Use the RHS as new_arg0. */ |
267 | op_code = gimple_assign_rhs_code (gs: arg0_def_stmt); |
268 | new_arg0 = gimple_assign_rhs1 (gs: arg0_def_stmt); |
269 | if (op_code == VIEW_CONVERT_EXPR) |
270 | { |
271 | new_arg0 = TREE_OPERAND (new_arg0, 0); |
272 | if (!is_gimple_reg_type (TREE_TYPE (new_arg0))) |
273 | return NULL; |
274 | } |
275 | if (TREE_CODE (new_arg0) == SSA_NAME |
276 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg0)) |
277 | return NULL; |
278 | |
279 | if (TREE_CODE (arg1) == SSA_NAME) |
280 | { |
281 | /* Check if arg1 is an SSA_NAME and the stmt which defines arg1 |
282 | is an unary operation. */ |
283 | arg1_def_stmt = SSA_NAME_DEF_STMT (arg1); |
284 | if (!is_gimple_assign (gs: arg1_def_stmt) |
285 | || gimple_assign_rhs_code (gs: arg1_def_stmt) != op_code) |
286 | return NULL; |
287 | |
288 | /* Either arg1_def_stmt or arg0_def_stmt should be conditional. */ |
289 | if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: phi), gimple_bb (g: arg0_def_stmt)) |
290 | && dominated_by_p (CDI_DOMINATORS, |
291 | gimple_bb (g: phi), gimple_bb (g: arg1_def_stmt))) |
292 | return NULL; |
293 | |
294 | /* Use the RHS as new_arg1. */ |
295 | new_arg1 = gimple_assign_rhs1 (gs: arg1_def_stmt); |
296 | if (op_code == VIEW_CONVERT_EXPR) |
297 | new_arg1 = TREE_OPERAND (new_arg1, 0); |
298 | if (TREE_CODE (new_arg1) == SSA_NAME |
299 | && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg1)) |
300 | return NULL; |
301 | } |
302 | else |
303 | { |
304 | /* TODO: handle more than just casts here. */ |
305 | if (!gimple_assign_cast_p (s: arg0_def_stmt)) |
306 | return NULL; |
307 | |
308 | /* arg0_def_stmt should be conditional. */ |
309 | if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: phi), gimple_bb (g: arg0_def_stmt))) |
310 | return NULL; |
311 | /* If arg1 is an INTEGER_CST, fold it to new type. */ |
312 | if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0)) |
313 | && (int_fits_type_p (arg1, TREE_TYPE (new_arg0)) |
314 | || (TYPE_PRECISION (TREE_TYPE (new_arg0)) |
315 | == TYPE_PRECISION (TREE_TYPE (arg1))))) |
316 | { |
317 | if (gimple_assign_cast_p (s: arg0_def_stmt)) |
318 | { |
319 | /* For the INTEGER_CST case, we are just moving the |
320 | conversion from one place to another, which can often |
321 | hurt as the conversion moves further away from the |
322 | statement that computes the value. So, perform this |
323 | only if new_arg0 is an operand of COND_STMT, or |
324 | if arg0_def_stmt is the only non-debug stmt in |
325 | its basic block, because then it is possible this |
326 | could enable further optimizations (minmax replacement |
327 | etc.). See PR71016. |
328 | Note no-op conversions don't have this issue as |
329 | it will not generate any zero/sign extend in that case. */ |
330 | if ((TYPE_PRECISION (TREE_TYPE (new_arg0)) |
331 | != TYPE_PRECISION (TREE_TYPE (arg1))) |
332 | && new_arg0 != gimple_cond_lhs (gs: cond_stmt) |
333 | && new_arg0 != gimple_cond_rhs (gs: cond_stmt) |
334 | && gimple_bb (g: arg0_def_stmt) == e0->src) |
335 | { |
336 | gsi = gsi_for_stmt (arg0_def_stmt); |
337 | gsi_prev_nondebug (i: &gsi); |
338 | if (!gsi_end_p (i: gsi)) |
339 | { |
340 | if (gassign *assign |
341 | = dyn_cast <gassign *> (p: gsi_stmt (i: gsi))) |
342 | { |
343 | tree lhs = gimple_assign_lhs (gs: assign); |
344 | enum tree_code ass_code |
345 | = gimple_assign_rhs_code (gs: assign); |
346 | if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) |
347 | return NULL; |
348 | if (lhs != gimple_assign_rhs1 (gs: arg0_def_stmt)) |
349 | return NULL; |
350 | gsi_prev_nondebug (i: &gsi); |
351 | if (!gsi_end_p (i: gsi)) |
352 | return NULL; |
353 | } |
354 | else |
355 | return NULL; |
356 | } |
357 | gsi = gsi_for_stmt (arg0_def_stmt); |
358 | gsi_next_nondebug (i: &gsi); |
359 | if (!gsi_end_p (i: gsi)) |
360 | return NULL; |
361 | } |
362 | new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1); |
363 | |
364 | /* Drop the overlow that fold_convert might add. */ |
365 | if (TREE_OVERFLOW (new_arg1)) |
366 | new_arg1 = drop_tree_overflow (new_arg1); |
367 | } |
368 | else |
369 | return NULL; |
370 | } |
371 | else |
372 | return NULL; |
373 | } |
374 | |
375 | /* If arg0/arg1 have > 1 use, then this transformation actually increases |
376 | the number of expressions evaluated at runtime. */ |
377 | if (!has_single_use (var: arg0) |
378 | || (arg1_def_stmt && !has_single_use (var: arg1))) |
379 | return NULL; |
380 | |
381 | /* If types of new_arg0 and new_arg1 are different bailout. */ |
382 | if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) |
383 | return NULL; |
384 | |
385 | /* Create a new PHI stmt. */ |
386 | result = PHI_RESULT (phi); |
387 | temp = make_ssa_name (TREE_TYPE (new_arg0), NULL); |
388 | newphi = create_phi_node (temp, gimple_bb (g: phi)); |
389 | |
390 | if (dump_file && (dump_flags & TDF_DETAILS)) |
391 | { |
392 | fprintf (stream: dump_file, format: "PHI " ); |
393 | print_generic_expr (dump_file, gimple_phi_result (gs: phi)); |
394 | fprintf (stream: dump_file, |
395 | format: " changed to factor operation out from COND_EXPR.\n" ); |
396 | fprintf (stream: dump_file, format: "New stmt with OPERATION that defines " ); |
397 | print_generic_expr (dump_file, result); |
398 | fprintf (stream: dump_file, format: ".\n" ); |
399 | } |
400 | |
401 | /* Remove the old operation(s) that has single use. */ |
402 | gsi_for_def = gsi_for_stmt (arg0_def_stmt); |
403 | gsi_remove (&gsi_for_def, true); |
404 | release_defs (arg0_def_stmt); |
405 | |
406 | if (arg1_def_stmt) |
407 | { |
408 | gsi_for_def = gsi_for_stmt (arg1_def_stmt); |
409 | gsi_remove (&gsi_for_def, true); |
410 | release_defs (arg1_def_stmt); |
411 | } |
412 | |
413 | add_phi_arg (newphi, new_arg0, e0, locus); |
414 | add_phi_arg (newphi, new_arg1, e1, locus); |
415 | |
416 | /* Create the operation stmt and insert it. */ |
417 | if (op_code == VIEW_CONVERT_EXPR) |
418 | { |
419 | temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp); |
420 | new_stmt = gimple_build_assign (result, temp); |
421 | } |
422 | else |
423 | new_stmt = gimple_build_assign (result, op_code, temp); |
424 | gsi = gsi_after_labels (bb: gimple_bb (g: phi)); |
425 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
426 | |
427 | /* Remove the original PHI stmt. */ |
428 | gsi = gsi_for_stmt (phi); |
429 | gsi_remove (&gsi, true); |
430 | |
431 | statistics_counter_event (cfun, "factored out operation" , 1); |
432 | |
433 | return newphi; |
434 | } |
435 | |
436 | |
437 | /* Return TRUE if SEQ/OP pair should be allowed during early phiopt. |
438 | Currently this is to allow MIN/MAX and ABS/NEGATE and constants. */ |
439 | static bool |
440 | phiopt_early_allow (gimple_seq &seq, gimple_match_op &op) |
441 | { |
442 | /* Don't allow functions. */ |
443 | if (!op.code.is_tree_code ()) |
444 | return false; |
445 | tree_code code = (tree_code)op.code; |
446 | |
447 | /* For non-empty sequence, only allow one statement |
448 | except for MIN/MAX, allow max 2 statements, |
449 | each with MIN/MAX. */ |
450 | if (!gimple_seq_empty_p (s: seq)) |
451 | { |
452 | if (code == MIN_EXPR || code == MAX_EXPR) |
453 | { |
454 | if (!gimple_seq_singleton_p (seq)) |
455 | return false; |
456 | |
457 | gimple *stmt = gimple_seq_first_stmt (s: seq); |
458 | /* Only allow assignments. */ |
459 | if (!is_gimple_assign (gs: stmt)) |
460 | return false; |
461 | code = gimple_assign_rhs_code (gs: stmt); |
462 | return code == MIN_EXPR || code == MAX_EXPR; |
463 | } |
464 | /* Check to make sure op was already a SSA_NAME. */ |
465 | if (code != SSA_NAME) |
466 | return false; |
467 | if (!gimple_seq_singleton_p (seq)) |
468 | return false; |
469 | gimple *stmt = gimple_seq_first_stmt (s: seq); |
470 | /* Only allow assignments. */ |
471 | if (!is_gimple_assign (gs: stmt)) |
472 | return false; |
473 | if (gimple_assign_lhs (gs: stmt) != op.ops[0]) |
474 | return false; |
475 | code = gimple_assign_rhs_code (gs: stmt); |
476 | } |
477 | |
478 | switch (code) |
479 | { |
480 | case MIN_EXPR: |
481 | case MAX_EXPR: |
482 | case ABS_EXPR: |
483 | case ABSU_EXPR: |
484 | case NEGATE_EXPR: |
485 | case SSA_NAME: |
486 | return true; |
487 | case INTEGER_CST: |
488 | case REAL_CST: |
489 | case VECTOR_CST: |
490 | case FIXED_CST: |
491 | return true; |
492 | default: |
493 | return false; |
494 | } |
495 | } |
496 | |
497 | /* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT. |
498 | Return NULL if nothing can be simplified or the resulting simplified value |
499 | with parts pushed if EARLY_P was true. Also rejects non allowed tree code |
500 | if EARLY_P is set. |
501 | Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries |
502 | to simplify CMP ? ARG0 : ARG1. |
503 | Also try to simplify (!CMP) ? ARG1 : ARG0 if the non-inverse failed. */ |
504 | static tree |
505 | gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt, |
506 | tree arg0, tree arg1, |
507 | gimple_seq *seq) |
508 | { |
509 | gimple_seq seq1 = NULL; |
510 | enum tree_code comp_code = gimple_cond_code (gs: comp_stmt); |
511 | location_t loc = gimple_location (g: comp_stmt); |
512 | tree cmp0 = gimple_cond_lhs (gs: comp_stmt); |
513 | tree cmp1 = gimple_cond_rhs (gs: comp_stmt); |
514 | /* To handle special cases like floating point comparison, it is easier and |
515 | less error-prone to build a tree and gimplify it on the fly though it is |
516 | less efficient. |
517 | Don't use fold_build2 here as that might create (bool)a instead of just |
518 | "a != 0". */ |
519 | tree cond = build2_loc (loc, code: comp_code, boolean_type_node, |
520 | arg0: cmp0, arg1: cmp1); |
521 | |
522 | if (dump_file && (dump_flags & TDF_FOLDING)) |
523 | { |
524 | fprintf (stream: dump_file, format: "\nphiopt match-simplify trying:\n\t" ); |
525 | print_generic_expr (dump_file, cond); |
526 | fprintf (stream: dump_file, format: " ? " ); |
527 | print_generic_expr (dump_file, arg0); |
528 | fprintf (stream: dump_file, format: " : " ); |
529 | print_generic_expr (dump_file, arg1); |
530 | fprintf (stream: dump_file, format: "\n" ); |
531 | } |
532 | |
533 | gimple_match_op op (gimple_match_cond::UNCOND, |
534 | COND_EXPR, type, cond, arg0, arg1); |
535 | |
536 | if (op.resimplify (&seq1, follow_all_ssa_edges)) |
537 | { |
538 | bool allowed = !early_p || phiopt_early_allow (seq&: seq1, op); |
539 | tree result = maybe_push_res_to_seq (&op, &seq1); |
540 | if (dump_file && (dump_flags & TDF_FOLDING)) |
541 | { |
542 | fprintf (stream: dump_file, format: "\nphiopt match-simplify back:\n" ); |
543 | if (seq1) |
544 | print_gimple_seq (dump_file, seq1, 0, TDF_VOPS|TDF_MEMSYMS); |
545 | fprintf (stream: dump_file, format: "result: " ); |
546 | if (result) |
547 | print_generic_expr (dump_file, result); |
548 | else |
549 | fprintf (stream: dump_file, format: " (none)" ); |
550 | fprintf (stream: dump_file, format: "\n" ); |
551 | if (!allowed) |
552 | fprintf (stream: dump_file, format: "rejected because early\n" ); |
553 | } |
554 | /* Early we want only to allow some generated tree codes. */ |
555 | if (allowed && result) |
556 | { |
557 | if (loc != UNKNOWN_LOCATION) |
558 | annotate_all_with_location (seq1, loc); |
559 | gimple_seq_add_seq_without_update (seq, seq1); |
560 | return result; |
561 | } |
562 | } |
563 | gimple_seq_discard (seq1); |
564 | seq1 = NULL; |
565 | |
566 | /* Try the inverted comparison, that is !COMP ? ARG1 : ARG0. */ |
567 | comp_code = invert_tree_comparison (comp_code, HONOR_NANS (cmp0)); |
568 | |
569 | if (comp_code == ERROR_MARK) |
570 | return NULL; |
571 | |
572 | cond = build2_loc (loc, |
573 | code: comp_code, boolean_type_node, |
574 | arg0: cmp0, arg1: cmp1); |
575 | |
576 | if (dump_file && (dump_flags & TDF_FOLDING)) |
577 | { |
578 | fprintf (stream: dump_file, format: "\nphiopt match-simplify trying:\n\t" ); |
579 | print_generic_expr (dump_file, cond); |
580 | fprintf (stream: dump_file, format: " ? " ); |
581 | print_generic_expr (dump_file, arg1); |
582 | fprintf (stream: dump_file, format: " : " ); |
583 | print_generic_expr (dump_file, arg0); |
584 | fprintf (stream: dump_file, format: "\n" ); |
585 | } |
586 | |
587 | gimple_match_op op1 (gimple_match_cond::UNCOND, |
588 | COND_EXPR, type, cond, arg1, arg0); |
589 | |
590 | if (op1.resimplify (&seq1, follow_all_ssa_edges)) |
591 | { |
592 | bool allowed = !early_p || phiopt_early_allow (seq&: seq1, op&: op1); |
593 | tree result = maybe_push_res_to_seq (&op1, &seq1); |
594 | if (dump_file && (dump_flags & TDF_FOLDING)) |
595 | { |
596 | fprintf (stream: dump_file, format: "\nphiopt match-simplify back:\n" ); |
597 | if (seq1) |
598 | print_gimple_seq (dump_file, seq1, 0, TDF_VOPS|TDF_MEMSYMS); |
599 | fprintf (stream: dump_file, format: "result: " ); |
600 | if (result) |
601 | print_generic_expr (dump_file, result); |
602 | else |
603 | fprintf (stream: dump_file, format: " (none)" ); |
604 | fprintf (stream: dump_file, format: "\n" ); |
605 | if (!allowed) |
606 | fprintf (stream: dump_file, format: "rejected because early\n" ); |
607 | } |
608 | /* Early we want only to allow some generated tree codes. */ |
609 | if (allowed && result) |
610 | { |
611 | if (loc != UNKNOWN_LOCATION) |
612 | annotate_all_with_location (seq1, loc); |
613 | gimple_seq_add_seq_without_update (seq, seq1); |
614 | return result; |
615 | } |
616 | } |
617 | gimple_seq_discard (seq1); |
618 | |
619 | return NULL; |
620 | } |
621 | |
622 | /* empty_bb_or_one_feeding_into_p returns true if bb was empty basic block |
623 | or it has one cheap preparation statement that feeds into the PHI |
624 | statement and it sets STMT to that statement. */ |
625 | static bool |
626 | empty_bb_or_one_feeding_into_p (basic_block bb, |
627 | gimple *phi, |
628 | gimple *&stmt) |
629 | { |
630 | stmt = nullptr; |
631 | gimple *stmt_to_move = nullptr; |
632 | tree lhs; |
633 | |
634 | if (empty_block_p (bb)) |
635 | return true; |
636 | |
637 | if (!single_pred_p (bb)) |
638 | return false; |
639 | |
640 | /* The middle bb cannot have phi nodes as we don't |
641 | move those assignments yet. */ |
642 | if (!gimple_seq_empty_p (s: phi_nodes (bb))) |
643 | return false; |
644 | |
645 | gimple_stmt_iterator gsi; |
646 | |
647 | gsi = gsi_start_nondebug_after_labels_bb (bb); |
648 | while (!gsi_end_p (i: gsi)) |
649 | { |
650 | gimple *s = gsi_stmt (i: gsi); |
651 | gsi_next_nondebug (i: &gsi); |
652 | /* Skip over Predict and nop statements. */ |
653 | if (gimple_code (g: s) == GIMPLE_PREDICT |
654 | || gimple_code (g: s) == GIMPLE_NOP) |
655 | continue; |
656 | /* If there is more one statement return false. */ |
657 | if (stmt_to_move) |
658 | return false; |
659 | stmt_to_move = s; |
660 | } |
661 | |
662 | /* The only statement here was a Predict or a nop statement |
663 | so return true. */ |
664 | if (!stmt_to_move) |
665 | return true; |
666 | |
667 | if (gimple_vuse (g: stmt_to_move)) |
668 | return false; |
669 | |
670 | if (gimple_could_trap_p (stmt_to_move) |
671 | || gimple_has_side_effects (stmt_to_move)) |
672 | return false; |
673 | |
674 | ssa_op_iter it; |
675 | tree use; |
676 | FOR_EACH_SSA_TREE_OPERAND (use, stmt_to_move, it, SSA_OP_USE) |
677 | if (ssa_name_maybe_undef_p (var: use)) |
678 | return false; |
679 | |
680 | /* Allow assignments but allow some builtin/internal calls. |
681 | As const calls don't match any of the above, yet they could |
682 | still have some side-effects - they could contain |
683 | gimple_could_trap_p statements, like floating point |
684 | exceptions or integer division by zero. See PR70586. |
685 | FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p |
686 | should handle this. |
687 | Allow some known builtin/internal calls that are known not to |
688 | trap: logical functions (e.g. bswap and bit counting). */ |
689 | if (!is_gimple_assign (gs: stmt_to_move)) |
690 | { |
691 | if (!is_gimple_call (gs: stmt_to_move)) |
692 | return false; |
693 | combined_fn cfn = gimple_call_combined_fn (stmt_to_move); |
694 | switch (cfn) |
695 | { |
696 | default: |
697 | return false; |
698 | case CFN_BUILT_IN_BSWAP16: |
699 | case CFN_BUILT_IN_BSWAP32: |
700 | case CFN_BUILT_IN_BSWAP64: |
701 | case CFN_BUILT_IN_BSWAP128: |
702 | CASE_CFN_FFS: |
703 | CASE_CFN_PARITY: |
704 | CASE_CFN_POPCOUNT: |
705 | CASE_CFN_CLZ: |
706 | CASE_CFN_CTZ: |
707 | case CFN_BUILT_IN_CLRSB: |
708 | case CFN_BUILT_IN_CLRSBL: |
709 | case CFN_BUILT_IN_CLRSBLL: |
710 | lhs = gimple_call_lhs (gs: stmt_to_move); |
711 | break; |
712 | } |
713 | } |
714 | else |
715 | lhs = gimple_assign_lhs (gs: stmt_to_move); |
716 | |
717 | gimple *use_stmt; |
718 | use_operand_p use_p; |
719 | |
720 | /* Allow only a statement which feeds into the other stmt. */ |
721 | if (!lhs || TREE_CODE (lhs) != SSA_NAME |
722 | || !single_imm_use (var: lhs, use_p: &use_p, stmt: &use_stmt) |
723 | || use_stmt != phi) |
724 | return false; |
725 | |
726 | stmt = stmt_to_move; |
727 | return true; |
728 | } |
729 | |
730 | /* Move STMT to before GSI and insert its defining |
731 | name into INSERTED_EXPRS bitmap. */ |
732 | static void |
733 | move_stmt (gimple *stmt, gimple_stmt_iterator *gsi, auto_bitmap &inserted_exprs) |
734 | { |
735 | if (!stmt) |
736 | return; |
737 | if (dump_file && (dump_flags & TDF_DETAILS)) |
738 | { |
739 | fprintf (stream: dump_file, format: "statement un-sinked:\n" ); |
740 | print_gimple_stmt (dump_file, stmt, 0, |
741 | TDF_VOPS|TDF_MEMSYMS); |
742 | } |
743 | |
744 | tree name = gimple_get_lhs (stmt); |
745 | // Mark the name to be renamed if there is one. |
746 | bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name)); |
747 | gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt); |
748 | gsi_move_before (&gsi1, gsi); |
749 | reset_flow_sensitive_info (name); |
750 | } |
751 | |
752 | /* RAII style class to temporarily remove flow sensitive |
753 | from ssa names defined by a gimple statement. */ |
754 | class auto_flow_sensitive |
755 | { |
756 | public: |
757 | auto_flow_sensitive (gimple *s); |
758 | ~auto_flow_sensitive (); |
759 | private: |
760 | auto_vec<std::pair<tree, flow_sensitive_info_storage>, 2> stack; |
761 | }; |
762 | |
763 | /* Constructor for auto_flow_sensitive. Saves |
764 | off the ssa names' flow sensitive information |
765 | that was defined by gimple statement S and |
766 | resets it to be non-flow based ones. */ |
767 | |
768 | auto_flow_sensitive::auto_flow_sensitive (gimple *s) |
769 | { |
770 | if (!s) |
771 | return; |
772 | ssa_op_iter it; |
773 | tree def; |
774 | FOR_EACH_SSA_TREE_OPERAND (def, s, it, SSA_OP_DEF) |
775 | { |
776 | flow_sensitive_info_storage storage; |
777 | storage.save_and_clear (def); |
778 | stack.safe_push (obj: std::make_pair (x&: def, y&: storage)); |
779 | } |
780 | } |
781 | |
782 | /* Deconstructor, restores the flow sensitive information |
783 | for the SSA names that had been saved off. */ |
784 | |
785 | auto_flow_sensitive::~auto_flow_sensitive () |
786 | { |
787 | for (auto p : stack) |
788 | p.second.restore (p.first); |
789 | } |
790 | |
791 | /* The function match_simplify_replacement does the main work of doing the |
792 | replacement using match and simplify. Return true if the replacement is done. |
793 | Otherwise return false. |
794 | BB is the basic block where the replacement is going to be done on. ARG0 |
795 | is argument 0 from PHI. Likewise for ARG1. */ |
796 | |
797 | static bool |
798 | match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, |
799 | basic_block middle_bb_alt, |
800 | edge e0, edge e1, gphi *phi, |
801 | tree arg0, tree arg1, bool early_p, |
802 | bool threeway_p) |
803 | { |
804 | gimple *stmt; |
805 | gimple_stmt_iterator gsi; |
806 | edge true_edge, false_edge; |
807 | gimple_seq seq = NULL; |
808 | tree result; |
809 | gimple *stmt_to_move = NULL; |
810 | gimple *stmt_to_move_alt = NULL; |
811 | tree arg_true, arg_false; |
812 | |
813 | /* Special case A ? B : B as this will always simplify to B. */ |
814 | if (operand_equal_for_phi_arg_p (arg0, arg1)) |
815 | return false; |
816 | |
817 | /* If the basic block only has a cheap preparation statement, |
818 | allow it and move it once the transformation is done. */ |
819 | if (!empty_bb_or_one_feeding_into_p (bb: middle_bb, phi, stmt&: stmt_to_move)) |
820 | return false; |
821 | |
822 | if (threeway_p |
823 | && middle_bb != middle_bb_alt |
824 | && !empty_bb_or_one_feeding_into_p (bb: middle_bb_alt, phi, |
825 | stmt&: stmt_to_move_alt)) |
826 | return false; |
827 | |
828 | /* At this point we know we have a GIMPLE_COND with two successors. |
829 | One successor is BB, the other successor is an empty block which |
830 | falls through into BB. |
831 | |
832 | There is a single PHI node at the join point (BB). |
833 | |
834 | So, given the condition COND, and the two PHI arguments, match and simplify |
835 | can happen on (COND) ? arg0 : arg1. */ |
836 | |
837 | stmt = last_nondebug_stmt (cond_bb); |
838 | |
839 | /* We need to know which is the true edge and which is the false |
840 | edge so that we know when to invert the condition below. */ |
841 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
842 | |
843 | /* Forward the edges over the middle basic block. */ |
844 | if (true_edge->dest == middle_bb) |
845 | true_edge = EDGE_SUCC (true_edge->dest, 0); |
846 | if (false_edge->dest == middle_bb) |
847 | false_edge = EDGE_SUCC (false_edge->dest, 0); |
848 | |
849 | /* When THREEWAY_P then e1 will point to the edge of the final transition |
850 | from middle-bb to end. */ |
851 | if (true_edge == e0) |
852 | { |
853 | if (!threeway_p) |
854 | gcc_assert (false_edge == e1); |
855 | arg_true = arg0; |
856 | arg_false = arg1; |
857 | } |
858 | else |
859 | { |
860 | gcc_assert (false_edge == e0); |
861 | if (!threeway_p) |
862 | gcc_assert (true_edge == e1); |
863 | arg_true = arg1; |
864 | arg_false = arg0; |
865 | } |
866 | |
867 | /* Do not make conditional undefs unconditional. */ |
868 | if ((TREE_CODE (arg_true) == SSA_NAME |
869 | && ssa_name_maybe_undef_p (var: arg_true)) |
870 | || (TREE_CODE (arg_false) == SSA_NAME |
871 | && ssa_name_maybe_undef_p (var: arg_false))) |
872 | return false; |
873 | |
874 | tree type = TREE_TYPE (gimple_phi_result (phi)); |
875 | { |
876 | auto_flow_sensitive s1(stmt_to_move); |
877 | auto_flow_sensitive s_alt(stmt_to_move_alt); |
878 | |
879 | result = gimple_simplify_phiopt (early_p, type, comp_stmt: stmt, |
880 | arg0: arg_true, arg1: arg_false, |
881 | seq: &seq); |
882 | } |
883 | |
884 | if (!result) |
885 | return false; |
886 | if (dump_file && (dump_flags & TDF_FOLDING)) |
887 | fprintf (stream: dump_file, format: "accepted the phiopt match-simplify.\n" ); |
888 | |
889 | auto_bitmap exprs_maybe_dce; |
890 | |
891 | /* Mark the cond statements' lhs/rhs as maybe dce. */ |
892 | if (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME |
893 | && !SSA_NAME_IS_DEFAULT_DEF (gimple_cond_lhs (stmt))) |
894 | bitmap_set_bit (exprs_maybe_dce, |
895 | SSA_NAME_VERSION (gimple_cond_lhs (stmt))); |
896 | if (TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME |
897 | && !SSA_NAME_IS_DEFAULT_DEF (gimple_cond_rhs (stmt))) |
898 | bitmap_set_bit (exprs_maybe_dce, |
899 | SSA_NAME_VERSION (gimple_cond_rhs (stmt))); |
900 | |
901 | gsi = gsi_last_bb (bb: cond_bb); |
902 | /* Insert the sequence generated from gimple_simplify_phiopt. */ |
903 | if (seq) |
904 | { |
905 | // Mark the lhs of the new statements maybe for dce |
906 | gimple_stmt_iterator gsi1 = gsi_start (seq); |
907 | for (; !gsi_end_p (i: gsi1); gsi_next (i: &gsi1)) |
908 | { |
909 | gimple *stmt = gsi_stmt (i: gsi1); |
910 | tree name = gimple_get_lhs (stmt); |
911 | if (name && TREE_CODE (name) == SSA_NAME) |
912 | bitmap_set_bit (exprs_maybe_dce, SSA_NAME_VERSION (name)); |
913 | } |
914 | gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); |
915 | } |
916 | |
917 | /* If there was a statement to move, move it to right before |
918 | the original conditional. */ |
919 | move_stmt (stmt: stmt_to_move, gsi: &gsi, inserted_exprs&: exprs_maybe_dce); |
920 | move_stmt (stmt: stmt_to_move_alt, gsi: &gsi, inserted_exprs&: exprs_maybe_dce); |
921 | |
922 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e1, phi, new_tree: result, dce_ssa_names: exprs_maybe_dce); |
923 | |
924 | /* Add Statistic here even though replace_phi_edge_with_variable already |
925 | does it as we want to be able to count when match-simplify happens vs |
926 | the others. */ |
927 | statistics_counter_event (cfun, "match-simplify PHI replacement" , 1); |
928 | |
929 | /* Note that we optimized this PHI. */ |
930 | return true; |
931 | } |
932 | |
933 | /* Update *ARG which is defined in STMT so that it contains the |
934 | computed value if that seems profitable. Return true if the |
935 | statement is made dead by that rewriting. */ |
936 | |
937 | static bool |
938 | jump_function_from_stmt (tree *arg, gimple *stmt) |
939 | { |
940 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
941 | if (code == ADDR_EXPR) |
942 | { |
943 | /* For arg = &p->i transform it to p, if possible. */ |
944 | tree rhs1 = gimple_assign_rhs1 (gs: stmt); |
945 | poly_int64 offset; |
946 | tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0), |
947 | &offset); |
948 | if (tem |
949 | && TREE_CODE (tem) == MEM_REF |
950 | && known_eq (mem_ref_offset (tem) + offset, 0)) |
951 | { |
952 | *arg = TREE_OPERAND (tem, 0); |
953 | return true; |
954 | } |
955 | } |
956 | /* TODO: Much like IPA-CP jump-functions we want to handle constant |
957 | additions symbolically here, and we'd need to update the comparison |
958 | code that compares the arg + cst tuples in our caller. For now the |
959 | code above exactly handles the VEC_BASE pattern from vec.h. */ |
960 | return false; |
961 | } |
962 | |
963 | /* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional |
964 | of the form SSA_NAME NE 0. |
965 | |
966 | If RHS is fed by a simple EQ_EXPR comparison of two values, see if |
967 | the two input values of the EQ_EXPR match arg0 and arg1. |
968 | |
969 | If so update *code and return TRUE. Otherwise return FALSE. */ |
970 | |
971 | static bool |
972 | rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1, |
973 | enum tree_code *code, const_tree rhs) |
974 | { |
975 | /* Obviously if RHS is not an SSA_NAME, we can't look at the defining |
976 | statement. */ |
977 | if (TREE_CODE (rhs) == SSA_NAME) |
978 | { |
979 | gimple *def1 = SSA_NAME_DEF_STMT (rhs); |
980 | |
981 | /* Verify the defining statement has an EQ_EXPR on the RHS. */ |
982 | if (is_gimple_assign (gs: def1) && gimple_assign_rhs_code (gs: def1) == EQ_EXPR) |
983 | { |
984 | /* Finally verify the source operands of the EQ_EXPR are equal |
985 | to arg0 and arg1. */ |
986 | tree op0 = gimple_assign_rhs1 (gs: def1); |
987 | tree op1 = gimple_assign_rhs2 (gs: def1); |
988 | if ((operand_equal_for_phi_arg_p (arg0, op0) |
989 | && operand_equal_for_phi_arg_p (arg1, op1)) |
990 | || (operand_equal_for_phi_arg_p (arg0, op1) |
991 | && operand_equal_for_phi_arg_p (arg1, op0))) |
992 | { |
993 | /* We will perform the optimization. */ |
994 | *code = gimple_assign_rhs_code (gs: def1); |
995 | return true; |
996 | } |
997 | } |
998 | } |
999 | return false; |
1000 | } |
1001 | |
1002 | /* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND. |
1003 | |
1004 | Also return TRUE if arg0/arg1 are equal to the source arguments of a |
1005 | an EQ comparison feeding a BIT_AND_EXPR which feeds COND. |
1006 | |
1007 | Return FALSE otherwise. */ |
1008 | |
1009 | static bool |
1010 | operand_equal_for_value_replacement (const_tree arg0, const_tree arg1, |
1011 | enum tree_code *code, gimple *cond) |
1012 | { |
1013 | gimple *def; |
1014 | tree lhs = gimple_cond_lhs (gs: cond); |
1015 | tree rhs = gimple_cond_rhs (gs: cond); |
1016 | |
1017 | if ((operand_equal_for_phi_arg_p (arg0, lhs) |
1018 | && operand_equal_for_phi_arg_p (arg1, rhs)) |
1019 | || (operand_equal_for_phi_arg_p (arg1, lhs) |
1020 | && operand_equal_for_phi_arg_p (arg0, rhs))) |
1021 | return true; |
1022 | |
1023 | /* Now handle more complex case where we have an EQ comparison |
1024 | which feeds a BIT_AND_EXPR which feeds COND. |
1025 | |
1026 | First verify that COND is of the form SSA_NAME NE 0. */ |
1027 | if (*code != NE_EXPR || !integer_zerop (rhs) |
1028 | || TREE_CODE (lhs) != SSA_NAME) |
1029 | return false; |
1030 | |
1031 | /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */ |
1032 | def = SSA_NAME_DEF_STMT (lhs); |
1033 | if (!is_gimple_assign (gs: def) || gimple_assign_rhs_code (gs: def) != BIT_AND_EXPR) |
1034 | return false; |
1035 | |
1036 | /* Now verify arg0/arg1 correspond to the source arguments of an |
1037 | EQ comparison feeding the BIT_AND_EXPR. */ |
1038 | |
1039 | tree tmp = gimple_assign_rhs1 (gs: def); |
1040 | if (rhs_is_fed_for_value_replacement (arg0, arg1, code, rhs: tmp)) |
1041 | return true; |
1042 | |
1043 | tmp = gimple_assign_rhs2 (gs: def); |
1044 | if (rhs_is_fed_for_value_replacement (arg0, arg1, code, rhs: tmp)) |
1045 | return true; |
1046 | |
1047 | return false; |
1048 | } |
1049 | |
1050 | /* Returns true if ARG is a neutral element for operation CODE |
1051 | on the RIGHT side. */ |
1052 | |
1053 | static bool |
1054 | neutral_element_p (tree_code code, tree arg, bool right) |
1055 | { |
1056 | switch (code) |
1057 | { |
1058 | case PLUS_EXPR: |
1059 | case BIT_IOR_EXPR: |
1060 | case BIT_XOR_EXPR: |
1061 | return integer_zerop (arg); |
1062 | |
1063 | case LROTATE_EXPR: |
1064 | case RROTATE_EXPR: |
1065 | case LSHIFT_EXPR: |
1066 | case RSHIFT_EXPR: |
1067 | case MINUS_EXPR: |
1068 | case POINTER_PLUS_EXPR: |
1069 | return right && integer_zerop (arg); |
1070 | |
1071 | case MULT_EXPR: |
1072 | return integer_onep (arg); |
1073 | |
1074 | case TRUNC_DIV_EXPR: |
1075 | case CEIL_DIV_EXPR: |
1076 | case FLOOR_DIV_EXPR: |
1077 | case ROUND_DIV_EXPR: |
1078 | case EXACT_DIV_EXPR: |
1079 | return right && integer_onep (arg); |
1080 | |
1081 | case BIT_AND_EXPR: |
1082 | return integer_all_onesp (arg); |
1083 | |
1084 | default: |
1085 | return false; |
1086 | } |
1087 | } |
1088 | |
1089 | /* Returns true if ARG is an absorbing element for operation CODE. */ |
1090 | |
1091 | static bool |
1092 | absorbing_element_p (tree_code code, tree arg, bool right, tree rval) |
1093 | { |
1094 | switch (code) |
1095 | { |
1096 | case BIT_IOR_EXPR: |
1097 | return integer_all_onesp (arg); |
1098 | |
1099 | case MULT_EXPR: |
1100 | case BIT_AND_EXPR: |
1101 | return integer_zerop (arg); |
1102 | |
1103 | case LSHIFT_EXPR: |
1104 | case RSHIFT_EXPR: |
1105 | case LROTATE_EXPR: |
1106 | case RROTATE_EXPR: |
1107 | return !right && integer_zerop (arg); |
1108 | |
1109 | case TRUNC_DIV_EXPR: |
1110 | case CEIL_DIV_EXPR: |
1111 | case FLOOR_DIV_EXPR: |
1112 | case ROUND_DIV_EXPR: |
1113 | case EXACT_DIV_EXPR: |
1114 | case TRUNC_MOD_EXPR: |
1115 | case CEIL_MOD_EXPR: |
1116 | case FLOOR_MOD_EXPR: |
1117 | case ROUND_MOD_EXPR: |
1118 | return (!right |
1119 | && integer_zerop (arg) |
1120 | && tree_single_nonzero_warnv_p (rval, NULL)); |
1121 | |
1122 | default: |
1123 | return false; |
1124 | } |
1125 | } |
1126 | |
1127 | /* The function value_replacement does the main work of doing the value |
1128 | replacement. Return non-zero if the replacement is done. Otherwise return |
1129 | 0. If we remove the middle basic block, return 2. |
1130 | BB is the basic block where the replacement is going to be done on. ARG0 |
1131 | is argument 0 from the PHI. Likewise for ARG1. */ |
1132 | |
1133 | static int |
1134 | value_replacement (basic_block cond_bb, basic_block middle_bb, |
1135 | edge e0, edge e1, gphi *phi, tree arg0, tree arg1) |
1136 | { |
1137 | gimple_stmt_iterator gsi; |
1138 | edge true_edge, false_edge; |
1139 | enum tree_code code; |
1140 | bool empty_or_with_defined_p = true; |
1141 | |
1142 | /* If the type says honor signed zeros we cannot do this |
1143 | optimization. */ |
1144 | if (HONOR_SIGNED_ZEROS (arg1)) |
1145 | return 0; |
1146 | |
1147 | /* If there is a statement in MIDDLE_BB that defines one of the PHI |
1148 | arguments, then adjust arg0 or arg1. */ |
1149 | gsi = gsi_start_nondebug_after_labels_bb (bb: middle_bb); |
1150 | while (!gsi_end_p (i: gsi)) |
1151 | { |
1152 | gimple *stmt = gsi_stmt (i: gsi); |
1153 | tree lhs; |
1154 | gsi_next_nondebug (i: &gsi); |
1155 | if (!is_gimple_assign (gs: stmt)) |
1156 | { |
1157 | if (gimple_code (g: stmt) != GIMPLE_PREDICT |
1158 | && gimple_code (g: stmt) != GIMPLE_NOP) |
1159 | empty_or_with_defined_p = false; |
1160 | continue; |
1161 | } |
1162 | /* Now try to adjust arg0 or arg1 according to the computation |
1163 | in the statement. */ |
1164 | lhs = gimple_assign_lhs (gs: stmt); |
1165 | if (!(lhs == arg0 |
1166 | && jump_function_from_stmt (arg: &arg0, stmt)) |
1167 | || (lhs == arg1 |
1168 | && jump_function_from_stmt (arg: &arg1, stmt))) |
1169 | empty_or_with_defined_p = false; |
1170 | } |
1171 | |
1172 | gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: cond_bb)); |
1173 | code = gimple_cond_code (gs: cond); |
1174 | |
1175 | /* This transformation is only valid for equality comparisons. */ |
1176 | if (code != NE_EXPR && code != EQ_EXPR) |
1177 | return 0; |
1178 | |
1179 | /* We need to know which is the true edge and which is the false |
1180 | edge so that we know if have abs or negative abs. */ |
1181 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
1182 | |
1183 | /* At this point we know we have a COND_EXPR with two successors. |
1184 | One successor is BB, the other successor is an empty block which |
1185 | falls through into BB. |
1186 | |
1187 | The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR. |
1188 | |
1189 | There is a single PHI node at the join point (BB) with two arguments. |
1190 | |
1191 | We now need to verify that the two arguments in the PHI node match |
1192 | the two arguments to the equality comparison. */ |
1193 | |
1194 | bool equal_p = operand_equal_for_value_replacement (arg0, arg1, code: &code, cond); |
1195 | bool maybe_equal_p = false; |
1196 | if (!equal_p |
1197 | && empty_or_with_defined_p |
1198 | && TREE_CODE (gimple_cond_rhs (cond)) == INTEGER_CST |
1199 | && (operand_equal_for_phi_arg_p (gimple_cond_lhs (gs: cond), arg0) |
1200 | ? TREE_CODE (arg1) == INTEGER_CST |
1201 | : (operand_equal_for_phi_arg_p (gimple_cond_lhs (gs: cond), arg1) |
1202 | && TREE_CODE (arg0) == INTEGER_CST))) |
1203 | maybe_equal_p = true; |
1204 | if (equal_p || maybe_equal_p) |
1205 | { |
1206 | edge e; |
1207 | tree arg; |
1208 | |
1209 | /* For NE_EXPR, we want to build an assignment result = arg where |
1210 | arg is the PHI argument associated with the true edge. For |
1211 | EQ_EXPR we want the PHI argument associated with the false edge. */ |
1212 | e = (code == NE_EXPR ? true_edge : false_edge); |
1213 | |
1214 | /* Unfortunately, E may not reach BB (it may instead have gone to |
1215 | OTHER_BLOCK). If that is the case, then we want the single outgoing |
1216 | edge from OTHER_BLOCK which reaches BB and represents the desired |
1217 | path from COND_BLOCK. */ |
1218 | if (e->dest == middle_bb) |
1219 | e = single_succ_edge (bb: e->dest); |
1220 | |
1221 | /* Now we know the incoming edge to BB that has the argument for the |
1222 | RHS of our new assignment statement. */ |
1223 | if (e0 == e) |
1224 | arg = arg0; |
1225 | else |
1226 | arg = arg1; |
1227 | |
1228 | /* If the middle basic block was empty or is defining the |
1229 | PHI arguments and this is a single phi where the args are different |
1230 | for the edges e0 and e1 then we can remove the middle basic block. */ |
1231 | if (empty_or_with_defined_p |
1232 | && single_non_singleton_phi_for_edges (seq: phi_nodes (bb: gimple_bb (g: phi)), |
1233 | e0, e1) == phi) |
1234 | { |
1235 | use_operand_p use_p; |
1236 | gimple *use_stmt; |
1237 | |
1238 | /* Even if arg0/arg1 isn't equal to second operand of cond, we |
1239 | can optimize away the bb if we can prove it doesn't care whether |
1240 | phi result is arg0/arg1 or second operand of cond. Consider: |
1241 | <bb 2> [local count: 118111600]: |
1242 | if (i_2(D) == 4) |
1243 | goto <bb 4>; [97.00%] |
1244 | else |
1245 | goto <bb 3>; [3.00%] |
1246 | |
1247 | <bb 3> [local count: 3540129]: |
1248 | |
1249 | <bb 4> [local count: 118111600]: |
1250 | # i_6 = PHI <i_2(D)(3), 6(2)> |
1251 | _3 = i_6 != 0; |
1252 | Here, carg is 4, oarg is 6, crhs is 0, and because |
1253 | (4 != 0) == (6 != 0), we don't care if i_6 is 4 or 6, both |
1254 | have the same outcome. So, can can optimize this to: |
1255 | _3 = i_2(D) != 0; |
1256 | If the single imm use of phi result >, >=, < or <=, similarly |
1257 | we can check if both carg and oarg compare the same against |
1258 | crhs using ccode. */ |
1259 | if (maybe_equal_p |
1260 | && TREE_CODE (arg) != INTEGER_CST |
1261 | && single_imm_use (var: gimple_phi_result (gs: phi), use_p: &use_p, stmt: &use_stmt)) |
1262 | { |
1263 | enum tree_code ccode = ERROR_MARK; |
1264 | tree clhs = NULL_TREE, crhs = NULL_TREE; |
1265 | tree carg = gimple_cond_rhs (gs: cond); |
1266 | tree oarg = e0 == e ? arg1 : arg0; |
1267 | if (is_gimple_assign (gs: use_stmt) |
1268 | && (TREE_CODE_CLASS (gimple_assign_rhs_code (use_stmt)) |
1269 | == tcc_comparison)) |
1270 | { |
1271 | ccode = gimple_assign_rhs_code (gs: use_stmt); |
1272 | clhs = gimple_assign_rhs1 (gs: use_stmt); |
1273 | crhs = gimple_assign_rhs2 (gs: use_stmt); |
1274 | } |
1275 | else if (gimple_code (g: use_stmt) == GIMPLE_COND) |
1276 | { |
1277 | ccode = gimple_cond_code (gs: use_stmt); |
1278 | clhs = gimple_cond_lhs (gs: use_stmt); |
1279 | crhs = gimple_cond_rhs (gs: use_stmt); |
1280 | } |
1281 | if (ccode != ERROR_MARK |
1282 | && clhs == gimple_phi_result (gs: phi) |
1283 | && TREE_CODE (crhs) == INTEGER_CST) |
1284 | switch (ccode) |
1285 | { |
1286 | case EQ_EXPR: |
1287 | case NE_EXPR: |
1288 | if (!tree_int_cst_equal (crhs, carg) |
1289 | && !tree_int_cst_equal (crhs, oarg)) |
1290 | equal_p = true; |
1291 | break; |
1292 | case GT_EXPR: |
1293 | if (tree_int_cst_lt (t1: crhs, t2: carg) |
1294 | == tree_int_cst_lt (t1: crhs, t2: oarg)) |
1295 | equal_p = true; |
1296 | break; |
1297 | case GE_EXPR: |
1298 | if (tree_int_cst_le (t1: crhs, t2: carg) |
1299 | == tree_int_cst_le (t1: crhs, t2: oarg)) |
1300 | equal_p = true; |
1301 | break; |
1302 | case LT_EXPR: |
1303 | if (tree_int_cst_lt (t1: carg, t2: crhs) |
1304 | == tree_int_cst_lt (t1: oarg, t2: crhs)) |
1305 | equal_p = true; |
1306 | break; |
1307 | case LE_EXPR: |
1308 | if (tree_int_cst_le (t1: carg, t2: crhs) |
1309 | == tree_int_cst_le (t1: oarg, t2: crhs)) |
1310 | equal_p = true; |
1311 | break; |
1312 | default: |
1313 | break; |
1314 | } |
1315 | if (equal_p) |
1316 | { |
1317 | tree phires = gimple_phi_result (gs: phi); |
1318 | if (SSA_NAME_RANGE_INFO (phires)) |
1319 | { |
1320 | /* After the optimization PHI result can have value |
1321 | which it couldn't have previously. */ |
1322 | int_range_max r; |
1323 | if (get_global_range_query ()->range_of_expr (r, expr: phires, |
1324 | phi)) |
1325 | { |
1326 | wide_int warg = wi::to_wide (t: carg); |
1327 | int_range<2> tmp (TREE_TYPE (carg), warg, warg); |
1328 | r.union_ (tmp); |
1329 | reset_flow_sensitive_info (phires); |
1330 | set_range_info (phires, r); |
1331 | } |
1332 | else |
1333 | reset_flow_sensitive_info (phires); |
1334 | } |
1335 | } |
1336 | if (equal_p && MAY_HAVE_DEBUG_BIND_STMTS) |
1337 | { |
1338 | imm_use_iterator imm_iter; |
1339 | tree phires = gimple_phi_result (gs: phi); |
1340 | tree temp = NULL_TREE; |
1341 | bool reset_p = false; |
1342 | |
1343 | /* Add # DEBUG D#1 => arg != carg ? arg : oarg. */ |
1344 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, phires) |
1345 | { |
1346 | if (!is_gimple_debug (gs: use_stmt)) |
1347 | continue; |
1348 | if (temp == NULL_TREE) |
1349 | { |
1350 | if (!single_pred_p (bb: middle_bb) |
1351 | || EDGE_COUNT (gimple_bb (phi)->preds) != 2) |
1352 | { |
1353 | /* But only if middle_bb has a single |
1354 | predecessor and phi bb has two, otherwise |
1355 | we could use a SSA_NAME not usable in that |
1356 | place or wrong-debug. */ |
1357 | reset_p = true; |
1358 | break; |
1359 | } |
1360 | gimple_stmt_iterator gsi |
1361 | = gsi_after_labels (bb: gimple_bb (g: phi)); |
1362 | tree type = TREE_TYPE (phires); |
1363 | temp = build_debug_expr_decl (type); |
1364 | tree t = build2 (NE_EXPR, boolean_type_node, |
1365 | arg, carg); |
1366 | t = build3 (COND_EXPR, type, t, arg, oarg); |
1367 | gimple *g = gimple_build_debug_bind (temp, t, phi); |
1368 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1369 | } |
1370 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
1371 | replace_exp (use_p, temp); |
1372 | update_stmt (s: use_stmt); |
1373 | } |
1374 | if (reset_p) |
1375 | reset_debug_uses (phi); |
1376 | } |
1377 | } |
1378 | if (equal_p) |
1379 | { |
1380 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e1, phi, new_tree: arg); |
1381 | /* Note that we optimized this PHI. */ |
1382 | return 2; |
1383 | } |
1384 | } |
1385 | else if (equal_p) |
1386 | { |
1387 | if (!single_pred_p (bb: middle_bb)) |
1388 | return 0; |
1389 | statistics_counter_event (cfun, "Replace PHI with " |
1390 | "variable/value_replacement" , 1); |
1391 | |
1392 | /* Replace the PHI arguments with arg. */ |
1393 | SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); |
1394 | SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); |
1395 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1396 | { |
1397 | fprintf (stream: dump_file, format: "PHI " ); |
1398 | print_generic_expr (dump_file, gimple_phi_result (gs: phi)); |
1399 | fprintf (stream: dump_file, format: " reduced for COND_EXPR in block %d to " , |
1400 | cond_bb->index); |
1401 | print_generic_expr (dump_file, arg); |
1402 | fprintf (stream: dump_file, format: ".\n" ); |
1403 | } |
1404 | return 1; |
1405 | } |
1406 | } |
1407 | |
1408 | if (!single_pred_p (bb: middle_bb)) |
1409 | return 0; |
1410 | |
1411 | /* Now optimize (x != 0) ? x + y : y to just x + y. */ |
1412 | gsi = gsi_last_nondebug_bb (bb: middle_bb); |
1413 | if (gsi_end_p (i: gsi)) |
1414 | return 0; |
1415 | |
1416 | gimple *assign = gsi_stmt (i: gsi); |
1417 | if (!is_gimple_assign (gs: assign) |
1418 | || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) |
1419 | && !POINTER_TYPE_P (TREE_TYPE (arg0)))) |
1420 | return 0; |
1421 | |
1422 | if (gimple_assign_rhs_class (gs: assign) != GIMPLE_BINARY_RHS) |
1423 | { |
1424 | /* If last stmt of the middle_bb is a conversion, handle it like |
1425 | a preparation statement through constant evaluation with |
1426 | checking for UB. */ |
1427 | enum tree_code sc = gimple_assign_rhs_code (gs: assign); |
1428 | if (CONVERT_EXPR_CODE_P (sc)) |
1429 | assign = NULL; |
1430 | else |
1431 | return 0; |
1432 | } |
1433 | |
1434 | /* Punt if there are (degenerate) PHIs in middle_bb, there should not be. */ |
1435 | if (!gimple_seq_empty_p (s: phi_nodes (bb: middle_bb))) |
1436 | return 0; |
1437 | |
1438 | /* Allow up to 2 cheap preparation statements that prepare argument |
1439 | for assign, e.g.: |
1440 | if (y_4 != 0) |
1441 | goto <bb 3>; |
1442 | else |
1443 | goto <bb 4>; |
1444 | <bb 3>: |
1445 | _1 = (int) y_4; |
1446 | iftmp.0_6 = x_5(D) r<< _1; |
1447 | <bb 4>: |
1448 | # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)> |
1449 | or: |
1450 | if (y_3(D) == 0) |
1451 | goto <bb 4>; |
1452 | else |
1453 | goto <bb 3>; |
1454 | <bb 3>: |
1455 | y_4 = y_3(D) & 31; |
1456 | _1 = (int) y_4; |
1457 | _6 = x_5(D) r<< _1; |
1458 | <bb 4>: |
1459 | # _2 = PHI <x_5(D)(2), _6(3)> */ |
1460 | gimple *prep_stmt[2] = { NULL, NULL }; |
1461 | int prep_cnt; |
1462 | for (prep_cnt = 0; ; prep_cnt++) |
1463 | { |
1464 | if (prep_cnt || assign) |
1465 | gsi_prev_nondebug (i: &gsi); |
1466 | if (gsi_end_p (i: gsi)) |
1467 | break; |
1468 | |
1469 | gimple *g = gsi_stmt (i: gsi); |
1470 | if (gimple_code (g) == GIMPLE_LABEL) |
1471 | break; |
1472 | |
1473 | if (prep_cnt == 2 || !is_gimple_assign (gs: g)) |
1474 | return 0; |
1475 | |
1476 | tree lhs = gimple_assign_lhs (gs: g); |
1477 | tree rhs1 = gimple_assign_rhs1 (gs: g); |
1478 | use_operand_p use_p; |
1479 | gimple *use_stmt; |
1480 | if (TREE_CODE (lhs) != SSA_NAME |
1481 | || TREE_CODE (rhs1) != SSA_NAME |
1482 | || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
1483 | || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
1484 | || !single_imm_use (var: lhs, use_p: &use_p, stmt: &use_stmt) |
1485 | || ((prep_cnt || assign) |
1486 | && use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))) |
1487 | return 0; |
1488 | switch (gimple_assign_rhs_code (gs: g)) |
1489 | { |
1490 | CASE_CONVERT: |
1491 | break; |
1492 | case PLUS_EXPR: |
1493 | case BIT_AND_EXPR: |
1494 | case BIT_IOR_EXPR: |
1495 | case BIT_XOR_EXPR: |
1496 | if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST) |
1497 | return 0; |
1498 | break; |
1499 | default: |
1500 | return 0; |
1501 | } |
1502 | prep_stmt[prep_cnt] = g; |
1503 | } |
1504 | |
1505 | /* Only transform if it removes the condition. */ |
1506 | if (!single_non_singleton_phi_for_edges (seq: phi_nodes (bb: gimple_bb (g: phi)), e0, e1)) |
1507 | return 0; |
1508 | |
1509 | /* Size-wise, this is always profitable. */ |
1510 | if (optimize_bb_for_speed_p (cond_bb) |
1511 | /* The special case is useless if it has a low probability. */ |
1512 | && profile_status_for_fn (cfun) != PROFILE_ABSENT |
1513 | && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even () |
1514 | /* If assign is cheap, there is no point avoiding it. */ |
1515 | && estimate_num_insns_seq (bb_seq (bb: middle_bb), &eni_time_weights) |
1516 | >= 3 * estimate_num_insns (cond, &eni_time_weights)) |
1517 | return 0; |
1518 | |
1519 | tree cond_lhs = gimple_cond_lhs (gs: cond); |
1520 | tree cond_rhs = gimple_cond_rhs (gs: cond); |
1521 | |
1522 | /* Propagate the cond_rhs constant through preparation stmts, |
1523 | make sure UB isn't invoked while doing that. */ |
1524 | for (int i = prep_cnt - 1; i >= 0; --i) |
1525 | { |
1526 | gimple *g = prep_stmt[i]; |
1527 | tree grhs1 = gimple_assign_rhs1 (gs: g); |
1528 | if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1)) |
1529 | return 0; |
1530 | cond_lhs = gimple_assign_lhs (gs: g); |
1531 | cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs); |
1532 | if (TREE_CODE (cond_rhs) != INTEGER_CST |
1533 | || TREE_OVERFLOW (cond_rhs)) |
1534 | return 0; |
1535 | if (gimple_assign_rhs_class (gs: g) == GIMPLE_BINARY_RHS) |
1536 | { |
1537 | cond_rhs = int_const_binop (gimple_assign_rhs_code (gs: g), cond_rhs, |
1538 | gimple_assign_rhs2 (gs: g)); |
1539 | if (TREE_OVERFLOW (cond_rhs)) |
1540 | return 0; |
1541 | } |
1542 | cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs); |
1543 | if (TREE_CODE (cond_rhs) != INTEGER_CST |
1544 | || TREE_OVERFLOW (cond_rhs)) |
1545 | return 0; |
1546 | } |
1547 | |
1548 | tree lhs, rhs1, rhs2; |
1549 | enum tree_code code_def; |
1550 | if (assign) |
1551 | { |
1552 | lhs = gimple_assign_lhs (gs: assign); |
1553 | rhs1 = gimple_assign_rhs1 (gs: assign); |
1554 | rhs2 = gimple_assign_rhs2 (gs: assign); |
1555 | code_def = gimple_assign_rhs_code (gs: assign); |
1556 | } |
1557 | else |
1558 | { |
1559 | gcc_assert (prep_cnt > 0); |
1560 | lhs = cond_lhs; |
1561 | rhs1 = NULL_TREE; |
1562 | rhs2 = NULL_TREE; |
1563 | code_def = ERROR_MARK; |
1564 | } |
1565 | |
1566 | if (((code == NE_EXPR && e1 == false_edge) |
1567 | || (code == EQ_EXPR && e1 == true_edge)) |
1568 | && arg0 == lhs |
1569 | && ((assign == NULL |
1570 | && operand_equal_for_phi_arg_p (arg1, cond_rhs)) |
1571 | || (assign |
1572 | && arg1 == rhs1 |
1573 | && operand_equal_for_phi_arg_p (rhs2, cond_lhs) |
1574 | && neutral_element_p (code: code_def, arg: cond_rhs, right: true)) |
1575 | || (assign |
1576 | && arg1 == rhs2 |
1577 | && operand_equal_for_phi_arg_p (rhs1, cond_lhs) |
1578 | && neutral_element_p (code: code_def, arg: cond_rhs, right: false)) |
1579 | || (assign |
1580 | && operand_equal_for_phi_arg_p (arg1, cond_rhs) |
1581 | && ((operand_equal_for_phi_arg_p (rhs2, cond_lhs) |
1582 | && absorbing_element_p (code: code_def, arg: cond_rhs, right: true, rval: rhs2)) |
1583 | || (operand_equal_for_phi_arg_p (rhs1, cond_lhs) |
1584 | && absorbing_element_p (code: code_def, |
1585 | arg: cond_rhs, right: false, rval: rhs2)))))) |
1586 | { |
1587 | gsi = gsi_for_stmt (cond); |
1588 | /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6 |
1589 | def-stmt in: |
1590 | if (n_5 != 0) |
1591 | goto <bb 3>; |
1592 | else |
1593 | goto <bb 4>; |
1594 | |
1595 | <bb 3>: |
1596 | # RANGE [0, 4294967294] |
1597 | u_6 = n_5 + 4294967295; |
1598 | |
1599 | <bb 4>: |
1600 | # u_3 = PHI <u_6(3), 4294967295(2)> */ |
1601 | reset_flow_sensitive_info (lhs); |
1602 | gimple_stmt_iterator gsi_from; |
1603 | for (int i = prep_cnt - 1; i >= 0; --i) |
1604 | { |
1605 | tree plhs = gimple_assign_lhs (gs: prep_stmt[i]); |
1606 | reset_flow_sensitive_info (plhs); |
1607 | gsi_from = gsi_for_stmt (prep_stmt[i]); |
1608 | gsi_move_before (&gsi_from, &gsi); |
1609 | } |
1610 | if (assign) |
1611 | { |
1612 | gsi_from = gsi_for_stmt (assign); |
1613 | gsi_move_before (&gsi_from, &gsi); |
1614 | } |
1615 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e1, phi, new_tree: lhs); |
1616 | return 2; |
1617 | } |
1618 | |
1619 | return 0; |
1620 | } |
1621 | |
1622 | /* If VAR is an SSA_NAME that points to a BIT_NOT_EXPR then return the TREE for |
1623 | the value being inverted. */ |
1624 | |
1625 | static tree |
1626 | strip_bit_not (tree var) |
1627 | { |
1628 | if (TREE_CODE (var) != SSA_NAME) |
1629 | return NULL_TREE; |
1630 | |
1631 | gimple *assign = SSA_NAME_DEF_STMT (var); |
1632 | if (gimple_code (g: assign) != GIMPLE_ASSIGN) |
1633 | return NULL_TREE; |
1634 | |
1635 | if (gimple_assign_rhs_code (gs: assign) != BIT_NOT_EXPR) |
1636 | return NULL_TREE; |
1637 | |
1638 | return gimple_assign_rhs1 (gs: assign); |
1639 | } |
1640 | |
1641 | /* Invert a MIN to a MAX or a MAX to a MIN expression CODE. */ |
1642 | |
1643 | enum tree_code |
1644 | invert_minmax_code (enum tree_code code) |
1645 | { |
1646 | switch (code) { |
1647 | case MIN_EXPR: |
1648 | return MAX_EXPR; |
1649 | case MAX_EXPR: |
1650 | return MIN_EXPR; |
1651 | default: |
1652 | gcc_unreachable (); |
1653 | } |
1654 | } |
1655 | |
1656 | /* The function minmax_replacement does the main work of doing the minmax |
1657 | replacement. Return true if the replacement is done. Otherwise return |
1658 | false. |
1659 | BB is the basic block where the replacement is going to be done on. ARG0 |
1660 | is argument 0 from the PHI. Likewise for ARG1. |
1661 | |
1662 | If THREEWAY_P then expect the BB to be laid out in diamond shape with each |
1663 | BB containing only a MIN or MAX expression. */ |
1664 | |
1665 | static bool |
1666 | minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_middle_bb, |
1667 | edge e0, edge e1, gphi *phi, tree arg0, tree arg1, bool threeway_p) |
1668 | { |
1669 | tree result; |
1670 | edge true_edge, false_edge; |
1671 | enum tree_code minmax, ass_code; |
1672 | tree smaller, larger, arg_true, arg_false; |
1673 | gimple_stmt_iterator gsi, gsi_from; |
1674 | |
1675 | tree type = TREE_TYPE (PHI_RESULT (phi)); |
1676 | |
1677 | gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: cond_bb)); |
1678 | enum tree_code cmp = gimple_cond_code (gs: cond); |
1679 | tree rhs = gimple_cond_rhs (gs: cond); |
1680 | |
1681 | /* Turn EQ/NE of extreme values to order comparisons. */ |
1682 | if ((cmp == NE_EXPR || cmp == EQ_EXPR) |
1683 | && TREE_CODE (rhs) == INTEGER_CST |
1684 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs))) |
1685 | { |
1686 | if (wi::eq_p (x: wi::to_wide (t: rhs), y: wi::min_value (TREE_TYPE (rhs)))) |
1687 | { |
1688 | cmp = (cmp == EQ_EXPR) ? LT_EXPR : GE_EXPR; |
1689 | rhs = wide_int_to_tree (TREE_TYPE (rhs), |
1690 | cst: wi::min_value (TREE_TYPE (rhs)) + 1); |
1691 | } |
1692 | else if (wi::eq_p (x: wi::to_wide (t: rhs), y: wi::max_value (TREE_TYPE (rhs)))) |
1693 | { |
1694 | cmp = (cmp == EQ_EXPR) ? GT_EXPR : LE_EXPR; |
1695 | rhs = wide_int_to_tree (TREE_TYPE (rhs), |
1696 | cst: wi::max_value (TREE_TYPE (rhs)) - 1); |
1697 | } |
1698 | } |
1699 | |
1700 | /* This transformation is only valid for order comparisons. Record which |
1701 | operand is smaller/larger if the result of the comparison is true. */ |
1702 | tree alt_smaller = NULL_TREE; |
1703 | tree alt_larger = NULL_TREE; |
1704 | if (cmp == LT_EXPR || cmp == LE_EXPR) |
1705 | { |
1706 | smaller = gimple_cond_lhs (gs: cond); |
1707 | larger = rhs; |
1708 | /* If we have smaller < CST it is equivalent to smaller <= CST-1. |
1709 | Likewise smaller <= CST is equivalent to smaller < CST+1. */ |
1710 | if (TREE_CODE (larger) == INTEGER_CST |
1711 | && INTEGRAL_TYPE_P (TREE_TYPE (larger))) |
1712 | { |
1713 | if (cmp == LT_EXPR) |
1714 | { |
1715 | wi::overflow_type overflow; |
1716 | wide_int alt = wi::sub (x: wi::to_wide (t: larger), y: 1, |
1717 | TYPE_SIGN (TREE_TYPE (larger)), |
1718 | overflow: &overflow); |
1719 | if (! overflow) |
1720 | alt_larger = wide_int_to_tree (TREE_TYPE (larger), cst: alt); |
1721 | } |
1722 | else |
1723 | { |
1724 | wi::overflow_type overflow; |
1725 | wide_int alt = wi::add (x: wi::to_wide (t: larger), y: 1, |
1726 | TYPE_SIGN (TREE_TYPE (larger)), |
1727 | overflow: &overflow); |
1728 | if (! overflow) |
1729 | alt_larger = wide_int_to_tree (TREE_TYPE (larger), cst: alt); |
1730 | } |
1731 | } |
1732 | } |
1733 | else if (cmp == GT_EXPR || cmp == GE_EXPR) |
1734 | { |
1735 | smaller = rhs; |
1736 | larger = gimple_cond_lhs (gs: cond); |
1737 | /* If we have larger > CST it is equivalent to larger >= CST+1. |
1738 | Likewise larger >= CST is equivalent to larger > CST-1. */ |
1739 | if (TREE_CODE (smaller) == INTEGER_CST |
1740 | && INTEGRAL_TYPE_P (TREE_TYPE (smaller))) |
1741 | { |
1742 | wi::overflow_type overflow; |
1743 | if (cmp == GT_EXPR) |
1744 | { |
1745 | wide_int alt = wi::add (x: wi::to_wide (t: smaller), y: 1, |
1746 | TYPE_SIGN (TREE_TYPE (smaller)), |
1747 | overflow: &overflow); |
1748 | if (! overflow) |
1749 | alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), cst: alt); |
1750 | } |
1751 | else |
1752 | { |
1753 | wide_int alt = wi::sub (x: wi::to_wide (t: smaller), y: 1, |
1754 | TYPE_SIGN (TREE_TYPE (smaller)), |
1755 | overflow: &overflow); |
1756 | if (! overflow) |
1757 | alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), cst: alt); |
1758 | } |
1759 | } |
1760 | } |
1761 | else |
1762 | return false; |
1763 | |
1764 | /* Handle the special case of (signed_type)x < 0 being equivalent |
1765 | to x > MAX_VAL(signed_type) and (signed_type)x >= 0 equivalent |
1766 | to x <= MAX_VAL(signed_type). */ |
1767 | if ((cmp == GE_EXPR || cmp == LT_EXPR) |
1768 | && INTEGRAL_TYPE_P (type) |
1769 | && TYPE_UNSIGNED (type) |
1770 | && integer_zerop (rhs)) |
1771 | { |
1772 | tree op = gimple_cond_lhs (gs: cond); |
1773 | if (TREE_CODE (op) == SSA_NAME |
1774 | && INTEGRAL_TYPE_P (TREE_TYPE (op)) |
1775 | && !TYPE_UNSIGNED (TREE_TYPE (op))) |
1776 | { |
1777 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
1778 | if (gimple_assign_cast_p (s: def_stmt)) |
1779 | { |
1780 | tree op1 = gimple_assign_rhs1 (gs: def_stmt); |
1781 | if (INTEGRAL_TYPE_P (TREE_TYPE (op1)) |
1782 | && TYPE_UNSIGNED (TREE_TYPE (op1)) |
1783 | && (TYPE_PRECISION (TREE_TYPE (op)) |
1784 | == TYPE_PRECISION (TREE_TYPE (op1))) |
1785 | && useless_type_conversion_p (type, TREE_TYPE (op1))) |
1786 | { |
1787 | wide_int w1 = wi::max_value (TREE_TYPE (op)); |
1788 | wide_int w2 = wi::add (x: w1, y: 1); |
1789 | if (cmp == LT_EXPR) |
1790 | { |
1791 | larger = op1; |
1792 | smaller = wide_int_to_tree (TREE_TYPE (op1), cst: w1); |
1793 | alt_smaller = wide_int_to_tree (TREE_TYPE (op1), cst: w2); |
1794 | alt_larger = NULL_TREE; |
1795 | } |
1796 | else |
1797 | { |
1798 | smaller = op1; |
1799 | larger = wide_int_to_tree (TREE_TYPE (op1), cst: w1); |
1800 | alt_larger = wide_int_to_tree (TREE_TYPE (op1), cst: w2); |
1801 | alt_smaller = NULL_TREE; |
1802 | } |
1803 | } |
1804 | } |
1805 | } |
1806 | } |
1807 | |
1808 | /* We need to know which is the true edge and which is the false |
1809 | edge so that we know if have abs or negative abs. */ |
1810 | extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); |
1811 | |
1812 | /* Forward the edges over the middle basic block. */ |
1813 | if (true_edge->dest == middle_bb) |
1814 | true_edge = EDGE_SUCC (true_edge->dest, 0); |
1815 | if (false_edge->dest == middle_bb) |
1816 | false_edge = EDGE_SUCC (false_edge->dest, 0); |
1817 | |
1818 | /* When THREEWAY_P then e1 will point to the edge of the final transition |
1819 | from middle-bb to end. */ |
1820 | if (true_edge == e0) |
1821 | { |
1822 | if (!threeway_p) |
1823 | gcc_assert (false_edge == e1); |
1824 | arg_true = arg0; |
1825 | arg_false = arg1; |
1826 | } |
1827 | else |
1828 | { |
1829 | gcc_assert (false_edge == e0); |
1830 | if (!threeway_p) |
1831 | gcc_assert (true_edge == e1); |
1832 | arg_true = arg1; |
1833 | arg_false = arg0; |
1834 | } |
1835 | |
1836 | if (empty_block_p (middle_bb) |
1837 | && (!threeway_p |
1838 | || empty_block_p (alt_middle_bb))) |
1839 | { |
1840 | if ((operand_equal_for_phi_arg_p (arg_true, smaller) |
1841 | || (alt_smaller |
1842 | && operand_equal_for_phi_arg_p (arg_true, alt_smaller))) |
1843 | && (operand_equal_for_phi_arg_p (arg_false, larger) |
1844 | || (alt_larger |
1845 | && operand_equal_for_phi_arg_p (arg_true, alt_larger)))) |
1846 | { |
1847 | /* Case |
1848 | |
1849 | if (smaller < larger) |
1850 | rslt = smaller; |
1851 | else |
1852 | rslt = larger; */ |
1853 | minmax = MIN_EXPR; |
1854 | } |
1855 | else if ((operand_equal_for_phi_arg_p (arg_false, smaller) |
1856 | || (alt_smaller |
1857 | && operand_equal_for_phi_arg_p (arg_false, alt_smaller))) |
1858 | && (operand_equal_for_phi_arg_p (arg_true, larger) |
1859 | || (alt_larger |
1860 | && operand_equal_for_phi_arg_p (arg_true, alt_larger)))) |
1861 | minmax = MAX_EXPR; |
1862 | else |
1863 | return false; |
1864 | } |
1865 | else if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) |
1866 | /* The optimization may be unsafe due to NaNs. */ |
1867 | return false; |
1868 | else if (middle_bb != alt_middle_bb && threeway_p) |
1869 | { |
1870 | /* Recognize the following case: |
1871 | |
1872 | if (smaller < larger) |
1873 | a = MIN (smaller, c); |
1874 | else |
1875 | b = MIN (larger, c); |
1876 | x = PHI <a, b> |
1877 | |
1878 | This is equivalent to |
1879 | |
1880 | a = MIN (smaller, c); |
1881 | x = MIN (larger, a); */ |
1882 | |
1883 | gimple *assign = last_and_only_stmt (middle_bb); |
1884 | tree lhs, op0, op1, bound; |
1885 | tree alt_lhs, alt_op0, alt_op1; |
1886 | bool invert = false; |
1887 | |
1888 | /* When THREEWAY_P then e1 will point to the edge of the final transition |
1889 | from middle-bb to end. */ |
1890 | if (true_edge == e0) |
1891 | gcc_assert (false_edge == EDGE_PRED (e1->src, 0)); |
1892 | else |
1893 | gcc_assert (true_edge == EDGE_PRED (e1->src, 0)); |
1894 | |
1895 | bool valid_minmax_p = false; |
1896 | gimple_stmt_iterator it1 |
1897 | = gsi_start_nondebug_after_labels_bb (bb: middle_bb); |
1898 | gimple_stmt_iterator it2 |
1899 | = gsi_start_nondebug_after_labels_bb (bb: alt_middle_bb); |
1900 | if (gsi_one_nondebug_before_end_p (i: it1) |
1901 | && gsi_one_nondebug_before_end_p (i: it2)) |
1902 | { |
1903 | gimple *stmt1 = gsi_stmt (i: it1); |
1904 | gimple *stmt2 = gsi_stmt (i: it2); |
1905 | if (is_gimple_assign (gs: stmt1) && is_gimple_assign (gs: stmt2)) |
1906 | { |
1907 | enum tree_code code1 = gimple_assign_rhs_code (gs: stmt1); |
1908 | enum tree_code code2 = gimple_assign_rhs_code (gs: stmt2); |
1909 | valid_minmax_p = (code1 == MIN_EXPR || code1 == MAX_EXPR) |
1910 | && (code2 == MIN_EXPR || code2 == MAX_EXPR); |
1911 | } |
1912 | } |
1913 | |
1914 | if (!valid_minmax_p) |
1915 | return false; |
1916 | |
1917 | if (!assign |
1918 | || gimple_code (g: assign) != GIMPLE_ASSIGN) |
1919 | return false; |
1920 | |
1921 | lhs = gimple_assign_lhs (gs: assign); |
1922 | ass_code = gimple_assign_rhs_code (gs: assign); |
1923 | if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) |
1924 | return false; |
1925 | |
1926 | op0 = gimple_assign_rhs1 (gs: assign); |
1927 | op1 = gimple_assign_rhs2 (gs: assign); |
1928 | |
1929 | assign = last_and_only_stmt (alt_middle_bb); |
1930 | if (!assign |
1931 | || gimple_code (g: assign) != GIMPLE_ASSIGN) |
1932 | return false; |
1933 | |
1934 | alt_lhs = gimple_assign_lhs (gs: assign); |
1935 | if (ass_code != gimple_assign_rhs_code (gs: assign)) |
1936 | return false; |
1937 | |
1938 | if (!operand_equal_for_phi_arg_p (lhs, arg_true) |
1939 | || !operand_equal_for_phi_arg_p (alt_lhs, arg_false)) |
1940 | return false; |
1941 | |
1942 | alt_op0 = gimple_assign_rhs1 (gs: assign); |
1943 | alt_op1 = gimple_assign_rhs2 (gs: assign); |
1944 | |
1945 | if ((operand_equal_for_phi_arg_p (op0, smaller) |
1946 | || (alt_smaller |
1947 | && operand_equal_for_phi_arg_p (op0, alt_smaller))) |
1948 | && (operand_equal_for_phi_arg_p (alt_op0, larger) |
1949 | || (alt_larger |
1950 | && operand_equal_for_phi_arg_p (alt_op0, alt_larger)))) |
1951 | { |
1952 | /* We got here if the condition is true, i.e., SMALLER < LARGER. */ |
1953 | if (!operand_equal_for_phi_arg_p (op1, alt_op1)) |
1954 | return false; |
1955 | |
1956 | if ((arg0 = strip_bit_not (var: op0)) != NULL |
1957 | && (arg1 = strip_bit_not (var: alt_op0)) != NULL |
1958 | && (bound = strip_bit_not (var: op1)) != NULL) |
1959 | { |
1960 | minmax = MAX_EXPR; |
1961 | ass_code = invert_minmax_code (code: ass_code); |
1962 | invert = true; |
1963 | } |
1964 | else |
1965 | { |
1966 | bound = op1; |
1967 | minmax = MIN_EXPR; |
1968 | arg0 = op0; |
1969 | arg1 = alt_op0; |
1970 | } |
1971 | } |
1972 | else if ((operand_equal_for_phi_arg_p (op0, larger) |
1973 | || (alt_larger |
1974 | && operand_equal_for_phi_arg_p (op0, alt_larger))) |
1975 | && (operand_equal_for_phi_arg_p (alt_op0, smaller) |
1976 | || (alt_smaller |
1977 | && operand_equal_for_phi_arg_p (alt_op0, alt_smaller)))) |
1978 | { |
1979 | /* We got here if the condition is true, i.e., SMALLER > LARGER. */ |
1980 | if (!operand_equal_for_phi_arg_p (op1, alt_op1)) |
1981 | return false; |
1982 | |
1983 | if ((arg0 = strip_bit_not (var: op0)) != NULL |
1984 | && (arg1 = strip_bit_not (var: alt_op0)) != NULL |
1985 | && (bound = strip_bit_not (var: op1)) != NULL) |
1986 | { |
1987 | minmax = MIN_EXPR; |
1988 | ass_code = invert_minmax_code (code: ass_code); |
1989 | invert = true; |
1990 | } |
1991 | else |
1992 | { |
1993 | bound = op1; |
1994 | minmax = MAX_EXPR; |
1995 | arg0 = op0; |
1996 | arg1 = alt_op0; |
1997 | } |
1998 | } |
1999 | else |
2000 | return false; |
2001 | |
2002 | /* Emit the statement to compute min/max. */ |
2003 | location_t locus = gimple_location (g: last_nondebug_stmt (cond_bb)); |
2004 | gimple_seq stmts = NULL; |
2005 | tree phi_result = PHI_RESULT (phi); |
2006 | result = gimple_build (seq: &stmts, loc: locus, code: minmax, TREE_TYPE (phi_result), |
2007 | ops: arg0, ops: arg1); |
2008 | result = gimple_build (seq: &stmts, loc: locus, code: ass_code, TREE_TYPE (phi_result), |
2009 | ops: result, ops: bound); |
2010 | if (invert) |
2011 | result = gimple_build (seq: &stmts, loc: locus, code: BIT_NOT_EXPR, TREE_TYPE (phi_result), |
2012 | ops: result); |
2013 | |
2014 | gsi = gsi_last_bb (bb: cond_bb); |
2015 | gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); |
2016 | |
2017 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e1, phi, new_tree: result); |
2018 | |
2019 | return true; |
2020 | } |
2021 | else if (!threeway_p |
2022 | || empty_block_p (alt_middle_bb)) |
2023 | { |
2024 | /* Recognize the following case, assuming d <= u: |
2025 | |
2026 | if (a <= u) |
2027 | b = MAX (a, d); |
2028 | x = PHI <b, u> |
2029 | |
2030 | This is equivalent to |
2031 | |
2032 | b = MAX (a, d); |
2033 | x = MIN (b, u); */ |
2034 | |
2035 | gimple *assign = last_and_only_stmt (middle_bb); |
2036 | tree lhs, op0, op1, bound; |
2037 | |
2038 | if (!single_pred_p (bb: middle_bb)) |
2039 | return false; |
2040 | |
2041 | if (!assign |
2042 | || gimple_code (g: assign) != GIMPLE_ASSIGN) |
2043 | return false; |
2044 | |
2045 | lhs = gimple_assign_lhs (gs: assign); |
2046 | ass_code = gimple_assign_rhs_code (gs: assign); |
2047 | if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) |
2048 | return false; |
2049 | op0 = gimple_assign_rhs1 (gs: assign); |
2050 | op1 = gimple_assign_rhs2 (gs: assign); |
2051 | |
2052 | if (true_edge->src == middle_bb) |
2053 | { |
2054 | /* We got here if the condition is true, i.e., SMALLER < LARGER. */ |
2055 | if (!operand_equal_for_phi_arg_p (lhs, arg_true)) |
2056 | return false; |
2057 | |
2058 | if (operand_equal_for_phi_arg_p (arg_false, larger) |
2059 | || (alt_larger |
2060 | && operand_equal_for_phi_arg_p (arg_false, alt_larger))) |
2061 | { |
2062 | /* Case |
2063 | |
2064 | if (smaller < larger) |
2065 | { |
2066 | r' = MAX_EXPR (smaller, bound) |
2067 | } |
2068 | r = PHI <r', larger> --> to be turned to MIN_EXPR. */ |
2069 | if (ass_code != MAX_EXPR) |
2070 | return false; |
2071 | |
2072 | minmax = MIN_EXPR; |
2073 | if (operand_equal_for_phi_arg_p (op0, smaller) |
2074 | || (alt_smaller |
2075 | && operand_equal_for_phi_arg_p (op0, alt_smaller))) |
2076 | bound = op1; |
2077 | else if (operand_equal_for_phi_arg_p (op1, smaller) |
2078 | || (alt_smaller |
2079 | && operand_equal_for_phi_arg_p (op1, alt_smaller))) |
2080 | bound = op0; |
2081 | else |
2082 | return false; |
2083 | |
2084 | /* We need BOUND <= LARGER. */ |
2085 | if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, |
2086 | bound, arg_false))) |
2087 | return false; |
2088 | } |
2089 | else if (operand_equal_for_phi_arg_p (arg_false, smaller) |
2090 | || (alt_smaller |
2091 | && operand_equal_for_phi_arg_p (arg_false, alt_smaller))) |
2092 | { |
2093 | /* Case |
2094 | |
2095 | if (smaller < larger) |
2096 | { |
2097 | r' = MIN_EXPR (larger, bound) |
2098 | } |
2099 | r = PHI <r', smaller> --> to be turned to MAX_EXPR. */ |
2100 | if (ass_code != MIN_EXPR) |
2101 | return false; |
2102 | |
2103 | minmax = MAX_EXPR; |
2104 | if (operand_equal_for_phi_arg_p (op0, larger) |
2105 | || (alt_larger |
2106 | && operand_equal_for_phi_arg_p (op0, alt_larger))) |
2107 | bound = op1; |
2108 | else if (operand_equal_for_phi_arg_p (op1, larger) |
2109 | || (alt_larger |
2110 | && operand_equal_for_phi_arg_p (op1, alt_larger))) |
2111 | bound = op0; |
2112 | else |
2113 | return false; |
2114 | |
2115 | /* We need BOUND >= SMALLER. */ |
2116 | if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, |
2117 | bound, arg_false))) |
2118 | return false; |
2119 | } |
2120 | else |
2121 | return false; |
2122 | } |
2123 | else |
2124 | { |
2125 | /* We got here if the condition is false, i.e., SMALLER > LARGER. */ |
2126 | if (!operand_equal_for_phi_arg_p (lhs, arg_false)) |
2127 | return false; |
2128 | |
2129 | if (operand_equal_for_phi_arg_p (arg_true, larger) |
2130 | || (alt_larger |
2131 | && operand_equal_for_phi_arg_p (arg_true, alt_larger))) |
2132 | { |
2133 | /* Case |
2134 | |
2135 | if (smaller > larger) |
2136 | { |
2137 | r' = MIN_EXPR (smaller, bound) |
2138 | } |
2139 | r = PHI <r', larger> --> to be turned to MAX_EXPR. */ |
2140 | if (ass_code != MIN_EXPR) |
2141 | return false; |
2142 | |
2143 | minmax = MAX_EXPR; |
2144 | if (operand_equal_for_phi_arg_p (op0, smaller) |
2145 | || (alt_smaller |
2146 | && operand_equal_for_phi_arg_p (op0, alt_smaller))) |
2147 | bound = op1; |
2148 | else if (operand_equal_for_phi_arg_p (op1, smaller) |
2149 | || (alt_smaller |
2150 | && operand_equal_for_phi_arg_p (op1, alt_smaller))) |
2151 | bound = op0; |
2152 | else |
2153 | return false; |
2154 | |
2155 | /* We need BOUND >= LARGER. */ |
2156 | if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, |
2157 | bound, arg_true))) |
2158 | return false; |
2159 | } |
2160 | else if (operand_equal_for_phi_arg_p (arg_true, smaller) |
2161 | || (alt_smaller |
2162 | && operand_equal_for_phi_arg_p (arg_true, alt_smaller))) |
2163 | { |
2164 | /* Case |
2165 | |
2166 | if (smaller > larger) |
2167 | { |
2168 | r' = MAX_EXPR (larger, bound) |
2169 | } |
2170 | r = PHI <r', smaller> --> to be turned to MIN_EXPR. */ |
2171 | if (ass_code != MAX_EXPR) |
2172 | return false; |
2173 | |
2174 | minmax = MIN_EXPR; |
2175 | if (operand_equal_for_phi_arg_p (op0, larger)) |
2176 | bound = op1; |
2177 | else if (operand_equal_for_phi_arg_p (op1, larger)) |
2178 | bound = op0; |
2179 | else |
2180 | return false; |
2181 | |
2182 | /* We need BOUND <= SMALLER. */ |
2183 | if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, |
2184 | bound, arg_true))) |
2185 | return false; |
2186 | } |
2187 | else |
2188 | return false; |
2189 | } |
2190 | |
2191 | /* Move the statement from the middle block. */ |
2192 | gsi = gsi_last_bb (bb: cond_bb); |
2193 | gsi_from = gsi_last_nondebug_bb (bb: middle_bb); |
2194 | reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from), |
2195 | SSA_OP_DEF)); |
2196 | gsi_move_before (&gsi_from, &gsi); |
2197 | } |
2198 | else |
2199 | return false; |
2200 | |
2201 | /* Emit the statement to compute min/max. */ |
2202 | gimple_seq stmts = NULL; |
2203 | tree phi_result = PHI_RESULT (phi); |
2204 | |
2205 | /* When we can't use a MIN/MAX_EXPR still make sure the expression |
2206 | stays in a form to be recognized by ISA that map to IEEE x > y ? x : y |
2207 | semantics (that's not IEEE max semantics). */ |
2208 | if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) |
2209 | { |
2210 | result = gimple_build (seq: &stmts, code: cmp, boolean_type_node, |
2211 | ops: gimple_cond_lhs (gs: cond), ops: rhs); |
2212 | result = gimple_build (seq: &stmts, code: COND_EXPR, TREE_TYPE (phi_result), |
2213 | ops: result, ops: arg_true, ops: arg_false); |
2214 | } |
2215 | else |
2216 | result = gimple_build (seq: &stmts, code: minmax, TREE_TYPE (phi_result), ops: arg0, ops: arg1); |
2217 | |
2218 | gsi = gsi_last_bb (bb: cond_bb); |
2219 | gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); |
2220 | |
2221 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e1, phi, new_tree: result); |
2222 | |
2223 | return true; |
2224 | } |
2225 | |
2226 | /* Attempt to optimize (x <=> y) cmp 0 and similar comparisons. |
2227 | For strong ordering <=> try to match something like: |
2228 | <bb 2> : // cond3_bb (== cond2_bb) |
2229 | if (x_4(D) != y_5(D)) |
2230 | goto <bb 3>; [INV] |
2231 | else |
2232 | goto <bb 6>; [INV] |
2233 | |
2234 | <bb 3> : // cond_bb |
2235 | if (x_4(D) < y_5(D)) |
2236 | goto <bb 6>; [INV] |
2237 | else |
2238 | goto <bb 4>; [INV] |
2239 | |
2240 | <bb 4> : // middle_bb |
2241 | |
2242 | <bb 6> : // phi_bb |
2243 | # iftmp.0_2 = PHI <1(4), 0(2), -1(3)> |
2244 | _1 = iftmp.0_2 == 0; |
2245 | |
2246 | and for partial ordering <=> something like: |
2247 | |
2248 | <bb 2> : // cond3_bb |
2249 | if (a_3(D) == b_5(D)) |
2250 | goto <bb 6>; [50.00%] |
2251 | else |
2252 | goto <bb 3>; [50.00%] |
2253 | |
2254 | <bb 3> [local count: 536870913]: // cond2_bb |
2255 | if (a_3(D) < b_5(D)) |
2256 | goto <bb 6>; [50.00%] |
2257 | else |
2258 | goto <bb 4>; [50.00%] |
2259 | |
2260 | <bb 4> [local count: 268435456]: // cond_bb |
2261 | if (a_3(D) > b_5(D)) |
2262 | goto <bb 6>; [50.00%] |
2263 | else |
2264 | goto <bb 5>; [50.00%] |
2265 | |
2266 | <bb 5> [local count: 134217728]: // middle_bb |
2267 | |
2268 | <bb 6> [local count: 1073741824]: // phi_bb |
2269 | # SR.27_4 = PHI <0(2), -1(3), 1(4), 2(5)> |
2270 | _2 = SR.27_4 > 0; */ |
2271 | |
2272 | static bool |
2273 | spaceship_replacement (basic_block cond_bb, basic_block middle_bb, |
2274 | edge e0, edge e1, gphi *phi, |
2275 | tree arg0, tree arg1) |
2276 | { |
2277 | tree phires = PHI_RESULT (phi); |
2278 | if (!INTEGRAL_TYPE_P (TREE_TYPE (phires)) |
2279 | || TYPE_UNSIGNED (TREE_TYPE (phires)) |
2280 | || !tree_fits_shwi_p (arg0) |
2281 | || !tree_fits_shwi_p (arg1) |
2282 | || !IN_RANGE (tree_to_shwi (arg0), -1, 2) |
2283 | || !IN_RANGE (tree_to_shwi (arg1), -1, 2)) |
2284 | return false; |
2285 | |
2286 | basic_block phi_bb = gimple_bb (g: phi); |
2287 | gcc_assert (phi_bb == e0->dest && phi_bb == e1->dest); |
2288 | if (!IN_RANGE (EDGE_COUNT (phi_bb->preds), 3, 4)) |
2289 | return false; |
2290 | |
2291 | use_operand_p use_p; |
2292 | gimple *use_stmt; |
2293 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires)) |
2294 | return false; |
2295 | if (!single_imm_use (var: phires, use_p: &use_p, stmt: &use_stmt)) |
2296 | return false; |
2297 | enum tree_code cmp; |
2298 | tree lhs, rhs; |
2299 | gimple *orig_use_stmt = use_stmt; |
2300 | tree orig_use_lhs = NULL_TREE; |
2301 | int prec = TYPE_PRECISION (TREE_TYPE (phires)); |
2302 | bool is_cast = false; |
2303 | |
2304 | /* Deal with the case when match.pd has rewritten the (res & ~1) == 0 |
2305 | into res <= 1 and has left a type-cast for signed types. */ |
2306 | if (gimple_assign_cast_p (s: use_stmt)) |
2307 | { |
2308 | orig_use_lhs = gimple_assign_lhs (gs: use_stmt); |
2309 | /* match.pd would have only done this for a signed type, |
2310 | so the conversion must be to an unsigned one. */ |
2311 | tree ty1 = TREE_TYPE (gimple_assign_rhs1 (use_stmt)); |
2312 | tree ty2 = TREE_TYPE (orig_use_lhs); |
2313 | |
2314 | if (!TYPE_UNSIGNED (ty2) || !INTEGRAL_TYPE_P (ty2)) |
2315 | return false; |
2316 | if (TYPE_PRECISION (ty1) > TYPE_PRECISION (ty2)) |
2317 | return false; |
2318 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs)) |
2319 | return false; |
2320 | if (!single_imm_use (var: orig_use_lhs, use_p: &use_p, stmt: &use_stmt)) |
2321 | return false; |
2322 | |
2323 | is_cast = true; |
2324 | } |
2325 | else if (is_gimple_assign (gs: use_stmt) |
2326 | && gimple_assign_rhs_code (gs: use_stmt) == BIT_AND_EXPR |
2327 | && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST |
2328 | && (wi::to_wide (t: gimple_assign_rhs2 (gs: use_stmt)) |
2329 | == wi::shifted_mask (start: 1, width: prec - 1, negate_p: false, precision: prec))) |
2330 | { |
2331 | /* For partial_ordering result operator>= with unspec as second |
2332 | argument is (res & 1) == res, folded by match.pd into |
2333 | (res & ~1) == 0. */ |
2334 | orig_use_lhs = gimple_assign_lhs (gs: use_stmt); |
2335 | if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs)) |
2336 | return false; |
2337 | if (!single_imm_use (var: orig_use_lhs, use_p: &use_p, stmt: &use_stmt)) |
2338 | return false; |
2339 | } |
2340 | if (gimple_code (g: use_stmt) == GIMPLE_COND) |
2341 | { |
2342 | cmp = gimple_cond_code (gs: use_stmt); |
2343 | lhs = gimple_cond_lhs (gs: use_stmt); |
2344 | rhs = gimple_cond_rhs (gs: use_stmt); |
2345 | } |
2346 | else if (is_gimple_assign (gs: use_stmt)) |
2347 | { |
2348 | if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS) |
2349 | { |
2350 | cmp = gimple_assign_rhs_code (gs: use_stmt); |
2351 | lhs = gimple_assign_rhs1 (gs: use_stmt); |
2352 | rhs = gimple_assign_rhs2 (gs: use_stmt); |
2353 | } |
2354 | else if (gimple_assign_rhs_code (gs: use_stmt) == COND_EXPR) |
2355 | { |
2356 | tree cond = gimple_assign_rhs1 (gs: use_stmt); |
2357 | if (!COMPARISON_CLASS_P (cond)) |
2358 | return false; |
2359 | cmp = TREE_CODE (cond); |
2360 | lhs = TREE_OPERAND (cond, 0); |
2361 | rhs = TREE_OPERAND (cond, 1); |
2362 | } |
2363 | else |
2364 | return false; |
2365 | } |
2366 | else |
2367 | return false; |
2368 | switch (cmp) |
2369 | { |
2370 | case EQ_EXPR: |
2371 | case NE_EXPR: |
2372 | case LT_EXPR: |
2373 | case GT_EXPR: |
2374 | case LE_EXPR: |
2375 | case GE_EXPR: |
2376 | break; |
2377 | default: |
2378 | return false; |
2379 | } |
2380 | if (lhs != (orig_use_lhs ? orig_use_lhs : phires) |
2381 | || !tree_fits_shwi_p (rhs) |
2382 | || !IN_RANGE (tree_to_shwi (rhs), -1, 1)) |
2383 | return false; |
2384 | |
2385 | if (is_cast) |
2386 | { |
2387 | if (TREE_CODE (rhs) != INTEGER_CST) |
2388 | return false; |
2389 | /* As for -ffast-math we assume the 2 return to be |
2390 | impossible, canonicalize (unsigned) res <= 1U or |
2391 | (unsigned) res < 2U into res >= 0 and (unsigned) res > 1U |
2392 | or (unsigned) res >= 2U as res < 0. */ |
2393 | switch (cmp) |
2394 | { |
2395 | case LE_EXPR: |
2396 | if (!integer_onep (rhs)) |
2397 | return false; |
2398 | cmp = GE_EXPR; |
2399 | break; |
2400 | case LT_EXPR: |
2401 | if (wi::ne_p (x: wi::to_widest (t: rhs), y: 2)) |
2402 | return false; |
2403 | cmp = GE_EXPR; |
2404 | break; |
2405 | case GT_EXPR: |
2406 | if (!integer_onep (rhs)) |
2407 | return false; |
2408 | cmp = LT_EXPR; |
2409 | break; |
2410 | case GE_EXPR: |
2411 | if (wi::ne_p (x: wi::to_widest (t: rhs), y: 2)) |
2412 | return false; |
2413 | cmp = LT_EXPR; |
2414 | break; |
2415 | default: |
2416 | return false; |
2417 | } |
2418 | rhs = build_zero_cst (TREE_TYPE (phires)); |
2419 | } |
2420 | else if (orig_use_lhs) |
2421 | { |
2422 | if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs)) |
2423 | return false; |
2424 | /* As for -ffast-math we assume the 2 return to be |
2425 | impossible, canonicalize (res & ~1) == 0 into |
2426 | res >= 0 and (res & ~1) != 0 as res < 0. */ |
2427 | cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR; |
2428 | } |
2429 | |
2430 | if (!empty_block_p (middle_bb)) |
2431 | return false; |
2432 | |
2433 | gcond *cond1 = as_a <gcond *> (p: *gsi_last_bb (bb: cond_bb)); |
2434 | enum tree_code cmp1 = gimple_cond_code (gs: cond1); |
2435 | switch (cmp1) |
2436 | { |
2437 | case LT_EXPR: |
2438 | case LE_EXPR: |
2439 | case GT_EXPR: |
2440 | case GE_EXPR: |
2441 | break; |
2442 | default: |
2443 | return false; |
2444 | } |
2445 | tree lhs1 = gimple_cond_lhs (gs: cond1); |
2446 | tree rhs1 = gimple_cond_rhs (gs: cond1); |
2447 | /* The optimization may be unsafe due to NaNs. */ |
2448 | if (HONOR_NANS (TREE_TYPE (lhs1))) |
2449 | return false; |
2450 | if (TREE_CODE (lhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs1)) |
2451 | return false; |
2452 | if (TREE_CODE (rhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) |
2453 | return false; |
2454 | |
2455 | if (!single_pred_p (bb: cond_bb) || !cond_only_block_p (cond_bb)) |
2456 | return false; |
2457 | |
2458 | basic_block cond2_bb = single_pred (bb: cond_bb); |
2459 | if (EDGE_COUNT (cond2_bb->succs) != 2) |
2460 | return false; |
2461 | edge cond2_phi_edge; |
2462 | if (EDGE_SUCC (cond2_bb, 0)->dest == cond_bb) |
2463 | { |
2464 | if (EDGE_SUCC (cond2_bb, 1)->dest != phi_bb) |
2465 | return false; |
2466 | cond2_phi_edge = EDGE_SUCC (cond2_bb, 1); |
2467 | } |
2468 | else if (EDGE_SUCC (cond2_bb, 0)->dest != phi_bb) |
2469 | return false; |
2470 | else |
2471 | cond2_phi_edge = EDGE_SUCC (cond2_bb, 0); |
2472 | tree arg2 = gimple_phi_arg_def (gs: phi, index: cond2_phi_edge->dest_idx); |
2473 | if (!tree_fits_shwi_p (arg2)) |
2474 | return false; |
2475 | gcond *cond2 = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: cond2_bb)); |
2476 | if (!cond2) |
2477 | return false; |
2478 | enum tree_code cmp2 = gimple_cond_code (gs: cond2); |
2479 | tree lhs2 = gimple_cond_lhs (gs: cond2); |
2480 | tree rhs2 = gimple_cond_rhs (gs: cond2); |
2481 | if (lhs2 == lhs1) |
2482 | { |
2483 | if (!operand_equal_p (rhs2, rhs1, flags: 0)) |
2484 | { |
2485 | if ((cmp2 == EQ_EXPR || cmp2 == NE_EXPR) |
2486 | && TREE_CODE (rhs1) == INTEGER_CST |
2487 | && TREE_CODE (rhs2) == INTEGER_CST) |
2488 | { |
2489 | /* For integers, we can have cond2 x == 5 |
2490 | and cond1 x < 5, x <= 4, x <= 5, x < 6, |
2491 | x > 5, x >= 6, x >= 5 or x > 4. */ |
2492 | if (tree_int_cst_lt (t1: rhs1, t2: rhs2)) |
2493 | { |
2494 | if (wi::ne_p (x: wi::to_wide (t: rhs1) + 1, y: wi::to_wide (t: rhs2))) |
2495 | return false; |
2496 | if (cmp1 == LE_EXPR) |
2497 | cmp1 = LT_EXPR; |
2498 | else if (cmp1 == GT_EXPR) |
2499 | cmp1 = GE_EXPR; |
2500 | else |
2501 | return false; |
2502 | } |
2503 | else |
2504 | { |
2505 | gcc_checking_assert (tree_int_cst_lt (rhs2, rhs1)); |
2506 | if (wi::ne_p (x: wi::to_wide (t: rhs2) + 1, y: wi::to_wide (t: rhs1))) |
2507 | return false; |
2508 | if (cmp1 == LT_EXPR) |
2509 | cmp1 = LE_EXPR; |
2510 | else if (cmp1 == GE_EXPR) |
2511 | cmp1 = GT_EXPR; |
2512 | else |
2513 | return false; |
2514 | } |
2515 | rhs1 = rhs2; |
2516 | } |
2517 | else |
2518 | return false; |
2519 | } |
2520 | } |
2521 | else if (lhs2 == rhs1) |
2522 | { |
2523 | if (rhs2 != lhs1) |
2524 | return false; |
2525 | } |
2526 | else |
2527 | return false; |
2528 | |
2529 | tree arg3 = arg2; |
2530 | basic_block cond3_bb = cond2_bb; |
2531 | edge cond3_phi_edge = cond2_phi_edge; |
2532 | gcond *cond3 = cond2; |
2533 | enum tree_code cmp3 = cmp2; |
2534 | tree lhs3 = lhs2; |
2535 | tree rhs3 = rhs2; |
2536 | if (EDGE_COUNT (phi_bb->preds) == 4) |
2537 | { |
2538 | if (absu_hwi (x: tree_to_shwi (arg2)) != 1) |
2539 | return false; |
2540 | if (e1->flags & EDGE_TRUE_VALUE) |
2541 | { |
2542 | if (tree_to_shwi (arg0) != 2 |
2543 | || absu_hwi (x: tree_to_shwi (arg1)) != 1 |
2544 | || wi::to_widest (t: arg1) == wi::to_widest (t: arg2)) |
2545 | return false; |
2546 | } |
2547 | else if (tree_to_shwi (arg1) != 2 |
2548 | || absu_hwi (x: tree_to_shwi (arg0)) != 1 |
2549 | || wi::to_widest (t: arg0) == wi::to_widest (t: arg1)) |
2550 | return false; |
2551 | switch (cmp2) |
2552 | { |
2553 | case LT_EXPR: |
2554 | case LE_EXPR: |
2555 | case GT_EXPR: |
2556 | case GE_EXPR: |
2557 | break; |
2558 | default: |
2559 | return false; |
2560 | } |
2561 | /* if (x < y) goto phi_bb; else fallthru; |
2562 | if (x > y) goto phi_bb; else fallthru; |
2563 | bbx:; |
2564 | phi_bb:; |
2565 | is ok, but if x and y are swapped in one of the comparisons, |
2566 | or the comparisons are the same and operands not swapped, |
2567 | or the true and false edges are swapped, it is not. */ |
2568 | if ((lhs2 == lhs1) |
2569 | ^ (((cond2_phi_edge->flags |
2570 | & ((cmp2 == LT_EXPR || cmp2 == LE_EXPR) |
2571 | ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) != 0) |
2572 | != ((e1->flags |
2573 | & ((cmp1 == LT_EXPR || cmp1 == LE_EXPR) |
2574 | ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) != 0))) |
2575 | return false; |
2576 | if (!single_pred_p (bb: cond2_bb) || !cond_only_block_p (cond2_bb)) |
2577 | return false; |
2578 | cond3_bb = single_pred (bb: cond2_bb); |
2579 | if (EDGE_COUNT (cond2_bb->succs) != 2) |
2580 | return false; |
2581 | if (EDGE_SUCC (cond3_bb, 0)->dest == cond2_bb) |
2582 | { |
2583 | if (EDGE_SUCC (cond3_bb, 1)->dest != phi_bb) |
2584 | return false; |
2585 | cond3_phi_edge = EDGE_SUCC (cond3_bb, 1); |
2586 | } |
2587 | else if (EDGE_SUCC (cond3_bb, 0)->dest != phi_bb) |
2588 | return false; |
2589 | else |
2590 | cond3_phi_edge = EDGE_SUCC (cond3_bb, 0); |
2591 | arg3 = gimple_phi_arg_def (gs: phi, index: cond3_phi_edge->dest_idx); |
2592 | cond3 = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: cond3_bb)); |
2593 | if (!cond3) |
2594 | return false; |
2595 | cmp3 = gimple_cond_code (gs: cond3); |
2596 | lhs3 = gimple_cond_lhs (gs: cond3); |
2597 | rhs3 = gimple_cond_rhs (gs: cond3); |
2598 | if (lhs3 == lhs1) |
2599 | { |
2600 | if (!operand_equal_p (rhs3, rhs1, flags: 0)) |
2601 | return false; |
2602 | } |
2603 | else if (lhs3 == rhs1) |
2604 | { |
2605 | if (rhs3 != lhs1) |
2606 | return false; |
2607 | } |
2608 | else |
2609 | return false; |
2610 | } |
2611 | else if (absu_hwi (x: tree_to_shwi (arg0)) != 1 |
2612 | || absu_hwi (x: tree_to_shwi (arg1)) != 1 |
2613 | || wi::to_widest (t: arg0) == wi::to_widest (t: arg1)) |
2614 | return false; |
2615 | |
2616 | if (!integer_zerop (arg3) || (cmp3 != EQ_EXPR && cmp3 != NE_EXPR)) |
2617 | return false; |
2618 | if ((cond3_phi_edge->flags & (cmp3 == EQ_EXPR |
2619 | ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0) |
2620 | return false; |
2621 | |
2622 | /* lhs1 one_cmp rhs1 results in phires of 1. */ |
2623 | enum tree_code one_cmp; |
2624 | if ((cmp1 == LT_EXPR || cmp1 == LE_EXPR) |
2625 | ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0))) |
2626 | one_cmp = LT_EXPR; |
2627 | else |
2628 | one_cmp = GT_EXPR; |
2629 | |
2630 | enum tree_code res_cmp; |
2631 | switch (cmp) |
2632 | { |
2633 | case EQ_EXPR: |
2634 | if (integer_zerop (rhs)) |
2635 | res_cmp = EQ_EXPR; |
2636 | else if (integer_minus_onep (rhs)) |
2637 | res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; |
2638 | else if (integer_onep (rhs)) |
2639 | res_cmp = one_cmp; |
2640 | else |
2641 | return false; |
2642 | break; |
2643 | case NE_EXPR: |
2644 | if (integer_zerop (rhs)) |
2645 | res_cmp = NE_EXPR; |
2646 | else if (integer_minus_onep (rhs)) |
2647 | res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; |
2648 | else if (integer_onep (rhs)) |
2649 | res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; |
2650 | else |
2651 | return false; |
2652 | break; |
2653 | case LT_EXPR: |
2654 | if (integer_onep (rhs)) |
2655 | res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; |
2656 | else if (integer_zerop (rhs)) |
2657 | res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; |
2658 | else |
2659 | return false; |
2660 | break; |
2661 | case LE_EXPR: |
2662 | if (integer_zerop (rhs)) |
2663 | res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; |
2664 | else if (integer_minus_onep (rhs)) |
2665 | res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; |
2666 | else |
2667 | return false; |
2668 | break; |
2669 | case GT_EXPR: |
2670 | if (integer_minus_onep (rhs)) |
2671 | res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; |
2672 | else if (integer_zerop (rhs)) |
2673 | res_cmp = one_cmp; |
2674 | else |
2675 | return false; |
2676 | break; |
2677 | case GE_EXPR: |
2678 | if (integer_zerop (rhs)) |
2679 | res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; |
2680 | else if (integer_onep (rhs)) |
2681 | res_cmp = one_cmp; |
2682 | else |
2683 | return false; |
2684 | break; |
2685 | default: |
2686 | gcc_unreachable (); |
2687 | } |
2688 | |
2689 | if (gimple_code (g: use_stmt) == GIMPLE_COND) |
2690 | { |
2691 | gcond *use_cond = as_a <gcond *> (p: use_stmt); |
2692 | gimple_cond_set_code (gs: use_cond, code: res_cmp); |
2693 | gimple_cond_set_lhs (gs: use_cond, lhs: lhs1); |
2694 | gimple_cond_set_rhs (gs: use_cond, rhs: rhs1); |
2695 | } |
2696 | else if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS) |
2697 | { |
2698 | gimple_assign_set_rhs_code (s: use_stmt, code: res_cmp); |
2699 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: lhs1); |
2700 | gimple_assign_set_rhs2 (gs: use_stmt, rhs: rhs1); |
2701 | } |
2702 | else |
2703 | { |
2704 | tree cond = build2 (res_cmp, TREE_TYPE (gimple_assign_rhs1 (use_stmt)), |
2705 | lhs1, rhs1); |
2706 | gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond); |
2707 | } |
2708 | update_stmt (s: use_stmt); |
2709 | |
2710 | if (MAY_HAVE_DEBUG_BIND_STMTS) |
2711 | { |
2712 | use_operand_p use_p; |
2713 | imm_use_iterator iter; |
2714 | bool has_debug_uses = false; |
2715 | bool has_cast_debug_uses = false; |
2716 | FOR_EACH_IMM_USE_FAST (use_p, iter, phires) |
2717 | { |
2718 | gimple *use_stmt = USE_STMT (use_p); |
2719 | if (orig_use_lhs && use_stmt == orig_use_stmt) |
2720 | continue; |
2721 | gcc_assert (is_gimple_debug (use_stmt)); |
2722 | has_debug_uses = true; |
2723 | break; |
2724 | } |
2725 | if (orig_use_lhs) |
2726 | { |
2727 | if (!has_debug_uses || is_cast) |
2728 | FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs) |
2729 | { |
2730 | gimple *use_stmt = USE_STMT (use_p); |
2731 | gcc_assert (is_gimple_debug (use_stmt)); |
2732 | has_debug_uses = true; |
2733 | if (is_cast) |
2734 | has_cast_debug_uses = true; |
2735 | } |
2736 | gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt); |
2737 | tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs)); |
2738 | gimple_assign_set_rhs_with_ops (gsi: &gsi, code: INTEGER_CST, op1: zero); |
2739 | update_stmt (s: orig_use_stmt); |
2740 | } |
2741 | |
2742 | if (has_debug_uses) |
2743 | { |
2744 | /* If there are debug uses, emit something like: |
2745 | # DEBUG D#1 => i_2(D) > j_3(D) ? 1 : -1 |
2746 | # DEBUG D#2 => i_2(D) == j_3(D) ? 0 : D#1 |
2747 | where > stands for the comparison that yielded 1 |
2748 | and replace debug uses of phi result with that D#2. |
2749 | Ignore the value of 2, because if NaNs aren't expected, |
2750 | all floating point numbers should be comparable. */ |
2751 | gimple_stmt_iterator gsi = gsi_after_labels (bb: gimple_bb (g: phi)); |
2752 | tree type = TREE_TYPE (phires); |
2753 | tree temp1 = build_debug_expr_decl (type); |
2754 | tree t = build2 (one_cmp, boolean_type_node, lhs1, rhs2); |
2755 | t = build3 (COND_EXPR, type, t, build_one_cst (type), |
2756 | build_int_cst (type, -1)); |
2757 | gimple *g = gimple_build_debug_bind (temp1, t, phi); |
2758 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
2759 | tree temp2 = build_debug_expr_decl (type); |
2760 | t = build2 (EQ_EXPR, boolean_type_node, lhs1, rhs2); |
2761 | t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1); |
2762 | g = gimple_build_debug_bind (temp2, t, phi); |
2763 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
2764 | replace_uses_by (phires, temp2); |
2765 | if (orig_use_lhs) |
2766 | { |
2767 | if (has_cast_debug_uses) |
2768 | { |
2769 | tree temp3 = make_node (DEBUG_EXPR_DECL); |
2770 | DECL_ARTIFICIAL (temp3) = 1; |
2771 | TREE_TYPE (temp3) = TREE_TYPE (orig_use_lhs); |
2772 | SET_DECL_MODE (temp3, TYPE_MODE (type)); |
2773 | t = fold_convert (TREE_TYPE (temp3), temp2); |
2774 | g = gimple_build_debug_bind (temp3, t, phi); |
2775 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
2776 | replace_uses_by (orig_use_lhs, temp3); |
2777 | } |
2778 | else |
2779 | replace_uses_by (orig_use_lhs, temp2); |
2780 | } |
2781 | } |
2782 | } |
2783 | |
2784 | if (orig_use_lhs) |
2785 | { |
2786 | gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt); |
2787 | gsi_remove (&gsi, true); |
2788 | } |
2789 | |
2790 | gimple_stmt_iterator psi = gsi_for_stmt (phi); |
2791 | remove_phi_node (&psi, true); |
2792 | statistics_counter_event (cfun, "spaceship replacement" , 1); |
2793 | |
2794 | return true; |
2795 | } |
2796 | |
2797 | /* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0). |
2798 | Convert |
2799 | |
2800 | <bb 2> |
2801 | if (b_4(D) != 0) |
2802 | goto <bb 3> |
2803 | else |
2804 | goto <bb 4> |
2805 | |
2806 | <bb 3> |
2807 | _2 = (unsigned long) b_4(D); |
2808 | _9 = __builtin_popcountl (_2); |
2809 | OR |
2810 | _9 = __builtin_popcountl (b_4(D)); |
2811 | |
2812 | <bb 4> |
2813 | c_12 = PHI <0(2), _9(3)> |
2814 | |
2815 | Into |
2816 | <bb 2> |
2817 | _2 = (unsigned long) b_4(D); |
2818 | _9 = __builtin_popcountl (_2); |
2819 | OR |
2820 | _9 = __builtin_popcountl (b_4(D)); |
2821 | |
2822 | <bb 4> |
2823 | c_12 = PHI <_9(2)> |
2824 | |
2825 | Similarly for __builtin_clz or __builtin_ctz if |
2826 | C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and |
2827 | instead of 0 above it uses the value from that macro. */ |
2828 | |
2829 | static bool |
2830 | cond_removal_in_builtin_zero_pattern (basic_block cond_bb, |
2831 | basic_block middle_bb, |
2832 | edge e1, edge e2, gphi *phi, |
2833 | tree arg0, tree arg1) |
2834 | { |
2835 | gimple_stmt_iterator gsi, gsi_from; |
2836 | gimple *call; |
2837 | gimple *cast = NULL; |
2838 | tree lhs, arg; |
2839 | |
2840 | /* Check that |
2841 | _2 = (unsigned long) b_4(D); |
2842 | _9 = __builtin_popcountl (_2); |
2843 | OR |
2844 | _9 = __builtin_popcountl (b_4(D)); |
2845 | are the only stmts in the middle_bb. */ |
2846 | |
2847 | gsi = gsi_start_nondebug_after_labels_bb (bb: middle_bb); |
2848 | if (gsi_end_p (i: gsi)) |
2849 | return false; |
2850 | cast = gsi_stmt (i: gsi); |
2851 | gsi_next_nondebug (i: &gsi); |
2852 | if (!gsi_end_p (i: gsi)) |
2853 | { |
2854 | call = gsi_stmt (i: gsi); |
2855 | gsi_next_nondebug (i: &gsi); |
2856 | if (!gsi_end_p (i: gsi)) |
2857 | return false; |
2858 | } |
2859 | else |
2860 | { |
2861 | call = cast; |
2862 | cast = NULL; |
2863 | } |
2864 | |
2865 | /* Check that we have a popcount/clz/ctz builtin. */ |
2866 | if (!is_gimple_call (gs: call) || gimple_call_num_args (gs: call) != 1) |
2867 | return false; |
2868 | |
2869 | arg = gimple_call_arg (gs: call, index: 0); |
2870 | lhs = gimple_get_lhs (call); |
2871 | |
2872 | if (lhs == NULL_TREE) |
2873 | return false; |
2874 | |
2875 | combined_fn cfn = gimple_call_combined_fn (call); |
2876 | internal_fn ifn = IFN_LAST; |
2877 | int val = 0; |
2878 | switch (cfn) |
2879 | { |
2880 | case CFN_BUILT_IN_BSWAP16: |
2881 | case CFN_BUILT_IN_BSWAP32: |
2882 | case CFN_BUILT_IN_BSWAP64: |
2883 | case CFN_BUILT_IN_BSWAP128: |
2884 | CASE_CFN_FFS: |
2885 | CASE_CFN_PARITY: |
2886 | CASE_CFN_POPCOUNT: |
2887 | break; |
2888 | CASE_CFN_CLZ: |
2889 | if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) |
2890 | { |
2891 | tree type = TREE_TYPE (arg); |
2892 | if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH) |
2893 | && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), |
2894 | val) == 2) |
2895 | { |
2896 | ifn = IFN_CLZ; |
2897 | break; |
2898 | } |
2899 | } |
2900 | return false; |
2901 | CASE_CFN_CTZ: |
2902 | if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) |
2903 | { |
2904 | tree type = TREE_TYPE (arg); |
2905 | if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH) |
2906 | && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), |
2907 | val) == 2) |
2908 | { |
2909 | ifn = IFN_CTZ; |
2910 | break; |
2911 | } |
2912 | } |
2913 | return false; |
2914 | case CFN_BUILT_IN_CLRSB: |
2915 | val = TYPE_PRECISION (integer_type_node) - 1; |
2916 | break; |
2917 | case CFN_BUILT_IN_CLRSBL: |
2918 | val = TYPE_PRECISION (long_integer_type_node) - 1; |
2919 | break; |
2920 | case CFN_BUILT_IN_CLRSBLL: |
2921 | val = TYPE_PRECISION (long_long_integer_type_node) - 1; |
2922 | break; |
2923 | default: |
2924 | return false; |
2925 | } |
2926 | |
2927 | if (cast) |
2928 | { |
2929 | /* We have a cast stmt feeding popcount/clz/ctz builtin. */ |
2930 | /* Check that we have a cast prior to that. */ |
2931 | if (gimple_code (g: cast) != GIMPLE_ASSIGN |
2932 | || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast))) |
2933 | return false; |
2934 | /* Result of the cast stmt is the argument to the builtin. */ |
2935 | if (arg != gimple_assign_lhs (gs: cast)) |
2936 | return false; |
2937 | arg = gimple_assign_rhs1 (gs: cast); |
2938 | } |
2939 | |
2940 | gcond *cond = dyn_cast <gcond *> (p: *gsi_last_bb (bb: cond_bb)); |
2941 | |
2942 | /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz |
2943 | builtin. */ |
2944 | if (!cond |
2945 | || (gimple_cond_code (gs: cond) != NE_EXPR |
2946 | && gimple_cond_code (gs: cond) != EQ_EXPR) |
2947 | || !integer_zerop (gimple_cond_rhs (gs: cond)) |
2948 | || arg != gimple_cond_lhs (gs: cond)) |
2949 | return false; |
2950 | |
2951 | /* Canonicalize. */ |
2952 | if ((e2->flags & EDGE_TRUE_VALUE |
2953 | && gimple_cond_code (gs: cond) == NE_EXPR) |
2954 | || (e1->flags & EDGE_TRUE_VALUE |
2955 | && gimple_cond_code (gs: cond) == EQ_EXPR)) |
2956 | { |
2957 | std::swap (a&: arg0, b&: arg1); |
2958 | std::swap (a&: e1, b&: e2); |
2959 | } |
2960 | |
2961 | /* Check PHI arguments. */ |
2962 | if (lhs != arg0 |
2963 | || TREE_CODE (arg1) != INTEGER_CST |
2964 | || wi::to_wide (t: arg1) != val) |
2965 | return false; |
2966 | |
2967 | /* And insert the popcount/clz/ctz builtin and cast stmt before the |
2968 | cond_bb. */ |
2969 | gsi = gsi_last_bb (bb: cond_bb); |
2970 | if (cast) |
2971 | { |
2972 | gsi_from = gsi_for_stmt (cast); |
2973 | gsi_move_before (&gsi_from, &gsi); |
2974 | reset_flow_sensitive_info (gimple_get_lhs (cast)); |
2975 | } |
2976 | gsi_from = gsi_for_stmt (call); |
2977 | if (ifn == IFN_LAST || gimple_call_internal_p (gs: call)) |
2978 | gsi_move_before (&gsi_from, &gsi); |
2979 | else |
2980 | { |
2981 | /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only |
2982 | the latter is well defined at zero. */ |
2983 | call = gimple_build_call_internal (ifn, 1, gimple_call_arg (gs: call, index: 0)); |
2984 | gimple_call_set_lhs (gs: call, lhs); |
2985 | gsi_insert_before (&gsi, call, GSI_SAME_STMT); |
2986 | gsi_remove (&gsi_from, true); |
2987 | } |
2988 | reset_flow_sensitive_info (lhs); |
2989 | |
2990 | /* Now update the PHI and remove unneeded bbs. */ |
2991 | replace_phi_edge_with_variable (cond_block: cond_bb, e: e2, phi, new_tree: lhs); |
2992 | return true; |
2993 | } |
2994 | |
2995 | /* Auxiliary functions to determine the set of memory accesses which |
2996 | can't trap because they are preceded by accesses to the same memory |
2997 | portion. We do that for MEM_REFs, so we only need to track |
2998 | the SSA_NAME of the pointer indirectly referenced. The algorithm |
2999 | simply is a walk over all instructions in dominator order. When |
3000 | we see an MEM_REF we determine if we've already seen a same |
3001 | ref anywhere up to the root of the dominator tree. If we do the |
3002 | current access can't trap. If we don't see any dominating access |
3003 | the current access might trap, but might also make later accesses |
3004 | non-trapping, so we remember it. We need to be careful with loads |
3005 | or stores, for instance a load might not trap, while a store would, |
3006 | so if we see a dominating read access this doesn't mean that a later |
3007 | write access would not trap. Hence we also need to differentiate the |
3008 | type of access(es) seen. |
3009 | |
3010 | ??? We currently are very conservative and assume that a load might |
3011 | trap even if a store doesn't (write-only memory). This probably is |
3012 | overly conservative. |
3013 | |
3014 | We currently support a special case that for !TREE_ADDRESSABLE automatic |
3015 | variables, it could ignore whether something is a load or store because the |
3016 | local stack should be always writable. */ |
3017 | |
3018 | /* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which |
3019 | basic block an *_REF through it was seen, which would constitute a |
3020 | no-trap region for same accesses. |
3021 | |
3022 | Size is needed to support 2 MEM_REFs of different types, like |
3023 | MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with |
3024 | OEP_ADDRESS_OF. */ |
3025 | struct ref_to_bb |
3026 | { |
3027 | tree exp; |
3028 | HOST_WIDE_INT size; |
3029 | unsigned int phase; |
3030 | basic_block bb; |
3031 | }; |
3032 | |
3033 | /* Hashtable helpers. */ |
3034 | |
3035 | struct refs_hasher : free_ptr_hash<ref_to_bb> |
3036 | { |
3037 | static inline hashval_t hash (const ref_to_bb *); |
3038 | static inline bool equal (const ref_to_bb *, const ref_to_bb *); |
3039 | }; |
3040 | |
3041 | /* Used for quick clearing of the hash-table when we see calls. |
3042 | Hash entries with phase < nt_call_phase are invalid. */ |
3043 | static unsigned int nt_call_phase; |
3044 | |
3045 | /* The hash function. */ |
3046 | |
3047 | inline hashval_t |
3048 | refs_hasher::hash (const ref_to_bb *n) |
3049 | { |
3050 | inchash::hash hstate; |
3051 | inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF); |
3052 | hstate.add_hwi (v: n->size); |
3053 | return hstate.end (); |
3054 | } |
3055 | |
3056 | /* The equality function of *P1 and *P2. */ |
3057 | |
3058 | inline bool |
3059 | refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) |
3060 | { |
3061 | return operand_equal_p (n1->exp, n2->exp, flags: OEP_ADDRESS_OF) |
3062 | && n1->size == n2->size; |
3063 | } |
3064 | |
3065 | class nontrapping_dom_walker : public dom_walker |
3066 | { |
3067 | public: |
3068 | nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) |
3069 | : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) |
3070 | {} |
3071 | |
3072 | edge before_dom_children (basic_block) final override; |
3073 | void after_dom_children (basic_block) final override; |
3074 | |
3075 | private: |
3076 | |
3077 | /* We see the expression EXP in basic block BB. If it's an interesting |
3078 | expression (an MEM_REF through an SSA_NAME) possibly insert the |
3079 | expression into the set NONTRAP or the hash table of seen expressions. |
3080 | STORE is true if this expression is on the LHS, otherwise it's on |
3081 | the RHS. */ |
3082 | void add_or_mark_expr (basic_block, tree, bool); |
3083 | |
3084 | hash_set<tree> *m_nontrapping; |
3085 | |
3086 | /* The hash table for remembering what we've seen. */ |
3087 | hash_table<refs_hasher> m_seen_refs; |
3088 | }; |
3089 | |
3090 | /* Called by walk_dominator_tree, when entering the block BB. */ |
3091 | edge |
3092 | nontrapping_dom_walker::before_dom_children (basic_block bb) |
3093 | { |
3094 | edge e; |
3095 | edge_iterator ei; |
3096 | gimple_stmt_iterator gsi; |
3097 | |
3098 | /* If we haven't seen all our predecessors, clear the hash-table. */ |
3099 | FOR_EACH_EDGE (e, ei, bb->preds) |
3100 | if ((((size_t)e->src->aux) & 2) == 0) |
3101 | { |
3102 | nt_call_phase++; |
3103 | break; |
3104 | } |
3105 | |
3106 | /* Mark this BB as being on the path to dominator root and as visited. */ |
3107 | bb->aux = (void*)(1 | 2); |
3108 | |
3109 | /* And walk the statements in order. */ |
3110 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
3111 | { |
3112 | gimple *stmt = gsi_stmt (i: gsi); |
3113 | |
3114 | if ((gimple_code (g: stmt) == GIMPLE_ASM && gimple_vdef (g: stmt)) |
3115 | || (is_gimple_call (gs: stmt) |
3116 | && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt)))) |
3117 | nt_call_phase++; |
3118 | else if (gimple_assign_single_p (gs: stmt) && !gimple_has_volatile_ops (stmt)) |
3119 | { |
3120 | add_or_mark_expr (bb, gimple_assign_lhs (gs: stmt), true); |
3121 | add_or_mark_expr (bb, gimple_assign_rhs1 (gs: stmt), false); |
3122 | } |
3123 | } |
3124 | return NULL; |
3125 | } |
3126 | |
3127 | /* Called by walk_dominator_tree, when basic block BB is exited. */ |
3128 | void |
3129 | nontrapping_dom_walker::after_dom_children (basic_block bb) |
3130 | { |
3131 | /* This BB isn't on the path to dominator root anymore. */ |
3132 | bb->aux = (void*)2; |
3133 | } |
3134 | |
3135 | /* We see the expression EXP in basic block BB. If it's an interesting |
3136 | expression of: |
3137 | 1) MEM_REF |
3138 | 2) ARRAY_REF |
3139 | 3) COMPONENT_REF |
3140 | possibly insert the expression into the set NONTRAP or the hash table |
3141 | of seen expressions. STORE is true if this expression is on the LHS, |
3142 | otherwise it's on the RHS. */ |
3143 | void |
3144 | nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) |
3145 | { |
3146 | HOST_WIDE_INT size; |
3147 | |
3148 | if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF |
3149 | || TREE_CODE (exp) == COMPONENT_REF) |
3150 | && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) |
3151 | { |
3152 | struct ref_to_bb map; |
3153 | ref_to_bb **slot; |
3154 | struct ref_to_bb *r2bb; |
3155 | basic_block found_bb = 0; |
3156 | |
3157 | if (!store) |
3158 | { |
3159 | tree base = get_base_address (t: exp); |
3160 | /* Only record a LOAD of a local variable without address-taken, as |
3161 | the local stack is always writable. This allows cselim on a STORE |
3162 | with a dominating LOAD. */ |
3163 | if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) |
3164 | return; |
3165 | } |
3166 | |
3167 | /* Try to find the last seen *_REF, which can trap. */ |
3168 | map.exp = exp; |
3169 | map.size = size; |
3170 | slot = m_seen_refs.find_slot (value: &map, insert: INSERT); |
3171 | r2bb = *slot; |
3172 | if (r2bb && r2bb->phase >= nt_call_phase) |
3173 | found_bb = r2bb->bb; |
3174 | |
3175 | /* If we've found a trapping *_REF, _and_ it dominates EXP |
3176 | (it's in a basic block on the path from us to the dominator root) |
3177 | then we can't trap. */ |
3178 | if (found_bb && (((size_t)found_bb->aux) & 1) == 1) |
3179 | { |
3180 | m_nontrapping->add (k: exp); |
3181 | } |
3182 | else |
3183 | { |
3184 | /* EXP might trap, so insert it into the hash table. */ |
3185 | if (r2bb) |
3186 | { |
3187 | r2bb->phase = nt_call_phase; |
3188 | r2bb->bb = bb; |
3189 | } |
3190 | else |
3191 | { |
3192 | r2bb = XNEW (struct ref_to_bb); |
3193 | r2bb->phase = nt_call_phase; |
3194 | r2bb->bb = bb; |
3195 | r2bb->exp = exp; |
3196 | r2bb->size = size; |
3197 | *slot = r2bb; |
3198 | } |
3199 | } |
3200 | } |
3201 | } |
3202 | |
3203 | /* This is the entry point of gathering non trapping memory accesses. |
3204 | It will do a dominator walk over the whole function, and it will |
3205 | make use of the bb->aux pointers. It returns a set of trees |
3206 | (the MEM_REFs itself) which can't trap. */ |
3207 | static hash_set<tree> * |
3208 | get_non_trapping (void) |
3209 | { |
3210 | nt_call_phase = 0; |
3211 | hash_set<tree> *nontrap = new hash_set<tree>; |
3212 | |
3213 | nontrapping_dom_walker (CDI_DOMINATORS, nontrap) |
3214 | .walk (cfun->cfg->x_entry_block_ptr); |
3215 | |
3216 | clear_aux_for_blocks (); |
3217 | return nontrap; |
3218 | } |
3219 | |
3220 | /* Do the main work of conditional store replacement. We already know |
3221 | that the recognized pattern looks like so: |
3222 | |
3223 | split: |
3224 | if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1) |
3225 | MIDDLE_BB: |
3226 | something |
3227 | fallthrough (edge E0) |
3228 | JOIN_BB: |
3229 | some more |
3230 | |
3231 | We check that MIDDLE_BB contains only one store, that that store |
3232 | doesn't trap (not via NOTRAP, but via checking if an access to the same |
3233 | memory location dominates us, or the store is to a local addressable |
3234 | object) and that the store has a "simple" RHS. */ |
3235 | |
3236 | static bool |
3237 | cond_store_replacement (basic_block middle_bb, basic_block join_bb, |
3238 | edge e0, edge e1, hash_set<tree> *nontrap) |
3239 | { |
3240 | gimple *assign = last_and_only_stmt (middle_bb); |
3241 | tree lhs, rhs, name, name2; |
3242 | gphi *newphi; |
3243 | gassign *new_stmt; |
3244 | gimple_stmt_iterator gsi; |
3245 | location_t locus; |
3246 | |
3247 | /* Check if middle_bb contains of only one store. */ |
3248 | if (!assign |
3249 | || !gimple_assign_single_p (gs: assign) |
3250 | || gimple_has_volatile_ops (stmt: assign)) |
3251 | return false; |
3252 | |
3253 | /* And no PHI nodes so all uses in the single stmt are also |
3254 | available where we insert to. */ |
3255 | if (!gimple_seq_empty_p (s: phi_nodes (bb: middle_bb))) |
3256 | return false; |
3257 | |
3258 | locus = gimple_location (g: assign); |
3259 | lhs = gimple_assign_lhs (gs: assign); |
3260 | rhs = gimple_assign_rhs1 (gs: assign); |
3261 | if ((!REFERENCE_CLASS_P (lhs) |
3262 | && !DECL_P (lhs)) |
3263 | || !is_gimple_reg_type (TREE_TYPE (lhs))) |
3264 | return false; |
3265 | |
3266 | /* Prove that we can move the store down. We could also check |
3267 | TREE_THIS_NOTRAP here, but in that case we also could move stores, |
3268 | whose value is not available readily, which we want to avoid. */ |
3269 | if (!nontrap->contains (k: lhs)) |
3270 | { |
3271 | /* If LHS is an access to a local variable without address-taken |
3272 | (or when we allow data races) and known not to trap, we could |
3273 | always safely move down the store. */ |
3274 | tree base = get_base_address (t: lhs); |
3275 | if (!auto_var_p (base) |
3276 | || (TREE_ADDRESSABLE (base) && !flag_store_data_races) |
3277 | || tree_could_trap_p (lhs)) |
3278 | return false; |
3279 | } |
3280 | |
3281 | /* Now we've checked the constraints, so do the transformation: |
3282 | 1) Remove the single store. */ |
3283 | gsi = gsi_for_stmt (assign); |
3284 | unlink_stmt_vdef (assign); |
3285 | gsi_remove (&gsi, true); |
3286 | release_defs (assign); |
3287 | |
3288 | /* Make both store and load use alias-set zero as we have to |
3289 | deal with the case of the store being a conditional change |
3290 | of the dynamic type. */ |
3291 | lhs = unshare_expr (lhs); |
3292 | tree *basep = &lhs; |
3293 | while (handled_component_p (t: *basep)) |
3294 | basep = &TREE_OPERAND (*basep, 0); |
3295 | if (TREE_CODE (*basep) == MEM_REF |
3296 | || TREE_CODE (*basep) == TARGET_MEM_REF) |
3297 | TREE_OPERAND (*basep, 1) |
3298 | = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1)); |
3299 | else |
3300 | *basep = build2 (MEM_REF, TREE_TYPE (*basep), |
3301 | build_fold_addr_expr (*basep), |
3302 | build_zero_cst (ptr_type_node)); |
3303 | |
3304 | /* 2) Insert a load from the memory of the store to the temporary |
3305 | on the edge which did not contain the store. */ |
3306 | name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, name: "cstore" ); |
3307 | new_stmt = gimple_build_assign (name, lhs); |
3308 | gimple_set_location (g: new_stmt, location: locus); |
3309 | lhs = unshare_expr (lhs); |
3310 | { |
3311 | /* Set the no-warning bit on the rhs of the load to avoid uninit |
3312 | warnings. */ |
3313 | tree rhs1 = gimple_assign_rhs1 (gs: new_stmt); |
3314 | suppress_warning (rhs1, OPT_Wuninitialized); |
3315 | } |
3316 | gsi_insert_on_edge (e1, new_stmt); |
3317 | |
3318 | /* 3) Create a PHI node at the join block, with one argument |
3319 | holding the old RHS, and the other holding the temporary |
3320 | where we stored the old memory contents. */ |
3321 | name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, name: "cstore" ); |
3322 | newphi = create_phi_node (name2, join_bb); |
3323 | add_phi_arg (newphi, rhs, e0, locus); |
3324 | add_phi_arg (newphi, name, e1, locus); |
3325 | |
3326 | new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); |
3327 | |
3328 | /* 4) Insert that PHI node. */ |
3329 | gsi = gsi_after_labels (bb: join_bb); |
3330 | if (gsi_end_p (i: gsi)) |
3331 | { |
3332 | gsi = gsi_last_bb (bb: join_bb); |
3333 | gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); |
3334 | } |
3335 | else |
3336 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); |
3337 | |
3338 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3339 | { |
3340 | fprintf (stream: dump_file, format: "\nConditional store replacement happened!" ); |
3341 | fprintf (stream: dump_file, format: "\nReplaced the store with a load." ); |
3342 | fprintf (stream: dump_file, format: "\nInserted a new PHI statement in joint block:\n" ); |
3343 | print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS); |
3344 | } |
3345 | statistics_counter_event (cfun, "conditional store replacement" , 1); |
3346 | |
3347 | return true; |
3348 | } |
3349 | |
3350 | /* Do the main work of conditional store replacement. */ |
3351 | |
3352 | static bool |
3353 | cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, |
3354 | basic_block join_bb, gimple *then_assign, |
3355 | gimple *else_assign) |
3356 | { |
3357 | tree lhs_base, lhs, then_rhs, else_rhs, name; |
3358 | location_t then_locus, else_locus; |
3359 | gimple_stmt_iterator gsi; |
3360 | gphi *newphi; |
3361 | gassign *new_stmt; |
3362 | |
3363 | if (then_assign == NULL |
3364 | || !gimple_assign_single_p (gs: then_assign) |
3365 | || gimple_clobber_p (s: then_assign) |
3366 | || gimple_has_volatile_ops (stmt: then_assign) |
3367 | || else_assign == NULL |
3368 | || !gimple_assign_single_p (gs: else_assign) |
3369 | || gimple_clobber_p (s: else_assign) |
3370 | || gimple_has_volatile_ops (stmt: else_assign)) |
3371 | return false; |
3372 | |
3373 | lhs = gimple_assign_lhs (gs: then_assign); |
3374 | if (!is_gimple_reg_type (TREE_TYPE (lhs)) |
3375 | || !operand_equal_p (lhs, gimple_assign_lhs (gs: else_assign), flags: 0)) |
3376 | return false; |
3377 | |
3378 | lhs_base = get_base_address (t: lhs); |
3379 | if (lhs_base == NULL_TREE |
3380 | || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF)) |
3381 | return false; |
3382 | |
3383 | then_rhs = gimple_assign_rhs1 (gs: then_assign); |
3384 | else_rhs = gimple_assign_rhs1 (gs: else_assign); |
3385 | then_locus = gimple_location (g: then_assign); |
3386 | else_locus = gimple_location (g: else_assign); |
3387 | |
3388 | /* Now we've checked the constraints, so do the transformation: |
3389 | 1) Remove the stores. */ |
3390 | gsi = gsi_for_stmt (then_assign); |
3391 | unlink_stmt_vdef (then_assign); |
3392 | gsi_remove (&gsi, true); |
3393 | release_defs (then_assign); |
3394 | |
3395 | gsi = gsi_for_stmt (else_assign); |
3396 | unlink_stmt_vdef (else_assign); |
3397 | gsi_remove (&gsi, true); |
3398 | release_defs (else_assign); |
3399 | |
3400 | /* 2) Create a PHI node at the join block, with one argument |
3401 | holding the old RHS, and the other holding the temporary |
3402 | where we stored the old memory contents. */ |
3403 | name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, name: "cstore" ); |
3404 | newphi = create_phi_node (name, join_bb); |
3405 | add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus); |
3406 | add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus); |
3407 | |
3408 | new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); |
3409 | |
3410 | /* 3) Insert that PHI node. */ |
3411 | gsi = gsi_after_labels (bb: join_bb); |
3412 | if (gsi_end_p (i: gsi)) |
3413 | { |
3414 | gsi = gsi_last_bb (bb: join_bb); |
3415 | gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); |
3416 | } |
3417 | else |
3418 | gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); |
3419 | |
3420 | statistics_counter_event (cfun, "if-then-else store replacement" , 1); |
3421 | |
3422 | return true; |
3423 | } |
3424 | |
3425 | /* Return the single store in BB with VDEF or NULL if there are |
3426 | other stores in the BB or loads following the store. */ |
3427 | |
3428 | static gimple * |
3429 | single_trailing_store_in_bb (basic_block bb, tree vdef) |
3430 | { |
3431 | if (SSA_NAME_IS_DEFAULT_DEF (vdef)) |
3432 | return NULL; |
3433 | gimple *store = SSA_NAME_DEF_STMT (vdef); |
3434 | if (gimple_bb (g: store) != bb |
3435 | || gimple_code (g: store) == GIMPLE_PHI) |
3436 | return NULL; |
3437 | |
3438 | /* Verify there is no other store in this BB. */ |
3439 | if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store)) |
3440 | && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb |
3441 | && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI) |
3442 | return NULL; |
3443 | |
3444 | /* Verify there is no load or store after the store. */ |
3445 | use_operand_p use_p; |
3446 | imm_use_iterator imm_iter; |
3447 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store)) |
3448 | if (USE_STMT (use_p) != store |
3449 | && gimple_bb (USE_STMT (use_p)) == bb) |
3450 | return NULL; |
3451 | |
3452 | return store; |
3453 | } |
3454 | |
3455 | /* Conditional store replacement. We already know |
3456 | that the recognized pattern looks like so: |
3457 | |
3458 | split: |
3459 | if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) |
3460 | THEN_BB: |
3461 | ... |
3462 | X = Y; |
3463 | ... |
3464 | goto JOIN_BB; |
3465 | ELSE_BB: |
3466 | ... |
3467 | X = Z; |
3468 | ... |
3469 | fallthrough (edge E0) |
3470 | JOIN_BB: |
3471 | some more |
3472 | |
3473 | We check that it is safe to sink the store to JOIN_BB by verifying that |
3474 | there are no read-after-write or write-after-write dependencies in |
3475 | THEN_BB and ELSE_BB. */ |
3476 | |
3477 | static bool |
3478 | cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, |
3479 | basic_block join_bb) |
3480 | { |
3481 | vec<data_reference_p> then_datarefs, else_datarefs; |
3482 | vec<ddr_p> then_ddrs, else_ddrs; |
3483 | gimple *then_store, *else_store; |
3484 | bool found, ok = false, res; |
3485 | struct data_dependence_relation *ddr; |
3486 | data_reference_p then_dr, else_dr; |
3487 | int i, j; |
3488 | tree then_lhs, else_lhs; |
3489 | basic_block blocks[3]; |
3490 | |
3491 | /* Handle the case with single store in THEN_BB and ELSE_BB. That is |
3492 | cheap enough to always handle as it allows us to elide dependence |
3493 | checking. */ |
3494 | gphi *vphi = NULL; |
3495 | for (gphi_iterator si = gsi_start_phis (join_bb); !gsi_end_p (i: si); |
3496 | gsi_next (i: &si)) |
3497 | if (virtual_operand_p (op: gimple_phi_result (gs: si.phi ()))) |
3498 | { |
3499 | vphi = si.phi (); |
3500 | break; |
3501 | } |
3502 | if (!vphi) |
3503 | return false; |
3504 | tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb)); |
3505 | tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb)); |
3506 | gimple *then_assign = single_trailing_store_in_bb (bb: then_bb, vdef: then_vdef); |
3507 | if (then_assign) |
3508 | { |
3509 | gimple *else_assign = single_trailing_store_in_bb (bb: else_bb, vdef: else_vdef); |
3510 | if (else_assign) |
3511 | return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, |
3512 | then_assign, else_assign); |
3513 | } |
3514 | |
3515 | /* If either vectorization or if-conversion is disabled then do |
3516 | not sink any stores. */ |
3517 | if (param_max_stores_to_sink == 0 |
3518 | || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize) |
3519 | || !flag_tree_loop_if_convert) |
3520 | return false; |
3521 | |
3522 | /* Find data references. */ |
3523 | then_datarefs.create (nelems: 1); |
3524 | else_datarefs.create (nelems: 1); |
3525 | if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) |
3526 | == chrec_dont_know) |
3527 | || !then_datarefs.length () |
3528 | || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) |
3529 | == chrec_dont_know) |
3530 | || !else_datarefs.length ()) |
3531 | { |
3532 | free_data_refs (then_datarefs); |
3533 | free_data_refs (else_datarefs); |
3534 | return false; |
3535 | } |
3536 | |
3537 | /* Find pairs of stores with equal LHS. */ |
3538 | auto_vec<gimple *, 1> then_stores, else_stores; |
3539 | FOR_EACH_VEC_ELT (then_datarefs, i, then_dr) |
3540 | { |
3541 | if (DR_IS_READ (then_dr)) |
3542 | continue; |
3543 | |
3544 | then_store = DR_STMT (then_dr); |
3545 | then_lhs = gimple_get_lhs (then_store); |
3546 | if (then_lhs == NULL_TREE) |
3547 | continue; |
3548 | found = false; |
3549 | |
3550 | FOR_EACH_VEC_ELT (else_datarefs, j, else_dr) |
3551 | { |
3552 | if (DR_IS_READ (else_dr)) |
3553 | continue; |
3554 | |
3555 | else_store = DR_STMT (else_dr); |
3556 | else_lhs = gimple_get_lhs (else_store); |
3557 | if (else_lhs == NULL_TREE) |
3558 | continue; |
3559 | |
3560 | if (operand_equal_p (then_lhs, else_lhs, flags: 0)) |
3561 | { |
3562 | found = true; |
3563 | break; |
3564 | } |
3565 | } |
3566 | |
3567 | if (!found) |
3568 | continue; |
3569 | |
3570 | then_stores.safe_push (obj: then_store); |
3571 | else_stores.safe_push (obj: else_store); |
3572 | } |
3573 | |
3574 | /* No pairs of stores found. */ |
3575 | if (!then_stores.length () |
3576 | || then_stores.length () > (unsigned) param_max_stores_to_sink) |
3577 | { |
3578 | free_data_refs (then_datarefs); |
3579 | free_data_refs (else_datarefs); |
3580 | return false; |
3581 | } |
3582 | |
3583 | /* Compute and check data dependencies in both basic blocks. */ |
3584 | then_ddrs.create (nelems: 1); |
3585 | else_ddrs.create (nelems: 1); |
3586 | if (!compute_all_dependences (then_datarefs, &then_ddrs, |
3587 | vNULL, false) |
3588 | || !compute_all_dependences (else_datarefs, &else_ddrs, |
3589 | vNULL, false)) |
3590 | { |
3591 | free_dependence_relations (then_ddrs); |
3592 | free_dependence_relations (else_ddrs); |
3593 | free_data_refs (then_datarefs); |
3594 | free_data_refs (else_datarefs); |
3595 | return false; |
3596 | } |
3597 | blocks[0] = then_bb; |
3598 | blocks[1] = else_bb; |
3599 | blocks[2] = join_bb; |
3600 | renumber_gimple_stmt_uids_in_blocks (blocks, 3); |
3601 | |
3602 | /* Check that there are no read-after-write or write-after-write dependencies |
3603 | in THEN_BB. */ |
3604 | FOR_EACH_VEC_ELT (then_ddrs, i, ddr) |
3605 | { |
3606 | struct data_reference *dra = DDR_A (ddr); |
3607 | struct data_reference *drb = DDR_B (ddr); |
3608 | |
3609 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known |
3610 | && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) |
3611 | && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) |
3612 | || (DR_IS_READ (drb) && DR_IS_WRITE (dra) |
3613 | && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) |
3614 | || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) |
3615 | { |
3616 | free_dependence_relations (then_ddrs); |
3617 | free_dependence_relations (else_ddrs); |
3618 | free_data_refs (then_datarefs); |
3619 | free_data_refs (else_datarefs); |
3620 | return false; |
3621 | } |
3622 | } |
3623 | |
3624 | /* Check that there are no read-after-write or write-after-write dependencies |
3625 | in ELSE_BB. */ |
3626 | FOR_EACH_VEC_ELT (else_ddrs, i, ddr) |
3627 | { |
3628 | struct data_reference *dra = DDR_A (ddr); |
3629 | struct data_reference *drb = DDR_B (ddr); |
3630 | |
3631 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known |
3632 | && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) |
3633 | && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) |
3634 | || (DR_IS_READ (drb) && DR_IS_WRITE (dra) |
3635 | && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) |
3636 | || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) |
3637 | { |
3638 | free_dependence_relations (then_ddrs); |
3639 | free_dependence_relations (else_ddrs); |
3640 | free_data_refs (then_datarefs); |
3641 | free_data_refs (else_datarefs); |
3642 | return false; |
3643 | } |
3644 | } |
3645 | |
3646 | /* Sink stores with same LHS. */ |
3647 | FOR_EACH_VEC_ELT (then_stores, i, then_store) |
3648 | { |
3649 | else_store = else_stores[i]; |
3650 | res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, |
3651 | then_assign: then_store, else_assign: else_store); |
3652 | ok = ok || res; |
3653 | } |
3654 | |
3655 | free_dependence_relations (then_ddrs); |
3656 | free_dependence_relations (else_ddrs); |
3657 | free_data_refs (then_datarefs); |
3658 | free_data_refs (else_datarefs); |
3659 | |
3660 | return ok; |
3661 | } |
3662 | |
3663 | /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ |
3664 | |
3665 | static bool |
3666 | local_mem_dependence (gimple *stmt, basic_block bb) |
3667 | { |
3668 | tree vuse = gimple_vuse (g: stmt); |
3669 | gimple *def; |
3670 | |
3671 | if (!vuse) |
3672 | return false; |
3673 | |
3674 | def = SSA_NAME_DEF_STMT (vuse); |
3675 | return (def && gimple_bb (g: def) == bb); |
3676 | } |
3677 | |
3678 | /* Given a "diamond" control-flow pattern where BB0 tests a condition, |
3679 | BB1 and BB2 are "then" and "else" blocks dependent on this test, |
3680 | and BB3 rejoins control flow following BB1 and BB2, look for |
3681 | opportunities to hoist loads as follows. If BB3 contains a PHI of |
3682 | two loads, one each occurring in BB1 and BB2, and the loads are |
3683 | provably of adjacent fields in the same structure, then move both |
3684 | loads into BB0. Of course this can only be done if there are no |
3685 | dependencies preventing such motion. |
3686 | |
3687 | One of the hoisted loads will always be speculative, so the |
3688 | transformation is currently conservative: |
3689 | |
3690 | - The fields must be strictly adjacent. |
3691 | - The two fields must occupy a single memory block that is |
3692 | guaranteed to not cross a page boundary. |
3693 | |
3694 | The last is difficult to prove, as such memory blocks should be |
3695 | aligned on the minimum of the stack alignment boundary and the |
3696 | alignment guaranteed by heap allocation interfaces. Thus we rely |
3697 | on a parameter for the alignment value. |
3698 | |
3699 | Provided a good value is used for the last case, the first |
3700 | restriction could possibly be relaxed. */ |
3701 | |
3702 | static void |
3703 | hoist_adjacent_loads (basic_block bb0, basic_block bb1, |
3704 | basic_block bb2, basic_block bb3) |
3705 | { |
3706 | int param_align = param_l1_cache_line_size; |
3707 | unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT); |
3708 | gphi_iterator gsi; |
3709 | |
3710 | /* Walk the phis in bb3 looking for an opportunity. We are looking |
3711 | for phis of two SSA names, one each of which is defined in bb1 and |
3712 | bb2. */ |
3713 | for (gsi = gsi_start_phis (bb3); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
3714 | { |
3715 | gphi *phi_stmt = gsi.phi (); |
3716 | gimple *def1, *def2; |
3717 | tree arg1, arg2, ref1, ref2, field1, field2; |
3718 | tree tree_offset1, tree_offset2, tree_size2, next; |
3719 | int offset1, offset2, size2; |
3720 | unsigned align1; |
3721 | gimple_stmt_iterator gsi2; |
3722 | basic_block bb_for_def1, bb_for_def2; |
3723 | |
3724 | if (gimple_phi_num_args (gs: phi_stmt) != 2 |
3725 | || virtual_operand_p (op: gimple_phi_result (gs: phi_stmt))) |
3726 | continue; |
3727 | |
3728 | arg1 = gimple_phi_arg_def (gs: phi_stmt, index: 0); |
3729 | arg2 = gimple_phi_arg_def (gs: phi_stmt, index: 1); |
3730 | |
3731 | if (TREE_CODE (arg1) != SSA_NAME |
3732 | || TREE_CODE (arg2) != SSA_NAME |
3733 | || SSA_NAME_IS_DEFAULT_DEF (arg1) |
3734 | || SSA_NAME_IS_DEFAULT_DEF (arg2)) |
3735 | continue; |
3736 | |
3737 | def1 = SSA_NAME_DEF_STMT (arg1); |
3738 | def2 = SSA_NAME_DEF_STMT (arg2); |
3739 | |
3740 | if ((gimple_bb (g: def1) != bb1 || gimple_bb (g: def2) != bb2) |
3741 | && (gimple_bb (g: def2) != bb1 || gimple_bb (g: def1) != bb2)) |
3742 | continue; |
3743 | |
3744 | /* Check the mode of the arguments to be sure a conditional move |
3745 | can be generated for it. */ |
3746 | if (optab_handler (op: movcc_optab, TYPE_MODE (TREE_TYPE (arg1))) |
3747 | == CODE_FOR_nothing) |
3748 | continue; |
3749 | |
3750 | /* Both statements must be assignments whose RHS is a COMPONENT_REF. */ |
3751 | if (!gimple_assign_single_p (gs: def1) |
3752 | || !gimple_assign_single_p (gs: def2) |
3753 | || gimple_has_volatile_ops (stmt: def1) |
3754 | || gimple_has_volatile_ops (stmt: def2)) |
3755 | continue; |
3756 | |
3757 | ref1 = gimple_assign_rhs1 (gs: def1); |
3758 | ref2 = gimple_assign_rhs1 (gs: def2); |
3759 | |
3760 | if (TREE_CODE (ref1) != COMPONENT_REF |
3761 | || TREE_CODE (ref2) != COMPONENT_REF) |
3762 | continue; |
3763 | |
3764 | /* The zeroth operand of the two component references must be |
3765 | identical. It is not sufficient to compare get_base_address of |
3766 | the two references, because this could allow for different |
3767 | elements of the same array in the two trees. It is not safe to |
3768 | assume that the existence of one array element implies the |
3769 | existence of a different one. */ |
3770 | if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), flags: 0)) |
3771 | continue; |
3772 | |
3773 | field1 = TREE_OPERAND (ref1, 1); |
3774 | field2 = TREE_OPERAND (ref2, 1); |
3775 | |
3776 | /* Check for field adjacency, and ensure field1 comes first. */ |
3777 | for (next = DECL_CHAIN (field1); |
3778 | next && TREE_CODE (next) != FIELD_DECL; |
3779 | next = DECL_CHAIN (next)) |
3780 | ; |
3781 | |
3782 | if (next != field2) |
3783 | { |
3784 | for (next = DECL_CHAIN (field2); |
3785 | next && TREE_CODE (next) != FIELD_DECL; |
3786 | next = DECL_CHAIN (next)) |
3787 | ; |
3788 | |
3789 | if (next != field1) |
3790 | continue; |
3791 | |
3792 | std::swap (a&: field1, b&: field2); |
3793 | std::swap (a&: def1, b&: def2); |
3794 | } |
3795 | |
3796 | bb_for_def1 = gimple_bb (g: def1); |
3797 | bb_for_def2 = gimple_bb (g: def2); |
3798 | |
3799 | /* Check for proper alignment of the first field. */ |
3800 | tree_offset1 = bit_position (field1); |
3801 | tree_offset2 = bit_position (field2); |
3802 | tree_size2 = DECL_SIZE (field2); |
3803 | |
3804 | if (!tree_fits_uhwi_p (tree_offset1) |
3805 | || !tree_fits_uhwi_p (tree_offset2) |
3806 | || !tree_fits_uhwi_p (tree_size2)) |
3807 | continue; |
3808 | |
3809 | offset1 = tree_to_uhwi (tree_offset1); |
3810 | offset2 = tree_to_uhwi (tree_offset2); |
3811 | size2 = tree_to_uhwi (tree_size2); |
3812 | align1 = DECL_ALIGN (field1) % param_align_bits; |
3813 | |
3814 | if (offset1 % BITS_PER_UNIT != 0) |
3815 | continue; |
3816 | |
3817 | /* For profitability, the two field references should fit within |
3818 | a single cache line. */ |
3819 | if (align1 + offset2 - offset1 + size2 > param_align_bits) |
3820 | continue; |
3821 | |
3822 | /* The two expressions cannot be dependent upon vdefs defined |
3823 | in bb1/bb2. */ |
3824 | if (local_mem_dependence (stmt: def1, bb: bb_for_def1) |
3825 | || local_mem_dependence (stmt: def2, bb: bb_for_def2)) |
3826 | continue; |
3827 | |
3828 | /* The conditions are satisfied; hoist the loads from bb1 and bb2 into |
3829 | bb0. We hoist the first one first so that a cache miss is handled |
3830 | efficiently regardless of hardware cache-fill policy. */ |
3831 | gsi2 = gsi_for_stmt (def1); |
3832 | gsi_move_to_bb_end (&gsi2, bb0); |
3833 | gsi2 = gsi_for_stmt (def2); |
3834 | gsi_move_to_bb_end (&gsi2, bb0); |
3835 | statistics_counter_event (cfun, "hoisted loads" , 1); |
3836 | |
3837 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3838 | { |
3839 | fprintf (stream: dump_file, |
3840 | format: "\nHoisting adjacent loads from %d and %d into %d: \n" , |
3841 | bb_for_def1->index, bb_for_def2->index, bb0->index); |
3842 | print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS); |
3843 | print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS); |
3844 | } |
3845 | } |
3846 | } |
3847 | |
3848 | /* Determine whether we should attempt to hoist adjacent loads out of |
3849 | diamond patterns in pass_phiopt. Always hoist loads if |
3850 | -fhoist-adjacent-loads is specified and the target machine has |
3851 | both a conditional move instruction and a defined cache line size. */ |
3852 | |
3853 | static bool |
3854 | gate_hoist_loads (void) |
3855 | { |
3856 | return (flag_hoist_adjacent_loads == 1 |
3857 | && param_l1_cache_line_size |
3858 | && HAVE_conditional_move); |
3859 | } |
3860 | |
3861 | /* This pass tries to replaces an if-then-else block with an |
3862 | assignment. We have different kinds of transformations. |
3863 | Some of these transformations are also performed by the ifcvt |
3864 | RTL optimizer. |
3865 | |
3866 | PHI-OPT using Match-and-simplify infrastructure |
3867 | ----------------------- |
3868 | |
3869 | The PHI-OPT pass will try to use match-and-simplify infrastructure |
3870 | (gimple_simplify) to do transformations. This is implemented in |
3871 | match_simplify_replacement. |
3872 | |
3873 | The way it works is it replaces: |
3874 | bb0: |
3875 | if (cond) goto bb2; else goto bb1; |
3876 | bb1: |
3877 | bb2: |
3878 | x = PHI <a (bb1), b (bb0), ...>; |
3879 | |
3880 | with a statement if it gets simplified from `cond ? b : a`. |
3881 | |
3882 | bb0: |
3883 | x1 = cond ? b : a; |
3884 | bb2: |
3885 | x = PHI <a (bb1), x1 (bb0), ...>; |
3886 | Bb1 might be removed as it becomes unreachable when doing the replacement. |
3887 | Though bb1 does not have to be considered a forwarding basic block from bb0. |
3888 | |
3889 | Will try to see if `(!cond) ? a : b` gets simplified (iff !cond simplifies); |
3890 | this is done not to have an explosion of patterns in match.pd. |
3891 | Note bb1 does not need to be completely empty, it can contain |
3892 | one statement which is known not to trap. |
3893 | |
3894 | It also can handle the case where we have two forwarding bbs (diamond): |
3895 | bb0: |
3896 | if (cond) goto bb2; else goto bb1; |
3897 | bb1: goto bb3; |
3898 | bb2: goto bb3; |
3899 | bb3: |
3900 | x = PHI <a (bb1), b (bb2), ...>; |
3901 | And that is replaced with a statement if it is simplified |
3902 | from `cond ? b : a`. |
3903 | Again bb1 and bb2 does not have to be completely empty but |
3904 | each can contain one statement which is known not to trap. |
3905 | But in this case bb1/bb2 can only be forwarding basic blocks. |
3906 | |
3907 | This fully replaces the old "Conditional Replacement", |
3908 | "ABS Replacement" transformations as they are now |
3909 | implmeneted in match.pd. |
3910 | Some parts of the "MIN/MAX Replacement" are re-implemented in match.pd. |
3911 | |
3912 | Value Replacement |
3913 | ----------------- |
3914 | |
3915 | This transformation, implemented in value_replacement, replaces |
3916 | |
3917 | bb0: |
3918 | if (a != b) goto bb2; else goto bb1; |
3919 | bb1: |
3920 | bb2: |
3921 | x = PHI <a (bb1), b (bb0), ...>; |
3922 | |
3923 | with |
3924 | |
3925 | bb0: |
3926 | bb2: |
3927 | x = PHI <b (bb0), ...>; |
3928 | |
3929 | This opportunity can sometimes occur as a result of other |
3930 | optimizations. |
3931 | |
3932 | |
3933 | Another case caught by value replacement looks like this: |
3934 | |
3935 | bb0: |
3936 | t1 = a == CONST; |
3937 | t2 = b > c; |
3938 | t3 = t1 & t2; |
3939 | if (t3 != 0) goto bb1; else goto bb2; |
3940 | bb1: |
3941 | bb2: |
3942 | x = PHI (CONST, a) |
3943 | |
3944 | Gets replaced with: |
3945 | bb0: |
3946 | bb2: |
3947 | t1 = a == CONST; |
3948 | t2 = b > c; |
3949 | t3 = t1 & t2; |
3950 | x = a; |
3951 | |
3952 | MIN/MAX Replacement |
3953 | ------------------- |
3954 | |
3955 | This transformation, minmax_replacement replaces |
3956 | |
3957 | bb0: |
3958 | if (a <= b) goto bb2; else goto bb1; |
3959 | bb1: |
3960 | bb2: |
3961 | x = PHI <b (bb1), a (bb0), ...>; |
3962 | |
3963 | with |
3964 | |
3965 | bb0: |
3966 | x' = MIN_EXPR (a, b) |
3967 | bb2: |
3968 | x = PHI <x' (bb0), ...>; |
3969 | |
3970 | A similar transformation is done for MAX_EXPR. |
3971 | |
3972 | |
3973 | This pass also performs a fifth transformation of a slightly different |
3974 | flavor. |
3975 | |
3976 | Factor operations in COND_EXPR |
3977 | ------------------------------ |
3978 | |
3979 | This transformation factors the unary operations out of COND_EXPR with |
3980 | factor_out_conditional_operation. |
3981 | |
3982 | For example: |
3983 | if (a <= CST) goto <bb 3>; else goto <bb 4>; |
3984 | <bb 3>: |
3985 | tmp = (int) a; |
3986 | <bb 4>: |
3987 | tmp = PHI <tmp, CST> |
3988 | |
3989 | Into: |
3990 | if (a <= CST) goto <bb 3>; else goto <bb 4>; |
3991 | <bb 3>: |
3992 | <bb 4>: |
3993 | a = PHI <a, CST> |
3994 | tmp = (int) a; |
3995 | |
3996 | Adjacent Load Hoisting |
3997 | ---------------------- |
3998 | |
3999 | This transformation replaces |
4000 | |
4001 | bb0: |
4002 | if (...) goto bb2; else goto bb1; |
4003 | bb1: |
4004 | x1 = (<expr>).field1; |
4005 | goto bb3; |
4006 | bb2: |
4007 | x2 = (<expr>).field2; |
4008 | bb3: |
4009 | # x = PHI <x1, x2>; |
4010 | |
4011 | with |
4012 | |
4013 | bb0: |
4014 | x1 = (<expr>).field1; |
4015 | x2 = (<expr>).field2; |
4016 | if (...) goto bb2; else goto bb1; |
4017 | bb1: |
4018 | goto bb3; |
4019 | bb2: |
4020 | bb3: |
4021 | # x = PHI <x1, x2>; |
4022 | |
4023 | The purpose of this transformation is to enable generation of conditional |
4024 | move instructions such as Intel CMOVE or PowerPC ISEL. Because one of |
4025 | the loads is speculative, the transformation is restricted to very |
4026 | specific cases to avoid introducing a page fault. We are looking for |
4027 | the common idiom: |
4028 | |
4029 | if (...) |
4030 | x = y->left; |
4031 | else |
4032 | x = y->right; |
4033 | |
4034 | where left and right are typically adjacent pointers in a tree structure. */ |
4035 | |
4036 | namespace { |
4037 | |
4038 | const pass_data pass_data_phiopt = |
4039 | { |
4040 | .type: GIMPLE_PASS, /* type */ |
4041 | .name: "phiopt" , /* name */ |
4042 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
4043 | .tv_id: TV_TREE_PHIOPT, /* tv_id */ |
4044 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
4045 | .properties_provided: 0, /* properties_provided */ |
4046 | .properties_destroyed: 0, /* properties_destroyed */ |
4047 | .todo_flags_start: 0, /* todo_flags_start */ |
4048 | .todo_flags_finish: 0, /* todo_flags_finish */ |
4049 | }; |
4050 | |
4051 | class pass_phiopt : public gimple_opt_pass |
4052 | { |
4053 | public: |
4054 | pass_phiopt (gcc::context *ctxt) |
4055 | : gimple_opt_pass (pass_data_phiopt, ctxt), early_p (false) |
4056 | {} |
4057 | |
4058 | /* opt_pass methods: */ |
4059 | opt_pass * clone () final override { return new pass_phiopt (m_ctxt); } |
4060 | void set_pass_param (unsigned n, bool param) final override |
4061 | { |
4062 | gcc_assert (n == 0); |
4063 | early_p = param; |
4064 | } |
4065 | bool gate (function *) final override { return flag_ssa_phiopt; } |
4066 | unsigned int execute (function *) final override; |
4067 | |
4068 | private: |
4069 | bool early_p; |
4070 | }; // class pass_phiopt |
4071 | |
4072 | } // anon namespace |
4073 | |
4074 | gimple_opt_pass * |
4075 | make_pass_phiopt (gcc::context *ctxt) |
4076 | { |
4077 | return new pass_phiopt (ctxt); |
4078 | } |
4079 | |
4080 | unsigned int |
4081 | pass_phiopt::execute (function *) |
4082 | { |
4083 | bool do_hoist_loads = !early_p ? gate_hoist_loads () : false; |
4084 | basic_block bb; |
4085 | basic_block *bb_order; |
4086 | unsigned n, i; |
4087 | bool cfgchanged = false; |
4088 | |
4089 | calculate_dominance_info (CDI_DOMINATORS); |
4090 | mark_ssa_maybe_undefs (); |
4091 | |
4092 | /* Search every basic block for COND_EXPR we may be able to optimize. |
4093 | |
4094 | We walk the blocks in order that guarantees that a block with |
4095 | a single predecessor is processed before the predecessor. |
4096 | This ensures that we collapse inner ifs before visiting the |
4097 | outer ones, and also that we do not try to visit a removed |
4098 | block. */ |
4099 | bb_order = single_pred_before_succ_order (); |
4100 | n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; |
4101 | |
4102 | for (i = 0; i < n; i++) |
4103 | { |
4104 | gphi *phi; |
4105 | basic_block bb1, bb2; |
4106 | edge e1, e2; |
4107 | tree arg0, arg1; |
4108 | bool diamond_p = false; |
4109 | |
4110 | bb = bb_order[i]; |
4111 | |
4112 | /* Check to see if the last statement is a GIMPLE_COND. */ |
4113 | gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
4114 | if (!cond_stmt) |
4115 | continue; |
4116 | |
4117 | e1 = EDGE_SUCC (bb, 0); |
4118 | bb1 = e1->dest; |
4119 | e2 = EDGE_SUCC (bb, 1); |
4120 | bb2 = e2->dest; |
4121 | |
4122 | /* We cannot do the optimization on abnormal edges. */ |
4123 | if ((e1->flags & EDGE_ABNORMAL) != 0 |
4124 | || (e2->flags & EDGE_ABNORMAL) != 0) |
4125 | continue; |
4126 | |
4127 | /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ |
4128 | if (EDGE_COUNT (bb1->succs) == 0 |
4129 | || EDGE_COUNT (bb2->succs) == 0) |
4130 | continue; |
4131 | |
4132 | /* Find the bb which is the fall through to the other. */ |
4133 | if (EDGE_SUCC (bb1, 0)->dest == bb2) |
4134 | ; |
4135 | else if (EDGE_SUCC (bb2, 0)->dest == bb1) |
4136 | { |
4137 | std::swap (a&: bb1, b&: bb2); |
4138 | std::swap (a&: e1, b&: e2); |
4139 | } |
4140 | else if (EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest |
4141 | && single_succ_p (bb: bb2)) |
4142 | { |
4143 | diamond_p = true; |
4144 | e2 = EDGE_SUCC (bb2, 0); |
4145 | /* Make sure bb2 is just a fall through. */ |
4146 | if ((e2->flags & EDGE_FALLTHRU) == 0) |
4147 | continue; |
4148 | } |
4149 | else |
4150 | continue; |
4151 | |
4152 | e1 = EDGE_SUCC (bb1, 0); |
4153 | |
4154 | /* Make sure that bb1 is just a fall through. */ |
4155 | if (!single_succ_p (bb: bb1) |
4156 | || (e1->flags & EDGE_FALLTHRU) == 0) |
4157 | continue; |
4158 | |
4159 | if (diamond_p) |
4160 | { |
4161 | basic_block bb3 = e1->dest; |
4162 | |
4163 | if (!single_pred_p (bb: bb1) |
4164 | || !single_pred_p (bb: bb2)) |
4165 | continue; |
4166 | |
4167 | if (do_hoist_loads |
4168 | && !FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt))) |
4169 | && EDGE_COUNT (bb->succs) == 2 |
4170 | && EDGE_COUNT (bb3->preds) == 2 |
4171 | /* If one edge or the other is dominant, a conditional move |
4172 | is likely to perform worse than the well-predicted branch. */ |
4173 | && !predictable_edge_p (EDGE_SUCC (bb, 0)) |
4174 | && !predictable_edge_p (EDGE_SUCC (bb, 1))) |
4175 | hoist_adjacent_loads (bb0: bb, bb1, bb2, bb3); |
4176 | } |
4177 | |
4178 | gimple_stmt_iterator gsi; |
4179 | bool candorest = true; |
4180 | |
4181 | /* Check that we're looking for nested phis. */ |
4182 | basic_block merge = diamond_p ? EDGE_SUCC (bb2, 0)->dest : bb2; |
4183 | gimple_seq phis = phi_nodes (bb: merge); |
4184 | |
4185 | /* Value replacement can work with more than one PHI |
4186 | so try that first. */ |
4187 | if (!early_p && !diamond_p) |
4188 | for (gsi = gsi_start (seq&: phis); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
4189 | { |
4190 | phi = as_a <gphi *> (p: gsi_stmt (i: gsi)); |
4191 | arg0 = gimple_phi_arg_def (gs: phi, index: e1->dest_idx); |
4192 | arg1 = gimple_phi_arg_def (gs: phi, index: e2->dest_idx); |
4193 | if (value_replacement (cond_bb: bb, middle_bb: bb1, e0: e1, e1: e2, phi, arg0, arg1) == 2) |
4194 | { |
4195 | candorest = false; |
4196 | cfgchanged = true; |
4197 | break; |
4198 | } |
4199 | } |
4200 | |
4201 | if (!candorest) |
4202 | continue; |
4203 | |
4204 | phi = single_non_singleton_phi_for_edges (seq: phis, e0: e1, e1: e2); |
4205 | if (!phi) |
4206 | continue; |
4207 | |
4208 | arg0 = gimple_phi_arg_def (gs: phi, index: e1->dest_idx); |
4209 | arg1 = gimple_phi_arg_def (gs: phi, index: e2->dest_idx); |
4210 | |
4211 | /* Something is wrong if we cannot find the arguments in the PHI |
4212 | node. */ |
4213 | gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); |
4214 | |
4215 | if (single_pred_p (bb: bb1) |
4216 | && EDGE_COUNT (merge->preds) == 2) |
4217 | { |
4218 | gphi *newphi = phi; |
4219 | while (newphi) |
4220 | { |
4221 | phi = newphi; |
4222 | /* factor_out_conditional_operation may create a new PHI in |
4223 | BB2 and eliminate an existing PHI in BB2. Recompute values |
4224 | that may be affected by that change. */ |
4225 | arg0 = gimple_phi_arg_def (gs: phi, index: e1->dest_idx); |
4226 | arg1 = gimple_phi_arg_def (gs: phi, index: e2->dest_idx); |
4227 | gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); |
4228 | newphi = factor_out_conditional_operation (e0: e1, e1: e2, phi, |
4229 | arg0, arg1, |
4230 | cond_stmt); |
4231 | } |
4232 | } |
4233 | |
4234 | /* Do the replacement of conditional if it can be done. */ |
4235 | if (match_simplify_replacement (cond_bb: bb, middle_bb: bb1, middle_bb_alt: bb2, e0: e1, e1: e2, phi, |
4236 | arg0, arg1, early_p, threeway_p: diamond_p)) |
4237 | cfgchanged = true; |
4238 | else if (!early_p |
4239 | && !diamond_p |
4240 | && single_pred_p (bb: bb1) |
4241 | && cond_removal_in_builtin_zero_pattern (cond_bb: bb, middle_bb: bb1, e1, e2, |
4242 | phi, arg0, arg1)) |
4243 | cfgchanged = true; |
4244 | else if (minmax_replacement (cond_bb: bb, middle_bb: bb1, alt_middle_bb: bb2, e0: e1, e1: e2, phi, arg0, arg1, |
4245 | threeway_p: diamond_p)) |
4246 | cfgchanged = true; |
4247 | else if (single_pred_p (bb: bb1) |
4248 | && !diamond_p |
4249 | && spaceship_replacement (cond_bb: bb, middle_bb: bb1, e0: e1, e1: e2, phi, arg0, arg1)) |
4250 | cfgchanged = true; |
4251 | } |
4252 | |
4253 | free (ptr: bb_order); |
4254 | |
4255 | if (cfgchanged) |
4256 | return TODO_cleanup_cfg; |
4257 | return 0; |
4258 | } |
4259 | |
4260 | /* This pass tries to transform conditional stores into unconditional |
4261 | ones, enabling further simplifications with the simpler then and else |
4262 | blocks. In particular it replaces this: |
4263 | |
4264 | bb0: |
4265 | if (cond) goto bb2; else goto bb1; |
4266 | bb1: |
4267 | *p = RHS; |
4268 | bb2: |
4269 | |
4270 | with |
4271 | |
4272 | bb0: |
4273 | if (cond) goto bb1; else goto bb2; |
4274 | bb1: |
4275 | condtmp' = *p; |
4276 | bb2: |
4277 | condtmp = PHI <RHS, condtmp'> |
4278 | *p = condtmp; |
4279 | |
4280 | This transformation can only be done under several constraints, |
4281 | documented below. It also replaces: |
4282 | |
4283 | bb0: |
4284 | if (cond) goto bb2; else goto bb1; |
4285 | bb1: |
4286 | *p = RHS1; |
4287 | goto bb3; |
4288 | bb2: |
4289 | *p = RHS2; |
4290 | bb3: |
4291 | |
4292 | with |
4293 | |
4294 | bb0: |
4295 | if (cond) goto bb3; else goto bb1; |
4296 | bb1: |
4297 | bb3: |
4298 | condtmp = PHI <RHS1, RHS2> |
4299 | *p = condtmp; */ |
4300 | |
4301 | namespace { |
4302 | |
4303 | const pass_data pass_data_cselim = |
4304 | { |
4305 | .type: GIMPLE_PASS, /* type */ |
4306 | .name: "cselim" , /* name */ |
4307 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
4308 | .tv_id: TV_TREE_PHIOPT, /* tv_id */ |
4309 | .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */ |
4310 | .properties_provided: 0, /* properties_provided */ |
4311 | .properties_destroyed: 0, /* properties_destroyed */ |
4312 | .todo_flags_start: 0, /* todo_flags_start */ |
4313 | .todo_flags_finish: 0, /* todo_flags_finish */ |
4314 | }; |
4315 | |
4316 | class pass_cselim : public gimple_opt_pass |
4317 | { |
4318 | public: |
4319 | pass_cselim (gcc::context *ctxt) |
4320 | : gimple_opt_pass (pass_data_cselim, ctxt) |
4321 | {} |
4322 | |
4323 | /* opt_pass methods: */ |
4324 | bool gate (function *) final override { return flag_tree_cselim; } |
4325 | unsigned int execute (function *) final override; |
4326 | |
4327 | }; // class pass_cselim |
4328 | |
4329 | } // anon namespace |
4330 | |
4331 | gimple_opt_pass * |
4332 | make_pass_cselim (gcc::context *ctxt) |
4333 | { |
4334 | return new pass_cselim (ctxt); |
4335 | } |
4336 | |
4337 | unsigned int |
4338 | pass_cselim::execute (function *) |
4339 | { |
4340 | basic_block bb; |
4341 | basic_block *bb_order; |
4342 | unsigned n, i; |
4343 | bool cfgchanged = false; |
4344 | hash_set<tree> *nontrap = 0; |
4345 | unsigned todo = 0; |
4346 | |
4347 | /* ??? We are not interested in loop related info, but the following |
4348 | will create it, ICEing as we didn't init loops with pre-headers. |
4349 | An interfacing issue of find_data_references_in_bb. */ |
4350 | loop_optimizer_init (LOOPS_NORMAL); |
4351 | scev_initialize (); |
4352 | |
4353 | calculate_dominance_info (CDI_DOMINATORS); |
4354 | |
4355 | /* Calculate the set of non-trapping memory accesses. */ |
4356 | nontrap = get_non_trapping (); |
4357 | |
4358 | /* Search every basic block for COND_EXPR we may be able to optimize. |
4359 | |
4360 | We walk the blocks in order that guarantees that a block with |
4361 | a single predecessor is processed before the predecessor. |
4362 | This ensures that we collapse inner ifs before visiting the |
4363 | outer ones, and also that we do not try to visit a removed |
4364 | block. */ |
4365 | bb_order = single_pred_before_succ_order (); |
4366 | n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; |
4367 | |
4368 | for (i = 0; i < n; i++) |
4369 | { |
4370 | basic_block bb1, bb2; |
4371 | edge e1, e2; |
4372 | bool diamond_p = false; |
4373 | |
4374 | bb = bb_order[i]; |
4375 | |
4376 | /* Check to see if the last statement is a GIMPLE_COND. */ |
4377 | gcond *cond_stmt = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb)); |
4378 | if (!cond_stmt) |
4379 | continue; |
4380 | |
4381 | e1 = EDGE_SUCC (bb, 0); |
4382 | bb1 = e1->dest; |
4383 | e2 = EDGE_SUCC (bb, 1); |
4384 | bb2 = e2->dest; |
4385 | |
4386 | /* We cannot do the optimization on abnormal edges. */ |
4387 | if ((e1->flags & EDGE_ABNORMAL) != 0 |
4388 | || (e2->flags & EDGE_ABNORMAL) != 0) |
4389 | continue; |
4390 | |
4391 | /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ |
4392 | if (EDGE_COUNT (bb1->succs) == 0 |
4393 | || EDGE_COUNT (bb2->succs) == 0) |
4394 | continue; |
4395 | |
4396 | /* Find the bb which is the fall through to the other. */ |
4397 | if (EDGE_SUCC (bb1, 0)->dest == bb2) |
4398 | ; |
4399 | else if (EDGE_SUCC (bb2, 0)->dest == bb1) |
4400 | { |
4401 | std::swap (a&: bb1, b&: bb2); |
4402 | std::swap (a&: e1, b&: e2); |
4403 | } |
4404 | else if (EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest |
4405 | && single_succ_p (bb: bb2)) |
4406 | { |
4407 | diamond_p = true; |
4408 | e2 = EDGE_SUCC (bb2, 0); |
4409 | /* Make sure bb2 is just a fall through. */ |
4410 | if ((e2->flags & EDGE_FALLTHRU) == 0) |
4411 | continue; |
4412 | } |
4413 | else |
4414 | continue; |
4415 | |
4416 | e1 = EDGE_SUCC (bb1, 0); |
4417 | |
4418 | /* Make sure that bb1 is just a fall through. */ |
4419 | if (!single_succ_p (bb: bb1) |
4420 | || (e1->flags & EDGE_FALLTHRU) == 0) |
4421 | continue; |
4422 | |
4423 | if (diamond_p) |
4424 | { |
4425 | basic_block bb3 = e1->dest; |
4426 | |
4427 | /* Only handle sinking of store from 2 bbs only, |
4428 | The middle bbs don't need to come from the |
4429 | if always since we are sinking rather than |
4430 | hoisting. */ |
4431 | if (EDGE_COUNT (bb3->preds) != 2) |
4432 | continue; |
4433 | if (cond_if_else_store_replacement (then_bb: bb1, else_bb: bb2, join_bb: bb3)) |
4434 | cfgchanged = true; |
4435 | continue; |
4436 | } |
4437 | |
4438 | /* Also make sure that bb1 only have one predecessor and that it |
4439 | is bb. */ |
4440 | if (!single_pred_p (bb: bb1) |
4441 | || single_pred (bb: bb1) != bb) |
4442 | continue; |
4443 | |
4444 | /* bb1 is the middle block, bb2 the join block, bb the split block, |
4445 | e1 the fallthrough edge from bb1 to bb2. We can't do the |
4446 | optimization if the join block has more than two predecessors. */ |
4447 | if (EDGE_COUNT (bb2->preds) > 2) |
4448 | continue; |
4449 | if (cond_store_replacement (middle_bb: bb1, join_bb: bb2, e0: e1, e1: e2, nontrap)) |
4450 | cfgchanged = true; |
4451 | } |
4452 | |
4453 | free (ptr: bb_order); |
4454 | |
4455 | delete nontrap; |
4456 | /* If the CFG has changed, we should cleanup the CFG. */ |
4457 | if (cfgchanged) |
4458 | { |
4459 | /* In cond-store replacement we have added some loads on edges |
4460 | and new VOPS (as we moved the store, and created a load). */ |
4461 | gsi_commit_edge_inserts (); |
4462 | todo = TODO_cleanup_cfg | TODO_update_ssa_only_virtuals; |
4463 | } |
4464 | scev_finalize (); |
4465 | loop_optimizer_finalize (); |
4466 | return todo; |
4467 | } |
4468 | |