1 | /* Dead and redundant store elimination |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #define INCLUDE_MEMORY |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "rtl.h" |
26 | #include "tree.h" |
27 | #include "gimple.h" |
28 | #include "tree-pass.h" |
29 | #include "ssa.h" |
30 | #include "gimple-pretty-print.h" |
31 | #include "fold-const.h" |
32 | #include "gimple-iterator.h" |
33 | #include "tree-cfg.h" |
34 | #include "tree-dfa.h" |
35 | #include "tree-cfgcleanup.h" |
36 | #include "alias.h" |
37 | #include "tree-ssa-loop.h" |
38 | #include "tree-ssa-dse.h" |
39 | #include "builtins.h" |
40 | #include "gimple-fold.h" |
41 | #include "gimplify.h" |
42 | #include "tree-eh.h" |
43 | #include "cfganal.h" |
44 | #include "cgraph.h" |
45 | #include "ipa-modref-tree.h" |
46 | #include "ipa-modref.h" |
47 | #include "target.h" |
48 | #include "tree-ssa-loop-niter.h" |
49 | #include "cfgloop.h" |
50 | #include "tree-data-ref.h" |
51 | #include "internal-fn.h" |
52 | |
53 | /* This file implements dead store elimination. |
54 | |
55 | A dead store is a store into a memory location which will later be |
56 | overwritten by another store without any intervening loads. In this |
57 | case the earlier store can be deleted or trimmed if the store |
58 | was partially dead. |
59 | |
60 | A redundant store is a store into a memory location which stores |
61 | the exact same value as a prior store to the same memory location. |
62 | While this can often be handled by dead store elimination, removing |
63 | the redundant store is often better than removing or trimming the |
64 | dead store. |
65 | |
66 | In our SSA + virtual operand world we use immediate uses of virtual |
67 | operands to detect these cases. If a store's virtual definition |
68 | is used precisely once by a later store to the same location which |
69 | post dominates the first store, then the first store is dead. If |
70 | the data stored is the same, then the second store is redundant. |
71 | |
72 | The single use of the store's virtual definition ensures that |
73 | there are no intervening aliased loads and the requirement that |
   the second store post dominates the first ensures that if the earlier
75 | store executes, then the later stores will execute before the function |
76 | exits. |
77 | |
78 | It may help to think of this as first moving the earlier store to |
79 | the point immediately before the later store. Again, the single |
80 | use of the virtual definition and the post-dominance relationship |
81 | ensure that such movement would be safe. Clearly if there are |
   back to back stores, then the second makes the first dead.  If
83 | the second store stores the same value, then the second store is |
84 | redundant. |
85 | |
86 | Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler" |
87 | may also help in understanding this code since it discusses the |
88 | relationship between dead store and redundant load elimination. In |
89 | fact, they are the same transformation applied to different views of |
90 | the CFG. */ |
91 | |
92 | static void delete_dead_or_redundant_call (gimple_stmt_iterator *, const char *); |
93 | |
94 | /* Bitmap of blocks that have had EH statements cleaned. We should |
95 | remove their dead edges eventually. */ |
96 | static bitmap need_eh_cleanup; |
97 | static bitmap need_ab_cleanup; |
98 | |
99 | /* STMT is a statement that may write into memory. Analyze it and |
100 | initialize WRITE to describe how STMT affects memory. When |
101 | MAY_DEF_OK is true then the function initializes WRITE to what |
102 | the stmt may define. |
103 | |
104 | Return TRUE if the statement was analyzed, FALSE otherwise. |
105 | |
   It is always safe to return FALSE.  But typically better optimization
107 | can be achieved by analyzing more statements. */ |
108 | |
static bool
initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false)
{
  /* It's advantageous to handle certain mem* functions.  */
  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
    {
      switch (DECL_FUNCTION_CODE (decl: gimple_call_fndecl (gs: stmt)))
	{
	case BUILT_IN_MEMCPY:
	case BUILT_IN_MEMMOVE:
	case BUILT_IN_MEMSET:
	case BUILT_IN_MEMCPY_CHK:
	case BUILT_IN_MEMMOVE_CHK:
	case BUILT_IN_MEMSET_CHK:
	case BUILT_IN_STRNCPY:
	case BUILT_IN_STRNCPY_CHK:
	  {
	    /* For these builtins argument 0 is the destination pointer
	       and argument 2 the byte count; together they describe the
	       written range exactly.  */
	    tree size = gimple_call_arg (gs: stmt, index: 2);
	    tree ptr = gimple_call_arg (gs: stmt, index: 0);
	    ao_ref_init_from_ptr_and_size (write, ptr, size);
	    return true;
	  }

	/* A calloc call can never be dead, but it can make
	   subsequent stores redundant if they store 0 into
	   the same memory locations.  */
	case BUILT_IN_CALLOC:
	  {
	    tree nelem = gimple_call_arg (gs: stmt, index: 0);
	    tree selem = gimple_call_arg (gs: stmt, index: 1);
	    tree lhs;
	    if (TREE_CODE (nelem) == INTEGER_CST
		&& TREE_CODE (selem) == INTEGER_CST
		&& (lhs = gimple_call_lhs (gs: stmt)) != NULL_TREE)
	      {
		/* The zeroed range is NELEM * SELEM bytes starting at
		   the pointer returned in LHS.  */
		tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
					 nelem, selem);
		ao_ref_init_from_ptr_and_size (write, lhs, size);
		return true;
	      }
	    /* NOTE(review): no break here — a calloc whose arguments are
	       not constant falls through to the default case; this looks
	       intentional since nothing useful can be recorded.  */
	  }

	default:
	  break;
	}
    }
  else if (is_gimple_call (gs: stmt)
	   && gimple_call_internal_p (gs: stmt))
    {
      switch (gimple_call_internal_fn (gs: stmt))
	{
	case IFN_LEN_STORE:
	case IFN_MASK_STORE:
	case IFN_MASK_LEN_STORE:
	  {
	    internal_fn ifn = gimple_call_internal_fn (gs: stmt);
	    int stored_value_index = internal_fn_stored_value_index (ifn);
	    int len_index = internal_fn_len_index (ifn);
	    if (ifn == IFN_LEN_STORE)
	      {
		/* For IFN_LEN_STORE with a constant length the written
		   range is exactly LEN - BIAS bytes.  */
		tree len = gimple_call_arg (gs: stmt, index: len_index);
		tree bias = gimple_call_arg (gs: stmt, index: len_index + 1);
		if (tree_fits_uhwi_p (len))
		  {
		    ao_ref_init_from_ptr_and_size (write,
						   gimple_call_arg (gs: stmt, index: 0),
						   int_const_binop (MINUS_EXPR,
								    len, bias));
		    return true;
		  }
	      }
	    /* We cannot initialize a must-def ao_ref (in all cases) but we
	       can provide a may-def variant.  */
	    if (may_def_ok)
	      {
		/* The size of the stored value's type is an upper bound
		   on the bytes these stores may write.  */
		ao_ref_init_from_ptr_and_size (
		  write, gimple_call_arg (gs: stmt, index: 0),
		  TYPE_SIZE_UNIT (
		    TREE_TYPE (gimple_call_arg (stmt, stored_value_index))));
		return true;
	      }
	    break;
	  }
	default:;
	}
    }
  /* A plain store: the LHS describes the written memory, but only when it
     is not an SSA name and either a may-def is acceptable or the store
     cannot throw (a throwing store is not a must-def).  */
  if (tree lhs = gimple_get_lhs (stmt))
    {
      if (TREE_CODE (lhs) != SSA_NAME
	  && (may_def_ok || !stmt_could_throw_p (cfun, stmt)))
	{
	  ao_ref_init (write, lhs);
	  return true;
	}
    }
  return false;
}
206 | |
207 | /* Given REF from the alias oracle, return TRUE if it is a valid |
208 | kill memory reference for dead store elimination, false otherwise. |
209 | |
210 | In particular, the reference must have a known base, known maximum |
211 | size, start at a byte offset and have a size that is one or more |
212 | bytes. */ |
213 | |
214 | static bool |
215 | valid_ao_ref_kill_for_dse (ao_ref *ref) |
216 | { |
217 | return (ao_ref_base (ref) |
218 | && known_size_p (a: ref->max_size) |
219 | && maybe_ne (a: ref->size, b: 0) |
220 | && known_eq (ref->max_size, ref->size) |
221 | && known_ge (ref->offset, 0)); |
222 | } |
223 | |
224 | /* Given REF from the alias oracle, return TRUE if it is a valid |
225 | load or store memory reference for dead store elimination, false otherwise. |
226 | |
227 | Unlike for valid_ao_ref_kill_for_dse we can accept writes where max_size |
228 | is not same as size since we can handle conservatively the larger range. */ |
229 | |
230 | static bool |
231 | valid_ao_ref_for_dse (ao_ref *ref) |
232 | { |
233 | return (ao_ref_base (ref) |
234 | && known_size_p (a: ref->max_size) |
235 | && known_ge (ref->offset, 0)); |
236 | } |
237 | |
238 | /* Initialize OFFSET and SIZE to a range known to contain REF |
   where the boundaries are divisible by BITS_PER_UNIT (but still in bits).
240 | Return false if this is impossible. */ |
241 | |
242 | static bool |
243 | get_byte_aligned_range_containing_ref (ao_ref *ref, poly_int64 *offset, |
244 | HOST_WIDE_INT *size) |
245 | { |
246 | if (!known_size_p (a: ref->max_size)) |
247 | return false; |
248 | *offset = aligned_lower_bound (value: ref->offset, BITS_PER_UNIT); |
249 | poly_int64 end = aligned_upper_bound (value: ref->offset + ref->max_size, |
250 | BITS_PER_UNIT); |
251 | return (end - *offset).is_constant (const_value: size); |
252 | } |
253 | |
/* Initialize OFFSET and SIZE to a range known to be contained in REF
255 | where the boundaries are divisible by BITS_PER_UNIT (but still in bits). |
256 | Return false if this is impossible. */ |
257 | |
258 | static bool |
259 | get_byte_aligned_range_contained_in_ref (ao_ref *ref, poly_int64 *offset, |
260 | HOST_WIDE_INT *size) |
261 | { |
262 | if (!known_size_p (a: ref->size) |
263 | || !known_eq (ref->size, ref->max_size)) |
264 | return false; |
265 | *offset = aligned_upper_bound (value: ref->offset, BITS_PER_UNIT); |
266 | poly_int64 end = aligned_lower_bound (value: ref->offset + ref->max_size, |
267 | BITS_PER_UNIT); |
268 | /* For bit accesses we can get -1 here, but also 0 sized kill is not |
269 | useful. */ |
270 | if (!known_gt (end, *offset)) |
271 | return false; |
272 | return (end - *offset).is_constant (const_value: size); |
273 | } |
274 | |
/* Compute byte range (returned in RET_OFFSET and RET_SIZE) for access COPY
276 | inside REF. If KILL is true, then COPY represent a kill and the byte range |
277 | needs to be fully contained in bit range given by COPY. If KILL is false |
278 | then the byte range returned must contain the range of COPY. */ |
279 | |
static bool
get_byte_range (ao_ref *copy, ao_ref *ref, bool kill,
		HOST_WIDE_INT *ret_offset, HOST_WIDE_INT *ret_size)
{
  HOST_WIDE_INT copy_size, ref_size;
  poly_int64 copy_offset, ref_offset;
  HOST_WIDE_INT diff;

  /* First translate from bits to bytes, rounding to bigger or smaller ranges
     as needed.  Kills needs to be always rounded to smaller ranges while
     uses and stores to larger ranges.  */
  if (kill)
    {
      if (!get_byte_aligned_range_contained_in_ref (ref: copy, offset: &copy_offset,
						    size: &copy_size))
	return false;
    }
  else
    {
      if (!get_byte_aligned_range_containing_ref (ref: copy, offset: &copy_offset,
						  size: &copy_size))
	return false;
    }

  /* The two offsets must be comparable (ordered) for the byte range
     relative to REF to be meaningful.  */
  if (!get_byte_aligned_range_containing_ref (ref, offset: &ref_offset, size: &ref_size)
      || !ordered_p (a: copy_offset, b: ref_offset))
    return false;

  /* Switch sizes from bits to bytes so we do not need to care about
     overflows.  Offset calculation needs to stay in bits until we compute
     the difference and can switch to HOST_WIDE_INT.  */
  copy_size /= BITS_PER_UNIT;
  ref_size /= BITS_PER_UNIT;

  /* If COPY starts before REF, then reset the beginning of
     COPY to match REF and decrease the size of COPY by the
     number of bytes removed from COPY.  */
  if (maybe_lt (a: copy_offset, b: ref_offset))
    {
      if (!(ref_offset - copy_offset).is_constant (const_value: &diff)
	  || copy_size < diff / BITS_PER_UNIT)
	return false;
      copy_size -= diff / BITS_PER_UNIT;
      copy_offset = ref_offset;
    }

  /* COPY must start within REF (not past its end).  */
  if (!(copy_offset - ref_offset).is_constant (const_value: &diff)
      || ref_size <= diff / BITS_PER_UNIT)
    return false;

  /* If COPY extends beyond REF, chop off its size appropriately.  */
  HOST_WIDE_INT limit = ref_size - diff / BITS_PER_UNIT;

  if (copy_size > limit)
    copy_size = limit;
  *ret_size = copy_size;
  if (!(copy_offset - ref_offset).is_constant (const_value: ret_offset))
    return false;
  /* Return the offset in bytes, relative to REF's byte-aligned start.  */
  *ret_offset /= BITS_PER_UNIT;
  return true;
}
341 | |
342 | /* Update LIVE_BYTES tracking REF for write to WRITE: |
343 | Verify we have the same base memory address, the write |
344 | has a known size and overlaps with REF. */ |
345 | static void |
346 | clear_live_bytes_for_ref (sbitmap live_bytes, ao_ref *ref, ao_ref *write) |
347 | { |
348 | HOST_WIDE_INT start, size; |
349 | |
350 | if (valid_ao_ref_kill_for_dse (ref: write) |
351 | && operand_equal_p (write->base, ref->base, flags: OEP_ADDRESS_OF) |
352 | && get_byte_range (copy: write, ref, kill: true, ret_offset: &start, ret_size: &size)) |
353 | bitmap_clear_range (live_bytes, start, size); |
354 | } |
355 | |
356 | /* Clear any bytes written by STMT from the bitmap LIVE_BYTES. The base |
357 | address written by STMT must match the one found in REF, which must |
358 | have its base address previously initialized. |
359 | |
360 | This routine must be conservative. If we don't know the offset or |
361 | actual size written, assume nothing was written. */ |
362 | |
363 | static void |
364 | clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref) |
365 | { |
366 | ao_ref write; |
367 | |
368 | if (gcall *call = dyn_cast <gcall *> (p: stmt)) |
369 | { |
370 | bool interposed; |
371 | modref_summary *summary = get_modref_function_summary (call, interposed: &interposed); |
372 | |
373 | if (summary && !interposed) |
374 | for (auto kill : summary->kills) |
375 | if (kill.get_ao_ref (stmt: as_a <gcall *> (p: stmt), ref: &write)) |
376 | clear_live_bytes_for_ref (live_bytes, ref, write: &write); |
377 | } |
378 | if (!initialize_ao_ref_for_dse (stmt, write: &write)) |
379 | return; |
380 | |
381 | clear_live_bytes_for_ref (live_bytes, ref, write: &write); |
382 | } |
383 | |
384 | /* REF is a memory write. Extract relevant information from it and |
385 | initialize the LIVE_BYTES bitmap. If successful, return TRUE. |
386 | Otherwise return FALSE. */ |
387 | |
388 | static bool |
389 | setup_live_bytes_from_ref (ao_ref *ref, sbitmap live_bytes) |
390 | { |
391 | HOST_WIDE_INT const_size; |
392 | if (valid_ao_ref_for_dse (ref) |
393 | && ((aligned_upper_bound (value: ref->offset + ref->max_size, BITS_PER_UNIT) |
394 | - aligned_lower_bound (value: ref->offset, |
395 | BITS_PER_UNIT)).is_constant (const_value: &const_size)) |
396 | && (const_size / BITS_PER_UNIT <= param_dse_max_object_size) |
397 | && const_size > 1) |
398 | { |
399 | bitmap_clear (live_bytes); |
400 | bitmap_set_range (live_bytes, 0, const_size / BITS_PER_UNIT); |
401 | return true; |
402 | } |
403 | return false; |
404 | } |
405 | |
406 | /* Compute the number of elements that we can trim from the head and |
407 | tail of ORIG resulting in a bitmap that is a superset of LIVE. |
408 | |
409 | Store the number of elements trimmed from the head and tail in |
410 | TRIM_HEAD and TRIM_TAIL. |
411 | |
412 | STMT is the statement being trimmed and is used for debugging dump |
413 | output only. */ |
414 | |
415 | static void |
416 | compute_trims (ao_ref *ref, sbitmap live, int *trim_head, int *trim_tail, |
417 | gimple *stmt) |
418 | { |
419 | /* We use sbitmaps biased such that ref->offset is bit zero and the bitmap |
420 | extends through ref->size. So we know that in the original bitmap |
421 | bits 0..ref->size were true. We don't actually need the bitmap, just |
422 | the REF to compute the trims. */ |
423 | |
424 | /* Now identify how much, if any of the tail we can chop off. */ |
425 | HOST_WIDE_INT const_size; |
426 | int last_live = bitmap_last_set_bit (live); |
427 | if (ref->size.is_constant (const_value: &const_size)) |
428 | { |
429 | int last_orig = (const_size / BITS_PER_UNIT) - 1; |
430 | /* We can leave inconvenient amounts on the tail as |
431 | residual handling in mem* and str* functions is usually |
432 | reasonably efficient. */ |
433 | *trim_tail = last_orig - last_live; |
434 | |
435 | /* But don't trim away out of bounds accesses, as this defeats |
436 | proper warnings. |
437 | |
438 | We could have a type with no TYPE_SIZE_UNIT or we could have a VLA |
439 | where TYPE_SIZE_UNIT is not a constant. */ |
440 | if (*trim_tail |
441 | && TYPE_SIZE_UNIT (TREE_TYPE (ref->base)) |
442 | && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref->base))) == INTEGER_CST |
443 | && compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (ref->base)), |
444 | last_orig) <= 0) |
445 | *trim_tail = 0; |
446 | } |
447 | else |
448 | *trim_tail = 0; |
449 | |
450 | /* Identify how much, if any of the head we can chop off. */ |
451 | int first_orig = 0; |
452 | int first_live = bitmap_first_set_bit (live); |
453 | *trim_head = first_live - first_orig; |
454 | |
455 | /* If REF is aligned, try to maintain this alignment if it reduces |
456 | the number of (power-of-two sized aligned) writes to memory. */ |
457 | unsigned int align_bits; |
458 | unsigned HOST_WIDE_INT bitpos; |
459 | if ((*trim_head || *trim_tail) |
460 | && last_live - first_live >= 2 |
461 | && ao_ref_alignment (ref, &align_bits, &bitpos) |
462 | && align_bits >= 32 |
463 | && bitpos == 0 |
464 | && align_bits % BITS_PER_UNIT == 0) |
465 | { |
466 | unsigned int align_units = align_bits / BITS_PER_UNIT; |
467 | if (align_units > 16) |
468 | align_units = 16; |
469 | while ((first_live | (align_units - 1)) > (unsigned int)last_live) |
470 | align_units >>= 1; |
471 | |
472 | if (*trim_head) |
473 | { |
474 | unsigned int pos = first_live & (align_units - 1); |
475 | for (unsigned int i = 1; i <= align_units; i <<= 1) |
476 | { |
477 | unsigned int mask = ~(i - 1); |
478 | unsigned int bytes = align_units - (pos & mask); |
479 | if (wi::popcount (bytes) <= 1) |
480 | { |
481 | *trim_head &= mask; |
482 | break; |
483 | } |
484 | } |
485 | } |
486 | |
487 | if (*trim_tail) |
488 | { |
489 | unsigned int pos = last_live & (align_units - 1); |
490 | for (unsigned int i = 1; i <= align_units; i <<= 1) |
491 | { |
492 | int mask = i - 1; |
493 | unsigned int bytes = (pos | mask) + 1; |
494 | if ((last_live | mask) > (last_live + *trim_tail)) |
495 | break; |
496 | if (wi::popcount (bytes) <= 1) |
497 | { |
498 | unsigned int = (last_live | mask) - last_live; |
499 | *trim_tail -= extra; |
500 | break; |
501 | } |
502 | } |
503 | } |
504 | } |
505 | |
506 | if ((*trim_head || *trim_tail) |
507 | && dump_file && (dump_flags & TDF_DETAILS)) |
508 | { |
509 | fprintf (stream: dump_file, format: " Trimming statement (head = %d, tail = %d): " , |
510 | *trim_head, *trim_tail); |
511 | print_gimple_stmt (dump_file, stmt, 0, dump_flags); |
512 | fprintf (stream: dump_file, format: "\n" ); |
513 | } |
514 | } |
515 | |
516 | /* STMT initializes an object from COMPLEX_CST where one or more of the |
517 | bytes written may be dead stores. REF is a representation of the |
518 | memory written. LIVE is the bitmap of stores that are actually live. |
519 | |
520 | Attempt to rewrite STMT so that only the real or imaginary part of |
521 | the object is actually stored. */ |
522 | |
523 | static void |
524 | maybe_trim_complex_store (ao_ref *ref, sbitmap live, gimple *stmt) |
525 | { |
526 | int trim_head, trim_tail; |
527 | compute_trims (ref, live, trim_head: &trim_head, trim_tail: &trim_tail, stmt); |
528 | |
529 | /* The amount of data trimmed from the head or tail must be at |
530 | least half the size of the object to ensure we're trimming |
531 | the entire real or imaginary half. By writing things this |
532 | way we avoid more O(n) bitmap operations. */ |
533 | if (known_ge (trim_tail * 2 * BITS_PER_UNIT, ref->size)) |
534 | { |
535 | /* TREE_REALPART is live */ |
536 | tree x = TREE_REALPART (gimple_assign_rhs1 (stmt)); |
537 | tree y = gimple_assign_lhs (gs: stmt); |
538 | y = build1 (REALPART_EXPR, TREE_TYPE (x), y); |
539 | gimple_assign_set_lhs (gs: stmt, lhs: y); |
540 | gimple_assign_set_rhs1 (gs: stmt, rhs: x); |
541 | } |
542 | else if (known_ge (trim_head * 2 * BITS_PER_UNIT, ref->size)) |
543 | { |
544 | /* TREE_IMAGPART is live */ |
545 | tree x = TREE_IMAGPART (gimple_assign_rhs1 (stmt)); |
546 | tree y = gimple_assign_lhs (gs: stmt); |
547 | y = build1 (IMAGPART_EXPR, TREE_TYPE (x), y); |
548 | gimple_assign_set_lhs (gs: stmt, lhs: y); |
549 | gimple_assign_set_rhs1 (gs: stmt, rhs: x); |
550 | } |
551 | |
552 | /* Other cases indicate parts of both the real and imag subobjects |
553 | are live. We do not try to optimize those cases. */ |
554 | } |
555 | |
556 | /* STMT initializes an object using a CONSTRUCTOR where one or more of the |
557 | bytes written are dead stores. ORIG is the bitmap of bytes stored by |
558 | STMT. LIVE is the bitmap of stores that are actually live. |
559 | |
560 | Attempt to rewrite STMT so that only the real or imaginary part of |
561 | the object is actually stored. |
562 | |
563 | The most common case for getting here is a CONSTRUCTOR with no elements |
564 | being used to zero initialize an object. We do not try to handle other |
565 | cases as those would force us to fully cover the object with the |
566 | CONSTRUCTOR node except for the components that are dead. */ |
567 | |
static void
maybe_trim_constructor_store (ao_ref *ref, sbitmap live, gimple *stmt)
{
  tree ctor = gimple_assign_rhs1 (gs: stmt);

  /* This is the only case we currently handle.  It actually seems to
     catch most cases of actual interest.  */
  gcc_assert (CONSTRUCTOR_NELTS (ctor) == 0);

  int head_trim = 0;
  int tail_trim = 0;
  compute_trims (ref, live, trim_head: &head_trim, trim_tail: &tail_trim, stmt);

  /* Now we want to replace the constructor initializer
     with memset (object + head_trim, 0, size - head_trim - tail_trim).  */
  if (head_trim || tail_trim)
    {
      /* We want &lhs for the MEM_REF expression.  */
      tree lhs_addr = build_fold_addr_expr (gimple_assign_lhs (stmt));

      /* Only an invariant address can be folded into the MEM_REF we
	 build below; otherwise leave the store untouched.  */
      if (! is_gimple_min_invariant (lhs_addr))
	return;

      /* The number of bytes for the new constructor.  */
      poly_int64 ref_bytes = exact_div (a: ref->size, BITS_PER_UNIT);
      poly_int64 count = ref_bytes - head_trim - tail_trim;

      /* And the new type for the CONSTRUCTOR.  Essentially it's just
	 a char array large enough to cover the non-trimmed parts of
	 the original CONSTRUCTOR.  Note we want explicit bounds here
	 so that we know how many bytes to clear when expanding the
	 CONSTRUCTOR.  */
      tree type = build_array_type_nelts (char_type_node, count);

      /* Build a suitable alias type rather than using alias set zero
	 to avoid pessimizing.  */
      tree alias_type = reference_alias_ptr_type (gimple_assign_lhs (gs: stmt));

      /* Build a MEM_REF representing the whole accessed area, starting
	 at the first byte not trimmed.  */
      tree exp = fold_build2 (MEM_REF, type, lhs_addr,
			      build_int_cst (alias_type, head_trim));

      /* Now update STMT with a new RHS and LHS.  */
      gimple_assign_set_lhs (gs: stmt, lhs: exp);
      gimple_assign_set_rhs1 (gs: stmt, rhs: build_constructor (type, NULL));
    }
}
616 | |
617 | /* STMT is a memcpy, memmove or memset. Decrement the number of bytes |
618 | copied/set by DECREMENT. */ |
619 | static void |
620 | decrement_count (gimple *stmt, int decrement) |
621 | { |
622 | tree *countp = gimple_call_arg_ptr (gs: stmt, index: 2); |
623 | gcc_assert (TREE_CODE (*countp) == INTEGER_CST); |
624 | *countp = wide_int_to_tree (TREE_TYPE (*countp), cst: (TREE_INT_CST_LOW (*countp) |
625 | - decrement)); |
626 | } |
627 | |
/* STMT is a call to a memory or string builtin.  Advance the pointer
   argument pointed to by WHERE by INCREMENT bytes, inserting any
   statements needed to compute the adjusted address around STMT.  */

static void
increment_start_addr (gimple *stmt, tree *where, int increment)
{
  /* If the call's result is used and we are adjusting the destination
     argument (arg 0), first copy the original destination pointer into
     the former LHS and detach it from the call, since the call would now
     produce the advanced pointer instead.  */
  if (tree lhs = gimple_call_lhs (gs: stmt))
    if (where == gimple_call_arg_ptr (gs: stmt, index: 0))
      {
	gassign *newop = gimple_build_assign (lhs, unshare_expr (*where));
	gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	gsi_insert_after (&gsi, newop, GSI_SAME_STMT);
	gimple_call_set_lhs (gs: stmt, NULL_TREE);
	update_stmt (s: stmt);
      }

  /* For an SSA name, emit PTR + INCREMENT as a separate statement before
     STMT and substitute the new name as the argument.  */
  if (TREE_CODE (*where) == SSA_NAME)
    {
      tree tem = make_ssa_name (TREE_TYPE (*where));
      gassign *newop
	= gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
			       build_int_cst (sizetype, increment));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_insert_before (&gsi, newop, GSI_SAME_STMT);
      *where = tem;
      update_stmt (s: stmt);
      return;
    }

  /* Otherwise fold the offset directly into the (invariant) address
     expression.  */
  *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
					      *where,
					      build_int_cst (ptr_type_node,
							     increment)));
}
659 | |
660 | /* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead |
661 | (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce |
662 | the amount of data it actually writes. |
663 | |
664 | Right now we only support trimming from the head or the tail of the |
665 | memory region. In theory we could split the mem* call, but it's |
666 | likely of marginal value. */ |
667 | |
static void
maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt)
{
  int head_trim, tail_trim;
  switch (DECL_FUNCTION_CODE (decl: gimple_call_fndecl (gs: stmt)))
    {
    case BUILT_IN_STRNCPY:
    case BUILT_IN_STRNCPY_CHK:
      compute_trims (ref, live, trim_head: &head_trim, trim_tail: &tail_trim, stmt);
      if (head_trim)
	{
	  /* Head trimming of strncpy is only possible if we can
	     prove all bytes we would trim are non-zero (or we could
	     turn the strncpy into memset if there must be zero
	     among the head trimmed bytes).  If we don't know anything
	     about those bytes, the presence or absence of '\0' bytes
	     in there will affect whether it acts for the non-trimmed
	     bytes as memset or memcpy/strncpy.  */
	  c_strlen_data lendata = { };
	  int orig_head_trim = head_trim;
	  tree srcstr = gimple_call_arg (gs: stmt, index: 1);
	  if (!get_range_strlen (srcstr, &lendata, /*eltsize=*/1)
	      || !tree_fits_uhwi_p (lendata.minlen))
	    head_trim = 0;
	  else if (tree_to_uhwi (lendata.minlen) < (unsigned) head_trim)
	    {
	      /* Limit the trim to the proven-nonzero prefix, and keep it
		 word aligned if the original trim was.  */
	      head_trim = tree_to_uhwi (lendata.minlen);
	      if ((orig_head_trim & (UNITS_PER_WORD - 1)) == 0)
		head_trim &= ~(UNITS_PER_WORD - 1);
	    }
	  if (orig_head_trim != head_trim
	      && dump_file
	      && (dump_flags & TDF_DETAILS))
	    fprintf (stream: dump_file,
		     format: "  Adjusting strncpy trimming to (head = %d,"
		     " tail = %d)\n", head_trim, tail_trim);
	}
      /* Once the head trim is validated the rest is the same as for
	 memcpy.  */
      goto do_memcpy;

    case BUILT_IN_MEMCPY:
    case BUILT_IN_MEMMOVE:
    case BUILT_IN_MEMCPY_CHK:
    case BUILT_IN_MEMMOVE_CHK:
      compute_trims (ref, live, trim_head: &head_trim, trim_tail: &tail_trim, stmt);

    do_memcpy:
      /* Tail trimming is easy, we can just reduce the count.  */
      if (tail_trim)
	decrement_count (stmt, decrement: tail_trim);

      /* Head trimming requires adjusting all the arguments.  */
      if (head_trim)
	{
	  /* For __*_chk need to adjust also the last argument.  */
	  if (gimple_call_num_args (gs: stmt) == 4)
	    {
	      tree size = gimple_call_arg (gs: stmt, index: 3);
	      if (!tree_fits_uhwi_p (size))
		break;
	      if (!integer_all_onesp (size))
		{
		  unsigned HOST_WIDE_INT sz = tree_to_uhwi (size);
		  if (sz < (unsigned) head_trim)
		    break;
		  tree arg = wide_int_to_tree (TREE_TYPE (size),
					       cst: sz - head_trim);
		  gimple_call_set_arg (gs: stmt, index: 3, arg);
		}
	    }
	  /* Advance both pointers and shrink the count.  */
	  tree *dst = gimple_call_arg_ptr (gs: stmt, index: 0);
	  increment_start_addr (stmt, where: dst, increment: head_trim);
	  tree *src = gimple_call_arg_ptr (gs: stmt, index: 1);
	  increment_start_addr (stmt, where: src, increment: head_trim);
	  decrement_count (stmt, decrement: head_trim);
	}
      break;

    case BUILT_IN_MEMSET:
    case BUILT_IN_MEMSET_CHK:
      compute_trims (ref, live, trim_head: &head_trim, trim_tail: &tail_trim, stmt);

      /* Tail trimming is easy, we can just reduce the count.  */
      if (tail_trim)
	decrement_count (stmt, decrement: tail_trim);

      /* Head trimming requires adjusting all the arguments.  */
      if (head_trim)
	{
	  /* For __*_chk need to adjust also the last argument.  */
	  if (gimple_call_num_args (gs: stmt) == 4)
	    {
	      tree size = gimple_call_arg (gs: stmt, index: 3);
	      if (!tree_fits_uhwi_p (size))
		break;
	      if (!integer_all_onesp (size))
		{
		  unsigned HOST_WIDE_INT sz = tree_to_uhwi (size);
		  if (sz < (unsigned) head_trim)
		    break;
		  tree arg = wide_int_to_tree (TREE_TYPE (size),
					       cst: sz - head_trim);
		  gimple_call_set_arg (gs: stmt, index: 3, arg);
		}
	    }
	  /* memset has only a destination pointer to advance.  */
	  tree *dst = gimple_call_arg_ptr (gs: stmt, index: 0);
	  increment_start_addr (stmt, where: dst, increment: head_trim);
	  decrement_count (stmt, decrement: head_trim);
	}
      break;

    default:
      break;
    }
}
782 | |
783 | /* STMT is a memory write where one or more bytes written are dead |
784 | stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the |
785 | bitmap of stores that are actually live. |
786 | |
787 | Attempt to rewrite STMT so that it writes fewer memory locations. Right |
788 | now we only support trimming at the start or end of the memory region. |
789 | It's not clear how much there is to be gained by trimming from the middle |
790 | of the region. */ |
791 | |
792 | static void |
793 | maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) |
794 | { |
795 | if (is_gimple_assign (gs: stmt) |
796 | && TREE_CODE (gimple_assign_lhs (stmt)) != TARGET_MEM_REF) |
797 | { |
798 | switch (gimple_assign_rhs_code (gs: stmt)) |
799 | { |
800 | case CONSTRUCTOR: |
801 | maybe_trim_constructor_store (ref, live, stmt); |
802 | break; |
803 | case COMPLEX_CST: |
804 | maybe_trim_complex_store (ref, live, stmt); |
805 | break; |
806 | default: |
807 | break; |
808 | } |
809 | } |
810 | } |
811 | |
812 | /* Return TRUE if USE_REF reads bytes from LIVE where live is |
813 | derived from REF, a write reference. |
814 | |
815 | While this routine may modify USE_REF, it's passed by value, not |
816 | location. So callers do not see those modifications. */ |
817 | |
818 | static bool |
819 | live_bytes_read (ao_ref *use_ref, ao_ref *ref, sbitmap live) |
820 | { |
821 | /* We have already verified that USE_REF and REF hit the same object. |
822 | Now verify that there's actually an overlap between USE_REF and REF. */ |
823 | HOST_WIDE_INT start, size; |
824 | if (get_byte_range (copy: use_ref, ref, kill: false, ret_offset: &start, ret_size: &size)) |
825 | { |
826 | /* If USE_REF covers all of REF, then it will hit one or more |
827 | live bytes. This avoids useless iteration over the bitmap |
828 | below. */ |
829 | if (start == 0 && known_eq (size * 8, ref->size)) |
830 | return true; |
831 | |
832 | /* Now check if any of the remaining bits in use_ref are set in LIVE. */ |
833 | return bitmap_bit_in_range_p (live, start, (start + size - 1)); |
834 | } |
835 | return true; |
836 | } |
837 | |
838 | /* Callback for dse_classify_store calling for_each_index. Verify that |
839 | indices are invariant in the loop with backedge PHI in basic-block DATA. */ |
840 | |
841 | static bool |
842 | check_name (tree, tree *idx, void *data) |
843 | { |
844 | basic_block phi_bb = (basic_block) data; |
845 | if (TREE_CODE (*idx) == SSA_NAME |
846 | && !SSA_NAME_IS_DEFAULT_DEF (*idx) |
847 | && dominated_by_p (CDI_DOMINATORS, gimple_bb (SSA_NAME_DEF_STMT (*idx)), |
848 | phi_bb)) |
849 | return false; |
850 | return true; |
851 | } |
852 | |
/* STMT stores the value 0 into one or more memory locations
   (via memset, empty constructor, calloc call, etc).

   See if there is a subsequent store of the value 0 to one
   or more of the same memory location(s).  If so, the subsequent
   store is redundant and can be removed.

   The subsequent stores could be via memset, empty constructors,
   simple MEM stores, etc.  */

static void
dse_optimize_redundant_stores (gimple *stmt)
{
  /* Counter bounding the number of immediate uses inspected.  */
  int cnt = 0;

  /* TBAA state of STMT, if it is a call it is effectively alias-set zero.  */
  alias_set_type earlier_set = 0;
  alias_set_type earlier_base_set = 0;
  if (is_gimple_assign (gs: stmt))
    {
      ao_ref lhs_ref;
      ao_ref_init (&lhs_ref, gimple_assign_lhs (gs: stmt));
      earlier_set = ao_ref_alias_set (&lhs_ref);
      earlier_base_set = ao_ref_base_alias_set (&lhs_ref);
    }

  /* We could do something fairly complex and look through PHIs
     like DSE_CLASSIFY_STORE, but it doesn't seem to be worth
     the effort.

     Look at all the immediate uses of the VDEF (which are obviously
     dominated by STMT).  See if one or more stores 0 into the same
     memory locations a STMT, if so remove the immediate use statements.  */
  tree defvar = gimple_vdef (g: stmt);
  imm_use_iterator ui;
  gimple *use_stmt;
  FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
    {
      /* Limit stmt walking.  */
      if (++cnt > param_dse_max_alias_queries_per_store)
	break;

      /* If USE_STMT stores 0 into one or more of the same locations
	 as STMT and STMT would kill USE_STMT, then we can just remove
	 USE_STMT.  USE_STMT qualifies if it is either a plain
	 zero-initializing assignment or a memset/memset_chk of 0.  */
      tree fndecl;
      if ((is_gimple_assign (gs: use_stmt)
	   && gimple_vdef (g: use_stmt)
	   && (gimple_assign_single_p (gs: use_stmt)
	       && initializer_zerop (gimple_assign_rhs1 (gs: use_stmt))))
	  || (gimple_call_builtin_p (use_stmt, BUILT_IN_NORMAL)
	      && (fndecl = gimple_call_fndecl (gs: use_stmt)) != NULL
	      && (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_MEMSET
		  || DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_MEMSET_CHK)
	      && integer_zerop (gimple_call_arg (gs: use_stmt, index: 1))))
	{
	  ao_ref write;

	  if (!initialize_ao_ref_for_dse (stmt: use_stmt, write: &write))
	    break;

	  if (valid_ao_ref_for_dse (ref: &write)
	      && stmt_kills_ref_p (stmt, &write))
	    {
	      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	      if (is_gimple_assign (gs: use_stmt))
		{
		  ao_ref lhs_ref;
		  ao_ref_init (&lhs_ref, gimple_assign_lhs (gs: use_stmt));
		  /* Only delete USE_STMT when its alias sets equal or
		     are subsets of STMT's alias sets.  */
		  if ((earlier_set == ao_ref_alias_set (&lhs_ref)
		       || alias_set_subset_of (ao_ref_alias_set (&lhs_ref),
					       earlier_set))
		      && (earlier_base_set == ao_ref_base_alias_set (&lhs_ref)
			  || alias_set_subset_of
			       (ao_ref_base_alias_set (&lhs_ref),
				earlier_base_set)))
		    delete_dead_or_redundant_assignment (&gsi, "redundant" ,
							 need_eh_cleanup,
							 need_ab_cleanup);
		}
	      else if (is_gimple_call (gs: use_stmt))
		{
		  /* A call behaves as alias-set zero; check that zero
		     is compatible with STMT's recorded sets.  */
		  if ((earlier_set == 0
		       || alias_set_subset_of (0, earlier_set))
		      && (earlier_base_set == 0
			  || alias_set_subset_of (0, earlier_base_set)))
		    delete_dead_or_redundant_call (&gsi, "redundant" );
		}
	      else
		gcc_unreachable ();
	    }
	}
    }
}
947 | |
948 | /* Return whether PHI contains ARG as an argument. */ |
949 | |
950 | static bool |
951 | contains_phi_arg (gphi *phi, tree arg) |
952 | { |
953 | for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i) |
954 | if (gimple_phi_arg_def (gs: phi, index: i) == arg) |
955 | return true; |
956 | return false; |
957 | } |
958 | |
/* Hash map from the memory use in a GIMPLE assignment to its data
   reference, used to cache data-ref analysis results during the walk.
   If NULL, data-ref analysis isn't used.  */
static hash_map<gimple *, data_reference_p> *dse_stmt_to_dr_map;
962 | |
/* A helper of dse_optimize_stmt.
   Given a GIMPLE_ASSIGN in STMT that writes to REF, classify it
   according to downstream uses and defs.  Sets *BY_CLOBBER_P to true
   if only clobber statements influenced the classification result.
   Returns the classification.  */

dse_store_status
dse_classify_store (ao_ref *ref, gimple *stmt,
		    bool byte_tracking_enabled, sbitmap live_bytes,
		    bool *by_clobber_p, tree stop_at_vuse)
{
  gimple *temp;
  /* Bounds the total number of statements visited.  */
  int cnt = 0;
  /* SSA versions of PHI results already seen, to avoid re-visiting.  */
  auto_bitmap visited;
  /* Lazily-created data reference for REF->ref; freed automatically
     via free_data_ref when this function returns.  */
  std::unique_ptr<data_reference, void(*)(data_reference_p)>
    dra (nullptr, free_data_ref);

  if (by_clobber_p)
    *by_clobber_p = true;

  /* Find the first dominated statement that clobbers (part of) the
     memory stmt stores to with no intermediate statement that may use
     part of the memory stmt stores.  That is, find a store that may
     prove stmt to be a dead store.  */
  temp = stmt;
  do
    {
      gimple *use_stmt;
      imm_use_iterator ui;
      bool fail = false;
      tree defvar;

      if (gimple_code (g: temp) == GIMPLE_PHI)
	{
	  defvar = PHI_RESULT (temp);
	  bitmap_set_bit (visited, SSA_NAME_VERSION (defvar));
	}
      else
	defvar = gimple_vdef (g: temp);

      /* Stores found on this iteration whose uses may need further
	 walking.  Only the first and last PHI defs are also tracked
	 individually (see the pruning loop below).  */
      auto_vec<gimple *, 10> defs;
      gphi *first_phi_def = NULL;
      gphi *last_phi_def = NULL;

      /* Worklist of virtual defs whose immediate uses still have to
	 be visited; seeded with TEMP's def.  */
      auto_vec<tree, 10> worklist;
      worklist.quick_push (obj: defvar);

      do
	{
	  defvar = worklist.pop ();
	  /* If we're instructed to stop walking at region boundary, do so.  */
	  if (defvar == stop_at_vuse)
	    return DSE_STORE_LIVE;

	  FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
	    {
	      /* Limit stmt walking.  */
	      if (++cnt > param_dse_max_alias_queries_per_store)
		{
		  fail = true;
		  break;
		}

	      /* In simple cases we can look through PHI nodes, but we
		 have to be careful with loops and with memory references
		 containing operands that are also operands of PHI nodes.
		 See gcc.c-torture/execute/20051110-*.c.  */
	      if (gimple_code (g: use_stmt) == GIMPLE_PHI)
		{
		  /* Look through single-argument PHIs.  */
		  if (gimple_phi_num_args (gs: use_stmt) == 1)
		    worklist.safe_push (obj: gimple_phi_result (gs: use_stmt));

		  /* If we already visited this PHI ignore it for further
		     processing.  */
		  else if (!bitmap_bit_p (visited,
					  SSA_NAME_VERSION
					    (PHI_RESULT (use_stmt))))
		    {
		      /* If we visit this PHI by following a backedge then we
			 have to make sure ref->ref only refers to SSA names
			 that are invariant with respect to the loop
			 represented by this PHI node.  */
		      if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: stmt),
					  gimple_bb (g: use_stmt))
			  && !for_each_index (ref->ref ? &ref->ref : &ref->base,
					      check_name, gimple_bb (g: use_stmt)))
			return DSE_STORE_LIVE;
		      defs.safe_push (obj: use_stmt);
		      if (!first_phi_def)
			first_phi_def = as_a <gphi *> (p: use_stmt);
		      last_phi_def = as_a <gphi *> (p: use_stmt);
		    }
		}
	      /* If the statement is a use the store is not dead.  */
	      else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
		{
		  /* When data-ref analysis is enabled, try to
		     disambiguate the use against REF more precisely
		     using cached data references.  */
		  if (dse_stmt_to_dr_map
		      && ref->ref
		      && is_gimple_assign (gs: use_stmt))
		    {
		      if (!dra)
			dra.reset (p: create_data_ref (NULL, NULL, ref->ref, stmt,
						     false, false));
		      bool existed_p;
		      data_reference_p &drb
			= dse_stmt_to_dr_map->get_or_insert (k: use_stmt,
							     existed: &existed_p);
		      if (!existed_p)
			drb = create_data_ref (NULL, NULL,
					       gimple_assign_rhs1 (gs: use_stmt),
					       use_stmt, false, false);
		      if (!dr_may_alias_p (dra.get (), drb, NULL))
			{
			  if (gimple_vdef (g: use_stmt))
			    defs.safe_push (obj: use_stmt);
			  continue;
			}
		    }

		  /* Handle common cases where we can easily build an ao_ref
		     structure for USE_STMT and in doing so we find that the
		     references hit non-live bytes and thus can be ignored.

		     TODO: We can also use modref summary to handle calls.  */
		  if (byte_tracking_enabled
		      && is_gimple_assign (gs: use_stmt))
		    {
		      ao_ref use_ref;
		      ao_ref_init (&use_ref, gimple_assign_rhs1 (gs: use_stmt));
		      if (valid_ao_ref_for_dse (ref: &use_ref)
			  && operand_equal_p (use_ref.base, ref->base,
					      flags: OEP_ADDRESS_OF)
			  && !live_bytes_read (use_ref: &use_ref, ref, live: live_bytes))
			{
			  /* If this is a store, remember it as we possibly
			     need to walk the defs uses.  */
			  if (gimple_vdef (g: use_stmt))
			    defs.safe_push (obj: use_stmt);
			  continue;
			}
		    }

		  fail = true;
		  break;
		}
	      /* We have visited ourselves already so ignore STMT for the
		 purpose of chaining.  */
	      else if (use_stmt == stmt)
		;
	      /* If this is a store, remember it as we possibly need to walk the
		 defs uses.  */
	      else if (gimple_vdef (g: use_stmt))
		defs.safe_push (obj: use_stmt);
	    }
	}
      while (!fail && !worklist.is_empty ());

      if (fail)
	{
	  /* STMT might be partially dead and we may be able to reduce
	     how many memory locations it stores into.  */
	  if (byte_tracking_enabled && !gimple_clobber_p (s: stmt))
	    return DSE_STORE_MAYBE_PARTIAL_DEAD;
	  return DSE_STORE_LIVE;
	}

      /* If we didn't find any definition this means the store is dead
	 if it isn't a store to global reachable memory.  In this case
	 just pretend the stmt makes itself dead.  Otherwise fail.  */
      if (defs.is_empty ())
	{
	  if (ref_may_alias_global_p (ref, false))
	    {
	      basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (defvar));
	      /* Assume that BUILT_IN_UNREACHABLE and BUILT_IN_UNREACHABLE_TRAP
		 do not need to keep (global) memory side-effects live.
		 We do not have virtual operands on BUILT_IN_UNREACHABLE
		 but we can do poor mans reachability when the last
		 definition we want to elide is in the block that ends
		 in such a call.  */
	      if (EDGE_COUNT (def_bb->succs) == 0)
		if (gcall *last = dyn_cast <gcall *> (p: *gsi_last_bb (bb: def_bb)))
		  if (gimple_call_builtin_p (last, BUILT_IN_UNREACHABLE)
		      || gimple_call_builtin_p (last,
						BUILT_IN_UNREACHABLE_TRAP))
		    {
		      if (by_clobber_p)
			*by_clobber_p = false;
		      return DSE_STORE_DEAD;
		    }
	      return DSE_STORE_LIVE;
	    }

	  if (by_clobber_p)
	    *by_clobber_p = false;
	  return DSE_STORE_DEAD;
	}

      /* Process defs and remove those we need not process further.  */
      for (unsigned i = 0; i < defs.length ();)
	{
	  gimple *def = defs[i];
	  gimple *use_stmt;
	  use_operand_p use_p;
	  tree vdef = (gimple_code (g: def) == GIMPLE_PHI
		       ? gimple_phi_result (gs: def) : gimple_vdef (g: def));
	  gphi *phi_def;
	  /* If the path to check starts with a kill we do not need to
	     process it further.
	     ??? With byte tracking we need only kill the bytes currently
	     live.  */
	  if (stmt_kills_ref_p (def, ref))
	    {
	      if (by_clobber_p && !gimple_clobber_p (s: def))
		*by_clobber_p = false;
	      defs.unordered_remove (ix: i);
	    }
	  /* If the path ends here we do not need to process it further.
	     This for example happens with calls to noreturn functions.  */
	  else if (has_zero_uses (var: vdef))
	    {
	      /* But if the store is to global memory it is definitely
		 not dead.  */
	      if (ref_may_alias_global_p (ref, false))
		return DSE_STORE_LIVE;
	      defs.unordered_remove (ix: i);
	    }
	  /* In addition to kills we can remove defs whose only use
	     is another def in defs.  That can only ever be PHIs of which
	     we track two for simplicity reasons, the first and last in
	     {first,last}_phi_def (we fail for multiple PHIs anyways).
	     We can also ignore defs that feed only into
	     already visited PHIs.  */
	  else if (single_imm_use (var: vdef, use_p: &use_p, stmt: &use_stmt)
		   && (use_stmt == first_phi_def
		       || use_stmt == last_phi_def
		       || (gimple_code (g: use_stmt) == GIMPLE_PHI
			   && bitmap_bit_p (visited,
					    SSA_NAME_VERSION
					      (PHI_RESULT (use_stmt))))))
	    {
	      defs.unordered_remove (ix: i);
	      if (def == first_phi_def)
		first_phi_def = NULL;
	      else if (def == last_phi_def)
		last_phi_def = NULL;
	    }
	  /* If def is a PHI and one of its arguments is another PHI node still
	     in consideration we can defer processing it.  */
	  else if ((phi_def = dyn_cast <gphi *> (p: def))
		   && ((last_phi_def
			&& phi_def != last_phi_def
			&& contains_phi_arg (phi: phi_def,
					     arg: gimple_phi_result (gs: last_phi_def)))
		       || (first_phi_def
			   && phi_def != first_phi_def
			   && contains_phi_arg
				(phi: phi_def, arg: gimple_phi_result (gs: first_phi_def)))))
	    {
	      defs.unordered_remove (ix: i);
	      if (phi_def == first_phi_def)
		first_phi_def = NULL;
	      else if (phi_def == last_phi_def)
		last_phi_def = NULL;
	    }
	  else
	    ++i;
	}

      /* If all defs kill the ref we are done.  */
      if (defs.is_empty ())
	return DSE_STORE_DEAD;
      /* If more than one def survives fail.  */
      if (defs.length () > 1)
	{
	  /* STMT might be partially dead and we may be able to reduce
	     how many memory locations it stores into.  */
	  if (byte_tracking_enabled && !gimple_clobber_p (s: stmt))
	    return DSE_STORE_MAYBE_PARTIAL_DEAD;
	  return DSE_STORE_LIVE;
	}
      /* Exactly one def remains; continue the walk from it.  */
      temp = defs[0];

      /* Track partial kills.  */
      if (byte_tracking_enabled)
	{
	  clear_bytes_written_by (live_bytes, stmt: temp, ref);
	  if (bitmap_empty_p (live_bytes))
	    {
	      if (by_clobber_p && !gimple_clobber_p (s: temp))
		*by_clobber_p = false;
	      return DSE_STORE_DEAD;
	    }
	}
    }
  /* Continue walking until there are no more live bytes.  */
  while (1);
}
1262 | |
1263 | |
/* Delete a dead call at GSI, which is mem* call of some kind.
   TYPE ("dead" or "redundant") is used only for dump output.
   Records the containing block in need_eh_cleanup when removal
   leaves dead EH edges.  */
static void
delete_dead_or_redundant_call (gimple_stmt_iterator *gsi, const char *type)
{
  gimple *stmt = gsi_stmt (i: *gsi);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: " Deleted %s call: " , type);
      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
      fprintf (stream: dump_file, format: "\n" );
    }

  basic_block bb = gimple_bb (g: stmt);
  tree lhs = gimple_call_lhs (gs: stmt);
  if (lhs)
    {
      /* The call's result is used; replace the call with
	 LHS = first-argument so the value stays available.  */
      tree ptr = gimple_call_arg (gs: stmt, index: 0);
      gimple *new_stmt = gimple_build_assign (lhs, ptr);
      unlink_stmt_vdef (stmt);
      if (gsi_replace (gsi, new_stmt, true))
	bitmap_set_bit (need_eh_cleanup, bb->index);
    }
  else
    {
      /* Then we need to fix the operand of the consuming stmt.  */
      unlink_stmt_vdef (stmt);

      /* Remove the dead store.  */
      if (gsi_remove (gsi, true))
	bitmap_set_bit (need_eh_cleanup, bb->index);
      release_defs (stmt);
    }
}
1297 | |
/* Delete a dead store at GSI, which is a gimple assignment.
   TYPE ("dead" or "redundant") is used only for dump output.
   Blocks needing EH or abnormal-edge cleanup after the removal are
   recorded in NEED_EH_CLEANUP / NEED_AB_CLEANUP (either may be
   NULL to skip that bookkeeping).  */

void
delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi,
				     const char *type,
				     bitmap need_eh_cleanup,
				     bitmap need_ab_cleanup)
{
  gimple *stmt = gsi_stmt (i: *gsi);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: " Deleted %s store: " , type);
      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
      fprintf (stream: dump_file, format: "\n" );
    }

  /* Then we need to fix the operand of the consuming stmt.  */
  unlink_stmt_vdef (stmt);

  /* Remove the dead store.  */
  basic_block bb = gimple_bb (g: stmt);
  if (need_ab_cleanup && stmt_can_make_abnormal_goto (stmt))
    bitmap_set_bit (need_ab_cleanup, bb->index);
  if (gsi_remove (gsi, true) && need_eh_cleanup)
    bitmap_set_bit (need_eh_cleanup, bb->index);

  /* And release any SSA_NAMEs set in this statement back to the
     SSA_NAME manager.  */
  release_defs (stmt);
}
1328 | |
/* Try to prove, using modref summary, that all memory written to by a call is
   dead and remove it.  Assume that if return value is written to memory
   it is already proved to be dead.  Returns true if the call at GSI was
   removed.  */

static bool
dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
{
  gcall *stmt = dyn_cast <gcall *> (p: gsi_stmt (i: *gsi));

  if (!stmt)
    return false;

  tree callee = gimple_call_fndecl (gs: stmt);

  /* Only direct calls with a known decl can have a modref summary.  */
  if (!callee)
    return false;

  /* Pure/const functions are optimized by normal DCE
     or handled as store above.  */
  int flags = gimple_call_flags (stmt);
  if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
      && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
    return false;

  cgraph_node *node = cgraph_node::get (decl: callee);
  if (!node)
    return false;

  /* A potentially-throwing call can only be removed when the function
     allows deleting dead exceptions.  */
  if (stmt_could_throw_p (cfun, stmt)
      && !cfun->can_delete_dead_exceptions)
    return false;

  /* If return value is used the call is not dead.  */
  tree lhs = gimple_call_lhs (gs: stmt);
  if (lhs && TREE_CODE (lhs) == SSA_NAME)
    {
      imm_use_iterator ui;
      gimple *use_stmt;
      FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
	if (!is_gimple_debug (gs: use_stmt))
	  return false;
    }

  /* Verify that there are no side-effects except for return value
     and memory writes tracked by modref.  */
  modref_summary *summary = get_modref_function_summary (func: node);
  if (!summary || !summary->try_dse)
    return false;

  bool by_clobber_p = false;

  /* Walk all memory writes and verify that they are dead.  */
  for (auto base_node : summary->stores->bases)
    for (auto ref_node : base_node->refs)
      for (auto access_node : ref_node->accesses)
	{
	  tree arg = access_node.get_call_arg (stmt);

	  if (!arg || !POINTER_TYPE_P (TREE_TYPE (arg)))
	    return false;

	  /* A store through a literal null pointer cannot happen when
	     the zero address is invalid in ARG's address space.  */
	  if (integer_zerop (arg)
	      && !targetm.addr_space.zero_address_valid
		    (TYPE_ADDR_SPACE (TREE_TYPE (arg))))
	    continue;

	  ao_ref ref;

	  if (!access_node.get_ao_ref (stmt, ref: &ref))
	    return false;
	  ref.ref_alias_set = ref_node->ref;
	  ref.base_alias_set = base_node->base;

	  bool byte_tracking_enabled
	    = setup_live_bytes_from_ref (ref: &ref, live_bytes);
	  enum dse_store_status store_status;

	  store_status = dse_classify_store (ref: &ref, stmt,
					     byte_tracking_enabled,
					     live_bytes, by_clobber_p: &by_clobber_p);
	  /* Every tracked write must be proved dead, else keep the call.  */
	  if (store_status != DSE_STORE_DEAD)
	    return false;
	}
  delete_dead_or_redundant_assignment (gsi, type: "dead" , need_eh_cleanup,
				       need_ab_cleanup);
  return true;
}
1416 | |
/* Attempt to eliminate dead stores in the statement referenced by BSI.

   A dead store is a store into a memory location which will later be
   overwritten by another store without any intervening loads.  In this
   case the earlier store can be deleted.

   In our SSA + virtual operand world we use immediate uses of virtual
   operands to detect dead stores.  If a store's virtual definition
   is used precisely once by a later store to the same location which
   post dominates the first store, then the first store is dead.  */

static void
dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
{
  gimple *stmt = gsi_stmt (i: *gsi);

  /* Don't return early on *this_2(D) ={v} {CLOBBER}.  */
  if (gimple_has_volatile_ops (stmt)
      && (!gimple_clobber_p (s: stmt)
	  || TREE_CODE (gimple_assign_lhs (stmt)) != MEM_REF))
    return;

  ao_ref ref;
  /* If this is not a store we can still remove dead call using
     modref summary.  Note we specifically allow ref to be initialized
     to a conservative may-def since we are looking for followup stores
     to kill all of it.  */
  if (!initialize_ao_ref_for_dse (stmt, write: &ref, may_def_ok: true))
    {
      dse_optimize_call (gsi, live_bytes);
      return;
    }

  /* We know we have virtual definitions.  We can handle assignments and
     some builtin calls.  */
  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
    {
      tree fndecl = gimple_call_fndecl (gs: stmt);
      switch (DECL_FUNCTION_CODE (decl: fndecl))
	{
	case BUILT_IN_MEMCPY:
	case BUILT_IN_MEMMOVE:
	case BUILT_IN_STRNCPY:
	case BUILT_IN_MEMSET:
	case BUILT_IN_MEMCPY_CHK:
	case BUILT_IN_MEMMOVE_CHK:
	case BUILT_IN_STRNCPY_CHK:
	case BUILT_IN_MEMSET_CHK:
	  {
	    /* Occasionally calls with an explicit length of zero
	       show up in the IL.  It's pointless to do analysis
	       on them, they're trivially dead.  */
	    tree size = gimple_call_arg (gs: stmt, index: 2);
	    if (integer_zerop (size))
	      {
		delete_dead_or_redundant_call (gsi, type: "dead" );
		return;
	      }

	    /* If this is a memset call that initializes an object
	       to zero, it may be redundant with an earlier memset
	       or empty CONSTRUCTOR of a larger object.  */
	    if ((DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_MEMSET
		 || DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_MEMSET_CHK)
		&& integer_zerop (gimple_call_arg (gs: stmt, index: 1)))
	      dse_optimize_redundant_stores (stmt);

	    enum dse_store_status store_status;
	    bool byte_tracking_enabled
	      = setup_live_bytes_from_ref (ref: &ref, live_bytes);
	    store_status = dse_classify_store (ref: &ref, stmt,
					       byte_tracking_enabled,
					       live_bytes);
	    if (store_status == DSE_STORE_LIVE)
	      return;

	    /* A partially dead mem* call can have its head or tail
	       trimmed rather than being removed entirely.  */
	    if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
	      {
		maybe_trim_memstar_call (ref: &ref, live: live_bytes, stmt);
		return;
	      }

	    if (store_status == DSE_STORE_DEAD)
	      delete_dead_or_redundant_call (gsi, type: "dead" );
	    return;
	  }

	case BUILT_IN_CALLOC:
	  /* We already know the arguments are integer constants.  */
	  dse_optimize_redundant_stores (stmt);
	  return;

	default:
	  return;
	}
    }
  else if (is_gimple_call (gs: stmt)
	   && gimple_call_internal_p (gs: stmt))
    {
      switch (gimple_call_internal_fn (gs: stmt))
	{
	/* Masked/length-controlled stores are only removed when fully
	   dead; no byte tracking is attempted for them.  */
	case IFN_LEN_STORE:
	case IFN_MASK_STORE:
	case IFN_MASK_LEN_STORE:
	  {
	    enum dse_store_status store_status;
	    store_status = dse_classify_store (ref: &ref, stmt, byte_tracking_enabled: false, live_bytes);
	    if (store_status == DSE_STORE_DEAD)
	      delete_dead_or_redundant_call (gsi, type: "dead" );
	    return;
	  }
	default:;
	}
    }

  bool by_clobber_p = false;

  /* Check if this statement stores zero to a memory location,
     and if there is a subsequent store of zero to the same
     memory location.  If so, remove the subsequent store.  */
  if (gimple_assign_single_p (gs: stmt)
      && initializer_zerop (gimple_assign_rhs1 (gs: stmt)))
    dse_optimize_redundant_stores (stmt);

  /* Self-assignments are zombies.  */
  if (is_gimple_assign (gs: stmt)
      && operand_equal_p (gimple_assign_rhs1 (gs: stmt),
			  gimple_assign_lhs (gs: stmt), flags: 0))
    ;
  else
    {
      bool byte_tracking_enabled
	= setup_live_bytes_from_ref (ref: &ref, live_bytes);
      enum dse_store_status store_status;
      store_status = dse_classify_store (ref: &ref, stmt,
					 byte_tracking_enabled,
					 live_bytes, by_clobber_p: &by_clobber_p);
      if (store_status == DSE_STORE_LIVE)
	return;

      if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
	{
	  maybe_trim_partially_dead_store (ref: &ref, live: live_bytes, stmt);
	  return;
	}
    }

  /* Now we know that use_stmt kills the LHS of stmt.  */

  /* But only remove *this_2(D) ={v} {CLOBBER} if killed by
     another clobber stmt.  */
  if (gimple_clobber_p (s: stmt)
      && !by_clobber_p)
    return;

  if (is_gimple_call (gs: stmt)
      && (gimple_has_side_effects (stmt)
	  || (stmt_could_throw_p (fun, stmt)
	      && !fun->can_delete_dead_exceptions)))
    {
      /* See if we can remove complete call.  */
      if (dse_optimize_call (gsi, live_bytes))
	return;
      /* Make sure we do not remove a return slot we cannot reconstruct
	 later.  */
      if (gimple_call_return_slot_opt_p (s: as_a <gcall *>(p: stmt))
	  && (TREE_ADDRESSABLE (TREE_TYPE (gimple_call_fntype (stmt)))
	      || !poly_int_tree_p
		    (TYPE_SIZE (TREE_TYPE (gimple_call_fntype (stmt))))))
	return;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: " Deleted dead store in call LHS: " );
	  print_gimple_stmt (dump_file, stmt, 0, dump_flags);
	  fprintf (stream: dump_file, format: "\n" );
	}
      /* The call has side-effects and must stay; only drop the
	 dead stored LHS.  */
      gimple_call_set_lhs (gs: stmt, NULL_TREE);
      update_stmt (s: stmt);
    }
  else if (!stmt_could_throw_p (fun, stmt)
	   || fun->can_delete_dead_exceptions)
    delete_dead_or_redundant_assignment (gsi, type: "dead" , need_eh_cleanup,
					 need_ab_cleanup);
}
1601 | |
namespace {

/* Pass descriptor for the tree-level dead store elimination pass.  */
const pass_data pass_data_dse =
{
  .type: GIMPLE_PASS, /* type */
  .name: "dse" , /* name */
  .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
  .tv_id: TV_TREE_DSE, /* tv_id */
  .properties_required: ( PROP_cfg | PROP_ssa ), /* properties_required */
  .properties_provided: 0, /* properties_provided */
  .properties_destroyed: 0, /* properties_destroyed */
  .todo_flags_start: 0, /* todo_flags_start */
  .todo_flags_finish: 0, /* todo_flags_finish */
};
1616 | |
/* The GIMPLE dead store elimination pass.  The single pass parameter
   selects whether data-reference analysis is used to disambiguate
   memory references (see dse_stmt_to_dr_map).  */
class pass_dse : public gimple_opt_pass
{
public:
  pass_dse (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_dse, ctxt), use_dr_analysis_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_dse (m_ctxt); }
  /* Record the single pass parameter controlling DR analysis.  */
  void set_pass_param (unsigned n, bool param) final override
  {
    gcc_assert (n == 0);
    use_dr_analysis_p = param;
  }
  /* Run only when -ftree-dse is enabled.  */
  bool gate (function *) final override { return flag_tree_dse != 0; }
  unsigned int execute (function *) final override;

private:
  /* Whether to use data-reference analysis for disambiguation.  */
  bool use_dr_analysis_p;
}; // class pass_dse
1637 | |
1638 | unsigned int |
1639 | pass_dse::execute (function *fun) |
1640 | { |
1641 | unsigned todo = 0; |
1642 | bool released_def = false; |
1643 | |
1644 | need_eh_cleanup = BITMAP_ALLOC (NULL); |
1645 | need_ab_cleanup = BITMAP_ALLOC (NULL); |
1646 | auto_sbitmap live_bytes (param_dse_max_object_size); |
1647 | if (flag_expensive_optimizations && use_dr_analysis_p) |
1648 | dse_stmt_to_dr_map = new hash_map<gimple *, data_reference_p>; |
1649 | |
1650 | renumber_gimple_stmt_uids (fun); |
1651 | |
1652 | calculate_dominance_info (CDI_DOMINATORS); |
1653 | |
1654 | /* Dead store elimination is fundamentally a reverse program order walk. */ |
1655 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS); |
1656 | int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); |
1657 | for (int i = n; i != 0; --i) |
1658 | { |
1659 | basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]); |
1660 | gimple_stmt_iterator gsi; |
1661 | |
1662 | for (gsi = gsi_last_bb (bb); !gsi_end_p (i: gsi);) |
1663 | { |
1664 | gimple *stmt = gsi_stmt (i: gsi); |
1665 | |
1666 | if (gimple_vdef (g: stmt)) |
1667 | dse_optimize_stmt (fun, gsi: &gsi, live_bytes); |
1668 | else if (def_operand_p |
1669 | def_p = single_ssa_def_operand (stmt, SSA_OP_DEF)) |
1670 | { |
1671 | /* When we remove dead stores make sure to also delete trivially |
1672 | dead SSA defs. */ |
1673 | if (has_zero_uses (DEF_FROM_PTR (def_p)) |
1674 | && !gimple_has_side_effects (stmt) |
1675 | && !is_ctrl_altering_stmt (stmt) |
1676 | && (!stmt_could_throw_p (fun, stmt) |
1677 | || fun->can_delete_dead_exceptions)) |
1678 | { |
1679 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1680 | { |
1681 | fprintf (stream: dump_file, format: " Deleted trivially dead stmt: " ); |
1682 | print_gimple_stmt (dump_file, stmt, 0, dump_flags); |
1683 | fprintf (stream: dump_file, format: "\n" ); |
1684 | } |
1685 | if (gsi_remove (&gsi, true) && need_eh_cleanup) |
1686 | bitmap_set_bit (need_eh_cleanup, bb->index); |
1687 | release_defs (stmt); |
1688 | released_def = true; |
1689 | } |
1690 | } |
1691 | if (gsi_end_p (i: gsi)) |
1692 | gsi = gsi_last_bb (bb); |
1693 | else |
1694 | gsi_prev (i: &gsi); |
1695 | } |
1696 | bool removed_phi = false; |
1697 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si);) |
1698 | { |
1699 | gphi *phi = si.phi (); |
1700 | if (has_zero_uses (var: gimple_phi_result (gs: phi))) |
1701 | { |
1702 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1703 | { |
1704 | fprintf (stream: dump_file, format: " Deleted trivially dead PHI: " ); |
1705 | print_gimple_stmt (dump_file, phi, 0, dump_flags); |
1706 | fprintf (stream: dump_file, format: "\n" ); |
1707 | } |
1708 | remove_phi_node (&si, true); |
1709 | removed_phi = true; |
1710 | released_def = true; |
1711 | } |
1712 | else |
1713 | gsi_next (i: &si); |
1714 | } |
1715 | if (removed_phi && gimple_seq_empty_p (s: phi_nodes (bb))) |
1716 | todo |= TODO_cleanup_cfg; |
1717 | } |
1718 | free (ptr: rpo); |
1719 | |
1720 | /* Removal of stores may make some EH edges dead. Purge such edges from |
1721 | the CFG as needed. */ |
1722 | if (!bitmap_empty_p (map: need_eh_cleanup)) |
1723 | { |
1724 | gimple_purge_all_dead_eh_edges (need_eh_cleanup); |
1725 | todo |= TODO_cleanup_cfg; |
1726 | } |
1727 | if (!bitmap_empty_p (map: need_ab_cleanup)) |
1728 | { |
1729 | gimple_purge_all_dead_abnormal_call_edges (need_ab_cleanup); |
1730 | todo |= TODO_cleanup_cfg; |
1731 | } |
1732 | |
1733 | BITMAP_FREE (need_eh_cleanup); |
1734 | BITMAP_FREE (need_ab_cleanup); |
1735 | |
1736 | if (released_def) |
1737 | free_numbers_of_iterations_estimates (fun); |
1738 | |
1739 | if (flag_expensive_optimizations && use_dr_analysis_p) |
1740 | { |
1741 | for (auto i = dse_stmt_to_dr_map->begin (); |
1742 | i != dse_stmt_to_dr_map->end (); ++i) |
1743 | free_data_ref ((*i).second); |
1744 | delete dse_stmt_to_dr_map; |
1745 | dse_stmt_to_dr_map = NULL; |
1746 | } |
1747 | |
1748 | return todo; |
1749 | } |
1750 | |
1751 | } // anon namespace |
1752 | |
1753 | gimple_opt_pass * |
1754 | make_pass_dse (gcc::context *ctxt) |
1755 | { |
1756 | return new pass_dse (ctxt); |
1757 | } |
1758 | |