1 | /* RTL-based forward propagation pass for GNU compiler. |
2 | Copyright (C) 2005-2023 Free Software Foundation, Inc. |
3 | Contributed by Paolo Bonzini and Steven Bosscher. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #define INCLUDE_ALGORITHM |
22 | #define INCLUDE_FUNCTIONAL |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "rtl.h" |
28 | #include "rtlanal.h" |
29 | #include "df.h" |
30 | #include "rtl-ssa.h" |
31 | |
32 | #include "predict.h" |
33 | #include "cfgrtl.h" |
34 | #include "cfgcleanup.h" |
35 | #include "cfgloop.h" |
36 | #include "tree-pass.h" |
37 | #include "rtl-iter.h" |
38 | #include "target.h" |
39 | |
40 | /* This pass does simple forward propagation and simplification when an |
41 | operand of an insn can only come from a single def. This pass uses |
42 | RTL SSA, so it is global. However, we only do limited analysis of |
43 | available expressions. |
44 | |
45 | 1) The pass tries to propagate the source of the def into the use, |
46 | and checks if the result is independent of the substituted value. |
47 | For example, the high word of a (zero_extend:DI (reg:SI M)) is always |
48 | zero, independent of the source register. |
49 | |
50 | In particular, we propagate constants into the use site. Sometimes |
51 | RTL expansion did not put the constant in the same insn on purpose, |
52 | to satisfy a predicate, and the result will fail to be recognized; |
53 | but this happens rarely and in this case we can still create a |
54 | REG_EQUAL note. For multi-word operations, this |
55 | |
56 | (set (subreg:SI (reg:DI 120) 0) (const_int 0)) |
57 | (set (subreg:SI (reg:DI 120) 4) (const_int -1)) |
58 | (set (subreg:SI (reg:DI 122) 0) |
59 | (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) |
60 | (set (subreg:SI (reg:DI 122) 4) |
61 | (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) |
62 | |
63 | can be simplified to the much simpler |
64 | |
65 | (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119))) |
66 | (set (subreg:SI (reg:DI 122) 4) (const_int -1)) |
67 | |
68 | This particular propagation is also effective at putting together |
69 | complex addressing modes. We are more aggressive inside MEMs, in |
70 | that all definitions are propagated if the use is in a MEM; if the |
71 | result is a valid memory address we check address_cost to decide |
72 | whether the substitution is worthwhile. |
73 | |
74 | 2) The pass propagates register copies. This is not as effective as |
75 | the copy propagation done by CSE's canon_reg, which works by walking |
76 | the instruction chain, it can help the other transformations. |
77 | |
78 | We should consider removing this optimization, and instead reorder the |
79 | RTL passes, because GCSE does this transformation too. With some luck, |
80 | the CSE pass at the end of rest_of_handle_gcse could also go away. |
81 | |
82 | 3) The pass looks for paradoxical subregs that are actually unnecessary. |
83 | Things like this: |
84 | |
85 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) |
86 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) |
87 | (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) |
88 | (subreg:SI (reg:QI 121) 0))) |
89 | |
90 | are very common on machines that can only do word-sized operations. |
91 | For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), |
92 | if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), |
93 | we can replace the paradoxical subreg with simply (reg:WIDE M). The |
94 | above will simplify this to |
95 | |
96 | (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) |
97 | (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) |
98 | (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) |
99 | |
100 | where the first two insns are now dead. */ |
101 | |
102 | using namespace rtl_ssa; |
103 | |
104 | static int num_changes; |
105 | |
106 | /* Do not try to replace constant addresses or addresses of local and |
107 | argument slots. These MEM expressions are made only once and inserted |
108 | in many instructions, as well as being used to control symbol table |
109 | output. It is not safe to clobber them. |
110 | |
111 | There are some uncommon cases where the address is already in a register |
112 | for some reason, but we cannot take advantage of that because we have |
113 | no easy way to unshare the MEM. In addition, looking up all stack |
114 | addresses is costly. */ |
115 | |
116 | static bool |
117 | can_simplify_addr (rtx addr) |
118 | { |
119 | rtx reg; |
120 | |
121 | if (CONSTANT_ADDRESS_P (addr)) |
122 | return false; |
123 | |
124 | if (GET_CODE (addr) == PLUS) |
125 | reg = XEXP (addr, 0); |
126 | else |
127 | reg = addr; |
128 | |
129 | return (!REG_P (reg) |
130 | || (REGNO (reg) != FRAME_POINTER_REGNUM |
131 | && REGNO (reg) != HARD_FRAME_POINTER_REGNUM |
132 | && REGNO (reg) != ARG_POINTER_REGNUM)); |
133 | } |
134 | |
135 | /* MEM is the result of an address simplification, and temporarily |
136 | undoing changes OLD_NUM_CHANGES onwards restores the original address. |
137 | Return whether it is good to use the new address instead of the |
138 | old one. INSN is the containing instruction. */ |
139 | |
140 | static bool |
141 | should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) |
142 | { |
143 | int gain; |
144 | |
145 | /* Prefer the new address if it is less expensive. */ |
146 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); |
147 | temporarily_undo_changes (old_num_changes); |
148 | gain = address_cost (XEXP (mem, 0), GET_MODE (mem), |
149 | MEM_ADDR_SPACE (mem), speed); |
150 | redo_changes (old_num_changes); |
151 | gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), |
152 | MEM_ADDR_SPACE (mem), speed); |
153 | |
154 | /* If the addresses have equivalent cost, prefer the new address |
155 | if it has the highest `set_src_cost'. That has the potential of |
156 | eliminating the most insns without additional costs, and it |
157 | is the same that cse.cc used to do. */ |
158 | if (gain == 0) |
159 | { |
160 | gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed); |
161 | temporarily_undo_changes (old_num_changes); |
162 | gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed); |
163 | redo_changes (old_num_changes); |
164 | } |
165 | |
166 | return (gain > 0); |
167 | } |
168 | |
169 | |
170 | namespace |
171 | { |
172 | class fwprop_propagation : public insn_propagation |
173 | { |
174 | public: |
175 | static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT; |
176 | static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1; |
177 | static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2; |
178 | |
179 | fwprop_propagation (insn_info *, set_info *, rtx, rtx); |
180 | |
181 | bool changed_mem_p () const { return result_flags & CHANGED_MEM; } |
182 | bool folded_to_constants_p () const; |
183 | bool profitable_p () const; |
184 | |
185 | bool check_mem (int, rtx) final override; |
186 | void note_simplification (int, uint16_t, rtx, rtx) final override; |
187 | uint16_t classify_result (rtx, rtx); |
188 | |
189 | private: |
190 | const bool single_use_p; |
191 | const bool single_ebb_p; |
192 | }; |
193 | } |
194 | |
195 | /* Prepare to replace FROM with TO in USE_INSN. */ |
196 | |
197 | fwprop_propagation::fwprop_propagation (insn_info *use_insn, |
198 | set_info *def, rtx from, rtx to) |
199 | : insn_propagation (use_insn->rtl (), from, to), |
200 | single_use_p (def->single_nondebug_use ()), |
201 | single_ebb_p (use_insn->ebb () == def->ebb ()) |
202 | { |
203 | should_check_mems = true; |
204 | should_note_simplifications = true; |
205 | } |
206 | |
207 | /* MEM is the result of an address simplification, and temporarily |
208 | undoing changes OLD_NUM_CHANGES onwards restores the original address. |
209 | Return true if the propagation should continue, false if it has failed. */ |
210 | |
211 | bool |
212 | fwprop_propagation::check_mem (int old_num_changes, rtx mem) |
213 | { |
214 | if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0), |
215 | MEM_ADDR_SPACE (mem))) |
216 | { |
217 | failure_reason = "would create an invalid MEM" ; |
218 | return false; |
219 | } |
220 | |
221 | temporarily_undo_changes (old_num_changes); |
222 | bool can_simplify = can_simplify_addr (XEXP (mem, 0)); |
223 | redo_changes (old_num_changes); |
224 | if (!can_simplify) |
225 | { |
226 | failure_reason = "would replace a frame address" ; |
227 | return false; |
228 | } |
229 | |
230 | /* Copy propagations are always ok. Otherwise check the costs. */ |
231 | if (!(REG_P (from) && REG_P (to)) |
232 | && !should_replace_address (old_num_changes, mem, insn)) |
233 | { |
234 | failure_reason = "would increase the cost of a MEM" ; |
235 | return false; |
236 | } |
237 | |
238 | result_flags |= CHANGED_MEM; |
239 | return true; |
240 | } |
241 | |
242 | /* OLDX has been simplified to NEWX. Describe the change in terms of |
243 | result_flags. */ |
244 | |
245 | uint16_t |
246 | fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx) |
247 | { |
248 | if (CONSTANT_P (new_rtx)) |
249 | { |
250 | /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason, |
251 | and NEW_RTX is likely not a legitimate address. We want it to |
252 | disappear if it is invalid. |
253 | |
254 | ??? Using the mode of the LO_SUM as the mode of the address |
255 | seems odd, but it was what the pre-SSA code did. */ |
256 | if (GET_CODE (old_rtx) == LO_SUM |
257 | && !memory_address_p (GET_MODE (old_rtx), new_rtx)) |
258 | return CONSTANT; |
259 | return CONSTANT | PROFITABLE; |
260 | } |
261 | |
262 | /* Allow replacements that simplify operations on a vector or complex |
263 | value to a component. The most prominent case is |
264 | (subreg ([vec_]concat ...)). */ |
265 | if (REG_P (new_rtx) |
266 | && !HARD_REGISTER_P (new_rtx) |
267 | && (VECTOR_MODE_P (GET_MODE (from)) |
268 | || COMPLEX_MODE_P (GET_MODE (from))) |
269 | && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from))) |
270 | return PROFITABLE; |
271 | |
272 | /* Allow (subreg (mem)) -> (mem) simplifications with the following |
273 | exceptions: |
274 | 1) Propagating (mem)s into multiple uses is not profitable. |
275 | 2) Propagating (mem)s across EBBs may not be profitable if the source EBB |
276 | runs less frequently. |
277 | 3) Propagating (mem)s into paradoxical (subreg)s is not profitable. |
278 | 4) Creating new (mem/v)s is not correct, since DCE will not remove the old |
279 | ones. */ |
280 | if (single_use_p |
281 | && single_ebb_p |
282 | && SUBREG_P (old_rtx) |
283 | && !paradoxical_subreg_p (x: old_rtx) |
284 | && MEM_P (new_rtx) |
285 | && !MEM_VOLATILE_P (new_rtx)) |
286 | return PROFITABLE; |
287 | |
288 | return 0; |
289 | } |
290 | |
291 | /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES |
292 | is the number of unrelated changes that had been made before processing |
293 | OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags |
294 | had at that point. */ |
295 | |
296 | void |
297 | fwprop_propagation::note_simplification (int old_num_changes, |
298 | uint16_t old_result_flags, |
299 | rtx old_rtx, rtx new_rtx) |
300 | { |
301 | result_flags &= ~(CONSTANT | PROFITABLE); |
302 | uint16_t new_flags = classify_result (old_rtx, new_rtx); |
303 | if (old_num_changes) |
304 | new_flags &= old_result_flags; |
305 | result_flags |= new_flags; |
306 | } |
307 | |
308 | /* Return true if all substitutions eventually folded to constants. */ |
309 | |
310 | bool |
311 | fwprop_propagation::folded_to_constants_p () const |
312 | { |
313 | /* If we're propagating a HIGH, require it to be folded with a |
314 | partnering LO_SUM. For example, a REG_EQUAL note with a register |
315 | replaced by an unfolded HIGH is not useful. */ |
316 | if (CONSTANT_P (to) && GET_CODE (to) != HIGH) |
317 | return true; |
318 | return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT); |
319 | } |
320 | |
321 | |
322 | /* Return true if it is worth keeping the result of the propagation, |
323 | false if it would increase the complexity of the pattern too much. */ |
324 | |
325 | bool |
326 | fwprop_propagation::profitable_p () const |
327 | { |
328 | if (changed_mem_p ()) |
329 | return true; |
330 | |
331 | if (!(result_flags & UNSIMPLIFIED) |
332 | && (result_flags & PROFITABLE)) |
333 | return true; |
334 | |
335 | if (REG_P (to)) |
336 | return true; |
337 | |
338 | if (GET_CODE (to) == SUBREG |
339 | && REG_P (SUBREG_REG (to)) |
340 | && !paradoxical_subreg_p (x: to)) |
341 | return true; |
342 | |
343 | if (CONSTANT_P (to)) |
344 | return true; |
345 | |
346 | return false; |
347 | } |
348 | |
349 | /* Check that X has a single def. */ |
350 | |
351 | static bool |
352 | reg_single_def_p (rtx x) |
353 | { |
354 | return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); |
355 | } |
356 | |
357 | /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of |
358 | USE_INSN. Return the number of substitutions on success, otherwise return |
359 | -1 and leave USE_INSN unchanged. |
360 | |
361 | If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC |
362 | to fold to a constant, so that the note does not use any more registers |
363 | than it did previously. If REQUIRE_CONSTANT is false, also allow the |
364 | substitution if it's something we'd normally allow for the main |
365 | instruction pattern. */ |
366 | |
367 | static int |
368 | try_fwprop_subst_note (insn_info *use_insn, set_info *def, |
369 | rtx note, rtx dest, rtx src, bool require_constant) |
370 | { |
371 | rtx_insn *use_rtl = use_insn->rtl (); |
372 | insn_info *def_insn = def->insn (); |
373 | |
374 | insn_change_watermark watermark; |
375 | fwprop_propagation prop (use_insn, def, dest, src); |
376 | if (!prop.apply_to_rvalue (&XEXP (note, 0))) |
377 | { |
378 | if (dump_file && (dump_flags & TDF_DETAILS)) |
379 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
380 | " notes of insn %d: %s\n" , def_insn->uid (), |
381 | use_insn->uid (), prop.failure_reason); |
382 | return -1; |
383 | } |
384 | |
385 | if (prop.num_replacements == 0) |
386 | return 0; |
387 | |
388 | if (require_constant) |
389 | { |
390 | if (!prop.folded_to_constants_p ()) |
391 | { |
392 | if (dump_file && (dump_flags & TDF_DETAILS)) |
393 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
394 | " notes of insn %d: %s\n" , def_insn->uid (), |
395 | use_insn->uid (), "wouldn't fold to constants" ); |
396 | return -1; |
397 | } |
398 | } |
399 | else |
400 | { |
401 | if (!prop.folded_to_constants_p () && !prop.profitable_p ()) |
402 | { |
403 | if (dump_file && (dump_flags & TDF_DETAILS)) |
404 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
405 | " notes of insn %d: %s\n" , def_insn->uid (), |
406 | use_insn->uid (), "would increase complexity of node" ); |
407 | return -1; |
408 | } |
409 | } |
410 | |
411 | if (dump_file && (dump_flags & TDF_DETAILS)) |
412 | { |
413 | fprintf (stream: dump_file, format: "\nin notes of insn %d, replacing:\n " , |
414 | INSN_UID (insn: use_rtl)); |
415 | temporarily_undo_changes (0); |
416 | print_inline_rtx (dump_file, note, 2); |
417 | redo_changes (0); |
418 | fprintf (stream: dump_file, format: "\n with:\n " ); |
419 | print_inline_rtx (dump_file, note, 2); |
420 | fprintf (stream: dump_file, format: "\n" ); |
421 | } |
422 | watermark.keep (); |
423 | return prop.num_replacements; |
424 | } |
425 | |
426 | /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of |
427 | USE_INSN's pattern. Return true on success, otherwise leave USE_INSN |
428 | unchanged. */ |
429 | |
430 | static bool |
431 | try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, |
432 | set_info *def, rtx *loc, rtx dest, rtx src) |
433 | { |
434 | insn_info *use_insn = use_change.insn (); |
435 | rtx_insn *use_rtl = use_insn->rtl (); |
436 | insn_info *def_insn = def->insn (); |
437 | |
438 | insn_change_watermark watermark; |
439 | fwprop_propagation prop (use_insn, def, dest, src); |
440 | if (!prop.apply_to_pattern (loc)) |
441 | { |
442 | if (dump_file && (dump_flags & TDF_DETAILS)) |
443 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
444 | " insn %d: %s\n" , def_insn->uid (), use_insn->uid (), |
445 | prop.failure_reason); |
446 | return false; |
447 | } |
448 | |
449 | if (prop.num_replacements == 0) |
450 | return false; |
451 | |
452 | if (!prop.profitable_p ()) |
453 | { |
454 | if (dump_file && (dump_flags & TDF_DETAILS)) |
455 | fprintf (stream: dump_file, format: "cannot propagate from insn %d into" |
456 | " insn %d: %s\n" , def_insn->uid (), use_insn->uid (), |
457 | "would increase complexity of pattern" ); |
458 | return false; |
459 | } |
460 | |
461 | if (dump_file && (dump_flags & TDF_DETAILS)) |
462 | { |
463 | fprintf (stream: dump_file, format: "\npropagating insn %d into insn %d, replacing:\n" , |
464 | def_insn->uid (), use_insn->uid ()); |
465 | temporarily_undo_changes (0); |
466 | print_rtl_single (dump_file, PATTERN (insn: use_rtl)); |
467 | redo_changes (0); |
468 | } |
469 | |
470 | /* ??? In theory, it should be better to use insn costs rather than |
471 | set_src_costs here. That would involve replacing this code with |
472 | change_is_worthwhile. */ |
473 | bool ok = recog (watermark&: attempt, change&: use_change); |
474 | if (ok && !prop.changed_mem_p () && !use_insn->is_asm ()) |
475 | if (rtx use_set = single_set (insn: use_rtl)) |
476 | { |
477 | bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn: use_rtl)); |
478 | temporarily_undo_changes (0); |
479 | auto old_cost = set_src_cost (SET_SRC (use_set), |
480 | GET_MODE (SET_DEST (use_set)), speed_p: speed); |
481 | redo_changes (0); |
482 | auto new_cost = set_src_cost (SET_SRC (use_set), |
483 | GET_MODE (SET_DEST (use_set)), speed_p: speed); |
484 | if (new_cost > old_cost) |
485 | { |
486 | if (dump_file) |
487 | fprintf (stream: dump_file, format: "change not profitable" |
488 | " (cost %d -> cost %d)\n" , old_cost, new_cost); |
489 | ok = false; |
490 | } |
491 | } |
492 | |
493 | if (!ok) |
494 | { |
495 | /* The pattern didn't match, but if all uses of SRC folded to |
496 | constants, we can add a REG_EQUAL note for the result, if there |
497 | isn't one already. */ |
498 | if (!prop.folded_to_constants_p ()) |
499 | return false; |
500 | |
501 | /* Test this first to avoid creating an unnecessary copy of SRC. */ |
502 | if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX)) |
503 | return false; |
504 | |
505 | rtx set = set_for_reg_notes (use_rtl); |
506 | if (!set || !REG_P (SET_DEST (set))) |
507 | return false; |
508 | |
509 | rtx value = copy_rtx (SET_SRC (set)); |
510 | cancel_changes (0); |
511 | |
512 | /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note, |
513 | because the bits in there can be anything and so might not |
514 | match the REG_EQUAL note content. See PR70574. */ |
515 | if (contains_paradoxical_subreg_p (SET_SRC (set))) |
516 | return false; |
517 | |
518 | if (dump_file && (dump_flags & TDF_DETAILS)) |
519 | fprintf (stream: dump_file, format: " Setting REG_EQUAL note\n" ); |
520 | |
521 | return set_unique_reg_note (use_rtl, REG_EQUAL, value); |
522 | } |
523 | |
524 | rtx *note_ptr = ®_NOTES (use_rtl); |
525 | while (rtx note = *note_ptr) |
526 | { |
527 | if ((REG_NOTE_KIND (note) == REG_EQUAL |
528 | || REG_NOTE_KIND (note) == REG_EQUIV) |
529 | && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: false) < 0) |
530 | { |
531 | *note_ptr = XEXP (note, 1); |
532 | free_EXPR_LIST_node (note); |
533 | } |
534 | else |
535 | note_ptr = &XEXP (note, 1); |
536 | } |
537 | |
538 | confirm_change_group (); |
539 | crtl->ssa->change_insn (change&: use_change); |
540 | num_changes++; |
541 | return true; |
542 | } |
543 | |
544 | /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes, |
545 | given that it was not possible to do this for USE_INSN's main pattern. |
546 | Return true on success, otherwise leave USE_INSN unchanged. */ |
547 | |
548 | static bool |
549 | try_fwprop_subst_notes (insn_info *use_insn, set_info *def, |
550 | rtx dest, rtx src) |
551 | { |
552 | rtx_insn *use_rtl = use_insn->rtl (); |
553 | for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1)) |
554 | if ((REG_NOTE_KIND (note) == REG_EQUAL |
555 | || REG_NOTE_KIND (note) == REG_EQUIV) |
556 | && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: true) > 0) |
557 | { |
558 | confirm_change_group (); |
559 | return true; |
560 | } |
561 | |
562 | return false; |
563 | } |
564 | |
565 | /* Check whether we could validly substitute (set DEST SRC), which defines DEF, |
566 | into USE. If so, first try performing the substitution in location LOC |
567 | of USE->insn ()'s pattern. If that fails, try instead to substitute |
568 | into the notes. |
569 | |
570 | Return true on success, otherwise leave USE_INSN unchanged. */ |
571 | |
572 | static bool |
573 | try_fwprop_subst (use_info *use, set_info *def, |
574 | rtx *loc, rtx dest, rtx src) |
575 | { |
576 | insn_info *use_insn = use->insn (); |
577 | insn_info *def_insn = def->insn (); |
578 | |
579 | auto attempt = crtl->ssa->new_change_attempt (); |
580 | use_array src_uses = remove_note_accesses (watermark&: attempt, accesses: def_insn->uses ()); |
581 | |
582 | /* ??? Not really a meaningful test: it means we can propagate arithmetic |
583 | involving hard registers but not bare references to them. A better |
584 | test would be to iterate over src_uses looking for hard registers |
585 | that are not fixed. */ |
586 | if (REG_P (src) && HARD_REGISTER_P (src)) |
587 | return false; |
588 | |
589 | /* ??? It would be better to make this EBB-based instead. That would |
590 | involve checking for equal EBBs rather than equal BBs and trying |
591 | to make the uses available at use_insn->ebb ()->first_bb (). */ |
592 | if (def_insn->bb () != use_insn->bb ()) |
593 | { |
594 | src_uses = crtl->ssa->make_uses_available (watermark&: attempt, uses: src_uses, |
595 | bb: use_insn->bb (), |
596 | will_be_debug_uses: use_insn->is_debug_insn ()); |
597 | if (!src_uses.is_valid ()) |
598 | return false; |
599 | } |
600 | |
601 | insn_change use_change (use_insn); |
602 | use_change.new_uses = merge_access_arrays (watermark&: attempt, accesses1: use_change.new_uses, |
603 | accesses2: src_uses); |
604 | if (!use_change.new_uses.is_valid ()) |
605 | return false; |
606 | |
607 | /* ??? We could allow movement within the EBB by adding: |
608 | |
609 | use_change.move_range = use_insn->ebb ()->insn_range (); */ |
610 | if (!restrict_movement (change&: use_change)) |
611 | return false; |
612 | |
613 | return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src) |
614 | || try_fwprop_subst_notes (use_insn, def, dest, src)); |
615 | } |
616 | |
617 | /* For the given single_set INSN, containing SRC known to be a |
618 | ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN |
619 | is redundant due to the register being set by a LOAD_EXTEND_OP |
620 | load from memory. */ |
621 | |
622 | static bool |
623 | free_load_extend (rtx src, insn_info *insn) |
624 | { |
625 | rtx reg = XEXP (src, 0); |
626 | if (load_extend_op (GET_MODE (reg)) != GET_CODE (src)) |
627 | return false; |
628 | |
629 | def_info *def = nullptr; |
630 | for (use_info *use : insn->uses ()) |
631 | if (use->regno () == REGNO (reg)) |
632 | { |
633 | def = use->def (); |
634 | break; |
635 | } |
636 | |
637 | if (!def) |
638 | return false; |
639 | |
640 | insn_info *def_insn = def->insn (); |
641 | if (def_insn->is_artificial ()) |
642 | return false; |
643 | |
644 | rtx_insn *def_rtl = def_insn->rtl (); |
645 | if (NONJUMP_INSN_P (def_rtl)) |
646 | { |
647 | rtx patt = PATTERN (insn: def_rtl); |
648 | |
649 | if (GET_CODE (patt) == SET |
650 | && GET_CODE (SET_SRC (patt)) == MEM |
651 | && rtx_equal_p (SET_DEST (patt), reg)) |
652 | return true; |
653 | } |
654 | return false; |
655 | } |
656 | |
657 | /* Subroutine of forward_propagate_subreg that handles a use of DEST |
658 | in REF. The other parameters are the same. */ |
659 | |
660 | static bool |
661 | forward_propagate_subreg (use_info *use, set_info *def, |
662 | rtx dest, rtx src, df_ref ref) |
663 | { |
664 | scalar_int_mode int_use_mode, src_mode; |
665 | |
666 | /* Only consider subregs... */ |
667 | rtx use_reg = DF_REF_REG (ref); |
668 | machine_mode use_mode = GET_MODE (use_reg); |
669 | if (GET_CODE (use_reg) != SUBREG |
670 | || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest)) |
671 | return false; |
672 | |
673 | /* ??? Replacing throughout the pattern would help for match_dups. */ |
674 | rtx *loc = DF_REF_LOC (ref); |
675 | if (paradoxical_subreg_p (x: use_reg)) |
676 | { |
677 | /* If this is a paradoxical SUBREG, we have no idea what value the |
678 | extra bits would have. However, if the operand is equivalent to |
679 | a SUBREG whose operand is the same as our mode, and all the modes |
680 | are within a word, we can just use the inner operand because |
681 | these SUBREGs just say how to treat the register. */ |
682 | if (GET_CODE (src) == SUBREG |
683 | && REG_P (SUBREG_REG (src)) |
684 | && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER |
685 | && GET_MODE (SUBREG_REG (src)) == use_mode |
686 | && subreg_lowpart_p (src)) |
687 | return try_fwprop_subst (use, def, loc, dest: use_reg, SUBREG_REG (src)); |
688 | } |
689 | |
690 | /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG |
691 | is the low part of the reg being extended then just use the inner |
692 | operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will |
693 | be removed due to it matching a LOAD_EXTEND_OP load from memory, |
694 | or due to the operation being a no-op when applied to registers. |
695 | For example, if we have: |
696 | |
697 | A: (set (reg:DI X) (sign_extend:DI (reg:SI Y))) |
698 | B: (... (subreg:SI (reg:DI X)) ...) |
699 | |
700 | and mode_rep_extended says that Y is already sign-extended, |
701 | the backend will typically allow A to be combined with the |
702 | definition of Y or, failing that, allow A to be deleted after |
703 | reload through register tying. Introducing more uses of Y |
704 | prevents both optimisations. */ |
705 | else if (is_a <scalar_int_mode> (m: use_mode, result: &int_use_mode) |
706 | && subreg_lowpart_p (use_reg)) |
707 | { |
708 | if ((GET_CODE (src) == ZERO_EXTEND |
709 | || GET_CODE (src) == SIGN_EXTEND) |
710 | && is_a <scalar_int_mode> (GET_MODE (src), result: &src_mode) |
711 | && REG_P (XEXP (src, 0)) |
712 | && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER |
713 | && GET_MODE (XEXP (src, 0)) == use_mode |
714 | && !free_load_extend (src, insn: def->insn ()) |
715 | && (targetm.mode_rep_extended (int_use_mode, src_mode) |
716 | != (int) GET_CODE (src))) |
717 | return try_fwprop_subst (use, def, loc, dest: use_reg, XEXP (src, 0)); |
718 | } |
719 | |
720 | return false; |
721 | } |
722 | |
723 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify |
724 | the result, handling cases where DEST is used in a subreg and where |
725 | applying that subreg to SRC results in a useful simplification. */ |
726 | |
727 | static bool |
728 | forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src) |
729 | { |
730 | if (!use->includes_subregs () || !REG_P (dest)) |
731 | return false; |
732 | |
733 | if (GET_CODE (src) != SUBREG |
734 | && GET_CODE (src) != ZERO_EXTEND |
735 | && GET_CODE (src) != SIGN_EXTEND) |
736 | return false; |
737 | |
738 | rtx_insn *use_rtl = use->insn ()->rtl (); |
739 | df_ref ref; |
740 | |
741 | FOR_EACH_INSN_USE (ref, use_rtl) |
742 | if (DF_REF_REGNO (ref) == use->regno () |
743 | && forward_propagate_subreg (use, def, dest, src, ref)) |
744 | return true; |
745 | |
746 | FOR_EACH_INSN_EQ_USE (ref, use_rtl) |
747 | if (DF_REF_REGNO (ref) == use->regno () |
748 | && forward_propagate_subreg (use, def, dest, src, ref)) |
749 | return true; |
750 | |
751 | return false; |
752 | } |
753 | |
754 | /* Try to substitute (set DEST SRC), which defines DEF, into USE and |
755 | simplify the result. */ |
756 | |
757 | static bool |
758 | forward_propagate_and_simplify (use_info *use, set_info *def, |
759 | rtx dest, rtx src) |
760 | { |
761 | insn_info *use_insn = use->insn (); |
762 | rtx_insn *use_rtl = use_insn->rtl (); |
763 | insn_info *def_insn = def->insn (); |
764 | |
765 | /* ??? This check seems unnecessary. We should be able to propagate |
766 | into any kind of instruction, regardless of whether it's a single set. |
767 | It seems odd to be more permissive with asms than normal instructions. */ |
768 | bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ()); |
769 | rtx use_set = single_set (insn: use_rtl); |
770 | if (need_single_set && !use_set) |
771 | return false; |
772 | |
773 | /* Do not propagate into PC etc. |
774 | |
775 | ??? This too seems unnecessary. The current code should work correctly |
776 | without it, including cases where jumps become unconditional. */ |
777 | if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) |
778 | return false; |
779 | |
780 | /* In __asm don't replace if src might need more registers than |
781 | reg, as that could increase register pressure on the __asm. */ |
782 | if (use_insn->is_asm () && def_insn->uses ().size () > 1) |
783 | return false; |
784 | |
785 | /* Check if the def is loading something from the constant pool; in this |
786 | case we would undo optimization such as compress_float_constant. |
787 | Still, we can set a REG_EQUAL note. */ |
788 | if (MEM_P (src) && MEM_READONLY_P (src)) |
789 | { |
790 | rtx x = avoid_constant_pool_reference (src); |
791 | rtx note_set; |
792 | if (x != src |
793 | && (note_set = set_for_reg_notes (use_rtl)) |
794 | && REG_P (SET_DEST (note_set)) |
795 | && !contains_paradoxical_subreg_p (SET_SRC (note_set))) |
796 | { |
797 | rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX); |
798 | rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set); |
799 | rtx new_rtx = simplify_replace_rtx (old_rtx, src, x); |
800 | if (old_rtx != new_rtx) |
801 | set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx)); |
802 | } |
803 | return false; |
804 | } |
805 | |
806 | /* ??? Unconditionally propagating into PATTERN would work better |
807 | for instructions that have match_dups. */ |
808 | rtx *loc = need_single_set ? &use_set : &PATTERN (insn: use_rtl); |
809 | return try_fwprop_subst (use, def, loc, dest, src); |
810 | } |
811 | |
812 | /* Given a use USE of an insn, if it has a single reaching |
813 | definition, try to forward propagate it into that insn. |
814 | Return true if something changed. |
815 | |
816 | REG_PROP_ONLY is true if we should only propagate register copies. */ |
817 | |
818 | static bool |
819 | forward_propagate_into (use_info *use, bool reg_prop_only = false) |
820 | { |
821 | if (use->includes_read_writes ()) |
822 | return false; |
823 | |
824 | /* Disregard uninitialized uses. */ |
825 | set_info *def = use->def (); |
826 | if (!def) |
827 | return false; |
828 | |
829 | /* Only consider single-register definitions. This could be relaxed, |
830 | but it should rarely be needed before RA. */ |
831 | def = look_through_degenerate_phi (access: def); |
832 | if (def->includes_multiregs ()) |
833 | return false; |
834 | |
835 | /* Only consider uses whose definition comes from a real instruction. */ |
836 | insn_info *def_insn = def->insn (); |
837 | if (def_insn->is_artificial ()) |
838 | return false; |
839 | |
840 | rtx_insn *def_rtl = def_insn->rtl (); |
841 | if (!NONJUMP_INSN_P (def_rtl)) |
842 | return false; |
843 | /* ??? This seems an unnecessary restriction. We can easily tell |
844 | which set the definition comes from. */ |
845 | if (multiple_sets (def_rtl)) |
846 | return false; |
847 | rtx def_set = simple_regno_set (PATTERN (insn: def_rtl), def->regno ()); |
848 | if (!def_set) |
849 | return false; |
850 | |
851 | rtx dest = SET_DEST (def_set); |
852 | rtx src = SET_SRC (def_set); |
853 | |
854 | /* Allow propagations into a loop only for reg-to-reg copies, since |
855 | replacing one register by another shouldn't increase the cost. |
856 | Propagations from inner loop to outer loop should also be ok. */ |
857 | struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father; |
858 | struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father; |
859 | if ((reg_prop_only |
860 | || (def_loop != use_loop |
861 | && !flow_loop_nested_p (use_loop, def_loop))) |
862 | && (!reg_single_def_p (x: dest) || !reg_single_def_p (x: src))) |
863 | return false; |
864 | |
865 | /* Don't substitute into a non-local goto, this confuses CFG. */ |
866 | insn_info *use_insn = use->insn (); |
867 | rtx_insn *use_rtl = use_insn->rtl (); |
868 | if (JUMP_P (use_rtl) |
869 | && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX)) |
870 | return false; |
871 | |
872 | if (forward_propagate_and_simplify (use, def, dest, src) |
873 | || forward_propagate_subreg (use, def, dest, src)) |
874 | return true; |
875 | |
876 | return false; |
877 | } |
878 | |
879 | static void |
880 | fwprop_init (void) |
881 | { |
882 | num_changes = 0; |
883 | calculate_dominance_info (CDI_DOMINATORS); |
884 | |
885 | /* We do not always want to propagate into loops, so we have to find |
886 | loops and be careful about them. Avoid CFG modifications so that |
887 | we don't have to update dominance information afterwards for |
888 | build_single_def_use_links. */ |
889 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
890 | |
891 | df_analyze (); |
892 | crtl->ssa = new rtl_ssa::function_info (cfun); |
893 | } |
894 | |
895 | static void |
896 | fwprop_done (void) |
897 | { |
898 | loop_optimizer_finalize (); |
899 | |
900 | crtl->ssa->perform_pending_updates (); |
901 | free_dominance_info (CDI_DOMINATORS); |
902 | cleanup_cfg (0); |
903 | |
904 | delete crtl->ssa; |
905 | crtl->ssa = nullptr; |
906 | |
907 | delete_trivially_dead_insns (get_insns (), max_reg_num ()); |
908 | |
909 | if (dump_file) |
910 | fprintf (stream: dump_file, |
911 | format: "\nNumber of successful forward propagations: %d\n\n" , |
912 | num_changes); |
913 | } |
914 | |
915 | /* Try to optimize INSN, returning true if something changes. |
916 | FWPROP_ADDR_P is true if we are running fwprop_addr rather than |
917 | the full fwprop. */ |
918 | |
919 | static bool |
920 | fwprop_insn (insn_info *insn, bool fwprop_addr_p) |
921 | { |
922 | for (use_info *use : insn->uses ()) |
923 | { |
924 | if (use->is_mem ()) |
925 | continue; |
926 | /* ??? The choices here follow those in the pre-SSA code. */ |
927 | if (!use->includes_address_uses ()) |
928 | { |
929 | if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p)) |
930 | return true; |
931 | } |
932 | else |
933 | { |
934 | struct loop *loop = insn->bb ()->cfg_bb ()->loop_father; |
935 | /* The outermost loop is not really a loop. */ |
936 | if (loop == NULL || loop_outer (loop) == NULL) |
937 | { |
938 | if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p)) |
939 | return true; |
940 | } |
941 | else if (fwprop_addr_p) |
942 | { |
943 | if (forward_propagate_into (use, reg_prop_only: false)) |
944 | return true; |
945 | } |
946 | } |
947 | } |
948 | return false; |
949 | } |
950 | |
951 | /* Main entry point. */ |
952 | |
953 | static bool |
954 | gate_fwprop (void) |
955 | { |
956 | return optimize > 0 && flag_forward_propagate; |
957 | } |
958 | |
959 | static unsigned int |
960 | fwprop (bool fwprop_addr_p) |
961 | { |
962 | fwprop_init (); |
963 | |
964 | /* Go through all the instructions (including debug instructions) looking |
965 | for uses that we could propagate into. |
966 | |
967 | Do not forward propagate addresses into loops until after unrolling. |
968 | CSE did so because it was able to fix its own mess, but we are not. */ |
969 | |
970 | insn_info *next; |
971 | |
972 | /* ??? This code uses a worklist in order to preserve the behavior |
973 | of the pre-SSA implementation. It would be better to instead |
974 | iterate on each instruction until no more propagations are |
975 | possible, then move on to the next. */ |
976 | auto_vec<insn_info *> worklist; |
977 | for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next) |
978 | { |
979 | next = insn->next_any_insn (); |
980 | if (insn->can_be_optimized () || insn->is_debug_insn ()) |
981 | if (fwprop_insn (insn, fwprop_addr_p)) |
982 | worklist.safe_push (obj: insn); |
983 | } |
984 | for (unsigned int i = 0; i < worklist.length (); ++i) |
985 | { |
986 | insn_info *insn = worklist[i]; |
987 | if (fwprop_insn (insn, fwprop_addr_p)) |
988 | worklist.safe_push (obj: insn); |
989 | } |
990 | |
991 | fwprop_done (); |
992 | return 0; |
993 | } |
994 | |
995 | namespace { |
996 | |
997 | const pass_data pass_data_rtl_fwprop = |
998 | { |
999 | .type: RTL_PASS, /* type */ |
1000 | .name: "fwprop1" , /* name */ |
1001 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1002 | .tv_id: TV_FWPROP, /* tv_id */ |
1003 | .properties_required: 0, /* properties_required */ |
1004 | .properties_provided: 0, /* properties_provided */ |
1005 | .properties_destroyed: 0, /* properties_destroyed */ |
1006 | .todo_flags_start: 0, /* todo_flags_start */ |
1007 | TODO_df_finish, /* todo_flags_finish */ |
1008 | }; |
1009 | |
1010 | class pass_rtl_fwprop : public rtl_opt_pass |
1011 | { |
1012 | public: |
1013 | pass_rtl_fwprop (gcc::context *ctxt) |
1014 | : rtl_opt_pass (pass_data_rtl_fwprop, ctxt) |
1015 | {} |
1016 | |
1017 | /* opt_pass methods: */ |
1018 | bool gate (function *) final override { return gate_fwprop (); } |
1019 | unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: false); } |
1020 | |
1021 | }; // class pass_rtl_fwprop |
1022 | |
1023 | } // anon namespace |
1024 | |
1025 | rtl_opt_pass * |
1026 | make_pass_rtl_fwprop (gcc::context *ctxt) |
1027 | { |
1028 | return new pass_rtl_fwprop (ctxt); |
1029 | } |
1030 | |
1031 | namespace { |
1032 | |
1033 | const pass_data pass_data_rtl_fwprop_addr = |
1034 | { |
1035 | .type: RTL_PASS, /* type */ |
1036 | .name: "fwprop2" , /* name */ |
1037 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1038 | .tv_id: TV_FWPROP, /* tv_id */ |
1039 | .properties_required: 0, /* properties_required */ |
1040 | .properties_provided: 0, /* properties_provided */ |
1041 | .properties_destroyed: 0, /* properties_destroyed */ |
1042 | .todo_flags_start: 0, /* todo_flags_start */ |
1043 | TODO_df_finish, /* todo_flags_finish */ |
1044 | }; |
1045 | |
1046 | class pass_rtl_fwprop_addr : public rtl_opt_pass |
1047 | { |
1048 | public: |
1049 | pass_rtl_fwprop_addr (gcc::context *ctxt) |
1050 | : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt) |
1051 | {} |
1052 | |
1053 | /* opt_pass methods: */ |
1054 | bool gate (function *) final override { return gate_fwprop (); } |
1055 | unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: true); } |
1056 | |
1057 | }; // class pass_rtl_fwprop_addr |
1058 | |
1059 | } // anon namespace |
1060 | |
1061 | rtl_opt_pass * |
1062 | make_pass_rtl_fwprop_addr (gcc::context *ctxt) |
1063 | { |
1064 | return new pass_rtl_fwprop_addr (ctxt); |
1065 | } |
1066 | |