/* Late RTL pass to fold memory offsets.
   Copyright (C) 2023-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "expr.h"
#include "backend.h"
#include "regs.h"
#include "target.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "insn-config.h"
#include "recog.h"
#include "predict.h"
#include "df.h"
#include "tree-pass.h"
#include "cfgrtl.h"

/* This pass tries to optimize memory offset calculations by moving constants
   from add instructions to the memory instructions (loads / stores).
   For example it can transform code like this:

     add  t4, sp, 16
     add  t2, a6, t4
     shl  t3, t2, 1
     ld   a2, 0(t3)
     add  a2, 1
     sd   a2, 8(t2)

   into the following (one instruction less):

     add  t2, a6, sp
     shl  t3, t2, 1
     ld   a2, 32(t3)
     add  a2, 1
     sd   a2, 24(t2)

   Although the previous passes try to emit efficient offset calculations
   this pass is still beneficial because:

    - The mechanisms that optimize memory offsets usually work with specific
      patterns or have limitations.  This pass is designed to fold offsets
      through complex calculations that affect multiple memory operations
      and have partially overlapping calculations.

    - There are cases where add instructions are introduced in late rtl passes
      and the rest of the pipeline cannot eliminate them.  Arrays and structs
      allocated on the stack can result in unwanted add instructions that
      cannot be eliminated easily.

   This pass works on a basic block level and consists of 4 phases:

    - Phase 1 (Analysis): Find "foldable" instructions.
      Foldable instructions are those that we know how to propagate
      a constant addition through (add, shift, move, ...) and only have other
      foldable instructions for uses.  In that phase a DFS traversal on the
      definition tree is performed and foldable instructions are marked on
      a bitmap.  The add immediate instructions that are reachable in this
      DFS are candidates for folding since all the intermediate calculations
      affected by them are also foldable.

    - Phase 2 (Validity): Traverse and calculate the offsets that would result
      from folding the add immediate instructions.  Check whether the
      calculated offsets result in a valid instruction for the target.

    - Phase 3 (Commit offsets): Traverse again.  It is now known which folds
      are valid so at this point change the offsets in the memory instructions.

    - Phase 4 (Commit instruction deletions): Scan all instructions and delete
      or simplify (reduce to move) all add immediate instructions that were
      folded.

   This pass should run before hard register propagation because it creates
   register moves that we expect to be eliminated.  */
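
/* Note on scheduling: at the time of writing, passes.def places this pass
   shortly before pass_cprop_hardreg (hard register copy propagation), which
   is what is expected to clean up the register moves emitted in phase 4.  */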

namespace {

const pass_data pass_data_fold_mem =
{
  RTL_PASS, /* type */
  "fold_mem_offsets", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_fold_mem_offsets : public rtl_opt_pass
{
public:
  pass_fold_mem_offsets (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_fold_mem, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -ffold-mem-offsets (flag_fold_mem_offsets) is enabled and
     we are optimizing at -O2 or above.  */
  virtual bool gate (function *)
    {
      return flag_fold_mem_offsets && optimize >= 2;
    }

  virtual unsigned int execute (function *);
}; // class pass_fold_mem_offsets

/* Class that holds in FOLD_INSNS the instructions that, if folded, would
   increase the offset of a memory instruction by ADDED_OFFSET.  */
class fold_mem_info {
public:
  auto_bitmap fold_insns;
  HOST_WIDE_INT added_offset;
};

typedef hash_map<rtx_insn *, fold_mem_info *> fold_info_map;

/* Tracks which instructions can be reached through instructions that can
   propagate offsets for folding.  */
static bitmap_head can_fold_insns;

/* Marks instructions that are currently eligible for folding.  */
static bitmap_head candidate_fold_insns;

/* Tracks instructions that cannot be folded because it turned out that
   folding will result in creating an invalid memory instruction.
   An instruction can be in both CANDIDATE_FOLD_INSNS and CANNOT_FOLD_INSNS
   at the same time, in which case it is not legal to fold.  */
static bitmap_head cannot_fold_insns;

/* The number of instructions that were simplified or eliminated.  */
static int stats_fold_count;

/* Get the single reaching definition of an instruction inside a BB.
   The definition is desired for REG used in INSN.
   Return the definition insn or NULL if there's no definition with
   the desired criteria.  */
static rtx_insn *
get_single_def_in_bb (rtx_insn *insn, rtx reg)
{
  df_ref use;
  struct df_link *ref_chain, *ref_link;

  FOR_EACH_INSN_USE (use, insn)
    {
      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
        return NULL;
      if (REGNO (DF_REF_REG (use)) == REGNO (reg))
        break;
    }

  if (!use)
    return NULL;

  ref_chain = DF_REF_CHAIN (use);

  if (!ref_chain)
    return NULL;

  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
    {
      /* Problem getting some definition for this instruction.  */
      if (ref_link->ref == NULL)
        return NULL;
      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
        return NULL;
      if (global_regs[REGNO (reg)]
          && !set_of (reg, DF_REF_INSN (ref_link->ref)))
        return NULL;
    }

  /* Only accept a single reaching definition.  */
  if (ref_chain->next)
    return NULL;

  rtx_insn *def = DF_REF_INSN (ref_chain->ref);

  /* The definition must be in the same basic block and must not come after
     INSN.  */
  if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn))
    return NULL;

  if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
    return NULL;

  return def;
}
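
/* Both get_single_def_in_bb above and get_uses below rely on the use-def /
   def-use chains that execute () requests through df_chain_add_problem
   (DF_UD_CHAIN + DF_DU_CHAIN) before calling df_analyze ().  */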

/* Get all uses of REG which is set in INSN.  Return the use list or NULL if a
   use is missing / irregular.  If SUCCESS is not NULL then set it to false if
   there are missing / irregular uses and true otherwise.  */
static df_link *
get_uses (rtx_insn *insn, rtx reg, bool *success)
{
  df_ref def;

  if (success)
    *success = false;

  FOR_EACH_INSN_DEF (def, insn)
    if (REGNO (DF_REF_REG (def)) == REGNO (reg))
      break;

  if (!def)
    return NULL;

  df_link *ref_chain = DF_REF_CHAIN (def);
  int insn_luid = DF_INSN_LUID (insn);
  basic_block insn_bb = BLOCK_FOR_INSN (insn);

  for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
    {
      /* Problem getting a use for this instruction.  */
      if (ref_link->ref == NULL)
        return NULL;
      if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
        return NULL;

      rtx_insn *use = DF_REF_INSN (ref_link->ref);
      if (DEBUG_INSN_P (use))
        continue;

      /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
      if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
        return NULL;
      if (BLOCK_FOR_INSN (use) != insn_bb)
        return NULL;
      /* Punt if use appears before def in the basic block.  See PR111601.  */
      if (DF_INSN_LUID (use) < insn_luid)
        return NULL;
    }

  if (success)
    *success = true;

  return ref_chain;
}

static HOST_WIDE_INT
fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);

/* Helper function for fold_offsets.

   If DO_RECURSION is false and ANALYZE is true this function returns true iff
   it understands the structure of INSN and knows how to propagate constants
   through it.  In this case OFFSET_OUT and FOLDABLE_INSNS are unused.

   If DO_RECURSION is true then it also calls fold_offsets for each recognized
   part of INSN with the appropriate arguments.

   If DO_RECURSION is true and ANALYZE is false then the offset that would
   result from folding is computed and is returned through the pointer
   OFFSET_OUT.  The instructions that can be folded are recorded in
   FOLDABLE_INSNS.  */
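/* For illustration, the SET_SRC shapes recognized by the switch below are:
     (plus X Y)   where X is a REG, (ashift REG C), (plus REG REG) or
                  (plus (ashift REG C) REG) and Y is a REG or a CONST_INT,
     (minus REG REG) and (minus REG CONST_INT),
     (neg REG),
     (mult REG CONST_INT),
     (ashift REG CONST_INT),
     a plain REG (register move) and a plain CONST_INT (constant load).  */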
static bool
fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
                HOST_WIDE_INT *offset_out, bitmap foldable_insns)
{
  /* Doesn't make sense if both DO_RECURSION and ANALYZE are false.  */
  gcc_checking_assert (do_recursion || analyze);
  gcc_checking_assert (GET_CODE (PATTERN (insn)) == SET);

  rtx src = SET_SRC (PATTERN (insn));
  HOST_WIDE_INT offset = 0;

  switch (GET_CODE (src))
    {
    case PLUS:
      {
        /* Propagate through add.  */
        rtx arg1 = XEXP (src, 0);
        rtx arg2 = XEXP (src, 1);

        if (REG_P (arg1))
          {
            if (do_recursion)
              offset += fold_offsets (insn, arg1, analyze, foldable_insns);
          }
        else if (GET_CODE (arg1) == ASHIFT
                 && REG_P (XEXP (arg1, 0))
                 && CONST_INT_P (XEXP (arg1, 1)))
          {
            /* Handle R1 = (R2 << C) + ...  */
            if (do_recursion)
              {
                HOST_WIDE_INT scale
                  = (HOST_WIDE_INT_1U << INTVAL (XEXP (arg1, 1)));
                offset += scale * fold_offsets (insn, XEXP (arg1, 0), analyze,
                                                foldable_insns);
              }
          }
        else if (GET_CODE (arg1) == PLUS
                 && REG_P (XEXP (arg1, 0))
                 && REG_P (XEXP (arg1, 1)))
          {
            /* Handle R1 = (R2 + R3) + ...  */
            if (do_recursion)
              {
                offset += fold_offsets (insn, XEXP (arg1, 0), analyze,
                                        foldable_insns);
                offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
                                        foldable_insns);
              }
          }
        else if (GET_CODE (arg1) == PLUS
                 && GET_CODE (XEXP (arg1, 0)) == ASHIFT
                 && REG_P (XEXP (XEXP (arg1, 0), 0))
                 && CONST_INT_P (XEXP (XEXP (arg1, 0), 1))
                 && REG_P (XEXP (arg1, 1)))
          {
            /* Handle R1 = ((R2 << C) + R3) + ...  */
            if (do_recursion)
              {
                HOST_WIDE_INT scale
                  = (HOST_WIDE_INT_1U << INTVAL (XEXP (XEXP (arg1, 0), 1)));
                offset += scale * fold_offsets (insn, XEXP (XEXP (arg1, 0), 0),
                                                analyze, foldable_insns);
                offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
                                        foldable_insns);
              }
          }
        else
          return false;

        if (REG_P (arg2))
          {
            if (do_recursion)
              offset += fold_offsets (insn, arg2, analyze, foldable_insns);
          }
        else if (CONST_INT_P (arg2))
          {
            if (REG_P (arg1))
              {
                offset += INTVAL (arg2);
                /* This is a R1 = R2 + C instruction, candidate for folding.  */
                if (!analyze)
                  bitmap_set_bit (foldable_insns, INSN_UID (insn));
              }
          }
        else
          return false;

        /* Pattern recognized for folding.  */
        break;
      }
    case MINUS:
      {
        /* Propagate through minus.  */
        rtx arg1 = XEXP (src, 0);
        rtx arg2 = XEXP (src, 1);

        if (REG_P (arg1))
          {
            if (do_recursion)
              offset += fold_offsets (insn, arg1, analyze, foldable_insns);
          }
        else
          return false;

        if (REG_P (arg2))
          {
            if (do_recursion)
              offset -= fold_offsets (insn, arg2, analyze, foldable_insns);
          }
        else if (CONST_INT_P (arg2))
          {
            if (REG_P (arg1))
              {
                offset -= INTVAL (arg2);
                /* This is a R1 = R2 - C instruction, candidate for folding.  */
                if (!analyze)
                  bitmap_set_bit (foldable_insns, INSN_UID (insn));
              }
          }
        else
          return false;

        /* Pattern recognized for folding.  */
        break;
      }
    case NEG:
      {
        /* Propagate through negation.  */
        rtx arg1 = XEXP (src, 0);
        if (REG_P (arg1))
          {
            if (do_recursion)
              offset = -fold_offsets (insn, arg1, analyze, foldable_insns);
          }
        else
          return false;

        /* Pattern recognized for folding.  */
        break;
      }
    case MULT:
      {
        /* Propagate through multiply by constant.  */
        rtx arg1 = XEXP (src, 0);
        rtx arg2 = XEXP (src, 1);

        if (REG_P (arg1) && CONST_INT_P (arg2))
          {
            if (do_recursion)
              {
                HOST_WIDE_INT scale = INTVAL (arg2);
                offset = scale * fold_offsets (insn, arg1, analyze,
                                               foldable_insns);
              }
          }
        else
          return false;

        /* Pattern recognized for folding.  */
        break;
      }
    case ASHIFT:
      {
        /* Propagate through shift left by constant.  */
        rtx arg1 = XEXP (src, 0);
        rtx arg2 = XEXP (src, 1);

        if (REG_P (arg1) && CONST_INT_P (arg2))
          {
            if (do_recursion)
              {
                HOST_WIDE_INT scale = (HOST_WIDE_INT_1U << INTVAL (arg2));
                offset = scale * fold_offsets (insn, arg1, analyze,
                                               foldable_insns);
              }
          }
        else
          return false;

        /* Pattern recognized for folding.  */
        break;
      }
    case REG:
      {
        /* Propagate through register move.  */
        if (do_recursion)
          offset = fold_offsets (insn, src, analyze, foldable_insns);

        /* Pattern recognized for folding.  */
        break;
      }
    case CONST_INT:
      {
        offset = INTVAL (src);
        /* R1 = C is candidate for folding.  */
        if (!analyze)
          bitmap_set_bit (foldable_insns, INSN_UID (insn));

        /* Pattern recognized for folding.  */
        break;
      }
    default:
      /* Cannot recognize.  */
      return false;
    }

  if (do_recursion && !analyze)
    *offset_out = offset;

  return true;
}

/* Function that computes the offset that would have to be added to all uses
   of REG if the instructions marked in FOLDABLE_INSNS were to be eliminated.

   If ANALYZE is true then mark in CAN_FOLD_INSNS which instructions
   transitively only affect other instructions found in CAN_FOLD_INSNS.
   If ANALYZE is false then compute the offset required for folding.  */
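/* Phase 1 calls this with ANALYZE set (see do_analysis) to mark foldable
   instructions; phase 2 calls it with ANALYZE clear (see
   do_fold_info_calculation) to obtain the offset and the set of instructions
   that would have to be folded.  */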
static HOST_WIDE_INT
fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns)
{
  rtx_insn *def = get_single_def_in_bb (insn, reg);

  if (!def || GET_CODE (PATTERN (def)) != SET)
    return 0;

  rtx dest = SET_DEST (PATTERN (def));

  if (!REG_P (dest))
    return 0;

  /* We can only affect the values of GPR registers.  */
  unsigned int dest_regno = REGNO (dest);
  if (fixed_regs[dest_regno]
      || !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], dest_regno))
    return 0;

  if (analyze)
    {
      /* Check if we know how to handle DEF.  */
      if (!fold_offsets_1 (def, true, false, NULL, NULL))
        return 0;

      /* We only fold through instructions that are transitively used as
         memory addresses and do not have other uses.  Use the same logic
         from offset calculation to visit instructions that can propagate
         offsets and keep track of them in CAN_FOLD_INSNS.  */
      bool success;
      struct df_link *uses = get_uses (def, dest, &success), *ref_link;

      if (!success)
        return 0;

      for (ref_link = uses; ref_link; ref_link = ref_link->next)
        {
          rtx_insn *use = DF_REF_INSN (ref_link->ref);

          if (DEBUG_INSN_P (use))
            continue;

          /* Punt if the use is anything more complicated than a set
             (clobber, use, etc).  */
          if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET)
            return 0;

          /* This use affects instructions outside of CAN_FOLD_INSNS.  */
          if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
            return 0;

          rtx use_set = PATTERN (use);

          /* Special case: A foldable memory store is not foldable if it
             mentions DEST outside of the address calculation.  */
          if (use_set && MEM_P (SET_DEST (use_set))
              && reg_mentioned_p (dest, SET_SRC (use_set)))
            return 0;
        }

      bitmap_set_bit (&can_fold_insns, INSN_UID (def));

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Instruction marked for propagation: ");
          print_rtl_single (dump_file, def);
        }
    }
  else
    {
      /* We cannot propagate through this instruction.  */
      if (!bitmap_bit_p (&can_fold_insns, INSN_UID (def)))
        return 0;
    }

  HOST_WIDE_INT offset = 0;
  bool recognized = fold_offsets_1 (def, analyze, true, &offset,
                                    foldable_insns);

  if (!recognized)
    return 0;

  return offset;
}

/* Test if INSN is a memory load / store that can have an offset folded to it.
   Return true iff INSN is such an instruction and return through MEM_OUT,
   REG_OUT and OFFSET_OUT the RTX that has a MEM code, the register that is
   used as a base address and the offset accordingly.
   All of the out pointers may be NULL in which case they will be ignored.  */
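/* The recognized roots are plain loads / stores (possibly behind a sign or
   zero extension of the loaded value) whose address is either a bare
   register, (mem (reg R)) with offset 0, or a register plus a constant
   displacement, (mem (plus (reg R) (const_int C))).  */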
bool
get_fold_mem_root (rtx_insn *insn, rtx *mem_out, rtx *reg_out,
                   HOST_WIDE_INT *offset_out)
{
  rtx set = single_set (insn);
  rtx mem = NULL_RTX;

  if (set != NULL_RTX)
    {
      rtx src = SET_SRC (set);
      rtx dest = SET_DEST (set);

      /* Don't fold when we have unspec / volatile.  */
      if (GET_CODE (src) == UNSPEC
          || GET_CODE (src) == UNSPEC_VOLATILE
          || GET_CODE (dest) == UNSPEC
          || GET_CODE (dest) == UNSPEC_VOLATILE)
        return false;

      if (MEM_P (src))
        mem = src;
      else if (MEM_P (dest))
        mem = dest;
      else if ((GET_CODE (src) == SIGN_EXTEND
                || GET_CODE (src) == ZERO_EXTEND)
               && MEM_P (XEXP (src, 0)))
        mem = XEXP (src, 0);
    }

  if (mem == NULL_RTX)
    return false;

  rtx mem_addr = XEXP (mem, 0);
  rtx reg;
  HOST_WIDE_INT offset;

  if (REG_P (mem_addr))
    {
      reg = mem_addr;
      offset = 0;
    }
  else if (GET_CODE (mem_addr) == PLUS
           && REG_P (XEXP (mem_addr, 0))
           && CONST_INT_P (XEXP (mem_addr, 1)))
    {
      reg = XEXP (mem_addr, 0);
      offset = INTVAL (XEXP (mem_addr, 1));
    }
  else
    return false;

  if (mem_out)
    *mem_out = mem;
  if (reg_out)
    *reg_out = reg;
  if (offset_out)
    *offset_out = offset;

  return true;
}

/* If INSN is a root memory instruction then do a DFS traversal on its
   definitions and find folding candidates.  */
static void
do_analysis (rtx_insn *insn)
{
  rtx reg;
  if (!get_fold_mem_root (insn, NULL, &reg, NULL))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Starting analysis from root: ");
      print_rtl_single (dump_file, insn);
    }

  /* Analyse folding opportunities for this memory instruction.  */
  bitmap_set_bit (&can_fold_insns, INSN_UID (insn));
  fold_offsets (insn, reg, true, NULL);
}

/* If INSN is a root memory instruction then compute the offset that folding
   would add to it and record in FOLD_INFO which instructions would have to
   be folded.  */
static void
do_fold_info_calculation (rtx_insn *insn, fold_info_map *fold_info)
{
  rtx mem, reg;
  HOST_WIDE_INT cur_offset;
  if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
    return;

  fold_mem_info *info = new fold_mem_info;
  info->added_offset = fold_offsets (insn, reg, false, info->fold_insns);

  fold_info->put (insn, info);
}

/* If INSN is a root memory instruction then compute a potentially new offset
   for it and test if the resulting instruction is valid.  */
static void
do_check_validity (rtx_insn *insn, fold_mem_info *info)
{
  rtx mem, reg;
  HOST_WIDE_INT cur_offset;
  if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
    return;

  HOST_WIDE_INT new_offset = cur_offset + info->added_offset;

  /* Test if it is valid to change MEM's address offset to NEW_OFFSET.  */
  int icode = INSN_CODE (insn);
  INSN_CODE (insn) = -1;
  rtx mem_addr = XEXP (mem, 0);
  machine_mode mode = GET_MODE (mem_addr);
  if (new_offset != 0)
    XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
  else
    XEXP (mem, 0) = reg;

  bool illegal = insn_invalid_p (insn, false)
                 || !memory_address_addr_space_p (mode, XEXP (mem, 0),
                                                  MEM_ADDR_SPACE (mem));

  /* Restore the instruction.  */
  XEXP (mem, 0) = mem_addr;
  INSN_CODE (insn) = icode;

  if (illegal)
    bitmap_ior_into (&cannot_fold_insns, info->fold_insns);
  else
    bitmap_ior_into (&candidate_fold_insns, info->fold_insns);
}

/* Compute the transitive closure of fold invalidity: if any instruction in a
   memory instruction's fold set turned out to be unfoldable, mark every
   instruction of that set as unfoldable as well.  Return true if the closure
   converged within the iteration limit and false otherwise.  */
static bool
compute_validity_closure (fold_info_map *fold_info)
{
  /* Let's say we have an arbitrary chain of foldable instructions xN = xN + C
     and memory operations rN that use xN as shown below.  If folding x1 in r1
     turns out to be invalid for whatever reason then it's also invalid to fold
     any of the other xN into any rN.  That means that we need the transitive
     closure of validity to determine whether we can fold a xN instruction.

     +--------------+     +-------------------+     +-------------------+
     | r1 = mem[x1] |     | r2 = mem[x1 + x2] |     | r3 = mem[x2 + x3] |  ...
     +--------------+     +-------------------+     +-------------------+
            ^               ^         ^               ^         ^
            |              /          |              /          |    ...
            |             /           |             /           |
     +-------------+     /     +-------------+     /     +-------------+
     | x1 = x1 + 1 |----+      | x2 = x2 + 1 |----+      | x3 = x3 + 1 |--- ...
     +-------------+           +-------------+           +-------------+
            ^                         ^                         ^
            |                         |                         |
           ...                       ...                       ...
  */

  /* In general three iterations should be enough for most cases, but allow up
     to five when -fexpensive-optimizations is used.  */
  int max_iters = 3 + 2 * flag_expensive_optimizations;
  for (int pass = 0; pass < max_iters; pass++)
    {
      bool made_changes = false;
      for (fold_info_map::iterator iter = fold_info->begin ();
           iter != fold_info->end (); ++iter)
        {
          fold_mem_info *info = (*iter).second;
          if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
            made_changes |= bitmap_ior_into (&cannot_fold_insns,
                                             info->fold_insns);
        }

      if (!made_changes)
        return true;
    }

  return false;
}

/* If INSN is a root memory instruction that was affected by any folding
   then update its offset as necessary.  */
static void
do_commit_offset (rtx_insn *insn, fold_mem_info *info)
{
  rtx mem, reg;
  HOST_WIDE_INT cur_offset;
  if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
    return;

  HOST_WIDE_INT new_offset = cur_offset + info->added_offset;

  if (new_offset == cur_offset)
    return;

  gcc_assert (!bitmap_empty_p (info->fold_insns));

  if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Memory offset changed from "
               HOST_WIDE_INT_PRINT_DEC " to " HOST_WIDE_INT_PRINT_DEC
               " for instruction:\n", cur_offset, new_offset);
      print_rtl_single (dump_file, insn);
    }

  machine_mode mode = GET_MODE (XEXP (mem, 0));
  if (new_offset != 0)
    XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
  else
    XEXP (mem, 0) = reg;
  INSN_CODE (insn) = recog (PATTERN (insn), insn, 0);
  df_insn_rescan (insn);
}

/* If INSN is a move / add instruction that was folded then replace its
   constant part with zero.  */
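/* For example, a folded "r1 = r2 + 16" is rewritten as the move "r1 = r2"
   (or simply deleted when r1 and r2 are the same register), and a folded
   "r1 = 16" becomes "r1 = 0".  */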
static void
do_commit_insn (rtx_insn *insn)
{
  if (bitmap_bit_p (&candidate_fold_insns, INSN_UID (insn))
      && !bitmap_bit_p (&cannot_fold_insns, INSN_UID (insn)))
    {
      if (dump_file)
        {
          fprintf (dump_file, "Instruction folded:");
          print_rtl_single (dump_file, insn);
        }

      stats_fold_count++;

      rtx set = single_set (insn);
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);

      /* Emit a move and let subsequent passes eliminate it if possible.  */
      if (GET_CODE (src) == CONST_INT)
        {
          /* INSN is R1 = C.
             Replace it with R1 = 0 because C was folded.  */
          rtx mov_rtx
            = gen_move_insn (dest, gen_int_mode (0, GET_MODE (dest)));
          df_insn_rescan (emit_insn_after (mov_rtx, insn));
        }
      else
        {
          /* INSN is R1 = R2 + C.
             Replace it with R1 = R2 because C was folded.  */
          rtx arg1 = XEXP (src, 0);

          /* If the DEST == ARG1 then the move is a no-op.  */
          if (REGNO (dest) != REGNO (arg1))
            {
              gcc_checking_assert (GET_MODE (dest) == GET_MODE (arg1));
              rtx mov_rtx = gen_move_insn (dest, arg1);
              df_insn_rescan (emit_insn_after (mov_rtx, insn));
            }
        }

      /* Delete the original move / add instruction.  */
      delete_insn (insn);
    }
}

unsigned int
pass_fold_mem_offsets::execute (function *fn)
{
  df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS + DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
  df_analyze ();

  bitmap_initialize (&can_fold_insns, NULL);
  bitmap_initialize (&candidate_fold_insns, NULL);
  bitmap_initialize (&cannot_fold_insns, NULL);

  stats_fold_count = 0;

  basic_block bb;
  rtx_insn *insn;
  FOR_ALL_BB_FN (bb, fn)
    {
      /* There is a conflict between this pass and RISCV's shorten-memrefs
         pass.  For now disable folding if optimizing for size because
         otherwise this cancels the effects of shorten-memrefs.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      fold_info_map fold_info;

      bitmap_clear (&can_fold_insns);
      bitmap_clear (&candidate_fold_insns);
      bitmap_clear (&cannot_fold_insns);

      /* Phase 1: mark the instructions that offsets can be propagated
         through.  */
      FOR_BB_INSNS (bb, insn)
        do_analysis (insn);

      /* Phase 2: compute the offsets that folding would produce and check
         that the resulting memory instructions are valid.  */
      FOR_BB_INSNS (bb, insn)
        do_fold_info_calculation (insn, &fold_info);

      FOR_BB_INSNS (bb, insn)
        if (fold_mem_info **info = fold_info.get (insn))
          do_check_validity (insn, *info);

      if (compute_validity_closure (&fold_info))
        {
          /* Phase 3: commit the new offsets in the memory instructions.  */
          FOR_BB_INSNS (bb, insn)
            if (fold_mem_info **info = fold_info.get (insn))
              do_commit_offset (insn, *info);

          /* Phase 4: delete or simplify (reduce to a move) the folded
             instructions.  */
          FOR_BB_INSNS (bb, insn)
            do_commit_insn (insn);
        }

      for (fold_info_map::iterator iter = fold_info.begin ();
           iter != fold_info.end (); ++iter)
        delete (*iter).second;
    }

  statistics_counter_event (cfun, "Number of folded instructions",
                            stats_fold_count);

  bitmap_release (&can_fold_insns);
  bitmap_release (&candidate_fold_insns);
  bitmap_release (&cannot_fold_insns);

  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_fold_mem_offsets (gcc::context *ctxt)
{
  return new pass_fold_mem_offsets (ctxt);
}
