1/* Decompose multiword subregs.
2 Copyright (C) 2007-2023 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "rtl.h"
27#include "tree.h"
28#include "cfghooks.h"
29#include "df.h"
30#include "memmodel.h"
31#include "tm_p.h"
32#include "expmed.h"
33#include "insn-config.h"
34#include "emit-rtl.h"
35#include "recog.h"
36#include "cfgrtl.h"
37#include "cfgbuild.h"
38#include "dce.h"
39#include "expr.h"
40#include "explow.h"
41#include "tree-pass.h"
42#include "lower-subreg.h"
43#include "rtl-iter.h"
44#include "target.h"
45
46
47/* Decompose multi-word pseudo-registers into individual
48 pseudo-registers when possible and profitable. This is possible
49 when all the uses of a multi-word register are via SUBREG, or are
50 copies of the register to another location. Breaking apart the
51 register permits more CSE and permits better register allocation.
52 This is profitable if the machine does not have move instructions
53 to do this.
54
55 This pass only splits moves with modes that are wider than
56 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
57 integer modes that are twice the width of word_mode. The latter
58 could be generalized if there was a need to do this, but the trend in
59 architectures is to not need this.
60
61 There are two useful preprocessor defines for use by maintainers:
62
63 #define LOG_COSTS 1
64
65 if you wish to see the actual cost estimates that are being used
66 for each mode wider than word mode and the cost estimates for zero
67 extension and the shifts. This can be useful when port maintainers
68 are tuning insn rtx costs.
69
70 #define FORCE_LOWERING 1
71
72 if you wish to test the pass with all the transformation forced on.
73 This can be useful for finding bugs in the transformations. */
74
/* Set to 1 to print the cost estimates computed below to stderr.  */
#define LOG_COSTS 0
/* Set to 1 to force every transformation on, regardless of the cost
   estimates.  */
#define FORCE_LOWERING 0
77
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Elements are allocated lazily and may be
   NULL.  */
static vec<bitmap> reg_copy_graph;
95
/* Per-target state of this pass: the twice-word mode and the
   profitability choices filled in by init_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
/* The target_lower_subreg instance currently in use.  */
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the fields of the state currently in use.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
106
107/* Return true if MODE is a mode we know how to lower. When returning true,
108 store its byte size in *BYTES and its word size in *WORDS. */
109
110static inline bool
111interesting_mode_p (machine_mode mode, unsigned int *bytes,
112 unsigned int *words)
113{
114 if (!GET_MODE_SIZE (mode).is_constant (const_value: bytes))
115 return false;
116 *words = CEIL (*bytes, UNITS_PER_WORD);
117 return true;
118}
119
/* RTXes used while computing costs.  They are built once by
   init_lower_subreg and then have their modes, codes and operands
   rewritten in place for each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
135
136/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
137 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
138
139static int
140shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
141 machine_mode mode, int op1)
142{
143 PUT_CODE (rtxes->shift, code);
144 PUT_MODE (x: rtxes->shift, mode);
145 PUT_MODE (x: rtxes->source, mode);
146 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
147 return set_src_cost (x: rtxes->shift, mode, speed_p);
148}
149
150/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
151 to true if it is profitable to split a double-word CODE shift
152 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
153 for speed or size profitability.
154
155 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
156 the cost of moving zero into a word-mode register. WORD_MOVE_COST
157 is the cost of moving between word registers. */
158
159static void
160compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
161 bool *splitting, enum rtx_code code,
162 int word_move_zero_cost, int word_move_cost)
163{
164 int wide_cost, narrow_cost, upper_cost, i;
165
166 for (i = 0; i < BITS_PER_WORD; i++)
167 {
168 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
169 op1: i + BITS_PER_WORD);
170 if (i == 0)
171 narrow_cost = word_move_cost;
172 else
173 narrow_cost = shift_cost (speed_p, rtxes, code, mode: word_mode, op1: i);
174
175 if (code != ASHIFTRT)
176 upper_cost = word_move_zero_cost;
177 else if (i == BITS_PER_WORD - 1)
178 upper_cost = word_move_cost;
179 else
180 upper_cost = shift_cost (speed_p, rtxes, code, mode: word_mode,
181 BITS_PER_WORD - 1);
182
183 if (LOG_COSTS)
184 fprintf (stderr, format: "%s %s by %d: original cost %d, split cost %d + %d\n",
185 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
186 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
187
188 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
189 splitting[i] = true;
190 }
191}
192
193/* Compute what we should do when optimizing for speed or size; SPEED_P
194 selects which. Use RTXES for computing costs. */
195
196static void
197compute_costs (bool speed_p, struct cost_rtxes *rtxes)
198{
199 unsigned int i;
200 int word_move_zero_cost, word_move_cost;
201
202 PUT_MODE (x: rtxes->target, mode: word_mode);
203 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
204 word_move_zero_cost = set_rtx_cost (x: rtxes->set, speed_p);
205
206 SET_SRC (rtxes->set) = rtxes->source;
207 word_move_cost = set_rtx_cost (x: rtxes->set, speed_p);
208
209 if (LOG_COSTS)
210 fprintf (stderr, format: "%s move: from zero cost %d, from reg cost %d\n",
211 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
212
213 for (i = 0; i < MAX_MACHINE_MODE; i++)
214 {
215 machine_mode mode = (machine_mode) i;
216 unsigned int size, factor;
217 if (interesting_mode_p (mode, bytes: &size, words: &factor) && factor > 1)
218 {
219 unsigned int mode_move_cost;
220
221 PUT_MODE (x: rtxes->target, mode);
222 PUT_MODE (x: rtxes->source, mode);
223 mode_move_cost = set_rtx_cost (x: rtxes->set, speed_p);
224
225 if (LOG_COSTS)
226 fprintf (stderr, format: "%s move: original cost %d, split cost %d * %d\n",
227 GET_MODE_NAME (mode), mode_move_cost,
228 word_move_cost, factor);
229
230 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
231 {
232 choices[speed_p].move_modes_to_split[i] = true;
233 choices[speed_p].something_to_do = true;
234 }
235 }
236 }
237
238 /* For the moves and shifts, the only case that is checked is one
239 where the mode of the target is an integer mode twice the width
240 of the word_mode.
241
242 If it is not profitable to split a double word move then do not
243 even consider the shifts or the zero extension. */
244 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
245 {
246 int zext_cost;
247
248 /* The only case here to check to see if moving the upper part with a
249 zero is cheaper than doing the zext itself. */
250 PUT_MODE (x: rtxes->source, mode: word_mode);
251 zext_cost = set_src_cost (x: rtxes->zext, twice_word_mode, speed_p);
252
253 if (LOG_COSTS)
254 fprintf (stderr, format: "%s %s: original cost %d, split cost %d + %d\n",
255 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
256 zext_cost, word_move_cost, word_move_zero_cost);
257
258 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
259 choices[speed_p].splitting_zext = true;
260
261 compute_splitting_shift (speed_p, rtxes,
262 choices[speed_p].splitting_ashift, code: ASHIFT,
263 word_move_zero_cost, word_move_cost);
264 compute_splitting_shift (speed_p, rtxes,
265 choices[speed_p].splitting_lshiftrt, code: LSHIFTRT,
266 word_move_zero_cost, word_move_cost);
267 compute_splitting_shift (speed_p, rtxes,
268 choices[speed_p].splitting_ashiftrt, code: ASHIFTRT,
269 word_move_zero_cost, word_move_cost);
270 }
271}
272
273/* Do one-per-target initialisation. This involves determining
274 which operations on the machine are profitable. If none are found,
275 then the pass just returns when called. */
276
277void
278init_lower_subreg (void)
279{
280 struct cost_rtxes rtxes;
281
282 memset (s: this_target_lower_subreg, c: 0, n: sizeof (*this_target_lower_subreg));
283
284 twice_word_mode = GET_MODE_2XWIDER_MODE (m: word_mode).require ();
285
286 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
287 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
288 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
289 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
290 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
291
292 if (LOG_COSTS)
293 fprintf (stderr, format: "\nSize costs\n==========\n\n");
294 compute_costs (speed_p: false, rtxes: &rtxes);
295
296 if (LOG_COSTS)
297 fprintf (stderr, format: "\nSpeed costs\n===========\n\n");
298 compute_costs (speed_p: true, rtxes: &rtxes);
299}
300
301static bool
302simple_move_operand (rtx x)
303{
304 if (GET_CODE (x) == SUBREG)
305 x = SUBREG_REG (x);
306
307 if (!OBJECT_P (x))
308 return false;
309
310 if (GET_CODE (x) == LABEL_REF
311 || GET_CODE (x) == SYMBOL_REF
312 || GET_CODE (x) == HIGH
313 || GET_CODE (x) == CONST)
314 return false;
315
316 if (MEM_P (x)
317 && (MEM_VOLATILE_P (x)
318 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
319 return false;
320
321 return true;
322}
323
324/* If X is an operator that can be treated as a simple move that we
325 can split, then return the operand that is operated on. */
326
327static rtx
328operand_for_swap_move_operator (rtx x)
329{
330 /* A word sized rotate of a register pair is equivalent to swapping
331 the registers in the register pair. */
332 if (GET_CODE (x) == ROTATE
333 && GET_MODE (x) == twice_word_mode
334 && simple_move_operand (XEXP (x, 0))
335 && CONST_INT_P (XEXP (x, 1))
336 && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
337 return XEXP (x, 0);
338
339 return NULL_RTX;
340}
341
342/* If INSN is a single set between two objects that we want to split,
343 return the single set. SPEED_P says whether we are optimizing
344 INSN for speed or size.
345
346 INSN should have been passed to recog and extract_insn before this
347 is called. */
348
349static rtx
350simple_move (rtx_insn *insn, bool speed_p)
351{
352 rtx x, op;
353 rtx set;
354 machine_mode mode;
355
356 if (recog_data.n_operands != 2)
357 return NULL_RTX;
358
359 set = single_set (insn);
360 if (!set)
361 return NULL_RTX;
362
363 x = SET_DEST (set);
364 if (x != recog_data.operand[0] && x != recog_data.operand[1])
365 return NULL_RTX;
366 if (!simple_move_operand (x))
367 return NULL_RTX;
368
369 x = SET_SRC (set);
370 if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
371 x = op;
372
373 if (x != recog_data.operand[0] && x != recog_data.operand[1])
374 return NULL_RTX;
375 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
376 things like x86 rdtsc which returns a DImode value. */
377 if (GET_CODE (x) != ASM_OPERANDS
378 && !simple_move_operand (x))
379 return NULL_RTX;
380
381 /* We try to decompose in integer modes, to avoid generating
382 inefficient code copying between integer and floating point
383 registers. That means that we can't decompose if this is a
384 non-integer mode for which there is no integer mode of the same
385 size. */
386 mode = GET_MODE (SET_DEST (set));
387 scalar_int_mode int_mode;
388 if (!SCALAR_INT_MODE_P (mode)
389 && (!int_mode_for_size (size: GET_MODE_BITSIZE (mode), limit: 0).exists (mode: &int_mode)
390 || !targetm.modes_tieable_p (mode, int_mode)))
391 return NULL_RTX;
392
393 /* Reject PARTIAL_INT modes. They are used for processor specific
394 purposes and it's probably best not to tamper with them. */
395 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
396 return NULL_RTX;
397
398 if (!choices[speed_p].move_modes_to_split[(int) mode])
399 return NULL_RTX;
400
401 return set;
402}
403
404/* If SET is a copy from one multi-word pseudo-register to another,
405 record that in reg_copy_graph. Return whether it is such a
406 copy. */
407
408static bool
409find_pseudo_copy (rtx set)
410{
411 rtx dest = SET_DEST (set);
412 rtx src = SET_SRC (set);
413 rtx op;
414 unsigned int rd, rs;
415 bitmap b;
416
417 if ((op = operand_for_swap_move_operator (x: src)) != NULL_RTX)
418 src = op;
419
420 if (!REG_P (dest) || !REG_P (src))
421 return false;
422
423 rd = REGNO (dest);
424 rs = REGNO (src);
425 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
426 return false;
427
428 b = reg_copy_graph[rs];
429 if (b == NULL)
430 {
431 b = BITMAP_ALLOC (NULL);
432 reg_copy_graph[rs] = b;
433 }
434
435 bitmap_set_bit (b, rd);
436
437 return true;
438}
439
440/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
441 where they are copied to another register, add the register to
442 which they are copied to DECOMPOSABLE_CONTEXT. Use
443 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
444 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
445
446static void
447propagate_pseudo_copies (void)
448{
449 auto_bitmap queue, propagate;
450
451 bitmap_copy (queue, decomposable_context);
452 do
453 {
454 bitmap_iterator iter;
455 unsigned int i;
456
457 bitmap_clear (propagate);
458
459 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
460 {
461 bitmap b = reg_copy_graph[i];
462 if (b)
463 bitmap_ior_and_compl_into (A: propagate, B: b, C: non_decomposable_context);
464 }
465
466 bitmap_and_compl (queue, propagate, decomposable_context);
467 bitmap_ior_into (decomposable_context, propagate);
468 }
469 while (!bitmap_empty_p (map: queue));
470}
471
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  It describes the insn being scanned and
   decides how a direct reference to a multi-word pseudo-register is
   recorded.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
484
485/* If we find a SUBREG in *LOC which we could use to decompose a
486 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
487 unadorned register which is not a simple pseudo-register copy,
488 DATA will point at the type of move, and we set a bit in
489 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
490
491static void
492find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
493{
494 subrtx_var_iterator::array_type array;
495 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
496 {
497 rtx x = *iter;
498 if (GET_CODE (x) == SUBREG)
499 {
500 rtx inner = SUBREG_REG (x);
501 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
502
503 if (!REG_P (inner))
504 continue;
505
506 regno = REGNO (inner);
507 if (HARD_REGISTER_NUM_P (regno))
508 {
509 iter.skip_subrtxes ();
510 continue;
511 }
512
513 if (!interesting_mode_p (GET_MODE (x), bytes: &outer_size, words: &outer_words)
514 || !interesting_mode_p (GET_MODE (inner), bytes: &inner_size,
515 words: &inner_words))
516 continue;
517
518 /* We only try to decompose single word subregs of multi-word
519 registers. When we find one, we return -1 to avoid iterating
520 over the inner register.
521
522 ??? This doesn't allow, e.g., DImode subregs of TImode values
523 on 32-bit targets. We would need to record the way the
524 pseudo-register was used, and only decompose if all the uses
525 were the same number and size of pieces. Hopefully this
526 doesn't happen much. */
527
528 if (outer_words == 1
529 && inner_words > 1
530 /* Don't allow to decompose floating point subregs of
531 multi-word pseudos if the floating point mode does
532 not have word size, because otherwise we'd generate
533 a subreg with that floating mode from a different
534 sized integral pseudo which is not allowed by
535 validate_subreg. */
536 && (!FLOAT_MODE_P (GET_MODE (x))
537 || outer_size == UNITS_PER_WORD))
538 {
539 bitmap_set_bit (decomposable_context, regno);
540 iter.skip_subrtxes ();
541 continue;
542 }
543
544 /* If this is a cast from one mode to another, where the modes
545 have the same size, and they are not tieable, then mark this
546 register as non-decomposable. If we decompose it we are
547 likely to mess up whatever the backend is trying to do. */
548 if (outer_words > 1
549 && outer_size == inner_size
550 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
551 {
552 bitmap_set_bit (non_decomposable_context, regno);
553 bitmap_set_bit (subreg_context, regno);
554 iter.skip_subrtxes ();
555 continue;
556 }
557 }
558 else if (REG_P (x))
559 {
560 unsigned int regno, size, words;
561
562 /* We will see an outer SUBREG before we see the inner REG, so
563 when we see a plain REG here it means a direct reference to
564 the register.
565
566 If this is not a simple copy from one location to another,
567 then we cannot decompose this register. If this is a simple
568 copy we want to decompose, and the mode is right,
569 then we mark the register as decomposable.
570 Otherwise we don't say anything about this register --
571 it could be decomposed, but whether that would be
572 profitable depends upon how it is used elsewhere.
573
574 We only set bits in the bitmap for multi-word
575 pseudo-registers, since those are the only ones we care about
576 and it keeps the size of the bitmaps down. */
577
578 regno = REGNO (x);
579 if (!HARD_REGISTER_NUM_P (regno)
580 && interesting_mode_p (GET_MODE (x), bytes: &size, words: &words)
581 && words > 1)
582 {
583 switch (*pcmi)
584 {
585 case NOT_SIMPLE_MOVE:
586 bitmap_set_bit (non_decomposable_context, regno);
587 break;
588 case DECOMPOSABLE_SIMPLE_MOVE:
589 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
590 bitmap_set_bit (decomposable_context, regno);
591 break;
592 case SIMPLE_MOVE:
593 break;
594 default:
595 gcc_unreachable ();
596 }
597 }
598 }
599 else if (MEM_P (x))
600 {
601 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
602
603 /* Any registers used in a MEM do not participate in a
604 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
605 here, and return -1 to block the parent's recursion. */
606 find_decomposable_subregs (loc: &XEXP (x, 0), pcmi: &cmi_mem);
607 iter.skip_subrtxes ();
608 }
609 }
610}
611
612/* Decompose REGNO into word-sized components. We smash the REG node
613 in place. This ensures that (1) something goes wrong quickly if we
614 fail to make some replacement, and (2) the debug information inside
615 the symbol table is automatically kept up to date. */
616
617static void
618decompose_register (unsigned int regno)
619{
620 rtx reg;
621 unsigned int size, words, i;
622 rtvec v;
623
624 reg = regno_reg_rtx[regno];
625
626 regno_reg_rtx[regno] = NULL_RTX;
627
628 if (!interesting_mode_p (GET_MODE (reg), bytes: &size, words: &words))
629 gcc_unreachable ();
630
631 v = rtvec_alloc (words);
632 for (i = 0; i < words; ++i)
633 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
634
635 PUT_CODE (reg, CONCATN);
636 XVEC (reg, 0) = v;
637
638 if (dump_file)
639 {
640 fprintf (stream: dump_file, format: "; Splitting reg %u ->", regno);
641 for (i = 0; i < words; ++i)
642 fprintf (stream: dump_file, format: " %u", REGNO (XVECEXP (reg, 0, i)));
643 fputc (c: '\n', stream: dump_file);
644 }
645}
646
647/* Get a SUBREG of a CONCATN. */
648
649static rtx
650simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
651{
652 unsigned int outer_size, outer_words, inner_size, inner_words;
653 machine_mode innermode, partmode;
654 rtx part;
655 unsigned int final_offset;
656 unsigned int byte;
657
658 innermode = GET_MODE (op);
659 if (!interesting_mode_p (mode: outermode, bytes: &outer_size, words: &outer_words)
660 || !interesting_mode_p (mode: innermode, bytes: &inner_size, words: &inner_words))
661 gcc_unreachable ();
662
663 /* Must be constant if interesting_mode_p passes. */
664 byte = orig_byte.to_constant ();
665 gcc_assert (GET_CODE (op) == CONCATN);
666 gcc_assert (byte % outer_size == 0);
667
668 gcc_assert (byte < inner_size);
669 if (outer_size > inner_size)
670 return NULL_RTX;
671
672 inner_size /= XVECLEN (op, 0);
673 part = XVECEXP (op, 0, byte / inner_size);
674 partmode = GET_MODE (part);
675
676 final_offset = byte % inner_size;
677 if (final_offset + outer_size > inner_size)
678 return NULL_RTX;
679
680 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
681 regular CONST_VECTORs. They have vector or integer modes, depending
682 on the capabilities of the target. Cope with them. */
683 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
684 partmode = GET_MODE_INNER (innermode);
685 else if (partmode == VOIDmode)
686 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
687 GET_MODE_CLASS (innermode), 0).require ();
688
689 return simplify_gen_subreg (outermode, op: part, innermode: partmode, byte: final_offset);
690}
691
692/* Wrapper around simplify_gen_subreg which handles CONCATN. */
693
694static rtx
695simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
696 machine_mode innermode, unsigned int byte)
697{
698 rtx ret;
699
700 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
701 If OP is a SUBREG of a CONCATN, then it must be a simple mode
702 change with the same size and offset 0, or it must extract a
703 part. We shouldn't see anything else here. */
704 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
705 {
706 rtx op2;
707
708 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
709 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
710 && known_eq (SUBREG_BYTE (op), 0))
711 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
712 GET_MODE (SUBREG_REG (op)), byte);
713
714 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
715 SUBREG_BYTE (op));
716 if (op2 == NULL_RTX)
717 {
718 /* We don't handle paradoxical subregs here. */
719 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
720 gcc_assert (!paradoxical_subreg_p (op));
721 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
722 orig_byte: byte + SUBREG_BYTE (op));
723 gcc_assert (op2 != NULL_RTX);
724 return op2;
725 }
726
727 op = op2;
728 gcc_assert (op != NULL_RTX);
729 gcc_assert (innermode == GET_MODE (op));
730 }
731
732 if (GET_CODE (op) == CONCATN)
733 return simplify_subreg_concatn (outermode, op, orig_byte: byte);
734
735 ret = simplify_gen_subreg (outermode, op, innermode, byte);
736
737 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
738 resolve_simple_move will ask for the high part of the paradoxical
739 subreg, which does not have a value. Just return a zero. */
740 if (ret == NULL_RTX
741 && paradoxical_subreg_p (x: op))
742 return CONST0_RTX (outermode);
743
744 gcc_assert (ret != NULL_RTX);
745 return ret;
746}
747
748/* Return whether we should resolve X into the registers into which it
749 was decomposed. */
750
751static bool
752resolve_reg_p (rtx x)
753{
754 return GET_CODE (x) == CONCATN;
755}
756
757/* Return whether X is a SUBREG of a register which we need to
758 resolve. */
759
760static bool
761resolve_subreg_p (rtx x)
762{
763 if (GET_CODE (x) != SUBREG)
764 return false;
765 return resolve_reg_p (SUBREG_REG (x));
766}
767
768/* Look for SUBREGs in *LOC which need to be decomposed. */
769
770static bool
771resolve_subreg_use (rtx *loc, rtx insn)
772{
773 subrtx_ptr_iterator::array_type array;
774 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
775 {
776 rtx *loc = *iter;
777 rtx x = *loc;
778 if (resolve_subreg_p (x))
779 {
780 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
781 SUBREG_BYTE (x));
782
783 /* It is possible for a note to contain a reference which we can
784 decompose. In this case, return 1 to the caller to indicate
785 that the note must be removed. */
786 if (!x)
787 {
788 gcc_assert (!insn);
789 return true;
790 }
791
792 validate_change (insn, loc, x, 1);
793 iter.skip_subrtxes ();
794 }
795 else if (resolve_reg_p (x))
796 /* Return 1 to the caller to indicate that we found a direct
797 reference to a register which is being decomposed. This can
798 happen inside notes, multiword shift or zero-extend
799 instructions. */
800 return true;
801 }
802
803 return false;
804}
805
806/* Resolve any decomposed registers which appear in register notes on
807 INSN. */
808
809static void
810resolve_reg_notes (rtx_insn *insn)
811{
812 rtx *pnote, note;
813
814 note = find_reg_equal_equiv_note (insn);
815 if (note)
816 {
817 int old_count = num_validated_changes ();
818 if (resolve_subreg_use (loc: &XEXP (note, 0), NULL_RTX))
819 remove_note (insn, note);
820 else
821 if (old_count != num_validated_changes ())
822 df_notes_rescan (insn);
823 }
824
825 pnote = &REG_NOTES (insn);
826 while (*pnote != NULL_RTX)
827 {
828 bool del = false;
829
830 note = *pnote;
831 switch (REG_NOTE_KIND (note))
832 {
833 case REG_DEAD:
834 case REG_UNUSED:
835 if (resolve_reg_p (XEXP (note, 0)))
836 del = true;
837 break;
838
839 default:
840 break;
841 }
842
843 if (del)
844 *pnote = XEXP (note, 1);
845 else
846 pnote = &XEXP (note, 1);
847 }
848}
849
850/* Return whether X can be decomposed into subwords. */
851
852static bool
853can_decompose_p (rtx x)
854{
855 if (REG_P (x))
856 {
857 unsigned int regno = REGNO (x);
858
859 if (HARD_REGISTER_NUM_P (regno))
860 {
861 unsigned int byte, num_bytes, num_words;
862
863 if (!interesting_mode_p (GET_MODE (x), bytes: &num_bytes, words: &num_words))
864 return false;
865 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
866 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
867 return false;
868 return true;
869 }
870 else
871 return !bitmap_bit_p (subreg_context, regno);
872 }
873
874 return true;
875}
876
877/* OPND is a concatn operand this is used with a simple move operator.
878 Return a new rtx with the concatn's operands swapped. */
879
880static rtx
881resolve_operand_for_swap_move_operator (rtx opnd)
882{
883 gcc_assert (GET_CODE (opnd) == CONCATN);
884 rtx concatn = copy_rtx (opnd);
885 rtx op0 = XVECEXP (concatn, 0, 0);
886 rtx op1 = XVECEXP (concatn, 0, 1);
887 XVECEXP (concatn, 0, 0) = op1;
888 XVECEXP (concatn, 0, 1) = op0;
889 return concatn;
890}
891
892/* Decompose the registers used in a simple move SET within INSN. If
893 we don't change anything, return INSN, otherwise return the start
894 of the sequence of moves. */
895
896static rtx_insn *
897resolve_simple_move (rtx set, rtx_insn *insn)
898{
899 rtx src, dest, real_dest, src_op;
900 rtx_insn *insns;
901 machine_mode orig_mode, dest_mode;
902 unsigned int orig_size, words;
903 bool pushing;
904
905 src = SET_SRC (set);
906 dest = SET_DEST (set);
907 orig_mode = GET_MODE (dest);
908
909 if (!interesting_mode_p (mode: orig_mode, bytes: &orig_size, words: &words))
910 gcc_unreachable ();
911 gcc_assert (words > 1);
912
913 start_sequence ();
914
915 /* We have to handle copying from a SUBREG of a decomposed reg where
916 the SUBREG is larger than word size. Rather than assume that we
917 can take a word_mode SUBREG of the destination, we copy to a new
918 register and then copy that to the destination. */
919
920 real_dest = NULL_RTX;
921
922 if ((src_op = operand_for_swap_move_operator (x: src)) != NULL_RTX)
923 {
924 if (resolve_reg_p (x: dest))
925 {
926 /* DEST is a CONCATN, so swap its operands and strip
927 SRC's operator. */
928 dest = resolve_operand_for_swap_move_operator (opnd: dest);
929 src = src_op;
930 }
931 else if (resolve_reg_p (x: src_op))
932 {
933 /* SRC is an operation on a CONCATN, so strip the operator and
934 swap the CONCATN's operands. */
935 src = resolve_operand_for_swap_move_operator (opnd: src_op);
936 }
937 }
938
939 if (GET_CODE (src) == SUBREG
940 && resolve_reg_p (SUBREG_REG (src))
941 && (maybe_ne (SUBREG_BYTE (src), b: 0)
942 || maybe_ne (a: orig_size, b: GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
943 {
944 real_dest = dest;
945 dest = gen_reg_rtx (orig_mode);
946 if (REG_P (real_dest))
947 REG_ATTRS (dest) = REG_ATTRS (real_dest);
948 }
949
950 /* Similarly if we are copying to a SUBREG of a decomposed reg where
951 the SUBREG is larger than word size. */
952
953 if (GET_CODE (dest) == SUBREG
954 && resolve_reg_p (SUBREG_REG (dest))
955 && (maybe_ne (SUBREG_BYTE (dest), b: 0)
956 || maybe_ne (a: orig_size,
957 b: GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
958 {
959 rtx reg, smove;
960 rtx_insn *minsn;
961
962 reg = gen_reg_rtx (orig_mode);
963 minsn = emit_move_insn (reg, src);
964 smove = single_set (insn: minsn);
965 gcc_assert (smove != NULL_RTX);
966 resolve_simple_move (set: smove, insn: minsn);
967 src = reg;
968 }
969
970 /* If we didn't have any big SUBREGS of decomposed registers, and
971 neither side of the move is a register we are decomposing, then
972 we don't have to do anything here. */
973
974 if (src == SET_SRC (set)
975 && dest == SET_DEST (set)
976 && !resolve_reg_p (x: src)
977 && !resolve_subreg_p (x: src)
978 && !resolve_reg_p (x: dest)
979 && !resolve_subreg_p (x: dest))
980 {
981 end_sequence ();
982 return insn;
983 }
984
985 /* It's possible for the code to use a subreg of a decomposed
986 register while forming an address. We need to handle that before
987 passing the address to emit_move_insn. We pass NULL_RTX as the
988 insn parameter to resolve_subreg_use because we cannot validate
989 the insn yet. */
990 if (MEM_P (src) || MEM_P (dest))
991 {
992 int acg;
993
994 if (MEM_P (src))
995 resolve_subreg_use (loc: &XEXP (src, 0), NULL_RTX);
996 if (MEM_P (dest))
997 resolve_subreg_use (loc: &XEXP (dest, 0), NULL_RTX);
998 acg = apply_change_group ();
999 gcc_assert (acg);
1000 }
1001
1002 /* If SRC is a register which we can't decompose, or has side
1003 effects, we need to move via a temporary register. */
1004
1005 if (!can_decompose_p (x: src)
1006 || side_effects_p (src)
1007 || GET_CODE (src) == ASM_OPERANDS)
1008 {
1009 rtx reg;
1010
1011 reg = gen_reg_rtx (orig_mode);
1012
1013 if (AUTO_INC_DEC)
1014 {
1015 rtx_insn *move = emit_move_insn (reg, src);
1016 if (MEM_P (src))
1017 {
1018 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1019 if (note)
1020 add_reg_note (move, REG_INC, XEXP (note, 0));
1021 }
1022 }
1023 else
1024 emit_move_insn (reg, src);
1025
1026 src = reg;
1027 }
1028
1029 /* If DEST is a register which we can't decompose, or has side
1030 effects, we need to first move to a temporary register. We
1031 handle the common case of pushing an operand directly. We also
1032 go through a temporary register if it holds a floating point
1033 value. This gives us better code on systems which can't move
1034 data easily between integer and floating point registers. */
1035
1036 dest_mode = orig_mode;
1037 pushing = push_operand (dest, dest_mode);
1038 if (!can_decompose_p (x: dest)
1039 || (side_effects_p (dest) && !pushing)
1040 || (!SCALAR_INT_MODE_P (dest_mode)
1041 && !resolve_reg_p (x: dest)
1042 && !resolve_subreg_p (x: dest)))
1043 {
1044 if (real_dest == NULL_RTX)
1045 real_dest = dest;
1046 if (!SCALAR_INT_MODE_P (dest_mode))
1047 dest_mode = int_mode_for_mode (dest_mode).require ();
1048 dest = gen_reg_rtx (dest_mode);
1049 if (REG_P (real_dest))
1050 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1051 }
1052
1053 if (pushing)
1054 {
1055 unsigned int i, j, jinc;
1056
1057 gcc_assert (orig_size % UNITS_PER_WORD == 0);
1058 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1059 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1060
1061 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1062 {
1063 j = 0;
1064 jinc = 1;
1065 }
1066 else
1067 {
1068 j = words - 1;
1069 jinc = -1;
1070 }
1071
1072 for (i = 0; i < words; ++i, j += jinc)
1073 {
1074 rtx temp;
1075
1076 temp = copy_rtx (XEXP (dest, 0));
1077 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1078 j * UNITS_PER_WORD);
1079 emit_move_insn (temp,
1080 simplify_gen_subreg_concatn (outermode: word_mode, op: src,
1081 innermode: orig_mode,
1082 byte: j * UNITS_PER_WORD));
1083 }
1084 }
1085 else
1086 {
1087 unsigned int i;
1088
1089 for (i = 0; i < words; ++i)
1090 {
1091 rtx t = simplify_gen_subreg_concatn (outermode: word_mode, op: dest,
1092 innermode: dest_mode,
1093 byte: i * UNITS_PER_WORD);
1094 /* simplify_gen_subreg_concatn can return (const_int 0) for
1095 some sub-objects of paradoxical subregs. As a source operand,
1096 that's fine. As a destination it must be avoided. Those are
1097 supposed to be don't care bits, so we can just drop that store
1098 on the floor. */
1099 if (t != CONST0_RTX (word_mode))
1100 emit_move_insn (t,
1101 simplify_gen_subreg_concatn (outermode: word_mode, op: src,
1102 innermode: orig_mode,
1103 byte: i * UNITS_PER_WORD));
1104 }
1105 }
1106
1107 if (real_dest != NULL_RTX)
1108 {
1109 rtx mdest, smove;
1110 rtx_insn *minsn;
1111
1112 if (dest_mode == orig_mode)
1113 mdest = dest;
1114 else
1115 mdest = simplify_gen_subreg (outermode: orig_mode, op: dest, GET_MODE (dest), byte: 0);
1116 minsn = emit_move_insn (real_dest, mdest);
1117
1118 if (AUTO_INC_DEC && MEM_P (real_dest)
1119 && !(resolve_reg_p (x: real_dest) || resolve_subreg_p (x: real_dest)))
1120 {
1121 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1122 if (note)
1123 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1124 }
1125
1126 smove = single_set (insn: minsn);
1127 gcc_assert (smove != NULL_RTX);
1128
1129 resolve_simple_move (set: smove, insn: minsn);
1130 }
1131
1132 insns = get_insns ();
1133 end_sequence ();
1134
1135 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1136
1137 emit_insn_before (insns, insn);
1138
1139 /* If we get here via self-recursion, then INSN is not yet in the insns
1140 chain and delete_insn will fail. We only want to remove INSN from the
1141 current sequence. See PR56738. */
1142 if (in_sequence_p ())
1143 remove_insn (insn);
1144 else
1145 delete_insn (insn);
1146
1147 return insns;
1148}
1149
1150/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1151 component registers. Return whether we changed something. */
1152
1153static bool
1154resolve_clobber (rtx pat, rtx_insn *insn)
1155{
1156 rtx reg;
1157 machine_mode orig_mode;
1158 unsigned int orig_size, words, i;
1159 int ret;
1160
1161 reg = XEXP (pat, 0);
1162 /* For clobbers we can look through paradoxical subregs which
1163 we do not handle in simplify_gen_subreg_concatn. */
1164 if (paradoxical_subreg_p (x: reg))
1165 reg = SUBREG_REG (reg);
1166 if (!resolve_reg_p (x: reg) && !resolve_subreg_p (x: reg))
1167 return false;
1168
1169 orig_mode = GET_MODE (reg);
1170 if (!interesting_mode_p (mode: orig_mode, bytes: &orig_size, words: &words))
1171 gcc_unreachable ();
1172
1173 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1174 simplify_gen_subreg_concatn (outermode: word_mode, op: reg,
1175 innermode: orig_mode, byte: 0),
1176 0);
1177 df_insn_rescan (insn);
1178 gcc_assert (ret != 0);
1179
1180 for (i = words - 1; i > 0; --i)
1181 {
1182 rtx x;
1183
1184 x = simplify_gen_subreg_concatn (outermode: word_mode, op: reg, innermode: orig_mode,
1185 byte: i * UNITS_PER_WORD);
1186 x = gen_rtx_CLOBBER (VOIDmode, x);
1187 emit_insn_after (x, insn);
1188 }
1189
1190 resolve_reg_notes (insn);
1191
1192 return true;
1193}
1194
1195/* A USE of a decomposed register is no longer meaningful. Return
1196 whether we changed something. */
1197
1198static bool
1199resolve_use (rtx pat, rtx_insn *insn)
1200{
1201 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1202 {
1203 delete_insn (insn);
1204 return true;
1205 }
1206
1207 resolve_reg_notes (insn);
1208
1209 return false;
1210}
1211
1212/* A VAR_LOCATION can be simplified. */
1213
1214static void
1215resolve_debug (rtx_insn *insn)
1216{
1217 subrtx_ptr_iterator::array_type array;
1218 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1219 {
1220 rtx *loc = *iter;
1221 rtx x = *loc;
1222 if (resolve_subreg_p (x))
1223 {
1224 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1225 SUBREG_BYTE (x));
1226
1227 if (x)
1228 *loc = x;
1229 else
1230 x = copy_rtx (*loc);
1231 }
1232 if (resolve_reg_p (x))
1233 *loc = copy_rtx (x);
1234 }
1235
1236 df_insn_rescan (insn);
1237
1238 resolve_reg_notes (insn);
1239}
1240
1241/* Check if INSN is a decomposable multiword-shift or zero-extend and
1242 set the decomposable_context bitmap accordingly. SPEED_P is true
1243 if we are optimizing INSN for speed rather than size. Return true
1244 if INSN is decomposable. */
1245
1246static bool
1247find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1248{
1249 rtx set;
1250 rtx op;
1251 rtx op_operand;
1252
1253 set = single_set (insn);
1254 if (!set)
1255 return false;
1256
1257 op = SET_SRC (set);
1258 if (GET_CODE (op) != ASHIFT
1259 && GET_CODE (op) != LSHIFTRT
1260 && GET_CODE (op) != ASHIFTRT
1261 && GET_CODE (op) != ZERO_EXTEND)
1262 return false;
1263
1264 op_operand = XEXP (op, 0);
1265 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1266 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1267 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1268 || GET_MODE (op) != twice_word_mode)
1269 return false;
1270
1271 if (GET_CODE (op) == ZERO_EXTEND)
1272 {
1273 if (GET_MODE (op_operand) != word_mode
1274 || !choices[speed_p].splitting_zext)
1275 return false;
1276 }
1277 else /* left or right shift */
1278 {
1279 bool *splitting = (GET_CODE (op) == ASHIFT
1280 ? choices[speed_p].splitting_ashift
1281 : GET_CODE (op) == ASHIFTRT
1282 ? choices[speed_p].splitting_ashiftrt
1283 : choices[speed_p].splitting_lshiftrt);
1284 if (!CONST_INT_P (XEXP (op, 1))
1285 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1286 2 * BITS_PER_WORD - 1)
1287 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1288 return false;
1289
1290 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1291 }
1292
1293 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1294
1295 return true;
1296}
1297
1298/* Decompose a more than word wide shift (in INSN) of a multiword
1299 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1300 and 'set to zero' insn. SPEED_P says whether we are optimizing
1301 for speed or size, when checking if a ZERO_EXTEND is preferable.
1302 Return a pointer to the new insn when a replacement was done. */
1303
1304static rtx_insn *
1305resolve_shift_zext (rtx_insn *insn, bool speed_p)
1306{
1307 rtx set;
1308 rtx op;
1309 rtx op_operand;
1310 rtx_insn *insns;
1311 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1312 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1313 scalar_int_mode inner_mode;
1314
1315 set = single_set (insn);
1316 if (!set)
1317 return NULL;
1318
1319 op = SET_SRC (set);
1320 if (GET_CODE (op) != ASHIFT
1321 && GET_CODE (op) != LSHIFTRT
1322 && GET_CODE (op) != ASHIFTRT
1323 && GET_CODE (op) != ZERO_EXTEND)
1324 return NULL;
1325
1326 op_operand = XEXP (op, 0);
1327 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), result: &inner_mode))
1328 return NULL;
1329
1330 /* We can tear this operation apart only if the regs were already
1331 torn apart. */
1332 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (x: op_operand))
1333 return NULL;
1334
1335 /* src_reg_num is the number of the word mode register which we
1336 are operating on. For a left shift and a zero_extend on little
1337 endian machines this is register 0. */
1338 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1339 ? 1 : 0;
1340
1341 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (mode: inner_mode) > UNITS_PER_WORD)
1342 src_reg_num = 1 - src_reg_num;
1343
1344 if (GET_CODE (op) == ZERO_EXTEND)
1345 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1346 else
1347 dest_reg_num = 1 - src_reg_num;
1348
1349 offset1 = UNITS_PER_WORD * dest_reg_num;
1350 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1351 src_offset = UNITS_PER_WORD * src_reg_num;
1352
1353 start_sequence ();
1354
1355 dest_reg = simplify_gen_subreg_concatn (outermode: word_mode, SET_DEST (set),
1356 GET_MODE (SET_DEST (set)),
1357 byte: offset1);
1358 dest_upper = simplify_gen_subreg_concatn (outermode: word_mode, SET_DEST (set),
1359 GET_MODE (SET_DEST (set)),
1360 byte: offset2);
1361 src_reg = simplify_gen_subreg_concatn (outermode: word_mode, op: op_operand,
1362 GET_MODE (op_operand),
1363 byte: src_offset);
1364 if (GET_CODE (op) == ASHIFTRT
1365 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1366 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1367 BITS_PER_WORD - 1, NULL_RTX, 0);
1368
1369 if (GET_CODE (op) != ZERO_EXTEND)
1370 {
1371 int shift_count = INTVAL (XEXP (op, 1));
1372 if (shift_count > BITS_PER_WORD)
1373 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1374 LSHIFT_EXPR : RSHIFT_EXPR,
1375 word_mode, src_reg,
1376 shift_count - BITS_PER_WORD,
1377 dest_reg, GET_CODE (op) != ASHIFTRT);
1378 }
1379
1380 /* Consider using ZERO_EXTEND instead of setting DEST_UPPER to zero
1381 if this is considered reasonable. */
1382 if (GET_CODE (op) == LSHIFTRT
1383 && GET_MODE (op) == twice_word_mode
1384 && REG_P (SET_DEST (set))
1385 && !choices[speed_p].splitting_zext)
1386 {
1387 rtx tmp = force_reg (word_mode, copy_rtx (src_reg));
1388 tmp = simplify_gen_unary (code: ZERO_EXTEND, twice_word_mode, op: tmp, op_mode: word_mode);
1389 emit_move_insn (SET_DEST (set), tmp);
1390 }
1391 else
1392 {
1393 if (dest_reg != src_reg)
1394 emit_move_insn (dest_reg, src_reg);
1395 if (GET_CODE (op) != ASHIFTRT)
1396 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1397 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1398 emit_move_insn (dest_upper, copy_rtx (src_reg));
1399 else
1400 emit_move_insn (dest_upper, upper_src);
1401 }
1402
1403 insns = get_insns ();
1404
1405 end_sequence ();
1406
1407 emit_insn_before (insns, insn);
1408
1409 if (dump_file)
1410 {
1411 rtx_insn *in;
1412 fprintf (stream: dump_file, format: "; Replacing insn: %d with insns: ", INSN_UID (insn));
1413 for (in = insns; in != insn; in = NEXT_INSN (insn: in))
1414 fprintf (stream: dump_file, format: "%d ", INSN_UID (insn: in));
1415 fprintf (stream: dump_file, format: "\n");
1416 }
1417
1418 delete_insn (insn);
1419 return insns;
1420}
1421
1422/* Print to dump_file a description of what we're doing with shift code CODE.
1423 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1424
1425static void
1426dump_shift_choices (enum rtx_code code, bool *splitting)
1427{
1428 int i;
1429 const char *sep;
1430
1431 fprintf (stream: dump_file,
1432 format: " Splitting mode %s for %s lowering with shift amounts = ",
1433 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1434 sep = "";
1435 for (i = 0; i < BITS_PER_WORD; i++)
1436 if (splitting[i])
1437 {
1438 fprintf (stream: dump_file, format: "%s%d", sep, i + BITS_PER_WORD);
1439 sep = ",";
1440 }
1441 fprintf (stream: dump_file, format: "\n");
1442}
1443
1444/* Print to dump_file a description of what we're doing when optimizing
1445 for speed or size; SPEED_P says which. DESCRIPTION is a description
1446 of the SPEED_P choice. */
1447
1448static void
1449dump_choices (bool speed_p, const char *description)
1450{
1451 unsigned int size, factor, i;
1452
1453 fprintf (stream: dump_file, format: "Choices when optimizing for %s:\n", description);
1454
1455 for (i = 0; i < MAX_MACHINE_MODE; i++)
1456 if (interesting_mode_p (mode: (machine_mode) i, bytes: &size, words: &factor)
1457 && factor > 1)
1458 fprintf (stream: dump_file, format: " %s mode %s for copy lowering.\n",
1459 choices[speed_p].move_modes_to_split[i]
1460 ? "Splitting"
1461 : "Skipping",
1462 GET_MODE_NAME ((machine_mode) i));
1463
1464 fprintf (stream: dump_file, format: " %s mode %s for zero_extend lowering.\n",
1465 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1466 GET_MODE_NAME (twice_word_mode));
1467
1468 dump_shift_choices (code: ASHIFT, choices[speed_p].splitting_ashift);
1469 dump_shift_choices (code: LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1470 dump_shift_choices (code: ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1471 fprintf (stream: dump_file, format: "\n");
1472}
1473
1474/* Look for registers which are always accessed via word-sized SUBREGs
1475 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1476 registers into several word-sized pseudo-registers. */
1477
1478static void
1479decompose_multiword_subregs (bool decompose_copies)
1480{
1481 unsigned int max;
1482 basic_block bb;
1483 bool speed_p;
1484
1485 if (dump_file)
1486 {
1487 dump_choices (speed_p: false, description: "size");
1488 dump_choices (speed_p: true, description: "speed");
1489 }
1490
1491 /* Check if this target even has any modes to consider lowering. */
1492 if (!choices[false].something_to_do && !choices[true].something_to_do)
1493 {
1494 if (dump_file)
1495 fprintf (stream: dump_file, format: "Nothing to do!\n");
1496 return;
1497 }
1498
1499 max = max_reg_num ();
1500
1501 /* First see if there are any multi-word pseudo-registers. If there
1502 aren't, there is nothing we can do. This should speed up this
1503 pass in the normal case, since it should be faster than scanning
1504 all the insns. */
1505 {
1506 unsigned int i;
1507 bool useful_modes_seen = false;
1508
1509 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1510 if (regno_reg_rtx[i] != NULL)
1511 {
1512 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1513 if (choices[false].move_modes_to_split[(int) mode]
1514 || choices[true].move_modes_to_split[(int) mode])
1515 {
1516 useful_modes_seen = true;
1517 break;
1518 }
1519 }
1520
1521 if (!useful_modes_seen)
1522 {
1523 if (dump_file)
1524 fprintf (stream: dump_file, format: "Nothing to lower in this function.\n");
1525 return;
1526 }
1527 }
1528
1529 if (df)
1530 {
1531 df_set_flags (DF_DEFER_INSN_RESCAN);
1532 run_word_dce ();
1533 }
1534
1535 /* FIXME: It may be possible to change this code to look for each
1536 multi-word pseudo-register and to find each insn which sets or
1537 uses that register. That should be faster than scanning all the
1538 insns. */
1539
1540 decomposable_context = BITMAP_ALLOC (NULL);
1541 non_decomposable_context = BITMAP_ALLOC (NULL);
1542 subreg_context = BITMAP_ALLOC (NULL);
1543
1544 reg_copy_graph.create (nelems: max);
1545 reg_copy_graph.safe_grow_cleared (len: max, exact: true);
1546 memset (s: reg_copy_graph.address (), c: 0, n: sizeof (bitmap) * max);
1547
1548 speed_p = optimize_function_for_speed_p (cfun);
1549 FOR_EACH_BB_FN (bb, cfun)
1550 {
1551 rtx_insn *insn;
1552
1553 FOR_BB_INSNS (bb, insn)
1554 {
1555 rtx set;
1556 enum classify_move_insn cmi;
1557 int i, n;
1558
1559 if (!INSN_P (insn)
1560 || GET_CODE (PATTERN (insn)) == CLOBBER
1561 || GET_CODE (PATTERN (insn)) == USE)
1562 continue;
1563
1564 recog_memoized (insn);
1565
1566 if (find_decomposable_shift_zext (insn, speed_p))
1567 continue;
1568
1569 extract_insn (insn);
1570
1571 set = simple_move (insn, speed_p);
1572
1573 if (!set)
1574 cmi = NOT_SIMPLE_MOVE;
1575 else
1576 {
1577 /* We mark pseudo-to-pseudo copies as decomposable during the
1578 second pass only. The first pass is so early that there is
1579 good chance such moves will be optimized away completely by
1580 subsequent optimizations anyway.
1581
1582 However, we call find_pseudo_copy even during the first pass
1583 so as to properly set up the reg_copy_graph. */
1584 if (find_pseudo_copy (set))
1585 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1586 else
1587 cmi = SIMPLE_MOVE;
1588 }
1589
1590 n = recog_data.n_operands;
1591 for (i = 0; i < n; ++i)
1592 {
1593 find_decomposable_subregs (loc: &recog_data.operand[i], pcmi: &cmi);
1594
1595 /* We handle ASM_OPERANDS as a special case to support
1596 things like x86 rdtsc which returns a DImode value.
1597 We can decompose the output, which will certainly be
1598 operand 0, but not the inputs. */
1599
1600 if (cmi == SIMPLE_MOVE
1601 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1602 {
1603 gcc_assert (i == 0);
1604 cmi = NOT_SIMPLE_MOVE;
1605 }
1606 }
1607 }
1608 }
1609
1610 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1611 if (!bitmap_empty_p (map: decomposable_context))
1612 {
1613 unsigned int i;
1614 sbitmap_iterator sbi;
1615 bitmap_iterator iter;
1616 unsigned int regno;
1617
1618 propagate_pseudo_copies ();
1619
1620 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1621 bitmap_clear (sub_blocks);
1622
1623 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1624 decompose_register (regno);
1625
1626 FOR_EACH_BB_FN (bb, cfun)
1627 {
1628 rtx_insn *insn;
1629
1630 FOR_BB_INSNS (bb, insn)
1631 {
1632 rtx pat;
1633
1634 if (!INSN_P (insn))
1635 continue;
1636
1637 pat = PATTERN (insn);
1638 if (GET_CODE (pat) == CLOBBER)
1639 resolve_clobber (pat, insn);
1640 else if (GET_CODE (pat) == USE)
1641 resolve_use (pat, insn);
1642 else if (DEBUG_INSN_P (insn))
1643 resolve_debug (insn);
1644 else
1645 {
1646 rtx set;
1647 int i;
1648
1649 recog_memoized (insn);
1650 extract_insn (insn);
1651
1652 set = simple_move (insn, speed_p);
1653 if (set)
1654 {
1655 rtx_insn *orig_insn = insn;
1656 bool cfi = control_flow_insn_p (insn);
1657
1658 /* We can end up splitting loads to multi-word pseudos
1659 into separate loads to machine word size pseudos.
1660 When this happens, we first had one load that can
1661 throw, and after resolve_simple_move we'll have a
1662 bunch of loads (at least two). All those loads may
1663 trap if we can have non-call exceptions, so they
1664 all will end the current basic block. We split the
1665 block after the outer loop over all insns, but we
1666 make sure here that we will be able to split the
1667 basic block and still produce the correct control
1668 flow graph for it. */
1669 gcc_assert (!cfi
1670 || (cfun->can_throw_non_call_exceptions
1671 && can_throw_internal (insn)));
1672
1673 insn = resolve_simple_move (set, insn);
1674 if (insn != orig_insn)
1675 {
1676 recog_memoized (insn);
1677 extract_insn (insn);
1678
1679 if (cfi)
1680 bitmap_set_bit (map: sub_blocks, bitno: bb->index);
1681 }
1682 }
1683 else
1684 {
1685 rtx_insn *decomposed_shift;
1686
1687 decomposed_shift = resolve_shift_zext (insn, speed_p);
1688 if (decomposed_shift != NULL_RTX)
1689 {
1690 insn = decomposed_shift;
1691 recog_memoized (insn);
1692 extract_insn (insn);
1693 }
1694 }
1695
1696 for (i = recog_data.n_operands - 1; i >= 0; --i)
1697 resolve_subreg_use (loc: recog_data.operand_loc[i], insn);
1698
1699 resolve_reg_notes (insn);
1700
1701 if (num_validated_changes () > 0)
1702 {
1703 for (i = recog_data.n_dups - 1; i >= 0; --i)
1704 {
1705 rtx *pl = recog_data.dup_loc[i];
1706 int dup_num = recog_data.dup_num[i];
1707 rtx *px = recog_data.operand_loc[dup_num];
1708
1709 validate_unshare_change (insn, pl, *px, 1);
1710 }
1711
1712 i = apply_change_group ();
1713 gcc_assert (i);
1714 }
1715 }
1716 }
1717 }
1718
1719 /* If we had insns to split that caused control flow insns in the middle
1720 of a basic block, split those blocks now. Note that we only handle
1721 the case where splitting a load has caused multiple possibly trapping
1722 loads to appear. */
1723 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1724 {
1725 rtx_insn *insn, *end;
1726 edge fallthru;
1727
1728 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1729 insn = BB_HEAD (bb);
1730 end = BB_END (bb);
1731
1732 while (insn != end)
1733 {
1734 if (control_flow_insn_p (insn))
1735 {
1736 /* Split the block after insn. There will be a fallthru
1737 edge, which is OK so we keep it. We have to create the
1738 exception edges ourselves. */
1739 fallthru = split_block (bb, insn);
1740 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1741 bb = fallthru->dest;
1742 insn = BB_HEAD (bb);
1743 }
1744 else
1745 insn = NEXT_INSN (insn);
1746 }
1747 }
1748 }
1749
1750 for (bitmap b : reg_copy_graph)
1751 if (b)
1752 BITMAP_FREE (b);
1753
1754 reg_copy_graph.release ();
1755
1756 BITMAP_FREE (decomposable_context);
1757 BITMAP_FREE (non_decomposable_context);
1758 BITMAP_FREE (subreg_context);
1759}
1760
1761/* Implement first lower subreg pass. */
1762
1763namespace {
1764
1765const pass_data pass_data_lower_subreg =
1766{
1767 .type: RTL_PASS, /* type */
1768 .name: "subreg1", /* name */
1769 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1770 .tv_id: TV_LOWER_SUBREG, /* tv_id */
1771 .properties_required: 0, /* properties_required */
1772 .properties_provided: 0, /* properties_provided */
1773 .properties_destroyed: 0, /* properties_destroyed */
1774 .todo_flags_start: 0, /* todo_flags_start */
1775 .todo_flags_finish: 0, /* todo_flags_finish */
1776};
1777
1778class pass_lower_subreg : public rtl_opt_pass
1779{
1780public:
1781 pass_lower_subreg (gcc::context *ctxt)
1782 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1783 {}
1784
1785 /* opt_pass methods: */
1786 bool gate (function *) final override { return flag_split_wide_types != 0; }
1787 unsigned int execute (function *) final override
1788 {
1789 decompose_multiword_subregs (decompose_copies: false);
1790 return 0;
1791 }
1792
1793}; // class pass_lower_subreg
1794
1795} // anon namespace
1796
1797rtl_opt_pass *
1798make_pass_lower_subreg (gcc::context *ctxt)
1799{
1800 return new pass_lower_subreg (ctxt);
1801}
1802
1803/* Implement second lower subreg pass. */
1804
1805namespace {
1806
1807const pass_data pass_data_lower_subreg2 =
1808{
1809 .type: RTL_PASS, /* type */
1810 .name: "subreg2", /* name */
1811 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1812 .tv_id: TV_LOWER_SUBREG, /* tv_id */
1813 .properties_required: 0, /* properties_required */
1814 .properties_provided: 0, /* properties_provided */
1815 .properties_destroyed: 0, /* properties_destroyed */
1816 .todo_flags_start: 0, /* todo_flags_start */
1817 TODO_df_finish, /* todo_flags_finish */
1818};
1819
1820class pass_lower_subreg2 : public rtl_opt_pass
1821{
1822public:
1823 pass_lower_subreg2 (gcc::context *ctxt)
1824 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1825 {}
1826
1827 /* opt_pass methods: */
1828 bool gate (function *) final override
1829 {
1830 return flag_split_wide_types && flag_split_wide_types_early;
1831 }
1832 unsigned int execute (function *) final override
1833 {
1834 decompose_multiword_subregs (decompose_copies: true);
1835 return 0;
1836 }
1837
1838}; // class pass_lower_subreg2
1839
1840} // anon namespace
1841
1842rtl_opt_pass *
1843make_pass_lower_subreg2 (gcc::context *ctxt)
1844{
1845 return new pass_lower_subreg2 (ctxt);
1846}
1847
1848/* Implement third lower subreg pass. */
1849
1850namespace {
1851
1852const pass_data pass_data_lower_subreg3 =
1853{
1854 .type: RTL_PASS, /* type */
1855 .name: "subreg3", /* name */
1856 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1857 .tv_id: TV_LOWER_SUBREG, /* tv_id */
1858 .properties_required: 0, /* properties_required */
1859 .properties_provided: 0, /* properties_provided */
1860 .properties_destroyed: 0, /* properties_destroyed */
1861 .todo_flags_start: 0, /* todo_flags_start */
1862 TODO_df_finish, /* todo_flags_finish */
1863};
1864
1865class pass_lower_subreg3 : public rtl_opt_pass
1866{
1867public:
1868 pass_lower_subreg3 (gcc::context *ctxt)
1869 : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
1870 {}
1871
1872 /* opt_pass methods: */
1873 bool gate (function *) final override { return flag_split_wide_types; }
1874 unsigned int execute (function *) final override
1875 {
1876 decompose_multiword_subregs (decompose_copies: true);
1877 return 0;
1878 }
1879
1880}; // class pass_lower_subreg3
1881
1882} // anon namespace
1883
1884rtl_opt_pass *
1885make_pass_lower_subreg3 (gcc::context *ctxt)
1886{
1887 return new pass_lower_subreg3 (ctxt);
1888}
1889

/* Source code of gcc/lower-subreg.cc.  */