1 | /* Decompose multiword subregs. |
2 | Copyright (C) 2007-2023 Free Software Foundation, Inc. |
3 | Contributed by Richard Henderson <rth@redhat.com> |
4 | Ian Lance Taylor <iant@google.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "rtl.h" |
27 | #include "tree.h" |
28 | #include "cfghooks.h" |
29 | #include "df.h" |
30 | #include "memmodel.h" |
31 | #include "tm_p.h" |
32 | #include "expmed.h" |
33 | #include "insn-config.h" |
34 | #include "emit-rtl.h" |
35 | #include "recog.h" |
36 | #include "cfgrtl.h" |
37 | #include "cfgbuild.h" |
38 | #include "dce.h" |
39 | #include "expr.h" |
40 | #include "explow.h" |
41 | #include "tree-pass.h" |
42 | #include "lower-subreg.h" |
43 | #include "rtl-iter.h" |
44 | #include "target.h" |
45 | |
46 | |
47 | /* Decompose multi-word pseudo-registers into individual |
48 | pseudo-registers when possible and profitable. This is possible |
49 | when all the uses of a multi-word register are via SUBREG, or are |
50 | copies of the register to another location. Breaking apart the |
51 | register permits more CSE and permits better register allocation. |
52 | This is profitable if the machine does not have move instructions |
53 | to do this. |
54 | |
55 | This pass only splits moves with modes that are wider than |
56 | word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with |
57 | integer modes that are twice the width of word_mode. The latter |
58 | could be generalized if there was a need to do this, but the trend in |
59 | architectures is to not need this. |
60 | |
61 | There are two useful preprocessor defines for use by maintainers: |
62 | |
63 | #define LOG_COSTS 1 |
64 | |
65 | if you wish to see the actual cost estimates that are being used |
66 | for each mode wider than word mode and the cost estimates for zero |
67 | extension and the shifts. This can be useful when port maintainers |
68 | are tuning insn rtx costs. |
69 | |
70 | #define FORCE_LOWERING 1 |
71 | |
72 | if you wish to test the pass with all the transformation forced on. |
73 | This can be useful for finding bugs in the transformations. */ |
74 | |
/* Maintainer-debugging knobs described above; both must be 0 in
   committed code.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/profitability state for this pass.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Convenience accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
106 | |
107 | /* Return true if MODE is a mode we know how to lower. When returning true, |
108 | store its byte size in *BYTES and its word size in *WORDS. */ |
109 | |
110 | static inline bool |
111 | interesting_mode_p (machine_mode mode, unsigned int *bytes, |
112 | unsigned int *words) |
113 | { |
114 | if (!GET_MODE_SIZE (mode).is_constant (const_value: bytes)) |
115 | return false; |
116 | *words = CEIL (*bytes, UNITS_PER_WORD); |
117 | return true; |
118 | } |
119 | |
/* RTXes used while computing costs.  These are built once per target
   in init_lower_subreg and then mutated in place (codes and modes are
   rewritten) by the cost-estimation routines below.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
135 | |
/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    machine_mode mode, int op1)
{
  /* Reuse the preallocated shift rtx: install the requested shift
     code, put the shift and its source operand in MODE, and set the
     shift amount before querying the cost.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (x: rtxes->shift, mode);
  PUT_MODE (x: rtxes->source, mode);
  XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
  return set_src_cost (x: rtxes->shift, mode, speed_p);
}
149 | |
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the full shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      op1: i + BITS_PER_WORD);
      /* Cost of producing the "interesting" word of the split result;
	 shifting by exactly BITS_PER_WORD degenerates into a plain
	 word move.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, mode: word_mode, op1: i);

      /* Cost of producing the other word: zero for logical shifts, a
	 sign copy (or a BITS_PER_WORD-1 arithmetic shift) for
	 arithmetic right shifts.  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, mode: word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, format: "%s %s by %d: original cost %d, split cost %d + %d\n" ,
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
192 | |
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline cost of loading zero into a word-mode register.  */
  PUT_MODE (x: rtxes->target, mode: word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (x: rtxes->set, speed_p);

  /* Baseline cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (x: rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, format: "%s move: from zero cost %d, from reg cost %d\n" ,
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every constant-sized multi-word mode, decide whether a move in
     that mode costs at least as much as one word move per word; if so
     the mode is worth splitting.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, bytes: &size, words: &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (x: rtxes->target, mode);
	  PUT_MODE (x: rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (x: rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, format: "%s move: original cost %d, split cost %d * %d\n" ,
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (x: rtxes->source, mode: word_mode);
      zext_cost = set_src_cost (x: rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, format: "%s %s: original cost %d, split cost %d + %d\n" ,
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Record which double-word shift amounts are worth splitting,
	 separately for each shift code.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, code: ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, code: LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, code: ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
272 | |
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (s: this_target_lower_subreg, c: 0, n: sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (m: word_mode).require ();

  /* Build the scratch rtxes once; the cost routines rewrite their
     codes and modes as needed.  Register numbers just past the
     virtuals make them look like ordinary pseudos.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Fill in the size-optimization and speed-optimization choices.  */
  if (LOG_COSTS)
    fprintf (stderr, format: "\nSize costs\n==========\n\n" );
  compute_costs (speed_p: false, rtxes: &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, format: "\nSpeed costs\n===========\n\n" );
  compute_costs (speed_p: true, rtxes: &rtxes);
}
300 | |
301 | static bool |
302 | simple_move_operand (rtx x) |
303 | { |
304 | if (GET_CODE (x) == SUBREG) |
305 | x = SUBREG_REG (x); |
306 | |
307 | if (!OBJECT_P (x)) |
308 | return false; |
309 | |
310 | if (GET_CODE (x) == LABEL_REF |
311 | || GET_CODE (x) == SYMBOL_REF |
312 | || GET_CODE (x) == HIGH |
313 | || GET_CODE (x) == CONST) |
314 | return false; |
315 | |
316 | if (MEM_P (x) |
317 | && (MEM_VOLATILE_P (x) |
318 | || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) |
319 | return false; |
320 | |
321 | return true; |
322 | } |
323 | |
324 | /* If X is an operator that can be treated as a simple move that we |
325 | can split, then return the operand that is operated on. */ |
326 | |
327 | static rtx |
328 | operand_for_swap_move_operator (rtx x) |
329 | { |
330 | /* A word sized rotate of a register pair is equivalent to swapping |
331 | the registers in the register pair. */ |
332 | if (GET_CODE (x) == ROTATE |
333 | && GET_MODE (x) == twice_word_mode |
334 | && simple_move_operand (XEXP (x, 0)) |
335 | && CONST_INT_P (XEXP (x, 1)) |
336 | && INTVAL (XEXP (x, 1)) == BITS_PER_WORD) |
337 | return XEXP (x, 0); |
338 | |
339 | return NULL_RTX; |
340 | } |
341 | |
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x, op;
  rtx set;
  machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the two recog operands and itself
     a simple move operand.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  /* Look through a word-swapping ROTATE wrapped around the source.  */
  x = SET_SRC (set);
  if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
    x = op;

  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  scalar_int_mode int_mode;
  if (!SCALAR_INT_MODE_P (mode)
      && (!int_mode_for_size (size: GET_MODE_BITSIZE (mode), limit: 0).exists (mode: &int_mode)
	  || !targetm.modes_tieable_p (mode, int_mode)))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, only split moves in modes the cost analysis found
     profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
403 | |
404 | /* If SET is a copy from one multi-word pseudo-register to another, |
405 | record that in reg_copy_graph. Return whether it is such a |
406 | copy. */ |
407 | |
408 | static bool |
409 | find_pseudo_copy (rtx set) |
410 | { |
411 | rtx dest = SET_DEST (set); |
412 | rtx src = SET_SRC (set); |
413 | rtx op; |
414 | unsigned int rd, rs; |
415 | bitmap b; |
416 | |
417 | if ((op = operand_for_swap_move_operator (x: src)) != NULL_RTX) |
418 | src = op; |
419 | |
420 | if (!REG_P (dest) || !REG_P (src)) |
421 | return false; |
422 | |
423 | rd = REGNO (dest); |
424 | rs = REGNO (src); |
425 | if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) |
426 | return false; |
427 | |
428 | b = reg_copy_graph[rs]; |
429 | if (b == NULL) |
430 | { |
431 | b = BITMAP_ALLOC (NULL); |
432 | reg_copy_graph[rs] = b; |
433 | } |
434 | |
435 | bitmap_set_bit (b, rd); |
436 | |
437 | return true; |
438 | } |
439 | |
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* Standard worklist algorithm: QUEUE holds registers whose copy
     destinations have not yet been examined.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Collect every copy destination of a queued register, minus
	 those already known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (A: propagate, B: b, C: non_decomposable_context);
	}

      /* Requeue only the destinations not seen before, then mark them
	 all decomposable.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (map: queue));
}
471 | |
/* A pointer to one of these values is passed to
   find_decomposable_subregs to say how the insn containing the
   current expression uses its registers.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
484 | |
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   DATA will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are not candidates; don't look inside
		 the subreg either.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), bytes: &outer_size, words: &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), bytes: &inner_size,
				      words: &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we return -1 to avoid iterating
	     over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1
	      && inner_words > 1
	      /* Don't allow to decompose floating point subregs of
		 multi-word pseudos if the floating point mode does
		 not have word size, because otherwise we'd generate
		 a subreg with that floating mode from a different
		 sized integral pseudo which is not allowed by
		 validate_subreg.  */
	      && (!FLOAT_MODE_P (GET_MODE (x))
		  || outer_size == UNITS_PER_WORD))
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we cannot decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), bytes: &size, words: &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  /* Only worthwhile if the word-sized pieces can share
		     registers with word_mode values.  */
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and return -1 to block the parent's recursion.  */
	  find_decomposable_subregs (loc: &XEXP (x, 0), pcmi: &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
611 | |
/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int size, words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  /* REGNO no longer names a register once it has been decomposed.  */
  regno_reg_rtx[regno] = NULL_RTX;

  if (!interesting_mode_p (GET_MODE (reg), bytes: &size, words: &words))
    gcc_unreachable ();

  /* Allocate a fresh word-mode pseudo for each word of REG.  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Smash REG itself into a CONCATN of the new pseudos.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (stream: dump_file, format: "; Splitting reg %u ->" , regno);
      for (i = 0; i < words; ++i)
	fprintf (stream: dump_file, format: " %u" , REGNO (XVECEXP (reg, 0, i)));
      fputc (c: '\n', stream: dump_file);
    }
}
646 | |
/* Get a SUBREG of a CONCATN.  Return the piece of OP (a CONCATN)
   that corresponds to a subreg of mode OUTERMODE at byte ORIG_BYTE,
   or NULL_RTX if the requested piece straddles element boundaries.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (mode: outermode, bytes: &outer_size, words: &outer_words)
      || !interesting_mode_p (mode: innermode, bytes: &inner_size, words: &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* Locate the CONCATN element containing BYTE; after this INNER_SIZE
     is the size of a single element.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, op: part, innermode: partmode, byte: final_offset);
}
691 | |
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is a pure mode change; recurse
	 on the CONCATN underneath.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise OP extracts a part of the CONCATN; resolve it.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The extraction straddled an element; fold the two byte
	     offsets together and extract directly.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 orig_byte: byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, orig_byte: byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (x: op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
747 | |
748 | /* Return whether we should resolve X into the registers into which it |
749 | was decomposed. */ |
750 | |
751 | static bool |
752 | resolve_reg_p (rtx x) |
753 | { |
754 | return GET_CODE (x) == CONCATN; |
755 | } |
756 | |
757 | /* Return whether X is a SUBREG of a register which we need to |
758 | resolve. */ |
759 | |
760 | static bool |
761 | resolve_subreg_p (rtx x) |
762 | { |
763 | if (GET_CODE (x) != SUBREG) |
764 | return false; |
765 | return resolve_reg_p (SUBREG_REG (x)); |
766 | } |
767 | |
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   containing insn, or NULL_RTX when *LOC comes from a note.  Return
   true if the caller must discard *LOC (see below).  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller is responsible for
	     committing or cancelling the whole change group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
805 | |
806 | /* Resolve any decomposed registers which appear in register notes on |
807 | INSN. */ |
808 | |
809 | static void |
810 | resolve_reg_notes (rtx_insn *insn) |
811 | { |
812 | rtx *pnote, note; |
813 | |
814 | note = find_reg_equal_equiv_note (insn); |
815 | if (note) |
816 | { |
817 | int old_count = num_validated_changes (); |
818 | if (resolve_subreg_use (loc: &XEXP (note, 0), NULL_RTX)) |
819 | remove_note (insn, note); |
820 | else |
821 | if (old_count != num_validated_changes ()) |
822 | df_notes_rescan (insn); |
823 | } |
824 | |
825 | pnote = ®_NOTES (insn); |
826 | while (*pnote != NULL_RTX) |
827 | { |
828 | bool del = false; |
829 | |
830 | note = *pnote; |
831 | switch (REG_NOTE_KIND (note)) |
832 | { |
833 | case REG_DEAD: |
834 | case REG_UNUSED: |
835 | if (resolve_reg_p (XEXP (note, 0))) |
836 | del = true; |
837 | break; |
838 | |
839 | default: |
840 | break; |
841 | } |
842 | |
843 | if (del) |
844 | *pnote = XEXP (note, 1); |
845 | else |
846 | pnote = &XEXP (note, 1); |
847 | } |
848 | } |
849 | |
850 | /* Return whether X can be decomposed into subwords. */ |
851 | |
852 | static bool |
853 | can_decompose_p (rtx x) |
854 | { |
855 | if (REG_P (x)) |
856 | { |
857 | unsigned int regno = REGNO (x); |
858 | |
859 | if (HARD_REGISTER_NUM_P (regno)) |
860 | { |
861 | unsigned int byte, num_bytes, num_words; |
862 | |
863 | if (!interesting_mode_p (GET_MODE (x), bytes: &num_bytes, words: &num_words)) |
864 | return false; |
865 | for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) |
866 | if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) |
867 | return false; |
868 | return true; |
869 | } |
870 | else |
871 | return !bitmap_bit_p (subreg_context, regno); |
872 | } |
873 | |
874 | return true; |
875 | } |
876 | |
877 | /* OPND is a concatn operand this is used with a simple move operator. |
878 | Return a new rtx with the concatn's operands swapped. */ |
879 | |
880 | static rtx |
881 | resolve_operand_for_swap_move_operator (rtx opnd) |
882 | { |
883 | gcc_assert (GET_CODE (opnd) == CONCATN); |
884 | rtx concatn = copy_rtx (opnd); |
885 | rtx op0 = XVECEXP (concatn, 0, 0); |
886 | rtx op1 = XVECEXP (concatn, 0, 1); |
887 | XVECEXP (concatn, 0, 0) = op1; |
888 | XVECEXP (concatn, 0, 1) = op0; |
889 | return concatn; |
890 | } |
891 | |
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest, src_op;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Only multiword modes reach this point; the caller classified the
     insn via simple_move.  */
  if (!interesting_mode_p (mode: orig_mode, bytes: &orig_size, words: &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  /* If SRC is a swap-style operator applied to a CONCATN, fold the
     swap into whichever side is already a CONCATN.  */
  if ((src_op = operand_for_swap_move_operator (x: src)) != NULL_RTX)
    {
      if (resolve_reg_p (x: dest))
	{
	  /* DEST is a CONCATN, so swap its operands and strip
	     SRC's operator.  */
	  dest = resolve_operand_for_swap_move_operator (opnd: dest);
	  src = src_op;
	}
      else if (resolve_reg_p (x: src_op))
	{
	  /* SRC is an operation on a CONCATN, so strip the operator and
	     swap the CONCATN's operands.  */
	  src = resolve_operand_for_swap_move_operator (opnd: src_op);
	}
    }

  /* Copying from a misaligned or over-wide SUBREG of a decomposed reg:
     go through a fresh pseudo as the intermediate destination.  */
  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), b: 0)
	  || maybe_ne (a: orig_size, b: GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), b: 0)
	  || maybe_ne (a: orig_size,
		       b: GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh pseudo and recursively decompose that
	 move; the pseudo then becomes the source of the real move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (insn: minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (set: smove, insn: minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (x: src)
      && !resolve_subreg_p (x: src)
      && !resolve_reg_p (x: dest)
      && !resolve_subreg_p (x: dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we cannot validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (loc: &XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (loc: &XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (x: src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Preserve any auto-increment side effect recorded on the
		 original insn by moving its REG_INC note to the copy.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (x: dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (x: dest)
	  && !resolve_subreg_p (x: dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order of the pushes so the words end up in
	 memory in the order the stack layout expects.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (outermode: word_mode, op: src,
						       innermode: orig_mode,
						       byte: j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Copy word by word from SRC's pieces to DEST's pieces.  */
      for (i = 0; i < words; ++i)
	{
	  rtx t = simplify_gen_subreg_concatn (outermode: word_mode, op: dest,
					       innermode: dest_mode,
					       byte: i * UNITS_PER_WORD);
	  /* simplify_gen_subreg_concatn can return (const_int 0) for
	     some sub-objects of paradoxical subregs.  As a source operand,
	     that's fine.  As a destination it must be avoided.  Those are
	     supposed to be don't care bits, so we can just drop that store
	     on the floor.  */
	  if (t != CONST0_RTX (word_mode))
	    emit_move_insn (t,
			    simplify_gen_subreg_concatn (outermode: word_mode, op: src,
							 innermode: orig_mode,
							 byte: i * UNITS_PER_WORD));
	}
    }

  /* If we routed the move through a temporary, emit the final move
     into the real destination and decompose that move recursively.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (outermode: orig_mode, op: dest, GET_MODE (dest), byte: 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (x: real_dest) || resolve_subreg_p (x: real_dest)))
	{
	  /* Carry the original REG_INC note over to the final store.  */
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (insn: minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (set: smove, insn: minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1149 | |
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* For clobbers we can look through paradoxical subregs which
     we do not handle in simplify_gen_subreg_concatn.  */
  if (paradoxical_subreg_p (x: reg))
    reg = SUBREG_REG (reg);
  if (!resolve_reg_p (x: reg) && !resolve_subreg_p (x: reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (mode: orig_mode, bytes: &orig_size, words: &words))
    gcc_unreachable ();

  /* Rewrite INSN in place to clobber just the first word ...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (outermode: word_mode, op: reg,
						      innermode: orig_mode, byte: 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ... and emit a separate CLOBBER after INSN for each remaining
     word of the decomposed register.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (outermode: word_mode, op: reg, innermode: orig_mode,
				       byte: i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  /* Rewrite any register notes that still refer to the whole reg.  */
  resolve_reg_notes (insn);

  return true;
}
1194 | |
1195 | /* A USE of a decomposed register is no longer meaningful. Return |
1196 | whether we changed something. */ |
1197 | |
1198 | static bool |
1199 | resolve_use (rtx pat, rtx_insn *insn) |
1200 | { |
1201 | if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) |
1202 | { |
1203 | delete_insn (insn); |
1204 | return true; |
1205 | } |
1206 | |
1207 | resolve_reg_notes (insn); |
1208 | |
1209 | return false; |
1210 | } |
1211 | |
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold a SUBREG of a decomposed register down to the
	     matching word-sized piece.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Could not simplify; keep a copy of the original for the
	       CONCATN check below without touching *LOC.  */
	    x = copy_rtx (*loc);
	}
      /* Unshare references to decomposed registers so later edits to
	 the CONCATN do not leak into this debug insn.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1240 | |
1241 | /* Check if INSN is a decomposable multiword-shift or zero-extend and |
1242 | set the decomposable_context bitmap accordingly. SPEED_P is true |
1243 | if we are optimizing INSN for speed rather than size. Return true |
1244 | if INSN is decomposable. */ |
1245 | |
1246 | static bool |
1247 | find_decomposable_shift_zext (rtx_insn *insn, bool speed_p) |
1248 | { |
1249 | rtx set; |
1250 | rtx op; |
1251 | rtx op_operand; |
1252 | |
1253 | set = single_set (insn); |
1254 | if (!set) |
1255 | return false; |
1256 | |
1257 | op = SET_SRC (set); |
1258 | if (GET_CODE (op) != ASHIFT |
1259 | && GET_CODE (op) != LSHIFTRT |
1260 | && GET_CODE (op) != ASHIFTRT |
1261 | && GET_CODE (op) != ZERO_EXTEND) |
1262 | return false; |
1263 | |
1264 | op_operand = XEXP (op, 0); |
1265 | if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) |
1266 | || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) |
1267 | || HARD_REGISTER_NUM_P (REGNO (op_operand)) |
1268 | || GET_MODE (op) != twice_word_mode) |
1269 | return false; |
1270 | |
1271 | if (GET_CODE (op) == ZERO_EXTEND) |
1272 | { |
1273 | if (GET_MODE (op_operand) != word_mode |
1274 | || !choices[speed_p].splitting_zext) |
1275 | return false; |
1276 | } |
1277 | else /* left or right shift */ |
1278 | { |
1279 | bool *splitting = (GET_CODE (op) == ASHIFT |
1280 | ? choices[speed_p].splitting_ashift |
1281 | : GET_CODE (op) == ASHIFTRT |
1282 | ? choices[speed_p].splitting_ashiftrt |
1283 | : choices[speed_p].splitting_lshiftrt); |
1284 | if (!CONST_INT_P (XEXP (op, 1)) |
1285 | || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD, |
1286 | 2 * BITS_PER_WORD - 1) |
1287 | || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD]) |
1288 | return false; |
1289 | |
1290 | bitmap_set_bit (decomposable_context, REGNO (op_operand)); |
1291 | } |
1292 | |
1293 | bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); |
1294 | |
1295 | return true; |
1296 | } |
1297 | |
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  SPEED_P says whether we are optimizing
   for speed or size, when checking if a ZERO_EXTEND is preferable.
   Return a pointer to the new insn when a replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), result: &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (x: op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword
     operand is reversed.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (mode: inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the data
     (OFFSET1), the other destination word (OFFSET2), and the source
     word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (outermode: word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  byte: offset1);
  dest_upper = simplify_gen_subreg_concatn (outermode: word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    byte: offset2);
  src_reg = simplify_gen_subreg_concatn (outermode: word_mode, op: op_operand,
					 GET_MODE (op_operand),
					 byte: src_offset);
  /* For an arithmetic right shift the upper word is filled with copies
     of the sign bit, unless the count makes both words identical (the
     2*BITS_PER_WORD-1 case handled below).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* Counts beyond one word leave a residual word-mode shift of the
	 moved word.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* Consider using ZERO_EXTEND instead of setting DEST_UPPER to zero
     if this is considered reasonable.  */
  if (GET_CODE (op) == LSHIFTRT
      && GET_MODE (op) == twice_word_mode
      && REG_P (SET_DEST (set))
      && !choices[speed_p].splitting_zext)
    {
      rtx tmp = force_reg (word_mode, copy_rtx (src_reg));
      tmp = simplify_gen_unary (code: ZERO_EXTEND, twice_word_mode, op: tmp, op_mode: word_mode);
      emit_move_insn (SET_DEST (set), tmp);
    }
  else
    {
      /* Emit the word move plus the fill of the other word: zero for
	 logical shifts and zero-extend, sign bits for ASHIFTRT.  */
      if (dest_reg != src_reg)
	emit_move_insn (dest_reg, src_reg);
      if (GET_CODE (op) != ASHIFTRT)
	emit_move_insn (dest_upper, CONST0_RTX (word_mode));
      else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
	emit_move_insn (dest_upper, copy_rtx (src_reg));
      else
	emit_move_insn (dest_upper, upper_src);
    }

  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (stream: dump_file, format: "; Replacing insn: %d with insns: " , INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (insn: in))
	fprintf (stream: dump_file, format: "%d " , INSN_UID (insn: in));
      fprintf (stream: dump_file, format: "\n" );
    }

  delete_insn (insn);
  return insns;
}
1421 | |
1422 | /* Print to dump_file a description of what we're doing with shift code CODE. |
1423 | SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ |
1424 | |
1425 | static void |
1426 | dump_shift_choices (enum rtx_code code, bool *splitting) |
1427 | { |
1428 | int i; |
1429 | const char *sep; |
1430 | |
1431 | fprintf (stream: dump_file, |
1432 | format: " Splitting mode %s for %s lowering with shift amounts = " , |
1433 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); |
1434 | sep = "" ; |
1435 | for (i = 0; i < BITS_PER_WORD; i++) |
1436 | if (splitting[i]) |
1437 | { |
1438 | fprintf (stream: dump_file, format: "%s%d" , sep, i + BITS_PER_WORD); |
1439 | sep = "," ; |
1440 | } |
1441 | fprintf (stream: dump_file, format: "\n" ); |
1442 | } |
1443 | |
1444 | /* Print to dump_file a description of what we're doing when optimizing |
1445 | for speed or size; SPEED_P says which. DESCRIPTION is a description |
1446 | of the SPEED_P choice. */ |
1447 | |
1448 | static void |
1449 | dump_choices (bool speed_p, const char *description) |
1450 | { |
1451 | unsigned int size, factor, i; |
1452 | |
1453 | fprintf (stream: dump_file, format: "Choices when optimizing for %s:\n" , description); |
1454 | |
1455 | for (i = 0; i < MAX_MACHINE_MODE; i++) |
1456 | if (interesting_mode_p (mode: (machine_mode) i, bytes: &size, words: &factor) |
1457 | && factor > 1) |
1458 | fprintf (stream: dump_file, format: " %s mode %s for copy lowering.\n" , |
1459 | choices[speed_p].move_modes_to_split[i] |
1460 | ? "Splitting" |
1461 | : "Skipping" , |
1462 | GET_MODE_NAME ((machine_mode) i)); |
1463 | |
1464 | fprintf (stream: dump_file, format: " %s mode %s for zero_extend lowering.\n" , |
1465 | choices[speed_p].splitting_zext ? "Splitting" : "Skipping" , |
1466 | GET_MODE_NAME (twice_word_mode)); |
1467 | |
1468 | dump_shift_choices (code: ASHIFT, choices[speed_p].splitting_ashift); |
1469 | dump_shift_choices (code: LSHIFTRT, choices[speed_p].splitting_lshiftrt); |
1470 | dump_shift_choices (code: ASHIFTRT, choices[speed_p].splitting_ashiftrt); |
1471 | fprintf (stream: dump_file, format: "\n" ); |
1472 | } |
1473 | |
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (speed_p: false, description: "size" );
      dump_choices (speed_p: true, description: "speed" );
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (stream: dump_file, format: "Nothing to do!\n" );
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (stream: dump_file, format: "Nothing to lower in this function.\n" );
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (nelems: max);
  reg_copy_graph.safe_grow_cleared (len: max, exact: true);
  memset (s: reg_copy_graph.address (), c: 0, n: sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);
  /* First scan: classify every insn, recording which pseudos may be
     decomposed (decomposable_context) and which must stay whole
     (non_decomposable_context / subreg_context).  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (loc: &recog_data.operand[i], pcmi: &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in any non-decomposable context can never be
     split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (map: decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second scan: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (map: sub_blocks, bitno: bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn, speed_p);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (loc: recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched (duplicate) operands identical to
			 the operands they duplicate.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  for (bitmap b : reg_copy_graph)
    if (b)
      BITMAP_FREE (b);

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1760 | |
1761 | /* Implement first lower subreg pass. */ |
1762 | |
1763 | namespace { |
1764 | |
1765 | const pass_data pass_data_lower_subreg = |
1766 | { |
1767 | .type: RTL_PASS, /* type */ |
1768 | .name: "subreg1" , /* name */ |
1769 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1770 | .tv_id: TV_LOWER_SUBREG, /* tv_id */ |
1771 | .properties_required: 0, /* properties_required */ |
1772 | .properties_provided: 0, /* properties_provided */ |
1773 | .properties_destroyed: 0, /* properties_destroyed */ |
1774 | .todo_flags_start: 0, /* todo_flags_start */ |
1775 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1776 | }; |
1777 | |
1778 | class pass_lower_subreg : public rtl_opt_pass |
1779 | { |
1780 | public: |
1781 | pass_lower_subreg (gcc::context *ctxt) |
1782 | : rtl_opt_pass (pass_data_lower_subreg, ctxt) |
1783 | {} |
1784 | |
1785 | /* opt_pass methods: */ |
1786 | bool gate (function *) final override { return flag_split_wide_types != 0; } |
1787 | unsigned int execute (function *) final override |
1788 | { |
1789 | decompose_multiword_subregs (decompose_copies: false); |
1790 | return 0; |
1791 | } |
1792 | |
1793 | }; // class pass_lower_subreg |
1794 | |
1795 | } // anon namespace |
1796 | |
/* Allocate and return a fresh instance of the first lower-subreg pass
   for the pass manager.  */
rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1802 | |
1803 | /* Implement second lower subreg pass. */ |
1804 | |
1805 | namespace { |
1806 | |
1807 | const pass_data pass_data_lower_subreg2 = |
1808 | { |
1809 | .type: RTL_PASS, /* type */ |
1810 | .name: "subreg2" , /* name */ |
1811 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1812 | .tv_id: TV_LOWER_SUBREG, /* tv_id */ |
1813 | .properties_required: 0, /* properties_required */ |
1814 | .properties_provided: 0, /* properties_provided */ |
1815 | .properties_destroyed: 0, /* properties_destroyed */ |
1816 | .todo_flags_start: 0, /* todo_flags_start */ |
1817 | TODO_df_finish, /* todo_flags_finish */ |
1818 | }; |
1819 | |
1820 | class pass_lower_subreg2 : public rtl_opt_pass |
1821 | { |
1822 | public: |
1823 | pass_lower_subreg2 (gcc::context *ctxt) |
1824 | : rtl_opt_pass (pass_data_lower_subreg2, ctxt) |
1825 | {} |
1826 | |
1827 | /* opt_pass methods: */ |
1828 | bool gate (function *) final override |
1829 | { |
1830 | return flag_split_wide_types && flag_split_wide_types_early; |
1831 | } |
1832 | unsigned int execute (function *) final override |
1833 | { |
1834 | decompose_multiword_subregs (decompose_copies: true); |
1835 | return 0; |
1836 | } |
1837 | |
1838 | }; // class pass_lower_subreg2 |
1839 | |
1840 | } // anon namespace |
1841 | |
/* Allocate and return a fresh instance of the second lower-subreg pass
   for the pass manager.  */
rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}
1847 | |
1848 | /* Implement third lower subreg pass. */ |
1849 | |
1850 | namespace { |
1851 | |
1852 | const pass_data pass_data_lower_subreg3 = |
1853 | { |
1854 | .type: RTL_PASS, /* type */ |
1855 | .name: "subreg3" , /* name */ |
1856 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1857 | .tv_id: TV_LOWER_SUBREG, /* tv_id */ |
1858 | .properties_required: 0, /* properties_required */ |
1859 | .properties_provided: 0, /* properties_provided */ |
1860 | .properties_destroyed: 0, /* properties_destroyed */ |
1861 | .todo_flags_start: 0, /* todo_flags_start */ |
1862 | TODO_df_finish, /* todo_flags_finish */ |
1863 | }; |
1864 | |
1865 | class pass_lower_subreg3 : public rtl_opt_pass |
1866 | { |
1867 | public: |
1868 | pass_lower_subreg3 (gcc::context *ctxt) |
1869 | : rtl_opt_pass (pass_data_lower_subreg3, ctxt) |
1870 | {} |
1871 | |
1872 | /* opt_pass methods: */ |
1873 | bool gate (function *) final override { return flag_split_wide_types; } |
1874 | unsigned int execute (function *) final override |
1875 | { |
1876 | decompose_multiword_subregs (decompose_copies: true); |
1877 | return 0; |
1878 | } |
1879 | |
1880 | }; // class pass_lower_subreg3 |
1881 | |
1882 | } // anon namespace |
1883 | |
/* Allocate and return a fresh instance of the third lower-subreg pass
   for the pass manager.  */
rtl_opt_pass *
make_pass_lower_subreg3 (gcc::context *ctxt)
{
  return new pass_lower_subreg3 (ctxt);
}
1889 | |