1 | /* Redundant Extension Elimination pass for the GNU compiler. |
2 | Copyright (C) 2010-2023 Free Software Foundation, Inc. |
3 | Contributed by Ilya Enkovich (ilya.enkovich@intel.com) |
4 | |
5 | Based on the Redundant Zero-extension elimination pass contributed by |
6 | Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com). |
7 | |
8 | This file is part of GCC. |
9 | |
10 | GCC is free software; you can redistribute it and/or modify it under |
11 | the terms of the GNU General Public License as published by the Free |
12 | Software Foundation; either version 3, or (at your option) any later |
13 | version. |
14 | |
15 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
18 | for more details. |
19 | |
20 | You should have received a copy of the GNU General Public License |
21 | along with GCC; see the file COPYING3. If not see |
22 | <http://www.gnu.org/licenses/>. */ |
23 | |
24 | |
25 | /* Problem Description : |
26 | -------------------- |
27 | This pass is intended to remove redundant extension instructions. |
28 | Such instructions appear for different reasons. We expect some of |
29 | them due to implicit zero-extension in 64-bit registers after writing |
30 | to their lower 32-bit half (e.g. for the x86-64 architecture). |
31 | Another possible reason is a type cast which follows a load (for |
32 | instance a register restore) and which can be combined into a single |
33 | instruction, and for which earlier local passes, e.g. the combiner, |
34 | weren't able to optimize. |
35 | |
36 | How does this pass work ? |
37 | -------------------------- |
38 | |
39 | This pass is run after register allocation. Hence, all registers that |
40 | this pass deals with are hard registers. This pass first looks for an |
41 | extension instruction that could possibly be redundant. Such extension |
42 | instructions show up in RTL with the pattern : |
43 | (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))), |
44 | where x can be any hard register. |
45 | Now, this pass tries to eliminate this instruction by merging the |
46 | extension with the definitions of register x. For instance, if |
47 | one of the definitions of register x was : |
48 | (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), |
49 | followed by extension : |
50 | (set (reg:DI x) (zero_extend:DI (reg:SI x))) |
51 | then the combination converts this into : |
52 | (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). |
53 | If all the merged definitions are recognizable assembly instructions, |
54 | the extension is effectively eliminated. |
55 | |
56 | For example, for the x86-64 architecture, implicit zero-extensions |
57 | are captured with appropriate patterns in the i386.md file. Hence, |
   these merged definitions can be matched to a single assembly instruction.
59 | The original extension instruction is then deleted if all the |
60 | definitions can be merged. |
61 | |
62 | However, there are cases where the definition instruction cannot be |
63 | merged with an extension. Examples are CALL instructions. In such |
64 | cases, the original extension is not redundant and this pass does |
65 | not delete it. |
66 | |
67 | Handling conditional moves : |
68 | ---------------------------- |
69 | |
70 | Architectures like x86-64 support conditional moves whose semantics for |
71 | extension differ from the other instructions. For instance, the |
72 | instruction *cmov ebx, eax* |
73 | zero-extends eax onto rax only when the move from ebx to eax happens. |
74 | Otherwise, eax may not be zero-extended. Consider conditional moves as |
75 | RTL instructions of the form |
76 | (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). |
77 | This pass tries to merge an extension with a conditional move by |
78 | actually merging the definitions of y and z with an extension and then |
79 | converting the conditional move into : |
80 | (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). |
81 | Since registers y and z are extended, register x will also be extended |
82 | after the conditional move. Note that this step has to be done |
83 | transitively since the definition of a conditional copy can be |
84 | another conditional copy. |
85 | |
86 | Motivating Example I : |
87 | --------------------- |
88 | For this program : |
89 | ********************************************** |
90 | bad_code.c |
91 | |
92 | int mask[1000]; |
93 | |
94 | int foo(unsigned x) |
95 | { |
96 | if (x < 10) |
97 | x = x * 45; |
98 | else |
99 | x = x * 78; |
100 | return mask[x]; |
101 | } |
102 | ********************************************** |
103 | |
104 | $ gcc -O2 bad_code.c |
105 | ........ |
106 | 400315: b8 4e 00 00 00 mov $0x4e,%eax |
107 | 40031a: 0f af f8 imul %eax,%edi |
108 | 40031d: 89 ff mov %edi,%edi - useless extension |
109 | 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
110 | 400326: c3 retq |
111 | ...... |
112 | 400330: ba 2d 00 00 00 mov $0x2d,%edx |
113 | 400335: 0f af fa imul %edx,%edi |
114 | 400338: 89 ff mov %edi,%edi - useless extension |
115 | 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
116 | 400341: c3 retq |
117 | |
118 | $ gcc -O2 -free bad_code.c |
119 | ...... |
120 | 400315: 6b ff 4e imul $0x4e,%edi,%edi |
121 | 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax |
122 | 40031f: c3 retq |
123 | 400320: 6b ff 2d imul $0x2d,%edi,%edi |
124 | 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax |
125 | 40032a: c3 retq |
126 | |
127 | Motivating Example II : |
128 | --------------------- |
129 | |
130 | Here is an example with a conditional move. |
131 | |
132 | For this program : |
133 | ********************************************** |
134 | |
135 | unsigned long long foo(unsigned x , unsigned y) |
136 | { |
137 | unsigned z; |
138 | if (x > 100) |
139 | z = x + y; |
140 | else |
141 | z = x - y; |
142 | return (unsigned long long)(z); |
143 | } |
144 | |
145 | $ gcc -O2 bad_code.c |
146 | ............ |
147 | 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx |
148 | 400363: 89 f8 mov %edi,%eax |
149 | 400365: 29 f0 sub %esi,%eax |
150 | 400367: 83 ff 65 cmp $0x65,%edi |
151 | 40036a: 0f 43 c2 cmovae %edx,%eax |
152 | 40036d: 89 c0 mov %eax,%eax - useless extension |
153 | 40036f: c3 retq |
154 | |
155 | $ gcc -O2 -free bad_code.c |
156 | ............. |
157 | 400360: 89 fa mov %edi,%edx |
158 | 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax |
159 | 400365: 29 f2 sub %esi,%edx |
160 | 400367: 83 ff 65 cmp $0x65,%edi |
161 | 40036a: 89 d6 mov %edx,%esi |
162 | 40036c: 48 0f 42 c6 cmovb %rsi,%rax |
163 | 400370: c3 retq |
164 | |
165 | Motivating Example III : |
166 | --------------------- |
167 | |
168 | Here is an example with a type cast. |
169 | |
170 | For this program : |
171 | ********************************************** |
172 | |
173 | void test(int size, unsigned char *in, unsigned char *out) |
174 | { |
175 | int i; |
176 | unsigned char xr, xg, xy=0; |
177 | |
178 | for (i = 0; i < size; i++) { |
179 | xr = *in++; |
180 | xg = *in++; |
181 | xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); |
182 | *out++ = xy; |
183 | } |
184 | } |
185 | |
186 | $ gcc -O2 bad_code.c |
187 | ............ |
188 | 10: 0f b6 0e movzbl (%rsi),%ecx |
189 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax |
190 | 17: 48 83 c6 02 add $0x2,%rsi |
191 | 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension |
192 | 1e: 0f b6 c0 movzbl %al,%eax - useless extension |
193 | 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx |
194 | 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax |
195 | |
196 | $ gcc -O2 -free bad_code.c |
197 | ............. |
198 | 10: 0f b6 0e movzbl (%rsi),%ecx |
199 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax |
200 | 17: 48 83 c6 02 add $0x2,%rsi |
201 | 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx |
202 | 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax |
203 | |
204 | Usefulness : |
205 | ---------- |
206 | |
207 | The original redundant zero-extension elimination pass reported reduction |
208 | of the dynamic instruction count of a compression benchmark by 2.8% and |
209 | improvement of its run time by about 1%. |
210 | |
211 | The additional performance gain with the enhanced pass is mostly expected |
212 | on in-order architectures where redundancy cannot be compensated by out of |
213 | order execution. Measurements showed up to 10% performance gain (reduced |
214 | run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance |
215 | gain 1%. */ |
216 | |
217 | |
218 | #include "config.h" |
219 | #include "system.h" |
220 | #include "coretypes.h" |
221 | #include "backend.h" |
222 | #include "target.h" |
223 | #include "rtl.h" |
224 | #include "tree.h" |
225 | #include "df.h" |
226 | #include "memmodel.h" |
227 | #include "tm_p.h" |
228 | #include "optabs.h" |
229 | #include "regs.h" |
230 | #include "emit-rtl.h" |
231 | #include "recog.h" |
232 | #include "cfgrtl.h" |
233 | #include "expr.h" |
234 | #include "tree-pass.h" |
235 | |
/* This structure represents a candidate for elimination.  */

struct ext_cand
{
  /* The SET expression whose SET_SRC is the extension
     (any_extend of a register).  */
  const_rtx expr;

  /* The kind of extension: ZERO_EXTEND or SIGN_EXTEND.  */
  enum rtx_code code;

  /* The destination (widened) mode of the extension.  */
  machine_mode mode;

  /* The instruction where it lives.  */
  rtx_insn *insn;
};
252 | |
253 | |
/* Upper bound on insn UIDs at pass start; used to size per-insn
   bookkeeping (e.g. the visited array in make_defs_and_copies_lists).  */
static int max_insn_uid;
255 | |
256 | /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */ |
257 | |
258 | static bool |
259 | update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode, |
260 | machine_mode old_mode, enum rtx_code code) |
261 | { |
262 | rtx *loc = ®_NOTES (insn); |
263 | while (*loc) |
264 | { |
265 | enum reg_note kind = REG_NOTE_KIND (*loc); |
266 | if (kind == REG_EQUAL || kind == REG_EQUIV) |
267 | { |
268 | rtx orig_src = XEXP (*loc, 0); |
269 | /* Update equivalency constants. Recall that RTL constants are |
270 | sign-extended. */ |
271 | if (GET_CODE (orig_src) == CONST_INT |
272 | && HWI_COMPUTABLE_MODE_P (mode: new_mode)) |
273 | { |
274 | if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND) |
275 | /* Nothing needed. */; |
276 | else |
277 | { |
278 | /* Zero-extend the negative constant by masking out the |
279 | bits outside the source mode. */ |
280 | rtx new_const_int |
281 | = gen_int_mode (INTVAL (orig_src) |
282 | & GET_MODE_MASK (old_mode), |
283 | new_mode); |
284 | if (!validate_change (insn, &XEXP (*loc, 0), |
285 | new_const_int, true)) |
286 | return false; |
287 | } |
288 | loc = &XEXP (*loc, 1); |
289 | } |
290 | /* Drop all other notes, they assume a wrong mode. */ |
291 | else if (!validate_change (insn, loc, XEXP (*loc, 1), true)) |
292 | return false; |
293 | } |
294 | else |
295 | loc = &XEXP (*loc, 1); |
296 | } |
297 | return true; |
298 | } |
299 | |
300 | /* Given a insn (CURR_INSN), an extension candidate for removal (CAND) |
301 | and a pointer to the SET rtx (ORIG_SET) that needs to be modified, |
302 | this code modifies the SET rtx to a new SET rtx that extends the |
303 | right hand expression into a register on the left hand side. Note |
304 | that multiple assumptions are made about the nature of the set that |
305 | needs to be true for this to work and is called from merge_def_and_ext. |
306 | |
307 | Original : |
308 | (set (reg a) (expression)) |
309 | |
310 | Transform : |
311 | (set (reg a) (any_extend (expression))) |
312 | |
313 | Special Cases : |
314 | If the expression is a constant or another extension, then directly |
315 | assign it to the register. */ |
316 | |
317 | static bool |
318 | combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set) |
319 | { |
320 | rtx orig_src = SET_SRC (*orig_set); |
321 | machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set)); |
322 | rtx new_set; |
323 | rtx cand_pat = single_set (insn: cand->insn); |
324 | |
325 | /* If the extension's source/destination registers are not the same |
326 | then we need to change the original load to reference the destination |
327 | of the extension. Then we need to emit a copy from that destination |
328 | to the original destination of the load. */ |
329 | rtx new_reg; |
330 | bool copy_needed |
331 | = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0))); |
332 | if (copy_needed) |
333 | new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat))); |
334 | else |
335 | new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set))); |
336 | |
337 | /* Merge constants by directly moving the constant into the register under |
338 | some conditions. Recall that RTL constants are sign-extended. */ |
339 | if (GET_CODE (orig_src) == CONST_INT |
340 | && HWI_COMPUTABLE_MODE_P (mode: cand->mode)) |
341 | { |
342 | if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND) |
343 | new_set = gen_rtx_SET (new_reg, orig_src); |
344 | else |
345 | { |
346 | /* Zero-extend the negative constant by masking out the bits outside |
347 | the source mode. */ |
348 | rtx new_const_int |
349 | = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode), |
350 | GET_MODE (new_reg)); |
351 | new_set = gen_rtx_SET (new_reg, new_const_int); |
352 | } |
353 | } |
354 | else if (GET_MODE (orig_src) == VOIDmode) |
355 | { |
356 | /* This is mostly due to a call insn that should not be optimized. */ |
357 | return false; |
358 | } |
359 | else if (GET_CODE (orig_src) == cand->code) |
360 | { |
361 | /* Here is a sequence of two extensions. Try to merge them. */ |
362 | rtx temp_extension |
363 | = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0)); |
364 | rtx simplified_temp_extension = simplify_rtx (temp_extension); |
365 | if (simplified_temp_extension) |
366 | temp_extension = simplified_temp_extension; |
367 | new_set = gen_rtx_SET (new_reg, temp_extension); |
368 | } |
369 | else if (GET_CODE (orig_src) == IF_THEN_ELSE) |
370 | { |
371 | /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, |
372 | in general, IF_THEN_ELSE should not be combined. */ |
373 | return false; |
374 | } |
375 | else |
376 | { |
377 | /* This is the normal case. */ |
378 | rtx temp_extension |
379 | = gen_rtx_fmt_e (cand->code, cand->mode, orig_src); |
380 | rtx simplified_temp_extension = simplify_rtx (temp_extension); |
381 | if (simplified_temp_extension) |
382 | temp_extension = simplified_temp_extension; |
383 | new_set = gen_rtx_SET (new_reg, temp_extension); |
384 | } |
385 | |
386 | /* This change is a part of a group of changes. Hence, |
387 | validate_change will not try to commit the change. */ |
388 | if (validate_change (curr_insn, orig_set, new_set, true) |
389 | && update_reg_equal_equiv_notes (insn: curr_insn, new_mode: cand->mode, old_mode: orig_mode, |
390 | code: cand->code)) |
391 | { |
392 | if (dump_file) |
393 | { |
394 | fprintf (stream: dump_file, |
395 | format: "Tentatively merged extension with definition %s:\n" , |
396 | (copy_needed) ? "(copy needed)" : "" ); |
397 | print_rtl_single (dump_file, curr_insn); |
398 | } |
399 | return true; |
400 | } |
401 | |
402 | return false; |
403 | } |
404 | |
405 | /* Treat if_then_else insns, where the operands of both branches |
406 | are registers, as copies. For instance, |
407 | Original : |
408 | (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) |
409 | Transformed : |
410 | (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c))) |
411 | DEF_INSN is the if_then_else insn. */ |
412 | |
413 | static bool |
414 | transform_ifelse (ext_cand *cand, rtx_insn *def_insn) |
415 | { |
416 | rtx set_insn = PATTERN (insn: def_insn); |
417 | rtx srcreg, dstreg, srcreg2; |
418 | rtx map_srcreg, map_dstreg, map_srcreg2; |
419 | rtx ifexpr; |
420 | rtx cond; |
421 | rtx new_set; |
422 | |
423 | gcc_assert (GET_CODE (set_insn) == SET); |
424 | |
425 | cond = XEXP (SET_SRC (set_insn), 0); |
426 | dstreg = SET_DEST (set_insn); |
427 | srcreg = XEXP (SET_SRC (set_insn), 1); |
428 | srcreg2 = XEXP (SET_SRC (set_insn), 2); |
429 | /* If the conditional move already has the right or wider mode, |
430 | there is nothing to do. */ |
431 | if (GET_MODE_UNIT_SIZE (GET_MODE (dstreg)) |
432 | >= GET_MODE_UNIT_SIZE (cand->mode)) |
433 | return true; |
434 | |
435 | map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg)); |
436 | map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2)); |
437 | map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg)); |
438 | ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2); |
439 | new_set = gen_rtx_SET (map_dstreg, ifexpr); |
440 | |
441 | if (validate_change (def_insn, &PATTERN (insn: def_insn), new_set, true) |
442 | && update_reg_equal_equiv_notes (insn: def_insn, new_mode: cand->mode, GET_MODE (dstreg), |
443 | code: cand->code)) |
444 | { |
445 | if (dump_file) |
446 | { |
447 | fprintf (stream: dump_file, |
448 | format: "Mode of conditional move instruction extended:\n" ); |
449 | print_rtl_single (dump_file, def_insn); |
450 | } |
451 | return true; |
452 | } |
453 | |
454 | return false; |
455 | } |
456 | |
457 | /* Get all the reaching definitions of an instruction. The definitions are |
458 | desired for REG used in INSN. Return the definition list or NULL if a |
459 | definition is missing. If DEST is non-NULL, additionally push the INSN |
460 | of the definitions onto DEST. */ |
461 | |
462 | static struct df_link * |
463 | get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest) |
464 | { |
465 | df_ref use; |
466 | struct df_link *ref_chain, *ref_link; |
467 | |
468 | FOR_EACH_INSN_USE (use, insn) |
469 | { |
470 | if (GET_CODE (DF_REF_REG (use)) == SUBREG) |
471 | return NULL; |
472 | if (REGNO (DF_REF_REG (use)) == REGNO (reg)) |
473 | break; |
474 | } |
475 | |
476 | gcc_assert (use != NULL); |
477 | |
478 | ref_chain = DF_REF_CHAIN (use); |
479 | |
480 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) |
481 | { |
482 | /* Problem getting some definition for this instruction. */ |
483 | if (ref_link->ref == NULL) |
484 | return NULL; |
485 | if (DF_REF_INSN_INFO (ref_link->ref) == NULL) |
486 | return NULL; |
487 | /* As global regs are assumed to be defined at each function call |
488 | dataflow can report a call_insn as being a definition of REG. |
489 | But we can't do anything with that in this pass so proceed only |
490 | if the instruction really sets REG in a way that can be deduced |
491 | from the RTL structure. */ |
492 | if (global_regs[REGNO (reg)] |
493 | && !set_of (reg, DF_REF_INSN (ref_link->ref))) |
494 | return NULL; |
495 | } |
496 | |
497 | if (dest) |
498 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) |
499 | dest->safe_push (DF_REF_INSN (ref_link->ref)); |
500 | |
501 | return ref_chain; |
502 | } |
503 | |
504 | /* Get all the reaching uses of an instruction. The uses are desired for REG |
505 | set in INSN. Return use list or NULL if a use is missing or irregular. */ |
506 | |
507 | static struct df_link * |
508 | get_uses (rtx_insn *insn, rtx reg) |
509 | { |
510 | df_ref def; |
511 | struct df_link *ref_chain, *ref_link; |
512 | |
513 | FOR_EACH_INSN_DEF (def, insn) |
514 | if (REGNO (DF_REF_REG (def)) == REGNO (reg)) |
515 | break; |
516 | |
517 | gcc_assert (def != NULL); |
518 | |
519 | ref_chain = DF_REF_CHAIN (def); |
520 | |
521 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) |
522 | { |
523 | /* Problem getting some use for this instruction. */ |
524 | if (ref_link->ref == NULL) |
525 | return NULL; |
526 | if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR) |
527 | return NULL; |
528 | } |
529 | |
530 | return ref_chain; |
531 | } |
532 | |
533 | /* Return true if INSN is |
534 | (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2))) |
535 | and store x1 and x2 in REG_1 and REG_2. */ |
536 | |
537 | static bool |
538 | is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2) |
539 | { |
540 | rtx expr = single_set (insn); |
541 | |
542 | if (expr != NULL_RTX |
543 | && GET_CODE (expr) == SET |
544 | && GET_CODE (SET_DEST (expr)) == REG |
545 | && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE |
546 | && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG |
547 | && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG) |
548 | { |
549 | *reg1 = XEXP (SET_SRC (expr), 1); |
550 | *reg2 = XEXP (SET_SRC (expr), 2); |
551 | return true; |
552 | } |
553 | |
554 | return false; |
555 | } |
556 | |
/* How (if at all) the ree pass has already rewritten an insn.  */
enum ext_modified_kind
{
  /* The insn hasn't been modified by ree pass yet.  */
  EXT_MODIFIED_NONE,
  /* Changed into zero extension.  */
  EXT_MODIFIED_ZEXT,
  /* Changed into sign extension.  */
  EXT_MODIFIED_SEXT
};
566 | |
/* Per-insn record of modifications made by this pass (indexed by
   INSN_UID elsewhere in the pass).  */
struct ext_modified
{
  /* Mode from which ree has zero or sign extended the destination.  */
  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;

  /* Kind of modification of the insn.  */
  ENUM_BITFIELD(ext_modified_kind) kind : 2;

  /* NOTE(review): flag appears to forbid applying a further extension
     to this insn — its readers are outside this chunk; confirm.  */
  unsigned int do_not_reextend : 1;

  /* True if the insn is scheduled to be deleted.  */
  unsigned int deleted : 1;
};
580 | |
/* Vectors used by combine_reaching_defs and its helpers.  */
class ext_state
{
public:
  /* In order to avoid constant alloc/free, we keep these
     4 vectors live through the entire find_and_remove_re and just
     truncate them each time.  */
  /* Plain reaching definitions to merge the extension into.  */
  auto_vec<rtx_insn *> defs_list;
  /* Conditional-move insns whose mode will be widened instead.  */
  auto_vec<rtx_insn *> copies_list;
  auto_vec<rtx_insn *> modified_list;
  /* Worklist for the transitive closure over conditional copies.  */
  auto_vec<rtx_insn *> work_list;

  /* For instructions that have been successfully modified, this is
     the original mode from which the insn is extending and
     kind of extension.  Array indexed by INSN_UID.  */
  struct ext_modified *modified;
};
598 | |
599 | /* Reaching Definitions of the extended register could be conditional copies |
600 | or regular definitions. This function separates the two types into two |
601 | lists, STATE->DEFS_LIST and STATE->COPIES_LIST. This is necessary because, |
602 | if a reaching definition is a conditional copy, merging the extension with |
603 | this definition is wrong. Conditional copies are merged by transitively |
604 | merging their definitions. The defs_list is populated with all the reaching |
605 | definitions of the extension instruction (EXTEND_INSN) which must be merged |
606 | with an extension. The copies_list contains all the conditional moves that |
607 | will later be extended into a wider mode conditional move if all the merges |
608 | are successful. The function returns false upon failure, true upon |
609 | success. */ |
610 | |
611 | static bool |
612 | make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat, |
613 | ext_state *state) |
614 | { |
615 | rtx src_reg = XEXP (SET_SRC (set_pat), 0); |
616 | bool *is_insn_visited; |
617 | bool ret = true; |
618 | |
619 | state->work_list.truncate (size: 0); |
620 | |
621 | /* Initialize the work list. */ |
622 | if (!get_defs (insn: extend_insn, reg: src_reg, dest: &state->work_list)) |
623 | return false; |
624 | |
625 | is_insn_visited = XCNEWVEC (bool, max_insn_uid); |
626 | |
627 | /* Perform transitive closure for conditional copies. */ |
628 | while (!state->work_list.is_empty ()) |
629 | { |
630 | rtx_insn *def_insn = state->work_list.pop (); |
631 | rtx reg1, reg2; |
632 | |
633 | gcc_assert (INSN_UID (def_insn) < max_insn_uid); |
634 | |
635 | if (is_insn_visited[INSN_UID (insn: def_insn)]) |
636 | continue; |
637 | is_insn_visited[INSN_UID (insn: def_insn)] = true; |
638 | |
639 | if (is_cond_copy_insn (insn: def_insn, reg1: ®1, reg2: ®2)) |
640 | { |
641 | /* Push it onto the copy list first. */ |
642 | state->copies_list.safe_push (obj: def_insn); |
643 | |
644 | /* Now perform the transitive closure. */ |
645 | if (!get_defs (insn: def_insn, reg: reg1, dest: &state->work_list) |
646 | || !get_defs (insn: def_insn, reg: reg2, dest: &state->work_list)) |
647 | { |
648 | ret = false; |
649 | break; |
650 | } |
651 | } |
652 | else |
653 | state->defs_list.safe_push (obj: def_insn); |
654 | } |
655 | |
656 | XDELETEVEC (is_insn_visited); |
657 | |
658 | return ret; |
659 | } |
660 | |
661 | /* If DEF_INSN has single SET expression with a register |
662 | destination, possibly buried inside a PARALLEL, return |
663 | the address of the SET expression, else return NULL. |
664 | This is similar to single_set, except that single_set |
665 | allows multiple SETs when all but one is dead. */ |
666 | static rtx * |
667 | get_sub_rtx (rtx_insn *def_insn) |
668 | { |
669 | enum rtx_code code = GET_CODE (PATTERN (def_insn)); |
670 | rtx *sub_rtx = NULL; |
671 | |
672 | if (code == PARALLEL) |
673 | { |
674 | for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++) |
675 | { |
676 | rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i); |
677 | if (GET_CODE (s_expr) != SET) |
678 | continue; |
679 | if (!REG_P (SET_DEST (s_expr))) |
680 | continue; |
681 | |
682 | if (sub_rtx == NULL) |
683 | sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i); |
684 | else |
685 | { |
686 | /* PARALLEL with multiple SETs. */ |
687 | return NULL; |
688 | } |
689 | } |
690 | } |
691 | else if (code == SET) |
692 | { |
693 | rtx s_expr = PATTERN (insn: def_insn); |
694 | if (REG_P (SET_DEST (s_expr))) |
695 | sub_rtx = &PATTERN (insn: def_insn); |
696 | } |
697 | |
698 | return sub_rtx; |
699 | } |
700 | |
701 | /* Merge the DEF_INSN with an extension. Calls combine_set_extension |
702 | on the SET pattern. */ |
703 | |
704 | static bool |
705 | merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state) |
706 | { |
707 | machine_mode ext_src_mode; |
708 | rtx *sub_rtx; |
709 | |
710 | ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0)); |
711 | sub_rtx = get_sub_rtx (def_insn); |
712 | |
713 | if (sub_rtx == NULL) |
714 | return false; |
715 | |
716 | if (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode |
717 | || ((state->modified[INSN_UID (insn: def_insn)].kind |
718 | == (cand->code == ZERO_EXTEND |
719 | ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)) |
720 | && state->modified[INSN_UID (insn: def_insn)].mode |
721 | == ext_src_mode)) |
722 | { |
723 | if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx))) |
724 | >= GET_MODE_UNIT_SIZE (cand->mode)) |
725 | return true; |
726 | /* If def_insn is already scheduled to be deleted, don't attempt |
727 | to modify it. */ |
728 | if (state->modified[INSN_UID (insn: def_insn)].deleted) |
729 | return false; |
730 | if (combine_set_extension (cand, curr_insn: def_insn, orig_set: sub_rtx)) |
731 | { |
732 | if (state->modified[INSN_UID (insn: def_insn)].kind == EXT_MODIFIED_NONE) |
733 | state->modified[INSN_UID (insn: def_insn)].mode = ext_src_mode; |
734 | return true; |
735 | } |
736 | } |
737 | |
738 | return false; |
739 | } |
740 | |
741 | /* Given SRC, which should be one or more extensions of a REG, strip |
742 | away the extensions and return the REG. */ |
743 | |
744 | static inline rtx |
745 | get_extended_src_reg (rtx src) |
746 | { |
747 | while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) |
748 | src = XEXP (src, 0); |
749 | gcc_assert (REG_P (src)); |
750 | return src; |
751 | } |
752 | |
753 | /* This function goes through all reaching defs of the source |
754 | of the candidate for elimination (CAND) and tries to combine |
755 | the extension with the definition instruction. The changes |
756 | are made as a group so that even if one definition cannot be |
757 | merged, all reaching definitions end up not being merged. |
758 | When a conditional copy is encountered, merging is attempted |
759 | transitively on its definitions. It returns true upon success |
760 | and false upon failure. */ |
761 | |
762 | static bool |
763 | combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state) |
764 | { |
765 | rtx_insn *def_insn; |
766 | bool merge_successful = true; |
767 | int i; |
768 | int defs_ix; |
769 | bool outcome; |
770 | |
771 | state->defs_list.truncate (size: 0); |
772 | state->copies_list.truncate (size: 0); |
773 | |
774 | outcome = make_defs_and_copies_lists (extend_insn: cand->insn, set_pat, state); |
775 | |
776 | if (!outcome) |
777 | return false; |
778 | |
779 | /* If the destination operand of the extension is a different |
780 | register than the source operand, then additional restrictions |
781 | are needed. Note we have to handle cases where we have nested |
782 | extensions in the source operand. |
783 | |
784 | Candidate insns are known to be single_sets, via the test in |
785 | find_removable_extensions. So we continue to use single_set here |
786 | rather than get_sub_rtx. */ |
787 | rtx set = single_set (insn: cand->insn); |
788 | bool copy_needed |
789 | = (REGNO (SET_DEST (set)) != REGNO (get_extended_src_reg (SET_SRC (set)))); |
790 | if (copy_needed) |
791 | { |
792 | /* Considering transformation of |
793 | (set (reg1) (expression)) |
794 | ... |
795 | (set (reg2) (any_extend (reg1))) |
796 | |
797 | into |
798 | |
799 | (set (reg2) (any_extend (expression))) |
800 | (set (reg1) (reg2)) |
801 | ... */ |
802 | |
803 | /* In theory we could handle more than one reaching def, it |
804 | just makes the code to update the insn stream more complex. */ |
805 | if (state->defs_list.length () != 1) |
806 | return false; |
807 | |
808 | /* We don't have the structure described above if there are |
809 | conditional moves in between the def and the candidate, |
810 | and we will not handle them correctly. See PR68194. */ |
811 | if (state->copies_list.length () > 0) |
812 | return false; |
813 | |
814 | /* We require the candidate not already be modified. It may, |
815 | for example have been changed from a (sign_extend (reg)) |
816 | into (zero_extend (sign_extend (reg))). |
817 | |
818 | Handling that case shouldn't be terribly difficult, but the code |
819 | here and the code to emit copies would need auditing. Until |
820 | we see a need, this is the safe thing to do. */ |
821 | if (state->modified[INSN_UID (insn: cand->insn)].kind != EXT_MODIFIED_NONE) |
822 | return false; |
823 | |
824 | machine_mode dst_mode = GET_MODE (SET_DEST (set)); |
825 | rtx src_reg = get_extended_src_reg (SET_SRC (set)); |
826 | |
827 | /* Ensure we can use the src_reg in dst_mode (needed for |
828 | the (set (reg1) (reg2)) insn mentioned above). */ |
829 | if (!targetm.hard_regno_mode_ok (REGNO (src_reg), dst_mode)) |
830 | return false; |
831 | |
832 | /* Ensure the number of hard registers of the copy match. */ |
833 | if (hard_regno_nregs (REGNO (src_reg), mode: dst_mode) != REG_NREGS (src_reg)) |
834 | return false; |
835 | |
836 | /* There's only one reaching def. */ |
837 | rtx_insn *def_insn = state->defs_list[0]; |
838 | |
839 | /* The defining statement must not have been modified either. */ |
840 | if (state->modified[INSN_UID (insn: def_insn)].kind != EXT_MODIFIED_NONE) |
841 | return false; |
842 | |
843 | /* The defining statement and candidate insn must be in the same block. |
844 | This is merely to keep the test for safety and updating the insn |
845 | stream simple. Also ensure that within the block the candidate |
846 | follows the defining insn. */ |
847 | basic_block bb = BLOCK_FOR_INSN (insn: cand->insn); |
848 | if (bb != BLOCK_FOR_INSN (insn: def_insn) |
849 | || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn)) |
850 | return false; |
851 | |
852 | /* If there is an overlap between the destination of DEF_INSN and |
853 | CAND->insn, then this transformation is not safe. Note we have |
854 | to test in the widened mode. */ |
855 | rtx *dest_sub_rtx = get_sub_rtx (def_insn); |
856 | if (dest_sub_rtx == NULL) |
857 | return false; |
858 | |
859 | rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (set)), |
860 | REGNO (SET_DEST (*dest_sub_rtx))); |
861 | if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (set))) |
862 | return false; |
863 | |
864 | /* On RISC machines we must make sure that changing the mode of SRC_REG |
865 | as destination register will not affect its reaching uses, which may |
866 | read its value in a larger mode because DEF_INSN implicitly sets it |
867 | in word mode. */ |
868 | poly_int64 prec |
869 | = GET_MODE_PRECISION (GET_MODE (SET_DEST (*dest_sub_rtx))); |
870 | if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD)) |
871 | { |
872 | struct df_link *uses = get_uses (insn: def_insn, reg: src_reg); |
873 | if (!uses) |
874 | return false; |
875 | |
876 | for (df_link *use = uses; use; use = use->next) |
877 | if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)), |
878 | GET_MODE (SET_DEST (*dest_sub_rtx))) |
879 | && !DEBUG_INSN_P (DF_REF_INSN (use->ref))) |
880 | return false; |
881 | } |
882 | |
883 | /* The destination register of the extension insn must not be |
884 | used or set between the def_insn and cand->insn exclusive. */ |
885 | if (reg_used_between_p (SET_DEST (set), def_insn, cand->insn) |
886 | || reg_set_between_p (SET_DEST (set), def_insn, cand->insn)) |
887 | return false; |
888 | |
889 | /* We must be able to copy between the two registers. Generate, |
890 | recognize and verify constraints of the copy. Also fail if this |
891 | generated more than one insn. |
892 | |
893 | This generates garbage since we throw away the insn when we're |
894 | done, only to recreate it later if this test was successful. |
895 | |
896 | Make sure to get the mode from the extension (cand->insn). This |
897 | is different than in the code to emit the copy as we have not |
898 | modified the defining insn yet. */ |
899 | start_sequence (); |
900 | rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (set)), |
901 | REGNO (get_extended_src_reg (SET_SRC (set)))); |
902 | rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (set)), |
903 | REGNO (SET_DEST (set))); |
904 | emit_move_insn (new_dst, new_src); |
905 | |
906 | rtx_insn *insn = get_insns (); |
907 | end_sequence (); |
908 | if (NEXT_INSN (insn)) |
909 | return false; |
910 | if (recog_memoized (insn) == -1) |
911 | return false; |
912 | extract_insn (insn); |
913 | if (!constrain_operands (1, get_preferred_alternatives (insn, bb))) |
914 | return false; |
915 | |
916 | while (REG_P (SET_SRC (*dest_sub_rtx)) |
917 | && (REGNO (SET_SRC (*dest_sub_rtx)) == REGNO (SET_DEST (set)))) |
918 | { |
919 | /* Considering transformation of |
920 | (set (reg2) (expression)) |
921 | ... |
922 | (set (reg1) (reg2)) |
923 | ... |
924 | (set (reg2) (any_extend (reg1))) |
925 | |
926 | into |
927 | |
928 | (set (reg2) (any_extend (expression))) |
929 | (set (reg1) (reg2)) |
930 | ... */ |
931 | struct df_link *defs |
932 | = get_defs (insn: def_insn, SET_SRC (*dest_sub_rtx), NULL); |
933 | if (defs == NULL || defs->next) |
934 | break; |
935 | |
936 | /* There is only one reaching def. */ |
937 | rtx_insn *def_insn2 = DF_REF_INSN (defs->ref); |
938 | |
939 | /* The defining statement must not have been modified either. */ |
940 | if (state->modified[INSN_UID (insn: def_insn2)].kind != EXT_MODIFIED_NONE) |
941 | break; |
942 | |
943 | /* The def_insn2 and candidate insn must be in the same |
944 | block and def_insn follows def_insn2. */ |
945 | if (bb != BLOCK_FOR_INSN (insn: def_insn2) |
946 | || DF_INSN_LUID (def_insn2) > DF_INSN_LUID (def_insn)) |
947 | break; |
948 | |
949 | rtx *dest_sub_rtx2 = get_sub_rtx (def_insn: def_insn2); |
950 | if (dest_sub_rtx2 == NULL) |
951 | break; |
952 | |
953 | /* On RISC machines we must make sure that changing the mode of |
954 | SRC_REG as destination register will not affect its reaching |
955 | uses, which may read its value in a larger mode because DEF_INSN |
956 | implicitly sets it in word mode. */ |
957 | if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD)) |
958 | { |
959 | struct df_link *uses = get_uses (insn: def_insn2, SET_DEST (set)); |
960 | if (!uses) |
961 | break; |
962 | |
963 | df_link *use; |
964 | rtx dest2 = SET_DEST (*dest_sub_rtx2); |
965 | for (use = uses; use; use = use->next) |
966 | if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)), |
967 | GET_MODE (dest2)) |
968 | && !DEBUG_INSN_P (DF_REF_INSN (use->ref))) |
969 | break; |
970 | if (use) |
971 | break; |
972 | } |
973 | |
974 | /* The destination register of the extension insn must not be |
975 | used or set between the def_insn2 and def_insn exclusive. |
976 | Likewise for the other reg, i.e. check both reg1 and reg2 |
977 | in the above comment. */ |
978 | if (reg_used_between_p (SET_DEST (set), def_insn2, def_insn) |
979 | || reg_set_between_p (SET_DEST (set), def_insn2, def_insn) |
980 | || reg_used_between_p (src_reg, def_insn2, def_insn) |
981 | || reg_set_between_p (src_reg, def_insn2, def_insn)) |
982 | break; |
983 | |
984 | state->defs_list[0] = def_insn2; |
985 | break; |
986 | } |
987 | } |
988 | |
989 | /* If cand->insn has been already modified, update cand->mode to a wider |
990 | mode if possible, or punt. */ |
991 | if (state->modified[INSN_UID (insn: cand->insn)].kind != EXT_MODIFIED_NONE) |
992 | { |
993 | machine_mode mode; |
994 | |
995 | if (state->modified[INSN_UID (insn: cand->insn)].kind |
996 | != (cand->code == ZERO_EXTEND |
997 | ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT) |
998 | || state->modified[INSN_UID (insn: cand->insn)].mode != cand->mode |
999 | || (set == NULL_RTX)) |
1000 | return false; |
1001 | mode = GET_MODE (SET_DEST (set)); |
1002 | gcc_assert (GET_MODE_UNIT_SIZE (mode) |
1003 | >= GET_MODE_UNIT_SIZE (cand->mode)); |
1004 | cand->mode = mode; |
1005 | } |
1006 | |
1007 | merge_successful = true; |
1008 | |
1009 | /* Go through the defs vector and try to merge all the definitions |
1010 | in this vector. */ |
1011 | state->modified_list.truncate (size: 0); |
1012 | FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn) |
1013 | { |
1014 | if (merge_def_and_ext (cand, def_insn, state)) |
1015 | state->modified_list.safe_push (obj: def_insn); |
1016 | else |
1017 | { |
1018 | merge_successful = false; |
1019 | break; |
1020 | } |
1021 | } |
1022 | |
1023 | /* Now go through the conditional copies vector and try to merge all |
1024 | the copies in this vector. */ |
1025 | if (merge_successful) |
1026 | { |
1027 | FOR_EACH_VEC_ELT (state->copies_list, i, def_insn) |
1028 | { |
1029 | if (transform_ifelse (cand, def_insn)) |
1030 | state->modified_list.safe_push (obj: def_insn); |
1031 | else |
1032 | { |
1033 | merge_successful = false; |
1034 | break; |
1035 | } |
1036 | } |
1037 | } |
1038 | |
1039 | if (merge_successful) |
1040 | { |
1041 | /* Commit the changes here if possible |
1042 | FIXME: It's an all-or-nothing scenario. Even if only one definition |
1043 | cannot be merged, we entirely give up. In the future, we should allow |
1044 | extensions to be partially eliminated along those paths where the |
1045 | definitions could be merged. */ |
1046 | if (apply_change_group ()) |
1047 | { |
1048 | if (dump_file) |
1049 | fprintf (stream: dump_file, format: "All merges were successful.\n" ); |
1050 | |
1051 | FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) |
1052 | { |
1053 | ext_modified *modified = &state->modified[INSN_UID (insn: def_insn)]; |
1054 | if (modified->kind == EXT_MODIFIED_NONE) |
1055 | modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT |
1056 | : EXT_MODIFIED_SEXT); |
1057 | |
1058 | if (copy_needed) |
1059 | modified->do_not_reextend = 1; |
1060 | } |
1061 | return true; |
1062 | } |
1063 | else |
1064 | { |
1065 | /* Changes need not be cancelled explicitly as apply_change_group |
1066 | does it. Print list of definitions in the dump_file for debug |
1067 | purposes. This extension cannot be deleted. */ |
1068 | if (dump_file) |
1069 | { |
1070 | fprintf (stream: dump_file, |
1071 | format: "Merge cancelled, non-mergeable definitions:\n" ); |
1072 | FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) |
1073 | print_rtl_single (dump_file, def_insn); |
1074 | } |
1075 | } |
1076 | } |
1077 | else |
1078 | { |
1079 | /* Cancel any changes that have been made so far. */ |
1080 | cancel_changes (0); |
1081 | } |
1082 | |
1083 | return false; |
1084 | } |
1085 | |
1086 | /* Add an extension pattern that could be eliminated. */ |
1087 | |
1088 | static void |
1089 | add_removable_extension (const_rtx expr, rtx_insn *insn, |
1090 | vec<ext_cand> *insn_list, |
1091 | unsigned *def_map, |
1092 | bitmap init_regs) |
1093 | { |
1094 | enum rtx_code code; |
1095 | machine_mode mode; |
1096 | unsigned int idx; |
1097 | rtx src, dest; |
1098 | |
1099 | /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */ |
1100 | if (GET_CODE (expr) != SET) |
1101 | return; |
1102 | |
1103 | src = SET_SRC (expr); |
1104 | code = GET_CODE (src); |
1105 | dest = SET_DEST (expr); |
1106 | mode = GET_MODE (dest); |
1107 | |
1108 | if (REG_P (dest) |
1109 | && (code == SIGN_EXTEND || code == ZERO_EXTEND) |
1110 | && REG_P (XEXP (src, 0))) |
1111 | { |
1112 | rtx reg = XEXP (src, 0); |
1113 | struct df_link *defs, *def; |
1114 | ext_cand *cand; |
1115 | |
1116 | /* Zero-extension of an undefined value is partly defined (it's |
1117 | completely undefined for sign-extension, though). So if there exists |
1118 | a path from the entry to this zero-extension that leaves this register |
1119 | uninitialized, removing the extension could change the behavior of |
1120 | correct programs. So first, check it is not the case. */ |
1121 | if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg))) |
1122 | { |
1123 | if (dump_file) |
1124 | { |
1125 | fprintf (stream: dump_file, format: "Cannot eliminate extension:\n" ); |
1126 | print_rtl_single (dump_file, insn); |
1127 | fprintf (stream: dump_file, format: " because it can operate on uninitialized" |
1128 | " data\n" ); |
1129 | } |
1130 | return; |
1131 | } |
1132 | |
1133 | /* Second, make sure we can get all the reaching definitions. */ |
1134 | defs = get_defs (insn, reg, NULL); |
1135 | if (!defs) |
1136 | { |
1137 | if (dump_file) |
1138 | { |
1139 | fprintf (stream: dump_file, format: "Cannot eliminate extension:\n" ); |
1140 | print_rtl_single (dump_file, insn); |
1141 | fprintf (stream: dump_file, format: " because of missing definition(s)\n" ); |
1142 | } |
1143 | return; |
1144 | } |
1145 | |
1146 | /* Third, make sure the reaching definitions don't feed another and |
1147 | different extension. FIXME: this obviously can be improved. */ |
1148 | for (def = defs; def; def = def->next) |
1149 | if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))]) |
1150 | && idx != -1U |
1151 | && (cand = &(*insn_list)[idx - 1]) |
1152 | && cand->code != code) |
1153 | { |
1154 | if (dump_file) |
1155 | { |
1156 | fprintf (stream: dump_file, format: "Cannot eliminate extension:\n" ); |
1157 | print_rtl_single (dump_file, insn); |
1158 | fprintf (stream: dump_file, format: " because of other extension\n" ); |
1159 | } |
1160 | return; |
1161 | } |
1162 | /* For vector mode extensions, ensure that all uses of the |
1163 | XEXP (src, 0) register are in insn or debug insns, as unlike |
1164 | integral extensions lowpart subreg of the sign/zero extended |
1165 | register are not equal to the original register, so we have |
1166 | to change all uses or none and the current code isn't able |
1167 | to change them all at once in one transaction. */ |
1168 | else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0)))) |
1169 | { |
1170 | struct df_link *ref_chain, *ref_link; |
1171 | |
1172 | ref_chain = DF_REF_CHAIN (def->ref); |
1173 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) |
1174 | { |
1175 | if (ref_link->ref == NULL |
1176 | || DF_REF_INSN_INFO (ref_link->ref) == NULL) |
1177 | { |
1178 | idx = -1U; |
1179 | break; |
1180 | } |
1181 | rtx_insn *use_insn = DF_REF_INSN (ref_link->ref); |
1182 | if (use_insn != insn && !DEBUG_INSN_P (use_insn)) |
1183 | { |
1184 | idx = -1U; |
1185 | break; |
1186 | } |
1187 | } |
1188 | |
1189 | if (idx == -1U) |
1190 | { |
1191 | def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; |
1192 | if (dump_file) |
1193 | { |
1194 | fprintf (stream: dump_file, format: "Cannot eliminate extension:\n" ); |
1195 | print_rtl_single (dump_file, insn); |
1196 | fprintf (stream: dump_file, |
1197 | format: " because some vector uses aren't extension\n" ); |
1198 | } |
1199 | return; |
1200 | } |
1201 | } |
1202 | |
1203 | /* Fourth, if the extended version occupies more registers than the |
1204 | original and the source of the extension is the same hard register |
1205 | as the destination of the extension, then we cannot eliminate |
1206 | the extension without deep analysis, so just punt. |
1207 | |
1208 | We allow this when the registers are different because the |
1209 | code in combine_reaching_defs will handle that case correctly. */ |
1210 | if (hard_regno_nregs (REGNO (dest), mode) != REG_NREGS (reg) |
1211 | && reg_overlap_mentioned_p (dest, reg)) |
1212 | return; |
1213 | |
1214 | /* Then add the candidate to the list and insert the reaching definitions |
1215 | into the definition map. */ |
1216 | ext_cand e = {.expr: expr, .code: code, .mode: mode, .insn: insn}; |
1217 | insn_list->safe_push (obj: e); |
1218 | idx = insn_list->length (); |
1219 | |
1220 | for (def = defs; def; def = def->next) |
1221 | def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; |
1222 | } |
1223 | } |
1224 | |
1225 | /* Traverse the instruction stream looking for extensions and return the |
1226 | list of candidates. */ |
1227 | |
1228 | static vec<ext_cand> |
1229 | find_removable_extensions (void) |
1230 | { |
1231 | vec<ext_cand> insn_list = vNULL; |
1232 | basic_block bb; |
1233 | rtx_insn *insn; |
1234 | rtx set; |
1235 | unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid); |
1236 | bitmap_head init, kill, gen, tmp; |
1237 | |
1238 | bitmap_initialize (head: &init, NULL); |
1239 | bitmap_initialize (head: &kill, NULL); |
1240 | bitmap_initialize (head: &gen, NULL); |
1241 | bitmap_initialize (head: &tmp, NULL); |
1242 | |
1243 | FOR_EACH_BB_FN (bb, cfun) |
1244 | { |
1245 | bitmap_copy (&init, DF_MIR_IN (bb)); |
1246 | bitmap_clear (&kill); |
1247 | bitmap_clear (&gen); |
1248 | |
1249 | FOR_BB_INSNS (bb, insn) |
1250 | { |
1251 | if (NONDEBUG_INSN_P (insn)) |
1252 | { |
1253 | set = single_set (insn); |
1254 | if (set != NULL_RTX) |
1255 | add_removable_extension (expr: set, insn, insn_list: &insn_list, def_map, |
1256 | init_regs: &init); |
1257 | df_mir_simulate_one_insn (bb, insn, &kill, &gen); |
1258 | bitmap_ior_and_compl (DST: &tmp, A: &gen, B: &init, C: &kill); |
1259 | bitmap_copy (&init, &tmp); |
1260 | } |
1261 | } |
1262 | } |
1263 | |
1264 | XDELETEVEC (def_map); |
1265 | |
1266 | return insn_list; |
1267 | } |
1268 | |
1269 | /* This is the main function that checks the insn stream for redundant |
1270 | extensions and tries to remove them if possible. */ |
1271 | |
1272 | static void |
1273 | find_and_remove_re (void) |
1274 | { |
1275 | ext_cand *curr_cand; |
1276 | rtx_insn *curr_insn = NULL; |
1277 | int num_re_opportunities = 0, num_realized = 0, i; |
1278 | vec<ext_cand> reinsn_list; |
1279 | auto_vec<rtx_insn *> reinsn_del_list; |
1280 | auto_vec<rtx_insn *> reinsn_copy_list; |
1281 | |
1282 | /* Construct DU chain to get all reaching definitions of each |
1283 | extension instruction. */ |
1284 | df_set_flags (DF_RD_PRUNE_DEAD_DEFS); |
1285 | df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); |
1286 | df_mir_add_problem (); |
1287 | df_analyze (); |
1288 | df_set_flags (DF_DEFER_INSN_RESCAN); |
1289 | |
1290 | max_insn_uid = get_max_uid (); |
1291 | reinsn_list = find_removable_extensions (); |
1292 | |
1293 | ext_state state; |
1294 | if (reinsn_list.is_empty ()) |
1295 | state.modified = NULL; |
1296 | else |
1297 | state.modified = XCNEWVEC (struct ext_modified, max_insn_uid); |
1298 | |
1299 | FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand) |
1300 | { |
1301 | num_re_opportunities++; |
1302 | |
1303 | /* Try to combine the extension with the definition. */ |
1304 | if (dump_file) |
1305 | { |
1306 | fprintf (stream: dump_file, format: "Trying to eliminate extension:\n" ); |
1307 | print_rtl_single (dump_file, curr_cand->insn); |
1308 | } |
1309 | |
1310 | if (combine_reaching_defs (cand: curr_cand, set_pat: curr_cand->expr, state: &state)) |
1311 | { |
1312 | if (dump_file) |
1313 | fprintf (stream: dump_file, format: "Eliminated the extension.\n" ); |
1314 | num_realized++; |
1315 | /* If the RHS of the current candidate is not (extend (reg)), then |
1316 | we do not allow the optimization of extensions where |
1317 | the source and destination registers do not match. Thus |
1318 | checking REG_P here is correct. */ |
1319 | rtx set = single_set (insn: curr_cand->insn); |
1320 | if (REG_P (XEXP (SET_SRC (set), 0)) |
1321 | && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)))) |
1322 | { |
1323 | reinsn_copy_list.safe_push (obj: curr_cand->insn); |
1324 | reinsn_copy_list.safe_push (obj: state.defs_list[0]); |
1325 | } |
1326 | reinsn_del_list.safe_push (obj: curr_cand->insn); |
1327 | state.modified[INSN_UID (insn: curr_cand->insn)].deleted = 1; |
1328 | } |
1329 | } |
1330 | |
1331 | /* The copy list contains pairs of insns which describe copies we |
1332 | need to insert into the INSN stream. |
1333 | |
1334 | The first insn in each pair is the extension insn, from which |
1335 | we derive the source and destination of the copy. |
1336 | |
1337 | The second insn in each pair is the memory reference where the |
1338 | extension will ultimately happen. We emit the new copy |
1339 | immediately after this insn. |
1340 | |
1341 | It may first appear that the arguments for the copy are reversed. |
1342 | Remember that the memory reference will be changed to refer to the |
1343 | destination of the extention. So we're actually emitting a copy |
1344 | from the new destination to the old destination. */ |
1345 | for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2) |
1346 | { |
1347 | rtx_insn *curr_insn = reinsn_copy_list[i]; |
1348 | rtx_insn *def_insn = reinsn_copy_list[i + 1]; |
1349 | |
1350 | /* Use the mode of the destination of the defining insn |
1351 | for the mode of the copy. This is necessary if the |
1352 | defining insn was used to eliminate a second extension |
1353 | that was wider than the first. */ |
1354 | rtx sub_rtx = *get_sub_rtx (def_insn); |
1355 | rtx set = single_set (insn: curr_insn); |
1356 | rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), |
1357 | REGNO (XEXP (SET_SRC (set), 0))); |
1358 | rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), |
1359 | REGNO (SET_DEST (set))); |
1360 | rtx new_set = gen_rtx_SET (new_dst, new_src); |
1361 | emit_insn_after (new_set, def_insn); |
1362 | } |
1363 | |
1364 | /* Delete all useless extensions here in one sweep. */ |
1365 | FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn) |
1366 | delete_insn (curr_insn); |
1367 | |
1368 | reinsn_list.release (); |
1369 | XDELETEVEC (state.modified); |
1370 | |
1371 | if (dump_file && num_re_opportunities > 0) |
1372 | fprintf (stream: dump_file, format: "Elimination opportunities = %d realized = %d\n" , |
1373 | num_re_opportunities, num_realized); |
1374 | } |
1375 | |
1376 | /* Find and remove redundant extensions. */ |
1377 | |
static unsigned int
rest_of_handle_ree (void)
{
  /* All the work is done in find_and_remove_re; the pass itself has
     no additional todo flags to request, hence the constant 0.  */
  find_and_remove_re ();
  return 0;
}
1384 | |
1385 | namespace { |
1386 | |
1387 | const pass_data pass_data_ree = |
1388 | { |
1389 | .type: RTL_PASS, /* type */ |
1390 | .name: "ree" , /* name */ |
1391 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1392 | .tv_id: TV_REE, /* tv_id */ |
1393 | .properties_required: 0, /* properties_required */ |
1394 | .properties_provided: 0, /* properties_provided */ |
1395 | .properties_destroyed: 0, /* properties_destroyed */ |
1396 | .todo_flags_start: 0, /* todo_flags_start */ |
1397 | TODO_df_finish, /* todo_flags_finish */ |
1398 | }; |
1399 | |
1400 | class pass_ree : public rtl_opt_pass |
1401 | { |
1402 | public: |
1403 | pass_ree (gcc::context *ctxt) |
1404 | : rtl_opt_pass (pass_data_ree, ctxt) |
1405 | {} |
1406 | |
1407 | /* opt_pass methods: */ |
1408 | bool gate (function *) final override { return (optimize > 0 && flag_ree); } |
1409 | unsigned int execute (function *) final override |
1410 | { |
1411 | return rest_of_handle_ree (); |
1412 | } |
1413 | |
1414 | }; // class pass_ree |
1415 | |
1416 | } // anon namespace |
1417 | |
/* Create a new instance of the REE pass for context CTXT.  */

rtl_opt_pass *
make_pass_ree (gcc::context *ctxt)
{
  return new pass_ree (ctxt);
}
1423 | |