/* Scheduler hooks for IA-32 which implement CPU specific logic.
   Copyright (C) 1988-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "tm_p.h"
#include "target.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "recog.h"

/* Return the maximum number of instructions a cpu can issue.  */
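/* (Illustrative note: this value is reported through the
   TARGET_SCHED_ISSUE_RATE hook, so e.g. a -mtune selection that maps to
   PROCESSOR_SKYLAKE below makes the haifa scheduler model at most four
   instructions starting per cycle.)  */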

int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_LAKEMONT:
    case PROCESSOR_BONNELL:
    case PROCESSOR_SILVERMONT:
    case PROCESSOR_KNL:
    case PROCESSOR_KNM:
    case PROCESSOR_INTEL:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_NOCONA:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BTVER1:
    case PROCESSOR_LUJIAZUI:
      return 3;

    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_ZNVER1:
    case PROCESSOR_ZNVER2:
    case PROCESSOR_ZNVER3:
    case PROCESSOR_ZNVER4:
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_SKYLAKE:
    case PROCESSOR_SKYLAKE_AVX512:
    case PROCESSOR_CASCADELAKE:
    case PROCESSOR_CANNONLAKE:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_YONGFENG:
    case PROCESSOR_GENERIC:
      return 4;

    case PROCESSOR_ICELAKE_CLIENT:
    case PROCESSOR_ICELAKE_SERVER:
    case PROCESSOR_TIGERLAKE:
    case PROCESSOR_COOPERLAKE:
    case PROCESSOR_ROCKETLAKE:
      return 5;

    case PROCESSOR_SAPPHIRERAPIDS:
      return 6;

    default:
      return 1;
    }
}

/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
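/* For example (illustrative, AT&T syntax):

     set_insn:  addl $4, %ebx
     use_insn:  movl (%ebx), %eax

   USE_INSN addresses memory through %ebx, which SET_INSN modifies, so the
   pair is AGI dependent.  The special case below exempts push/pop paired
   with %esp-based addresses, which the hardware resolves without a stall.  */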

bool
ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        if (modified_in_p (addr, set_insn) != 0)
          {
            /* No AGI stall if SET_INSN is a push or pop and USE_INSN
               has SP based memory (unless index reg is modified in a pop).  */
            rtx set = single_set (set_insn);
            if (set
                && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
                    || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
              {
                struct ix86_address parts;
                if (ix86_decompose_address (addr, &parts)
                    && parts.base == stack_pointer_rtx
                    && (parts.index == NULL_RTX
                        || MEM_P (SET_DEST (set))
                        || !modified_in_p (parts.index, set_insn)))
                  return false;
              }
            return true;
          }
        return false;
      }
  return false;
}

/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */
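/* For example (illustrative, AT&T syntax):

     dep_insn:  cmpl %esi, %edi    ; writes only FLAGS_REG
     insn:      sete %al           ; TYPE_SETCC, reads only the flags

   returns true here; the Pentium logic in ix86_adjust_cost then treats the
   compare and setcc as a pairing and drops the dependence cost to 0.  */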

static bool
ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn,
                      enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}

/* Helper function for exact_store_load_dependency.
   Return true if ADDR is found in INSN.  */
static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    case REG:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case EXPR_LIST:
      return false;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}

/* Return true if an exact dependency exists between STORE and LOAD, i.e.
   the same memory address is used in both.  */
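/* For example (illustrative, AT&T syntax):

     store:  movw %dx, -2(%ebp)
     load:   movzwl -2(%ebp), %eax

   Both insns contain an equal memory address rtx, so the dependency is
   "exact"; the Silvermont logic below uses this to add latency when a
   short-mode store feeds a dependent load (a store-forwarding stall).  */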
static bool
exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}

/* This function corrects the value of COST (latency) based on the
   relationship between INSN and DEP_INSN through a dependence of type
   DEP_TYPE, and strength DW.  It should return the new value.

   On x86 CPUs this is most commonly used to model the fact that values of
   registers used to compute the address of a memory operand need to be
   ready earlier than values of registers used in the actual operation.  */
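/* For example (illustrative, AT&T syntax):

     dep_insn:  addl %esi, %edi
     insn:      addl (%ebx), %edi

   DEP_INSN feeds only INSN's non-address operand %edi, so most cases below
   subtract the load latency from COST: the core can start the load early
   and only the ALU part of INSN waits.  Had DEP_INSN set %ebx instead,
   ix86_agi_dependent would be true and the full latency would remain.  */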

int
ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
                  unsigned int)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (dep_type != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_LAKEMONT:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves take one cycle, as the core can issue one load at
             a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before
         the instruction is really finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves take one cycle, as the core can issue one load at
             a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
      /* The stack engine allows push and pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;
      /* FALLTHRU */

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of the integer and
             floating unit pipeline preparation stages, memory operands for
             floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_CPU_P (ATHLON) ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_ZNVER1:
    case PROCESSOR_ZNVER2:
    case PROCESSOR_ZNVER3:
    case PROCESSOR_ZNVER4:
      /* The stack engine allows push and pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 4;
          else
            loadcost = 7;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_YONGFENG:
      /* The stack engine allows push and pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;
      /* FALLTHRU */

    case PROCESSOR_LUJIAZUI:
      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          int loadcost = 4;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* The stack engine allows push and pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      memory = get_attr_memory (insn);

      /* Model the reorder buffer's ability to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          if (cost >= 4)
            cost -= 4;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SILVERMONT:
    case PROCESSOR_KNL:
    case PROCESSOR_KNM:
    case PROCESSOR_INTEL:
      if (!reload_completed)
        return cost;

      /* Increase the cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase the cost of ld/st for short int types only,
                     because of the store-forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase the cost of the store/load pair if an
                         exact dependence exists and INSN is the load.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

int
ia32_multipass_dfa_lookahead (void)
{
  /* Generally, we want haifa-sched:max_issue() to look ahead as far as
     the number of instructions that can be executed in a cycle, i.e.,
     issue_rate.  */
  if (reload_completed)
    return ix86_issue_rate ();
  /* Don't use lookahead for the pre-reload schedule to save compile
     time.  */
  return 0;
}

/* Return true if the target platform supports macro-fusion.  */

bool
ix86_macro_fusion_p ()
{
  return TARGET_FUSE_CMP_AND_BRANCH;
}

/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to the
   "Intel Architectures Optimization Reference Manual".  */

bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
  rtx src, dest;
  enum rtx_code ccode;
  rtx compare_set = NULL_RTX, test_if, cond;
  rtx alu_set = NULL_RTX, addr = NULL_RTX;
  enum attr_type condgen_type;

  if (!any_condjump_p (condjmp))
    return false;

  unsigned int condreg1, condreg2;
  rtx cc_reg_1;
  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
  if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
      || !condgen
      || !modified_in_p (cc_reg_1, condgen))
    return false;

  condgen_type = get_attr_type (condgen);
  if (condgen_type == TYPE_MULTI
      && INSN_CODE (condgen) == code_for_stack_protect_test_1 (ptr_mode)
      && TARGET_FUSE_ALU_AND_BRANCH)
    {
      /* stack_protect_test_<mode> ends with a sub, which subtracts
         a non-rip special memory operand from a GPR.  */
      src = NULL_RTX;
      alu_set = XVECEXP (PATTERN (condgen), 0, 1);
      goto handle_stack_protect_test;
    }
  else if (condgen_type != TYPE_TEST
           && condgen_type != TYPE_ICMP
           && condgen_type != TYPE_INCDEC
           && condgen_type != TYPE_ALU)
    return false;

  compare_set = single_set (condgen);
  if (compare_set == NULL_RTX && !TARGET_FUSE_ALU_AND_BRANCH)
    return false;

  if (compare_set == NULL_RTX)
    {
      int i;
      rtx pat = PATTERN (condgen);
      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
          {
            rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
            if (GET_CODE (set_src) == COMPARE)
              compare_set = XVECEXP (pat, 0, i);
            else
              alu_set = XVECEXP (pat, 0, i);
          }
    }
  if (compare_set == NULL_RTX)
    return false;
  src = SET_SRC (compare_set);
  if (GET_CODE (src) != COMPARE)
    return false;

  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     supported.  */
  if ((MEM_P (XEXP (src, 0)) && CONST_INT_P (XEXP (src, 1)))
      || (MEM_P (XEXP (src, 1)) && CONST_INT_P (XEXP (src, 0))))
    return false;

  /* No fusion for RIP-relative addresses.  */
  if (MEM_P (XEXP (src, 0)))
    addr = XEXP (XEXP (src, 0), 0);
  else if (MEM_P (XEXP (src, 1)))
    addr = XEXP (XEXP (src, 1), 0);

  if (addr)
    {
      ix86_address parts;
      int ok = ix86_decompose_address (addr, &parts);
      gcc_assert (ok);

      if (ix86_rip_relative_addr_p (&parts))
        return false;
    }

 handle_stack_protect_test:
  test_if = SET_SRC (pc_set (condjmp));
  cond = XEXP (test_if, 0);
  ccode = GET_CODE (cond);
  /* Check whether the conditional jump uses the Sign or Overflow flags.  */
  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
      && (ccode == GE || ccode == GT || ccode == LE || ccode == LT))
    return false;

  /* Return true for TYPE_TEST and TYPE_ICMP.  */
  if (condgen_type == TYPE_TEST || condgen_type == TYPE_ICMP)
    return true;

  /* The following handles macro-fusion of an ALU op + jmp.  */
  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
    return false;

  /* No fusion for an ALU op with a memory destination operand.  */
  dest = SET_DEST (alu_set);
  if (MEM_P (dest))
    return false;

  /* Macro-fusion for inc/dec + unsigned conditional jump is not
     supported.  */
  if (condgen_type == TYPE_INCDEC
      && (ccode == GEU || ccode == GTU || ccode == LEU || ccode == LTU))
    return false;

  return true;
}