1 | /* Subroutines used for code generation on IA-32. |
2 | Copyright (C) 1988-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #define INCLUDE_STRING |
21 | #define IN_TARGET_CODE 1 |
22 | |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "memmodel.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "cfgloop.h" |
33 | #include "df.h" |
34 | #include "tm_p.h" |
35 | #include "stringpool.h" |
36 | #include "expmed.h" |
37 | #include "optabs.h" |
38 | #include "regs.h" |
39 | #include "emit-rtl.h" |
40 | #include "recog.h" |
41 | #include "cgraph.h" |
42 | #include "diagnostic.h" |
43 | #include "cfgbuild.h" |
44 | #include "alias.h" |
45 | #include "fold-const.h" |
46 | #include "attribs.h" |
47 | #include "calls.h" |
48 | #include "stor-layout.h" |
49 | #include "varasm.h" |
50 | #include "output.h" |
51 | #include "insn-attr.h" |
52 | #include "flags.h" |
53 | #include "except.h" |
54 | #include "explow.h" |
55 | #include "expr.h" |
56 | #include "cfgrtl.h" |
57 | #include "common/common-target.h" |
58 | #include "langhooks.h" |
59 | #include "reload.h" |
60 | #include "gimplify.h" |
61 | #include "dwarf2.h" |
62 | #include "tm-constrs.h" |
63 | #include "cselib.h" |
64 | #include "sched-int.h" |
65 | #include "opts.h" |
66 | #include "tree-pass.h" |
67 | #include "context.h" |
68 | #include "pass_manager.h" |
69 | #include "target-globals.h" |
70 | #include "gimple-iterator.h" |
71 | #include "gimple-fold.h" |
72 | #include "tree-vectorizer.h" |
73 | #include "shrink-wrap.h" |
74 | #include "builtins.h" |
75 | #include "rtl-iter.h" |
76 | #include "tree-iterator.h" |
77 | #include "dbgcnt.h" |
78 | #include "case-cfn-macros.h" |
79 | #include "dojump.h" |
80 | #include "fold-const-call.h" |
81 | #include "tree-vrp.h" |
82 | #include "tree-ssanames.h" |
83 | #include "selftest.h" |
84 | #include "selftest-rtl.h" |
85 | #include "print-rtl.h" |
86 | #include "intl.h" |
87 | #include "ifcvt.h" |
88 | #include "symbol-summary.h" |
89 | #include "ipa-prop.h" |
90 | #include "ipa-fnsummary.h" |
91 | #include "wide-int-bitmask.h" |
92 | #include "tree-vector-builder.h" |
93 | #include "debug.h" |
94 | #include "dwarf2out.h" |
95 | #include "i386-options.h" |
96 | #include "i386-builtins.h" |
97 | #include "i386-expand.h" |
98 | #include "i386-features.h" |
99 | #include "function-abi.h" |
100 | #include "rtl-error.h" |
101 | |
102 | /* This file should be included last. */ |
103 | #include "target-def.h" |
104 | |
105 | static rtx legitimize_dllimport_symbol (rtx, bool); |
106 | static rtx legitimize_pe_coff_extern_decl (rtx, bool); |
107 | static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); |
108 | static void ix86_emit_restore_reg_using_pop (rtx); |
109 | |
110 | |
111 | #ifndef CHECK_STACK_LIMIT |
112 | #define CHECK_STACK_LIMIT (-1) |
113 | #endif |
114 | |
115 | /* Return index of given mode in mult and division cost tables. */ |
116 | #define MODE_INDEX(mode) \ |
117 | ((mode) == QImode ? 0 \ |
118 | : (mode) == HImode ? 1 \ |
119 | : (mode) == SImode ? 2 \ |
120 | : (mode) == DImode ? 3 \ |
121 | : 4) |
122 | |
123 | |
124 | /* Set by -mtune. */ |
125 | const struct processor_costs *ix86_tune_cost = NULL; |
126 | |
127 | /* Set by -mtune or -Os. */ |
128 | const struct processor_costs *ix86_cost = NULL; |
129 | |
130 | /* In case the average insn count for single function invocation is |
131 | lower than this constant, emit fast (but longer) prologue and |
132 | epilogue code. */ |
133 | #define FAST_PROLOGUE_INSN_COUNT 20 |
134 | |
135 | /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ |
136 | static const char *const qi_reg_name[] = QI_REGISTER_NAMES; |
137 | static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; |
138 | static const char *const hi_reg_name[] = HI_REGISTER_NAMES; |
139 | |
140 | /* Array of the smallest class containing reg number REGNO, indexed by |
141 | REGNO. Used by REGNO_REG_CLASS in i386.h. */ |
142 | |
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  Note the first entry uses ALL_MASK_REGS while
     the remaining seven use MASK_REGS -- see the class definitions
     in i386.h for the distinction.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
180 | |
181 | /* The "default" register map used in 32bit mode. */ |
182 | |
int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame -- internal-only registers with no
     debugger encoding.  */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
211 | |
212 | /* The "default" register map used in 64bit mode. */ |
213 | |
int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame -- internal-only registers with no
     debugger encoding.  */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
241 | |
242 | /* Define the register numbers to be used in Dwarf debugging information. |
243 | The SVR4 reference port C compiler uses the following register numbers |
244 | in its Dwarf output code: |
245 | 0 for %eax (gcc regno = 0) |
246 | 1 for %ecx (gcc regno = 2) |
247 | 2 for %edx (gcc regno = 1) |
248 | 3 for %ebx (gcc regno = 3) |
249 | 4 for %esp (gcc regno = 7) |
250 | 5 for %ebp (gcc regno = 6) |
251 | 6 for %esi (gcc regno = 4) |
252 | 7 for %edi (gcc regno = 5) |
253 | The following three DWARF register numbers are never generated by |
254 | the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
255 | believed these numbers have these meanings. |
256 | 8 for %eip (no gcc equivalent) |
257 | 9 for %eflags (gcc regno = 17) |
258 | 10 for %trapno (no gcc equivalent) |
259 | It is not at all clear how we should number the FP stack registers |
260 | for the x86 architecture. If the version of SDB on x86/svr4 were |
261 | a bit less brain dead with respect to floating-point then we would |
262 | have a precedent to follow with respect to DWARF register numbers |
263 | for x86 FP registers, but the SDB on x86/svr4 was so completely |
264 | broken with respect to FP registers that it is hardly worth thinking |
265 | of it as something to strive for compatibility with. |
266 | The version of x86/svr4 SDB I had does (partially) |
267 | seem to believe that DWARF register number 11 is associated with |
268 | the x86 register %st(0), but that's about all. Higher DWARF |
269 | register numbers don't seem to be associated with anything in |
270 | particular, and even for DWARF regno 11, SDB only seemed to under- |
271 | stand that it should say that a variable lives in %st(0) (when |
272 | asked via an `=' command) if we said it was in DWARF regno 11, |
273 | but SDB still printed garbage when asked for the value of the |
274 | variable in question (via a `/' command). |
275 | (Also note that the labels SDB printed for various FP stack regs |
276 | when doing an `x' command were all wrong.) |
277 | Note that these problems generally don't affect the native SVR4 |
278 | C compiler because it doesn't allow the use of -O with -g and |
279 | because when it is *not* optimizing, it allocates a memory |
280 | location for each floating-point variable, and the memory |
281 | location is what gets described in the DWARF AT_location |
282 | attribute for the variable in question. |
283 | Regardless of the severe mental illness of the x86/svr4 SDB, we |
284 | do something sensible here and we use the following DWARF |
285 | register numbers. Note that these are all stack-top-relative |
286 | numbers. |
287 | 11 for %st(0) (gcc regno = 8) |
288 | 12 for %st(1) (gcc regno = 9) |
289 | 13 for %st(2) (gcc regno = 10) |
290 | 14 for %st(3) (gcc regno = 11) |
291 | 15 for %st(4) (gcc regno = 12) |
292 | 16 for %st(5) (gcc regno = 13) |
293 | 17 for %st(6) (gcc regno = 14) |
294 | 18 for %st(7) (gcc regno = 15) |
295 | */ |
int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags (DWARF 9, see comment above), fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 (not available in 32-bit mode) */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
324 | |
325 | /* Define parameter passing and return registers. */ |
326 | |
327 | static int const x86_64_int_parameter_registers[6] = |
328 | { |
329 | DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG |
330 | }; |
331 | |
332 | static int const x86_64_ms_abi_int_parameter_registers[4] = |
333 | { |
334 | CX_REG, DX_REG, R8_REG, R9_REG |
335 | }; |
336 | |
337 | static int const x86_64_int_return_registers[4] = |
338 | { |
339 | AX_REG, DX_REG, DI_REG, SI_REG |
340 | }; |
341 | |
342 | /* Define the structure for the machine field in struct function. */ |
343 | |
344 | struct GTY(()) stack_local_entry { |
345 | unsigned short mode; |
346 | unsigned short n; |
347 | rtx rtl; |
348 | struct stack_local_entry *next; |
349 | }; |
350 | |
351 | /* Which cpu are we scheduling for. */ |
352 | enum attr_cpu ix86_schedule; |
353 | |
354 | /* Which cpu are we optimizing for. */ |
355 | enum processor_type ix86_tune; |
356 | |
357 | /* Which instruction set architecture to use. */ |
358 | enum processor_type ix86_arch; |
359 | |
360 | /* True if processor has SSE prefetch instruction. */ |
361 | unsigned char ix86_prefetch_sse; |
362 | |
363 | /* Preferred alignment for stack boundary in bits. */ |
364 | unsigned int ix86_preferred_stack_boundary; |
365 | |
366 | /* Alignment for incoming stack boundary in bits specified at |
367 | command line. */ |
368 | unsigned int ix86_user_incoming_stack_boundary; |
369 | |
370 | /* Default alignment for incoming stack boundary in bits. */ |
371 | unsigned int ix86_default_incoming_stack_boundary; |
372 | |
373 | /* Alignment for incoming stack boundary in bits. */ |
374 | unsigned int ix86_incoming_stack_boundary; |
375 | |
376 | /* True if there is no direct access to extern symbols. */ |
377 | bool ix86_has_no_direct_extern_access; |
378 | |
379 | /* Calling abi specific va_list type nodes. */ |
380 | tree sysv_va_list_type_node; |
381 | tree ms_va_list_type_node; |
382 | |
383 | /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ |
384 | char internal_label_prefix[16]; |
385 | int internal_label_prefix_len; |
386 | |
387 | /* Fence to use after loop using movnt. */ |
388 | tree x86_mfence; |
389 | |
390 | /* Register class used for passing given 64bit part of the argument. |
391 | These represent classes as documented by the PS ABI, with the exception |
392 | of SSESF, SSEDF classes, that are basically SSE class, just gcc will |
393 | use SF or DFmode move instead of DImode to avoid reformatting penalties. |
394 | |
395 | Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves |
396 | whenever possible (upper half does contain padding). */ |
397 | enum x86_64_reg_class |
398 | { |
399 | X86_64_NO_CLASS, |
400 | X86_64_INTEGER_CLASS, |
401 | X86_64_INTEGERSI_CLASS, |
402 | X86_64_SSE_CLASS, |
403 | X86_64_SSEHF_CLASS, |
404 | X86_64_SSESF_CLASS, |
405 | X86_64_SSEDF_CLASS, |
406 | X86_64_SSEUP_CLASS, |
407 | X86_64_X87_CLASS, |
408 | X86_64_X87UP_CLASS, |
409 | X86_64_COMPLEX_X87_CLASS, |
410 | X86_64_MEMORY_CLASS |
411 | }; |
412 | |
413 | #define MAX_CLASSES 8 |
414 | |
415 | /* Table of constants used by fldpi, fldln2, etc.... */ |
416 | static REAL_VALUE_TYPE ext_80387_constants_table [5]; |
417 | static bool ext_80387_constants_init; |
418 | |
419 | |
420 | static rtx ix86_function_value (const_tree, const_tree, bool); |
421 | static bool ix86_function_value_regno_p (const unsigned int); |
422 | static unsigned int ix86_function_arg_boundary (machine_mode, |
423 | const_tree); |
424 | static rtx ix86_static_chain (const_tree, bool); |
425 | static int ix86_function_regparm (const_tree, const_tree); |
426 | static void ix86_compute_frame_layout (void); |
427 | static tree ix86_canonical_va_list_type (tree); |
428 | static unsigned int split_stack_prologue_scratch_regno (void); |
429 | static bool i386_asm_output_addr_const_extra (FILE *, rtx); |
430 | |
431 | static bool ix86_can_inline_p (tree, tree); |
432 | static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
433 | |
434 | |
435 | /* Whether -mtune= or -march= were specified */ |
436 | int ix86_tune_defaulted; |
437 | int ix86_arch_specified; |
438 | |
439 | /* Return true if a red-zone is in use. We can't use red-zone when |
440 | there are local indirect jumps, like "indirect_jump" or "tablejump", |
441 | which jumps to another place in the function, since "call" in the |
442 | indirect thunk pushes the return address onto stack, destroying |
443 | red-zone. |
444 | |
   TODO: If we could reserve the first two WORDs of the red-zone (one
   for the PUSH of the return address and another for the CALL into the
   thunk), local indirect jumps could be allowed together with an
   indirect thunk.  */
448 | |
449 | bool |
450 | ix86_using_red_zone (void) |
451 | { |
452 | return (TARGET_RED_ZONE |
453 | && !TARGET_64BIT_MS_ABI |
454 | && (!cfun->machine->has_local_indirect_jump |
455 | || cfun->machine->indirect_branch_type == indirect_branch_keep)); |
456 | } |
457 | |
/* Return true if the profiling call should be emitted before the
   prologue (the -mfentry scheme); otherwise return false.
   NOTE(review): the original comment said 'For x86 with "hotfix" it
   is sorried' -- presumably an interaction with hot-patching
   prologues; wording unclear, confirm before relying on it.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
466 | |
/* Update register usage after having seen the compiler flags.
   Implements TARGET_CONDITIONAL_REGISTER_USAGE: adjusts the global
   fixed/call-used register data and ACCESSIBLE_REG_SET according to
   the selected target (64-bit, MMX, SSE, x87, AVX512F, APX, ...).  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers.
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  /* CLOBBERED_REGS is recomputed from scratch in the loop below.  */
  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  Entries > 1 encode an
	 ABI-conditional mask rather than a plain boolean.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], bit: i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], bit: i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers: both the extended
     SSE registers and all mask registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers.  APX extended GPRs
     also require 64-bit mode.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i);
    }
}
541 | |
/* Canonicalize a comparison from one we don't have to one we do have.

   CODE points to the rtx comparison code and OP0/OP1 to its operands;
   all three may be rewritten in place.  When OP0_PRESERVE_VALUE is
   true the caller needs OP0 left untouched, so no swap is done.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function. Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place. Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (a&: *op0, b&: *op1);
	  *code = (int) scode;
	}
    }
}
566 | |
567 | |
/* Hook to determine if one function can safely inline another.
   Implements TARGET_CAN_INLINE_P: CALLER may inline CALLEE when the
   callee's ISA flags are a subset of the caller's and the remaining
   target options are compatible.  always_inline callees get extra
   leeway via ALWAYS_INLINE_SAFE_MASK.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines. Lets hope
     user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A missing target node means the command-line default options.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  /* Identical option nodes are trivially compatible.  */
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute (attr_name: "always_inline" ,
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (decl: callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored. We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (node: callee_node) == NULL
	       || ipa_fn_summaries->get (node: callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not. So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (s1: callee_opts->x_ix86_arch_string, s2: "x86-64" )
	   && !strcmp (s1: callee_opts->x_ix86_tune_string, s2: "generic" ))
    ret = true;

  /* See if arch, tune, etc. are the same. As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
660 | |
661 | /* Return true if this goes in large data/bss. */ |
662 | |
663 | static bool |
664 | ix86_in_large_data_p (tree exp) |
665 | { |
666 | if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC |
667 | && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC) |
668 | return false; |
669 | |
670 | if (exp == NULL_TREE) |
671 | return false; |
672 | |
673 | /* Functions are never large data. */ |
674 | if (TREE_CODE (exp) == FUNCTION_DECL) |
675 | return false; |
676 | |
677 | /* Automatic variables are never large data. */ |
678 | if (VAR_P (exp) && !is_global_var (t: exp)) |
679 | return false; |
680 | |
681 | if (VAR_P (exp) && DECL_SECTION_NAME (exp)) |
682 | { |
683 | const char *section = DECL_SECTION_NAME (exp); |
684 | if (strcmp (s1: section, s2: ".ldata" ) == 0 |
685 | || strcmp (s1: section, s2: ".lbss" ) == 0) |
686 | return true; |
687 | return false; |
688 | } |
689 | else |
690 | { |
691 | HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
692 | |
693 | /* If this is an incomplete type with size 0, then we can't put it |
694 | in data because it might be too big when completed. Also, |
695 | int_size_in_bytes returns -1 if size can vary or is larger than |
696 | an integer in which case also it is safer to assume that it goes in |
697 | large data. */ |
698 | if (size <= 0 || size > ix86_section_threshold) |
699 | return true; |
700 | } |
701 | |
702 | return false; |
703 | } |
704 | |
705 | /* i386-specific section flag to mark large sections. */ |
706 | #define SECTION_LARGE SECTION_MACH_DEP |
707 | |
708 | /* Switch to the appropriate section for output of DECL. |
709 | DECL is either a `VAR_DECL' node or a constant of some sort. |
710 | RELOC indicates whether forming the initial value of DECL requires |
711 | link-time relocations. */ |
712 | |
ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  /* Large-model data goes into the ".l" counterparts of the usual
     ELF sections; everything else falls through to the default.  */
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *sname = NULL;
      /* Large sections are writable by default; the read-only
	 categories below clear SECTION_WRITE again.  */
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata" ;
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel" ;
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local" ;
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro" ;
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local" ;
	  break;
	case SECCAT_BSS:
	  sname = ".lbss" ;
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata" ;
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories do not exist on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
772 | |
773 | /* Select a set of attributes for section NAME based on the properties |
774 | of DECL and whether or not RELOC indicates that DECL's initializer |
775 | might contain runtime relocations. */ |
776 | |
777 | static unsigned int ATTRIBUTE_UNUSED |
778 | x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
779 | { |
780 | unsigned int flags = default_section_type_flags (decl, name, reloc); |
781 | |
782 | if (ix86_in_large_data_p (exp: decl)) |
783 | flags |= SECTION_LARGE; |
784 | |
785 | if (decl == NULL_TREE |
786 | && (strcmp (s1: name, s2: ".ldata.rel.ro" ) == 0 |
787 | || strcmp (s1: name, s2: ".ldata.rel.ro.local" ) == 0)) |
788 | flags |= SECTION_RELRO; |
789 | |
790 | if (strcmp (s1: name, s2: ".lbss" ) == 0 |
791 | || startswith (str: name, prefix: ".lbss." ) |
792 | || startswith (str: name, prefix: ".gnu.linkonce.lb." )) |
793 | flags |= SECTION_BSS; |
794 | |
795 | return flags; |
796 | } |
797 | |
798 | /* Build up a unique section name, expressed as a |
799 | STRING_CST node, and assign it to DECL_SECTION_NAME (decl). |
800 | RELOC indicates whether the initial value of EXP requires |
801 | link-time relocations. */ |
802 | |
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  /* Large-model decls get a unique name built from a ".l" (or
     linkonce ".lX") prefix plus the decl's assembler name; everything
     else falls through to the default.  */
  if (ix86_in_large_data_p (exp: decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata" ;
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss" ;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata" ;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  /* Small-data categories do not exist on x86-64.  */
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "" ;

	  string = ACONCAT ((linkonce, prefix, "." , name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
861 | |
862 | #ifdef COMMON_ASM_OP |
863 | |
864 | #ifndef LARGECOMM_SECTION_ASM_OP |
865 | #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" |
866 | #endif |
867 | |
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			const char *name, unsigned HOST_WIDE_INT size,
			unsigned align)
{
  /* Objects over the -mlarge-data-threshold go into .lbss via the
     .largecomm directive under the medium/large code models.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss" , 0));
      fputs (LARGECOMM_SECTION_ASM_OP, stream: file);
    }
  else
    fputs (COMMON_ASM_OP, stream: file);
  assemble_name (file, name);
  /* Emit "<name>,<size>,<alignment-in-bytes>".  */
  fprintf (stream: file, format: "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n" ,
	   size, align / BITS_PER_UNIT);
}
891 | #endif |
892 | |
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  /* Large objects live in .lbss under the medium/large code models
     instead of the ordinary bss section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss" , 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so the label refers to real storage.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
916 | |
917 | /* Decide whether we must probe the stack before any space allocation |
918 | on this target. It's essentially TARGET_STACK_PROBE except when |
919 | -fstack-check causes the stack to be already probed differently. */ |
920 | |
921 | bool |
922 | ix86_target_stack_probe (void) |
923 | { |
924 | /* Do not probe the stack twice if static stack checking is enabled. */ |
925 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
926 | return false; |
927 | |
928 | return TARGET_STACK_PROBE; |
929 | } |
930 | |
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  /* True when DECL is known and resolves outside this translation
     unit (e.g. via PLT or GOT).  */
  bool bind_global = decl && !targetm.binds_local_p (decl);

  /* A naked function has no compiler-generated epilogue to rewrite
     into a jump.  */
  if (ix86_function_naked (fn: current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->no_caller_saved_registers)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers" );
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute (attr_name: "indirect_return" , TYPE_ATTRIBUTES (type))
      && !lookup_attribute (attr_name: "indirect_return" ,
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1070 | |
/* This function determines from TYPE the calling-convention.  Returns
   a bitmask of IX86_CALLCVT_* flags: exactly one base convention
   (CDECL/STDCALL/FASTCALL/THISCALL) possibly combined with the
   REGPARM/SSEREGPARM modifiers.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  /* 64-bit ABIs know only one calling convention.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      /* Explicit convention attributes take precedence; they are
	 mutually exclusive, so the first match wins.  */
      if (lookup_attribute (attr_name: "cdecl" , list: attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute (attr_name: "stdcall" , list: attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute (attr_name: "fastcall" , list: attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute (attr_name: "thiscall" , list: attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute (attr_name: "regparm" , list: attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute (attr_name: "sseregparm" , list: attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      /* A base convention was given explicitly; we are done.  */
      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  /* -mrtd makes stdcall the default for fixed-argument functions.  */
  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  /* Everything else defaults to cdecl, except MS-ABI methods with a
     fixed argument list, which default to thiscall.  */
  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
1120 | |
1121 | /* Return 0 if the attributes for two types are incompatible, 1 if they |
1122 | are compatible, and 2 if they are nearly compatible (which causes a |
1123 | warning to be generated). */ |
1124 | |
1125 | static int |
1126 | ix86_comp_type_attributes (const_tree type1, const_tree type2) |
1127 | { |
1128 | unsigned int ccvt1, ccvt2; |
1129 | |
1130 | if (TREE_CODE (type1) != FUNCTION_TYPE |
1131 | && TREE_CODE (type1) != METHOD_TYPE) |
1132 | return 1; |
1133 | |
1134 | ccvt1 = ix86_get_callcvt (type: type1); |
1135 | ccvt2 = ix86_get_callcvt (type: type2); |
1136 | if (ccvt1 != ccvt2) |
1137 | return 0; |
1138 | if (ix86_function_regparm (type1, NULL) |
1139 | != ix86_function_regparm (type2, NULL)) |
1140 | return 0; |
1141 | |
1142 | return 1; |
1143 | } |
1144 | |
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* On 64-bit the count is fixed by the ABI in use.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      /* An explicit regparm(N) attribute overrides everything.  */
      attr = lookup_attribute (attr_name: "regparm" , TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
1237 | |
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in a situation where we imply
   SSE calling convention but the function is called from another function
   with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* This path is 32-bit only; 64-bit passing is handled elsewhere.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute (attr_name: "sseregparm" , TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled" , decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled" , type);
	    }
	  return 0;
	}

      return 2;
    }

  /* Indirect calls and libcalls cannot use the local-function path.  */
  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (fndecl: target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.
	     See PR66047  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
1306 | |
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
1322 | |
/* Return true if the callee of a function with type FNTYPE should leave
   the hidden aggregate-return pointer on the stack for the caller to
   pop, rather than popping it itself on return.  */

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      /* An explicit callee_pop_aggregate_return(N) attribute decides;
	 N == 0 means the caller keeps (pops) the pointer.  */
      attr = lookup_attribute (attr_name: "callee_pop_aggregate_return" ,
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
1342 | |
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (type: funtype);

  /* Callee-pop conventions pop the whole fixed argument block.  */
  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (fntype: funtype))
    {
      /* Only when no argument registers are used does the hidden
	 pointer actually sit on the stack.  */
      int nregs = ix86_function_regparm (type: funtype, decl: fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
1387 | |
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Reject combined
   patterns whose hard-register operands satisfy no enabled alternative's
   constraints.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      if (SUBREG_P (op))
	{
	  /* For a subreg of a hard register, account for the register
	     offset the subreg selects when checking class membership.  */
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      /* Only hard-register operands can invalidate an alternative.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  /* The operand is acceptable when the alternative allows
	     anything, matches another already-matching operand, or the
	     register fits the alternative's class.  */
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		  (recog_data.operand[i],
		   recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
1464 | |
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  The constant offset
   added to a shifted address to find its AddressSanitizer shadow byte;
   the value is per-subtarget.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return SUBTARGET_SHADOW_OFFSET;
}
1472 | |
1473 | /* Argument support functions. */ |
1474 | |
/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  /* The first SSE_REGPARM_MAX SSE registers pass FP/vector args when
     SSE is enabled, on both 32- and 64-bit.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  /* 32-bit: the low integer registers plus, with MMX, the first MMX
     argument registers.  */
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  /* Finally, check the ABI's integer argument register list.  */
  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
1512 | |
1513 | /* Return if we do not know how to pass ARG solely in registers. */ |
1514 | |
1515 | static bool |
1516 | ix86_must_pass_in_stack (const function_arg_info &arg) |
1517 | { |
1518 | if (must_pass_in_stack_var_size_or_pad (arg)) |
1519 | return true; |
1520 | |
1521 | /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
1522 | The layout_type routine is crafty and tries to trick us into passing |
1523 | currently unsupported vector types on the stack by using TImode. */ |
1524 | return (!TARGET_64BIT && arg.mode == TImode |
1525 | && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); |
1526 | } |
1527 | |
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  /* FNDECL may actually be a function type rather than a decl;
     dispatch accordingly.  */
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  /* Only the 64-bit MS ABI reserves a (32-byte shadow) area.  */
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
1543 | |
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  Simply forwards to the target hook with no
   specific function decl.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
1551 | |
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  /* Without attributes the default ABI applies.  */
  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  /* The ms_abi/sysv_abi attributes override the default in either
     direction.  */
  if (abi == SYSV_ABI
      && lookup_attribute (attr_name: "ms_abi" , TYPE_ATTRIBUTES (fntype)))
    {
      /* Diagnose once per compilation: x32 cannot honor ms_abi.  */
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute" );
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute (attr_name: "sysv_abi" , TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}
1580 | |
1581 | enum calling_abi |
1582 | ix86_function_abi (const_tree fndecl) |
1583 | { |
1584 | return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; |
1585 | } |
1586 | |
1587 | /* Returns value SYSV_ABI, MS_ABI dependent on cfun, |
1588 | specifying the call abi used. */ |
1589 | enum calling_abi |
1590 | ix86_cfun_abi (void) |
1591 | { |
1592 | return cfun ? cfun->machine->call_abi : ix86_abi; |
1593 | } |
1594 | |
/* Return true if function FN should get the ms_hook_prologue
   hot-patchable prologue.  Diagnoses (and rejects) the attribute on
   nested functions.  */
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute (attr_name: "ms_hook_prologue" , DECL_ATTRIBUTES (fn)))
    {
      /* Nested functions use the static chain, which is incompatible
	 with the hot-patch entry sequence.  */
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "%<ms_hook_prologue%> attribute is not compatible "
		  "with nested function" );
      else
	return true;
    }
  return false;
}
1609 | |
1610 | bool |
1611 | ix86_function_naked (const_tree fn) |
1612 | { |
1613 | if (fn && lookup_attribute (attr_name: "naked" , DECL_ATTRIBUTES (fn))) |
1614 | return true; |
1615 | |
1616 | return false; |
1617 | } |
1618 | |
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (fn: decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      /* Pad before the label with 0xCC (int3) filler bytes so a
	 hot-patcher has room to place a jump before the entry.  */
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (stream: out_file, ASM_LONG " %#x\n" , filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  ASM_OUTPUT_LABEL (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n" ,
		 stream: out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n" , stream: out_file);
	}
    }
}
1663 | |
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  /* Record the ABI of the function being compiled.  */
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1672 | |
1673 | /* Return 1 if pseudo register should be created and used to hold |
1674 | GOT address for PIC code. */ |
1675 | bool |
1676 | ix86_use_pseudo_pic_reg (void) |
1677 | { |
1678 | if ((TARGET_64BIT |
1679 | && (ix86_cmodel == CM_SMALL_PIC |
1680 | || TARGET_PECOFF)) |
1681 | || !flag_pic) |
1682 | return false; |
1683 | return true; |
1684 | } |
1685 | |
/* Initialize large model PIC register.  Emits the three-instruction
   sequence that materializes the GOT address into pic_offset_table_rtx,
   using hard register TMP_REGNO as scratch.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  /* Emit a local label whose address anchors the RIP computation.  */
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* pic_reg = &label;  tmp = _GLOBAL_OFFSET_TABLE_ - label;
     pic_reg += tmp.  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Turn the label into a deleted-label note so later passes do not
     treat it as a jump target, while keeping its name for output.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
1709 | |
/* Create and initialize PIC register if required.  The initialization
   sequence is inserted on the edge from the entry block so it executes
   exactly once.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /*  If there is future mcount call in the function it is more profitable
	  to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      /* Ensure the CFA notes emitted so far are flushed before any
	 later frame-related instructions.  */
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (e: entry_edge);
}
1750 | |
1751 | /* Initialize a variable CUM of type CUMULATIVE_ARGS |
1752 | for a call to a function whose data type is FNTYPE. |
1753 | For a library call, FNTYPE is 0. */ |
1754 | |
1755 | void |
1756 | init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
1757 | tree fntype, /* tree ptr for function decl */ |
1758 | rtx libname, /* SYMBOL_REF of library name or 0 */ |
1759 | tree fndecl, |
1760 | int caller) |
1761 | { |
1762 | struct cgraph_node *local_info_node = NULL; |
1763 | struct cgraph_node *target = NULL; |
1764 | |
1765 | /* Set silent_p to false to raise an error for invalid calls when |
1766 | expanding function body. */ |
1767 | cfun->machine->silent_p = false; |
1768 | |
1769 | memset (s: cum, c: 0, n: sizeof (*cum)); |
1770 | |
1771 | if (fndecl) |
1772 | { |
1773 | target = cgraph_node::get (decl: fndecl); |
1774 | if (target) |
1775 | { |
1776 | target = target->function_symbol (); |
1777 | local_info_node = cgraph_node::local_info_node (decl: target->decl); |
1778 | cum->call_abi = ix86_function_abi (fndecl: target->decl); |
1779 | } |
1780 | else |
1781 | cum->call_abi = ix86_function_abi (fndecl); |
1782 | } |
1783 | else |
1784 | cum->call_abi = ix86_function_type_abi (fntype); |
1785 | |
1786 | cum->caller = caller; |
1787 | |
1788 | /* Set up the number of registers to use for passing arguments. */ |
1789 | cum->nregs = ix86_regparm; |
1790 | if (TARGET_64BIT) |
1791 | { |
1792 | cum->nregs = (cum->call_abi == SYSV_ABI |
1793 | ? X86_64_REGPARM_MAX |
1794 | : X86_64_MS_REGPARM_MAX); |
1795 | } |
1796 | if (TARGET_SSE) |
1797 | { |
1798 | cum->sse_nregs = SSE_REGPARM_MAX; |
1799 | if (TARGET_64BIT) |
1800 | { |
1801 | cum->sse_nregs = (cum->call_abi == SYSV_ABI |
1802 | ? X86_64_SSE_REGPARM_MAX |
1803 | : X86_64_MS_SSE_REGPARM_MAX); |
1804 | } |
1805 | } |
1806 | if (TARGET_MMX) |
1807 | cum->mmx_nregs = MMX_REGPARM_MAX; |
1808 | cum->warn_avx512f = true; |
1809 | cum->warn_avx = true; |
1810 | cum->warn_sse = true; |
1811 | cum->warn_mmx = true; |
1812 | |
1813 | /* Because type might mismatch in between caller and callee, we need to |
1814 | use actual type of function for local calls. |
1815 | FIXME: cgraph_analyze can be told to actually record if function uses |
1816 | va_start so for local functions maybe_vaarg can be made aggressive |
1817 | helping K&R code. |
1818 | FIXME: once typesytem is fixed, we won't need this code anymore. */ |
1819 | if (local_info_node && local_info_node->local |
1820 | && local_info_node->can_change_signature) |
1821 | fntype = TREE_TYPE (target->decl); |
1822 | cum->stdarg = stdarg_p (fntype); |
1823 | cum->maybe_vaarg = (fntype |
1824 | ? (!prototype_p (fntype) || stdarg_p (fntype)) |
1825 | : !libname); |
1826 | |
1827 | cum->decl = fndecl; |
1828 | |
1829 | cum->warn_empty = !warn_abi || cum->stdarg; |
1830 | if (!cum->warn_empty && fntype) |
1831 | { |
1832 | function_args_iterator iter; |
1833 | tree argtype; |
1834 | bool seen_empty_type = false; |
1835 | FOREACH_FUNCTION_ARGS (fntype, argtype, iter) |
1836 | { |
1837 | if (argtype == error_mark_node || VOID_TYPE_P (argtype)) |
1838 | break; |
1839 | if (TYPE_EMPTY_P (argtype)) |
1840 | seen_empty_type = true; |
1841 | else if (seen_empty_type) |
1842 | { |
1843 | cum->warn_empty = true; |
1844 | break; |
1845 | } |
1846 | } |
1847 | } |
1848 | |
1849 | if (!TARGET_64BIT) |
1850 | { |
1851 | /* If there are variable arguments, then we won't pass anything |
1852 | in registers in 32-bit mode. */ |
1853 | if (stdarg_p (fntype)) |
1854 | { |
1855 | cum->nregs = 0; |
1856 | /* Since in 32-bit, variable arguments are always passed on |
1857 | stack, there is scratch register available for indirect |
1858 | sibcall. */ |
1859 | cfun->machine->arg_reg_available = true; |
1860 | cum->sse_nregs = 0; |
1861 | cum->mmx_nregs = 0; |
1862 | cum->warn_avx512f = false; |
1863 | cum->warn_avx = false; |
1864 | cum->warn_sse = false; |
1865 | cum->warn_mmx = false; |
1866 | return; |
1867 | } |
1868 | |
1869 | /* Use ecx and edx registers if function has fastcall attribute, |
1870 | else look for regparm information. */ |
1871 | if (fntype) |
1872 | { |
1873 | unsigned int ccvt = ix86_get_callcvt (type: fntype); |
1874 | if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
1875 | { |
1876 | cum->nregs = 1; |
1877 | cum->fastcall = 1; /* Same first register as in fastcall. */ |
1878 | } |
1879 | else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
1880 | { |
1881 | cum->nregs = 2; |
1882 | cum->fastcall = 1; |
1883 | } |
1884 | else |
1885 | cum->nregs = ix86_function_regparm (type: fntype, decl: fndecl); |
1886 | } |
1887 | |
1888 | /* Set up the number of SSE registers used for passing SFmode |
1889 | and DFmode arguments. Warn for mismatching ABI. */ |
1890 | cum->float_in_sse = ix86_function_sseregparm (type: fntype, decl: fndecl, warn: true); |
1891 | } |
1892 | |
1893 | cfun->machine->arg_reg_available = (cum->nregs > 0); |
1894 | } |
1895 | |
1896 | /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
1897 | But in the case of vector types, it is some vector mode. |
1898 | |
1899 | When we have only some of our vector isa extensions enabled, then there |
1900 | are some modes for which vector_mode_supported_p is false. For these |
1901 | modes, the generic vector support in gcc will choose some non-vector mode |
1902 | in order to implement the type. By computing the natural mode, we'll |
1903 | select the proper ABI location for the operand and not depend on whatever |
1904 | the middle-end decides to do with these vector types. |
1905 | |
1906 | The midde-end can't deal with the vector types > 16 bytes. In this |
1907 | case, we return the original mode and warn ABI change if CUM isn't |
1908 | NULL. |
1909 | |
1910 | If INT_RETURN is true, warn ABI change if the vector mode isn't |
1911 | available for function return value. */ |
1912 | |
1913 | static machine_mode |
1914 | type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, |
1915 | bool in_return) |
1916 | { |
1917 | machine_mode mode = TYPE_MODE (type); |
1918 | |
1919 | if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode)) |
1920 | { |
1921 | HOST_WIDE_INT size = int_size_in_bytes (type); |
1922 | if ((size == 8 || size == 16 || size == 32 || size == 64) |
1923 | /* ??? Generic code allows us to create width 1 vectors. Ignore. */ |
1924 | && TYPE_VECTOR_SUBPARTS (node: type) > 1) |
1925 | { |
1926 | machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); |
1927 | |
1928 | /* There are no XFmode vector modes ... */ |
1929 | if (innermode == XFmode) |
1930 | return mode; |
1931 | |
1932 | /* ... and no decimal float vector modes. */ |
1933 | if (DECIMAL_FLOAT_MODE_P (innermode)) |
1934 | return mode; |
1935 | |
1936 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type))) |
1937 | mode = MIN_MODE_VECTOR_FLOAT; |
1938 | else |
1939 | mode = MIN_MODE_VECTOR_INT; |
1940 | |
1941 | /* Get the mode which has this inner mode and number of units. */ |
1942 | FOR_EACH_MODE_FROM (mode, mode) |
1943 | if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (node: type) |
1944 | && GET_MODE_INNER (mode) == innermode) |
1945 | { |
1946 | if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512) |
1947 | && !TARGET_IAMCU) |
1948 | { |
1949 | static bool warnedavx512f; |
1950 | static bool warnedavx512f_ret; |
1951 | |
1952 | if (cum && cum->warn_avx512f && !warnedavx512f) |
1953 | { |
1954 | if (warning (OPT_Wpsabi, "AVX512F vector argument " |
1955 | "without AVX512F enabled changes the ABI" )) |
1956 | warnedavx512f = true; |
1957 | } |
1958 | else if (in_return && !warnedavx512f_ret) |
1959 | { |
1960 | if (warning (OPT_Wpsabi, "AVX512F vector return " |
1961 | "without AVX512F enabled changes the ABI" )) |
1962 | warnedavx512f_ret = true; |
1963 | } |
1964 | |
1965 | return TYPE_MODE (type); |
1966 | } |
1967 | else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) |
1968 | { |
1969 | static bool warnedavx; |
1970 | static bool warnedavx_ret; |
1971 | |
1972 | if (cum && cum->warn_avx && !warnedavx) |
1973 | { |
1974 | if (warning (OPT_Wpsabi, "AVX vector argument " |
1975 | "without AVX enabled changes the ABI" )) |
1976 | warnedavx = true; |
1977 | } |
1978 | else if (in_return && !warnedavx_ret) |
1979 | { |
1980 | if (warning (OPT_Wpsabi, "AVX vector return " |
1981 | "without AVX enabled changes the ABI" )) |
1982 | warnedavx_ret = true; |
1983 | } |
1984 | |
1985 | return TYPE_MODE (type); |
1986 | } |
1987 | else if (((size == 8 && TARGET_64BIT) || size == 16) |
1988 | && !TARGET_SSE |
1989 | && !TARGET_IAMCU) |
1990 | { |
1991 | static bool warnedsse; |
1992 | static bool warnedsse_ret; |
1993 | |
1994 | if (cum && cum->warn_sse && !warnedsse) |
1995 | { |
1996 | if (warning (OPT_Wpsabi, "SSE vector argument " |
1997 | "without SSE enabled changes the ABI" )) |
1998 | warnedsse = true; |
1999 | } |
2000 | else if (!TARGET_64BIT && in_return && !warnedsse_ret) |
2001 | { |
2002 | if (warning (OPT_Wpsabi, "SSE vector return " |
2003 | "without SSE enabled changes the ABI" )) |
2004 | warnedsse_ret = true; |
2005 | } |
2006 | } |
2007 | else if ((size == 8 && !TARGET_64BIT) |
2008 | && (!cfun |
2009 | || cfun->machine->func_type == TYPE_NORMAL) |
2010 | && !TARGET_MMX |
2011 | && !TARGET_IAMCU) |
2012 | { |
2013 | static bool warnedmmx; |
2014 | static bool warnedmmx_ret; |
2015 | |
2016 | if (cum && cum->warn_mmx && !warnedmmx) |
2017 | { |
2018 | if (warning (OPT_Wpsabi, "MMX vector argument " |
2019 | "without MMX enabled changes the ABI" )) |
2020 | warnedmmx = true; |
2021 | } |
2022 | else if (in_return && !warnedmmx_ret) |
2023 | { |
2024 | if (warning (OPT_Wpsabi, "MMX vector return " |
2025 | "without MMX enabled changes the ABI" )) |
2026 | warnedmmx_ret = true; |
2027 | } |
2028 | } |
2029 | return mode; |
2030 | } |
2031 | |
2032 | gcc_unreachable (); |
2033 | } |
2034 | } |
2035 | |
2036 | return mode; |
2037 | } |
2038 | |
2039 | /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
2040 | this may not agree with the mode that the type system has chosen for the |
2041 | register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
2042 | go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
2043 | |
2044 | static rtx |
2045 | gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, |
2046 | unsigned int regno) |
2047 | { |
2048 | rtx tmp; |
2049 | |
2050 | if (orig_mode != BLKmode) |
2051 | tmp = gen_rtx_REG (orig_mode, regno); |
2052 | else |
2053 | { |
2054 | tmp = gen_rtx_REG (mode, regno); |
2055 | tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
2056 | tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
2057 | } |
2058 | |
2059 | return tmp; |
2060 | } |
2061 | |
2062 | /* x86-64 register passing implementation. See x86-64 ABI for details. Goal |
2063 | of this code is to classify each 8bytes of incoming argument by the register |
2064 | class and assign registers accordingly. */ |
2065 | |
2066 | /* Return the union class of CLASS1 and CLASS2. |
2067 | See the x86-64 PS ABI for details. */ |
2068 | |
2069 | static enum x86_64_reg_class |
2070 | merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
2071 | { |
2072 | /* Rule #1: If both classes are equal, this is the resulting class. */ |
2073 | if (class1 == class2) |
2074 | return class1; |
2075 | |
2076 | /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
2077 | the other class. */ |
2078 | if (class1 == X86_64_NO_CLASS) |
2079 | return class2; |
2080 | if (class2 == X86_64_NO_CLASS) |
2081 | return class1; |
2082 | |
2083 | /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
2084 | if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
2085 | return X86_64_MEMORY_CLASS; |
2086 | |
2087 | /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
2088 | if ((class1 == X86_64_INTEGERSI_CLASS |
2089 | && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) |
2090 | || (class2 == X86_64_INTEGERSI_CLASS |
2091 | && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) |
2092 | return X86_64_INTEGERSI_CLASS; |
2093 | if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
2094 | || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
2095 | return X86_64_INTEGER_CLASS; |
2096 | |
2097 | /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
2098 | MEMORY is used. */ |
2099 | if (class1 == X86_64_X87_CLASS |
2100 | || class1 == X86_64_X87UP_CLASS |
2101 | || class1 == X86_64_COMPLEX_X87_CLASS |
2102 | || class2 == X86_64_X87_CLASS |
2103 | || class2 == X86_64_X87UP_CLASS |
2104 | || class2 == X86_64_COMPLEX_X87_CLASS) |
2105 | return X86_64_MEMORY_CLASS; |
2106 | |
2107 | /* Rule #6: Otherwise class SSE is used. */ |
2108 | return X86_64_SSE_CLASS; |
2109 | } |
2110 | |
2111 | /* Classify the argument of type TYPE and mode MODE. |
2112 | CLASSES will be filled by the register class used to pass each word |
2113 | of the operand. The number of words is returned. In case the parameter |
2114 | should be passed in memory, 0 is returned. As a special case for zero |
2115 | sized containers, classes[0] will be NO_CLASS and 1 is returned. |
2116 | |
2117 | BIT_OFFSET is used internally for handling records and specifies offset |
2118 | of the offset in bits modulo 512 to avoid overflow cases. |
2119 | |
2120 | See the x86-64 PS ABI for details. |
2121 | */ |
2122 | |
2123 | static int |
2124 | classify_argument (machine_mode mode, const_tree type, |
2125 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset, |
2126 | int &zero_width_bitfields) |
2127 | { |
2128 | HOST_WIDE_INT bytes |
2129 | = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
2130 | int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); |
2131 | |
2132 | /* Variable sized entities are always passed/returned in memory. */ |
2133 | if (bytes < 0) |
2134 | return 0; |
2135 | |
2136 | if (mode != VOIDmode) |
2137 | { |
2138 | /* The value of "named" doesn't matter. */ |
2139 | function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true); |
2140 | if (targetm.calls.must_pass_in_stack (arg)) |
2141 | return 0; |
2142 | } |
2143 | |
2144 | if (type && (AGGREGATE_TYPE_P (type) |
2145 | || (TREE_CODE (type) == BITINT_TYPE && words > 1))) |
2146 | { |
2147 | int i; |
2148 | tree field; |
2149 | enum x86_64_reg_class subclasses[MAX_CLASSES]; |
2150 | |
2151 | /* On x86-64 we pass structures larger than 64 bytes on the stack. */ |
2152 | if (bytes > 64) |
2153 | return 0; |
2154 | |
2155 | for (i = 0; i < words; i++) |
2156 | classes[i] = X86_64_NO_CLASS; |
2157 | |
2158 | /* Zero sized arrays or structures are NO_CLASS. We return 0 to |
2159 | signalize memory class, so handle it as special case. */ |
2160 | if (!words) |
2161 | { |
2162 | classes[0] = X86_64_NO_CLASS; |
2163 | return 1; |
2164 | } |
2165 | |
2166 | /* Classify each field of record and merge classes. */ |
2167 | switch (TREE_CODE (type)) |
2168 | { |
2169 | case RECORD_TYPE: |
2170 | /* And now merge the fields of structure. */ |
2171 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
2172 | { |
2173 | if (TREE_CODE (field) == FIELD_DECL) |
2174 | { |
2175 | int num; |
2176 | |
2177 | if (TREE_TYPE (field) == error_mark_node) |
2178 | continue; |
2179 | |
2180 | /* Bitfields are always classified as integer. Handle them |
2181 | early, since later code would consider them to be |
2182 | misaligned integers. */ |
2183 | if (DECL_BIT_FIELD (field)) |
2184 | { |
2185 | if (integer_zerop (DECL_SIZE (field))) |
2186 | { |
2187 | if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field)) |
2188 | continue; |
2189 | if (zero_width_bitfields != 2) |
2190 | { |
2191 | zero_width_bitfields = 1; |
2192 | continue; |
2193 | } |
2194 | } |
2195 | for (i = (int_bit_position (field) |
2196 | + (bit_offset % 64)) / 8 / 8; |
2197 | i < ((int_bit_position (field) + (bit_offset % 64)) |
2198 | + tree_to_shwi (DECL_SIZE (field)) |
2199 | + 63) / 8 / 8; i++) |
2200 | classes[i] |
2201 | = merge_classes (class1: X86_64_INTEGER_CLASS, class2: classes[i]); |
2202 | } |
2203 | else |
2204 | { |
2205 | int pos; |
2206 | |
2207 | type = TREE_TYPE (field); |
2208 | |
2209 | /* Flexible array member is ignored. */ |
2210 | if (TYPE_MODE (type) == BLKmode |
2211 | && TREE_CODE (type) == ARRAY_TYPE |
2212 | && TYPE_SIZE (type) == NULL_TREE |
2213 | && TYPE_DOMAIN (type) != NULL_TREE |
2214 | && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) |
2215 | == NULL_TREE)) |
2216 | { |
2217 | static bool warned; |
2218 | |
2219 | if (!warned && warn_psabi) |
2220 | { |
2221 | warned = true; |
2222 | inform (input_location, |
2223 | "the ABI of passing struct with" |
2224 | " a flexible array member has" |
2225 | " changed in GCC 4.4" ); |
2226 | } |
2227 | continue; |
2228 | } |
2229 | num = classify_argument (TYPE_MODE (type), type, |
2230 | classes: subclasses, |
2231 | bit_offset: (int_bit_position (field) |
2232 | + bit_offset) % 512, |
2233 | zero_width_bitfields); |
2234 | if (!num) |
2235 | return 0; |
2236 | pos = (int_bit_position (field) |
2237 | + (bit_offset % 64)) / 8 / 8; |
2238 | for (i = 0; i < num && (i + pos) < words; i++) |
2239 | classes[i + pos] |
2240 | = merge_classes (class1: subclasses[i], class2: classes[i + pos]); |
2241 | } |
2242 | } |
2243 | } |
2244 | break; |
2245 | |
2246 | case ARRAY_TYPE: |
2247 | /* Arrays are handled as small records. */ |
2248 | { |
2249 | int num; |
2250 | num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
2251 | TREE_TYPE (type), classes: subclasses, bit_offset, |
2252 | zero_width_bitfields); |
2253 | if (!num) |
2254 | return 0; |
2255 | |
2256 | /* The partial classes are now full classes. */ |
2257 | if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) |
2258 | subclasses[0] = X86_64_SSE_CLASS; |
2259 | if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2) |
2260 | subclasses[0] = X86_64_SSE_CLASS; |
2261 | if (subclasses[0] == X86_64_INTEGERSI_CLASS |
2262 | && !((bit_offset % 64) == 0 && bytes == 4)) |
2263 | subclasses[0] = X86_64_INTEGER_CLASS; |
2264 | |
2265 | for (i = 0; i < words; i++) |
2266 | classes[i] = subclasses[i % num]; |
2267 | |
2268 | break; |
2269 | } |
2270 | case UNION_TYPE: |
2271 | case QUAL_UNION_TYPE: |
2272 | /* Unions are similar to RECORD_TYPE but offset is always 0. |
2273 | */ |
2274 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
2275 | { |
2276 | if (TREE_CODE (field) == FIELD_DECL) |
2277 | { |
2278 | int num; |
2279 | |
2280 | if (TREE_TYPE (field) == error_mark_node) |
2281 | continue; |
2282 | |
2283 | num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
2284 | TREE_TYPE (field), classes: subclasses, |
2285 | bit_offset, zero_width_bitfields); |
2286 | if (!num) |
2287 | return 0; |
2288 | for (i = 0; i < num && i < words; i++) |
2289 | classes[i] = merge_classes (class1: subclasses[i], class2: classes[i]); |
2290 | } |
2291 | } |
2292 | break; |
2293 | |
2294 | case BITINT_TYPE: |
2295 | /* _BitInt(N) for N > 64 is passed as structure containing |
2296 | (N + 63) / 64 64-bit elements. */ |
2297 | if (words > 2) |
2298 | return 0; |
2299 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2300 | return 2; |
2301 | |
2302 | default: |
2303 | gcc_unreachable (); |
2304 | } |
2305 | |
2306 | if (words > 2) |
2307 | { |
2308 | /* When size > 16 bytes, if the first one isn't |
2309 | X86_64_SSE_CLASS or any other ones aren't |
2310 | X86_64_SSEUP_CLASS, everything should be passed in |
2311 | memory. */ |
2312 | if (classes[0] != X86_64_SSE_CLASS) |
2313 | return 0; |
2314 | |
2315 | for (i = 1; i < words; i++) |
2316 | if (classes[i] != X86_64_SSEUP_CLASS) |
2317 | return 0; |
2318 | } |
2319 | |
2320 | /* Final merger cleanup. */ |
2321 | for (i = 0; i < words; i++) |
2322 | { |
2323 | /* If one class is MEMORY, everything should be passed in |
2324 | memory. */ |
2325 | if (classes[i] == X86_64_MEMORY_CLASS) |
2326 | return 0; |
2327 | |
2328 | /* The X86_64_SSEUP_CLASS should be always preceded by |
2329 | X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ |
2330 | if (classes[i] == X86_64_SSEUP_CLASS |
2331 | && classes[i - 1] != X86_64_SSE_CLASS |
2332 | && classes[i - 1] != X86_64_SSEUP_CLASS) |
2333 | { |
2334 | /* The first one should never be X86_64_SSEUP_CLASS. */ |
2335 | gcc_assert (i != 0); |
2336 | classes[i] = X86_64_SSE_CLASS; |
2337 | } |
2338 | |
2339 | /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, |
2340 | everything should be passed in memory. */ |
2341 | if (classes[i] == X86_64_X87UP_CLASS |
2342 | && (classes[i - 1] != X86_64_X87_CLASS)) |
2343 | { |
2344 | static bool warned; |
2345 | |
2346 | /* The first one should never be X86_64_X87UP_CLASS. */ |
2347 | gcc_assert (i != 0); |
2348 | if (!warned && warn_psabi) |
2349 | { |
2350 | warned = true; |
2351 | inform (input_location, |
2352 | "the ABI of passing union with %<long double%>" |
2353 | " has changed in GCC 4.4" ); |
2354 | } |
2355 | return 0; |
2356 | } |
2357 | } |
2358 | return words; |
2359 | } |
2360 | |
2361 | /* Compute alignment needed. We align all types to natural boundaries with |
2362 | exception of XFmode that is aligned to 64bits. */ |
2363 | if (mode != VOIDmode && mode != BLKmode) |
2364 | { |
2365 | int mode_alignment = GET_MODE_BITSIZE (mode); |
2366 | |
2367 | if (mode == XFmode) |
2368 | mode_alignment = 128; |
2369 | else if (mode == XCmode) |
2370 | mode_alignment = 256; |
2371 | if (COMPLEX_MODE_P (mode)) |
2372 | mode_alignment /= 2; |
2373 | /* Misaligned fields are always returned in memory. */ |
2374 | if (bit_offset % mode_alignment) |
2375 | return 0; |
2376 | } |
2377 | |
2378 | /* for V1xx modes, just use the base mode */ |
2379 | if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
2380 | && GET_MODE_UNIT_SIZE (mode) == bytes) |
2381 | mode = GET_MODE_INNER (mode); |
2382 | |
2383 | /* Classification of atomic types. */ |
2384 | switch (mode) |
2385 | { |
2386 | case E_SDmode: |
2387 | case E_DDmode: |
2388 | classes[0] = X86_64_SSE_CLASS; |
2389 | return 1; |
2390 | case E_TDmode: |
2391 | classes[0] = X86_64_SSE_CLASS; |
2392 | classes[1] = X86_64_SSEUP_CLASS; |
2393 | return 2; |
2394 | case E_DImode: |
2395 | case E_SImode: |
2396 | case E_HImode: |
2397 | case E_QImode: |
2398 | case E_CSImode: |
2399 | case E_CHImode: |
2400 | case E_CQImode: |
2401 | { |
2402 | int size = bit_offset + (int) GET_MODE_BITSIZE (mode); |
2403 | |
2404 | /* Analyze last 128 bits only. */ |
2405 | size = (size - 1) & 0x7f; |
2406 | |
2407 | if (size < 32) |
2408 | { |
2409 | classes[0] = X86_64_INTEGERSI_CLASS; |
2410 | return 1; |
2411 | } |
2412 | else if (size < 64) |
2413 | { |
2414 | classes[0] = X86_64_INTEGER_CLASS; |
2415 | return 1; |
2416 | } |
2417 | else if (size < 64+32) |
2418 | { |
2419 | classes[0] = X86_64_INTEGER_CLASS; |
2420 | classes[1] = X86_64_INTEGERSI_CLASS; |
2421 | return 2; |
2422 | } |
2423 | else if (size < 64+64) |
2424 | { |
2425 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2426 | return 2; |
2427 | } |
2428 | else |
2429 | gcc_unreachable (); |
2430 | } |
2431 | case E_CDImode: |
2432 | case E_TImode: |
2433 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2434 | return 2; |
2435 | case E_COImode: |
2436 | case E_OImode: |
2437 | /* OImode shouldn't be used directly. */ |
2438 | gcc_unreachable (); |
2439 | case E_CTImode: |
2440 | return 0; |
2441 | case E_HFmode: |
2442 | case E_BFmode: |
2443 | if (!(bit_offset % 64)) |
2444 | classes[0] = X86_64_SSEHF_CLASS; |
2445 | else |
2446 | classes[0] = X86_64_SSE_CLASS; |
2447 | return 1; |
2448 | case E_SFmode: |
2449 | if (!(bit_offset % 64)) |
2450 | classes[0] = X86_64_SSESF_CLASS; |
2451 | else |
2452 | classes[0] = X86_64_SSE_CLASS; |
2453 | return 1; |
2454 | case E_DFmode: |
2455 | classes[0] = X86_64_SSEDF_CLASS; |
2456 | return 1; |
2457 | case E_XFmode: |
2458 | classes[0] = X86_64_X87_CLASS; |
2459 | classes[1] = X86_64_X87UP_CLASS; |
2460 | return 2; |
2461 | case E_TFmode: |
2462 | classes[0] = X86_64_SSE_CLASS; |
2463 | classes[1] = X86_64_SSEUP_CLASS; |
2464 | return 2; |
2465 | case E_HCmode: |
2466 | case E_BCmode: |
2467 | classes[0] = X86_64_SSE_CLASS; |
2468 | if (!(bit_offset % 64)) |
2469 | return 1; |
2470 | else |
2471 | { |
2472 | classes[1] = X86_64_SSEHF_CLASS; |
2473 | return 2; |
2474 | } |
2475 | case E_SCmode: |
2476 | classes[0] = X86_64_SSE_CLASS; |
2477 | if (!(bit_offset % 64)) |
2478 | return 1; |
2479 | else |
2480 | { |
2481 | static bool warned; |
2482 | |
2483 | if (!warned && warn_psabi) |
2484 | { |
2485 | warned = true; |
2486 | inform (input_location, |
2487 | "the ABI of passing structure with %<complex float%>" |
2488 | " member has changed in GCC 4.4" ); |
2489 | } |
2490 | classes[1] = X86_64_SSESF_CLASS; |
2491 | return 2; |
2492 | } |
2493 | case E_DCmode: |
2494 | classes[0] = X86_64_SSEDF_CLASS; |
2495 | classes[1] = X86_64_SSEDF_CLASS; |
2496 | return 2; |
2497 | case E_XCmode: |
2498 | classes[0] = X86_64_COMPLEX_X87_CLASS; |
2499 | return 1; |
2500 | case E_TCmode: |
2501 | /* This modes is larger than 16 bytes. */ |
2502 | return 0; |
2503 | case E_V8SFmode: |
2504 | case E_V8SImode: |
2505 | case E_V32QImode: |
2506 | case E_V16HFmode: |
2507 | case E_V16BFmode: |
2508 | case E_V16HImode: |
2509 | case E_V4DFmode: |
2510 | case E_V4DImode: |
2511 | classes[0] = X86_64_SSE_CLASS; |
2512 | classes[1] = X86_64_SSEUP_CLASS; |
2513 | classes[2] = X86_64_SSEUP_CLASS; |
2514 | classes[3] = X86_64_SSEUP_CLASS; |
2515 | return 4; |
2516 | case E_V8DFmode: |
2517 | case E_V16SFmode: |
2518 | case E_V32HFmode: |
2519 | case E_V32BFmode: |
2520 | case E_V8DImode: |
2521 | case E_V16SImode: |
2522 | case E_V32HImode: |
2523 | case E_V64QImode: |
2524 | classes[0] = X86_64_SSE_CLASS; |
2525 | classes[1] = X86_64_SSEUP_CLASS; |
2526 | classes[2] = X86_64_SSEUP_CLASS; |
2527 | classes[3] = X86_64_SSEUP_CLASS; |
2528 | classes[4] = X86_64_SSEUP_CLASS; |
2529 | classes[5] = X86_64_SSEUP_CLASS; |
2530 | classes[6] = X86_64_SSEUP_CLASS; |
2531 | classes[7] = X86_64_SSEUP_CLASS; |
2532 | return 8; |
2533 | case E_V4SFmode: |
2534 | case E_V4SImode: |
2535 | case E_V16QImode: |
2536 | case E_V8HImode: |
2537 | case E_V8HFmode: |
2538 | case E_V8BFmode: |
2539 | case E_V2DFmode: |
2540 | case E_V2DImode: |
2541 | classes[0] = X86_64_SSE_CLASS; |
2542 | classes[1] = X86_64_SSEUP_CLASS; |
2543 | return 2; |
2544 | case E_V1TImode: |
2545 | case E_V1DImode: |
2546 | case E_V2SFmode: |
2547 | case E_V2SImode: |
2548 | case E_V4HImode: |
2549 | case E_V4HFmode: |
2550 | case E_V4BFmode: |
2551 | case E_V2HFmode: |
2552 | case E_V2BFmode: |
2553 | case E_V8QImode: |
2554 | classes[0] = X86_64_SSE_CLASS; |
2555 | return 1; |
2556 | case E_BLKmode: |
2557 | case E_VOIDmode: |
2558 | return 0; |
2559 | default: |
2560 | gcc_assert (VECTOR_MODE_P (mode)); |
2561 | |
2562 | if (bytes > 16) |
2563 | return 0; |
2564 | |
2565 | gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
2566 | |
2567 | if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
2568 | classes[0] = X86_64_INTEGERSI_CLASS; |
2569 | else |
2570 | classes[0] = X86_64_INTEGER_CLASS; |
2571 | classes[1] = X86_64_INTEGER_CLASS; |
2572 | return 1 + (bytes > 8); |
2573 | } |
2574 | } |
2575 | |
2576 | /* Wrapper around classify_argument with the extra zero_width_bitfields |
2577 | argument, to diagnose GCC 12.1 ABI differences for C. */ |
2578 | |
2579 | static int |
2580 | classify_argument (machine_mode mode, const_tree type, |
2581 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
2582 | { |
2583 | int zero_width_bitfields = 0; |
2584 | static bool warned = false; |
2585 | int n = classify_argument (mode, type, classes, bit_offset, |
2586 | zero_width_bitfields); |
2587 | if (!zero_width_bitfields || warned || !warn_psabi) |
2588 | return n; |
2589 | enum x86_64_reg_class alt_classes[MAX_CLASSES]; |
2590 | zero_width_bitfields = 2; |
2591 | if (classify_argument (mode, type, classes: alt_classes, bit_offset, |
2592 | zero_width_bitfields) != n) |
2593 | zero_width_bitfields = 3; |
2594 | else |
2595 | for (int i = 0; i < n; i++) |
2596 | if (classes[i] != alt_classes[i]) |
2597 | { |
2598 | zero_width_bitfields = 3; |
2599 | break; |
2600 | } |
2601 | if (zero_width_bitfields == 3) |
2602 | { |
2603 | warned = true; |
2604 | const char *url |
2605 | = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields" ; |
2606 | |
2607 | inform (input_location, |
2608 | "the ABI of passing C structures with zero-width bit-fields" |
2609 | " has changed in GCC %{12.1%}" , url); |
2610 | } |
2611 | return n; |
2612 | } |
2613 | |
2614 | /* Examine the argument and return set number of register required in each |
2615 | class. Return true iff parameter should be passed in memory. */ |
2616 | |
2617 | static bool |
2618 | examine_argument (machine_mode mode, const_tree type, int in_return, |
2619 | int *int_nregs, int *sse_nregs) |
2620 | { |
2621 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
2622 | int n = classify_argument (mode, type, classes: regclass, bit_offset: 0); |
2623 | |
2624 | *int_nregs = 0; |
2625 | *sse_nregs = 0; |
2626 | |
2627 | if (!n) |
2628 | return true; |
2629 | for (n--; n >= 0; n--) |
2630 | switch (regclass[n]) |
2631 | { |
2632 | case X86_64_INTEGER_CLASS: |
2633 | case X86_64_INTEGERSI_CLASS: |
2634 | (*int_nregs)++; |
2635 | break; |
2636 | case X86_64_SSE_CLASS: |
2637 | case X86_64_SSEHF_CLASS: |
2638 | case X86_64_SSESF_CLASS: |
2639 | case X86_64_SSEDF_CLASS: |
2640 | (*sse_nregs)++; |
2641 | break; |
2642 | case X86_64_NO_CLASS: |
2643 | case X86_64_SSEUP_CLASS: |
2644 | break; |
2645 | case X86_64_X87_CLASS: |
2646 | case X86_64_X87UP_CLASS: |
2647 | case X86_64_COMPLEX_X87_CLASS: |
2648 | if (!in_return) |
2649 | return true; |
2650 | break; |
2651 | case X86_64_MEMORY_CLASS: |
2652 | gcc_unreachable (); |
2653 | } |
2654 | |
2655 | return false; |
2656 | } |
2657 | |
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/ORIG_MODE and TYPE describe the value; IN_RETURN is nonzero when
   constructing a return-value container.  NINTREGS/NSSEREGS give how many
   integer and SSE registers remain; INTREG points at the next free entry
   of the integer parameter register table and SSE_REGNO is the index of
   the next free SSE register.  Returns a REG, a PARALLEL of EXPR_LISTs,
   or NULL when the value cannot (or should not) go in registers.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     of the three diagnostics below is emitted at most once per run.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  /* Total size of the value in bytes.  */
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* Classify into per-eightbyte register classes; 0 means memory.  */
  n = classify_argument (mode, type, classes: regclass, bit_offset: 0);
  if (!n)
    return NULL;
  /* Bail out if the value must go in memory or doesn't fit into the
     registers we still have available.  */
  if (examine_argument (mode, type, in_return, int_nregs: &needed_intregs,
			sse_nregs: &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled" );
	      else
		error ("SSE register return with SSE disabled" );
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled" );
	  else
	    error ("SSE register argument with SSE disabled" );
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled" );
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* 16/32/64-byte vectors classify as one SSE class followed by SSEUP
     classes; the whole value lives in a single (wide) SSE register.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* 80-bit long double (X87 + X87UP) occupies one x87 stack register.  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  Each EXPR_LIST
     pairs a hard register with the byte offset it covers.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
        {
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      {
		/* Partial final eightbyte: pick the smallest integer mode
		   that covers the remaining bytes.  */
		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
		if (!int_mode_for_size (size: tmpbits, limit: 0).exists (mode: &tmpmode))
		  /* We've requested 24 bytes we
		     don't have mode for.  Use DImode.  */
		  tmpmode = DImode;
	      }
	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode, *intreg),
				   GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSEHF_CLASS:
	    tmpmode = (mode == BFmode ? BFmode : HFmode);
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (SFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* One SSE register may cover 1, 2, 4 or 8 eightbytes; the
	       SSEUP classes that follow are folded into this entry by
	       advancing I past them.  */
	    pos = i;
	    switch (n)
	      {
	      case 1:
		tmpmode = DImode;
		break;
	      case 2:
		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		  {
		    tmpmode = TImode;
		    i++;
		  }
		else
		  tmpmode = DImode;
		break;
	      case 4:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS);
		tmpmode = OImode;
		i += 3;
		break;
	      case 8:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS
			    && regclass[4] == X86_64_SSEUP_CLASS
			    && regclass[5] == X86_64_SSEUP_CLASS
			    && regclass[6] == X86_64_SSEUP_CLASS
			    && regclass[7] == X86_64_SSEUP_CLASS);
		tmpmode = XImode;
		i += 7;
		break;
	      default:
		gcc_unreachable ();
	      }
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (pos*8));
	    sse_regno++;
	    break;
	  default:
	    gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2933 | |
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return a number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Negative size means variable-sized; never in registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      /* Only report registers consumed if the whole argument fit;
	 once nregs goes negative the argument spilled to the stack.  */
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      /* float_in_sse == -1 flags an SSE-regparm function compiled
	 without SSE support; diagnose below.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE vectors: consume one SSE register for non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* MMX vectors: consume one MMX register for non-aggregates.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled" , cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called" );
    }

  return res;
}
3072 | |
3073 | static int |
3074 | function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, |
3075 | const_tree type, HOST_WIDE_INT words, bool named) |
3076 | { |
3077 | int int_nregs, sse_nregs; |
3078 | |
3079 | /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ |
3080 | if (!named && (VALID_AVX512F_REG_MODE (mode) |
3081 | || VALID_AVX256_REG_MODE (mode))) |
3082 | return 0; |
3083 | |
3084 | if (!examine_argument (mode, type, in_return: 0, int_nregs: &int_nregs, sse_nregs: &sse_nregs) |
3085 | && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
3086 | { |
3087 | cum->nregs -= int_nregs; |
3088 | cum->sse_nregs -= sse_nregs; |
3089 | cum->regno += int_nregs; |
3090 | cum->sse_regno += sse_nregs; |
3091 | return int_nregs; |
3092 | } |
3093 | else |
3094 | { |
3095 | int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
3096 | cum->words = ROUND_UP (cum->words, align); |
3097 | cum->words += words; |
3098 | return 0; |
3099 | } |
3100 | } |
3101 | |
3102 | static int |
3103 | function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
3104 | HOST_WIDE_INT words) |
3105 | { |
3106 | /* Otherwise, this should be passed indirect. */ |
3107 | gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
3108 | |
3109 | cum->words += words; |
3110 | if (cum->nregs > 0) |
3111 | { |
3112 | cum->nregs -= 1; |
3113 | cum->regno += 1; |
3114 | return 1; |
3115 | } |
3116 | return 0; |
3117 | } |
3118 | |
3119 | /* Update the data in CUM to advance over argument ARG. */ |
3120 | |
3121 | static void |
3122 | ix86_function_arg_advance (cumulative_args_t cum_v, |
3123 | const function_arg_info &arg) |
3124 | { |
3125 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3126 | machine_mode mode = arg.mode; |
3127 | HOST_WIDE_INT bytes, words; |
3128 | int nregs; |
3129 | |
3130 | /* The argument of interrupt handler is a special case and is |
3131 | handled in ix86_function_arg. */ |
3132 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3133 | return; |
3134 | |
3135 | bytes = arg.promoted_size_in_bytes (); |
3136 | words = CEIL (bytes, UNITS_PER_WORD); |
3137 | |
3138 | if (arg.type) |
3139 | mode = type_natural_mode (type: arg.type, NULL, in_return: false); |
3140 | |
3141 | if (TARGET_64BIT) |
3142 | { |
3143 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3144 | |
3145 | if (call_abi == MS_ABI) |
3146 | nregs = function_arg_advance_ms_64 (cum, bytes, words); |
3147 | else |
3148 | nregs = function_arg_advance_64 (cum, mode, type: arg.type, words, |
3149 | named: arg.named); |
3150 | } |
3151 | else |
3152 | nregs = function_arg_advance_32 (cum, mode, type: arg.type, bytes, words); |
3153 | |
3154 | if (!nregs) |
3155 | { |
3156 | /* Track if there are outgoing arguments on stack. */ |
3157 | if (cum->caller) |
3158 | cfun->machine->outgoing_args_on_stack = true; |
3159 | } |
3160 | } |
3161 | |
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Negative size means variable-sized; never in registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      /* Integral types go in regparm registers when the whole argument
	 fits in what remains.  */
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	    smaller arguments to ECX and EDX if it isn't an
	    aggregate type .  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
	        break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      /* float_in_sse == -1 flags an SSE-regparm function compiled
	 without SSE support; diagnose below.  */
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* 128-bit SSE vectors (non-aggregates) go in XMM registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256/512-bit vectors (non-aggregates) also go in XMM registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit MMX vectors (non-aggregates) go in MMX registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					regno: cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled" , cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called" );
    }

  return NULL_RTX;
}
3317 | |
3318 | static rtx |
3319 | function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3320 | machine_mode orig_mode, const_tree type, bool named) |
3321 | { |
3322 | /* Handle a hidden AL argument containing number of registers |
3323 | for varargs x86-64 functions. */ |
3324 | if (mode == VOIDmode) |
3325 | return GEN_INT (cum->maybe_vaarg |
3326 | ? (cum->sse_nregs < 0 |
3327 | ? X86_64_SSE_REGPARM_MAX |
3328 | : cum->sse_regno) |
3329 | : -1); |
3330 | |
3331 | switch (mode) |
3332 | { |
3333 | default: |
3334 | break; |
3335 | |
3336 | case E_V16HFmode: |
3337 | case E_V16BFmode: |
3338 | case E_V8SFmode: |
3339 | case E_V8SImode: |
3340 | case E_V32QImode: |
3341 | case E_V16HImode: |
3342 | case E_V4DFmode: |
3343 | case E_V4DImode: |
3344 | case E_V32HFmode: |
3345 | case E_V32BFmode: |
3346 | case E_V16SFmode: |
3347 | case E_V16SImode: |
3348 | case E_V64QImode: |
3349 | case E_V32HImode: |
3350 | case E_V8DFmode: |
3351 | case E_V8DImode: |
3352 | /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ |
3353 | if (!named) |
3354 | return NULL; |
3355 | break; |
3356 | } |
3357 | |
3358 | return construct_container (mode, orig_mode, type, in_return: 0, nintregs: cum->nregs, |
3359 | nsseregs: cum->sse_nregs, |
3360 | intreg: &x86_64_int_parameter_registers [cum->regno], |
3361 | sse_regno: cum->sse_regno); |
3362 | } |
3363 | |
3364 | static rtx |
3365 | function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3366 | machine_mode orig_mode, bool named, const_tree type, |
3367 | HOST_WIDE_INT bytes) |
3368 | { |
3369 | unsigned int regno; |
3370 | |
3371 | /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. |
3372 | We use value of -2 to specify that current function call is MSABI. */ |
3373 | if (mode == VOIDmode) |
3374 | return GEN_INT (-2); |
3375 | |
3376 | /* If we've run out of registers, it goes on the stack. */ |
3377 | if (cum->nregs == 0) |
3378 | return NULL_RTX; |
3379 | |
3380 | regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; |
3381 | |
3382 | /* Only floating point modes are passed in anything but integer regs. */ |
3383 | if (TARGET_SSE && (mode == SFmode || mode == DFmode)) |
3384 | { |
3385 | if (named) |
3386 | { |
3387 | if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) |
3388 | regno = cum->regno + FIRST_SSE_REG; |
3389 | } |
3390 | else |
3391 | { |
3392 | rtx t1, t2; |
3393 | |
3394 | /* Unnamed floating parameters are passed in both the |
3395 | SSE and integer registers. */ |
3396 | t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); |
3397 | t2 = gen_rtx_REG (mode, regno); |
3398 | t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); |
3399 | t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); |
3400 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); |
3401 | } |
3402 | } |
3403 | /* Handle aggregated types passed in register. */ |
3404 | if (orig_mode == BLKmode) |
3405 | { |
3406 | if (bytes > 0 && bytes <= 8) |
3407 | mode = (bytes > 4 ? DImode : SImode); |
3408 | if (mode == BLKmode) |
3409 | mode = DImode; |
3410 | } |
3411 | |
3412 | return gen_reg_or_parallel (mode, orig_mode, regno); |
3413 | } |
3414 | |
3415 | /* Return where to put the arguments to a function. |
3416 | Return zero to push the argument on the stack, or a hard register in which to store the argument. |
3417 | |
3418 | ARG describes the argument while CUM gives information about the |
3419 | preceding args and about the function being called. */ |
3420 | |
3421 | static rtx |
3422 | ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) |
3423 | { |
3424 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3425 | machine_mode mode = arg.mode; |
3426 | HOST_WIDE_INT bytes, words; |
3427 | rtx reg; |
3428 | |
3429 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3430 | { |
3431 | gcc_assert (arg.type != NULL_TREE); |
3432 | if (POINTER_TYPE_P (arg.type)) |
3433 | { |
3434 | /* This is the pointer argument. */ |
3435 | gcc_assert (TYPE_MODE (arg.type) == ptr_mode); |
3436 | /* It is at -WORD(AP) in the current frame in interrupt and |
3437 | exception handlers. */ |
3438 | reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); |
3439 | } |
3440 | else |
3441 | { |
3442 | gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION |
3443 | && TREE_CODE (arg.type) == INTEGER_TYPE |
3444 | && TYPE_MODE (arg.type) == word_mode); |
3445 | /* The error code is the word-mode integer argument at |
3446 | -2 * WORD(AP) in the current frame of the exception |
3447 | handler. */ |
3448 | reg = gen_rtx_MEM (word_mode, |
3449 | plus_constant (Pmode, |
3450 | arg_pointer_rtx, |
3451 | -2 * UNITS_PER_WORD)); |
3452 | } |
3453 | return reg; |
3454 | } |
3455 | |
3456 | bytes = arg.promoted_size_in_bytes (); |
3457 | words = CEIL (bytes, UNITS_PER_WORD); |
3458 | |
3459 | /* To simplify the code below, represent vector types with a vector mode |
3460 | even if MMX/SSE are not active. */ |
3461 | if (arg.type && VECTOR_TYPE_P (arg.type)) |
3462 | mode = type_natural_mode (type: arg.type, cum, in_return: false); |
3463 | |
3464 | if (TARGET_64BIT) |
3465 | { |
3466 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3467 | |
3468 | if (call_abi == MS_ABI) |
3469 | reg = function_arg_ms_64 (cum, mode, orig_mode: arg.mode, named: arg.named, |
3470 | type: arg.type, bytes); |
3471 | else |
3472 | reg = function_arg_64 (cum, mode, orig_mode: arg.mode, type: arg.type, named: arg.named); |
3473 | } |
3474 | else |
3475 | reg = function_arg_32 (cum, mode, orig_mode: arg.mode, type: arg.type, bytes, words); |
3476 | |
3477 | /* Track if there are outgoing arguments on stack. */ |
3478 | if (reg == NULL_RTX && cum->caller) |
3479 | cfun->machine->outgoing_args_on_stack = true; |
3480 | |
3481 | return reg; |
3482 | } |
3483 | |
3484 | /* A C expression that indicates when an argument must be passed by |
3485 | reference. If nonzero for an argument, a copy of that argument is |
3486 | made in memory and a pointer to the argument is passed instead of |
3487 | the argument itself. The pointer is passed in whatever way is |
3488 | appropriate for passing a pointer to that type. */ |
3489 | |
3490 | static bool |
3491 | ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) |
3492 | { |
3493 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3494 | |
3495 | if (TARGET_64BIT) |
3496 | { |
3497 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3498 | |
3499 | /* See Windows x64 Software Convention. */ |
3500 | if (call_abi == MS_ABI) |
3501 | { |
3502 | HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); |
3503 | |
3504 | if (tree type = arg.type) |
3505 | { |
3506 | /* Arrays are passed by reference. */ |
3507 | if (TREE_CODE (type) == ARRAY_TYPE) |
3508 | return true; |
3509 | |
3510 | if (RECORD_OR_UNION_TYPE_P (type)) |
3511 | { |
3512 | /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
3513 | are passed by reference. */ |
3514 | msize = int_size_in_bytes (type); |
3515 | } |
3516 | } |
3517 | |
3518 | /* __m128 is passed by reference. */ |
3519 | return msize != 1 && msize != 2 && msize != 4 && msize != 8; |
3520 | } |
3521 | else if (arg.type && int_size_in_bytes (arg.type) == -1) |
3522 | return true; |
3523 | } |
3524 | |
3525 | return false; |
3526 | } |
3527 | |
3528 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3529 | passing ABI. XXX: This function is obsolete and is only used for |
3530 | checking psABI compatibility with previous versions of GCC. */ |
3531 | |
3532 | static bool |
3533 | ix86_compat_aligned_value_p (const_tree type) |
3534 | { |
3535 | machine_mode mode = TYPE_MODE (type); |
3536 | if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
3537 | || mode == TDmode |
3538 | || mode == TFmode |
3539 | || mode == TCmode) |
3540 | && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
3541 | return true; |
3542 | if (TYPE_ALIGN (type) < 128) |
3543 | return false; |
3544 | |
3545 | if (AGGREGATE_TYPE_P (type)) |
3546 | { |
3547 | /* Walk the aggregates recursively. */ |
3548 | switch (TREE_CODE (type)) |
3549 | { |
3550 | case RECORD_TYPE: |
3551 | case UNION_TYPE: |
3552 | case QUAL_UNION_TYPE: |
3553 | { |
3554 | tree field; |
3555 | |
3556 | /* Walk all the structure fields. */ |
3557 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
3558 | { |
3559 | if (TREE_CODE (field) == FIELD_DECL |
3560 | && ix86_compat_aligned_value_p (TREE_TYPE (field))) |
3561 | return true; |
3562 | } |
3563 | break; |
3564 | } |
3565 | |
3566 | case ARRAY_TYPE: |
3567 | /* Just for use if some languages passes arrays by value. */ |
3568 | if (ix86_compat_aligned_value_p (TREE_TYPE (type))) |
3569 | return true; |
3570 | break; |
3571 | |
3572 | default: |
3573 | gcc_unreachable (); |
3574 | } |
3575 | } |
3576 | return false; |
3577 | } |
3578 | |
3579 | /* Return the alignment boundary for MODE and TYPE with alignment ALIGN. |
3580 | XXX: This function is obsolete and is only used for checking psABI |
3581 | compatibility with previous versions of GCC. */ |
3582 | |
3583 | static unsigned int |
3584 | ix86_compat_function_arg_boundary (machine_mode mode, |
3585 | const_tree type, unsigned int align) |
3586 | { |
3587 | /* In 32bit, only _Decimal128 and __float128 are aligned to their |
3588 | natural boundaries. */ |
3589 | if (!TARGET_64BIT && mode != TDmode && mode != TFmode) |
3590 | { |
3591 | /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
3592 | make an exception for SSE modes since these require 128bit |
3593 | alignment. |
3594 | |
3595 | The handling here differs from field_alignment. ICC aligns MMX |
3596 | arguments to 4 byte boundaries, while structure fields are aligned |
3597 | to 8 byte boundaries. */ |
3598 | if (!type) |
3599 | { |
3600 | if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) |
3601 | align = PARM_BOUNDARY; |
3602 | } |
3603 | else |
3604 | { |
3605 | if (!ix86_compat_aligned_value_p (type)) |
3606 | align = PARM_BOUNDARY; |
3607 | } |
3608 | } |
3609 | if (align > BIGGEST_ALIGNMENT) |
3610 | align = BIGGEST_ALIGNMENT; |
3611 | return align; |
3612 | } |
3613 | |
3614 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3615 | passing ABI. */ |
3616 | |
3617 | static bool |
3618 | ix86_contains_aligned_value_p (const_tree type) |
3619 | { |
3620 | machine_mode mode = TYPE_MODE (type); |
3621 | |
3622 | if (mode == XFmode || mode == XCmode) |
3623 | return false; |
3624 | |
3625 | if (TYPE_ALIGN (type) < 128) |
3626 | return false; |
3627 | |
3628 | if (AGGREGATE_TYPE_P (type)) |
3629 | { |
3630 | /* Walk the aggregates recursively. */ |
3631 | switch (TREE_CODE (type)) |
3632 | { |
3633 | case RECORD_TYPE: |
3634 | case UNION_TYPE: |
3635 | case QUAL_UNION_TYPE: |
3636 | { |
3637 | tree field; |
3638 | |
3639 | /* Walk all the structure fields. */ |
3640 | for (field = TYPE_FIELDS (type); |
3641 | field; |
3642 | field = DECL_CHAIN (field)) |
3643 | { |
3644 | if (TREE_CODE (field) == FIELD_DECL |
3645 | && ix86_contains_aligned_value_p (TREE_TYPE (field))) |
3646 | return true; |
3647 | } |
3648 | break; |
3649 | } |
3650 | |
3651 | case ARRAY_TYPE: |
3652 | /* Just for use if some languages passes arrays by value. */ |
3653 | if (ix86_contains_aligned_value_p (TREE_TYPE (type))) |
3654 | return true; |
3655 | break; |
3656 | |
3657 | default: |
3658 | gcc_unreachable (); |
3659 | } |
3660 | } |
3661 | else |
3662 | return TYPE_ALIGN (type) >= 128; |
3663 | |
3664 | return false; |
3665 | } |
3666 | |
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  Never returns less than PARM_BOUNDARY;
   emits a one-time -Wpsabi note when the result differs from the
   pre-GCC 4.6 boundary.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty records occupy no argument slot; use the minimum.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      /* WARNED persists across calls so the psABI note fires at most
	 once per compilation.  */
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* In 32-bit mode anything below 128-bit alignment collapses
	     to the default parameter boundary.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 align: saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6" ,
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
3722 | |
3723 | /* Return true if N is a possible register number of function value. */ |
3724 | |
3725 | static bool |
3726 | ix86_function_value_regno_p (const unsigned int regno) |
3727 | { |
3728 | switch (regno) |
3729 | { |
3730 | case AX_REG: |
3731 | return true; |
3732 | case DX_REG: |
3733 | return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); |
3734 | case DI_REG: |
3735 | case SI_REG: |
3736 | return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; |
3737 | |
3738 | /* Complex values are returned in %st(0)/%st(1) pair. */ |
3739 | case ST0_REG: |
3740 | case ST1_REG: |
3741 | /* TODO: The function should depend on current function ABI but |
3742 | builtins.cc would need updating then. Therefore we use the |
3743 | default ABI. */ |
3744 | if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) |
3745 | return false; |
3746 | return TARGET_FLOAT_RETURNS_IN_80387; |
3747 | |
3748 | /* Complex values are returned in %xmm0/%xmm1 pair. */ |
3749 | case XMM0_REG: |
3750 | case XMM1_REG: |
3751 | return TARGET_SSE; |
3752 | |
3753 | case MM0_REG: |
3754 | if (TARGET_MACHO || TARGET_64BIT) |
3755 | return false; |
3756 | return TARGET_MMX; |
3757 | } |
3758 | |
3759 | return false; |
3760 | } |
3761 | |
3762 | /* Check whether the register REGNO should be zeroed on X86. |
3763 | When ALL_SSE_ZEROED is true, all SSE registers have been zeroed |
3764 | together, no need to zero it again. |
3765 | When NEED_ZERO_MMX is true, MMX registers should be cleared. */ |
3766 | |
3767 | static bool |
3768 | zero_call_used_regno_p (const unsigned int regno, |
3769 | bool all_sse_zeroed, |
3770 | bool need_zero_mmx) |
3771 | { |
3772 | return GENERAL_REGNO_P (regno) |
3773 | || (!all_sse_zeroed && SSE_REGNO_P (regno)) |
3774 | || MASK_REGNO_P (regno) |
3775 | || (need_zero_mmx && MMX_REGNO_P (regno)); |
3776 | } |
3777 | |
3778 | /* Return the machine_mode that is used to zero register REGNO. */ |
3779 | |
3780 | static machine_mode |
3781 | zero_call_used_regno_mode (const unsigned int regno) |
3782 | { |
3783 | /* NB: We only need to zero the lower 32 bits for integer registers |
3784 | and the lower 128 bits for vector registers since destination are |
3785 | zero-extended to the full register width. */ |
3786 | if (GENERAL_REGNO_P (regno)) |
3787 | return SImode; |
3788 | else if (SSE_REGNO_P (regno)) |
3789 | return V4SFmode; |
3790 | else if (MASK_REGNO_P (regno)) |
3791 | return HImode; |
3792 | else if (MMX_REGNO_P (regno)) |
3793 | return V2SImode; |
3794 | else |
3795 | gcc_unreachable (); |
3796 | } |
3797 | |
3798 | /* Generate a rtx to zero all vector registers together if possible, |
3799 | otherwise, return NULL. */ |
3800 | |
3801 | static rtx |
3802 | zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs) |
3803 | { |
3804 | if (!TARGET_AVX) |
3805 | return NULL; |
3806 | |
3807 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3808 | if ((LEGACY_SSE_REGNO_P (regno) |
3809 | || (TARGET_64BIT |
3810 | && (REX_SSE_REGNO_P (regno) |
3811 | || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno))))) |
3812 | && !TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3813 | return NULL; |
3814 | |
3815 | return gen_avx_vzeroall (); |
3816 | } |
3817 | |
3818 | /* Generate insns to zero all st registers together. |
3819 | Return true when zeroing instructions are generated. |
3820 | Assume the number of st registers that are zeroed is num_of_st, |
3821 | we will emit the following sequence to zero them together: |
3822 | fldz; \ |
3823 | fldz; \ |
3824 | ... |
3825 | fldz; \ |
3826 | fstp %%st(0); \ |
3827 | fstp %%st(0); \ |
3828 | ... |
3829 | fstp %%st(0); |
3830 | i.e., num_of_st fldz followed by num_of_st fstp to clear the stack |
3831 | mark stack slots empty. |
3832 | |
3833 | How to compute the num_of_st: |
3834 | There is no direct mapping from stack registers to hard register |
3835 | numbers. If one stack register needs to be cleared, we don't know |
3836 | where in the stack the value remains. So, if any stack register |
3837 | needs to be cleared, the whole stack should be cleared. However, |
3838 | x87 stack registers that hold the return value should be excluded. |
3839 | x87 returns in the top (two for complex values) register, so |
3840 | num_of_st should be 7/6 when x87 returns, otherwise it will be 8. |
3841 | return the value of num_of_st. */ |
3842 | |
3843 | |
3844 | static int |
3845 | zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs) |
3846 | { |
3847 | |
3848 | /* If the FPU is disabled, no need to zero all st registers. */ |
3849 | if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
3850 | return 0; |
3851 | |
3852 | unsigned int num_of_st = 0; |
3853 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3854 | if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno)) |
3855 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3856 | { |
3857 | num_of_st++; |
3858 | break; |
3859 | } |
3860 | |
3861 | if (num_of_st == 0) |
3862 | return 0; |
3863 | |
3864 | bool return_with_x87 = false; |
3865 | return_with_x87 = (crtl->return_rtx |
3866 | && (STACK_REG_P (crtl->return_rtx))); |
3867 | |
3868 | bool complex_return = false; |
3869 | complex_return = (crtl->return_rtx |
3870 | && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx))); |
3871 | |
3872 | if (return_with_x87) |
3873 | if (complex_return) |
3874 | num_of_st = 6; |
3875 | else |
3876 | num_of_st = 7; |
3877 | else |
3878 | num_of_st = 8; |
3879 | |
3880 | rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG); |
3881 | for (unsigned int i = 0; i < num_of_st; i++) |
3882 | emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode))); |
3883 | |
3884 | for (unsigned int i = 0; i < num_of_st; i++) |
3885 | { |
3886 | rtx insn; |
3887 | insn = emit_insn (gen_rtx_SET (st_reg, st_reg)); |
3888 | add_reg_note (insn, REG_DEAD, st_reg); |
3889 | } |
3890 | return num_of_st; |
3891 | } |
3892 | |
3893 | |
3894 | /* When the routine exit in MMX mode, if any ST register needs |
3895 | to be zeroed, we should clear all MMX registers except the |
3896 | RET_MMX_REGNO that holds the return value. */ |
3897 | static bool |
3898 | zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs, |
3899 | unsigned int ret_mmx_regno) |
3900 | { |
3901 | bool need_zero_all_mm = false; |
3902 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3903 | if (STACK_REGNO_P (regno) |
3904 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3905 | { |
3906 | need_zero_all_mm = true; |
3907 | break; |
3908 | } |
3909 | |
3910 | if (!need_zero_all_mm) |
3911 | return false; |
3912 | |
3913 | machine_mode mode = V2SImode; |
3914 | for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++) |
3915 | if (regno != ret_mmx_regno) |
3916 | { |
3917 | rtx reg = gen_rtx_REG (mode, regno); |
3918 | emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode))); |
3919 | } |
3920 | return true; |
3921 | } |
3922 | |
/* TARGET_ZERO_CALL_USED_REGS. */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed (a subset of the request; e.g. MMX registers may be skipped
   when the function exits in x87 mode). */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (set&: zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together. */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the
     following rules to clear them:
		      MMX exit mode           x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX        | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX        | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with. */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together. */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register. */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers. */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     ret_mmx_regno: exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);
    }

  /* Now, generate instructions to zero all the other registers. */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno))
	continue;
      /* Individual MMX clears are only needed in MMX exit mode when the
	 bulk clear above did not happen. */
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   need_zero_mmx: exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* Unless a plain mov $0 is preferred for this target, wrap
	     the set in a PARALLEL with a FLAGS_REG clobber -- needed
	     when the zeroing insn (e.g. xor) clobbers the flags. */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU. */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
4029 | |
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  This is the 32-bit SYSV variant; the register
   is chosen from MODE, with a final override for SSE-returning
   functions.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available. However
     some ABIs may require the result to be returned like DImode. */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
     we prevent this case when sse is not available. However some ABIs
     may require the result to be returned like integer TImode. */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0. */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0. */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax. */
    regno = AX_REG;

  /* Return __bf16/ _Float16/_Complex _Float16 by sse register. */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  /* Diagnose but recover with %eax so compilation continues. */
	  error ("SSE register return with SSE2 disabled" );
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled" );

      /* Wrap the register in a one-element PARALLEL at offset 0. */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute. */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (type: fntype, decl: fn, warn: false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled" , fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called" );
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly. */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
4117 | |
/* Likewise, for the 64-bit SYSV ABI.  VALTYPE may be NULL for
   libcalls, in which case the return register is picked from MODE
   alone; otherwise the value is classified via construct_container.  */
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node. */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	/* All scalar/complex FP and decimal FP modes return in %xmm0. */
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	/* 80-bit extended values return on the x87 stack. */
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	/* No single register can hold a TCmode libcall value. */
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode. */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, type: valtype, in_return: 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     intreg: x86_64_int_return_registers, sse_regno: 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value. */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4173 | |
4174 | static rtx |
4175 | function_value_ms_32 (machine_mode orig_mode, machine_mode mode, |
4176 | const_tree fntype, const_tree fn, const_tree valtype) |
4177 | { |
4178 | unsigned int regno; |
4179 | |
4180 | /* Floating point return values in %st(0) |
4181 | (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ |
4182 | if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 |
4183 | && (GET_MODE_SIZE (mode) > 8 |
4184 | || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) |
4185 | { |
4186 | regno = FIRST_FLOAT_REG; |
4187 | return gen_rtx_REG (orig_mode, regno); |
4188 | } |
4189 | else |
4190 | return function_value_32(orig_mode, mode, fntype,fn); |
4191 | } |
4192 | |
4193 | static rtx |
4194 | function_value_ms_64 (machine_mode orig_mode, machine_mode mode, |
4195 | const_tree valtype) |
4196 | { |
4197 | unsigned int regno = AX_REG; |
4198 | |
4199 | if (TARGET_SSE) |
4200 | { |
4201 | switch (GET_MODE_SIZE (mode)) |
4202 | { |
4203 | case 16: |
4204 | if (valtype != NULL_TREE |
4205 | && !VECTOR_INTEGER_TYPE_P (valtype) |
4206 | && !VECTOR_INTEGER_TYPE_P (valtype) |
4207 | && !INTEGRAL_TYPE_P (valtype) |
4208 | && !VECTOR_FLOAT_TYPE_P (valtype)) |
4209 | break; |
4210 | if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
4211 | && !COMPLEX_MODE_P (mode)) |
4212 | regno = FIRST_SSE_REG; |
4213 | break; |
4214 | case 8: |
4215 | case 4: |
4216 | if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) |
4217 | break; |
4218 | if (mode == SFmode || mode == DFmode) |
4219 | regno = FIRST_SSE_REG; |
4220 | break; |
4221 | default: |
4222 | break; |
4223 | } |
4224 | } |
4225 | return gen_rtx_REG (orig_mode, regno); |
4226 | } |
4227 | |
4228 | static rtx |
4229 | ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, |
4230 | machine_mode orig_mode, machine_mode mode) |
4231 | { |
4232 | const_tree fn, fntype; |
4233 | |
4234 | fn = NULL_TREE; |
4235 | if (fntype_or_decl && DECL_P (fntype_or_decl)) |
4236 | fn = fntype_or_decl; |
4237 | fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
4238 | |
4239 | if (ix86_function_type_abi (fntype) == MS_ABI) |
4240 | { |
4241 | if (TARGET_64BIT) |
4242 | return function_value_ms_64 (orig_mode, mode, valtype); |
4243 | else |
4244 | return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); |
4245 | } |
4246 | else if (TARGET_64BIT) |
4247 | return function_value_64 (orig_mode, mode, valtype); |
4248 | else |
4249 | return function_value_32 (orig_mode, mode, fntype, fn); |
4250 | } |
4251 | |
4252 | static rtx |
4253 | ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) |
4254 | { |
4255 | machine_mode mode, orig_mode; |
4256 | |
4257 | orig_mode = TYPE_MODE (valtype); |
4258 | mode = type_natural_mode (type: valtype, NULL, in_return: true); |
4259 | return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); |
4260 | } |
4261 | |
4262 | /* Pointer function arguments and return values are promoted to |
4263 | word_mode for normal functions. */ |
4264 | |
4265 | static machine_mode |
4266 | ix86_promote_function_mode (const_tree type, machine_mode mode, |
4267 | int *punsignedp, const_tree fntype, |
4268 | int for_return) |
4269 | { |
4270 | if (cfun->machine->func_type == TYPE_NORMAL |
4271 | && type != NULL_TREE |
4272 | && POINTER_TYPE_P (type)) |
4273 | { |
4274 | *punsignedp = POINTERS_EXTEND_UNSIGNED; |
4275 | return word_mode; |
4276 | } |
4277 | return default_promote_function_mode (type, mode, punsignedp, fntype, |
4278 | for_return); |
4279 | } |
4280 | |
4281 | /* Return true if a structure, union or array with MODE containing FIELD |
4282 | should be accessed using BLKmode. */ |
4283 | |
4284 | static bool |
4285 | ix86_member_type_forces_blk (const_tree field, machine_mode mode) |
4286 | { |
4287 | /* Union with XFmode must be in BLKmode. */ |
4288 | return (mode == XFmode |
4289 | && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE |
4290 | || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); |
4291 | } |
4292 | |
4293 | rtx |
4294 | ix86_libcall_value (machine_mode mode) |
4295 | { |
4296 | return ix86_function_value_1 (NULL, NULL, orig_mode: mode, mode); |
4297 | } |
4298 | |
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, in_return: true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0. */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248]. */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  /* SYSV 64-bit: in memory iff classification needs no
	     registers (examine_argument returns false). */
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, in_return: 1,
				   int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers. */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      /* BLKmode values always live in memory. */
      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX. */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available. */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available. */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available. */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available. */
	  if (size == 64)
	    return !TARGET_AVX512F || !TARGET_EVEX512;
	}

      /* long double fits in %st(0). */
      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly. */
      gcc_assert (mode != OImode);

      return false;
    }
}
4385 | |
4386 | /* Implement TARGET_PUSH_ARGUMENT. */ |
4387 | |
4388 | static bool |
4389 | ix86_push_argument (unsigned int npush) |
4390 | { |
4391 | /* If SSE2 is available, use vector move to put large argument onto |
4392 | stack. NB: In 32-bit mode, use 8-byte vector move. */ |
4393 | return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8)) |
4394 | && TARGET_PUSH_ARGS |
4395 | && !ACCUMULATE_OUTGOING_ARGS); |
4396 | } |
4397 | |
4398 | |
/* Create the va_list data type.  */

/* Build the SYSV 64-bit "__va_list_tag" record:
     unsigned gp_offset;        -- offset of next GPR in reg_save_area
     unsigned fp_offset;        -- offset of next FPR in reg_save_area
     void *overflow_arg_area;   -- next stack-passed argument
     void *reg_save_area;       -- start of the register save area
   (field roles per the x86-64 psABI -- this function only lays the
   record out).  Returns an array type of one such record.  */
static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag" ), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset" ),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset" ),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area" ),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area" ),
		      ptr_type_node);

  /* Remember the counter fields for stdarg optimizations elsewhere.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* Tag the record so canonical_va_list_type can identify it even
     across LTO type merging (see ix86_build_builtin_va_list).  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list" ),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
4446 | |
4447 | /* Setup the builtin va_list data type and for 64-bit the additional |
4448 | calling convention specific va_list data types. */ |
4449 | |
4450 | static tree |
4451 | ix86_build_builtin_va_list (void) |
4452 | { |
4453 | if (TARGET_64BIT) |
4454 | { |
4455 | /* Initialize ABI specific va_list builtin types. |
4456 | |
4457 | In lto1, we can encounter two va_list types: |
4458 | - one as a result of the type-merge across TUs, and |
4459 | - the one constructed here. |
4460 | These two types will not have the same TYPE_MAIN_VARIANT, and therefore |
4461 | a type identity check in canonical_va_list_type based on |
4462 | TYPE_MAIN_VARIANT (which we used to have) will not work. |
4463 | Instead, we tag each va_list_type_node with its unique attribute, and |
4464 | look for the attribute in the type identity check in |
4465 | canonical_va_list_type. |
4466 | |
4467 | Tagging sysv_va_list_type_node directly with the attribute is |
4468 | problematic since it's a array of one record, which will degrade into a |
4469 | pointer to record when used as parameter (see build_va_arg comments for |
4470 | an example), dropping the attribute in the process. So we tag the |
4471 | record instead. */ |
4472 | |
4473 | /* For SYSV_ABI we use an array of one record. */ |
4474 | sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); |
4475 | |
4476 | /* For MS_ABI we use plain pointer to argument area. */ |
4477 | tree char_ptr_type = build_pointer_type (char_type_node); |
4478 | tree attr = tree_cons (get_identifier ("ms_abi va_list" ), NULL_TREE, |
4479 | TYPE_ATTRIBUTES (char_ptr_type)); |
4480 | ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); |
4481 | |
4482 | return ((ix86_abi == MS_ABI) |
4483 | ? ms_va_list_type_node |
4484 | : sysv_va_list_type_node); |
4485 | } |
4486 | else |
4487 | { |
4488 | /* For i386 we use plain pointer to argument area. */ |
4489 | return build_pointer_type (char_type_node); |
4490 | } |
4491 | } |
4492 | |
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

/* 64-bit SYSV: spill the parameter registers not consumed by named
   arguments into the register save area, so va_arg can find them.
   CUM gives the number of GPR/SSE registers already used.  */
static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save at all.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Save only the registers the va_list may actually read, capped at
     the ABI register-parameter maximum.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      /* Skip the SSE spills entirely when %al is zero.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  /* Each SSE slot is 16 bytes, placed after the GPR area.  */
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
4578 | |
4579 | static void |
4580 | setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
4581 | { |
4582 | alias_set_type set = get_varargs_alias_set (); |
4583 | int i; |
4584 | |
4585 | /* Reset to zero, as there might be a sysv vaarg used |
4586 | before. */ |
4587 | ix86_varargs_gpr_size = 0; |
4588 | ix86_varargs_fpr_size = 0; |
4589 | |
4590 | for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
4591 | { |
4592 | rtx reg, mem; |
4593 | |
4594 | mem = gen_rtx_MEM (Pmode, |
4595 | plus_constant (Pmode, virtual_incoming_args_rtx, |
4596 | i * UNITS_PER_WORD)); |
4597 | MEM_NOTRAP_P (mem) = 1; |
4598 | set_mem_alias_set (mem, set); |
4599 | |
4600 | reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); |
4601 | emit_move_insn (mem, reg); |
4602 | } |
4603 | } |
4604 | |
4605 | static void |
4606 | ix86_setup_incoming_varargs (cumulative_args_t cum_v, |
4607 | const function_arg_info &arg, |
4608 | int *, int no_rtl) |
4609 | { |
4610 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
4611 | CUMULATIVE_ARGS next_cum; |
4612 | tree fntype; |
4613 | |
4614 | /* This argument doesn't appear to be used anymore. Which is good, |
4615 | because the old code here didn't suppress rtl generation. */ |
4616 | gcc_assert (!no_rtl); |
4617 | |
4618 | if (!TARGET_64BIT) |
4619 | return; |
4620 | |
4621 | fntype = TREE_TYPE (current_function_decl); |
4622 | |
4623 | /* For varargs, we do not want to skip the dummy va_dcl argument. |
4624 | For stdargs, we do want to skip the last named argument. */ |
4625 | next_cum = *cum; |
4626 | if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) |
4627 | && stdarg_p (fntype)) |
4628 | ix86_function_arg_advance (cum_v: pack_cumulative_args (arg: &next_cum), arg); |
4629 | |
4630 | if (cum->call_abi == MS_ABI) |
4631 | setup_incoming_varargs_ms_64 (&next_cum); |
4632 | else |
4633 | setup_incoming_varargs_64 (&next_cum); |
4634 | } |
4635 | |
4636 | /* Checks if TYPE is of kind va_list char *. */ |
4637 | |
4638 | static bool |
4639 | is_va_list_char_pointer (tree type) |
4640 | { |
4641 | tree canonic; |
4642 | |
4643 | /* For 32-bit it is always true. */ |
4644 | if (!TARGET_64BIT) |
4645 | return true; |
4646 | canonic = ix86_canonical_va_list_type (type); |
4647 | return (canonic == ms_va_list_type_node |
4648 | || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
4649 | } |
4650 | |
/* Implement va_start.  Initialize the va_list object VALIST so that
   it describes the registers already consumed by named arguments and
   the start of the stack overflow area (NEXTARG is used only by the
   char-pointer flavor via std_expand_builtin_va_start).  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  /* Build the copy in its own sequence, then splice it in
	     right after the function entry point.  */
	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  /* With split stacks the argument pointer lives in the pseudo
	     set up above; store base + offset into the va_list.  */
	  rtx va_r, next;

	  va_r = expand_expr (exp: valist, NULL_RTX, VOIDmode, modifier: EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  /* Pull out the four fields of the SysV va_list record, in
     declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* Integer register offset: 8 bytes per GPR already consumed.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* SSE register offsets start after the 8 * X86_64_REGPARM_MAX
	 bytes of GPR save area, 16 bytes per SSE register.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      /* If no GPRs were saved, bias the pointer back by the full GPR
	 area so the fp offsets computed above still index correctly.  */
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL);
    }
}
4782 | |
/* Implement va_arg for the 64-bit SysV ABI.  Emit GIMPLE into PRE_P
   that fetches the next argument of TYPE from VALIST, taking it from
   the register save area when enough registers remain and from the
   overflow (stack) area otherwise.  Returns the dereferenced value.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  /* Slot indices of the integer argument registers in the save area.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Pull out the four fields of the SysV va_list record, in
     declaration order, and build a COMPONENT_REF for each.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);

  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference yield a pointer; fetch that
     pointer instead of the value itself.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, in_return: false);
  switch (nat_mode)
    {
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (mode: nat_mode, TYPE_MODE (type),
				       type, in_return: 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       sse_regno: 0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr" );
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false: take the stack (overflow) path.
	 lab_over: join point after the register path.  */
      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (mode: nat_mode, type, in_return: 0, int_nregs: &needed_intregs, sse_nregs: &needed_sseregs);

      /* A temporary is needed when the value cannot be read directly
	 from the save area: alignment exceeds what the area gives.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr" );
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr" );
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp" );
	  TREE_ADDRESSABLE (temp) = 1;

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  /* Copy the value piecewise out of the save area into TEMP,
	     one register-sized chunk per container slot.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      /* The final piece may be shorter than a full register;
		 shrink the copy to the remaining byte count.  */
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (size: nbits, limit: 1).exists (mode: &mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* No integer mode of exactly cur_size bytes exists;
		     fall back to a byte-count memcpy.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (fncode: BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (dest: lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument boundary.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance the overflow pointer past the argument just fetched.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (label: lab_over));

  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  /* By-reference arguments need one extra dereference.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5089 | |
5090 | /* Return true if OPNUM's MEM should be matched |
5091 | in movabs* patterns. */ |
5092 | |
5093 | bool |
5094 | ix86_check_movabs (rtx insn, int opnum) |
5095 | { |
5096 | rtx set, mem; |
5097 | |
5098 | set = PATTERN (insn); |
5099 | if (GET_CODE (set) == PARALLEL) |
5100 | set = XVECEXP (set, 0, 0); |
5101 | gcc_assert (GET_CODE (set) == SET); |
5102 | mem = XEXP (set, opnum); |
5103 | while (SUBREG_P (mem)) |
5104 | mem = SUBREG_REG (mem); |
5105 | gcc_assert (MEM_P (mem)); |
5106 | return volatile_ok || !MEM_VOLATILE_P (mem); |
5107 | } |
5108 | |
5109 | /* Return false if INSN contains a MEM with a non-default address space. */ |
5110 | bool |
5111 | ix86_check_no_addr_space (rtx insn) |
5112 | { |
5113 | subrtx_var_iterator::array_type array; |
5114 | FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) |
5115 | { |
5116 | rtx x = *iter; |
5117 | if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) |
5118 | return false; |
5119 | } |
5120 | return true; |
5121 | } |
5122 | |
5123 | /* Initialize the table of extra 80387 mathematical constants. */ |
5124 | |
5125 | static void |
5126 | init_ext_80387_constants (void) |
5127 | { |
5128 | static const char * cst[5] = |
5129 | { |
5130 | "0.3010299956639811952256464283594894482" , /* 0: fldlg2 */ |
5131 | "0.6931471805599453094286904741849753009" , /* 1: fldln2 */ |
5132 | "1.4426950408889634073876517827983434472" , /* 2: fldl2e */ |
5133 | "3.3219280948873623478083405569094566090" , /* 3: fldl2t */ |
5134 | "3.1415926535897932385128089594061862044" , /* 4: fldpi */ |
5135 | }; |
5136 | int i; |
5137 | |
5138 | for (i = 0; i < 5; i++) |
5139 | { |
5140 | real_from_string (&ext_80387_constants_table[i], cst[i]); |
5141 | /* Ensure each constant is rounded to XFmode precision. */ |
5142 | real_convert (&ext_80387_constants_table[i], |
5143 | XFmode, &ext_80387_constants_table[i]); |
5144 | } |
5145 | |
5146 | ext_80387_constants_init = 1; |
5147 | } |
5148 | |
5149 | /* Return non-zero if the constant is something that |
5150 | can be loaded with a special instruction. */ |
5151 | |
5152 | int |
5153 | standard_80387_constant_p (rtx x) |
5154 | { |
5155 | machine_mode mode = GET_MODE (x); |
5156 | |
5157 | const REAL_VALUE_TYPE *r; |
5158 | |
5159 | if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) |
5160 | return -1; |
5161 | |
5162 | if (x == CONST0_RTX (mode)) |
5163 | return 1; |
5164 | if (x == CONST1_RTX (mode)) |
5165 | return 2; |
5166 | |
5167 | r = CONST_DOUBLE_REAL_VALUE (x); |
5168 | |
5169 | /* For XFmode constants, try to find a special 80387 instruction when |
5170 | optimizing for size or on those CPUs that benefit from them. */ |
5171 | if (mode == XFmode |
5172 | && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS) |
5173 | && !flag_rounding_math) |
5174 | { |
5175 | int i; |
5176 | |
5177 | if (! ext_80387_constants_init) |
5178 | init_ext_80387_constants (); |
5179 | |
5180 | for (i = 0; i < 5; i++) |
5181 | if (real_identical (r, &ext_80387_constants_table[i])) |
5182 | return i + 3; |
5183 | } |
5184 | |
5185 | /* Load of the constant -0.0 or -1.0 will be split as |
5186 | fldz;fchs or fld1;fchs sequence. */ |
5187 | if (real_isnegzero (r)) |
5188 | return 8; |
5189 | if (real_identical (r, &dconstm1)) |
5190 | return 9; |
5191 | |
5192 | return 0; |
5193 | } |
5194 | |
5195 | /* Return the opcode of the special instruction to be used to load |
5196 | the constant X. */ |
5197 | |
5198 | const char * |
5199 | standard_80387_constant_opcode (rtx x) |
5200 | { |
5201 | switch (standard_80387_constant_p (x)) |
5202 | { |
5203 | case 1: |
5204 | return "fldz" ; |
5205 | case 2: |
5206 | return "fld1" ; |
5207 | case 3: |
5208 | return "fldlg2" ; |
5209 | case 4: |
5210 | return "fldln2" ; |
5211 | case 5: |
5212 | return "fldl2e" ; |
5213 | case 6: |
5214 | return "fldl2t" ; |
5215 | case 7: |
5216 | return "fldpi" ; |
5217 | case 8: |
5218 | case 9: |
5219 | return "#" ; |
5220 | default: |
5221 | gcc_unreachable (); |
5222 | } |
5223 | } |
5224 | |
5225 | /* Return the CONST_DOUBLE representing the 80387 constant that is |
5226 | loaded by the specified special instruction. The argument IDX |
5227 | matches the return value from standard_80387_constant_p. */ |
5228 | |
5229 | rtx |
5230 | standard_80387_constant_rtx (int idx) |
5231 | { |
5232 | int i; |
5233 | |
5234 | if (! ext_80387_constants_init) |
5235 | init_ext_80387_constants (); |
5236 | |
5237 | switch (idx) |
5238 | { |
5239 | case 3: |
5240 | case 4: |
5241 | case 5: |
5242 | case 6: |
5243 | case 7: |
5244 | i = idx - 3; |
5245 | break; |
5246 | |
5247 | default: |
5248 | gcc_unreachable (); |
5249 | } |
5250 | |
5251 | return const_double_from_real_value (ext_80387_constants_table[i], |
5252 | XFmode); |
5253 | } |
5254 | |
5255 | /* Return 1 if X is all bits 0, 2 if X is all bits 1 |
5256 | and 3 if X is all bits 1 with zero extend |
5257 | in supported SSE/AVX vector mode. */ |
5258 | |
5259 | int |
5260 | standard_sse_constant_p (rtx x, machine_mode pred_mode) |
5261 | { |
5262 | machine_mode mode; |
5263 | |
5264 | if (!TARGET_SSE) |
5265 | return 0; |
5266 | |
5267 | mode = GET_MODE (x); |
5268 | |
5269 | if (x == const0_rtx || const0_operand (x, mode)) |
5270 | return 1; |
5271 | |
5272 | if (x == constm1_rtx |
5273 | || vector_all_ones_operand (x, mode) |
5274 | || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT |
5275 | || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT) |
5276 | && float_vector_all_ones_operand (x, mode))) |
5277 | { |
5278 | /* VOIDmode integer constant, get mode from the predicate. */ |
5279 | if (mode == VOIDmode) |
5280 | mode = pred_mode; |
5281 | |
5282 | switch (GET_MODE_SIZE (mode)) |
5283 | { |
5284 | case 64: |
5285 | if (TARGET_AVX512F && TARGET_EVEX512) |
5286 | return 2; |
5287 | break; |
5288 | case 32: |
5289 | if (TARGET_AVX2) |
5290 | return 2; |
5291 | break; |
5292 | case 16: |
5293 | if (TARGET_SSE2) |
5294 | return 2; |
5295 | break; |
5296 | case 0: |
5297 | /* VOIDmode */ |
5298 | gcc_unreachable (); |
5299 | default: |
5300 | break; |
5301 | } |
5302 | } |
5303 | |
5304 | if (vector_all_ones_zero_extend_half_operand (x, mode) |
5305 | || vector_all_ones_zero_extend_quarter_operand (x, mode)) |
5306 | return 3; |
5307 | |
5308 | return 0; |
5309 | } |
5310 | |
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  operands[1] must be a
   standard SSE constant (all-zeros, all-ones, or zero-extended
   all-ones) as recognized by standard_sse_constant_p.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* All-zeros: emit a self-XOR chosen by the insn's mode attribute.
     %x0 selects the xmm form (needs AVX512VL for xmm16+), %g0 the
     full zmm form (needs EVEX512).  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0" ;
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpxord\t%x0, %x0, %x0" ;
	      else if (TARGET_EVEX512)
		return "vpxord\t%g0, %g0, %g0" ;
	      else
		gcc_unreachable ();
	    }
	  return "vpxor\t%x0, %x0, %x0" ;

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0" ;
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      /* The EVEX-encoded FP xor exists only with AVX512DQ;
		 otherwise use the integer form.  */
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorpd\t%x0, %x0, %x0" ;
		  else if (TARGET_EVEX512)
		    return "vxorpd\t%g0, %g0, %g0" ;
		  else
		    gcc_unreachable ();
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxorq\t%x0, %x0, %x0" ;
		  else if (TARGET_EVEX512)
		    return "vpxorq\t%g0, %g0, %g0" ;
		  else
		    gcc_unreachable ();
		}
	    }
	  return "vxorpd\t%x0, %x0, %x0" ;

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0" ;
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512DQ)
		{
		  if (TARGET_AVX512VL)
		    return "vxorps\t%x0, %x0, %x0" ;
		  else if (TARGET_EVEX512)
		    return "vxorps\t%g0, %g0, %g0" ;
		  else
		    gcc_unreachable ();
		}
	      else
		{
		  if (TARGET_AVX512VL)
		    return "vpxord\t%x0, %x0, %x0" ;
		  else if (TARGET_EVEX512)
		    return "vpxord\t%g0, %g0, %g0" ;
		  else
		    gcc_unreachable ();
		}
	    }
	  return "vxorps\t%x0, %x0, %x0" ;

	default:
	  gcc_unreachable ();
	}
    }
  /* All-ones: compare-equal against self, or vpternlogd with an
     all-ones immediate for EVEX-only registers and 512-bit modes.  */
  else if (x == constm1_rtx
	   || vector_all_ones_operand (x, mode)
	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	       && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}" ;

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    {
	      if (TARGET_AVX512VL)
		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}" ;
	      else if (TARGET_EVEX512)
		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}" ;
	      else
		gcc_unreachable ();
	    }
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0" );

	default:
	  gcc_unreachable ();
	}
   }
  /* All-ones in the low half, zeros above: compare-equal on the
     narrower register (%t0 = ymm, %x0 = xmm).  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
	{
	  gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
	  return "vpcmpeqd\t%t0, %t0, %t0" ;
	}
      else if (GET_MODE_SIZE (mode) == 32)
	{
	  gcc_assert (TARGET_AVX);
	  return "vpcmpeqd\t%x0, %x0, %x0" ;
	}
      gcc_unreachable ();
    }
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
      return "vpcmpeqd\t%x0, %x0, %x0" ;
    }

  gcc_unreachable ();
}
5470 | |
5471 | /* Returns true if INSN can be transformed from a memory load |
5472 | to a supported FP constant load. */ |
5473 | |
5474 | bool |
5475 | ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) |
5476 | { |
5477 | rtx src = find_constant_src (insn); |
5478 | |
5479 | gcc_assert (REG_P (dst)); |
5480 | |
5481 | if (src == NULL |
5482 | || (SSE_REGNO_P (REGNO (dst)) |
5483 | && standard_sse_constant_p (x: src, GET_MODE (dst)) != 1) |
5484 | || (!TARGET_AVX512VL |
5485 | && EXT_REX_SSE_REGNO_P (REGNO (dst)) |
5486 | && standard_sse_constant_p (x: src, GET_MODE (dst)) == 1) |
5487 | || (STACK_REGNO_P (REGNO (dst)) |
5488 | && standard_80387_constant_p (x: src) < 1)) |
5489 | return false; |
5490 | |
5491 | return true; |
5492 | } |
5493 | |
5494 | /* Predicate for pre-reload splitters with associated instructions, |
5495 | which can match any time before the split1 pass (usually combine), |
5496 | then are unconditionally split in that pass and should not be |
5497 | matched again afterwards. */ |
5498 | |
5499 | bool |
5500 | ix86_pre_reload_split (void) |
5501 | { |
5502 | return (can_create_pseudo_p () |
5503 | && !(cfun->curr_properties & PROP_rtl_split_insns)); |
5504 | } |
5505 | |
5506 | /* Return the opcode of the TYPE_SSEMOV instruction. To move from |
5507 | or to xmm16-xmm31/ymm16-ymm31 registers, we either require |
5508 | TARGET_AVX512VL or it is a register to register move which can |
5509 | be done with zmm register move. */ |
5510 | |
5511 | static const char * |
5512 | ix86_get_ssemov (rtx *operands, unsigned size, |
5513 | enum attr_mode insn_mode, machine_mode mode) |
5514 | { |
5515 | char buf[128]; |
5516 | bool misaligned_p = (misaligned_operand (operands[0], mode) |
5517 | || misaligned_operand (operands[1], mode)); |
5518 | bool evex_reg_p = (size == 64 |
5519 | || EXT_REX_SSE_REG_P (operands[0]) |
5520 | || EXT_REX_SSE_REG_P (operands[1])); |
5521 | |
5522 | bool egpr_p = (TARGET_APX_EGPR |
5523 | && (x86_extended_rex2reg_mentioned_p (operands[0]) |
5524 | || x86_extended_rex2reg_mentioned_p (operands[1]))); |
5525 | bool egpr_vl = egpr_p && TARGET_AVX512VL; |
5526 | |
5527 | machine_mode scalar_mode; |
5528 | |
5529 | const char *opcode = NULL; |
5530 | enum |
5531 | { |
5532 | opcode_int, |
5533 | opcode_float, |
5534 | opcode_double |
5535 | } type = opcode_int; |
5536 | |
5537 | switch (insn_mode) |
5538 | { |
5539 | case MODE_V16SF: |
5540 | case MODE_V8SF: |
5541 | case MODE_V4SF: |
5542 | scalar_mode = E_SFmode; |
5543 | type = opcode_float; |
5544 | break; |
5545 | case MODE_V8DF: |
5546 | case MODE_V4DF: |
5547 | case MODE_V2DF: |
5548 | scalar_mode = E_DFmode; |
5549 | type = opcode_double; |
5550 | break; |
5551 | case MODE_XI: |
5552 | case MODE_OI: |
5553 | case MODE_TI: |
5554 | scalar_mode = GET_MODE_INNER (mode); |
5555 | break; |
5556 | default: |
5557 | gcc_unreachable (); |
5558 | } |
5559 | |
5560 | /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL, |
5561 | we can only use zmm register move without memory operand. */ |
5562 | if (evex_reg_p |
5563 | && !TARGET_AVX512VL |
5564 | && GET_MODE_SIZE (mode) < 64) |
5565 | { |
5566 | /* NB: Even though ix86_hard_regno_mode_ok doesn't allow |
5567 | xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when |
5568 | AVX512VL is disabled, LRA can still generate reg to |
5569 | reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit |
5570 | modes. */ |
5571 | if (memory_operand (operands[0], mode) |
5572 | || memory_operand (operands[1], mode)) |
5573 | gcc_unreachable (); |
5574 | size = 64; |
5575 | /* We need TARGET_EVEX512 to move into zmm register. */ |
5576 | gcc_assert (TARGET_EVEX512); |
5577 | switch (type) |
5578 | { |
5579 | case opcode_int: |
5580 | if (scalar_mode == E_HFmode || scalar_mode == E_BFmode) |
5581 | opcode = (misaligned_p |
5582 | ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64" ) |
5583 | : "vmovdqa64" ); |
5584 | else |
5585 | opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ; |
5586 | break; |
5587 | case opcode_float: |
5588 | opcode = misaligned_p ? "vmovups" : "vmovaps" ; |
5589 | break; |
5590 | case opcode_double: |
5591 | opcode = misaligned_p ? "vmovupd" : "vmovapd" ; |
5592 | break; |
5593 | } |
5594 | } |
5595 | else if (SCALAR_FLOAT_MODE_P (scalar_mode)) |
5596 | { |
5597 | switch (scalar_mode) |
5598 | { |
5599 | case E_HFmode: |
5600 | case E_BFmode: |
5601 | if (evex_reg_p || egpr_vl) |
5602 | opcode = (misaligned_p |
5603 | ? (TARGET_AVX512BW |
5604 | ? "vmovdqu16" |
5605 | : "vmovdqu64" ) |
5606 | : "vmovdqa64" ); |
5607 | else if (egpr_p) |
5608 | opcode = (misaligned_p |
5609 | ? (TARGET_AVX512BW |
5610 | ? "vmovdqu16" |
5611 | : "%vmovups" ) |
5612 | : "%vmovaps" ); |
5613 | else |
5614 | opcode = (misaligned_p |
5615 | ? (TARGET_AVX512BW |
5616 | ? "vmovdqu16" |
5617 | : "%vmovdqu" ) |
5618 | : "%vmovdqa" ); |
5619 | break; |
5620 | case E_SFmode: |
5621 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
5622 | break; |
5623 | case E_DFmode: |
5624 | opcode = misaligned_p ? "%vmovupd" : "%vmovapd" ; |
5625 | break; |
5626 | case E_TFmode: |
5627 | if (evex_reg_p || egpr_vl) |
5628 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
5629 | else if (egpr_p) |
5630 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
5631 | else |
5632 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
5633 | break; |
5634 | default: |
5635 | gcc_unreachable (); |
5636 | } |
5637 | } |
5638 | else if (SCALAR_INT_MODE_P (scalar_mode)) |
5639 | { |
5640 | switch (scalar_mode) |
5641 | { |
5642 | case E_QImode: |
5643 | if (evex_reg_p || egpr_vl) |
5644 | opcode = (misaligned_p |
5645 | ? (TARGET_AVX512BW |
5646 | ? "vmovdqu8" |
5647 | : "vmovdqu64" ) |
5648 | : "vmovdqa64" ); |
5649 | else if (egpr_p) |
5650 | opcode = (misaligned_p |
5651 | ? (TARGET_AVX512BW |
5652 | ? "vmovdqu8" |
5653 | : "%vmovups" ) |
5654 | : "%vmovaps" ); |
5655 | else |
5656 | opcode = (misaligned_p |
5657 | ? (TARGET_AVX512BW |
5658 | ? "vmovdqu8" |
5659 | : "%vmovdqu" ) |
5660 | : "%vmovdqa" ); |
5661 | break; |
5662 | case E_HImode: |
5663 | if (evex_reg_p || egpr_vl) |
5664 | opcode = (misaligned_p |
5665 | ? (TARGET_AVX512BW |
5666 | ? "vmovdqu16" |
5667 | : "vmovdqu64" ) |
5668 | : "vmovdqa64" ); |
5669 | else if (egpr_p) |
5670 | opcode = (misaligned_p |
5671 | ? (TARGET_AVX512BW |
5672 | ? "vmovdqu16" |
5673 | : "%vmovups" ) |
5674 | : "%vmovaps" ); |
5675 | else |
5676 | opcode = (misaligned_p |
5677 | ? (TARGET_AVX512BW |
5678 | ? "vmovdqu16" |
5679 | : "%vmovdqu" ) |
5680 | : "%vmovdqa" ); |
5681 | break; |
5682 | case E_SImode: |
5683 | if (evex_reg_p || egpr_vl) |
5684 | opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32" ; |
5685 | else if (egpr_p) |
5686 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
5687 | else |
5688 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
5689 | break; |
5690 | case E_DImode: |
5691 | case E_TImode: |
5692 | case E_OImode: |
5693 | if (evex_reg_p || egpr_vl) |
5694 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
5695 | else if (egpr_p) |
5696 | opcode = misaligned_p ? "%vmovups" : "%vmovaps" ; |
5697 | else |
5698 | opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa" ; |
5699 | break; |
5700 | case E_XImode: |
5701 | opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64" ; |
5702 | break; |
5703 | default: |
5704 | gcc_unreachable (); |
5705 | } |
5706 | } |
5707 | else |
5708 | gcc_unreachable (); |
5709 | |
5710 | switch (size) |
5711 | { |
5712 | case 64: |
5713 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%g1, %%g0|%%g0, %%g1}" , |
5714 | opcode); |
5715 | break; |
5716 | case 32: |
5717 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%t1, %%t0|%%t0, %%t1}" , |
5718 | opcode); |
5719 | break; |
5720 | case 16: |
5721 | snprintf (s: buf, maxlen: sizeof (buf), format: "%s\t{%%x1, %%x0|%%x0, %%x1}" , |
5722 | opcode); |
5723 | break; |
5724 | default: |
5725 | gcc_unreachable (); |
5726 | } |
5727 | output_asm_insn (buf, operands); |
5728 | return "" ; |
5729 | } |
5730 | |
/* Return the template of the TYPE_SSEMOV instruction to move
   operands[1] into operands[0].  */

const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Only TYPE_SSEMOV insns whose source and destination share one
     machine mode are handled here.  */
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    /* Full-vector moves are delegated to ix86_get_ssemov together
       with the vector width in bytes (64/32/16).  */
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, size: 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, size: 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, size: 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (GENERAL_REG_P (operands[0]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%1, %q0|%q0, %1}";
	  else
	    return "%vmovd\t{%1, %q0|%q0, %1}";
	}
      else if (GENERAL_REG_P (operands[1]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%q1, %0|%0, %q1}";
	  else
	    return "%vmovd\t{%q1, %0|%0, %q1}";
	}
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      /* Address a general register operand via its %k (32-bit) form.  */
      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "%vmovd\t{%k1, %0|%0, %k1}";
      else
	return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      /* 16-bit scalar move; the GPR operand is still addressed via its
	 %k (32-bit) form.  */
      if (GENERAL_REG_P (operands[0]))
	return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "vmovw\t{%k1, %0|%0, %k1}";
      else
	return "vmovw\t{%1, %0|%0, %1}";

    case MODE_DF:
      /* NOTE(review): for reg-reg moves under AVX the %d duplicate
	 operand form is used — presumably to avoid a partial-register
	 dependency; confirm against the sse.md move patterns.  */
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsh\t{%d1, %0|%0, %d1}";
      else
	return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
5829 | |
5830 | /* Returns true if OP contains a symbol reference */ |
5831 | |
5832 | bool |
5833 | symbolic_reference_mentioned_p (rtx op) |
5834 | { |
5835 | const char *fmt; |
5836 | int i; |
5837 | |
5838 | if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) |
5839 | return true; |
5840 | |
5841 | fmt = GET_RTX_FORMAT (GET_CODE (op)); |
5842 | for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
5843 | { |
5844 | if (fmt[i] == 'E') |
5845 | { |
5846 | int j; |
5847 | |
5848 | for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
5849 | if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
5850 | return true; |
5851 | } |
5852 | |
5853 | else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
5854 | return true; |
5855 | } |
5856 | |
5857 | return false; |
5858 | } |
5859 | |
5860 | /* Return true if it is appropriate to emit `ret' instructions in the |
5861 | body of a function. Do this only if the epilogue is simple, needing a |
5862 | couple of insns. Prior to reloading, we can't tell how many registers |
5863 | must be saved, so return false then. Return false if there is no frame |
5864 | marker to de-allocate. */ |
5865 | |
5866 | bool |
5867 | ix86_can_use_return_insn_p (void) |
5868 | { |
5869 | if (ix86_function_ms_hook_prologue (fn: current_function_decl)) |
5870 | return false; |
5871 | |
5872 | if (ix86_function_naked (fn: current_function_decl)) |
5873 | return false; |
5874 | |
5875 | /* Don't use `ret' instruction in interrupt handler. */ |
5876 | if (! reload_completed |
5877 | || frame_pointer_needed |
5878 | || cfun->machine->func_type != TYPE_NORMAL) |
5879 | return 0; |
5880 | |
5881 | /* Don't allow more than 32k pop, since that's all we can do |
5882 | with one instruction. */ |
5883 | if (crtl->args.pops_args && crtl->args.size >= 32768) |
5884 | return 0; |
5885 | |
5886 | struct ix86_frame &frame = cfun->machine->frame; |
5887 | return (frame.stack_pointer_offset == UNITS_PER_WORD |
5888 | && (frame.nregs + frame.nsseregs) == 0); |
5889 | } |
5890 | |
5891 | /* Return stack frame size. get_frame_size () returns used stack slots |
5892 | during compilation, which may be optimized out later. If stack frame |
5893 | is needed, stack_frame_required should be true. */ |
5894 | |
5895 | static HOST_WIDE_INT |
5896 | ix86_get_frame_size (void) |
5897 | { |
5898 | if (cfun->machine->stack_frame_required) |
5899 | return get_frame_size (); |
5900 | else |
5901 | return 0; |
5902 | } |
5903 | |
5904 | /* Value should be nonzero if functions must have frame pointers. |
5905 | Zero means the frame pointer need not be set up (and parms may |
5906 | be accessed via the stack pointer) in functions that seem suitable. */ |
5907 | |
5908 | static bool |
5909 | ix86_frame_pointer_required (void) |
5910 | { |
5911 | /* If we accessed previous frames, then the generated code expects |
5912 | to be able to access the saved ebp value in our frame. */ |
5913 | if (cfun->machine->accesses_prev_frame) |
5914 | return true; |
5915 | |
5916 | /* Several x86 os'es need a frame pointer for other reasons, |
5917 | usually pertaining to setjmp. */ |
5918 | if (SUBTARGET_FRAME_POINTER_REQUIRED) |
5919 | return true; |
5920 | |
5921 | /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ |
5922 | if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) |
5923 | return true; |
5924 | |
5925 | /* Win64 SEH, very large frames need a frame-pointer as maximum stack |
5926 | allocation is 4GB. */ |
5927 | if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) |
5928 | return true; |
5929 | |
5930 | /* SSE saves require frame-pointer when stack is misaligned. */ |
5931 | if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) |
5932 | return true; |
5933 | |
5934 | /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER |
5935 | turns off the frame pointer by default. Turn it back on now if |
5936 | we've not got a leaf function. */ |
5937 | if (TARGET_OMIT_LEAF_FRAME_POINTER |
5938 | && (!crtl->is_leaf |
5939 | || ix86_current_function_calls_tls_descriptor)) |
5940 | return true; |
5941 | |
5942 | /* Several versions of mcount for the x86 assumes that there is a |
5943 | frame, so we cannot allow profiling without a frame pointer. */ |
5944 | if (crtl->profile && !flag_fentry) |
5945 | return true; |
5946 | |
5947 | return false; |
5948 | } |
5949 | |
/* Record that the current function accesses previous call frames.
   Read by ix86_frame_pointer_required, which then forces a frame
   pointer so the saved ebp chain can be followed.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
5957 | |
5958 | #ifndef USE_HIDDEN_LINKONCE |
5959 | # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) |
5960 | # define USE_HIDDEN_LINKONCE 1 |
5961 | # else |
5962 | # define USE_HIDDEN_LINKONCE 0 |
5963 | # endif |
5964 | #endif |
5965 | |
5966 | /* Label count for call and return thunks. It is used to make unique |
5967 | labels in call and return thunks. */ |
5968 | static int indirectlabelno; |
5969 | |
5970 | /* True if call thunk function is needed. */ |
5971 | static bool indirect_thunk_needed = false; |
5972 | |
5973 | /* Bit masks of integer registers, which contain branch target, used |
5974 | by call thunk functions. */ |
5975 | static HARD_REG_SET indirect_thunks_used; |
5976 | |
5977 | /* True if return thunk function is needed. */ |
5978 | static bool indirect_return_needed = false; |
5979 | |
5980 | /* True if return thunk function via CX is needed. */ |
5981 | static bool indirect_return_via_cx; |
5982 | |
5983 | #ifndef INDIRECT_LABEL |
5984 | # define INDIRECT_LABEL "LIND" |
5985 | #endif |
5986 | |
/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,	/* No prefix needed.  */
  indirect_thunk_prefix_nt	/* NOTRACK prefix; only used with the
				   external thunk so it can be updated
				   to support CET at run-time.  */
};
5993 | |
5994 | /* Return the prefix needed for an indirect branch INSN. */ |
5995 | |
5996 | enum indirect_thunk_prefix |
5997 | indirect_thunk_need_prefix (rtx_insn *insn) |
5998 | { |
5999 | enum indirect_thunk_prefix need_prefix; |
6000 | if ((cfun->machine->indirect_branch_type |
6001 | == indirect_branch_thunk_extern) |
6002 | && ix86_notrack_prefixed_insn_p (insn)) |
6003 | { |
6004 | /* NOTRACK prefix is only used with external thunk so that it |
6005 | can be properly updated to support CET at run-time. */ |
6006 | need_prefix = indirect_thunk_prefix_nt; |
6007 | } |
6008 | else |
6009 | need_prefix = indirect_thunk_prefix_none; |
6010 | return need_prefix; |
6011 | } |
6012 | |
6013 | /* Fills in the label name that should be used for the indirect thunk. */ |
6014 | |
6015 | static void |
6016 | indirect_thunk_name (char name[32], unsigned int regno, |
6017 | enum indirect_thunk_prefix need_prefix, |
6018 | bool ret_p) |
6019 | { |
6020 | if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) |
6021 | gcc_unreachable (); |
6022 | |
6023 | if (USE_HIDDEN_LINKONCE) |
6024 | { |
6025 | const char *prefix; |
6026 | |
6027 | if (need_prefix == indirect_thunk_prefix_nt |
6028 | && regno != INVALID_REGNUM) |
6029 | { |
6030 | /* NOTRACK prefix is only used with external thunk via |
6031 | register so that NOTRACK prefix can be added to indirect |
6032 | branch via register to support CET at run-time. */ |
6033 | prefix = "_nt" ; |
6034 | } |
6035 | else |
6036 | prefix = "" ; |
6037 | |
6038 | const char *ret = ret_p ? "return" : "indirect" ; |
6039 | |
6040 | if (regno != INVALID_REGNUM) |
6041 | { |
6042 | const char *reg_prefix; |
6043 | if (LEGACY_INT_REGNO_P (regno)) |
6044 | reg_prefix = TARGET_64BIT ? "r" : "e" ; |
6045 | else |
6046 | reg_prefix = "" ; |
6047 | sprintf (s: name, format: "__x86_%s_thunk%s_%s%s" , |
6048 | ret, prefix, reg_prefix, reg_names[regno]); |
6049 | } |
6050 | else |
6051 | sprintf (s: name, format: "__x86_%s_thunk%s" , ret, prefix); |
6052 | } |
6053 | else |
6054 | { |
6055 | if (regno != INVALID_REGNUM) |
6056 | ASM_GENERATE_INTERNAL_LABEL (name, "LITR" , regno); |
6057 | else |
6058 | { |
6059 | if (ret_p) |
6060 | ASM_GENERATE_INTERNAL_LABEL (name, "LRT" , 0); |
6061 | else |
6062 | ASM_GENERATE_INTERNAL_LABEL (name, "LIT" , 0); |
6063 | } |
6064 | } |
6065 | } |
6066 | |
/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

	call L2
   L1:
	pause
	lfence
	jmp L1
   L2:
	lea WORD_SIZE(%sp), %sp
	ret
 */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  /* Generate the two local labels (L1 and L2 above).  */
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs (s: "\tcall\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (stream: asm_out_file, format: "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs (s: "\tjmp\t", stream: asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc (c: '\n', stream: asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  /* Advance the CFI location to label L2 before changing the
	     CFA offset.  */
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
	}
      /* Return address plus the word pushed by the call above.  */
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, obj: xcfi);
      dwarf2out_emit_cfi (cfi: xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the pushed return address with the branch
	 target held in REGNO (see the diagram above).  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA: discard the slot pushed by the call; the branch target
	 is the word underneath it.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs (s: "\tret\n", stream: asm_out_file);
  /* Mitigate straight-line speculation past the ret.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs (s: "\tint3\n", stream: asm_out_file);
}
6159 | |
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      /* Mach-O has no comdat sections; emit a weak, private-extern
	 definition in the picbase thunk section instead.  */
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
  if (USE_HIDDEN_LINKONCE)
    {
      /* Emit the thunk as a hidden comdat so that identical copies
	 from multiple objects are merged by the linker.  */
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, name);
      fputs (s: "\t.hidden\t", stream: asm_out_file);
      assemble_name (asm_out_file, name);
      putc (c: '\n', stream: asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, name);
    }

  /* Set up a minimal function context so that final_* can emit the
     thunk body.  */
  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear the temporary function context back down.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
6235 | |
6236 | static int pic_labels_used; |
6237 | |
6238 | /* Fills in the label name that should be used for a pc thunk for |
6239 | the given register. */ |
6240 | |
6241 | static void |
6242 | get_pc_thunk_name (char name[32], unsigned int regno) |
6243 | { |
6244 | gcc_assert (!TARGET_64BIT); |
6245 | |
6246 | if (USE_HIDDEN_LINKONCE) |
6247 | sprintf (s: name, format: "__x86.get_pc_thunk.%s" , reg_names[regno]); |
6248 | else |
6249 | ASM_GENERATE_INTERNAL_LABEL (name, "LPR" , regno); |
6250 | } |
6251 | |
6252 | |
/* Emit deferred text at the end of compilation: the indirect-branch
   and return thunks recorded during code generation, plus (for -fpic)
   the pc thunks that load a register with the return address of the
   caller and then return.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* Return thunks, plain and via CX.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    CX_REG, ret_p: true);
  /* Call thunk with the target on the top of stack.  */
  if (indirect_thunk_needed)
    output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
				    INVALID_REGNUM, ret_p: false);

  /* Per-register call thunks for the REX integer registers.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* Likewise for the REX2 integer registers.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);
    }

  /* For the legacy integer registers, emit any call thunks and then
     any pc thunks that output_set_got recorded in pic_labels_used.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (set: indirect_thunks_used, bit: regno))
	output_indirect_thunk_function (need_prefix: indirect_thunk_prefix_none,
					regno, ret_p: false);

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Mach-O: weak, private-extern definition in the picbase
	     thunk section.  */
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* Hidden comdat so duplicate thunks merge at link time.  */
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs (s: "\t.hidden\t", stream: asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc (c: '\n', stream: asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Set up a minimal function context for final_*.  */
      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs (s: "\tnop\n", stream: asm_out_file);
	}

      /* The thunk body: load the caller's return address (top of
	 stack) into the register and return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs (s: "\tret\n", stream: asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
6377 | |
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* Call the pc thunk for DEST's register; the thunk itself is
	 emitted later by ix86_code_end, keyed off pic_labels_used.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      /* Non-pic: load the address of a local label directly.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    /* Add the GOT symbol offset to form the final GOT pointer.  */
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
6447 | |
6448 | /* Generate an "push" pattern for input ARG. */ |
6449 | |
6450 | rtx |
6451 | gen_push (rtx arg) |
6452 | { |
6453 | struct machine_function *m = cfun->machine; |
6454 | |
6455 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6456 | m->fs.cfa_offset += UNITS_PER_WORD; |
6457 | m->fs.sp_offset += UNITS_PER_WORD; |
6458 | |
6459 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6460 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6461 | |
6462 | return gen_rtx_SET (gen_rtx_MEM (word_mode, |
6463 | gen_rtx_PRE_DEC (Pmode, |
6464 | stack_pointer_rtx)), |
6465 | arg); |
6466 | } |
6467 | |
6468 | /* Generate an "pop" pattern for input ARG. */ |
6469 | |
6470 | rtx |
6471 | gen_pop (rtx arg) |
6472 | { |
6473 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6474 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6475 | |
6476 | return gen_rtx_SET (arg, |
6477 | gen_rtx_MEM (word_mode, |
6478 | gen_rtx_POST_INC (Pmode, |
6479 | stack_pointer_rtx))); |
6480 | } |
6481 | |
6482 | /* Generate a "push2" pattern for input ARG. */ |
6483 | rtx |
6484 | gen_push2 (rtx mem, rtx reg1, rtx reg2) |
6485 | { |
6486 | struct machine_function *m = cfun->machine; |
6487 | const int offset = UNITS_PER_WORD * 2; |
6488 | |
6489 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6490 | m->fs.cfa_offset += offset; |
6491 | m->fs.sp_offset += offset; |
6492 | |
6493 | if (REG_P (reg1) && GET_MODE (reg1) != word_mode) |
6494 | reg1 = gen_rtx_REG (word_mode, REGNO (reg1)); |
6495 | |
6496 | if (REG_P (reg2) && GET_MODE (reg2) != word_mode) |
6497 | reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); |
6498 | |
6499 | return gen_push2_di (mem, reg1, reg2); |
6500 | } |
6501 | |
6502 | /* Return >= 0 if there is an unused call-clobbered register available |
6503 | for the entire function. */ |
6504 | |
6505 | static unsigned int |
6506 | ix86_select_alt_pic_regnum (void) |
6507 | { |
6508 | if (ix86_use_pseudo_pic_reg ()) |
6509 | return INVALID_REGNUM; |
6510 | |
6511 | if (crtl->is_leaf |
6512 | && !crtl->profile |
6513 | && !ix86_current_function_calls_tls_descriptor) |
6514 | { |
6515 | int i, drap; |
6516 | /* Can't use the same register for both PIC and DRAP. */ |
6517 | if (crtl->drap_reg) |
6518 | drap = REGNO (crtl->drap_reg); |
6519 | else |
6520 | drap = -1; |
6521 | for (i = 2; i >= 0; --i) |
6522 | if (i != drap && !df_regs_ever_live_p (i)) |
6523 | return i; |
6524 | } |
6525 | |
6526 | return INVALID_REGNUM; |
6527 | } |
6528 | |
6529 | /* Return true if REGNO is used by the epilogue. */ |
6530 | |
6531 | bool |
6532 | ix86_epilogue_uses (int regno) |
6533 | { |
6534 | /* If there are no caller-saved registers, we preserve all registers, |
6535 | except for MMX and x87 registers which aren't supported when saving |
6536 | and restoring registers. Don't explicitly save SP register since |
6537 | it is always preserved. */ |
6538 | return (epilogue_completed |
6539 | && cfun->machine->no_caller_saved_registers |
6540 | && !fixed_regs[regno] |
6541 | && !STACK_REGNO_P (regno) |
6542 | && !MMX_REGNO_P (regno)); |
6543 | } |
6544 | |
6545 | /* Return nonzero if register REGNO can be used as a scratch register |
6546 | in peephole2. */ |
6547 | |
6548 | static bool |
6549 | ix86_hard_regno_scratch_ok (unsigned int regno) |
6550 | { |
6551 | /* If there are no caller-saved registers, we can't use any register |
6552 | as a scratch register after epilogue and use REGNO as scratch |
6553 | register only if it has been used before to avoid saving and |
6554 | restoring it. */ |
6555 | return (!cfun->machine->no_caller_saved_registers |
6556 | || (!epilogue_completed |
6557 | && df_regs_ever_live_p (regno))); |
6558 | } |
6559 | |
/* Return TRUE if we need to save REGNO.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  if (cfun->machine->no_caller_saved_registers)
    {
      /* Don't preserve registers used for function return value.  */
      rtx reg = crtl->return_rtx;
      if (reg)
	{
	  /* REGNO must be skipped if it is any of the hard registers
	     [i, i + nregs - 1] making up the return value.  */
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));
    }

  /* The PIC register gets special treatment once it has been set up.  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	  _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	/* Save the PIC register only if no other unused call-clobbered
	   register can stand in for it.  */
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      /* The EH data registers must survive the prologue so the landing
	 pad can read them; scan the (INVALID_REGNUM-terminated) list.  */
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      /* Registers saved/restored by the ms2sysv out-of-line stubs need
	 not be handled here.  */
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  /* The DRAP register must be preserved across the function unless its
     save/restore was explicitly suppressed.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save call-saved registers that are ever live, except
     the hard frame pointer when it is maintained as a frame pointer.  */
  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
6638 | |
6639 | /* Return number of saved general prupose registers. */ |
6640 | |
6641 | static int |
6642 | ix86_nsaved_regs (void) |
6643 | { |
6644 | int nregs = 0; |
6645 | int regno; |
6646 | |
6647 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6648 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
6649 | nregs ++; |
6650 | return nregs; |
6651 | } |
6652 | |
6653 | /* Return number of saved SSE registers. */ |
6654 | |
6655 | static int |
6656 | ix86_nsaved_sseregs (void) |
6657 | { |
6658 | int nregs = 0; |
6659 | int regno; |
6660 | |
6661 | if (!TARGET_64BIT_MS_ABI) |
6662 | return 0; |
6663 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6664 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
6665 | nregs ++; |
6666 | return nregs; |
6667 | } |
6668 | |
6669 | /* Given FROM and TO register numbers, say whether this elimination is |
6670 | allowed. If stack alignment is needed, we can only replace argument |
6671 | pointer with hard frame pointer, or replace frame pointer with stack |
6672 | pointer. Otherwise, frame pointer elimination is automatically |
6673 | handled and all other eliminations are valid. */ |
6674 | |
6675 | static bool |
6676 | ix86_can_eliminate (const int from, const int to) |
6677 | { |
6678 | if (stack_realign_fp) |
6679 | return ((from == ARG_POINTER_REGNUM |
6680 | && to == HARD_FRAME_POINTER_REGNUM) |
6681 | || (from == FRAME_POINTER_REGNUM |
6682 | && to == STACK_POINTER_REGNUM)); |
6683 | else |
6684 | return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
6685 | } |
6686 | |
6687 | /* Return the offset between two registers, one to be eliminated, and the other |
6688 | its replacement, at the start of a routine. */ |
6689 | |
6690 | HOST_WIDE_INT |
6691 | ix86_initial_elimination_offset (int from, int to) |
6692 | { |
6693 | struct ix86_frame &frame = cfun->machine->frame; |
6694 | |
6695 | if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
6696 | return frame.hard_frame_pointer_offset; |
6697 | else if (from == FRAME_POINTER_REGNUM |
6698 | && to == HARD_FRAME_POINTER_REGNUM) |
6699 | return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
6700 | else |
6701 | { |
6702 | gcc_assert (to == STACK_POINTER_REGNUM); |
6703 | |
6704 | if (from == ARG_POINTER_REGNUM) |
6705 | return frame.stack_pointer_offset; |
6706 | |
6707 | gcc_assert (from == FRAME_POINTER_REGNUM); |
6708 | return frame.stack_pointer_offset - frame.frame_pointer_offset; |
6709 | } |
6710 | } |
6711 | |
6712 | /* Emits a warning for unsupported msabi to sysv pro/epilogues. */ |
6713 | void |
6714 | warn_once_call_ms2sysv_xlogues (const char *feature) |
6715 | { |
6716 | static bool warned_once = false; |
6717 | if (!warned_once) |
6718 | { |
6719 | warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s" , |
6720 | feature); |
6721 | warned_once = true; |
6722 | } |
6723 | } |
6724 | |
6725 | /* Return the probing interval for -fstack-clash-protection. */ |
6726 | |
6727 | static HOST_WIDE_INT |
6728 | get_probe_interval (void) |
6729 | { |
6730 | if (flag_stack_clash_protection) |
6731 | return (HOST_WIDE_INT_1U |
6732 | << param_stack_clash_protection_probe_interval); |
6733 | else |
6734 | return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); |
6735 | } |
6736 | |
6737 | /* When using -fsplit-stack, the allocation routines set a field in |
6738 | the TCB to the bottom of the stack plus this much space, measured |
6739 | in bytes. */ |
6740 | |
6741 | #define SPLIT_STACK_AVAILABLE 256 |
6742 | |
6743 | /* Helper function to determine whether push2/pop2 can be used in prologue or |
6744 | epilogue for register save/restore. */ |
6745 | static bool |
6746 | ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) |
6747 | { |
6748 | int aligned = cfun->machine->fs.sp_offset % 16 == 0; |
6749 | return TARGET_APX_PUSH2POP2 |
6750 | && !cfun->machine->frame.save_regs_using_mov |
6751 | && cfun->machine->func_type == TYPE_NORMAL |
6752 | && (nregs + aligned) >= 3; |
6753 | } |
6754 | |
6755 | /* Fill structure ix86_frame about frame of currently computed function. */ |
6756 | |
6757 | static void |
6758 | ix86_compute_frame_layout (void) |
6759 | { |
6760 | struct ix86_frame *frame = &cfun->machine->frame; |
6761 | struct machine_function *m = cfun->machine; |
6762 | unsigned HOST_WIDE_INT stack_alignment_needed; |
6763 | HOST_WIDE_INT offset; |
6764 | unsigned HOST_WIDE_INT preferred_alignment; |
6765 | HOST_WIDE_INT size = ix86_get_frame_size (); |
6766 | HOST_WIDE_INT to_allocate; |
6767 | |
6768 | /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit |
6769 | * ms_abi functions that call a sysv function. We now need to prune away |
6770 | * cases where it should be disabled. */ |
6771 | if (TARGET_64BIT && m->call_ms2sysv) |
6772 | { |
6773 | gcc_assert (TARGET_64BIT_MS_ABI); |
6774 | gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); |
6775 | gcc_assert (!TARGET_SEH); |
6776 | gcc_assert (TARGET_SSE); |
6777 | gcc_assert (!ix86_using_red_zone ()); |
6778 | |
6779 | if (crtl->calls_eh_return) |
6780 | { |
6781 | gcc_assert (!reload_completed); |
6782 | m->call_ms2sysv = false; |
6783 | warn_once_call_ms2sysv_xlogues (feature: "__builtin_eh_return" ); |
6784 | } |
6785 | |
6786 | else if (ix86_static_chain_on_stack) |
6787 | { |
6788 | gcc_assert (!reload_completed); |
6789 | m->call_ms2sysv = false; |
6790 | warn_once_call_ms2sysv_xlogues (feature: "static call chains" ); |
6791 | } |
6792 | |
6793 | /* Finally, compute which registers the stub will manage. */ |
6794 | else |
6795 | { |
6796 | unsigned count = xlogue_layout::count_stub_managed_regs (); |
6797 | m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; |
6798 | m->call_ms2sysv_pad_in = 0; |
6799 | } |
6800 | } |
6801 | |
6802 | frame->nregs = ix86_nsaved_regs (); |
6803 | frame->nsseregs = ix86_nsaved_sseregs (); |
6804 | |
6805 | /* 64-bit MS ABI seem to require stack alignment to be always 16, |
6806 | except for function prologues, leaf functions and when the defult |
6807 | incoming stack boundary is overriden at command line or via |
6808 | force_align_arg_pointer attribute. |
6809 | |
6810 | Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants |
6811 | at call sites, including profile function calls. |
6812 | |
6813 | For APX push2/pop2, the stack also requires 128b alignment. */ |
6814 | if ((ix86_pro_and_epilogue_can_use_push2pop2 (nregs: frame->nregs) |
6815 | && crtl->preferred_stack_boundary < 128) |
6816 | || (((TARGET_64BIT_MS_ABI || TARGET_MACHO) |
6817 | && crtl->preferred_stack_boundary < 128) |
6818 | && (!crtl->is_leaf || cfun->calls_alloca != 0 |
6819 | || ix86_current_function_calls_tls_descriptor |
6820 | || (TARGET_MACHO && crtl->profile) |
6821 | || ix86_incoming_stack_boundary < 128))) |
6822 | { |
6823 | crtl->preferred_stack_boundary = 128; |
6824 | if (crtl->stack_alignment_needed < 128) |
6825 | crtl->stack_alignment_needed = 128; |
6826 | } |
6827 | |
6828 | stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; |
6829 | preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; |
6830 | |
6831 | gcc_assert (!size || stack_alignment_needed); |
6832 | gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); |
6833 | gcc_assert (preferred_alignment <= stack_alignment_needed); |
6834 | |
6835 | /* The only ABI saving SSE regs should be 64-bit ms_abi. */ |
6836 | gcc_assert (TARGET_64BIT || !frame->nsseregs); |
6837 | if (TARGET_64BIT && m->call_ms2sysv) |
6838 | { |
6839 | gcc_assert (stack_alignment_needed >= 16); |
6840 | gcc_assert (!frame->nsseregs); |
6841 | } |
6842 | |
6843 | /* For SEH we have to limit the amount of code movement into the prologue. |
6844 | At present we do this via a BLOCKAGE, at which point there's very little |
6845 | scheduling that can be done, which means that there's very little point |
6846 | in doing anything except PUSHs. */ |
6847 | if (TARGET_SEH) |
6848 | m->use_fast_prologue_epilogue = false; |
6849 | else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) |
6850 | { |
6851 | int count = frame->nregs; |
6852 | struct cgraph_node *node = cgraph_node::get (decl: current_function_decl); |
6853 | |
6854 | /* The fast prologue uses move instead of push to save registers. This |
6855 | is significantly longer, but also executes faster as modern hardware |
6856 | can execute the moves in parallel, but can't do that for push/pop. |
6857 | |
6858 | Be careful about choosing what prologue to emit: When function takes |
6859 | many instructions to execute we may use slow version as well as in |
6860 | case function is known to be outside hot spot (this is known with |
6861 | feedback only). Weight the size of function by number of registers |
6862 | to save as it is cheap to use one or two push instructions but very |
6863 | slow to use many of them. |
6864 | |
6865 | Calling this hook multiple times with the same frame requirements |
6866 | must produce the same layout, since the RA might otherwise be |
6867 | unable to reach a fixed point or might fail its final sanity checks. |
6868 | This means that once we've assumed that a function does or doesn't |
6869 | have a particular size, we have to stick to that assumption |
6870 | regardless of how the function has changed since. */ |
6871 | if (count) |
6872 | count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; |
6873 | if (node->frequency < NODE_FREQUENCY_NORMAL |
6874 | || (flag_branch_probabilities |
6875 | && node->frequency < NODE_FREQUENCY_HOT)) |
6876 | m->use_fast_prologue_epilogue = false; |
6877 | else |
6878 | { |
6879 | if (count != frame->expensive_count) |
6880 | { |
6881 | frame->expensive_count = count; |
6882 | frame->expensive_p = expensive_function_p (count); |
6883 | } |
6884 | m->use_fast_prologue_epilogue = !frame->expensive_p; |
6885 | } |
6886 | } |
6887 | |
6888 | frame->save_regs_using_mov |
6889 | = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue; |
6890 | |
6891 | /* Skip return address and error code in exception handler. */ |
6892 | offset = INCOMING_FRAME_SP_OFFSET; |
6893 | |
6894 | /* Skip pushed static chain. */ |
6895 | if (ix86_static_chain_on_stack) |
6896 | offset += UNITS_PER_WORD; |
6897 | |
6898 | /* Skip saved base pointer. */ |
6899 | if (frame_pointer_needed) |
6900 | offset += UNITS_PER_WORD; |
6901 | frame->hfp_save_offset = offset; |
6902 | |
6903 | /* The traditional frame pointer location is at the top of the frame. */ |
6904 | frame->hard_frame_pointer_offset = offset; |
6905 | |
6906 | /* Register save area */ |
6907 | offset += frame->nregs * UNITS_PER_WORD; |
6908 | frame->reg_save_offset = offset; |
6909 | |
6910 | /* Calculate the size of the va-arg area (not including padding, if any). */ |
6911 | frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; |
6912 | |
6913 | /* Also adjust stack_realign_offset for the largest alignment of |
6914 | stack slot actually used. */ |
6915 | if (stack_realign_fp |
6916 | || (cfun->machine->max_used_stack_alignment != 0 |
6917 | && (offset % cfun->machine->max_used_stack_alignment) != 0)) |
6918 | { |
6919 | /* We may need a 16-byte aligned stack for the remainder of the |
6920 | register save area, but the stack frame for the local function |
6921 | may require a greater alignment if using AVX/2/512. In order |
6922 | to avoid wasting space, we first calculate the space needed for |
6923 | the rest of the register saves, add that to the stack pointer, |
6924 | and then realign the stack to the boundary of the start of the |
6925 | frame for the local function. */ |
6926 | HOST_WIDE_INT space_needed = 0; |
6927 | HOST_WIDE_INT sse_reg_space_needed = 0; |
6928 | |
6929 | if (TARGET_64BIT) |
6930 | { |
6931 | if (m->call_ms2sysv) |
6932 | { |
6933 | m->call_ms2sysv_pad_in = 0; |
6934 | space_needed = xlogue_layout::get_instance ().get_stack_space_used (); |
6935 | } |
6936 | |
6937 | else if (frame->nsseregs) |
6938 | /* The only ABI that has saved SSE registers (Win64) also has a |
6939 | 16-byte aligned default stack. However, many programs violate |
6940 | the ABI, and Wine64 forces stack realignment to compensate. */ |
6941 | space_needed = frame->nsseregs * 16; |
6942 | |
6943 | sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); |
6944 | |
6945 | /* 64-bit frame->va_arg_size should always be a multiple of 16, but |
6946 | rounding to be pedantic. */ |
6947 | space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); |
6948 | } |
6949 | else |
6950 | space_needed = frame->va_arg_size; |
6951 | |
6952 | /* Record the allocation size required prior to the realignment AND. */ |
6953 | frame->stack_realign_allocate = space_needed; |
6954 | |
6955 | /* The re-aligned stack starts at frame->stack_realign_offset. Values |
6956 | before this point are not directly comparable with values below |
6957 | this point. Use sp_valid_at to determine if the stack pointer is |
6958 | valid for a given offset, fp_valid_at for the frame pointer, or |
6959 | choose_baseaddr to have a base register chosen for you. |
6960 | |
6961 | Note that the result of (frame->stack_realign_offset |
6962 | & (stack_alignment_needed - 1)) may not equal zero. */ |
6963 | offset = ROUND_UP (offset + space_needed, stack_alignment_needed); |
6964 | frame->stack_realign_offset = offset - space_needed; |
6965 | frame->sse_reg_save_offset = frame->stack_realign_offset |
6966 | + sse_reg_space_needed; |
6967 | } |
6968 | else |
6969 | { |
6970 | frame->stack_realign_offset = offset; |
6971 | |
6972 | if (TARGET_64BIT && m->call_ms2sysv) |
6973 | { |
6974 | m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); |
6975 | offset += xlogue_layout::get_instance ().get_stack_space_used (); |
6976 | } |
6977 | |
6978 | /* Align and set SSE register save area. */ |
6979 | else if (frame->nsseregs) |
6980 | { |
6981 | /* If the incoming stack boundary is at least 16 bytes, or DRAP is |
6982 | required and the DRAP re-alignment boundary is at least 16 bytes, |
6983 | then we want the SSE register save area properly aligned. */ |
6984 | if (ix86_incoming_stack_boundary >= 128 |
6985 | || (stack_realign_drap && stack_alignment_needed >= 16)) |
6986 | offset = ROUND_UP (offset, 16); |
6987 | offset += frame->nsseregs * 16; |
6988 | } |
6989 | frame->sse_reg_save_offset = offset; |
6990 | offset += frame->va_arg_size; |
6991 | } |
6992 | |
6993 | /* Align start of frame for local function. When a function call |
6994 | is removed, it may become a leaf function. But if argument may |
6995 | be passed on stack, we need to align the stack when there is no |
6996 | tail call. */ |
6997 | if (m->call_ms2sysv |
6998 | || frame->va_arg_size != 0 |
6999 | || size != 0 |
7000 | || !crtl->is_leaf |
7001 | || (!crtl->tail_call_emit |
7002 | && cfun->machine->outgoing_args_on_stack) |
7003 | || cfun->calls_alloca |
7004 | || ix86_current_function_calls_tls_descriptor) |
7005 | offset = ROUND_UP (offset, stack_alignment_needed); |
7006 | |
7007 | /* Frame pointer points here. */ |
7008 | frame->frame_pointer_offset = offset; |
7009 | |
7010 | offset += size; |
7011 | |
7012 | /* Add outgoing arguments area. Can be skipped if we eliminated |
7013 | all the function calls as dead code. |
7014 | Skipping is however impossible when function calls alloca. Alloca |
7015 | expander assumes that last crtl->outgoing_args_size |
7016 | of stack frame are unused. */ |
7017 | if (ACCUMULATE_OUTGOING_ARGS |
7018 | && (!crtl->is_leaf || cfun->calls_alloca |
7019 | || ix86_current_function_calls_tls_descriptor)) |
7020 | { |
7021 | offset += crtl->outgoing_args_size; |
7022 | frame->outgoing_arguments_size = crtl->outgoing_args_size; |
7023 | } |
7024 | else |
7025 | frame->outgoing_arguments_size = 0; |
7026 | |
7027 | /* Align stack boundary. Only needed if we're calling another function |
7028 | or using alloca. */ |
7029 | if (!crtl->is_leaf || cfun->calls_alloca |
7030 | || ix86_current_function_calls_tls_descriptor) |
7031 | offset = ROUND_UP (offset, preferred_alignment); |
7032 | |
7033 | /* We've reached end of stack frame. */ |
7034 | frame->stack_pointer_offset = offset; |
7035 | |
7036 | /* Size prologue needs to allocate. */ |
7037 | to_allocate = offset - frame->sse_reg_save_offset; |
7038 | |
7039 | if ((!to_allocate && frame->nregs <= 1) |
7040 | || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) |
7041 | /* If static stack checking is enabled and done with probes, |
7042 | the registers need to be saved before allocating the frame. */ |
7043 | || flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
7044 | /* If stack clash probing needs a loop, then it needs a |
7045 | scratch register. But the returned register is only guaranteed |
7046 | to be safe to use after register saves are complete. So if |
7047 | stack clash protections are enabled and the allocated frame is |
7048 | larger than the probe interval, then use pushes to save |
7049 | callee saved registers. */ |
7050 | || (flag_stack_clash_protection |
7051 | && !ix86_target_stack_probe () |
7052 | && to_allocate > get_probe_interval ())) |
7053 | frame->save_regs_using_mov = false; |
7054 | |
7055 | if (ix86_using_red_zone () |
7056 | && crtl->sp_is_unchanging |
7057 | && crtl->is_leaf |
7058 | && !ix86_pc_thunk_call_expanded |
7059 | && !ix86_current_function_calls_tls_descriptor) |
7060 | { |
7061 | frame->red_zone_size = to_allocate; |
7062 | if (frame->save_regs_using_mov) |
7063 | frame->red_zone_size += frame->nregs * UNITS_PER_WORD; |
7064 | if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) |
7065 | frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; |
7066 | } |
7067 | else |
7068 | frame->red_zone_size = 0; |
7069 | frame->stack_pointer_offset -= frame->red_zone_size; |
7070 | |
7071 | /* The SEH frame pointer location is near the bottom of the frame. |
7072 | This is enforced by the fact that the difference between the |
7073 | stack pointer and the frame pointer is limited to 240 bytes in |
7074 | the unwind data structure. */ |
7075 | if (TARGET_SEH) |
7076 | { |
7077 | /* Force the frame pointer to point at or below the lowest register save |
7078 | area, see the SEH code in config/i386/winnt.cc for the rationale. */ |
7079 | frame->hard_frame_pointer_offset = frame->sse_reg_save_offset; |
7080 | |
7081 | /* If we can leave the frame pointer where it is, do so; however return |
7082 | the establisher frame for __builtin_frame_address (0) or else if the |
7083 | frame overflows the SEH maximum frame size. |
7084 | |
7085 | Note that the value returned by __builtin_frame_address (0) is quite |
7086 | constrained, because setjmp is piggybacked on the SEH machinery with |
7087 | recent versions of MinGW: |
7088 | |
7089 | # elif defined(__SEH__) |
7090 | # if defined(__aarch64__) || defined(_ARM64_) |
7091 | # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry()) |
7092 | # elif (__MINGW_GCC_VERSION < 40702) |
7093 | # define setjmp(BUF) _setjmp((BUF), mingw_getsp()) |
7094 | # else |
7095 | # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0)) |
7096 | # endif |
7097 | |
7098 | and the second argument passed to _setjmp, if not null, is forwarded |
7099 | to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has |
7100 | built an ExceptionRecord on the fly describing the setjmp buffer). */ |
7101 | const HOST_WIDE_INT diff |
7102 | = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; |
7103 | if (diff <= 255 && !crtl->accesses_prior_frames) |
7104 | { |
7105 | /* The resulting diff will be a multiple of 16 lower than 255, |
7106 | i.e. at most 240 as required by the unwind data structure. */ |
7107 | frame->hard_frame_pointer_offset += (diff & 15); |
7108 | } |
7109 | else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames) |
7110 | { |
7111 | /* Ideally we'd determine what portion of the local stack frame |
7112 | (within the constraint of the lowest 240) is most heavily used. |
7113 | But without that complication, simply bias the frame pointer |
7114 | by 128 bytes so as to maximize the amount of the local stack |
7115 | frame that is addressable with 8-bit offsets. */ |
7116 | frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; |
7117 | } |
7118 | else |
7119 | frame->hard_frame_pointer_offset = frame->hfp_save_offset; |
7120 | } |
7121 | } |
7122 | |
7123 | /* This is semi-inlined memory_address_length, but simplified |
7124 | since we know that we're always dealing with reg+offset, and |
7125 | to avoid having to create and discard all that rtl. */ |
7126 | |
7127 | static inline int |
7128 | choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) |
7129 | { |
7130 | int len = 4; |
7131 | |
7132 | if (offset == 0) |
7133 | { |
7134 | /* EBP and R13 cannot be encoded without an offset. */ |
7135 | len = (regno == BP_REG || regno == R13_REG); |
7136 | } |
7137 | else if (IN_RANGE (offset, -128, 127)) |
7138 | len = 1; |
7139 | |
7140 | /* ESP and R12 must be encoded with a SIB byte. */ |
7141 | if (regno == SP_REG || regno == R12_REG) |
7142 | len++; |
7143 | |
7144 | return len; |
7145 | } |
7146 | |
7147 | /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in |
7148 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7149 | |
7150 | static bool |
7151 | sp_valid_at (HOST_WIDE_INT cfa_offset) |
7152 | { |
7153 | const struct machine_frame_state &fs = cfun->machine->fs; |
7154 | if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) |
7155 | { |
7156 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7157 | gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); |
7158 | return false; |
7159 | } |
7160 | return fs.sp_valid; |
7161 | } |
7162 | |
7163 | /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in |
7164 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7165 | |
7166 | static inline bool |
7167 | fp_valid_at (HOST_WIDE_INT cfa_offset) |
7168 | { |
7169 | const struct machine_frame_state &fs = cfun->machine->fs; |
7170 | if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) |
7171 | { |
7172 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7173 | gcc_assert (cfa_offset >= fs.sp_realigned_offset); |
7174 | return false; |
7175 | } |
7176 | return fs.fp_valid; |
7177 | } |
7178 | |
7179 | /* Choose a base register based upon alignment requested, speed and/or |
7180 | size. */ |
7181 | |
7182 | static void |
7183 | choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, |
7184 | HOST_WIDE_INT &base_offset, |
7185 | unsigned int align_reqested, unsigned int *align) |
7186 | { |
7187 | const struct machine_function *m = cfun->machine; |
7188 | unsigned int hfp_align; |
7189 | unsigned int drap_align; |
7190 | unsigned int sp_align; |
7191 | bool hfp_ok = fp_valid_at (cfa_offset); |
7192 | bool drap_ok = m->fs.drap_valid; |
7193 | bool sp_ok = sp_valid_at (cfa_offset); |
7194 | |
7195 | hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; |
7196 | |
7197 | /* Filter out any registers that don't meet the requested alignment |
7198 | criteria. */ |
7199 | if (align_reqested) |
7200 | { |
7201 | if (m->fs.realigned) |
7202 | hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; |
7203 | /* SEH unwind code does do not currently support REG_CFA_EXPRESSION |
7204 | notes (which we would need to use a realigned stack pointer), |
7205 | so disable on SEH targets. */ |
7206 | else if (m->fs.sp_realigned) |
7207 | sp_align = crtl->stack_alignment_needed; |
7208 | |
7209 | hfp_ok = hfp_ok && hfp_align >= align_reqested; |
7210 | drap_ok = drap_ok && drap_align >= align_reqested; |
7211 | sp_ok = sp_ok && sp_align >= align_reqested; |
7212 | } |
7213 | |
7214 | if (m->use_fast_prologue_epilogue) |
7215 | { |
7216 | /* Choose the base register most likely to allow the most scheduling |
7217 | opportunities. Generally FP is valid throughout the function, |
7218 | while DRAP must be reloaded within the epilogue. But choose either |
7219 | over the SP due to increased encoding size. */ |
7220 | |
7221 | if (hfp_ok) |
7222 | { |
7223 | base_reg = hard_frame_pointer_rtx; |
7224 | base_offset = m->fs.fp_offset - cfa_offset; |
7225 | } |
7226 | else if (drap_ok) |
7227 | { |
7228 | base_reg = crtl->drap_reg; |
7229 | base_offset = 0 - cfa_offset; |
7230 | } |
7231 | else if (sp_ok) |
7232 | { |
7233 | base_reg = stack_pointer_rtx; |
7234 | base_offset = m->fs.sp_offset - cfa_offset; |
7235 | } |
7236 | } |
7237 | else |
7238 | { |
7239 | HOST_WIDE_INT toffset; |
7240 | int len = 16, tlen; |
7241 | |
7242 | /* Choose the base register with the smallest address encoding. |
7243 | With a tie, choose FP > DRAP > SP. */ |
7244 | if (sp_ok) |
7245 | { |
7246 | base_reg = stack_pointer_rtx; |
7247 | base_offset = m->fs.sp_offset - cfa_offset; |
7248 | len = choose_baseaddr_len (STACK_POINTER_REGNUM, offset: base_offset); |
7249 | } |
7250 | if (drap_ok) |
7251 | { |
7252 | toffset = 0 - cfa_offset; |
7253 | tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), offset: toffset); |
7254 | if (tlen <= len) |
7255 | { |
7256 | base_reg = crtl->drap_reg; |
7257 | base_offset = toffset; |
7258 | len = tlen; |
7259 | } |
7260 | } |
7261 | if (hfp_ok) |
7262 | { |
7263 | toffset = m->fs.fp_offset - cfa_offset; |
7264 | tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, offset: toffset); |
7265 | if (tlen <= len) |
7266 | { |
7267 | base_reg = hard_frame_pointer_rtx; |
7268 | base_offset = toffset; |
7269 | } |
7270 | } |
7271 | } |
7272 | |
7273 | /* Set the align return value. */ |
7274 | if (align) |
7275 | { |
7276 | if (base_reg == stack_pointer_rtx) |
7277 | *align = sp_align; |
7278 | else if (base_reg == crtl->drap_reg) |
7279 | *align = drap_align; |
7280 | else if (base_reg == hard_frame_pointer_rtx) |
7281 | *align = hfp_align; |
7282 | } |
7283 | } |
7284 | |
7285 | /* Return an RTX that points to CFA_OFFSET within the stack frame and |
7286 | the alignment of address. If ALIGN is non-null, it should point to |
7287 | an alignment value (in bits) that is preferred or zero and will |
7288 | recieve the alignment of the base register that was selected, |
7289 | irrespective of rather or not CFA_OFFSET is a multiple of that |
7290 | alignment value. If it is possible for the base register offset to be |
7291 | non-immediate then SCRATCH_REGNO should specify a scratch register to |
7292 | use. |
7293 | |
7294 | The valid base registers are taken from CFUN->MACHINE->FS. */ |
7295 | |
7296 | static rtx |
7297 | choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, |
7298 | unsigned int scratch_regno = INVALID_REGNUM) |
7299 | { |
7300 | rtx base_reg = NULL; |
7301 | HOST_WIDE_INT base_offset = 0; |
7302 | |
7303 | /* If a specific alignment is requested, try to get a base register |
7304 | with that alignment first. */ |
7305 | if (align && *align) |
7306 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: *align, align); |
7307 | |
7308 | if (!base_reg) |
7309 | choose_basereg (cfa_offset, base_reg, base_offset, align_reqested: 0, align); |
7310 | |
7311 | gcc_assert (base_reg != NULL); |
7312 | |
7313 | rtx base_offset_rtx = GEN_INT (base_offset); |
7314 | |
7315 | if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) |
7316 | { |
7317 | gcc_assert (scratch_regno != INVALID_REGNUM); |
7318 | |
7319 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
7320 | emit_move_insn (scratch_reg, base_offset_rtx); |
7321 | |
7322 | return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); |
7323 | } |
7324 | |
7325 | return plus_constant (Pmode, base_reg, base_offset); |
7326 | } |
7327 | |
7328 | /* Emit code to save registers in the prologue. */ |
7329 | |
static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;

  /* Without APX PUSH2/POP2 (or for interrupt/exception handlers, where
     we must be conservative), save each call-saved GPR with its own
     push, highest register number first.  */
  if (!TARGET_APX_PUSH2POP2 || cfun->machine->func_type != TYPE_NORMAL)
    {
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno)));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* Pair consecutive saves into PUSH2 instructions.  PUSH2 needs a
	 16-byte-aligned stack pointer, so while the offset is
	 misaligned we emit a single push first to restore alignment.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
	  {
	    if (aligned)
	      {
		/* Collect registers until we have a pair to push.  */
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 reg1: gen_rtx_REG (word_mode,
						              regno_list[0]),
						 reg2: gen_rtx_REG (word_mode,
						              regno_list[1])));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* dwarf2out does not understand PUSH2; describe it
		       explicitly as two word-sized stores plus the
		       stack-pointer adjustment.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    /* Start collecting the next pair.  */
		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* Single push to bring the stack back to 16-byte
		   alignment; every subsequent pair keeps it aligned.  */
		insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode, regno)));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* One register left over without a partner: plain push.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (arg: gen_rtx_REG (word_mode,
					      regno_list[0])));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
7416 | |
7417 | /* Emit a single register save at CFA - CFA_OFFSET. */ |
7418 | |
static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Pick the best base register for addressing CFA - CFA_OFFSET,
     preferring one that provides the mode's natural alignment.  */
  addr = choose_baseaddr (cfa_offset, align: &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Extract the base register from the (possibly reg+const) address.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* A save relative to a stack pointer that has been re-aligned with
     AND cannot be described by a simple CFA offset; emit the exact
     store expression for the unwinder instead.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
7491 | |
7492 | /* Emit code to save registers using MOV insns. |
7493 | First register is stored at CFA - CFA_OFFSET. */ |
7494 | static void |
7495 | ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7496 | { |
7497 | unsigned int regno; |
7498 | |
7499 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7500 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7501 | { |
7502 | ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset); |
7503 | cfa_offset -= UNITS_PER_WORD; |
7504 | } |
7505 | } |
7506 | |
7507 | /* Emit code to save SSE registers using MOV insns. |
7508 | First register is stored at CFA - CFA_OFFSET. */ |
7509 | static void |
7510 | ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7511 | { |
7512 | unsigned int regno; |
7513 | |
7514 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7515 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true)) |
7516 | { |
7517 | ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); |
7518 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
7519 | } |
7520 | } |
7521 | |
/* Chain of REG_CFA_RESTORE notes queued by ix86_add_cfa_restore_note and
   later attached to a stack-manipulation insn by
   ix86_add_queued_cfa_restore_notes.  GTY-marked so the garbage
   collector keeps the chain alive between the two calls.  */
static GTY(()) rtx queued_cfa_restores;
7523 | |
7524 | /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack |
7525 | manipulation insn. The value is on the stack at CFA - CFA_OFFSET. |
7526 | Don't add the note if the previously saved value will be left untouched |
7527 | within stack red-zone till return, as unwinders can find the same value |
7528 | in the register and on the stack. */ |
7529 | |
7530 | static void |
7531 | ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) |
7532 | { |
7533 | if (!crtl->shrink_wrapped |
7534 | && cfa_offset <= cfun->machine->fs.red_zone_offset) |
7535 | return; |
7536 | |
7537 | if (insn) |
7538 | { |
7539 | add_reg_note (insn, REG_CFA_RESTORE, reg); |
7540 | RTX_FRAME_RELATED_P (insn) = 1; |
7541 | } |
7542 | else |
7543 | queued_cfa_restores |
7544 | = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
7545 | } |
7546 | |
7547 | /* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
7548 | |
7549 | static void |
7550 | ix86_add_queued_cfa_restore_notes (rtx insn) |
7551 | { |
7552 | rtx last; |
7553 | if (!queued_cfa_restores) |
7554 | return; |
7555 | for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
7556 | ; |
7557 | XEXP (last, 1) = REG_NOTES (insn); |
7558 | REG_NOTES (insn) = queued_cfa_restores; |
7559 | queued_cfa_restores = NULL_RTX; |
7560 | RTX_FRAME_RELATED_P (insn) = 1; |
7561 | } |
7562 | |
7563 | /* Expand prologue or epilogue stack adjustment. |
   The pattern exists to put a dependency on all ebp-based memory accesses.
7565 | STYLE should be negative if instructions should be marked as frame related, |
7566 | zero if %r11 register is live and cannot be freely used and positive |
7567 | otherwise. */ |
7568 | |
static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* If OFFSET does not fit a sign-extended 32-bit immediate, load it
     into a temporary register first.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  /* %r11 is live (STYLE == 0): reuse the frame pointer as the
	     temporary, which is only valid if neither operand is it.  */
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		    (Pmode, x0: dest, x1: src, x2: addend));
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      /* The CFA moves with the adjustment: track the new register and
	 offset, and tell the unwinder via REG_CFA_ADJUST_CFA.  */
      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  /* The emitted insn adds a register, not a constant; give
	     dwarf2out the constant form of the adjustment.  */
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Keep the frame-state machine in sync when the stack pointer was
     the destination of the adjustment.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
7657 | |
7658 | /* Find an available register to be used as dynamic realign argument |
   pointer register.  Such a register will be written in the prologue
   and used at the beginning of the function body, so it must not be
7661 | 1. parameter passing register. |
7662 | 2. GOT pointer. |
7663 | We reuse static-chain register if it is available. Otherwise, we |
7664 | use DI for i386 and R13 for x86-64. We chose R13 since it has |
7665 | shorter encoding. |
7666 | |
7667 | Return: the regno of chosen register. */ |
7668 | |
7669 | static unsigned int |
7670 | find_drap_reg (void) |
7671 | { |
7672 | tree decl = cfun->decl; |
7673 | |
7674 | /* Always use callee-saved register if there are no caller-saved |
7675 | registers. */ |
7676 | if (TARGET_64BIT) |
7677 | { |
7678 | /* Use R13 for nested function or function need static chain. |
7679 | Since function with tail call may use any caller-saved |
7680 | registers in epilogue, DRAP must not use caller-saved |
7681 | register in such case. */ |
7682 | if (DECL_STATIC_CHAIN (decl) |
7683 | || cfun->machine->no_caller_saved_registers |
7684 | || crtl->tail_call_emit) |
7685 | return R13_REG; |
7686 | |
7687 | return R10_REG; |
7688 | } |
7689 | else |
7690 | { |
7691 | /* Use DI for nested function or function need static chain. |
7692 | Since function with tail call may use any caller-saved |
7693 | registers in epilogue, DRAP must not use caller-saved |
7694 | register in such case. */ |
7695 | if (DECL_STATIC_CHAIN (decl) |
7696 | || cfun->machine->no_caller_saved_registers |
7697 | || crtl->tail_call_emit |
7698 | || crtl->calls_eh_return) |
7699 | return DI_REG; |
7700 | |
7701 | /* Reuse static chain register if it isn't used for parameter |
7702 | passing. */ |
7703 | if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) |
7704 | { |
7705 | unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); |
7706 | if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) |
7707 | return CX_REG; |
7708 | } |
7709 | return DI_REG; |
7710 | } |
7711 | } |
7712 | |
7713 | /* Return minimum incoming stack alignment. */ |
7714 | |
7715 | static unsigned int |
7716 | ix86_minimum_incoming_stack_boundary (bool sibcall) |
7717 | { |
7718 | unsigned int incoming_stack_boundary; |
7719 | |
7720 | /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ |
7721 | if (cfun->machine->func_type != TYPE_NORMAL) |
7722 | incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; |
7723 | /* Prefer the one specified at command line. */ |
7724 | else if (ix86_user_incoming_stack_boundary) |
7725 | incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
7726 | /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary |
7727 | if -mstackrealign is used, it isn't used for sibcall check and |
7728 | estimated stack alignment is 128bit. */ |
7729 | else if (!sibcall |
7730 | && ix86_force_align_arg_pointer |
7731 | && crtl->stack_alignment_estimated == 128) |
7732 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7733 | else |
7734 | incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
7735 | |
7736 | /* Incoming stack alignment can be changed on individual functions |
7737 | via force_align_arg_pointer attribute. We use the smallest |
7738 | incoming stack boundary. */ |
7739 | if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
7740 | && lookup_attribute (attr_name: "force_align_arg_pointer" , |
7741 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
7742 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7743 | |
7744 | /* The incoming stack frame has to be aligned at least at |
7745 | parm_stack_boundary. */ |
7746 | if (incoming_stack_boundary < crtl->parm_stack_boundary) |
7747 | incoming_stack_boundary = crtl->parm_stack_boundary; |
7748 | |
7749 | /* Stack at entrance of main is aligned by runtime. We use the |
7750 | smallest incoming stack boundary. */ |
7751 | if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
7752 | && DECL_NAME (current_function_decl) |
7753 | && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
7754 | && DECL_FILE_SCOPE_P (current_function_decl)) |
7755 | incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
7756 | |
7757 | return incoming_stack_boundary; |
7758 | } |
7759 | |
7760 | /* Update incoming stack boundary and estimated stack alignment. */ |
7761 | |
7762 | static void |
7763 | ix86_update_stack_boundary (void) |
7764 | { |
7765 | ix86_incoming_stack_boundary |
7766 | = ix86_minimum_incoming_stack_boundary (sibcall: false); |
7767 | |
7768 | /* x86_64 vararg needs 16byte stack alignment for register save area. */ |
7769 | if (TARGET_64BIT |
7770 | && cfun->stdarg |
7771 | && crtl->stack_alignment_estimated < 128) |
7772 | crtl->stack_alignment_estimated = 128; |
7773 | |
7774 | /* __tls_get_addr needs to be called with 16-byte aligned stack. */ |
7775 | if (ix86_tls_descriptor_calls_expanded_in_cfun |
7776 | && crtl->preferred_stack_boundary < 128) |
7777 | crtl->preferred_stack_boundary = 128; |
7778 | } |
7779 | |
7780 | /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is |
7781 | needed or an rtx for DRAP otherwise. */ |
7782 | |
static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by an asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Copy the hard DRAP register into a pseudo (vDRAP) right after
	 the function entry, and return the pseudo.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      /* Build the copy in a detached sequence so it can be placed
	 precisely at the start of the function.  */
      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ()));
      if (!optimize)
	{
	  /* At -O0 the pseudo stays in its stack slot; tell dwarf2out
	     where the CFA copy lives.  */
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
7822 | |
7823 | /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ |
7824 | |
7825 | static rtx |
7826 | ix86_internal_arg_pointer (void) |
7827 | { |
7828 | return virtual_incoming_args_rtx; |
7829 | } |
7830 | |
/* A scratch register obtained by get_scratch_register_on_entry, plus a
   flag recording whether it had to be spilled to free it.  */
struct scratch_reg {
  rtx reg;	/* The chosen hard register.  */
  bool saved;	/* True if REG was live and pushed onto the stack.  */
};
7835 | |
7836 | /* Return a short-lived scratch register for use on function entry. |
7837 | In 32-bit mode, it is valid only after the registers are saved |
7838 | in the prologue. This register must be released by means of |
7839 | release_scratch_register_on_entry once it is dead. */ |
7840 | |
static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* 32-bit: pick a register that is not used for argument passing,
	 the static chain, or DRAP.  The else-if chain is ordered by
	 preference; the last resort spills a register.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute (attr_name: "fastcall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute (attr_name: "thiscall" , TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (type: fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      /* Callee-saved registers that will be saved anyway are free to
	 use between the prologue saves and their restore.  */
      else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false))
	regno = DI_REG;
      else
	{
	  /* Nothing free: spill eax (or edx if DRAP uses eax).  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (arg: sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
7903 | |
7904 | /* Release a scratch register obtained from the preceding function. |
7905 | |
7906 | If RELEASE_VIA_POP is true, we just pop the register off the stack |
7907 | to release it. This is what non-Linux systems use with -fstack-check. |
7908 | |
7909 | Otherwise we use OFFSET to locate the saved register and the |
7910 | allocated stack space becomes part of the local frame and is |
7911 | deallocated by the epilogue. */ |
7912 | |
static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  /* Nothing to do unless the register was actually spilled.  */
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (arg: sr->reg));

	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop;
	     describe the stack adjustment explicitly.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the saved value from its slot at SP + OFFSET; the
	     slot itself is deallocated by the epilogue.  */
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
7939 | |
7940 | /* Emit code to adjust the stack pointer by SIZE bytes while probing it. |
7941 | |
7942 | If INT_REGISTERS_SAVED is true, then integer registers have already been |
7943 | pushed on the stack. |
7944 | |
   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7946 | beyond SIZE bytes. |
7947 | |
7948 | This assumes no knowledge of the current probing state, i.e. it is never |
7949 | allowed to allocate more than PROBE_INTERVAL bytes of stack space without |
7950 | a suitable probe. */ |
7951 | |
static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* Describe both adjustments so the CFA tracks the push/pop
	     pair exactly.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			         GEN_INT (-size), style: -1,
			         set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), style: -1,
					 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (sr: &sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* While the loop runs, the CFA is expressed relative to the
	     loop counter register.  */
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg,
						    x2: size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* After the loop, re-express the CFA from the stack pointer.  */
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), style: -1,
				     set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), style: -1,
			       set_cfa: m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8157 | |
8158 | /* Adjust the stack pointer up to REG while probing it. */ |
8159 | |
const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops);

  /* Probe at SP.  The OR is a no-op store that touches the page.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}" , xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops);

  /* Branch.  */
  fputs (s: "\tjne\t" , stream: asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc (c: '\n', stream: asm_out_file);

  return "" ;
}
8193 | |
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      /* Final probe at the exact end of the range (FIRST + SIZE).  */
      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      /* Grab a scratch register to hold the moving probe offset; it is
	 released (and its value discarded) at the end of this branch.  */
      get_scratch_register_on_entry (sr: &sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  Negated because the probes walk downward
	 from the stack pointer.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
8290 | |
8291 | /* Probe a range of stack addresses from REG to END, inclusive. These are |
8292 | offsets from the current stack pointer. */ |
8293 | |
8294 | const char * |
8295 | output_probe_stack_range (rtx reg, rtx end) |
8296 | { |
8297 | static int labelno = 0; |
8298 | char loop_lab[32]; |
8299 | rtx xops[3]; |
8300 | |
8301 | ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL" , labelno++); |
8302 | |
8303 | /* Loop. */ |
8304 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
8305 | |
8306 | /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ |
8307 | xops[0] = reg; |
8308 | xops[1] = GEN_INT (get_probe_interval ()); |
8309 | output_asm_insn ("sub%z0\t{%1, %0|%0, %1}" , xops); |
8310 | |
8311 | /* Probe at TEST_ADDR. */ |
8312 | xops[0] = stack_pointer_rtx; |
8313 | xops[1] = reg; |
8314 | xops[2] = const0_rtx; |
8315 | output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}" , xops); |
8316 | |
8317 | /* Test if TEST_ADDR == LAST_ADDR. */ |
8318 | xops[0] = reg; |
8319 | xops[1] = end; |
8320 | output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}" , xops); |
8321 | |
8322 | /* Branch. */ |
8323 | fputs (s: "\tjne\t" , stream: asm_out_file); |
8324 | assemble_name_raw (asm_out_file, loop_lab); |
8325 | fputc (c: '\n', stream: asm_out_file); |
8326 | |
8327 | return "" ; |
8328 | } |
8329 | |
8330 | /* Set stack_frame_required to false if stack frame isn't required. |
8331 | Update STACK_ALIGNMENT to the largest alignment, in bits, of stack |
8332 | slot used if stack frame is required and CHECK_STACK_SLOT is true. */ |
8333 | |
8334 | static void |
8335 | ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, |
8336 | bool check_stack_slot) |
8337 | { |
8338 | HARD_REG_SET set_up_by_prologue, prologue_used; |
8339 | basic_block bb; |
8340 | |
8341 | CLEAR_HARD_REG_SET (set&: prologue_used); |
8342 | CLEAR_HARD_REG_SET (set&: set_up_by_prologue); |
8343 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); |
8344 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); |
8345 | add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, |
8346 | HARD_FRAME_POINTER_REGNUM); |
8347 | |
8348 | /* The preferred stack alignment is the minimum stack alignment. */ |
8349 | if (stack_alignment > crtl->preferred_stack_boundary) |
8350 | stack_alignment = crtl->preferred_stack_boundary; |
8351 | |
8352 | bool require_stack_frame = false; |
8353 | |
8354 | FOR_EACH_BB_FN (bb, cfun) |
8355 | { |
8356 | rtx_insn *insn; |
8357 | FOR_BB_INSNS (bb, insn) |
8358 | if (NONDEBUG_INSN_P (insn) |
8359 | && requires_stack_frame_p (insn, prologue_used, |
8360 | set_up_by_prologue)) |
8361 | { |
8362 | require_stack_frame = true; |
8363 | |
8364 | if (check_stack_slot) |
8365 | { |
8366 | /* Find the maximum stack alignment. */ |
8367 | subrtx_iterator::array_type array; |
8368 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) |
8369 | if (MEM_P (*iter) |
8370 | && (reg_mentioned_p (stack_pointer_rtx, |
8371 | *iter) |
8372 | || reg_mentioned_p (frame_pointer_rtx, |
8373 | *iter))) |
8374 | { |
8375 | unsigned int alignment = MEM_ALIGN (*iter); |
8376 | if (alignment > stack_alignment) |
8377 | stack_alignment = alignment; |
8378 | } |
8379 | } |
8380 | } |
8381 | } |
8382 | |
8383 | cfun->machine->stack_frame_required = require_stack_frame; |
8384 | } |
8385 | |
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  Also
   clears frame_pointer_needed (and re-runs dataflow) when the frame
   pointer turns out to be unnecessary after reload.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
	         functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  /* No stack frame at all: drop the frame pointer, cancel
	     realignment, and clamp every recorded alignment to the
	     incoming boundary.  */
	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  /* Recompute dataflow now that the frame pointer register is
	     no longer fixed by the frame.  */
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
8558 | |
8559 | /* Delete SET_GOT right after entry block if it is allocated to reg. */ |
8560 | |
8561 | static void |
8562 | ix86_elim_entry_set_got (rtx reg) |
8563 | { |
8564 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
8565 | rtx_insn *c_insn = BB_HEAD (bb); |
8566 | if (!NONDEBUG_INSN_P (c_insn)) |
8567 | c_insn = next_nonnote_nondebug_insn (c_insn); |
8568 | if (c_insn && NONJUMP_INSN_P (c_insn)) |
8569 | { |
8570 | rtx pat = PATTERN (insn: c_insn); |
8571 | if (GET_CODE (pat) == PARALLEL) |
8572 | { |
8573 | rtx vec = XVECEXP (pat, 0, 0); |
8574 | if (GET_CODE (vec) == SET |
8575 | && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT |
8576 | && REGNO (XEXP (vec, 0)) == REGNO (reg)) |
8577 | delete_insn (c_insn); |
8578 | } |
8579 | } |
8580 | } |
8581 | |
8582 | static rtx |
8583 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) |
8584 | { |
8585 | rtx addr, mem; |
8586 | |
8587 | if (offset) |
8588 | addr = plus_constant (Pmode, frame_reg, offset); |
8589 | mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); |
8590 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); |
8591 | } |
8592 | |
8593 | static inline rtx |
8594 | gen_frame_load (rtx reg, rtx frame_reg, int offset) |
8595 | { |
8596 | return gen_frame_set (reg, frame_reg, offset, store: false); |
8597 | } |
8598 | |
8599 | static inline rtx |
8600 | gen_frame_store (rtx reg, rtx frame_reg, int offset) |
8601 | { |
8602 | return gen_frame_set (reg, frame_reg, offset, store: true); |
8603 | } |
8604 | |
/* Emit the out-of-line register-save stub call for ms_abi -> sysv_abi
   calls: build a PARALLEL that uses the stub symbol and stores each
   clobbered register at its stub-defined offset from RAX.  FRAME
   describes the current function's frame layout.  */
static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     so the computed address is valid regardless of whether we've
     actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
					  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* One frame store per clobbered register; SSE registers are saved in
     V4SFmode, GP registers in word_mode.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
8649 | |
8650 | /* Generate and return an insn body to AND X with Y. */ |
8651 | |
8652 | static rtx_insn * |
8653 | gen_and2_insn (rtx x, rtx y) |
8654 | { |
8655 | enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x)); |
8656 | |
8657 | gcc_assert (insn_operand_matches (icode, 0, x)); |
8658 | gcc_assert (insn_operand_matches (icode, 1, x)); |
8659 | gcc_assert (insn_operand_matches (icode, 2, y)); |
8660 | |
8661 | return GEN_FCN (icode) (x, x, y); |
8662 | } |
8663 | |
8664 | /* Expand the prologue into a bunch of separate insns. */ |
8665 | |
8666 | void |
8667 | ix86_expand_prologue (void) |
8668 | { |
8669 | struct machine_function *m = cfun->machine; |
8670 | rtx insn, t; |
8671 | HOST_WIDE_INT allocate; |
8672 | bool int_registers_saved; |
8673 | bool sse_registers_saved; |
8674 | bool save_stub_call_needed; |
8675 | rtx static_chain = NULL_RTX; |
8676 | |
8677 | ix86_last_zero_store_uid = 0; |
8678 | if (ix86_function_naked (fn: current_function_decl)) |
8679 | { |
8680 | if (flag_stack_usage_info) |
8681 | current_function_static_stack_size = 0; |
8682 | return; |
8683 | } |
8684 | |
8685 | ix86_finalize_stack_frame_flags (); |
8686 | |
8687 | /* DRAP should not coexist with stack_realign_fp */ |
8688 | gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
8689 | |
8690 | memset (s: &m->fs, c: 0, n: sizeof (m->fs)); |
8691 | |
8692 | /* Initialize CFA state for before the prologue. */ |
8693 | m->fs.cfa_reg = stack_pointer_rtx; |
8694 | m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; |
8695 | |
8696 | /* Track SP offset to the CFA. We continue tracking this after we've |
8697 | swapped the CFA register away from SP. In the case of re-alignment |
8698 | this is fudged; we're interested to offsets within the local frame. */ |
8699 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
8700 | m->fs.sp_valid = true; |
8701 | m->fs.sp_realigned = false; |
8702 | |
8703 | const struct ix86_frame &frame = cfun->machine->frame; |
8704 | |
8705 | if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl)) |
8706 | { |
8707 | /* We should have already generated an error for any use of |
8708 | ms_hook on a nested function. */ |
8709 | gcc_checking_assert (!ix86_static_chain_on_stack); |
8710 | |
8711 | /* Check if profiling is active and we shall use profiling before |
8712 | prologue variant. If so sorry. */ |
8713 | if (crtl->profile && flag_fentry != 0) |
8714 | sorry ("%<ms_hook_prologue%> attribute is not compatible " |
8715 | "with %<-mfentry%> for 32-bit" ); |
8716 | |
8717 | /* In ix86_asm_output_function_label we emitted: |
8718 | 8b ff movl.s %edi,%edi |
8719 | 55 push %ebp |
8720 | 8b ec movl.s %esp,%ebp |
8721 | |
8722 | This matches the hookable function prologue in Win32 API |
8723 | functions in Microsoft Windows XP Service Pack 2 and newer. |
8724 | Wine uses this to enable Windows apps to hook the Win32 API |
8725 | functions provided by Wine. |
8726 | |
8727 | What that means is that we've already set up the frame pointer. */ |
8728 | |
8729 | if (frame_pointer_needed |
8730 | && !(crtl->drap_reg && crtl->stack_realign_needed)) |
8731 | { |
8732 | rtx push, mov; |
8733 | |
8734 | /* We've decided to use the frame pointer already set up. |
8735 | Describe this to the unwinder by pretending that both |
8736 | push and mov insns happen right here. |
8737 | |
8738 | Putting the unwind info here at the end of the ms_hook |
8739 | is done so that we can make absolutely certain we get |
8740 | the required byte sequence at the start of the function, |
8741 | rather than relying on an assembler that can produce |
8742 | the exact encoding required. |
8743 | |
8744 | However it does mean (in the unpatched case) that we have |
8745 | a 1 insn window where the asynchronous unwind info is |
8746 | incorrect. However, if we placed the unwind info at |
8747 | its correct location we would have incorrect unwind info |
8748 | in the patched case. Which is probably all moot since |
8749 | I don't expect Wine generates dwarf2 unwind info for the |
8750 | system libraries that use this feature. */ |
8751 | |
8752 | insn = emit_insn (gen_blockage ()); |
8753 | |
8754 | push = gen_push (hard_frame_pointer_rtx); |
8755 | mov = gen_rtx_SET (hard_frame_pointer_rtx, |
8756 | stack_pointer_rtx); |
8757 | RTX_FRAME_RELATED_P (push) = 1; |
8758 | RTX_FRAME_RELATED_P (mov) = 1; |
8759 | |
8760 | RTX_FRAME_RELATED_P (insn) = 1; |
8761 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
8762 | gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); |
8763 | |
8764 | /* Note that gen_push incremented m->fs.cfa_offset, even |
8765 | though we didn't emit the push insn here. */ |
8766 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
8767 | m->fs.fp_offset = m->fs.cfa_offset; |
8768 | m->fs.fp_valid = true; |
8769 | } |
8770 | else |
8771 | { |
8772 | /* The frame pointer is not needed so pop %ebp again. |
8773 | This leaves us with a pristine state. */ |
8774 | emit_insn (gen_pop (hard_frame_pointer_rtx)); |
8775 | } |
8776 | } |
8777 | |
8778 | /* The first insn of a function that accepts its static chain on the |
8779 | stack is to push the register that would be filled in by a direct |
8780 | call. This insn will be skipped by the trampoline. */ |
8781 | else if (ix86_static_chain_on_stack) |
8782 | { |
8783 | static_chain = ix86_static_chain (cfun->decl, false); |
8784 | insn = emit_insn (gen_push (arg: static_chain)); |
8785 | emit_insn (gen_blockage ()); |
8786 | |
8787 | /* We don't want to interpret this push insn as a register save, |
8788 | only as a stack adjustment. The real copy of the register as |
8789 | a save will be done later, if needed. */ |
8790 | t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
8791 | t = gen_rtx_SET (stack_pointer_rtx, t); |
8792 | add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
8793 | RTX_FRAME_RELATED_P (insn) = 1; |
8794 | } |
8795 | |
8796 | /* Emit prologue code to adjust stack alignment and setup DRAP, in case |
8797 | of DRAP is needed and stack realignment is really needed after reload */ |
8798 | if (stack_realign_drap) |
8799 | { |
8800 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
8801 | |
8802 | /* Can't use DRAP in interrupt function. */ |
8803 | if (cfun->machine->func_type != TYPE_NORMAL) |
8804 | sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " |
8805 | "in interrupt service routine. This may be worked " |
8806 | "around by avoiding functions with aggregate return." ); |
8807 | |
8808 | /* Only need to push parameter pointer reg if it is caller saved. */ |
8809 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
8810 | { |
8811 | /* Push arg pointer reg */ |
8812 | insn = emit_insn (gen_push (crtl->drap_reg)); |
8813 | RTX_FRAME_RELATED_P (insn) = 1; |
8814 | } |
8815 | |
8816 | /* Grab the argument pointer. */ |
8817 | t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); |
8818 | insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
8819 | RTX_FRAME_RELATED_P (insn) = 1; |
8820 | m->fs.cfa_reg = crtl->drap_reg; |
8821 | m->fs.cfa_offset = 0; |
8822 | |
8823 | /* Align the stack. */ |
8824 | insn = emit_insn (gen_and2_insn (stack_pointer_rtx, |
8825 | GEN_INT (-align_bytes))); |
8826 | RTX_FRAME_RELATED_P (insn) = 1; |
8827 | |
8828 | /* Replicate the return address on the stack so that return |
8829 | address can be reached via (argp - 1) slot. This is needed |
8830 | to implement macro RETURN_ADDR_RTX and intrinsic function |
8831 | expand_builtin_return_addr etc. */ |
8832 | t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); |
8833 | t = gen_frame_mem (word_mode, t); |
8834 | insn = emit_insn (gen_push (arg: t)); |
8835 | RTX_FRAME_RELATED_P (insn) = 1; |
8836 | |
8837 | /* For the purposes of frame and register save area addressing, |
8838 | we've started over with a new frame. */ |
8839 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
8840 | m->fs.realigned = true; |
8841 | |
8842 | if (static_chain) |
8843 | { |
8844 | /* Replicate static chain on the stack so that static chain |
8845 | can be reached via (argp - 2) slot. This is needed for |
8846 | nested function with stack realignment. */ |
8847 | insn = emit_insn (gen_push (arg: static_chain)); |
8848 | RTX_FRAME_RELATED_P (insn) = 1; |
8849 | } |
8850 | } |
8851 | |
8852 | int_registers_saved = (frame.nregs == 0); |
8853 | sse_registers_saved = (frame.nsseregs == 0); |
8854 | save_stub_call_needed = (m->call_ms2sysv); |
8855 | gcc_assert (sse_registers_saved || !save_stub_call_needed); |
8856 | |
8857 | if (frame_pointer_needed && !m->fs.fp_valid) |
8858 | { |
8859 | /* Note: AT&T enter does NOT have reversed args. Enter is probably |
8860 | slower on all targets. Also sdb didn't like it. */ |
8861 | insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
8862 | RTX_FRAME_RELATED_P (insn) = 1; |
8863 | |
8864 | if (m->fs.sp_offset == frame.hard_frame_pointer_offset) |
8865 | { |
8866 | insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
8867 | RTX_FRAME_RELATED_P (insn) = 1; |
8868 | |
8869 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8870 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
8871 | m->fs.fp_offset = m->fs.sp_offset; |
8872 | m->fs.fp_valid = true; |
8873 | } |
8874 | } |
8875 | |
8876 | if (!int_registers_saved) |
8877 | { |
8878 | /* If saving registers via PUSH, do so now. */ |
8879 | if (!frame.save_regs_using_mov) |
8880 | { |
8881 | ix86_emit_save_regs (); |
8882 | int_registers_saved = true; |
8883 | gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
8884 | } |
8885 | |
8886 | /* When using red zone we may start register saving before allocating |
8887 | the stack frame saving one cycle of the prologue. However, avoid |
8888 | doing this if we have to probe the stack; at least on x86_64 the |
8889 | stack probe can turn into a call that clobbers a red zone location. */ |
8890 | else if (ix86_using_red_zone () |
8891 | && (! TARGET_STACK_PROBE |
8892 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) |
8893 | { |
8894 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
8895 | cfun->machine->red_zone_used = true; |
8896 | int_registers_saved = true; |
8897 | } |
8898 | } |
8899 | |
8900 | if (frame.red_zone_size != 0) |
8901 | cfun->machine->red_zone_used = true; |
8902 | |
8903 | if (stack_realign_fp) |
8904 | { |
8905 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
8906 | gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); |
8907 | |
8908 | /* Record last valid frame pointer offset. */ |
8909 | m->fs.sp_realigned_fp_last = frame.reg_save_offset; |
8910 | |
8911 | /* The computation of the size of the re-aligned stack frame means |
8912 | that we must allocate the size of the register save area before |
8913 | performing the actual alignment. Otherwise we cannot guarantee |
8914 | that there's enough storage above the realignment point. */ |
8915 | allocate = frame.reg_save_offset - m->fs.sp_offset |
8916 | + frame.stack_realign_allocate; |
8917 | if (allocate) |
8918 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8919 | GEN_INT (-allocate), style: -1, set_cfa: false); |
8920 | |
8921 | /* Align the stack. */ |
8922 | emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes))); |
8923 | m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); |
8924 | m->fs.sp_realigned_offset = m->fs.sp_offset |
8925 | - frame.stack_realign_allocate; |
8926 | /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. |
8927 | Beyond this point, stack access should be done via choose_baseaddr or |
8928 | by using sp_valid_at and fp_valid_at to determine the correct base |
8929 | register. Henceforth, any CFA offset should be thought of as logical |
8930 | and not physical. */ |
8931 | gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); |
8932 | gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); |
8933 | m->fs.sp_realigned = true; |
8934 | |
8935 | /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which |
8936 | is needed to describe where a register is saved using a realigned |
8937 | stack pointer, so we need to invalidate the stack pointer for that |
8938 | target. */ |
8939 | if (TARGET_SEH) |
8940 | m->fs.sp_valid = false; |
8941 | |
8942 | /* If SP offset is non-immediate after allocation of the stack frame, |
8943 | then emit SSE saves or stub call prior to allocating the rest of the |
8944 | stack frame. This is less efficient for the out-of-line stub because |
8945 | we can't combine allocations across the call barrier, but it's better |
8946 | than using a scratch register. */ |
8947 | else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset |
8948 | - m->fs.sp_realigned_offset), |
8949 | Pmode)) |
8950 | { |
8951 | if (!sse_registers_saved) |
8952 | { |
8953 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
8954 | sse_registers_saved = true; |
8955 | } |
8956 | else if (save_stub_call_needed) |
8957 | { |
8958 | ix86_emit_outlined_ms2sysv_save (frame); |
8959 | save_stub_call_needed = false; |
8960 | } |
8961 | } |
8962 | } |
8963 | |
8964 | allocate = frame.stack_pointer_offset - m->fs.sp_offset; |
8965 | |
8966 | if (flag_stack_usage_info) |
8967 | { |
8968 | /* We start to count from ARG_POINTER. */ |
8969 | HOST_WIDE_INT stack_size = frame.stack_pointer_offset; |
8970 | |
8971 | /* If it was realigned, take into account the fake frame. */ |
8972 | if (stack_realign_drap) |
8973 | { |
8974 | if (ix86_static_chain_on_stack) |
8975 | stack_size += UNITS_PER_WORD; |
8976 | |
8977 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
8978 | stack_size += UNITS_PER_WORD; |
8979 | |
8980 | /* This over-estimates by 1 minimal-stack-alignment-unit but |
8981 | mitigates that by counting in the new return address slot. */ |
8982 | current_function_dynamic_stack_size |
8983 | += crtl->stack_alignment_needed / BITS_PER_UNIT; |
8984 | } |
8985 | |
8986 | current_function_static_stack_size = stack_size; |
8987 | } |
8988 | |
8989 | /* On SEH target with very large frame size, allocate an area to save |
8990 | SSE registers (as the very large allocation won't be described). */ |
8991 | if (TARGET_SEH |
8992 | && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE |
8993 | && !sse_registers_saved) |
8994 | { |
8995 | HOST_WIDE_INT sse_size |
8996 | = frame.sse_reg_save_offset - frame.reg_save_offset; |
8997 | |
8998 | gcc_assert (int_registers_saved); |
8999 | |
9000 | /* No need to do stack checking as the area will be immediately |
9001 | written. */ |
9002 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9003 | GEN_INT (-sse_size), style: -1, |
9004 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9005 | allocate -= sse_size; |
9006 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
9007 | sse_registers_saved = true; |
9008 | } |
9009 | |
9010 | /* If stack clash protection is requested, then probe the stack, unless it |
9011 | is already probed on the target. */ |
9012 | if (allocate >= 0 |
9013 | && flag_stack_clash_protection |
9014 | && !ix86_target_stack_probe ()) |
9015 | { |
9016 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: false); |
9017 | allocate = 0; |
9018 | } |
9019 | |
9020 | /* The stack has already been decremented by the instruction calling us |
9021 | so probe if the size is non-negative to preserve the protection area. */ |
9022 | else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
9023 | { |
9024 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
9025 | |
9026 | if (STACK_CHECK_MOVING_SP) |
9027 | { |
9028 | if (crtl->is_leaf |
9029 | && !cfun->calls_alloca |
9030 | && allocate <= probe_interval) |
9031 | ; |
9032 | |
9033 | else |
9034 | { |
9035 | ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: true); |
9036 | allocate = 0; |
9037 | } |
9038 | } |
9039 | |
9040 | else |
9041 | { |
9042 | HOST_WIDE_INT size = allocate; |
9043 | |
9044 | if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) |
9045 | size = 0x80000000 - get_stack_check_protect () - 1; |
9046 | |
9047 | if (TARGET_STACK_PROBE) |
9048 | { |
9049 | if (crtl->is_leaf && !cfun->calls_alloca) |
9050 | { |
9051 | if (size > probe_interval) |
9052 | ix86_emit_probe_stack_range (first: 0, size, int_registers_saved); |
9053 | } |
9054 | else |
9055 | ix86_emit_probe_stack_range (first: 0, |
9056 | size: size + get_stack_check_protect (), |
9057 | int_registers_saved); |
9058 | } |
9059 | else |
9060 | { |
9061 | if (crtl->is_leaf && !cfun->calls_alloca) |
9062 | { |
9063 | if (size > probe_interval |
9064 | && size > get_stack_check_protect ()) |
9065 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), |
9066 | size: (size |
9067 | - get_stack_check_protect ()), |
9068 | int_registers_saved); |
9069 | } |
9070 | else |
9071 | ix86_emit_probe_stack_range (first: get_stack_check_protect (), size, |
9072 | int_registers_saved); |
9073 | } |
9074 | } |
9075 | } |
9076 | |
9077 | if (allocate == 0) |
9078 | ; |
9079 | else if (!ix86_target_stack_probe () |
9080 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT) |
9081 | { |
9082 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9083 | GEN_INT (-allocate), style: -1, |
9084 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9085 | } |
9086 | else |
9087 | { |
9088 | rtx eax = gen_rtx_REG (Pmode, AX_REG); |
9089 | rtx r10 = NULL; |
9090 | const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); |
9091 | bool eax_live = ix86_eax_live_at_start_p (); |
9092 | bool r10_live = false; |
9093 | |
9094 | if (TARGET_64BIT) |
9095 | r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); |
9096 | |
9097 | if (eax_live) |
9098 | { |
9099 | insn = emit_insn (gen_push (arg: eax)); |
9100 | allocate -= UNITS_PER_WORD; |
9101 | /* Note that SEH directives need to continue tracking the stack |
9102 | pointer even after the frame pointer has been set up. */ |
9103 | if (sp_is_cfa_reg || TARGET_SEH) |
9104 | { |
9105 | if (sp_is_cfa_reg) |
9106 | m->fs.cfa_offset += UNITS_PER_WORD; |
9107 | RTX_FRAME_RELATED_P (insn) = 1; |
9108 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9109 | gen_rtx_SET (stack_pointer_rtx, |
9110 | plus_constant (Pmode, |
9111 | stack_pointer_rtx, |
9112 | -UNITS_PER_WORD))); |
9113 | } |
9114 | } |
9115 | |
9116 | if (r10_live) |
9117 | { |
9118 | r10 = gen_rtx_REG (Pmode, R10_REG); |
9119 | insn = emit_insn (gen_push (arg: r10)); |
9120 | allocate -= UNITS_PER_WORD; |
9121 | if (sp_is_cfa_reg || TARGET_SEH) |
9122 | { |
9123 | if (sp_is_cfa_reg) |
9124 | m->fs.cfa_offset += UNITS_PER_WORD; |
9125 | RTX_FRAME_RELATED_P (insn) = 1; |
9126 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9127 | gen_rtx_SET (stack_pointer_rtx, |
9128 | plus_constant (Pmode, |
9129 | stack_pointer_rtx, |
9130 | -UNITS_PER_WORD))); |
9131 | } |
9132 | } |
9133 | |
9134 | emit_move_insn (eax, GEN_INT (allocate)); |
9135 | emit_insn (gen_allocate_stack_worker_probe (Pmode, x0: eax, x1: eax)); |
9136 | |
9137 | /* Use the fact that AX still contains ALLOCATE. */ |
9138 | insn = emit_insn (gen_pro_epilogue_adjust_stack_sub |
9139 | (Pmode, stack_pointer_rtx, stack_pointer_rtx, x2: eax)); |
9140 | |
9141 | if (sp_is_cfa_reg || TARGET_SEH) |
9142 | { |
9143 | if (sp_is_cfa_reg) |
9144 | m->fs.cfa_offset += allocate; |
9145 | RTX_FRAME_RELATED_P (insn) = 1; |
9146 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9147 | gen_rtx_SET (stack_pointer_rtx, |
9148 | plus_constant (Pmode, stack_pointer_rtx, |
9149 | -allocate))); |
9150 | } |
9151 | m->fs.sp_offset += allocate; |
9152 | |
9153 | /* Use stack_pointer_rtx for relative addressing so that code works for |
9154 | realigned stack. But this means that we need a blockage to prevent |
9155 | stores based on the frame pointer from being scheduled before. */ |
9156 | if (r10_live && eax_live) |
9157 | { |
9158 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9159 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
9160 | gen_frame_mem (word_mode, t)); |
9161 | t = plus_constant (Pmode, t, UNITS_PER_WORD); |
9162 | emit_move_insn (gen_rtx_REG (word_mode, AX_REG), |
9163 | gen_frame_mem (word_mode, t)); |
9164 | emit_insn (gen_memory_blockage ()); |
9165 | } |
9166 | else if (eax_live || r10_live) |
9167 | { |
9168 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9169 | emit_move_insn (gen_rtx_REG (word_mode, |
9170 | (eax_live ? AX_REG : R10_REG)), |
9171 | gen_frame_mem (word_mode, t)); |
9172 | emit_insn (gen_memory_blockage ()); |
9173 | } |
9174 | } |
9175 | gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); |
9176 | |
  /* If we haven't already set up the frame pointer, do so now.  */
9178 | if (frame_pointer_needed && !m->fs.fp_valid) |
9179 | { |
9180 | insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
9181 | GEN_INT (frame.stack_pointer_offset |
9182 | - frame.hard_frame_pointer_offset)); |
9183 | insn = emit_insn (insn); |
9184 | RTX_FRAME_RELATED_P (insn) = 1; |
9185 | add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); |
9186 | |
9187 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9188 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
9189 | m->fs.fp_offset = frame.hard_frame_pointer_offset; |
9190 | m->fs.fp_valid = true; |
9191 | } |
9192 | |
9193 | if (!int_registers_saved) |
9194 | ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset); |
9195 | if (!sse_registers_saved) |
9196 | ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset); |
9197 | else if (save_stub_call_needed) |
9198 | ix86_emit_outlined_ms2sysv_save (frame); |
9199 | |
9200 | /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT |
9201 | in PROLOGUE. */ |
9202 | if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) |
9203 | { |
9204 | rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); |
9205 | insn = emit_insn (gen_set_got (pic)); |
9206 | RTX_FRAME_RELATED_P (insn) = 1; |
9207 | add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
9208 | emit_insn (gen_prologue_use (pic)); |
      /* Delete an already-emitted SET_GOT if it exists and was allocated to
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
9211 | ix86_elim_entry_set_got (reg: pic); |
9212 | } |
9213 | |
9214 | if (crtl->drap_reg && !crtl->stack_realign_needed) |
9215 | { |
9216 | /* vDRAP is setup but after reload it turns out stack realign |
9217 | isn't necessary, here we will emit prologue to setup DRAP |
9218 | without stack realign adjustment */ |
9219 | t = choose_baseaddr (cfa_offset: 0, NULL); |
9220 | emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
9221 | } |
9222 | |
9223 | /* Prevent instructions from being scheduled into register save push |
9224 | sequence when access to the redzone area is done through frame pointer. |
9225 | The offset between the frame pointer and the stack pointer is calculated |
9226 | relative to the value of the stack pointer at the end of the function |
9227 | prologue, and moving instructions that access redzone area via frame |
9228 | pointer inside push sequence violates this assumption. */ |
9229 | if (frame_pointer_needed && frame.red_zone_size) |
9230 | emit_insn (gen_memory_blockage ()); |
9231 | |
9232 | /* SEH requires that the prologue end within 256 bytes of the start of |
9233 | the function. Prevent instruction schedules that would extend that. |
9234 | Further, prevent alloca modifications to the stack pointer from being |
9235 | combined with prologue modifications. */ |
9236 | if (TARGET_SEH) |
9237 | emit_insn (gen_prologue_use (stack_pointer_rtx)); |
9238 | } |
9239 | |
/* Emit code to restore REG using a POP insn, and update the frame-state
   machine (cfun->machine->fs) plus any CFI notes the unwinder needs.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (arg: reg));

  /* Queue a CFA-restore note for the slot being popped, then account
     for the stack pointer moving up by one word.  */
  ix86_add_cfa_restore_note (insn, reg, cfa_offset: m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The CFA is tracked via SP: the pop adjusted SP, so emit the
	 matching CFA-adjust note and mirror it in the frame state.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9297 | |
/* Emit code to restore REG1 and REG2 using a single POP2 insn (which pops
   two words at once), keeping cfun->machine->fs and the CFI notes in sync.
   Mirrors ix86_emit_restore_reg_using_pop but with a two-word adjustment.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2)
{
  struct machine_function *m = cfun->machine;
  /* POP2 removes two words from the stack.  */
  const int offset = UNITS_PER_WORD * 2;

  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
  rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach restore notes for both registers to the one insn.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* SP-based CFA: record the two-word SP adjustment.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state.  NOTE(review): this comment was copied
     from the single-POP variant ("1 word") while the code here adjusts
     by the full POP2 size — confirm intended.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
9365 | |
9366 | /* Emit code to restore saved registers using POP insns. */ |
9367 | |
9368 | static void |
9369 | ix86_emit_restore_regs_using_pop (void) |
9370 | { |
9371 | unsigned int regno; |
9372 | |
9373 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9374 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9375 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno)); |
9376 | } |
9377 | |
9378 | /* Emit code to restore saved registers using POP2 insns. */ |
9379 | |
9380 | static void |
9381 | ix86_emit_restore_regs_using_pop2 (void) |
9382 | { |
9383 | int regno; |
9384 | int regno_list[2]; |
9385 | regno_list[0] = regno_list[1] = -1; |
9386 | int loaded_regnum = 0; |
9387 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
9388 | |
9389 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9390 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9391 | { |
9392 | if (aligned) |
9393 | { |
9394 | regno_list[loaded_regnum++] = regno; |
9395 | if (loaded_regnum == 2) |
9396 | { |
9397 | gcc_assert (regno_list[0] != -1 |
9398 | && regno_list[1] != -1 |
9399 | && regno_list[0] != regno_list[1]); |
9400 | |
9401 | ix86_emit_restore_reg_using_pop2 (reg1: gen_rtx_REG (word_mode, |
9402 | regno_list[0]), |
9403 | reg2: gen_rtx_REG (word_mode, |
9404 | regno_list[1])); |
9405 | loaded_regnum = 0; |
9406 | regno_list[0] = regno_list[1] = -1; |
9407 | } |
9408 | } |
9409 | else |
9410 | { |
9411 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno)); |
9412 | aligned = true; |
9413 | } |
9414 | } |
9415 | |
9416 | if (loaded_regnum == 1) |
9417 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno_list[0])); |
9418 | } |
9419 | |
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (arg0: word_mode));

  /* Flush any queued register-restore notes onto this insn.  */
  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE restores SP from the frame pointer and pops the saved FP:
     afterwards SP is valid (one word above the FP save slot) and the
     frame pointer no longer is.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA was based on the frame pointer; switch it back to the
	 stack pointer and emit the matching DEF_CFA note.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* The saved frame pointer itself was restored by the LEAVE.  */
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     cfa_offset: m->fs.fp_offset);
}
9452 | |
/* Emit code to restore saved general registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET; each subsequent one
   from the next lower word.  MAYBE_EH_RETURN selects the eh_return
   variant of the save-register predicate.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx mem;
	rtx_insn *insn;

	/* Load the register from its save slot, addressed via whatever
	   base register is currently valid.  */
	mem = choose_baseaddr (cfa_offset, NULL);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  /* Queue the restore note; it is attached to a later insn.  */
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
9492 | |
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET; each subsequent one
   from the next lower 16-byte slot.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	mem = choose_baseaddr (cfa_offset, align: &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	/* Queue the restore note; it is attached to a later insn.  */
	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
9522 | |
/* Emit the epilogue sequence that restores ms2sysv-clobbered registers via
   an out-of-line xlogue stub.  FRAME describes the current frame layout.
   If USE_CALL, the stub is called and control returns here; otherwise the
   stub is the tail of the function (a jump that ends in a return).  STYLE
   is the epilogue style passed through to stack adjustment.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (cfa_offset: rsi_offset, align: &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one element per restored register, plus the stub
     USE, plus (for the tail variant) the return and SP/FP updates.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  /* Tail variant with HFP: the stub also restores SP and RBP.  */
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, frame_reg: rsi, offset: r.offset);

      /* Save RSI frame load insn & note to add last.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset: r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end, since RSI is the
     base pointer the other loads address through.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, reg: gen_rtx_REG (DImode, SI_REG),
			     cfa_offset: rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	/* Attach the LEAVE-style notes to the jump insn itself.  */
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
    }
}
9681 | |
9682 | /* Restore function stack, frame, and registers. */ |
9683 | |
9684 | void |
9685 | ix86_expand_epilogue (int style) |
9686 | { |
9687 | struct machine_function *m = cfun->machine; |
9688 | struct machine_frame_state frame_state_save = m->fs; |
9689 | bool restore_regs_via_mov; |
9690 | bool using_drap; |
9691 | bool restore_stub_is_tail = false; |
9692 | |
9693 | if (ix86_function_naked (fn: current_function_decl)) |
9694 | { |
9695 | /* The program should not reach this point. */ |
9696 | emit_insn (gen_ud2 ()); |
9697 | return; |
9698 | } |
9699 | |
9700 | ix86_finalize_stack_frame_flags (); |
9701 | const struct ix86_frame &frame = cfun->machine->frame; |
9702 | |
9703 | m->fs.sp_realigned = stack_realign_fp; |
9704 | m->fs.sp_valid = stack_realign_fp |
9705 | || !frame_pointer_needed |
9706 | || crtl->sp_is_unchanging; |
9707 | gcc_assert (!m->fs.sp_valid |
9708 | || m->fs.sp_offset == frame.stack_pointer_offset); |
9709 | |
9710 | /* The FP must be valid if the frame pointer is present. */ |
9711 | gcc_assert (frame_pointer_needed == m->fs.fp_valid); |
9712 | gcc_assert (!m->fs.fp_valid |
9713 | || m->fs.fp_offset == frame.hard_frame_pointer_offset); |
9714 | |
9715 | /* We must have *some* valid pointer to the stack frame. */ |
9716 | gcc_assert (m->fs.sp_valid || m->fs.fp_valid); |
9717 | |
9718 | /* The DRAP is never valid at this point. */ |
9719 | gcc_assert (!m->fs.drap_valid); |
9720 | |
9721 | /* See the comment about red zone and frame |
9722 | pointer usage in ix86_expand_prologue. */ |
9723 | if (frame_pointer_needed && frame.red_zone_size) |
9724 | emit_insn (gen_memory_blockage ()); |
9725 | |
9726 | using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
9727 | gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); |
9728 | |
9729 | /* Determine the CFA offset of the end of the red-zone. */ |
9730 | m->fs.red_zone_offset = 0; |
9731 | if (ix86_using_red_zone () && crtl->args.pops_args < 65536) |
9732 | { |
9733 | /* The red-zone begins below return address and error code in |
9734 | exception handler. */ |
9735 | m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; |
9736 | |
9737 | /* When the register save area is in the aligned portion of |
9738 | the stack, determine the maximum runtime displacement that |
9739 | matches up with the aligned frame. */ |
9740 | if (stack_realign_drap) |
9741 | m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT |
9742 | + UNITS_PER_WORD); |
9743 | } |
9744 | |
9745 | HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; |
9746 | |
9747 | /* Special care must be taken for the normal return case of a function |
9748 | using eh_return: the eax and edx registers are marked as saved, but |
9749 | not restored along this path. Adjust the save location to match. */ |
9750 | if (crtl->calls_eh_return && style != 2) |
9751 | reg_save_offset -= 2 * UNITS_PER_WORD; |
9752 | |
9753 | /* EH_RETURN requires the use of moves to function properly. */ |
9754 | if (crtl->calls_eh_return) |
9755 | restore_regs_via_mov = true; |
9756 | /* SEH requires the use of pops to identify the epilogue. */ |
9757 | else if (TARGET_SEH) |
9758 | restore_regs_via_mov = false; |
9759 | /* If we're only restoring one register and sp cannot be used then |
9760 | using a move instruction to restore the register since it's |
9761 | less work than reloading sp and popping the register. */ |
9762 | else if (!sp_valid_at (cfa_offset: frame.hfp_save_offset) && frame.nregs <= 1) |
9763 | restore_regs_via_mov = true; |
9764 | else if (TARGET_EPILOGUE_USING_MOVE |
9765 | && cfun->machine->use_fast_prologue_epilogue |
9766 | && (frame.nregs > 1 |
9767 | || m->fs.sp_offset != reg_save_offset)) |
9768 | restore_regs_via_mov = true; |
9769 | else if (frame_pointer_needed |
9770 | && !frame.nregs |
9771 | && m->fs.sp_offset != reg_save_offset) |
9772 | restore_regs_via_mov = true; |
9773 | else if (frame_pointer_needed |
9774 | && TARGET_USE_LEAVE |
9775 | && cfun->machine->use_fast_prologue_epilogue |
9776 | && frame.nregs == 1) |
9777 | restore_regs_via_mov = true; |
9778 | else |
9779 | restore_regs_via_mov = false; |
9780 | |
9781 | if (restore_regs_via_mov || frame.nsseregs) |
9782 | { |
9783 | /* Ensure that the entire register save area is addressable via |
9784 | the stack pointer, if we will restore SSE regs via sp. */ |
9785 | if (TARGET_64BIT |
9786 | && m->fs.sp_offset > 0x7fffffff |
9787 | && sp_valid_at (cfa_offset: frame.stack_realign_offset + 1) |
9788 | && (frame.nsseregs + frame.nregs) != 0) |
9789 | { |
9790 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9791 | GEN_INT (m->fs.sp_offset |
9792 | - frame.sse_reg_save_offset), |
9793 | style, |
9794 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9795 | } |
9796 | } |
9797 | |
9798 | /* If there are any SSE registers to restore, then we have to do it |
9799 | via moves, since there's obviously no pop for SSE regs. */ |
9800 | if (frame.nsseregs) |
9801 | ix86_emit_restore_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset, |
9802 | maybe_eh_return: style == 2); |
9803 | |
9804 | if (m->call_ms2sysv) |
9805 | { |
9806 | int pop_incoming_args = crtl->args.pops_args && crtl->args.size; |
9807 | |
9808 | /* We cannot use a tail-call for the stub if: |
9809 | 1. We have to pop incoming args, |
9810 | 2. We have additional int regs to restore, or |
9811 | 3. A sibling call will be the tail-call, or |
9812 | 4. We are emitting an eh_return_internal epilogue. |
9813 | |
9814 | TODO: Item 4 has not yet tested! |
9815 | |
9816 | If any of the above are true, we will call the stub rather than |
9817 | jump to it. */ |
9818 | restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); |
9819 | ix86_emit_outlined_ms2sysv_restore (frame, use_call: !restore_stub_is_tail, style); |
9820 | } |
9821 | |
9822 | /* If using out-of-line stub that is a tail-call, then...*/ |
9823 | if (m->call_ms2sysv && restore_stub_is_tail) |
9824 | { |
      /* TODO: paranoid tests. (remove eventually) */
9826 | gcc_assert (m->fs.sp_valid); |
9827 | gcc_assert (!m->fs.sp_realigned); |
9828 | gcc_assert (!m->fs.fp_valid); |
9829 | gcc_assert (!m->fs.realigned); |
9830 | gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); |
9831 | gcc_assert (!crtl->drap_reg); |
9832 | gcc_assert (!frame.nregs); |
9833 | } |
9834 | else if (restore_regs_via_mov) |
9835 | { |
9836 | rtx t; |
9837 | |
9838 | if (frame.nregs) |
9839 | ix86_emit_restore_regs_using_mov (cfa_offset: reg_save_offset, maybe_eh_return: style == 2); |
9840 | |
9841 | /* eh_return epilogues need %ecx added to the stack pointer. */ |
9842 | if (style == 2) |
9843 | { |
9844 | rtx sa = EH_RETURN_STACKADJ_RTX; |
9845 | rtx_insn *insn; |
9846 | |
9847 | /* Stack realignment doesn't work with eh_return. */ |
9848 | if (crtl->stack_realign_needed) |
9849 | sorry ("Stack realignment not supported with " |
9850 | "%<__builtin_eh_return%>" ); |
9851 | |
9852 | /* regparm nested functions don't work with eh_return. */ |
9853 | if (ix86_static_chain_on_stack) |
9854 | sorry ("regparm nested function not supported with " |
9855 | "%<__builtin_eh_return%>" ); |
9856 | |
9857 | if (frame_pointer_needed) |
9858 | { |
9859 | t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
9860 | t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); |
9861 | emit_insn (gen_rtx_SET (sa, t)); |
9862 | |
9863 | /* NB: eh_return epilogues must restore the frame pointer |
9864 | in word_mode since the upper 32 bits of RBP register |
9865 | can have any values. */ |
9866 | t = gen_frame_mem (word_mode, hard_frame_pointer_rtx); |
9867 | rtx frame_reg = gen_rtx_REG (word_mode, |
9868 | HARD_FRAME_POINTER_REGNUM); |
9869 | insn = emit_move_insn (frame_reg, t); |
9870 | |
9871 | /* Note that we use SA as a temporary CFA, as the return |
9872 | address is at the proper place relative to it. We |
9873 | pretend this happens at the FP restore insn because |
9874 | prior to this insn the FP would be stored at the wrong |
9875 | offset relative to SA, and after this insn we have no |
9876 | other reasonable register to use for the CFA. We don't |
9877 | bother resetting the CFA to the SP for the duration of |
9878 | the return insn, unless the control flow instrumentation |
9879 | is done. In this case the SP is used later and we have |
9880 | to reset CFA to SP. */ |
9881 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9882 | plus_constant (Pmode, sa, UNITS_PER_WORD)); |
9883 | ix86_add_queued_cfa_restore_notes (insn); |
9884 | add_reg_note (insn, REG_CFA_RESTORE, frame_reg); |
9885 | RTX_FRAME_RELATED_P (insn) = 1; |
9886 | |
9887 | m->fs.cfa_reg = sa; |
9888 | m->fs.cfa_offset = UNITS_PER_WORD; |
9889 | m->fs.fp_valid = false; |
9890 | |
9891 | pro_epilogue_adjust_stack (stack_pointer_rtx, src: sa, |
9892 | const0_rtx, style, |
9893 | flag_cf_protection); |
9894 | } |
9895 | else |
9896 | { |
9897 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
9898 | t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); |
9899 | insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); |
9900 | ix86_add_queued_cfa_restore_notes (insn); |
9901 | |
9902 | gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
9903 | if (m->fs.cfa_offset != UNITS_PER_WORD) |
9904 | { |
9905 | m->fs.cfa_offset = UNITS_PER_WORD; |
9906 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9907 | plus_constant (Pmode, stack_pointer_rtx, |
9908 | UNITS_PER_WORD)); |
9909 | RTX_FRAME_RELATED_P (insn) = 1; |
9910 | } |
9911 | } |
9912 | m->fs.sp_offset = UNITS_PER_WORD; |
9913 | m->fs.sp_valid = true; |
9914 | m->fs.sp_realigned = false; |
9915 | } |
9916 | } |
9917 | else |
9918 | { |
9919 | /* SEH requires that the function end with (1) a stack adjustment |
9920 | if necessary, (2) a sequence of pops, and (3) a return or |
9921 | jump instruction. Prevent insns from the function body from |
9922 | being scheduled into this sequence. */ |
9923 | if (TARGET_SEH) |
9924 | { |
9925 | /* Prevent a catch region from being adjacent to the standard |
9926 | epilogue sequence. Unfortunately neither crtl->uses_eh_lsda |
9927 | nor several other flags that would be interesting to test are |
9928 | set up yet. */ |
9929 | if (flag_non_call_exceptions) |
9930 | emit_insn (gen_nops (const1_rtx)); |
9931 | else |
9932 | emit_insn (gen_blockage ()); |
9933 | } |
9934 | |
9935 | /* First step is to deallocate the stack frame so that we can |
9936 | pop the registers. If the stack pointer was realigned, it needs |
9937 | to be restored now. Also do it on SEH target for very large |
9938 | frame as the emitted instructions aren't allowed by the ABI |
9939 | in epilogues. */ |
9940 | if (!m->fs.sp_valid || m->fs.sp_realigned |
9941 | || (TARGET_SEH |
9942 | && (m->fs.sp_offset - reg_save_offset |
9943 | >= SEH_MAX_FRAME_SIZE))) |
9944 | { |
9945 | pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, |
9946 | GEN_INT (m->fs.fp_offset |
9947 | - reg_save_offset), |
9948 | style, set_cfa: false); |
9949 | } |
9950 | else if (m->fs.sp_offset != reg_save_offset) |
9951 | { |
9952 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9953 | GEN_INT (m->fs.sp_offset |
9954 | - reg_save_offset), |
9955 | style, |
9956 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
9957 | } |
9958 | |
9959 | if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) |
9960 | ix86_emit_restore_regs_using_pop2 (); |
9961 | else |
9962 | ix86_emit_restore_regs_using_pop (); |
9963 | } |
9964 | |
9965 | /* If we used a stack pointer and haven't already got rid of it, |
9966 | then do so now. */ |
9967 | if (m->fs.fp_valid) |
9968 | { |
9969 | /* If the stack pointer is valid and pointing at the frame |
9970 | pointer store address, then we only need a pop. */ |
9971 | if (sp_valid_at (cfa_offset: frame.hfp_save_offset) |
9972 | && m->fs.sp_offset == frame.hfp_save_offset) |
9973 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
9974 | /* Leave results in shorter dependency chains on CPUs that are |
9975 | able to grok it fast. */ |
9976 | else if (TARGET_USE_LEAVE |
9977 | || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) |
9978 | || !cfun->machine->use_fast_prologue_epilogue) |
9979 | ix86_emit_leave (NULL); |
9980 | else |
9981 | { |
9982 | pro_epilogue_adjust_stack (stack_pointer_rtx, |
9983 | hard_frame_pointer_rtx, |
9984 | const0_rtx, style, set_cfa: !using_drap); |
9985 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
9986 | } |
9987 | } |
9988 | |
9989 | if (using_drap) |
9990 | { |
9991 | int param_ptr_offset = UNITS_PER_WORD; |
9992 | rtx_insn *insn; |
9993 | |
9994 | gcc_assert (stack_realign_drap); |
9995 | |
9996 | if (ix86_static_chain_on_stack) |
9997 | param_ptr_offset += UNITS_PER_WORD; |
9998 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
9999 | param_ptr_offset += UNITS_PER_WORD; |
10000 | |
10001 | insn = emit_insn (gen_rtx_SET |
10002 | (stack_pointer_rtx, |
10003 | plus_constant (Pmode, crtl->drap_reg, |
10004 | -param_ptr_offset))); |
10005 | m->fs.cfa_reg = stack_pointer_rtx; |
10006 | m->fs.cfa_offset = param_ptr_offset; |
10007 | m->fs.sp_offset = param_ptr_offset; |
10008 | m->fs.realigned = false; |
10009 | |
10010 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10011 | plus_constant (Pmode, stack_pointer_rtx, |
10012 | param_ptr_offset)); |
10013 | RTX_FRAME_RELATED_P (insn) = 1; |
10014 | |
10015 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
10016 | ix86_emit_restore_reg_using_pop (crtl->drap_reg); |
10017 | } |
10018 | |
10019 | /* At this point the stack pointer must be valid, and we must have |
10020 | restored all of the registers. We may not have deallocated the |
10021 | entire stack frame. We've delayed this until now because it may |
10022 | be possible to merge the local stack deallocation with the |
10023 | deallocation forced by ix86_static_chain_on_stack. */ |
10024 | gcc_assert (m->fs.sp_valid); |
10025 | gcc_assert (!m->fs.sp_realigned); |
10026 | gcc_assert (!m->fs.fp_valid); |
10027 | gcc_assert (!m->fs.realigned); |
10028 | if (m->fs.sp_offset != UNITS_PER_WORD) |
10029 | { |
10030 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10031 | GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), |
10032 | style, set_cfa: true); |
10033 | } |
10034 | else |
10035 | ix86_add_queued_cfa_restore_notes (insn: get_last_insn ()); |
10036 | |
10037 | /* Sibcall epilogues don't want a return instruction. */ |
10038 | if (style == 0) |
10039 | { |
10040 | m->fs = frame_state_save; |
10041 | return; |
10042 | } |
10043 | |
10044 | if (cfun->machine->func_type != TYPE_NORMAL) |
10045 | emit_jump_insn (gen_interrupt_return ()); |
10046 | else if (crtl->args.pops_args && crtl->args.size) |
10047 | { |
10048 | rtx popc = GEN_INT (crtl->args.pops_args); |
10049 | |
10050 | /* i386 can only pop 64K bytes. If asked to pop more, pop return |
10051 | address, do explicit add, and jump indirectly to the caller. */ |
10052 | |
10053 | if (crtl->args.pops_args >= 65536) |
10054 | { |
10055 | rtx ecx = gen_rtx_REG (SImode, CX_REG); |
10056 | rtx_insn *insn; |
10057 | |
10058 | /* There is no "pascal" calling convention in any 64bit ABI. */ |
10059 | gcc_assert (!TARGET_64BIT); |
10060 | |
10061 | insn = emit_insn (gen_pop (arg: ecx)); |
10062 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10063 | m->fs.sp_offset -= UNITS_PER_WORD; |
10064 | |
10065 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10066 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10067 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10068 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10069 | RTX_FRAME_RELATED_P (insn) = 1; |
10070 | |
10071 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10072 | offset: popc, style: -1, set_cfa: true); |
10073 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10074 | } |
10075 | else |
10076 | emit_jump_insn (gen_simple_return_pop_internal (popc)); |
10077 | } |
10078 | else if (!m->call_ms2sysv || !restore_stub_is_tail) |
10079 | { |
10080 | /* In case of return from EH a simple return cannot be used |
10081 | as a return address will be compared with a shadow stack |
10082 | return address. Use indirect jump instead. */ |
10083 | if (style == 2 && flag_cf_protection) |
10084 | { |
10085 | /* Register used in indirect jump must be in word_mode. But |
10086 | Pmode may not be the same as word_mode for x32. */ |
10087 | rtx ecx = gen_rtx_REG (word_mode, CX_REG); |
10088 | rtx_insn *insn; |
10089 | |
10090 | insn = emit_insn (gen_pop (arg: ecx)); |
10091 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10092 | m->fs.sp_offset -= UNITS_PER_WORD; |
10093 | |
10094 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10095 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10096 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10097 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10098 | RTX_FRAME_RELATED_P (insn) = 1; |
10099 | |
10100 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10101 | } |
10102 | else |
10103 | emit_jump_insn (gen_simple_return_internal ()); |
10104 | } |
10105 | |
10106 | /* Restore the state back to the state from the prologue, |
10107 | so that it's correct for the next epilogue. */ |
10108 | m->fs = frame_state_save; |
10109 | } |
10110 | |
10111 | /* Reset from the function's potential modifications. */ |
10112 | |
10113 | static void |
10114 | ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) |
10115 | { |
10116 | if (pic_offset_table_rtx |
10117 | && !ix86_use_pseudo_pic_reg ()) |
10118 | SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); |
10119 | |
10120 | if (TARGET_MACHO) |
10121 | { |
10122 | rtx_insn *insn = get_last_insn (); |
10123 | rtx_insn *deleted_debug_label = NULL; |
10124 | |
10125 | /* Mach-O doesn't support labels at the end of objects, so if |
10126 | it looks like we might want one, take special action. |
10127 | First, collect any sequence of deleted debug labels. */ |
10128 | while (insn |
10129 | && NOTE_P (insn) |
10130 | && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) |
10131 | { |
10132 | /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL |
10133 | notes only, instead set their CODE_LABEL_NUMBER to -1, |
10134 | otherwise there would be code generation differences |
10135 | in between -g and -g0. */ |
10136 | if (NOTE_P (insn) && NOTE_KIND (insn) |
10137 | == NOTE_INSN_DELETED_DEBUG_LABEL) |
10138 | deleted_debug_label = insn; |
10139 | insn = PREV_INSN (insn); |
10140 | } |
10141 | |
10142 | /* If we have: |
10143 | label: |
10144 | barrier |
10145 | then this needs to be detected, so skip past the barrier. */ |
10146 | |
10147 | if (insn && BARRIER_P (insn)) |
10148 | insn = PREV_INSN (insn); |
10149 | |
10150 | /* Up to now we've only seen notes or barriers. */ |
10151 | if (insn) |
10152 | { |
10153 | if (LABEL_P (insn) |
10154 | || (NOTE_P (insn) |
10155 | && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) |
10156 | /* Trailing label. */ |
10157 | fputs (s: "\tnop\n" , stream: file); |
10158 | else if (cfun && ! cfun->is_thunk) |
10159 | { |
10160 | /* See if we have a completely empty function body, skipping |
10161 | the special case of the picbase thunk emitted as asm. */ |
10162 | while (insn && ! INSN_P (insn)) |
10163 | insn = PREV_INSN (insn); |
10164 | /* If we don't find any insns, we've got an empty function body; |
10165 | I.e. completely empty - without a return or branch. This is |
10166 | taken as the case where a function body has been removed |
10167 | because it contains an inline __builtin_unreachable(). GCC |
10168 | declares that reaching __builtin_unreachable() means UB so |
10169 | we're not obliged to do anything special; however, we want |
10170 | non-zero-sized function bodies. To meet this, and help the |
10171 | user out, let's trap the case. */ |
10172 | if (insn == NULL) |
10173 | fputs (s: "\tud2\n" , stream: file); |
10174 | } |
10175 | } |
10176 | else if (deleted_debug_label) |
10177 | for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) |
10178 | if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) |
10179 | CODE_LABEL_NUMBER (insn) = -1; |
10180 | } |
10181 | } |
10182 | |
10183 | /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */ |
10184 | |
10185 | void |
10186 | ix86_print_patchable_function_entry (FILE *file, |
10187 | unsigned HOST_WIDE_INT patch_area_size, |
10188 | bool record_p) |
10189 | { |
10190 | if (cfun->machine->function_label_emitted) |
10191 | { |
10192 | /* NB: When ix86_print_patchable_function_entry is called after |
10193 | function table has been emitted, we have inserted or queued |
10194 | a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper |
10195 | place. There is nothing to do here. */ |
10196 | return; |
10197 | } |
10198 | |
10199 | default_print_patchable_function_entry (file, patch_area_size, |
10200 | record_p); |
10201 | } |
10202 | |
10203 | /* Output patchable area. NB: default_print_patchable_function_entry |
10204 | isn't available in i386.md. */ |
10205 | |
10206 | void |
10207 | ix86_output_patchable_area (unsigned int patch_area_size, |
10208 | bool record_p) |
10209 | { |
10210 | default_print_patchable_function_entry (asm_out_file, |
10211 | patch_area_size, |
10212 | record_p); |
10213 | } |
10214 | |
10215 | /* Return a scratch register to use in the split stack prologue. The |
10216 | split stack prologue is used for -fsplit-stack. It is the first |
10217 | instructions in the function, even before the regular prologue. |
10218 | The scratch register can be any caller-saved register which is not |
10219 | used for parameters or for the static chain. */ |
10220 | |
10221 | static unsigned int |
10222 | split_stack_prologue_scratch_regno (void) |
10223 | { |
10224 | if (TARGET_64BIT) |
10225 | return R11_REG; |
10226 | else |
10227 | { |
10228 | bool is_fastcall, is_thiscall; |
10229 | int regparm; |
10230 | |
10231 | is_fastcall = (lookup_attribute (attr_name: "fastcall" , |
10232 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10233 | != NULL); |
10234 | is_thiscall = (lookup_attribute (attr_name: "thiscall" , |
10235 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10236 | != NULL); |
10237 | regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); |
10238 | |
10239 | if (is_fastcall) |
10240 | { |
10241 | if (DECL_STATIC_CHAIN (cfun->decl)) |
10242 | { |
10243 | sorry ("%<-fsplit-stack%> does not support fastcall with " |
10244 | "nested function" ); |
10245 | return INVALID_REGNUM; |
10246 | } |
10247 | return AX_REG; |
10248 | } |
10249 | else if (is_thiscall) |
10250 | { |
10251 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10252 | return DX_REG; |
10253 | return AX_REG; |
10254 | } |
10255 | else if (regparm < 3) |
10256 | { |
10257 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10258 | return CX_REG; |
10259 | else |
10260 | { |
10261 | if (regparm >= 2) |
10262 | { |
10263 | sorry ("%<-fsplit-stack%> does not support 2 register " |
10264 | "parameters for a nested function" ); |
10265 | return INVALID_REGNUM; |
10266 | } |
10267 | return DX_REG; |
10268 | } |
10269 | } |
10270 | else |
10271 | { |
10272 | /* FIXME: We could make this work by pushing a register |
10273 | around the addition and comparison. */ |
10274 | sorry ("%<-fsplit-stack%> does not support 3 register parameters" ); |
10275 | return INVALID_REGNUM; |
10276 | } |
10277 | } |
10278 | } |
10279 | |
10280 | /* A SYMBOL_REF for the function which allocates new stackspace for |
10281 | -fsplit-stack. */ |
10282 | |
10283 | static GTY(()) rtx split_stack_fn; |
10284 | |
10285 | /* A SYMBOL_REF for the more stack function when using the large |
10286 | model. */ |
10287 | |
10288 | static GTY(()) rtx split_stack_fn_large; |
10289 | |
10290 | /* Return location of the stack guard value in the TLS block. */ |
10291 | |
10292 | rtx |
10293 | ix86_split_stack_guard (void) |
10294 | { |
10295 | int offset; |
10296 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
10297 | rtx r; |
10298 | |
10299 | gcc_assert (flag_split_stack); |
10300 | |
10301 | #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET |
10302 | offset = TARGET_THREAD_SPLIT_STACK_OFFSET; |
10303 | #else |
10304 | gcc_unreachable (); |
10305 | #endif |
10306 | |
10307 | r = GEN_INT (offset); |
10308 | r = gen_const_mem (Pmode, r); |
10309 | set_mem_addr_space (r, as); |
10310 | |
10311 | return r; |
10312 | } |
10313 | |
10314 | /* Handle -fsplit-stack. These are the first instructions in the |
10315 | function, even before the regular prologue. */ |
10316 | |
void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  /* Finalize the frame layout so frame.stack_pointer_offset below is
     the real frame size this function will allocate.  */
  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* Offset doesn't fit in a 32-bit immediate: materialize it
	     first, then add the stack pointer in.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  /* Branch to LABEL (enough stack) when CURRENT >= LIMIT, unsigned.  */
  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack" );
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  /* crtl->args.size can be negative; clamp it to zero here.  */
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  /* Bytes the call should pop (32-bit pushed-argument case only).  */
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (fusage: &call_fusage, reg: rax);
	}

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	  && !TARGET_PECOFF)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model" );
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      /* Large PIC: compute the GOT base into r10 via a local
		 label, then load the function address from the GOT
		 slot into r11.  */
	      rtx_code_label *label;
	      rtx x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  /* Pack args_size (high 32 bits) and allocate (low 32 bits)
	     into r10; the double shift avoids UB if HOST_WIDE_INT
	     were only 32 bits wide.  */
	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (fusage: &call_fusage, reg: reg11);
	}

      use_reg (fusage: &call_fusage, reg: reg10);
    }
  else
    {
      /* 32-bit: push the two arguments; REG_ARGS_SIZE notes keep the
	 argument-area bookkeeping consistent.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (arg: allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.
      */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      /* Jump over the fall-through path; the __morestack and
	 no-__morestack paths rejoin at VARARGS_LABEL below.  */
      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
10582 | |
10583 | /* We may have to tell the dataflow pass that the split stack prologue |
10584 | is initializing a scratch register. */ |
10585 | |
10586 | static void |
10587 | ix86_live_on_entry (bitmap regs) |
10588 | { |
10589 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
10590 | { |
10591 | gcc_assert (flag_split_stack); |
10592 | bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); |
10593 | } |
10594 | } |
10595 | |
10596 | /* Extract the parts of an RTL expression that is a valid memory address |
10597 | for an instruction. Return false if the structure of the address is |
10598 | grossly off. */ |
10599 | |
10600 | bool |
10601 | ix86_decompose_address (rtx addr, struct ix86_address *out) |
10602 | { |
10603 | rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
10604 | rtx base_reg, index_reg; |
10605 | HOST_WIDE_INT scale = 1; |
10606 | rtx scale_rtx = NULL_RTX; |
10607 | rtx tmp; |
10608 | addr_space_t seg = ADDR_SPACE_GENERIC; |
10609 | |
10610 | /* Allow zero-extended SImode addresses, |
10611 | they will be emitted with addr32 prefix. */ |
10612 | if (TARGET_64BIT && GET_MODE (addr) == DImode) |
10613 | { |
10614 | if (GET_CODE (addr) == ZERO_EXTEND |
10615 | && GET_MODE (XEXP (addr, 0)) == SImode) |
10616 | { |
10617 | addr = XEXP (addr, 0); |
10618 | if (CONST_INT_P (addr)) |
10619 | return false; |
10620 | } |
10621 | else if (GET_CODE (addr) == AND |
10622 | && const_32bit_mask (XEXP (addr, 1), DImode)) |
10623 | { |
10624 | addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); |
10625 | if (addr == NULL_RTX) |
10626 | return false; |
10627 | |
10628 | if (CONST_INT_P (addr)) |
10629 | return false; |
10630 | } |
10631 | else if (GET_CODE (addr) == AND) |
10632 | { |
10633 | /* For ASHIFT inside AND, combine will not generate |
10634 | canonical zero-extend. Merge mask for AND and shift_count |
10635 | to check if it is canonical zero-extend. */ |
10636 | tmp = XEXP (addr, 0); |
10637 | rtx mask = XEXP (addr, 1); |
10638 | if (tmp && GET_CODE(tmp) == ASHIFT) |
10639 | { |
10640 | rtx shift_val = XEXP (tmp, 1); |
10641 | if (CONST_INT_P (mask) && CONST_INT_P (shift_val) |
10642 | && (((unsigned HOST_WIDE_INT) INTVAL(mask) |
10643 | | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1)) |
10644 | == 0xffffffff)) |
10645 | { |
10646 | addr = lowpart_subreg (SImode, XEXP (addr, 0), |
10647 | DImode); |
10648 | } |
10649 | } |
10650 | |
10651 | } |
10652 | } |
10653 | |
10654 | /* Allow SImode subregs of DImode addresses, |
10655 | they will be emitted with addr32 prefix. */ |
10656 | if (TARGET_64BIT && GET_MODE (addr) == SImode) |
10657 | { |
10658 | if (SUBREG_P (addr) |
10659 | && GET_MODE (SUBREG_REG (addr)) == DImode) |
10660 | { |
10661 | addr = SUBREG_REG (addr); |
10662 | if (CONST_INT_P (addr)) |
10663 | return false; |
10664 | } |
10665 | } |
10666 | |
10667 | if (REG_P (addr)) |
10668 | base = addr; |
10669 | else if (SUBREG_P (addr)) |
10670 | { |
10671 | if (REG_P (SUBREG_REG (addr))) |
10672 | base = addr; |
10673 | else |
10674 | return false; |
10675 | } |
10676 | else if (GET_CODE (addr) == PLUS) |
10677 | { |
10678 | rtx addends[4], op; |
10679 | int n = 0, i; |
10680 | |
10681 | op = addr; |
10682 | do |
10683 | { |
10684 | if (n >= 4) |
10685 | return false; |
10686 | addends[n++] = XEXP (op, 1); |
10687 | op = XEXP (op, 0); |
10688 | } |
10689 | while (GET_CODE (op) == PLUS); |
10690 | if (n >= 4) |
10691 | return false; |
10692 | addends[n] = op; |
10693 | |
10694 | for (i = n; i >= 0; --i) |
10695 | { |
10696 | op = addends[i]; |
10697 | switch (GET_CODE (op)) |
10698 | { |
10699 | case MULT: |
10700 | if (index) |
10701 | return false; |
10702 | index = XEXP (op, 0); |
10703 | scale_rtx = XEXP (op, 1); |
10704 | break; |
10705 | |
10706 | case ASHIFT: |
10707 | if (index) |
10708 | return false; |
10709 | index = XEXP (op, 0); |
10710 | tmp = XEXP (op, 1); |
10711 | if (!CONST_INT_P (tmp)) |
10712 | return false; |
10713 | scale = INTVAL (tmp); |
10714 | if ((unsigned HOST_WIDE_INT) scale > 3) |
10715 | return false; |
10716 | scale = 1 << scale; |
10717 | break; |
10718 | |
10719 | case ZERO_EXTEND: |
10720 | op = XEXP (op, 0); |
10721 | if (GET_CODE (op) != UNSPEC) |
10722 | return false; |
10723 | /* FALLTHRU */ |
10724 | |
10725 | case UNSPEC: |
10726 | if (XINT (op, 1) == UNSPEC_TP |
10727 | && TARGET_TLS_DIRECT_SEG_REFS |
10728 | && seg == ADDR_SPACE_GENERIC) |
10729 | seg = DEFAULT_TLS_SEG_REG; |
10730 | else |
10731 | return false; |
10732 | break; |
10733 | |
10734 | case SUBREG: |
10735 | if (!REG_P (SUBREG_REG (op))) |
10736 | return false; |
10737 | /* FALLTHRU */ |
10738 | |
10739 | case REG: |
10740 | if (!base) |
10741 | base = op; |
10742 | else if (!index) |
10743 | index = op; |
10744 | else |
10745 | return false; |
10746 | break; |
10747 | |
10748 | case CONST: |
10749 | case CONST_INT: |
10750 | case SYMBOL_REF: |
10751 | case LABEL_REF: |
10752 | if (disp) |
10753 | return false; |
10754 | disp = op; |
10755 | break; |
10756 | |
10757 | default: |
10758 | return false; |
10759 | } |
10760 | } |
10761 | } |
10762 | else if (GET_CODE (addr) == MULT) |
10763 | { |
10764 | index = XEXP (addr, 0); /* index*scale */ |
10765 | scale_rtx = XEXP (addr, 1); |
10766 | } |
10767 | else if (GET_CODE (addr) == ASHIFT) |
10768 | { |
10769 | /* We're called for lea too, which implements ashift on occasion. */ |
10770 | index = XEXP (addr, 0); |
10771 | tmp = XEXP (addr, 1); |
10772 | if (!CONST_INT_P (tmp)) |
10773 | return false; |
10774 | scale = INTVAL (tmp); |
10775 | if ((unsigned HOST_WIDE_INT) scale > 3) |
10776 | return false; |
10777 | scale = 1 << scale; |
10778 | } |
10779 | else |
10780 | disp = addr; /* displacement */ |
10781 | |
10782 | if (index) |
10783 | { |
10784 | if (REG_P (index)) |
10785 | ; |
10786 | else if (SUBREG_P (index) |
10787 | && REG_P (SUBREG_REG (index))) |
10788 | ; |
10789 | else |
10790 | return false; |
10791 | } |
10792 | |
10793 | /* Extract the integral value of scale. */ |
10794 | if (scale_rtx) |
10795 | { |
10796 | if (!CONST_INT_P (scale_rtx)) |
10797 | return false; |
10798 | scale = INTVAL (scale_rtx); |
10799 | } |
10800 | |
10801 | base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; |
10802 | index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; |
10803 | |
10804 | /* Avoid useless 0 displacement. */ |
10805 | if (disp == const0_rtx && (base || index)) |
10806 | disp = NULL_RTX; |
10807 | |
10808 | /* Allow arg pointer and stack pointer as index if there is not scaling. */ |
10809 | if (base_reg && index_reg && scale == 1 |
10810 | && (REGNO (index_reg) == ARG_POINTER_REGNUM |
10811 | || REGNO (index_reg) == FRAME_POINTER_REGNUM |
10812 | || REGNO (index_reg) == SP_REG)) |
10813 | { |
10814 | std::swap (a&: base, b&: index); |
10815 | std::swap (a&: base_reg, b&: index_reg); |
10816 | } |
10817 | |
10818 | /* Special case: %ebp cannot be encoded as a base without a displacement. |
10819 | Similarly %r13. */ |
10820 | if (!disp && base_reg |
10821 | && (REGNO (base_reg) == ARG_POINTER_REGNUM |
10822 | || REGNO (base_reg) == FRAME_POINTER_REGNUM |
10823 | || REGNO (base_reg) == BP_REG |
10824 | || REGNO (base_reg) == R13_REG)) |
10825 | disp = const0_rtx; |
10826 | |
10827 | /* Special case: on K6, [%esi] makes the instruction vector decoded. |
10828 | Avoid this by transforming to [%esi+0]. |
10829 | Reload calls address legitimization without cfun defined, so we need |
10830 | to test cfun for being non-NULL. */ |
10831 | if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun) |
10832 | && base_reg && !index_reg && !disp |
10833 | && REGNO (base_reg) == SI_REG) |
10834 | disp = const0_rtx; |
10835 | |
10836 | /* Special case: encode reg+reg instead of reg*2. */ |
10837 | if (!base && index && scale == 2) |
10838 | base = index, base_reg = index_reg, scale = 1; |
10839 | |
10840 | /* Special case: scaling cannot be encoded without base or displacement. */ |
10841 | if (!base && !disp && index && scale != 1) |
10842 | disp = const0_rtx; |
10843 | |
10844 | out->base = base; |
10845 | out->index = index; |
10846 | out->disp = disp; |
10847 | out->scale = scale; |
10848 | out->seg = seg; |
10849 | |
10850 | return true; |
10851 | } |
10852 | |
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (addr: x, out: &parts);

  gcc_assert (ok);

  /* Cost the underlying registers, not the SUBREG wrappers.  */
  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.base)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.index)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_CPU_P (K6)
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
10917 | |
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
10921 | |
10922 | static bool |
10923 | darwin_local_data_pic (rtx disp) |
10924 | { |
10925 | return (GET_CODE (disp) == UNSPEC |
10926 | && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); |
10927 | } |
10928 | |
10929 | /* True if the function symbol operand X should be loaded from GOT. |
10930 | If CALL_P is true, X is a call operand. |
10931 | |
10932 | NB: -mno-direct-extern-access doesn't force load from GOT for |
10933 | call. |
10934 | |
10935 | NB: In 32-bit mode, only non-PIC is allowed in inline assembly |
10936 | statements, since a PIC register could not be available at the |
10937 | call site. */ |
10938 | |
10939 | bool |
10940 | ix86_force_load_from_GOT_p (rtx x, bool call_p) |
10941 | { |
10942 | return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X)) |
10943 | && !TARGET_PECOFF && !TARGET_MACHO |
10944 | && (!flag_pic || this_is_asm_operands) |
10945 | && ix86_cmodel != CM_LARGE |
10946 | && ix86_cmodel != CM_LARGE_PIC |
10947 | && GET_CODE (x) == SYMBOL_REF |
10948 | && ((!call_p |
10949 | && (!ix86_direct_extern_access |
10950 | || (SYMBOL_REF_DECL (x) |
10951 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
10952 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))) |
10953 | || (SYMBOL_REF_FUNCTION_P (x) |
10954 | && (!flag_plt |
10955 | || (SYMBOL_REF_DECL (x) |
10956 | && lookup_attribute (attr_name: "noplt" , |
10957 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))) |
10958 | && !SYMBOL_REF_LOCAL_P (x)); |
10959 | } |
10960 | |
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip a constant integer offset; only symbol+CONST_INT forms
	 are allowed here.  */
      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (disp: x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    /* Local-exec TLS offsets wrap the underlying symbol.  */
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* Reject immediates that encode an ENDBR instruction.  */
      if (ix86_endbr_immediate_operand (x, VOIDmode))
	return false;

      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  /* Wide integer constants are valid only as standard SSE
	     constants or when they fit the widest available vector
	     move for the enabled ISA.  */
	  if (!standard_sse_constant_p (x, pred_mode: mode)
	      && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
				? XImode
				: (TARGET_AVX
				   ? OImode
				   : (TARGET_SSE2
				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
	    return false;
	  /* FALLTHRU */
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, pred_mode: mode))
	return false;
      break;

    case CONST_DOUBLE:
      if (mode == E_BFmode)
	return false;
      /* FALLTHRU */

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
11074 | |
11075 | /* Determine if it's legal to put X into the constant pool. This |
11076 | is not possible for the address of thread-local symbols, which |
11077 | is checked above. */ |
11078 | |
11079 | static bool |
11080 | ix86_cannot_force_const_mem (machine_mode mode, rtx x) |
11081 | { |
11082 | /* We can put any immediate constant in memory. */ |
11083 | switch (GET_CODE (x)) |
11084 | { |
11085 | CASE_CONST_ANY: |
11086 | return false; |
11087 | |
11088 | default: |
11089 | break; |
11090 | } |
11091 | |
11092 | return !ix86_legitimate_constant_p (mode, x); |
11093 | } |
11094 | |
11095 | /* Nonzero if the symbol is marked as dllimport, or as stub-variable, |
11096 | otherwise zero. */ |
11097 | |
11098 | static bool |
11099 | is_imported_p (rtx x) |
11100 | { |
11101 | if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES |
11102 | || GET_CODE (x) != SYMBOL_REF) |
11103 | return false; |
11104 | |
11105 | return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); |
11106 | } |
11107 | |
11108 | |
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      /* Strip a constant integer offset, if present.  */
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    /* Local-exec TLS offset: check the wrapped symbol.  */
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbolic operands are valid iff they form a valid PIC
	 displacement.  */
      return legitimate_pic_address_disp_p (x);

    default:
      /* Non-symbolic constants are always fine.  */
      return true;
    }
}
11153 | |
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1))
	    break;
	  /* DTPOFF/NTPOFF TLS offsets are OK when the addend fits in
	     a sign-extended 32-bit value.  */
	  if (GET_CODE (op0) == UNSPEC
	      && (XINT (op0, 1) == UNSPEC_DTPOFF
		  || XINT (op0, 1) == UNSPEC_NTPOFF)
	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
	    return true;
	  /* Keep symbol+offset well inside the relocation range.  */
	  if (INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol needs always to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (x: op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Non-external-weak function symbols need to be resolved only
		 for the large model.  Non-external symbols don't need to be
		 resolved for large and medium models.  For the small model,
		 we don't need to resolve anything here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0)
		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
		  || !SYMBOL_REF_EXTERNAL_P (op0)
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && (SYMBOL_REF_LOCAL_P (op0)
		       || ((ix86_direct_extern_access
			    && !(SYMBOL_REF_DECL (op0)
				 && lookup_attribute (attr_name: "nodirect_extern_access" ,
						      DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
			   && HAVE_LD_PIE_COPYRELOC
			   && flag_pie
			   && !SYMBOL_REF_WEAK (op0)
			   && !SYMBOL_REF_FUNCTION_P (op0)))
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  /* Anything else must be wrapped in a CONST to be a displacement.  */
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  /* 32-bit mode: strip an optional constant addend first.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While ABI specify also 32bit relocation but we don't produce it in
	 small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
11320 | |
11321 | /* Determine if op is suitable RTX for an address register. |
11322 | Return naked register if a register or a register subreg is |
11323 | found, otherwise return NULL_RTX. */ |
11324 | |
11325 | static rtx |
11326 | ix86_validate_address_register (rtx op) |
11327 | { |
11328 | machine_mode mode = GET_MODE (op); |
11329 | |
11330 | /* Only SImode or DImode registers can form the address. */ |
11331 | if (mode != SImode && mode != DImode) |
11332 | return NULL_RTX; |
11333 | |
11334 | if (REG_P (op)) |
11335 | return op; |
11336 | else if (SUBREG_P (op)) |
11337 | { |
11338 | rtx reg = SUBREG_REG (op); |
11339 | |
11340 | if (!REG_P (reg)) |
11341 | return NULL_RTX; |
11342 | |
11343 | mode = GET_MODE (reg); |
11344 | |
11345 | /* Don't allow SUBREGs that span more than a word. It can |
11346 | lead to spill failures when the register is one word out |
11347 | of a two word structure. */ |
11348 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
11349 | return NULL_RTX; |
11350 | |
11351 | /* Allow only SUBREGs of non-eliminable hard registers. */ |
11352 | if (register_no_elim_operand (reg, mode)) |
11353 | return reg; |
11354 | } |
11355 | |
11356 | /* Op is not a register. */ |
11357 | return NULL_RTX; |
11358 | } |
11359 | |
/* Determine which memory address register set insn can use.  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm is governed by -mapx-inline-asm-use-gpr32 instead of
     the insn's addr attribute.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  which_alternative = i;
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the global recognition state before returning.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
11414 | |
11415 | /* Return memory address register class insn can use. */ |
11416 | |
11417 | enum reg_class |
11418 | ix86_insn_base_reg_class (rtx_insn* insn) |
11419 | { |
11420 | switch (ix86_memory_address_reg_class (insn)) |
11421 | { |
11422 | case ADDR_GPR8: |
11423 | return LEGACY_GENERAL_REGS; |
11424 | case ADDR_GPR16: |
11425 | return GENERAL_GPR16; |
11426 | case ADDR_GPR32: |
11427 | break; |
11428 | default: |
11429 | gcc_unreachable (); |
11430 | } |
11431 | |
11432 | return BASE_REG_CLASS; |
11433 | } |
11434 | |
11435 | bool |
11436 | ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn) |
11437 | { |
11438 | switch (ix86_memory_address_reg_class (insn)) |
11439 | { |
11440 | case ADDR_GPR8: |
11441 | return LEGACY_INT_REGNO_P (regno); |
11442 | case ADDR_GPR16: |
11443 | return GENERAL_GPR16_REGNO_P (regno); |
11444 | case ADDR_GPR32: |
11445 | break; |
11446 | default: |
11447 | gcc_unreachable (); |
11448 | } |
11449 | |
11450 | return GENERAL_REGNO_P (regno); |
11451 | } |
11452 | |
11453 | enum reg_class |
11454 | ix86_insn_index_reg_class (rtx_insn* insn) |
11455 | { |
11456 | switch (ix86_memory_address_reg_class (insn)) |
11457 | { |
11458 | case ADDR_GPR8: |
11459 | return LEGACY_INDEX_REGS; |
11460 | case ADDR_GPR16: |
11461 | return INDEX_GPR16; |
11462 | case ADDR_GPR32: |
11463 | break; |
11464 | default: |
11465 | gcc_unreachable (); |
11466 | } |
11467 | |
11468 | return INDEX_REG_CLASS; |
11469 | } |
11470 | |
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
			   code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, out: &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (op: base);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
	  || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (op: index);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
	  || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While ABI specify also 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    /* TLS displacements; validated further below.  */
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacment must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	     {
	       return *(&a+i);
	     }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, x: disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, x: disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
11678 | |
11679 | /* Determine if a given RTX is a valid constant address. */ |
11680 | |
11681 | bool |
11682 | constant_address_p (rtx x) |
11683 | { |
11684 | return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, addr: x, strict: 1); |
11685 | } |
11686 | |
11687 | /* Return a unique alias set for the GOT. */ |
11688 | |
11689 | alias_set_type |
11690 | ix86_GOT_alias_set (void) |
11691 | { |
11692 | static alias_set_type set = -1; |
11693 | if (set == -1) |
11694 | set = new_alias_set (); |
11695 | return set; |
11696 | } |
11697 | |
11698 | /* Return a legitimate reference for ORIG (an address) using the |
11699 | register REG. If REG is 0, a new pseudo is generated. |
11700 | |
11701 | There are two types of references that must be handled: |
11702 | |
11703 | 1. Global data references must load the address from the GOT, via |
11704 | the PIC reg. An insn is emitted to do this load, and the reg is |
11705 | returned. |
11706 | |
11707 | 2. Static data references, constant pool addresses, and code labels |
11708 | compute the address as an offset from the GOT, whose base is in |
11709 | the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to |
11710 | differentiate them from global data objects. The returned |
11711 | address is the PIC reg + an unspec constant. |
11712 | |
11713 | TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC |
11714 | reg also appears in the address. */ |
11715 | |
rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  /* dllimport symbols are accessed through an __imp_ indirection stub;
     try that first so it takes precedence over the GOT-based forms.  */
  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, inreg: true);
      if (tmp)
	return tmp;
    }

  /* On 64-bit targets, a symbol that is already a valid RIP-relative
     displacement needs no rewriting at all.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (disp: addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      /* Keep any constant offset outside the UNSPEC, yielding
	 (const (plus (unspec [sym] GOTOFF) offset)).  */
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      /* Add in the PIC register, into REG when the caller supplied one.  */
      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, inreg: true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  /* 64-bit small/medium PIC: load the address from the GOT via
	     a RIP-relative @GOTPCREL memory reference.  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      /* Remaining cases: constants and composite CONST/PLUS forms.  */
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  /* 64-bit: offsets outside +-16MB may not be valid
		     displacements; force the pieces into registers.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize each operand of the PLUS separately, then
		 recombine, keeping any constant term outermost.  */
	      rtx base = legitimize_pic_address (orig: op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (orig: op1, reg: base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (GET_CODE (base) == SYMBOL_REF
			  || GET_CODE (base) == LABEL_REF))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
11926 | |
11927 | /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
11928 | |
11929 | static rtx |
11930 | get_thread_pointer (machine_mode tp_mode, bool to_reg) |
11931 | { |
11932 | rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
11933 | |
11934 | if (GET_MODE (tp) != tp_mode) |
11935 | { |
11936 | gcc_assert (GET_MODE (tp) == SImode); |
11937 | gcc_assert (tp_mode == DImode); |
11938 | |
11939 | tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); |
11940 | } |
11941 | |
11942 | if (to_reg) |
11943 | tp = copy_to_mode_reg (tp_mode, tp); |
11944 | |
11945 | return tp; |
11946 | } |
11947 | |
11948 | /* Construct the SYMBOL_REF for the tls_get_addr function. */ |
11949 | |
11950 | static GTY(()) rtx ix86_tls_symbol; |
11951 | |
11952 | static rtx |
11953 | ix86_tls_get_addr (void) |
11954 | { |
11955 | if (!ix86_tls_symbol) |
11956 | { |
11957 | const char *sym |
11958 | = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) |
11959 | ? "___tls_get_addr" : "__tls_get_addr" ); |
11960 | |
11961 | ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); |
11962 | } |
11963 | |
11964 | if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) |
11965 | { |
11966 | rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), |
11967 | UNSPEC_PLTOFF); |
11968 | return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, |
11969 | gen_rtx_CONST (Pmode, unspec)); |
11970 | } |
11971 | |
11972 | return ix86_tls_symbol; |
11973 | } |
11974 | |
11975 | /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
11976 | |
11977 | static GTY(()) rtx ix86_tls_module_base_symbol; |
11978 | |
11979 | rtx |
11980 | ix86_tls_module_base (void) |
11981 | { |
11982 | if (!ix86_tls_module_base_symbol) |
11983 | { |
11984 | ix86_tls_module_base_symbol |
11985 | = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_" ); |
11986 | |
11987 | SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) |
11988 | |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; |
11989 | } |
11990 | |
11991 | return ix86_tls_module_base_symbol; |
11992 | } |
11993 | |
11994 | /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is |
11995 | false if we expect this to be used for a memory address and true if |
11996 | we expect to load the address into a register. */ |
11997 | |
rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* 32-bit needs a PIC register for the resolver call: reuse the
	 PIC register under -fpic, otherwise materialize the GOT base
	 into a fresh register.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 (TLS descriptor) sequence: the descriptor call yields
	     the variable's offset from the thread pointer.  */
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: dest, x1: x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  /* Traditional __tls_get_addr call; on 64-bit the result comes
	     back in %rax and the call is wrapped as a const libcall
	     block with X as its REG_EQUAL value.  */
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, x0: rax, x1: x, x2: caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Same PIC-register setup as the global-dynamic case above.  */
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2: resolve _TLS_MODULE_BASE_ through a TLS descriptor;
	     the variable is then BASE + @dtpoff.  */
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (arg0: ptr_mode, x0: base, x1: tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (tp_mode: ptr_mode, to_reg: true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, x0: rax, x1: caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      /* The variable's address is the module base plus its @dtpoff
	 offset.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 additionally folds in the thread pointer; when TP is
	     narrower than Pmode (x32) do the add in ptr_mode and
	     zero-extend the result.  */
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (outermode: ptr_mode, op: dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (a: dest, b: x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the variable's TP-relative offset from its GOT slot.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  /* GNU TLS: address is TP + offset.  */
	  base = get_thread_pointer (tp_mode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  /* Sun TLS: address is TP - offset.  */
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant (@ntpoff/@tpoff); no GOT
	 access is needed at all.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     to_reg: for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, to_reg: true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
12240 | |
12241 | /* Return true if the TLS address requires insn using integer registers. |
12242 | It's used to prevent KMOV/VMOV in TLS code sequences which require integer |
12243 | MOV instructions, refer to PR103275. */ |
12244 | bool |
12245 | ix86_gpr_tls_address_pattern_p (rtx mem) |
12246 | { |
12247 | gcc_assert (MEM_P (mem)); |
12248 | |
12249 | rtx addr = XEXP (mem, 0); |
12250 | subrtx_var_iterator::array_type array; |
12251 | FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL) |
12252 | { |
12253 | rtx op = *iter; |
12254 | if (GET_CODE (op) == UNSPEC) |
12255 | switch (XINT (op, 1)) |
12256 | { |
12257 | case UNSPEC_GOTNTPOFF: |
12258 | return true; |
12259 | case UNSPEC_TPOFF: |
12260 | if (!TARGET_64BIT) |
12261 | return true; |
12262 | break; |
12263 | default: |
12264 | break; |
12265 | } |
12266 | } |
12267 | |
12268 | return false; |
12269 | } |
12270 | |
12271 | /* Return true if OP refers to a TLS address. */ |
12272 | bool |
12273 | ix86_tls_address_pattern_p (rtx op) |
12274 | { |
12275 | subrtx_var_iterator::array_type array; |
12276 | FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) |
12277 | { |
12278 | rtx op = *iter; |
12279 | if (MEM_P (op)) |
12280 | { |
12281 | rtx *x = &XEXP (op, 0); |
12282 | while (GET_CODE (*x) == PLUS) |
12283 | { |
12284 | int i; |
12285 | for (i = 0; i < 2; i++) |
12286 | { |
12287 | rtx u = XEXP (*x, i); |
12288 | if (GET_CODE (u) == ZERO_EXTEND) |
12289 | u = XEXP (u, 0); |
12290 | if (GET_CODE (u) == UNSPEC |
12291 | && XINT (u, 1) == UNSPEC_TP) |
12292 | return true; |
12293 | } |
12294 | x = &XEXP (*x, 0); |
12295 | } |
12296 | |
12297 | iter.skip_subrtxes (); |
12298 | } |
12299 | } |
12300 | |
12301 | return false; |
12302 | } |
12303 | |
12304 | /* Rewrite *LOC so that it refers to a default TLS address space. */ |
/* Rewrite *LOC so that it refers to a default TLS address space.  */
void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  /* Find the first MEM whose address adds in the thread pointer
     (UNSPEC_TP, possibly zero-extended), drop that term from the
     address, and mark the MEM as being in the TLS segment address
     space instead.  At most one MEM is rewritten.  */
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  /* Walk the left spine of PLUS terms in the address,
	     examining both operands at each level.  */
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Delete the thread-pointer term, keeping the
			 other operand of this PLUS.  */
		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  /* This MEM's address was handled above; don't descend.  */
	  iter.skip_subrtxes ();
	}
    }
}
12343 | |
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to a default TLS address space.  */
12346 | rtx |
12347 | ix86_rewrite_tls_address (rtx pattern) |
12348 | { |
12349 | pattern = copy_insn (pattern); |
12350 | ix86_rewrite_tls_address_1 (loc: &pattern); |
12351 | return pattern; |
12352 | } |
12353 | |
12354 | /* Create or return the unique __imp_DECL dllimport symbol corresponding |
12355 | to symbol DECL if BEIMPORT is true. Otherwise create or return the |
12356 | unique refptr-DECL symbol corresponding to symbol DECL. */ |
12357 | |
/* Hash traits for the decl -> stub-decl cache below.  Entries are
   tree_map nodes keyed on the original decl.  */
struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  /* The hash was precomputed when the entry was created
     (htab_hash_pointer of the source decl; see get_dllimport_decl).  */
  static inline hashval_t hash (tree_map *m) { return m->hash; }
  /* Two entries are equal iff they map the same source decl.  */
  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  /* During GC, keep an entry only while its source decl is live.  */
  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};
12373 | |
/* Cache mapping a decl to its dllimport/refptr stub decl.  */
static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  /* Lazily create the cache.  */
  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (n: 512);

  /* Look DECL up; reuse the cached stub decl if one exists.  */
  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (comparable: &in, hash: in.hash, insert: INSERT);
  h = *loc;
  if (h)
    return h->to;

  /* Create a new artificial, external, read-only pointer VAR_DECL that
     stands for the indirection slot.  */
  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  /* Build the assembler name: an __imp_ / __imp__ prefix for dllimport
     stubs, a .refptr. / refptr. prefix otherwise; the leading "*"
     suppresses any further user-label decoration.  */
  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (s: name);
  prefixlen = strlen (s: prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (dest: imp_name, src: prefix, n: prefixlen);
  memcpy (dest: imp_name + prefixlen, src: name, n: namelen + 1);

  name = ggc_alloc_string (contents: imp_name, length: namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  /* The stub's RTL is a constant load through the stub symbol, placed
     in the GOT alias set.  */
  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
12440 | |
12441 | /* Expand SYMBOL into its corresponding far-address symbol. |
12442 | WANT_REG is true if we require the result be a register. */ |
12443 | |
12444 | static rtx |
12445 | legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) |
12446 | { |
12447 | tree imp_decl; |
12448 | rtx x; |
12449 | |
12450 | gcc_assert (SYMBOL_REF_DECL (symbol)); |
12451 | imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), beimport: false); |
12452 | |
12453 | x = DECL_RTL (imp_decl); |
12454 | if (want_reg) |
12455 | x = force_reg (Pmode, x); |
12456 | return x; |
12457 | } |
12458 | |
12459 | /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is |
12460 | true if we require the result be a register. */ |
12461 | |
12462 | static rtx |
12463 | legitimize_dllimport_symbol (rtx symbol, bool want_reg) |
12464 | { |
12465 | tree imp_decl; |
12466 | rtx x; |
12467 | |
12468 | gcc_assert (SYMBOL_REF_DECL (symbol)); |
12469 | imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), beimport: true); |
12470 | |
12471 | x = DECL_RTL (imp_decl); |
12472 | if (want_reg) |
12473 | x = force_reg (Pmode, x); |
12474 | return x; |
12475 | } |
12476 | |
12477 | /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG |
12478 | is true if we require the result be a register. */ |
12479 | |
12480 | rtx |
12481 | legitimize_pe_coff_symbol (rtx addr, bool inreg) |
12482 | { |
12483 | if (!TARGET_PECOFF) |
12484 | return NULL_RTX; |
12485 | |
12486 | if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
12487 | { |
12488 | if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) |
12489 | return legitimize_dllimport_symbol (symbol: addr, want_reg: inreg); |
12490 | if (GET_CODE (addr) == CONST |
12491 | && GET_CODE (XEXP (addr, 0)) == PLUS |
12492 | && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
12493 | && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) |
12494 | { |
12495 | rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), want_reg: inreg); |
12496 | return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
12497 | } |
12498 | } |
12499 | |
12500 | if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) |
12501 | return NULL_RTX; |
12502 | if (GET_CODE (addr) == SYMBOL_REF |
12503 | && !is_imported_p (x: addr) |
12504 | && SYMBOL_REF_EXTERNAL_P (addr) |
12505 | && SYMBOL_REF_DECL (addr)) |
12506 | return legitimize_pe_coff_extern_decl (symbol: addr, want_reg: inreg); |
12507 | |
12508 | if (GET_CODE (addr) == CONST |
12509 | && GET_CODE (XEXP (addr, 0)) == PLUS |
12510 | && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
12511 | && !is_imported_p (XEXP (XEXP (addr, 0), 0)) |
12512 | && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) |
12513 | && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) |
12514 | { |
12515 | rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), want_reg: inreg); |
12516 | return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
12517 | } |
12518 | return NULL_RTX; |
12519 | } |
12520 | |
12521 | /* Try machine-dependent ways of modifying an illegitimate address |
12522 | to be legitimate. If we find one, return the new, valid address. |
12523 | This macro is used in only one place: `memory_address' in explow.cc. |
12524 | |
12525 | OLDX is the address as it was before break_out_memory_refs was called. |
12526 | In some cases it is useful to look at this to decide what needs to be done. |
12527 | |
12528 | It is always safe for this macro to do nothing. It exists to recognize |
12529 | opportunities to optimize the output. |
12530 | |
12531 | For the 80386, we handle X+REG by loading X into a register R and |
12532 | using R+REG. R will go in a general reg and indexing will be used. |
12533 | However, if REG is a broken-out memory address or multiplication, |
12534 | nothing needs to be done because REG can certainly go in a general reg. |
12535 | |
12536 | When -fpic is used, special handling is needed for symbolic references. |
12537 | See comments by legitimize_pic_address in i386.cc for details. */ |
12538 | |
static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* A SYMBOL_REF with a TLS model must be rewritten into the special
     TLS addressing form immediately; likewise for the
     (const (plus tls_symbol const_int)) shape.  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, model: (enum tls_model) log, for_mov: false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      model: (enum tls_model) log, for_mov: false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  /* dllimport'd symbols are accessed through the import table.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr: x, inreg: true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (orig: x, reg: 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already. */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations. */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* CONSTANT is the CONST_INT to fold; OTHER is the remaining
	     constant term it is added to (NULL means no CONST_INT was
	     found, so no folding happens).  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* A scaled-index term that is still a bare MULT must be moved
	 into a register to form a valid address.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (orig: x, reg: 0);
	}

      if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
	return x;

      /* Last resort: force the non-register operand of the PLUS into a
	 fresh Pmode register (zero-extending via convert_to_mode if the
	 forced value came out in a narrower mode).  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
12725 | |
12726 | /* Print an integer constant expression in assembler syntax. Addition |
12727 | and subtraction are the only arithmetic that may appear in these |
12728 | expressions. FILE is the stdio stream to write to, X is the rtx, and |
12729 | CODE is the operand print code from the output string. */ |
12730 | |
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." denotes the current location counter; only meaningful
	 in PIC expressions.  */
      gcc_assert (flag_pic);
      putc (c: '.', stream: file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function. */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* Calls through the PLT get an explicit @PLT suffix (ELF only).  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs (s: "@PLT" , stream: file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L" , CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler). */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them. */
      output_operand_lossage ("floating constant misused" );
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first. */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc (c: '+', stream: file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      /* Wrap the difference in dialect-specific grouping delimiters
	 (not used for Mach-O).  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc (c: '-', stream: file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation suffix
	 that corresponds to the UNSPEC kind.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs (s: "@GOT" , stream: file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs (s: "@GOTOFF" , stream: file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs (s: "@PLTOFF" , stream: file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]" , stream: file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]" , stream: file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too. */
	  fputs (s: "@gottpoff" , stream: file);
	  break;
	case UNSPEC_TPOFF:
	  fputs (s: "@tpoff" , stream: file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs (s: "@tpoff" , stream: file);
	  else
	    fputs (s: "@ntpoff" , stream: file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs (s: "@dtpoff" , stream: file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file);
	  else
	    fputs (s: "@gotntpoff" , stream: file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs (s: "@indntpoff" , stream: file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand" );
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand" );
    }
}
12882 | |
12883 | /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
12884 | We need to emit DTP-relative relocations. */ |
12885 | |
12886 | static void ATTRIBUTE_UNUSED |
12887 | i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
12888 | { |
12889 | fputs (ASM_LONG, stream: file); |
12890 | output_addr_const (file, x); |
12891 | fputs (s: "@dtpoff" , stream: file); |
12892 | switch (size) |
12893 | { |
12894 | case 4: |
12895 | break; |
12896 | case 8: |
12897 | fputs (s: ", 0" , stream: file); |
12898 | break; |
12899 | default: |
12900 | gcc_unreachable (); |
12901 | } |
12902 | } |
12903 | |
12904 | /* Return true if X is a representation of the PIC register. This copes |
12905 | with calls from ix86_find_base_term, where the register might have |
12906 | been replaced by a cselib value. */ |
12907 | |
12908 | static bool |
12909 | ix86_pic_register_p (rtx x) |
12910 | { |
12911 | if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) |
12912 | return (pic_offset_table_rtx |
12913 | && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); |
12914 | else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) |
12915 | return true; |
12916 | else if (!REG_P (x)) |
12917 | return false; |
12918 | else if (pic_offset_table_rtx) |
12919 | { |
12920 | if (REGNO (x) == REGNO (pic_offset_table_rtx)) |
12921 | return true; |
12922 | if (HARD_REGISTER_P (x) |
12923 | && !HARD_REGISTER_P (pic_offset_table_rtx) |
12924 | && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) |
12925 | return true; |
12926 | return false; |
12927 | } |
12928 | else |
12929 | return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; |
12930 | } |
12931 | |
12932 | /* Helper function for ix86_delegitimize_address. |
12933 | Attempt to delegitimize TLS local-exec accesses. */ |
12934 | |
static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Segment-relative TLS addresses only exist when direct %fs/%gs
     references are enabled; otherwise there is nothing to undo.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* The address must decompose with the TLS segment register and a
     (const ...) displacement wrapping an UNSPEC_NTPOFF.  */
  if (ix86_decompose_address (addr: x, out: &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Step over a folded constant offset: (plus (unspec ...) const_int).  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  /* Re-attach the constant offset we stepped over, if any.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild index*scale and base around the bare symbol.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* For a MEM input, return a MEM over the rebuilt address, keeping
     the original's attributes.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
12974 | |
12975 | /* In the name of slightly smaller debug output, and to cater to |
12976 | general assembler lossage, recognize PIC+GOTOFF and turn it back |
12977 | into a direct symbol reference. |
12978 | |
12979 | On Darwin, this is necessary to avoid a crash, because Darwin |
12980 | has a different PIC label for each routine but the DWARF debugging |
12981 | information is not associated with any particular routine, so it's |
12982 | necessary to remove references to the PIC label from RTL stored by |
12983 | the DWARF output code. |
12984 | |
12985 | This helper is used in the normal ix86_delegitimize_address |
12986 | entrypoint (e.g. used in the target delegitimization hook) and |
12987 | in ix86_find_base_term. As compile time memory optimization, we |
12988 | avoid allocating rtxes that will not change anything on the outcome |
12989 | of the callers (find_base_value and find_base_term). */ |
12990 | |
static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register. */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register. */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int. */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL. */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec [sym] PCREL) const_int)): unwrap to
	 sym + offset.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base. A CONST can't be arg_pointer_rtx based. */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* (const (unspec [sym] GOTPCREL/PCREL)): unwrap to the bare
	 symbol, narrowing back to the original MEM's mode if needed.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), op: x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic. */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither operand is the PIC register; the whole left side
	     is an opaque addend to be compensated for below.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Peel the CONST wrapper and any folded constant offset.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Only specific UNSPEC/context combinations may be stripped.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (disp: x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand. */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx. This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed. Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_RTP)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Narrow back to the original MEM's mode when they differ.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), op: result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
13142 | |
13143 | /* The normal instantiation of the above template. */ |
13144 | |
static rtx
ix86_delegitimize_address (rtx x)
{
  /* Full delegitimization for the TARGET_DELEGITIMIZE_ADDRESS hook;
     base_term_p is false so constant addends are reconstructed.  */
  return ix86_delegitimize_address_1 (x, base_term_p: false);
}
13150 | |
13151 | /* If X is a machine specific address (i.e. a symbol or label being |
13152 | referenced as a displacement from the GOT implemented using an |
13153 | UNSPEC), then return the base term. Otherwise return X. */ |
13154 | |
rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* On 64-bit, only (const [(plus] (unspec [sym] GOTPCREL/PCREL)
	 [const_int)]) wraps a base term; anything else is returned
	 unchanged.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      /* Skip a folded constant offset.  */
      if (GET_CODE (term) == PLUS
	  && CONST_INT_P (XEXP (term, 1)))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  /* 32-bit: share the delegitimization logic, in base-term mode.  */
  return ix86_delegitimize_address_1 (x, base_term_p: true);
}
13178 | |
13179 | /* Return true if X shouldn't be emitted into the debug info. |
13180 | Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ |
13181 | symbol easily into the .debug_info section, so we need not to |
13182 | delegitimize, but instead assemble as @gotoff. |
13183 | Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically |
13184 | assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ |
13185 | |
13186 | static bool |
13187 | ix86_const_not_ok_for_debug_p (rtx x) |
13188 | { |
13189 | if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) |
13190 | return true; |
13191 | |
13192 | if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) |
13193 | return true; |
13194 | |
13195 | return false; |
13196 | } |
13197 | |
/* Write to FILE the one/two-letter condition suffix (e.g. "e", "ge",
   "nbe") for comparison CODE evaluated in flags mode MODE.  REVERSE
   selects the reversed condition.  FP selects the spellings preferred
   in floating-point contexts (e.g. "nbe" instead of "a").  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* Map an FP comparison onto the equivalent integer condition first.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the special CC modes EQ tests a single flag directly.  */
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a" ;
	  break;
	case E_CCCmode:
	  suffix = "c" ;
	  break;
	case E_CCOmode:
	  suffix = "o" ;
	  break;
	case E_CCPmode:
	  suffix = "p" ;
	  break;
	case E_CCSmode:
	  suffix = "s" ;
	  break;
	default:
	  suffix = "e" ;
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na" ;
	  break;
	case E_CCCmode:
	  suffix = "nc" ;
	  break;
	case E_CCOmode:
	  suffix = "no" ;
	  break;
	case E_CCPmode:
	  suffix = "np" ;
	  break;
	case E_CCSmode:
	  suffix = "ns" ;
	  break;
	default:
	  suffix = "ne" ;
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g" ;
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov. */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a" ;
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  /* Overflow is known clear, so LT reduces to the sign flag.  */
	  suffix = "s" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b" ;
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c" ;
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns" ;
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge" ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb" ;
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc" ;
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le" ;
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be" ;
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p" ;
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np" ;
      break;
    default:
      gcc_unreachable ();
    }
  fputs (s: suffix, stream: file);
}
13347 | |
13348 | /* Print the name of register X to FILE based on its machine mode and number. |
13349 | If CODE is 'w', pretend the mode is HImode. |
13350 | If CODE is 'b', pretend the mode is QImode. |
13351 | If CODE is 'k', pretend the mode is SImode. |
13352 | If CODE is 'q', pretend the mode is DImode. |
13353 | If CODE is 'x', pretend the mode is V4SFmode. |
13354 | If CODE is 't', pretend the mode is V8SFmode. |
13355 | If CODE is 'g', pretend the mode is V16SFmode. |
13356 | If CODE is 'h', pretend the reg is the 'high' byte register. |
13357 | If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. |
13358 | If CODE is 'd', duplicate the operand for AVX instruction. |
13359 | If CODE is 'V', print naked full integer register name without %. |
13360 | */ |
13361 | |
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes registers with '%'; 'V' asks for the naked
     name.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc (c: '%', stream: file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs (s: "rip" , stream: file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs (s: "st(0)" , stream: file);
      return;
    }

  /* Translate the modifier letter into an operand size in bytes
     (0 means the "high byte" registers ah/bh/ch/dh).  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* These registers never appear by name in valid output.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'" , reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output" );
      return;
    }

  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register" );
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register" );
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get the 'e'/'r' size prefix.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register" );
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* SSE registers: swap the leading 'x' for 'y' (ymm) or 'z' (zmm).  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (c: msize == 32 ? 'y' : 'z', stream: file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (s: reg, stream: file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves" );
	  break;
	case 1:
	  putc (c: 'b', stream: file);
	  break;
	case 2:
	  putc (c: 'w', stream: file);
	  break;
	case 4:
	  putc (c: 'd', stream: file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register" );
	  break;
	}
      return;
    }

  /* 'd' with AVX prints the register twice, comma-separated.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (stream: file, format: ", %%%s" , reg);
      else
	fprintf (stream: file, format: ", %s" , reg);
    }
}
13512 | |
13513 | /* Meaning of CODE: |
13514 | L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
13515 | C -- print opcode suffix for set/cmov insn. |
13516 | c -- like C, but print reversed condition |
13517 | F,f -- likewise, but for floating-point. |
13518 | O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
13519 | otherwise nothing |
13520 | R -- print embedded rounding and sae. |
13521 | r -- print only sae. |
13522 | z -- print the opcode suffix for the size of the current operand. |
13523 | Z -- likewise, with special suffixes for x87 instructions. |
13524 | * -- print a star (in certain assembler syntax) |
13525 | A -- print an absolute memory reference. |
13526 | E -- print address with DImode register names if TARGET_64BIT. |
13527 | w -- print the operand as if it's a "word" (HImode) even if it isn't. |
13528 | s -- print a shift double count, followed by the assemblers argument |
13529 | delimiter. |
13530 | b -- print the QImode name of the register for the indicated operand. |
13531 | %b0 would print %al if operands[0] is reg 0. |
13532 | w -- likewise, print the HImode name of the register. |
13533 | k -- likewise, print the SImode name of the register. |
13534 | q -- likewise, print the DImode name of the register. |
13535 | x -- likewise, print the V4SFmode name of the register. |
13536 | t -- likewise, print the V8SFmode name of the register. |
13537 | g -- likewise, print the V16SFmode name of the register. |
13538 | h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
13539 | y -- print "st(0)" instead of "st" as a register. |
13540 | d -- print duplicated register operand for AVX instruction. |
13541 | D -- print condition for SSE cmp instruction. |
13542 | P -- if PIC, print an @PLT suffix. For -fno-plt, load function |
13543 | address from GOT. |
13544 | p -- print raw symbol name. |
13545 | X -- don't print any sort of PIC '@' suffix for a symbol. |
13546 | & -- print some in-use local-dynamic symbol name. |
13547 | H -- print a memory address offset by 8; used for sse high-parts |
13548 | Y -- print condition for XOP pcom* instruction. |
13549 | V -- print naked full integer register name without %. |
13550 | + -- print a branch hint as 'cs' or 'ds' prefix |
13551 | ; -- print a semicolon (after prefixes due to bug in older gas). |
13552 | ~ -- print "i" if TARGET_AVX2, "f" otherwise. |
13553 | ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode |
13554 | M -- print addr32 prefix for TARGET_X32 with VSIB address. |
13555 | ! -- print NOTRACK prefix for jxx/call/ret instructions if required. |
13556 | N -- print maskz if it's constant 0 operand. |
13557 | */ |
13558 | |
13559 | void |
13560 | ix86_print_operand (FILE *file, rtx x, int code) |
13561 | { |
13562 | if (code) |
13563 | { |
13564 | switch (code) |
13565 | { |
13566 | case 'A': |
13567 | switch (ASSEMBLER_DIALECT) |
13568 | { |
13569 | case ASM_ATT: |
13570 | putc (c: '*', stream: file); |
13571 | break; |
13572 | |
13573 | case ASM_INTEL: |
13574 | /* Intel syntax. For absolute addresses, registers should not |
13575 | be surrounded by braces. */ |
13576 | if (!REG_P (x)) |
13577 | { |
13578 | putc (c: '[', stream: file); |
13579 | ix86_print_operand (file, x, code: 0); |
13580 | putc (c: ']', stream: file); |
13581 | return; |
13582 | } |
13583 | break; |
13584 | |
13585 | default: |
13586 | gcc_unreachable (); |
13587 | } |
13588 | |
13589 | ix86_print_operand (file, x, code: 0); |
13590 | return; |
13591 | |
13592 | case 'E': |
13593 | /* Wrap address in an UNSPEC to declare special handling. */ |
13594 | if (TARGET_64BIT) |
13595 | x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); |
13596 | |
13597 | output_address (VOIDmode, x); |
13598 | return; |
13599 | |
13600 | case 'L': |
13601 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13602 | putc (c: 'l', stream: file); |
13603 | return; |
13604 | |
13605 | case 'W': |
13606 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13607 | putc (c: 'w', stream: file); |
13608 | return; |
13609 | |
13610 | case 'B': |
13611 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13612 | putc (c: 'b', stream: file); |
13613 | return; |
13614 | |
13615 | case 'Q': |
13616 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13617 | putc (c: 'l', stream: file); |
13618 | return; |
13619 | |
13620 | case 'S': |
13621 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13622 | putc (c: 's', stream: file); |
13623 | return; |
13624 | |
13625 | case 'T': |
13626 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13627 | putc (c: 't', stream: file); |
13628 | return; |
13629 | |
13630 | case 'O': |
13631 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
13632 | if (ASSEMBLER_DIALECT != ASM_ATT) |
13633 | return; |
13634 | |
13635 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13636 | { |
13637 | case 2: |
13638 | putc ('w', file); |
13639 | break; |
13640 | |
13641 | case 4: |
13642 | putc ('l', file); |
13643 | break; |
13644 | |
13645 | case 8: |
13646 | putc ('q', file); |
13647 | break; |
13648 | |
13649 | default: |
13650 | output_operand_lossage ("invalid operand size for operand " |
13651 | "code 'O'" ); |
13652 | return; |
13653 | } |
13654 | |
13655 | putc ('.', file); |
13656 | #endif |
13657 | return; |
13658 | |
13659 | case 'z': |
13660 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
13661 | { |
13662 | /* Opcodes don't get size suffixes if using Intel opcodes. */ |
13663 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
13664 | return; |
13665 | |
13666 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13667 | { |
13668 | case 1: |
13669 | putc (c: 'b', stream: file); |
13670 | return; |
13671 | |
13672 | case 2: |
13673 | putc (c: 'w', stream: file); |
13674 | return; |
13675 | |
13676 | case 4: |
13677 | putc (c: 'l', stream: file); |
13678 | return; |
13679 | |
13680 | case 8: |
13681 | putc (c: 'q', stream: file); |
13682 | return; |
13683 | |
13684 | default: |
13685 | output_operand_lossage ("invalid operand size for operand " |
13686 | "code 'z'" ); |
13687 | return; |
13688 | } |
13689 | } |
13690 | |
13691 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
13692 | { |
13693 | if (this_is_asm_operands) |
13694 | warning_for_asm (this_is_asm_operands, |
13695 | "non-integer operand used with operand code %<z%>" ); |
13696 | else |
13697 | warning (0, "non-integer operand used with operand code %<z%>" ); |
13698 | } |
13699 | /* FALLTHRU */ |
13700 | |
13701 | case 'Z': |
13702 | /* 387 opcodes don't get size suffixes if using Intel opcodes. */ |
13703 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
13704 | return; |
13705 | |
13706 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
13707 | { |
13708 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13709 | { |
13710 | case 2: |
13711 | #ifdef HAVE_AS_IX86_FILDS |
13712 | putc (c: 's', stream: file); |
13713 | #endif |
13714 | return; |
13715 | |
13716 | case 4: |
13717 | putc (c: 'l', stream: file); |
13718 | return; |
13719 | |
13720 | case 8: |
13721 | #ifdef HAVE_AS_IX86_FILDQ |
13722 | putc (c: 'q', stream: file); |
13723 | #else |
13724 | fputs ("ll" , file); |
13725 | #endif |
13726 | return; |
13727 | |
13728 | default: |
13729 | break; |
13730 | } |
13731 | } |
13732 | else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
13733 | { |
13734 | /* 387 opcodes don't get size suffixes |
13735 | if the operands are registers. */ |
13736 | if (STACK_REG_P (x)) |
13737 | return; |
13738 | |
13739 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13740 | { |
13741 | case 4: |
13742 | putc (c: 's', stream: file); |
13743 | return; |
13744 | |
13745 | case 8: |
13746 | putc (c: 'l', stream: file); |
13747 | return; |
13748 | |
13749 | case 12: |
13750 | case 16: |
13751 | putc (c: 't', stream: file); |
13752 | return; |
13753 | |
13754 | default: |
13755 | break; |
13756 | } |
13757 | } |
13758 | else |
13759 | { |
13760 | output_operand_lossage ("invalid operand type used with " |
13761 | "operand code '%c'" , code); |
13762 | return; |
13763 | } |
13764 | |
13765 | output_operand_lossage ("invalid operand size for operand code '%c'" , |
13766 | code); |
13767 | return; |
13768 | |
13769 | case 'd': |
13770 | case 'b': |
13771 | case 'w': |
13772 | case 'k': |
13773 | case 'q': |
13774 | case 'h': |
13775 | case 't': |
13776 | case 'g': |
13777 | case 'y': |
13778 | case 'x': |
13779 | case 'X': |
13780 | case 'P': |
13781 | case 'p': |
13782 | case 'V': |
13783 | break; |
13784 | |
13785 | case 's': |
13786 | if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) |
13787 | { |
13788 | ix86_print_operand (file, x, code: 0); |
13789 | fputs (s: ", " , stream: file); |
13790 | } |
13791 | return; |
13792 | |
13793 | case 'Y': |
13794 | switch (GET_CODE (x)) |
13795 | { |
13796 | case NE: |
13797 | fputs (s: "neq" , stream: file); |
13798 | break; |
13799 | case EQ: |
13800 | fputs (s: "eq" , stream: file); |
13801 | break; |
13802 | case GE: |
13803 | case GEU: |
13804 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt" , stream: file); |
13805 | break; |
13806 | case GT: |
13807 | case GTU: |
13808 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle" , stream: file); |
13809 | break; |
13810 | case LE: |
13811 | case LEU: |
13812 | fputs (s: "le" , stream: file); |
13813 | break; |
13814 | case LT: |
13815 | case LTU: |
13816 | fputs (s: "lt" , stream: file); |
13817 | break; |
13818 | case UNORDERED: |
13819 | fputs (s: "unord" , stream: file); |
13820 | break; |
13821 | case ORDERED: |
13822 | fputs (s: "ord" , stream: file); |
13823 | break; |
13824 | case UNEQ: |
13825 | fputs (s: "ueq" , stream: file); |
13826 | break; |
13827 | case UNGE: |
13828 | fputs (s: "nlt" , stream: file); |
13829 | break; |
13830 | case UNGT: |
13831 | fputs (s: "nle" , stream: file); |
13832 | break; |
13833 | case UNLE: |
13834 | fputs (s: "ule" , stream: file); |
13835 | break; |
13836 | case UNLT: |
13837 | fputs (s: "ult" , stream: file); |
13838 | break; |
13839 | case LTGT: |
13840 | fputs (s: "une" , stream: file); |
13841 | break; |
13842 | default: |
13843 | output_operand_lossage ("operand is not a condition code, " |
13844 | "invalid operand code 'Y'" ); |
13845 | return; |
13846 | } |
13847 | return; |
13848 | |
13849 | case 'D': |
13850 | /* Little bit of braindamage here. The SSE compare instructions |
13851 | does use completely different names for the comparisons that the |
13852 | fp conditional moves. */ |
13853 | switch (GET_CODE (x)) |
13854 | { |
13855 | case UNEQ: |
13856 | if (TARGET_AVX) |
13857 | { |
13858 | fputs (s: "eq_us" , stream: file); |
13859 | break; |
13860 | } |
13861 | /* FALLTHRU */ |
13862 | case EQ: |
13863 | fputs (s: "eq" , stream: file); |
13864 | break; |
13865 | case UNLT: |
13866 | if (TARGET_AVX) |
13867 | { |
13868 | fputs (s: "nge" , stream: file); |
13869 | break; |
13870 | } |
13871 | /* FALLTHRU */ |
13872 | case LT: |
13873 | fputs (s: "lt" , stream: file); |
13874 | break; |
13875 | case UNLE: |
13876 | if (TARGET_AVX) |
13877 | { |
13878 | fputs (s: "ngt" , stream: file); |
13879 | break; |
13880 | } |
13881 | /* FALLTHRU */ |
13882 | case LE: |
13883 | fputs (s: "le" , stream: file); |
13884 | break; |
13885 | case UNORDERED: |
13886 | fputs (s: "unord" , stream: file); |
13887 | break; |
13888 | case LTGT: |
13889 | if (TARGET_AVX) |
13890 | { |
13891 | fputs (s: "neq_oq" , stream: file); |
13892 | break; |
13893 | } |
13894 | /* FALLTHRU */ |
13895 | case NE: |
13896 | fputs (s: "neq" , stream: file); |
13897 | break; |
13898 | case GE: |
13899 | if (TARGET_AVX) |
13900 | { |
13901 | fputs (s: "ge" , stream: file); |
13902 | break; |
13903 | } |
13904 | /* FALLTHRU */ |
13905 | case UNGE: |
13906 | fputs (s: "nlt" , stream: file); |
13907 | break; |
13908 | case GT: |
13909 | if (TARGET_AVX) |
13910 | { |
13911 | fputs (s: "gt" , stream: file); |
13912 | break; |
13913 | } |
13914 | /* FALLTHRU */ |
13915 | case UNGT: |
13916 | fputs (s: "nle" , stream: file); |
13917 | break; |
13918 | case ORDERED: |
13919 | fputs (s: "ord" , stream: file); |
13920 | break; |
13921 | default: |
13922 | output_operand_lossage ("operand is not a condition code, " |
13923 | "invalid operand code 'D'" ); |
13924 | return; |
13925 | } |
13926 | return; |
13927 | |
13928 | case 'F': |
13929 | case 'f': |
13930 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
13931 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13932 | putc ('.', file); |
13933 | gcc_fallthrough (); |
13934 | #endif |
13935 | |
13936 | case 'C': |
13937 | case 'c': |
13938 | if (!COMPARISON_P (x)) |
13939 | { |
13940 | output_operand_lossage ("operand is not a condition code, " |
13941 | "invalid operand code '%c'" , code); |
13942 | return; |
13943 | } |
13944 | put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), |
13945 | reverse: code == 'c' || code == 'f', |
13946 | fp: code == 'F' || code == 'f', |
13947 | file); |
13948 | return; |
13949 | |
13950 | case 'H': |
13951 | if (!offsettable_memref_p (x)) |
13952 | { |
13953 | output_operand_lossage ("operand is not an offsettable memory " |
13954 | "reference, invalid operand code 'H'" ); |
13955 | return; |
13956 | } |
13957 | /* It doesn't actually matter what mode we use here, as we're |
13958 | only going to use this for printing. */ |
13959 | x = adjust_address_nv (x, DImode, 8); |
13960 | /* Output 'qword ptr' for intel assembler dialect. */ |
13961 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
13962 | code = 'q'; |
13963 | break; |
13964 | |
13965 | case 'K': |
13966 | if (!CONST_INT_P (x)) |
13967 | { |
13968 | output_operand_lossage ("operand is not an integer, invalid " |
13969 | "operand code 'K'" ); |
13970 | return; |
13971 | } |
13972 | |
13973 | if (INTVAL (x) & IX86_HLE_ACQUIRE) |
13974 | #ifdef HAVE_AS_IX86_HLE |
13975 | fputs (s: "xacquire " , stream: file); |
13976 | #else |
13977 | fputs ("\n" ASM_BYTE "0xf2\n\t" , file); |
13978 | #endif |
13979 | else if (INTVAL (x) & IX86_HLE_RELEASE) |
13980 | #ifdef HAVE_AS_IX86_HLE |
13981 | fputs (s: "xrelease " , stream: file); |
13982 | #else |
13983 | fputs ("\n" ASM_BYTE "0xf3\n\t" , file); |
13984 | #endif |
13985 | /* We do not want to print value of the operand. */ |
13986 | return; |
13987 | |
13988 | case 'N': |
13989 | if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
13990 | fputs (s: "{z}" , stream: file); |
13991 | return; |
13992 | |
13993 | case 'r': |
13994 | if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) |
13995 | { |
13996 | output_operand_lossage ("operand is not a specific integer, " |
13997 | "invalid operand code 'r'" ); |
13998 | return; |
13999 | } |
14000 | |
14001 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
14002 | fputs (s: ", " , stream: file); |
14003 | |
14004 | fputs (s: "{sae}" , stream: file); |
14005 | |
14006 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14007 | fputs (s: ", " , stream: file); |
14008 | |
14009 | return; |
14010 | |
14011 | case 'R': |
14012 | if (!CONST_INT_P (x)) |
14013 | { |
14014 | output_operand_lossage ("operand is not an integer, invalid " |
14015 | "operand code 'R'" ); |
14016 | return; |
14017 | } |
14018 | |
14019 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
14020 | fputs (s: ", " , stream: file); |
14021 | |
14022 | switch (INTVAL (x)) |
14023 | { |
14024 | case ROUND_NEAREST_INT | ROUND_SAE: |
14025 | fputs (s: "{rn-sae}" , stream: file); |
14026 | break; |
14027 | case ROUND_NEG_INF | ROUND_SAE: |
14028 | fputs (s: "{rd-sae}" , stream: file); |
14029 | break; |
14030 | case ROUND_POS_INF | ROUND_SAE: |
14031 | fputs (s: "{ru-sae}" , stream: file); |
14032 | break; |
14033 | case ROUND_ZERO | ROUND_SAE: |
14034 | fputs (s: "{rz-sae}" , stream: file); |
14035 | break; |
14036 | default: |
14037 | output_operand_lossage ("operand is not a specific integer, " |
14038 | "invalid operand code 'R'" ); |
14039 | } |
14040 | |
14041 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14042 | fputs (s: ", " , stream: file); |
14043 | |
14044 | return; |
14045 | |
14046 | case '*': |
14047 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14048 | putc (c: '*', stream: file); |
14049 | return; |
14050 | |
14051 | case '&': |
14052 | { |
14053 | const char *name = get_some_local_dynamic_name (); |
14054 | if (name == NULL) |
14055 | output_operand_lossage ("'%%&' used without any " |
14056 | "local dynamic TLS references" ); |
14057 | else |
14058 | assemble_name (file, name); |
14059 | return; |
14060 | } |
14061 | |
14062 | case '+': |
14063 | { |
14064 | rtx x; |
14065 | |
14066 | if (!optimize |
14067 | || optimize_function_for_size_p (cfun) |
14068 | || !TARGET_BRANCH_PREDICTION_HINTS) |
14069 | return; |
14070 | |
14071 | x = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
14072 | if (x) |
14073 | { |
14074 | int pred_val = profile_probability::from_reg_br_prob_note |
14075 | (XINT (x, 0)).to_reg_br_prob_base (); |
14076 | |
14077 | if (pred_val < REG_BR_PROB_BASE * 45 / 100 |
14078 | || pred_val > REG_BR_PROB_BASE * 55 / 100) |
14079 | { |
14080 | bool taken = pred_val > REG_BR_PROB_BASE / 2; |
14081 | bool cputaken |
14082 | = final_forward_branch_p (current_output_insn) == 0; |
14083 | |
14084 | /* Emit hints only in the case default branch prediction |
14085 | heuristics would fail. */ |
14086 | if (taken != cputaken) |
14087 | { |
14088 | /* We use 3e (DS) prefix for taken branches and |
14089 | 2e (CS) prefix for not taken branches. */ |
14090 | if (taken) |
14091 | fputs (s: "ds ; " , stream: file); |
14092 | else |
14093 | fputs (s: "cs ; " , stream: file); |
14094 | } |
14095 | } |
14096 | } |
14097 | return; |
14098 | } |
14099 | |
14100 | case ';': |
14101 | #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX |
14102 | putc (';', file); |
14103 | #endif |
14104 | return; |
14105 | |
14106 | case '~': |
14107 | putc (TARGET_AVX2 ? 'i' : 'f', stream: file); |
14108 | return; |
14109 | |
14110 | case 'M': |
14111 | if (TARGET_X32) |
14112 | { |
14113 | /* NB: 32-bit indices in VSIB address are sign-extended |
14114 | to 64 bits. In x32, if 32-bit address 0xf7fa3010 is |
14115 | sign-extended to 0xfffffffff7fa3010 which is invalid |
14116 | address. Add addr32 prefix if there is no base |
14117 | register nor symbol. */ |
14118 | bool ok; |
14119 | struct ix86_address parts; |
14120 | ok = ix86_decompose_address (addr: x, out: &parts); |
14121 | gcc_assert (ok && parts.index == NULL_RTX); |
14122 | if (parts.base == NULL_RTX |
14123 | && (parts.disp == NULL_RTX |
14124 | || !symbolic_operand (parts.disp, |
14125 | GET_MODE (parts.disp)))) |
14126 | fputs (s: "addr32 " , stream: file); |
14127 | } |
14128 | return; |
14129 | |
14130 | case '^': |
14131 | if (TARGET_64BIT && Pmode != word_mode) |
14132 | fputs (s: "addr32 " , stream: file); |
14133 | return; |
14134 | |
14135 | case '!': |
14136 | if (ix86_notrack_prefixed_insn_p (current_output_insn)) |
14137 | fputs (s: "notrack " , stream: file); |
14138 | return; |
14139 | |
14140 | default: |
14141 | output_operand_lossage ("invalid operand code '%c'" , code); |
14142 | } |
14143 | } |
14144 | |
14145 | if (REG_P (x)) |
14146 | print_reg (x, code, file); |
14147 | |
14148 | else if (MEM_P (x)) |
14149 | { |
14150 | rtx addr = XEXP (x, 0); |
14151 | |
14152 | /* No `byte ptr' prefix for call instructions ... */ |
14153 | if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') |
14154 | { |
14155 | machine_mode mode = GET_MODE (x); |
14156 | const char *size; |
14157 | |
14158 | /* Check for explicit size override codes. */ |
14159 | if (code == 'b') |
14160 | size = "BYTE" ; |
14161 | else if (code == 'w') |
14162 | size = "WORD" ; |
14163 | else if (code == 'k') |
14164 | size = "DWORD" ; |
14165 | else if (code == 'q') |
14166 | size = "QWORD" ; |
14167 | else if (code == 'x') |
14168 | size = "XMMWORD" ; |
14169 | else if (code == 't') |
14170 | size = "YMMWORD" ; |
14171 | else if (code == 'g') |
14172 | size = "ZMMWORD" ; |
14173 | else if (mode == BLKmode) |
14174 | /* ... or BLKmode operands, when not overridden. */ |
14175 | size = NULL; |
14176 | else |
14177 | switch (GET_MODE_SIZE (mode)) |
14178 | { |
14179 | case 1: size = "BYTE" ; break; |
14180 | case 2: size = "WORD" ; break; |
14181 | case 4: size = "DWORD" ; break; |
14182 | case 8: size = "QWORD" ; break; |
14183 | case 12: size = "TBYTE" ; break; |
14184 | case 16: |
14185 | if (mode == XFmode) |
14186 | size = "TBYTE" ; |
14187 | else |
14188 | size = "XMMWORD" ; |
14189 | break; |
14190 | case 32: size = "YMMWORD" ; break; |
14191 | case 64: size = "ZMMWORD" ; break; |
14192 | default: |
14193 | gcc_unreachable (); |
14194 | } |
14195 | if (size) |
14196 | { |
14197 | fputs (s: size, stream: file); |
14198 | fputs (s: " PTR " , stream: file); |
14199 | } |
14200 | } |
14201 | |
14202 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
14203 | output_operand_lossage ("invalid constraints for operand" ); |
14204 | else |
14205 | ix86_print_operand_address_as |
14206 | (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); |
14207 | } |
14208 | |
14209 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode) |
14210 | { |
14211 | long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), |
14212 | REAL_MODE_FORMAT (HFmode)); |
14213 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14214 | putc (c: '$', stream: file); |
14215 | fprintf (stream: file, format: "0x%04x" , (unsigned int) l); |
14216 | } |
14217 | |
14218 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) |
14219 | { |
14220 | long l; |
14221 | |
14222 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
14223 | |
14224 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14225 | putc (c: '$', stream: file); |
14226 | /* Sign extend 32bit SFmode immediate to 8 bytes. */ |
14227 | if (code == 'q') |
14228 | fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x" , |
14229 | (unsigned long long) (int) l); |
14230 | else |
14231 | fprintf (stream: file, format: "0x%08x" , (unsigned int) l); |
14232 | } |
14233 | |
14234 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) |
14235 | { |
14236 | long l[2]; |
14237 | |
14238 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
14239 | |
14240 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14241 | putc (c: '$', stream: file); |
14242 | fprintf (stream: file, format: "0x%lx%08lx" , l[1] & 0xffffffff, l[0] & 0xffffffff); |
14243 | } |
14244 | |
14245 | /* These float cases don't actually occur as immediate operands. */ |
14246 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) |
14247 | { |
14248 | char dstr[30]; |
14249 | |
14250 | real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
14251 | fputs (s: dstr, stream: file); |
14252 | } |
14253 | |
14254 | /* Print bcst_mem_operand. */ |
14255 | else if (GET_CODE (x) == VEC_DUPLICATE) |
14256 | { |
14257 | machine_mode vmode = GET_MODE (x); |
14258 | /* Must be bcst_memory_operand. */ |
14259 | gcc_assert (bcst_mem_operand (x, vmode)); |
14260 | |
14261 | rtx mem = XEXP (x,0); |
14262 | ix86_print_operand (file, x: mem, code: 0); |
14263 | |
14264 | switch (vmode) |
14265 | { |
14266 | case E_V2DImode: |
14267 | case E_V2DFmode: |
14268 | fputs (s: "{1to2}" , stream: file); |
14269 | break; |
14270 | case E_V4SImode: |
14271 | case E_V4SFmode: |
14272 | case E_V4DImode: |
14273 | case E_V4DFmode: |
14274 | fputs (s: "{1to4}" , stream: file); |
14275 | break; |
14276 | case E_V8SImode: |
14277 | case E_V8SFmode: |
14278 | case E_V8DFmode: |
14279 | case E_V8DImode: |
14280 | case E_V8HFmode: |
14281 | fputs (s: "{1to8}" , stream: file); |
14282 | break; |
14283 | case E_V16SFmode: |
14284 | case E_V16SImode: |
14285 | case E_V16HFmode: |
14286 | fputs (s: "{1to16}" , stream: file); |
14287 | break; |
14288 | case E_V32HFmode: |
14289 | fputs (s: "{1to32}" , stream: file); |
14290 | break; |
14291 | default: |
14292 | gcc_unreachable (); |
14293 | } |
14294 | } |
14295 | |
14296 | else |
14297 | { |
14298 | /* We have patterns that allow zero sets of memory, for instance. |
14299 | In 64-bit mode, we should probably support all 8-byte vectors, |
14300 | since we can in fact encode that into an immediate. */ |
14301 | if (GET_CODE (x) == CONST_VECTOR) |
14302 | { |
14303 | if (x != CONST0_RTX (GET_MODE (x))) |
14304 | output_operand_lossage ("invalid vector immediate" ); |
14305 | x = const0_rtx; |
14306 | } |
14307 | |
14308 | if (code == 'P') |
14309 | { |
14310 | if (ix86_force_load_from_GOT_p (x, call_p: true)) |
14311 | { |
14312 | /* For inline assembly statement, load function address |
14313 | from GOT with 'P' operand modifier to avoid PLT. */ |
14314 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
14315 | (TARGET_64BIT |
14316 | ? UNSPEC_GOTPCREL |
14317 | : UNSPEC_GOT)); |
14318 | x = gen_rtx_CONST (Pmode, x); |
14319 | x = gen_const_mem (Pmode, x); |
14320 | ix86_print_operand (file, x, code: 'A'); |
14321 | return; |
14322 | } |
14323 | } |
14324 | else if (code != 'p') |
14325 | { |
14326 | if (CONST_INT_P (x)) |
14327 | { |
14328 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14329 | putc (c: '$', stream: file); |
14330 | } |
14331 | else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF |
14332 | || GET_CODE (x) == LABEL_REF) |
14333 | { |
14334 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14335 | putc (c: '$', stream: file); |
14336 | else |
14337 | fputs (s: "OFFSET FLAT:" , stream: file); |
14338 | } |
14339 | } |
14340 | if (CONST_INT_P (x)) |
14341 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); |
14342 | else if (flag_pic || MACHOPIC_INDIRECT) |
14343 | output_pic_addr_const (file, x, code); |
14344 | else |
14345 | output_addr_const (file, x); |
14346 | } |
14347 | } |
14348 | |
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: the punctuation
   characters accepted as operand codes by ix86_print_operand.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
    case '!':
      return true;

    default:
      return false;
    }
}
14355 | |
14356 | /* Print a memory operand whose address is ADDR. */ |
14357 | |
14358 | static void |
14359 | ix86_print_operand_address_as (FILE *file, rtx addr, |
14360 | addr_space_t as, bool raw) |
14361 | { |
14362 | struct ix86_address parts; |
14363 | rtx base, index, disp; |
14364 | int scale; |
14365 | int ok; |
14366 | bool vsib = false; |
14367 | int code = 0; |
14368 | |
14369 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) |
14370 | { |
14371 | ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts); |
14372 | gcc_assert (parts.index == NULL_RTX); |
14373 | parts.index = XVECEXP (addr, 0, 1); |
14374 | parts.scale = INTVAL (XVECEXP (addr, 0, 2)); |
14375 | addr = XVECEXP (addr, 0, 0); |
14376 | vsib = true; |
14377 | } |
14378 | else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) |
14379 | { |
14380 | gcc_assert (TARGET_64BIT); |
14381 | ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts); |
14382 | code = 'q'; |
14383 | } |
14384 | else |
14385 | ok = ix86_decompose_address (addr, out: &parts); |
14386 | |
14387 | gcc_assert (ok); |
14388 | |
14389 | base = parts.base; |
14390 | index = parts.index; |
14391 | disp = parts.disp; |
14392 | scale = parts.scale; |
14393 | |
14394 | if (ADDR_SPACE_GENERIC_P (as)) |
14395 | as = parts.seg; |
14396 | else |
14397 | gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); |
14398 | |
14399 | if (!ADDR_SPACE_GENERIC_P (as) && !raw) |
14400 | { |
14401 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14402 | putc (c: '%', stream: file); |
14403 | |
14404 | switch (as) |
14405 | { |
14406 | case ADDR_SPACE_SEG_FS: |
14407 | fputs (s: "fs:" , stream: file); |
14408 | break; |
14409 | case ADDR_SPACE_SEG_GS: |
14410 | fputs (s: "gs:" , stream: file); |
14411 | break; |
14412 | default: |
14413 | gcc_unreachable (); |
14414 | } |
14415 | } |
14416 | |
14417 | /* Use one byte shorter RIP relative addressing for 64bit mode. */ |
14418 | if (TARGET_64BIT && !base && !index && !raw) |
14419 | { |
14420 | rtx symbol = disp; |
14421 | |
14422 | if (GET_CODE (disp) == CONST |
14423 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14424 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14425 | symbol = XEXP (XEXP (disp, 0), 0); |
14426 | |
14427 | if (GET_CODE (symbol) == LABEL_REF |
14428 | || (GET_CODE (symbol) == SYMBOL_REF |
14429 | && SYMBOL_REF_TLS_MODEL (symbol) == 0)) |
14430 | base = pc_rtx; |
14431 | } |
14432 | |
14433 | if (!base && !index) |
14434 | { |
14435 | /* Displacement only requires special attention. */ |
14436 | if (CONST_INT_P (disp)) |
14437 | { |
14438 | if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) |
14439 | fputs (s: "ds:" , stream: file); |
14440 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); |
14441 | } |
14442 | /* Load the external function address via the GOT slot to avoid PLT. */ |
14443 | else if (GET_CODE (disp) == CONST |
14444 | && GET_CODE (XEXP (disp, 0)) == UNSPEC |
14445 | && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL |
14446 | || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) |
14447 | && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) |
14448 | output_pic_addr_const (file, x: disp, code: 0); |
14449 | else if (flag_pic) |
14450 | output_pic_addr_const (file, x: disp, code: 0); |
14451 | else |
14452 | output_addr_const (file, disp); |
14453 | } |
14454 | else |
14455 | { |
14456 | /* Print SImode register names to force addr32 prefix. */ |
14457 | if (SImode_address_operand (addr, VOIDmode)) |
14458 | { |
14459 | if (flag_checking) |
14460 | { |
14461 | gcc_assert (TARGET_64BIT); |
14462 | switch (GET_CODE (addr)) |
14463 | { |
14464 | case SUBREG: |
14465 | gcc_assert (GET_MODE (addr) == SImode); |
14466 | gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); |
14467 | break; |
14468 | case ZERO_EXTEND: |
14469 | case AND: |
14470 | gcc_assert (GET_MODE (addr) == DImode); |
14471 | break; |
14472 | default: |
14473 | gcc_unreachable (); |
14474 | } |
14475 | } |
14476 | gcc_assert (!code); |
14477 | code = 'k'; |
14478 | } |
14479 | else if (code == 0 |
14480 | && TARGET_X32 |
14481 | && disp |
14482 | && CONST_INT_P (disp) |
14483 | && INTVAL (disp) < -16*1024*1024) |
14484 | { |
14485 | /* X32 runs in 64-bit mode, where displacement, DISP, in |
14486 | address DISP(%r64), is encoded as 32-bit immediate sign- |
14487 | extended from 32-bit to 64-bit. For -0x40000300(%r64), |
14488 | address is %r64 + 0xffffffffbffffd00. When %r64 < |
14489 | 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, |
14490 | which is invalid for x32. The correct address is %r64 |
14491 | - 0x40000300 == 0xf7ffdd64. To properly encode |
14492 | -0x40000300(%r64) for x32, we zero-extend negative |
14493 | displacement by forcing addr32 prefix which truncates |
14494 | 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should |
14495 | zero-extend all negative displacements, including -1(%rsp). |
14496 | However, for small negative displacements, sign-extension |
14497 | won't cause overflow. We only zero-extend negative |
14498 | displacements if they < -16*1024*1024, which is also used |
14499 | to check legitimate address displacements for PIC. */ |
14500 | code = 'k'; |
14501 | } |
14502 | |
14503 | /* Since the upper 32 bits of RSP are always zero for x32, |
14504 | we can encode %esp as %rsp to avoid 0x67 prefix if |
14505 | there is no index register. */ |
14506 | if (TARGET_X32 && Pmode == SImode |
14507 | && !index && base && REG_P (base) && REGNO (base) == SP_REG) |
14508 | code = 'q'; |
14509 | |
14510 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14511 | { |
14512 | if (disp) |
14513 | { |
14514 | if (flag_pic) |
14515 | output_pic_addr_const (file, x: disp, code: 0); |
14516 | else if (GET_CODE (disp) == LABEL_REF) |
14517 | output_asm_label (disp); |
14518 | else |
14519 | output_addr_const (file, disp); |
14520 | } |
14521 | |
14522 | putc (c: '(', stream: file); |
14523 | if (base) |
14524 | print_reg (x: base, code, file); |
14525 | if (index) |
14526 | { |
14527 | putc (c: ',', stream: file); |
14528 | print_reg (x: index, code: vsib ? 0 : code, file); |
14529 | if (scale != 1 || vsib) |
14530 | fprintf (stream: file, format: ",%d" , scale); |
14531 | } |
14532 | putc (c: ')', stream: file); |
14533 | } |
14534 | else |
14535 | { |
14536 | rtx offset = NULL_RTX; |
14537 | |
14538 | if (disp) |
14539 | { |
14540 | /* Pull out the offset of a symbol; print any symbol itself. */ |
14541 | if (GET_CODE (disp) == CONST |
14542 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14543 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14544 | { |
14545 | offset = XEXP (XEXP (disp, 0), 1); |
14546 | disp = gen_rtx_CONST (VOIDmode, |
14547 | XEXP (XEXP (disp, 0), 0)); |
14548 | } |
14549 | |
14550 | if (flag_pic) |
14551 | output_pic_addr_const (file, x: disp, code: 0); |
14552 | else if (GET_CODE (disp) == LABEL_REF) |
14553 | output_asm_label (disp); |
14554 | else if (CONST_INT_P (disp)) |
14555 | offset = disp; |
14556 | else |
14557 | output_addr_const (file, disp); |
14558 | } |
14559 | |
14560 | putc (c: '[', stream: file); |
14561 | if (base) |
14562 | { |
14563 | print_reg (x: base, code, file); |
14564 | if (offset) |
14565 | { |
14566 | if (INTVAL (offset) >= 0) |
14567 | putc (c: '+', stream: file); |
14568 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
14569 | } |
14570 | } |
14571 | else if (offset) |
14572 | fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
14573 | else |
14574 | putc (c: '0', stream: file); |
14575 | |
14576 | if (index) |
14577 | { |
14578 | putc (c: '+', stream: file); |
14579 | print_reg (x: index, code: vsib ? 0 : code, file); |
14580 | if (scale != 1 || vsib) |
14581 | fprintf (stream: file, format: "*%d" , scale); |
14582 | } |
14583 | putc (c: ']', stream: file); |
14584 | } |
14585 | } |
14586 | } |
14587 | |
14588 | static void |
14589 | ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) |
14590 | { |
14591 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
14592 | output_operand_lossage ("invalid constraints for operand" ); |
14593 | else |
14594 | ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false); |
14595 | } |
14596 | |
14597 | /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
14598 | |
14599 | static bool |
14600 | (FILE *file, rtx x) |
14601 | { |
14602 | rtx op; |
14603 | |
14604 | if (GET_CODE (x) != UNSPEC) |
14605 | return false; |
14606 | |
14607 | op = XVECEXP (x, 0, 0); |
14608 | switch (XINT (x, 1)) |
14609 | { |
14610 | case UNSPEC_GOTOFF: |
14611 | output_addr_const (file, op); |
14612 | fputs (s: "@gotoff" , stream: file); |
14613 | break; |
14614 | case UNSPEC_GOTTPOFF: |
14615 | output_addr_const (file, op); |
14616 | /* FIXME: This might be @TPOFF in Sun ld. */ |
14617 | fputs (s: "@gottpoff" , stream: file); |
14618 | break; |
14619 | case UNSPEC_TPOFF: |
14620 | output_addr_const (file, op); |
14621 | fputs (s: "@tpoff" , stream: file); |
14622 | break; |
14623 | case UNSPEC_NTPOFF: |
14624 | output_addr_const (file, op); |
14625 | if (TARGET_64BIT) |
14626 | fputs (s: "@tpoff" , stream: file); |
14627 | else |
14628 | fputs (s: "@ntpoff" , stream: file); |
14629 | break; |
14630 | case UNSPEC_DTPOFF: |
14631 | output_addr_const (file, op); |
14632 | fputs (s: "@dtpoff" , stream: file); |
14633 | break; |
14634 | case UNSPEC_GOTNTPOFF: |
14635 | output_addr_const (file, op); |
14636 | if (TARGET_64BIT) |
14637 | fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
14638 | "@gottpoff(%rip)" : "@gottpoff[rip]" , stream: file); |
14639 | else |
14640 | fputs (s: "@gotntpoff" , stream: file); |
14641 | break; |
14642 | case UNSPEC_INDNTPOFF: |
14643 | output_addr_const (file, op); |
14644 | fputs (s: "@indntpoff" , stream: file); |
14645 | break; |
14646 | #if TARGET_MACHO |
14647 | case UNSPEC_MACHOPIC_OFFSET: |
14648 | output_addr_const (file, op); |
14649 | putc ('-', file); |
14650 | machopic_output_function_base_name (file); |
14651 | break; |
14652 | #endif |
14653 | |
14654 | default: |
14655 | return false; |
14656 | } |
14657 | |
14658 | return true; |
14659 | } |
14660 | |
14661 | |
14662 | /* Output code to perform a 387 binary operation in INSN, one of PLUS, |
14663 | MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] |
14664 | is the expression of the binary operation. The output may either be |
14665 | emitted here, or returned to the caller, like all output_* functions. |
14666 | |
14667 | There is no guarantee that the operands are the same mode, as they |
14668 | might be within FLOAT or FLOAT_EXTEND expressions. */ |
14669 | |
14670 | #ifndef SYSV386_COMPAT |
14671 | /* Set to 1 for compatibility with brain-damaged assemblers. No-one |
14672 | wants to fix the assemblers because that causes incompatibility |
14673 | with gcc. No-one wants to fix gcc because that causes |
14674 | incompatibility with assemblers... You can use the option of |
14675 | -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ |
14676 | #define SYSV386_COMPAT 1 |
14677 | #endif |
14678 | |
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  /* SSE path is taken when any operand lives in an SSE register.  */
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Pick the mnemonic prefix: "%v" (optional AVX v-prefix) for SSE,
     "fi" for x87 with an integer operand, plain "f" otherwise.  */
  if (is_sse)
    p = "%v" ;
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi" ;
  else
    p = "f" ;

  strcpy (dest: buf, src: p);

  /* Append the operation name.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add" ; break;
    case MINUS:
      p = "sub" ; break;
    case MULT:
      p = "mul" ; break;
    case DIV:
      p = "div" ; break;
    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);

  /* SSE scalar form: append the ss/sd suffix and the (AVX three- or
     SSE two-operand) operand template, and we are done.  */
  if (is_sse)
    {
      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd" ;
      strcat (dest: buf, src: p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}" ;
      else
	p = "\t{%2, %0|%0, %2}" ;

      strcat (dest: buf, src: p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  /* x87 path: choose the operand template (and possible pop/reverse
     suffixes) based on commutativity and which operands die.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so operands[0] == operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (a&: operands[1], b&: operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2" ;
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}" ;	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}" ;	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}" ;	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}" ;	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops: memory operand position matters (fsubr
	 vs fsub, fdivr vs fdiv).  */
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1" ;
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2" ;
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}" ;
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}" ;
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}" ;	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}" ;	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}" ;
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}" ;
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}" ;	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}" ;	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}" ;	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}" ;	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}" ;
#else
	  p = "r\t{%1, %0|%0, %1}" ;	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}" ;
#else
	  p = "\t{%2, %0|%0, %2}" ;	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (dest: buf, src: p);
  return buf;
}
14868 | |
14869 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
14870 | |
14871 | static int |
14872 | ix86_dirflag_mode_needed (rtx_insn *insn) |
14873 | { |
14874 | if (CALL_P (insn)) |
14875 | { |
14876 | if (cfun->machine->func_type == TYPE_NORMAL) |
14877 | return X86_DIRFLAG_ANY; |
14878 | else |
14879 | /* No need to emit CLD in interrupt handler for TARGET_CLD. */ |
14880 | return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; |
14881 | } |
14882 | |
14883 | if (recog_memoized (insn) < 0) |
14884 | return X86_DIRFLAG_ANY; |
14885 | |
14886 | if (get_attr_type (insn) == TYPE_STR) |
14887 | { |
14888 | /* Emit cld instruction if stringops are used in the function. */ |
14889 | if (cfun->machine->func_type == TYPE_NORMAL) |
14890 | return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; |
14891 | else |
14892 | return X86_DIRFLAG_RESET; |
14893 | } |
14894 | |
14895 | return X86_DIRFLAG_ANY; |
14896 | } |
14897 | |
14898 | /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ |
14899 | |
14900 | static bool |
14901 | ix86_check_avx_upper_register (const_rtx exp) |
14902 | { |
14903 | return (SSE_REG_P (exp) |
14904 | && !EXT_REX_SSE_REG_P (exp) |
14905 | && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); |
14906 | } |
14907 | |
14908 | /* Check if a 256bit or 512bit AVX register is referenced in stores. */ |
14909 | |
14910 | static void |
14911 | ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) |
14912 | { |
14913 | if (ix86_check_avx_upper_register (exp: dest)) |
14914 | { |
14915 | bool *used = (bool *) data; |
14916 | *used = true; |
14917 | } |
14918 | } |
14919 | |
14920 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
14921 | |
/* Return the AVX upper-128-bit state (AVX_U128_*) required before
   INSN, for the optimize_mode_switching pass.  */
static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (exp: arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				     y: abi.mode_clobbers (V4DImode)))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  subrtx_iterator::array_type array;

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (ix86_check_avx_upper_register (exp: dest))
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (x: src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  /* Scan the source for any YMM/ZMM register reference.  */
	  FOR_EACH_SUBRTX (iter, array, src, NONCONST)
	    if (ix86_check_avx_upper_register (exp: *iter))
	      return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (ix86_check_avx_upper_register (exp: *iter))
      return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
15005 | |
15006 | /* Return mode that i387 must be switched into |
15007 | prior to the execution of insn. */ |
15008 | |
15009 | static int |
15010 | ix86_i387_mode_needed (int entity, rtx_insn *insn) |
15011 | { |
15012 | enum attr_i387_cw mode; |
15013 | |
15014 | /* The mode UNINITIALIZED is used to store control word after a |
15015 | function call or ASM pattern. The mode ANY specify that function |
15016 | has no requirements on the control word and make no changes in the |
15017 | bits we are interested in. */ |
15018 | |
15019 | if (CALL_P (insn) |
15020 | || (NONJUMP_INSN_P (insn) |
15021 | && (asm_noperands (PATTERN (insn)) >= 0 |
15022 | || GET_CODE (PATTERN (insn)) == ASM_INPUT))) |
15023 | return I387_CW_UNINITIALIZED; |
15024 | |
15025 | if (recog_memoized (insn) < 0) |
15026 | return I387_CW_ANY; |
15027 | |
15028 | mode = get_attr_i387_cw (insn); |
15029 | |
15030 | switch (entity) |
15031 | { |
15032 | case I387_ROUNDEVEN: |
15033 | if (mode == I387_CW_ROUNDEVEN) |
15034 | return mode; |
15035 | break; |
15036 | |
15037 | case I387_TRUNC: |
15038 | if (mode == I387_CW_TRUNC) |
15039 | return mode; |
15040 | break; |
15041 | |
15042 | case I387_FLOOR: |
15043 | if (mode == I387_CW_FLOOR) |
15044 | return mode; |
15045 | break; |
15046 | |
15047 | case I387_CEIL: |
15048 | if (mode == I387_CW_CEIL) |
15049 | return mode; |
15050 | break; |
15051 | |
15052 | default: |
15053 | gcc_unreachable (); |
15054 | } |
15055 | |
15056 | return I387_CW_ANY; |
15057 | } |
15058 | |
15059 | /* Return mode that entity must be switched into |
15060 | prior to the execution of insn. */ |
15061 | |
15062 | static int |
15063 | ix86_mode_needed (int entity, rtx_insn *insn) |
15064 | { |
15065 | switch (entity) |
15066 | { |
15067 | case X86_DIRFLAG: |
15068 | return ix86_dirflag_mode_needed (insn); |
15069 | case AVX_U128: |
15070 | return ix86_avx_u128_mode_needed (insn); |
15071 | case I387_ROUNDEVEN: |
15072 | case I387_TRUNC: |
15073 | case I387_FLOOR: |
15074 | case I387_CEIL: |
15075 | return ix86_i387_mode_needed (entity, insn); |
15076 | default: |
15077 | gcc_unreachable (); |
15078 | } |
15079 | return 0; |
15080 | } |
15081 | |
15082 | /* Calculate mode of upper 128bit AVX registers after the insn. */ |
15083 | |
15084 | static int |
15085 | ix86_avx_u128_mode_after (int mode, rtx_insn *insn) |
15086 | { |
15087 | rtx pat = PATTERN (insn); |
15088 | |
15089 | if (vzeroupper_pattern (pat, VOIDmode) |
15090 | || vzeroall_pattern (pat, VOIDmode)) |
15091 | return AVX_U128_CLEAN; |
15092 | |
15093 | /* We know that state is clean after CALL insn if there are no |
15094 | 256bit or 512bit registers used in the function return register. */ |
15095 | if (CALL_P (insn)) |
15096 | { |
15097 | bool avx_upper_reg_found = false; |
15098 | note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); |
15099 | |
15100 | return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; |
15101 | } |
15102 | |
15103 | /* Otherwise, return current mode. Remember that if insn |
15104 | references AVX 256bit or 512bit registers, the mode was already |
15105 | changed to DIRTY from MODE_NEEDED. */ |
15106 | return mode; |
15107 | } |
15108 | |
15109 | /* Return the mode that an insn results in. */ |
15110 | |
15111 | static int |
15112 | ix86_mode_after (int entity, int mode, rtx_insn *insn) |
15113 | { |
15114 | switch (entity) |
15115 | { |
15116 | case X86_DIRFLAG: |
15117 | return mode; |
15118 | case AVX_U128: |
15119 | return ix86_avx_u128_mode_after (mode, insn); |
15120 | case I387_ROUNDEVEN: |
15121 | case I387_TRUNC: |
15122 | case I387_FLOOR: |
15123 | case I387_CEIL: |
15124 | return mode; |
15125 | default: |
15126 | gcc_unreachable (); |
15127 | } |
15128 | } |
15129 | |
15130 | static int |
15131 | ix86_dirflag_mode_entry (void) |
15132 | { |
15133 | /* For TARGET_CLD or in the interrupt handler we can't assume |
15134 | direction flag state at function entry. */ |
15135 | if (TARGET_CLD |
15136 | || cfun->machine->func_type != TYPE_NORMAL) |
15137 | return X86_DIRFLAG_ANY; |
15138 | |
15139 | return X86_DIRFLAG_RESET; |
15140 | } |
15141 | |
15142 | static int |
15143 | ix86_avx_u128_mode_entry (void) |
15144 | { |
15145 | tree arg; |
15146 | |
15147 | /* Entry mode is set to AVX_U128_DIRTY if there are |
15148 | 256bit or 512bit modes used in function arguments. */ |
15149 | for (arg = DECL_ARGUMENTS (current_function_decl); arg; |
15150 | arg = TREE_CHAIN (arg)) |
15151 | { |
15152 | rtx incoming = DECL_INCOMING_RTL (arg); |
15153 | |
15154 | if (incoming && ix86_check_avx_upper_register (exp: incoming)) |
15155 | return AVX_U128_DIRTY; |
15156 | } |
15157 | |
15158 | return AVX_U128_CLEAN; |
15159 | } |
15160 | |
15161 | /* Return a mode that ENTITY is assumed to be |
15162 | switched to at function entry. */ |
15163 | |
15164 | static int |
15165 | ix86_mode_entry (int entity) |
15166 | { |
15167 | switch (entity) |
15168 | { |
15169 | case X86_DIRFLAG: |
15170 | return ix86_dirflag_mode_entry (); |
15171 | case AVX_U128: |
15172 | return ix86_avx_u128_mode_entry (); |
15173 | case I387_ROUNDEVEN: |
15174 | case I387_TRUNC: |
15175 | case I387_FLOOR: |
15176 | case I387_CEIL: |
15177 | return I387_CW_ANY; |
15178 | default: |
15179 | gcc_unreachable (); |
15180 | } |
15181 | } |
15182 | |
15183 | static int |
15184 | ix86_avx_u128_mode_exit (void) |
15185 | { |
15186 | rtx reg = crtl->return_rtx; |
15187 | |
15188 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit |
15189 | or 512 bit modes used in the function return register. */ |
15190 | if (reg && ix86_check_avx_upper_register (exp: reg)) |
15191 | return AVX_U128_DIRTY; |
15192 | |
15193 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit |
15194 | modes used in function arguments, otherwise return AVX_U128_CLEAN. |
15195 | */ |
15196 | return ix86_avx_u128_mode_entry (); |
15197 | } |
15198 | |
15199 | /* Return a mode that ENTITY is assumed to be |
15200 | switched to at function exit. */ |
15201 | |
15202 | static int |
15203 | ix86_mode_exit (int entity) |
15204 | { |
15205 | switch (entity) |
15206 | { |
15207 | case X86_DIRFLAG: |
15208 | return X86_DIRFLAG_ANY; |
15209 | case AVX_U128: |
15210 | return ix86_avx_u128_mode_exit (); |
15211 | case I387_ROUNDEVEN: |
15212 | case I387_TRUNC: |
15213 | case I387_FLOOR: |
15214 | case I387_CEIL: |
15215 | return I387_CW_ANY; |
15216 | default: |
15217 | gcc_unreachable (); |
15218 | } |
15219 | } |
15220 | |
/* Implement the TARGET_MODE_PRIORITY hook for mode switching: the
   mode number N itself serves as its priority; the entity argument
   is unused.  */
static int
ix86_mode_priority (int, int n)
{
  return n;
}
15226 | |
15227 | /* Output code to initialize control word copies used by trunc?f?i and |
15228 | rounding patterns. CURRENT_MODE is set to current control word, |
15229 | while NEW_MODE is set to new control word. */ |
15230 | |
/* Emit insns that build the modified i387 control word for rounding
   mode MODE (one of I387_CW_*) in its dedicated stack slot: store the
   current control word, load it into a register, adjust the rounding
   control bits (bits 10-11), and store the result to the slot used by
   the corresponding trunc/floor/ceil/roundeven patterns.  */
static void
emit_i387_cw_initialization (int mode)
{
  /* Slot holding the unmodified control word, reloaded afterwards.  */
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* fnstcw: save the current control word, then copy it to REG.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Store the adjusted control word where fldcw will pick it up.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
15281 | |
15282 | /* Generate one or more insns to set ENTITY to MODE. */ |
15283 | |
15284 | static void |
15285 | ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, |
15286 | HARD_REG_SET regs_live ATTRIBUTE_UNUSED) |
15287 | { |
15288 | switch (entity) |
15289 | { |
15290 | case X86_DIRFLAG: |
15291 | if (mode == X86_DIRFLAG_RESET) |
15292 | emit_insn (gen_cld ()); |
15293 | break; |
15294 | case AVX_U128: |
15295 | if (mode == AVX_U128_CLEAN) |
15296 | ix86_expand_avx_vzeroupper (); |
15297 | break; |
15298 | case I387_ROUNDEVEN: |
15299 | case I387_TRUNC: |
15300 | case I387_FLOOR: |
15301 | case I387_CEIL: |
15302 | if (mode != I387_CW_ANY |
15303 | && mode != I387_CW_UNINITIALIZED) |
15304 | emit_i387_cw_initialization (mode); |
15305 | break; |
15306 | default: |
15307 | gcc_unreachable (); |
15308 | } |
15309 | } |
15310 | |
15311 | /* Output code for INSN to convert a float to a signed int. OPERANDS |
15312 | are the insn operands. The output may be [HSD]Imode and the input |
15313 | operand may be [SDX]Fmode. */ |
15314 | |
const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  /* Whether st(0) dies in this insn; fist without 'p' suffix would
     leave it on the stack.  */
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1" , operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  /* SSE3 fisttp always truncates, no control-word dance needed.  */
  if (fisttp)
    return "fisttp%Z0\t%0" ;

  strcpy (dest: buf, src: "fist" );

  /* Switch to the rounding mode stored in operands[3], restore the
     saved control word (operands[2]) afterwards.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3" , operands);

  /* Skip the leading 'p' (pop) when the stack top survives.  */
  p = "p%Z0\t%0" ;
  strcat (dest: buf, src: p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2" , operands);

  return "" ;
}
15353 | |
15354 | /* Output code for x87 ffreep insn. The OPNO argument, which may only |
15355 | have the values zero or one, indicates the ffreep insn's operand |
15356 | from the OPERANDS array. */ |
15357 | |
static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0" ;
#else
    {
      /* Assembler lacks ffreep mnemonic: emit the raw opcode bytes
	 (0xdf 0xc0+i) as a short.  */
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf" , regno);
      return retval;
    }
#endif

  /* Without ffreep, pop via fstp.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0" ;
}
15380 | |
15381 | |
15382 | /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi |
15383 | should be used. UNORDERED_P is true when fucom should be used. */ |
15384 | |
const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* fcomi compares operands[0]/[1]; fcom-style uses operands[1]/[2]
     with the status word going to operands[0].  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi" ;
      strcpy (dest: buf, src: p);

      /* Append 'p' (pop) only when st(0) dies.  */
      p = "p\t{%y1, %0|%0, %y1}" ;
      strcat (dest: buf, src: p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp" ;
      strcpy (dest: buf, src: p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Compare against zero with ftst.  */
      gcc_assert (!unordered_p);
      strcpy (dest: buf, src: "ftst" );
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  gcc_assert (!unordered_p);
	  p = "ficom" ;
	}
      else
	p = unordered_p ? "fucom" : "fcom" ;

      strcpy (dest: buf, src: p);

      /* Append 'p' (pop) only when st(0) dies.  */
      p = "p%Z2\t%y2" ;
      strcat (dest: buf, src: p + !stack_top_dies);
    }

  output_asm_insn (buf, operands);
  return "fnstsw\t%0" ;
}
15446 | |
/* Output one absolute jump-table element referencing local label
   VALUE to FILE, using .quad for LP64 and .long otherwise.  */
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (stream: file, format: "%s%s%d\n" , directive, LPREFIX, value);
}
15461 | |
/* Output one relative jump-table element to FILE: the difference of
   local label VALUE from the anchor label REL (64-bit / VxWorks RTP),
   from the Mach-O function base, or as a @GOTOFF / GOT-relative
   expression for 32-bit PIC.  */
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (stream: file, format: "%s%s%d-%s%d\n" ,
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-" , LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (stream: file, ASM_LONG "%s%d@GOTOFF\n" , LPREFIX, value);
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n" ,
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
15491 | |
15492 | #define LEA_MAX_STALL (3) |
15493 | #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) |
15494 | |
15495 | /* Increase given DISTANCE in half-cycles according to |
15496 | dependencies between PREV and NEXT instructions. |
15497 | Add 1 half-cycle if there is no dependency and |
15498 | go to next cycle if there is some dependecy. */ |
15499 | |
15500 | static unsigned int |
15501 | increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) |
15502 | { |
15503 | df_ref def, use; |
15504 | |
15505 | if (!prev || !next) |
15506 | return distance + (distance & 1) + 2; |
15507 | |
15508 | if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) |
15509 | return distance + 1; |
15510 | |
15511 | FOR_EACH_INSN_USE (use, next) |
15512 | FOR_EACH_INSN_DEF (def, prev) |
15513 | if (!DF_REF_IS_ARTIFICIAL (def) |
15514 | && DF_REF_REGNO (use) == DF_REF_REGNO (def)) |
15515 | return distance + (distance & 1) + 2; |
15516 | |
15517 | return distance + 1; |
15518 | } |
15519 | |
15520 | /* Function checks if instruction INSN defines register number |
15521 | REGNO1 or REGNO2. */ |
15522 | |
15523 | bool |
15524 | insn_defines_reg (unsigned int regno1, unsigned int regno2, |
15525 | rtx_insn *insn) |
15526 | { |
15527 | df_ref def; |
15528 | |
15529 | FOR_EACH_INSN_DEF (def, insn) |
15530 | if (DF_REF_REG_DEF_P (def) |
15531 | && !DF_REF_IS_ARTIFICIAL (def) |
15532 | && (regno1 == DF_REF_REGNO (def) |
15533 | || regno2 == DF_REF_REGNO (def))) |
15534 | return true; |
15535 | |
15536 | return false; |
15537 | } |
15538 | |
15539 | /* Function checks if instruction INSN uses register number |
15540 | REGNO as a part of address expression. */ |
15541 | |
15542 | static bool |
15543 | insn_uses_reg_mem (unsigned int regno, rtx insn) |
15544 | { |
15545 | df_ref use; |
15546 | |
15547 | FOR_EACH_INSN_USE (use, insn) |
15548 | if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) |
15549 | return true; |
15550 | |
15551 | return false; |
15552 | } |
15553 | |
15554 | /* Search backward for non-agu definition of register number REGNO1 |
15555 | or register number REGNO2 in basic block starting from instruction |
15556 | START up to head of basic block or instruction INSN. |
15557 | |
15558 | Function puts true value into *FOUND var if definition was found |
15559 | and false otherwise. |
15560 | |
15561 | Distance in half-cycles between START and found instruction or head |
15562 | of BB is added to DISTANCE and returned. */ |
15563 | |
static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (insn: start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  /* Walk backwards from START until we hit INSN, the BB head, or the
     search threshold.  */
  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, insn: prev))
	    {
	      /* Only a non-LEA (non-AGU) definition counts; LEA defs
		 are what we are measuring the distance from.  */
	      if (recog_memoized (insn: prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (insn: prev);
    }

  return distance;
}
15602 | |
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First scan backwards inside INSN's own basic block.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, start: PREV_INSN (insn),
					      found: &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* Detect a self-loop: BB is one of its own predecessors.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	/* Continue the search around the back edge, i.e. from the
	   end of BB itself.  */
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), found: &found);
      else
	{
	  /* Otherwise scan every predecessor block and keep the
	     shortest positive distance found among them.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 found: &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found)
    return -1;

  /* Distances are accumulated in half-cycles; convert to cycles.  */
  return distance >> 1;
}
15673 | |
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in a memory address, added
   to DISTANCE.  Return -1 if REGNO is redefined before any such use.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (insn: start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  /* Walk forwards from START; stop at INSN, at the end of the basic
     block, or once the accumulated distance reaches the search
     threshold.  */
  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to the
	 distance.  */
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, insn: next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno1: regno, INVALID_REGNUM, insn: next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (insn: next);
    }

  return distance;
}
15738 | |
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First scan forwards inside INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno: regno0, insn, distance,
				       start: NEXT_INSN (insn),
				       found: &found, redefined: &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* Detect a self-loop: BB is one of its own successors.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	/* Continue the search around the back edge, i.e. from the
	   head of BB itself.  */
	distance = distance_agu_use_in_bb (regno: regno0, insn,
					   distance, BB_HEAD (bb),
					   found: &found, redefined: &redefined);
      else
	{
	  /* Otherwise scan every successor block and keep the
	     shortest positive distance found among them.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno: regno0, insn,
					  distance, BB_HEAD (e->dest),
					  found: &found_in_bb, redefined: &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* A redefinition of REGNO0 found in the same block counts as "no
     AGU use" and yields -1.  */
  if (!found || redefined)
    return -1;

  /* Distances are accumulated in half-cycles; convert to cycles.  */
  return distance >> 1;
}
15805 | |
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma between choosing LEA or ADD:
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
15811 | #define IX86_LEA_PRIORITY 0 |
15812 | |
/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than lea latency.  REGNO0 is
   the destination register, REGNO1 and REGNO2 the sources, and
   HAS_SCALE is true when the address uses a scaled index.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* Measure distance to the nearest backward non-AGU definition of a
     source and to the nearest forward AGU use of the destination.  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
15874 | |
15875 | /* Return true if we need to split op0 = op1 + op2 into a sequence of |
15876 | move and add to avoid AGU stalls. */ |
15877 | |
15878 | bool |
15879 | ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) |
15880 | { |
15881 | unsigned int regno0, regno1, regno2; |
15882 | |
15883 | /* Check if we need to optimize. */ |
15884 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
15885 | return false; |
15886 | |
15887 | regno0 = true_regnum (operands[0]); |
15888 | regno1 = true_regnum (operands[1]); |
15889 | regno2 = true_regnum (operands[2]); |
15890 | |
15891 | /* We need to split only adds with non destructive |
15892 | destination operand. */ |
15893 | if (regno0 == regno1 || regno0 == regno2) |
15894 | return false; |
15895 | else |
15896 | return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 1, has_scale: false); |
15897 | } |
15898 | |
15899 | /* Return true if we should emit lea instruction instead of mov |
15900 | instruction. */ |
15901 | |
15902 | bool |
15903 | ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) |
15904 | { |
15905 | unsigned int regno0, regno1; |
15906 | |
15907 | /* Check if we need to optimize. */ |
15908 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
15909 | return false; |
15910 | |
15911 | /* Use lea for reg to reg moves only. */ |
15912 | if (!REG_P (operands[0]) || !REG_P (operands[1])) |
15913 | return false; |
15914 | |
15915 | regno0 = true_regnum (operands[0]); |
15916 | regno1 = true_regnum (operands[1]); |
15917 | |
15918 | return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, split_cost: 0, has_scale: false); |
15919 | } |
15920 | |
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first. */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (addr: operands[1], out: &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address. */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement. */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better. */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes. */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize. */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				has_scale: parts.scale > 1);
}
16023 | |
16024 | /* Return true if it is ok to optimize an ADD operation to LEA |
16025 | operation to avoid flag register consumation. For most processors, |
16026 | ADD is faster than LEA. For the processors like BONNELL, if the |
16027 | destination register of LEA holds an actual address which will be |
16028 | used soon, LEA is better and otherwise ADD is better. */ |
16029 | |
16030 | bool |
16031 | ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) |
16032 | { |
16033 | unsigned int regno0 = true_regnum (operands[0]); |
16034 | unsigned int regno1 = true_regnum (operands[1]); |
16035 | unsigned int regno2 = true_regnum (operands[2]); |
16036 | |
16037 | /* If a = b + c, (a!=b && a!=c), must use lea form. */ |
16038 | if (regno0 != regno1 && regno0 != regno2) |
16039 | return true; |
16040 | |
16041 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16042 | return false; |
16043 | |
16044 | return ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost: 0, has_scale: false); |
16045 | } |
16046 | |
/* Return true if the destination reg of SET_BODY is used as the
   shift count of USE_BODY.  PARALLEL bodies are searched recursively
   on both sides.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  Only plain register
     destinations are considered.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Recurse over each element of the PARALLEL; any match wins.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Recurse over each element of the PARALLEL; any match wins.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}
16115 | |
16116 | /* Return true if destination reg of SET_INSN is shift count of |
16117 | USE_INSN. */ |
16118 | |
16119 | bool |
16120 | ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) |
16121 | { |
16122 | return ix86_dep_by_shift_count_body (set_body: PATTERN (insn: set_insn), |
16123 | use_body: PATTERN (insn: use_insn)); |
16124 | } |
16125 | |
16126 | /* Return TRUE or FALSE depending on whether the unary operator meets the |
16127 | appropriate constraints. */ |
16128 | |
16129 | bool |
16130 | ix86_unary_operator_ok (enum rtx_code, |
16131 | machine_mode, |
16132 | rtx operands[2]) |
16133 | { |
16134 | /* If one of operands is memory, source and destination must match. */ |
16135 | if ((MEM_P (operands[0]) |
16136 | || MEM_P (operands[1])) |
16137 | && ! rtx_equal_p (operands[0], operands[1])) |
16138 | return false; |
16139 | return true; |
16140 | } |
16141 | |
16142 | /* Return TRUE if the operands to a vec_interleave_{high,low}v2df |
16143 | are ok, keeping in mind the possible movddup alternative. */ |
16144 | |
16145 | bool |
16146 | ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) |
16147 | { |
16148 | if (MEM_P (operands[0])) |
16149 | return rtx_equal_p (operands[0], operands[1 + high]); |
16150 | if (MEM_P (operands[1]) && MEM_P (operands[2])) |
16151 | return false; |
16152 | return true; |
16153 | } |
16154 | |
/* A subroutine of ix86_build_signbit_mask.  Build a CONST_VECTOR of
   mode MODE whose first element is VALUE.  If VECT is true,
   replicate VALUE into every element; otherwise fill the remaining
   elements with zero.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    /* Integer vector modes are only valid with VECT (full
       replication).  */
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* Element 0 always holds VALUE.  */
      RTVEC_ELT (v, 0) = value;

      /* Remaining elements: VALUE when replicating, zero
	 otherwise.  */
      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
16210 | |
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  Returns the mask forced into a
   register.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode matching the element width of MODE.  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    /* Scalar TImode/TFmode: no vector mode, the mask itself is the
       result.  */
    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build a constant with only the (element) sign bit set, or its
     complement when INVERT.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (x: w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (mode: vec_mode, vect, value: mask);
  return force_reg (vec_mode, v);
}
16283 | |
/* Return a HOST_WIDE_INT holding the bit pattern of const vector OP
   in MODE, with element 0 in the least significant bits.  MODE must
   not be wider than a word.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    /* Integer element vectors: pack each CONST_INT element into its
       bit position.  */
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    /* Float element vectors: convert each element to its target
       bit representation first.  */
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (val: v, precision: innermode_bits);
	  val = wi::insert (x: val, y: wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
16332 | |
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  /* For a PARALLEL, the COMPARE is in the first element.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  /* The case labels below fall through deliberately: each mode
     rejects the REQ_MODEs it is NOT at least as constrained as, then
     falls into the next (less constrained) mode's checks.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      /* CCNO also satisfies a CCmode request when comparing against
	 zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    /* The remaining modes only match themselves.  */
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
16390 | |
/* Return the CC mode that should be used when comparing OP0 and OP1
   with comparison code CODE; the least constrained mode whose flags
   suffice for CODE is chosen.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* All scalar FP comparisons use the same CC mode.  */
  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
16480 | |
16481 | /* Return TRUE or FALSE depending on whether the ptest instruction |
16482 | INSN has source and destination with suitable matching CC modes. */ |
16483 | |
16484 | bool |
16485 | ix86_match_ptest_ccmode (rtx insn) |
16486 | { |
16487 | rtx set, src; |
16488 | machine_mode set_mode; |
16489 | |
16490 | set = PATTERN (insn); |
16491 | gcc_assert (GET_CODE (set) == SET); |
16492 | src = SET_SRC (set); |
16493 | gcc_assert (GET_CODE (src) == UNSPEC |
16494 | && XINT (src, 1) == UNSPEC_PTEST); |
16495 | |
16496 | set_mode = GET_MODE (src); |
16497 | if (set_mode != CCZmode |
16498 | && set_mode != CCCmode |
16499 | && set_mode != CCmode) |
16500 | return false; |
16501 | return GET_MODE (SET_DEST (set)) == set_mode; |
16502 | } |
16503 | |
16504 | /* Return the fixed registers used for condition codes. */ |
16505 | |
16506 | static bool |
16507 | ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) |
16508 | { |
16509 | *p1 = FLAGS_REG; |
16510 | *p2 = INVALID_REGNUM; |
16511 | return true; |
16512 | } |
16513 | |
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC subsumes CCGOC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  /* CCNO subsumes CCGOC.  */
  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZ is subsumed by CCGC, CCGOC and CCNO: the other mode wins.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  /* Any other pair of integer CC modes is only compatible via the
     fully-general CCmode; FP modes match nothing else.  */
  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
16581 | |
16582 | /* Return strategy to use for floating-point. We assume that fcomi is always |
16583 | preferrable where available, since that is also true when looking at size |
16584 | (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ |
16585 | |
16586 | enum ix86_fpcmp_strategy |
16587 | ix86_fp_comparison_strategy (enum rtx_code) |
16588 | { |
16589 | /* Do fcomi/sahf based test when profitable. */ |
16590 | |
16591 | if (TARGET_CMOVE) |
16592 | return IX86_FPCMP_COMI; |
16593 | |
16594 | if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) |
16595 | return IX86_FPCMP_SAHF; |
16596 | |
16597 | return IX86_FPCMP_ARITH; |
16598 | } |
16599 | |
16600 | /* Convert comparison codes we use to represent FP comparison to integer |
16601 | code that will result in proper branch. Return UNKNOWN if no such code |
16602 | is available. */ |
16603 | |
16604 | enum rtx_code |
16605 | ix86_fp_compare_code_to_integer (enum rtx_code code) |
16606 | { |
16607 | switch (code) |
16608 | { |
16609 | case GT: |
16610 | return GTU; |
16611 | case GE: |
16612 | return GEU; |
16613 | case ORDERED: |
16614 | case UNORDERED: |
16615 | return code; |
16616 | case UNEQ: |
16617 | return EQ; |
16618 | case UNLT: |
16619 | return LTU; |
16620 | case UNLE: |
16621 | return LEU; |
16622 | case LTGT: |
16623 | return NE; |
16624 | default: |
16625 | return UNKNOWN; |
16626 | } |
16627 | } |
16628 | |
16629 | /* Zero extend possibly SImode EXP to Pmode register. */ |
16630 | rtx |
16631 | ix86_zero_extend_to_Pmode (rtx exp) |
16632 | { |
16633 | return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); |
16634 | } |
16635 | |
16636 | /* Return true if the function is called via PLT. */ |
16637 | |
16638 | bool |
16639 | ix86_call_use_plt_p (rtx call_op) |
16640 | { |
16641 | if (SYMBOL_REF_LOCAL_P (call_op)) |
16642 | { |
16643 | if (SYMBOL_REF_DECL (call_op) |
16644 | && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL) |
16645 | { |
16646 | /* NB: All ifunc functions must be called via PLT. */ |
16647 | cgraph_node *node |
16648 | = cgraph_node::get (SYMBOL_REF_DECL (call_op)); |
16649 | if (node && node->ifunc_resolver) |
16650 | return true; |
16651 | } |
16652 | return false; |
16653 | } |
16654 | return true; |
16655 | } |
16656 | |
16657 | /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true, |
16658 | the PLT entry will be used as the function address for local IFUNC |
16659 | functions. When the PIC register is needed for PLT call, indirect |
16660 | call via the PLT entry will fail since the PIC register may not be |
16661 | set up properly for indirect call. In this case, we should return |
16662 | false. */ |
16663 | |
16664 | static bool |
16665 | ix86_ifunc_ref_local_ok (void) |
16666 | { |
16667 | return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC); |
16668 | } |
16669 | |
16670 | /* Return true if the function being called was marked with attribute |
16671 | "noplt" or using -fno-plt and we are compiling for non-PIC. We need |
16672 | to handle the non-PIC case in the backend because there is no easy |
16673 | interface for the front-end to force non-PLT calls to use the GOT. |
16674 | This is currently used only with 64-bit or 32-bit GOT32X ELF targets |
16675 | to call the function marked "noplt" indirectly. */ |
16676 | |
16677 | static bool |
16678 | ix86_nopic_noplt_attribute_p (rtx call_op) |
16679 | { |
16680 | if (flag_pic || ix86_cmodel == CM_LARGE |
16681 | || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) |
16682 | || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF |
16683 | || SYMBOL_REF_LOCAL_P (call_op)) |
16684 | return false; |
16685 | |
16686 | tree symbol_decl = SYMBOL_REF_DECL (call_op); |
16687 | |
16688 | if (!flag_plt |
16689 | || (symbol_decl != NULL_TREE |
16690 | && lookup_attribute (attr_name: "noplt" , DECL_ATTRIBUTES (symbol_decl)))) |
16691 | return true; |
16692 | |
16693 | return false; |
16694 | } |
16695 | |
16696 | /* Helper to output the jmp/call. */ |
static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      /* Jump to the named out-of-line thunk.  REX/REX2 extended
	 registers get a cs segment-override prefix when
	 -mindirect-branch-cs-prefix is enabled.  */
      if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
	  && ix86_indirect_branch_cs_prefix)
	fprintf (stream: asm_out_file, format: "\tcs\n" );
      fprintf (stream: asm_out_file, format: "\tjmp\t" );
      assemble_name (asm_out_file, thunk_name);
      putc (c: '\n', stream: asm_out_file);
      /* Trailing int3 mitigates straight-line speculation past the
	 indirect jmp when requested.  */
      if ((ix86_harden_sls & harden_sls_indirect_jmp))
	fputs (s: "\tint3\n" , stream: asm_out_file);
    }
  else
    /* No named thunk: emit the thunk body inline for REGNO.  */
    output_indirect_thunk (regno);
}
16714 | |
16715 | /* Output indirect branch via a call and return thunk. CALL_OP is a |
16716 | register which contains the branch target. XASM is the assembly |
16717 | template for CALL_OP. Branch is a tail call if SIBCALL_P is true. |
16718 | A normal call is converted to: |
16719 | |
16720 | call __x86_indirect_thunk_reg |
16721 | |
16722 | and a tail call is converted to: |
16723 | |
16724 | jmp __x86_indirect_thunk_reg |
16725 | */ |
16726 | |
16727 | static void |
16728 | ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) |
16729 | { |
16730 | char thunk_name_buf[32]; |
16731 | char *thunk_name; |
16732 | enum indirect_thunk_prefix need_prefix |
16733 | = indirect_thunk_need_prefix (insn: current_output_insn); |
16734 | int regno = REGNO (call_op); |
16735 | |
16736 | if (cfun->machine->indirect_branch_type |
16737 | != indirect_branch_thunk_inline) |
16738 | { |
16739 | if (cfun->machine->indirect_branch_type == indirect_branch_thunk) |
16740 | SET_HARD_REG_BIT (set&: indirect_thunks_used, bit: regno); |
16741 | |
16742 | indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false); |
16743 | thunk_name = thunk_name_buf; |
16744 | } |
16745 | else |
16746 | thunk_name = NULL; |
16747 | |
16748 | if (sibcall_p) |
16749 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
16750 | else |
16751 | { |
16752 | if (thunk_name != NULL) |
16753 | { |
16754 | if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno)) |
16755 | && ix86_indirect_branch_cs_prefix) |
16756 | fprintf (stream: asm_out_file, format: "\tcs\n" ); |
16757 | fprintf (stream: asm_out_file, format: "\tcall\t" ); |
16758 | assemble_name (asm_out_file, thunk_name); |
16759 | putc (c: '\n', stream: asm_out_file); |
16760 | return; |
16761 | } |
16762 | |
16763 | char indirectlabel1[32]; |
16764 | char indirectlabel2[32]; |
16765 | |
16766 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, |
16767 | INDIRECT_LABEL, |
16768 | indirectlabelno++); |
16769 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, |
16770 | INDIRECT_LABEL, |
16771 | indirectlabelno++); |
16772 | |
16773 | /* Jump. */ |
16774 | fputs (s: "\tjmp\t" , stream: asm_out_file); |
16775 | assemble_name_raw (asm_out_file, indirectlabel2); |
16776 | fputc (c: '\n', stream: asm_out_file); |
16777 | |
16778 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
16779 | |
16780 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
16781 | |
16782 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
16783 | |
16784 | /* Call. */ |
16785 | fputs (s: "\tcall\t" , stream: asm_out_file); |
16786 | assemble_name_raw (asm_out_file, indirectlabel1); |
16787 | fputc (c: '\n', stream: asm_out_file); |
16788 | } |
16789 | } |
16790 | |
16791 | /* Output indirect branch via a call and return thunk. CALL_OP is |
16792 | the branch target. XASM is the assembly template for CALL_OP. |
16793 | Branch is a tail call if SIBCALL_P is true. A normal call is |
16794 | converted to: |
16795 | |
16796 | jmp L2 |
16797 | L1: |
16798 | push CALL_OP |
16799 | jmp __x86_indirect_thunk |
16800 | L2: |
16801 | call L1 |
16802 | |
16803 | and a tail call is converted to: |
16804 | |
16805 | push CALL_OP |
16806 | jmp __x86_indirect_thunk |
16807 | */ |
16808 | |
static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (insn: current_output_insn);
  /* -1 selects the register-less __x86_indirect_thunk name.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      /* Out-of-line thunk: request its emission and get its name.  */
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (name: thunk_name_buf, regno, need_prefix, ret_p: false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* The branch target is pushed, then the thunk "returns" to it.  */
  snprintf (s: push_buf, maxlen: sizeof (push_buf), format: "push{%c}\t%s" ,
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      /* Tail call: push target, jmp thunk (or emit inline thunk).  */
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      /* Normal call: jmp L2; L1: push target, thunk; L2: call L1.  */
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs (s: "\tjmp\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc (c: '\n', stream: asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, out: &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs (s: "\tcall\t" , stream: asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc (c: '\n', stream: asm_out_file);
    }
}
16903 | |
16904 | /* Output indirect branch via a call and return thunk. CALL_OP is |
16905 | the branch target. XASM is the assembly template for CALL_OP. |
16906 | Branch is a tail call if SIBCALL_P is true. */ |
16907 | |
16908 | static void |
16909 | ix86_output_indirect_branch (rtx call_op, const char *xasm, |
16910 | bool sibcall_p) |
16911 | { |
16912 | if (REG_P (call_op)) |
16913 | ix86_output_indirect_branch_via_reg (call_op, sibcall_p); |
16914 | else |
16915 | ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); |
16916 | } |
16917 | |
16918 | /* Output indirect jump. CALL_OP is the jump target. */ |
16919 | |
const char *
ix86_output_indirect_jmp (rtx call_op)
{
  if (cfun->machine->indirect_branch_type != indirect_branch_keep)
    {
      /* We can't have red-zone since "call" in the indirect thunk
	 pushes the return address onto stack, destroying red-zone.  */
      if (ix86_red_zone_used)
	gcc_unreachable ();

      /* Emit a thunk-converted tail jump.  */
      ix86_output_indirect_branch (call_op, xasm: "%0" , sibcall_p: true);
    }
  else
    output_asm_insn ("%!jmp\t%A0" , &call_op);
  /* Append int3 after the jump to mitigate straight-line speculation
     when requested.  */
  return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "" ;
}
16936 | |
16937 | /* Output return instrumentation for current function if needed. */ |
16938 | |
static void
output_return_instrumentation (void)
{
  /* Only instrument when requested, -mfentry is in use, and the
     function is not marked no_instrument_function.  */
  if (ix86_instrument_return != instrument_return_none
      && flag_fentry
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
    {
      /* Local label "1:" marks the instrumentation site so it can be
	 recorded in the __return_loc section below.  */
      if (ix86_flag_record_return)
	fprintf (stream: asm_out_file, format: "1:\n" );
      switch (ix86_instrument_return)
	{
	case instrument_return_call:
	  fprintf (stream: asm_out_file, format: "\tcall\t__return__\n" );
	  break;
	case instrument_return_nop5:
	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	  fprintf (stream: asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" );
	  break;
	case instrument_return_none:
	  break;
	}

      /* Record the address of label 1 in the __return_loc section,
	 using a pointer-sized entry.  */
      if (ix86_flag_record_return)
	{
	  fprintf (stream: asm_out_file, format: "\t.section __return_loc, \"a\",@progbits\n" );
	  fprintf (stream: asm_out_file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" );
	  fprintf (stream: asm_out_file, format: "\t.previous\n" );
	}
    }
}
16969 | |
16970 | /* Output function return. CALL_OP is the jump target. Add a REP |
16971 | prefix to RET if LONG_P is true and function return is kept. */ |
16972 | |
const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      /* Return is converted to a jump through a return thunk.  */
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Out-of-line thunk: request its emission (unless only the
	     extern reference is wanted) and jump to it by name.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, INVALID_REGNUM, need_prefix,
			       ret_p: true);
	  indirect_return_needed |= need_thunk;
	  fprintf (stream: asm_out_file, format: "\tjmp\t" );
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	/* Emit the return thunk body inline.  */
	output_indirect_thunk (INVALID_REGNUM);

      return "" ;
    }

  /* Plain return; "rep ret" when a long form was requested.  */
  output_asm_insn (long_p ? "rep%; ret" : "ret" , nullptr);
  /* Append int3 to mitigate straight-line speculation past ret.  */
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "" ;
}
17005 | |
17006 | /* Output indirect function return. RET_OP is the function return |
17007 | target. */ |
17008 | |
const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (insn: current_output_insn);
      unsigned int regno = REGNO (ret_op);
      /* The indirect return target is only ever passed in %ecx.  */
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Out-of-line thunk: jump to the %ecx return thunk,
	     requesting its emission when needed.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (name: thunk_name, regno, need_prefix, ret_p: true);

	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (set&: indirect_thunks_used, CX_REG);
	    }
	  fprintf (stream: asm_out_file, format: "\tjmp\t" );
	  assemble_name (asm_out_file, thunk_name);
	  putc (c: '\n', stream: asm_out_file);
	}
      else
	/* Emit the thunk body inline.  */
	output_indirect_thunk (regno);
    }
  else
    {
      /* Plain indirect jump, with optional int3 SLS hardening.  */
      output_asm_insn ("%!jmp\t%A0" , &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs (s: "\tint3\n" , stream: asm_out_file);
    }
  return "" ;
}
17047 | |
17048 | /* Output the assembly for a call instruction. */ |
17049 | |
const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  /* Direct call to a constant address?  */
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* Whether indirect branches must be rewritten through thunks
     (never on SEH targets).  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      /* "noplt"/-fno-plt: go indirectly through the GOT slot.  */
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}" ;
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}" ;
		}
	    }
	  else
	    xasm = "%!jmp\t%P0" ;
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0" ;
      else
	{
	  if (output_indirect_p)
	    xasm = "%0" ;
	  else
	    xasm = "%!jmp\t%A0" ;
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, sibcall_p: true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  /* int3 after an indirect tail jump hardens against
	     straight-line speculation.  */
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3" ;
	}
      return "" ;
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (insn: i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  /* Select the assembly template, mirroring the sibcall logic above
     but with "call" instead of "jmp".  */
  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}" ;
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}" ;
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}" ;
	    }
	}
      else
	xasm = "%!call\t%P0" ;
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0" ;
      else
	xasm = "%!call\t%A0" ;
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, sibcall_p: false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop" ;

  return "" ;
}
17191 | |
17192 | /* Return a MEM corresponding to a stack slot with mode MODE. |
17193 | Allocate a new slot if necessary. |
17194 | |
17195 | The RTL for a function can have several slots available: N is |
17196 | which slot to use. */ |
17197 | |
rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Reuse an already-allocated slot with matching mode and number.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  int align = 0;
  /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
     alignment with -m32 -mpreferred-stack-boundary=2.  */
  if (mode == DImode
      && !TARGET_64BIT
      && n == SLOT_FLOATxFDI_387
      && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
    align = 32;
  /* Allocate a new slot and link it at the head of the list.  */
  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
17226 | |
17227 | static void |
17228 | ix86_instantiate_decls (void) |
17229 | { |
17230 | struct stack_local_entry *s; |
17231 | |
17232 | for (s = ix86_stack_locals; s; s = s->next) |
17233 | if (s->rtl != NULL_RTX) |
17234 | instantiate_decl_rtl (x: s->rtl); |
17235 | } |
17236 | |
17237 | /* Check whether x86 address PARTS is a pc-relative address. */ |
17238 | |
17239 | bool |
17240 | ix86_rip_relative_addr_p (struct ix86_address *parts) |
17241 | { |
17242 | rtx base, index, disp; |
17243 | |
17244 | base = parts->base; |
17245 | index = parts->index; |
17246 | disp = parts->disp; |
17247 | |
17248 | if (disp && !base && !index) |
17249 | { |
17250 | if (TARGET_64BIT) |
17251 | { |
17252 | rtx symbol = disp; |
17253 | |
17254 | if (GET_CODE (disp) == CONST) |
17255 | symbol = XEXP (disp, 0); |
17256 | if (GET_CODE (symbol) == PLUS |
17257 | && CONST_INT_P (XEXP (symbol, 1))) |
17258 | symbol = XEXP (symbol, 0); |
17259 | |
17260 | if (GET_CODE (symbol) == LABEL_REF |
17261 | || (GET_CODE (symbol) == SYMBOL_REF |
17262 | && SYMBOL_REF_TLS_MODEL (symbol) == 0) |
17263 | || (GET_CODE (symbol) == UNSPEC |
17264 | && (XINT (symbol, 1) == UNSPEC_GOTPCREL |
17265 | || XINT (symbol, 1) == UNSPEC_PCREL |
17266 | || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) |
17267 | return true; |
17268 | } |
17269 | } |
17270 | return false; |
17271 | } |
17272 | |
17273 | /* Calculate the length of the memory address in the instruction encoding. |
17274 | Includes addr32 prefix, does not include the one-byte modrm, opcode, |
17275 | or other prefixes. We never generate addr32 prefix for LEA insn. */ |
17276 | |
int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses contribute no encoding bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, out: &parts);
  gcc_assert (ok);

  /* One byte for a segment-override prefix on non-generic address
     spaces.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Look through SUBREGs to the underlying hard registers.  */
  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (parts: &parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K is a signed 8-bit constant: disp8 form.  */
	  if (base && satisfies_constraint_K (op: disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
17372 | |
17373 | /* Compute default value for "length_immediate" attribute. When SHORTFORM |
17374 | is set, expect that insn have 8bit immediate alternative. */ |
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* At most one immediate operand is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    /* Truncate the value to the operand's mode before the
	       signed-8-bit range check, so e.g. 0xffff in HImode is
	       treated as -1.  */
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    /* Values fitting in a signed byte use the imm8 form.  */
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode" , insn);
	  }
      }
  return len;
}
17432 | |
17433 | /* Compute default value for "length_address" attribute. */ |
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* LEA carries its address in SET_SRC rather than a MEM operand.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, lea: true);
    }

  /* Otherwise find the (first) memory operand and measure it.  */
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      /* Walk to this operand's constraint string for the
		 matched alternative.  */
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), lea: false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
17486 | |
17487 | /* Compute default value for "length_vex" attribute. It includes |
17488 | 2 or 3 byte VEX prefix and 1 opcode byte. */ |
17489 | |
int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex use extended EVEX prefix length is 4-byte.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits need the extended (EVEX-length)
	   encoding: 4 bytes total.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  /* A memory operand without extended registers still allows the
     2-byte VEX form; register-only insns use the length computed
     above.  */
  return has_mem ? 2 + 1 : reg_only;
}
17540 | |
17541 | |
17542 | static bool |
17543 | ix86_class_likely_spilled_p (reg_class_t); |
17544 | |
17545 | /* Returns true if lhs of insn is HW function argument register and set up |
17546 | is_spilled to true if it is likely spilled HW register. */ |
17547 | static bool |
17548 | insn_is_function_arg (rtx insn, bool* is_spilled) |
17549 | { |
17550 | rtx dst; |
17551 | |
17552 | if (!NONDEBUG_INSN_P (insn)) |
17553 | return false; |
17554 | /* Call instructions are not movable, ignore it. */ |
17555 | if (CALL_P (insn)) |
17556 | return false; |
17557 | insn = PATTERN (insn); |
17558 | if (GET_CODE (insn) == PARALLEL) |
17559 | insn = XVECEXP (insn, 0, 0); |
17560 | if (GET_CODE (insn) != SET) |
17561 | return false; |
17562 | dst = SET_DEST (insn); |
17563 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
17564 | && ix86_function_arg_regno_p (REGNO (dst))) |
17565 | { |
17566 | /* Is it likely spilled HW register? */ |
17567 | if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) |
17568 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) |
17569 | *is_spilled = true; |
17570 | return true; |
17571 | } |
17572 | return false; |
17573 | } |
17574 | |
17575 | /* Add output dependencies for chain of function adjacent arguments if only |
17576 | there is a move to likely spilled HW register. Return first argument |
17577 | if at least one dependence was added or NULL otherwise. */ |
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  /* Stop the backward scans one insn before HEAD.  */
  head = PREV_INSN (insn: head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (insn: last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (insn: last, is_spilled: &is_spilled))
	break;
      /* The nearest real insn before the call is not an argument
	 set-up, so there is no argument chain to protect.  */
      return NULL;
    }

  first_arg = last;
  /* Walk further backward, chaining adjacent argument moves together.  */
  while (true)
    {
      insn = PREV_INSN (insn: last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  /* Skip debug insns and notes, but keep scanning.  */
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, is_spilled: &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  /* Without a likely-spilled register in the chain no dependence was
     added above, so report failure.  */
  if (!is_spilled)
    return NULL;
  return first_arg;
}
17629 | |
17630 | /* Add output or anti dependency from insn to first_arg to restrict its code |
17631 | motion. */ |
17632 | static void |
17633 | avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) |
17634 | { |
17635 | rtx set; |
17636 | rtx tmp; |
17637 | |
17638 | set = single_set (insn); |
17639 | if (!set) |
17640 | return; |
17641 | tmp = SET_DEST (set); |
17642 | if (REG_P (tmp)) |
17643 | { |
17644 | /* Add output dependency to the first function argument. */ |
17645 | add_dependence (first_arg, insn, REG_DEP_OUTPUT); |
17646 | return; |
17647 | } |
17648 | /* Add anti dependency. */ |
17649 | add_dependence (first_arg, insn, REG_DEP_ANTI); |
17650 | } |
17651 | |
17652 | /* Avoid cross block motion of function argument through adding dependency |
17653 | from the first non-jump instruction in bb. */ |
17654 | static void |
17655 | add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) |
17656 | { |
17657 | rtx_insn *insn = BB_END (bb); |
17658 | |
17659 | while (insn) |
17660 | { |
17661 | if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) |
17662 | { |
17663 | rtx set = single_set (insn); |
17664 | if (set) |
17665 | { |
17666 | avoid_func_arg_motion (first_arg: arg, insn); |
17667 | return; |
17668 | } |
17669 | } |
17670 | if (insn == BB_HEAD (bb)) |
17671 | return; |
17672 | insn = PREV_INSN (insn); |
17673 | } |
17674 | } |
17675 | |
17676 | /* Hook for pre-reload schedule - avoid motion of function arguments |
17677 | passed in likely spilled HW registers. */ |
17678 | static void |
17679 | ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) |
17680 | { |
17681 | rtx_insn *insn; |
17682 | rtx_insn *first_arg = NULL; |
17683 | if (reload_completed) |
17684 | return; |
17685 | while (head != tail && DEBUG_INSN_P (head)) |
17686 | head = NEXT_INSN (insn: head); |
17687 | for (insn = tail; insn != head; insn = PREV_INSN (insn)) |
17688 | if (INSN_P (insn) && CALL_P (insn)) |
17689 | { |
17690 | first_arg = add_parameter_dependencies (call: insn, head); |
17691 | if (first_arg) |
17692 | { |
17693 | /* Add dependee for first argument to predecessors if only |
17694 | region contains more than one block. */ |
17695 | basic_block bb = BLOCK_FOR_INSN (insn); |
17696 | int rgn = CONTAINING_RGN (bb->index); |
17697 | int nr_blks = RGN_NR_BLOCKS (rgn); |
17698 | /* Skip trivial regions and region head blocks that can have |
17699 | predecessors outside of region. */ |
17700 | if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) |
17701 | { |
17702 | edge e; |
17703 | edge_iterator ei; |
17704 | |
17705 | /* Regions are SCCs with the exception of selective |
17706 | scheduling with pipelining of outer blocks enabled. |
17707 | So also check that immediate predecessors of a non-head |
17708 | block are in the same region. */ |
17709 | FOR_EACH_EDGE (e, ei, bb->preds) |
17710 | { |
17711 | /* Avoid creating of loop-carried dependencies through |
17712 | using topological ordering in the region. */ |
17713 | if (rgn == CONTAINING_RGN (e->src->index) |
17714 | && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) |
17715 | add_dependee_for_func_arg (arg: first_arg, bb: e->src); |
17716 | } |
17717 | } |
17718 | insn = first_arg; |
17719 | if (insn == head) |
17720 | break; |
17721 | } |
17722 | } |
17723 | else if (first_arg) |
17724 | avoid_func_arg_motion (first_arg, insn); |
17725 | } |
17726 | |
17727 | /* Hook for pre-reload schedule - set priority of moves from likely spilled |
17728 | HW registers to maximum, to schedule them at soon as possible. These are |
17729 | moves from function argument registers at the top of the function entry |
17730 | and moves from function return value registers after call. */ |
17731 | static int |
17732 | ix86_adjust_priority (rtx_insn *insn, int priority) |
17733 | { |
17734 | rtx set; |
17735 | |
17736 | if (reload_completed) |
17737 | return priority; |
17738 | |
17739 | if (!NONDEBUG_INSN_P (insn)) |
17740 | return priority; |
17741 | |
17742 | set = single_set (insn); |
17743 | if (set) |
17744 | { |
17745 | rtx tmp = SET_SRC (set); |
17746 | if (REG_P (tmp) |
17747 | && HARD_REGISTER_P (tmp) |
17748 | && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) |
17749 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) |
17750 | return current_sched_info->sched_max_insns_priority; |
17751 | } |
17752 | |
17753 | return priority; |
17754 | } |
17755 | |
17756 | /* Prepare for scheduling pass. */ |
17757 | static void |
17758 | ix86_sched_init_global (FILE *, int, int) |
17759 | { |
17760 | /* Install scheduling hooks for current CPU. Some of these hooks are used |
17761 | in time-critical parts of the scheduler, so we only set them up when |
17762 | they are actually used. */ |
17763 | switch (ix86_tune) |
17764 | { |
17765 | case PROCESSOR_CORE2: |
17766 | case PROCESSOR_NEHALEM: |
17767 | case PROCESSOR_SANDYBRIDGE: |
17768 | case PROCESSOR_HASWELL: |
17769 | case PROCESSOR_TREMONT: |
17770 | case PROCESSOR_ALDERLAKE: |
17771 | case PROCESSOR_GENERIC: |
17772 | /* Do not perform multipass scheduling for pre-reload schedule |
17773 | to save compile time. */ |
17774 | if (reload_completed) |
17775 | { |
17776 | ix86_core2i7_init_hooks (); |
17777 | break; |
17778 | } |
17779 | /* Fall through. */ |
17780 | default: |
17781 | targetm.sched.dfa_post_advance_cycle = NULL; |
17782 | targetm.sched.first_cycle_multipass_init = NULL; |
17783 | targetm.sched.first_cycle_multipass_begin = NULL; |
17784 | targetm.sched.first_cycle_multipass_issue = NULL; |
17785 | targetm.sched.first_cycle_multipass_backtrack = NULL; |
17786 | targetm.sched.first_cycle_multipass_end = NULL; |
17787 | targetm.sched.first_cycle_multipass_fini = NULL; |
17788 | break; |
17789 | } |
17790 | } |
17791 | |
17792 | |
17793 | /* Implement TARGET_STATIC_RTX_ALIGNMENT. */ |
17794 | |
17795 | static HOST_WIDE_INT |
17796 | ix86_static_rtx_alignment (machine_mode mode) |
17797 | { |
17798 | if (mode == DFmode) |
17799 | return 64; |
17800 | if (ALIGN_MODE_128 (mode)) |
17801 | return MAX (128, GET_MODE_ALIGNMENT (mode)); |
17802 | return GET_MODE_ALIGNMENT (mode); |
17803 | } |
17804 | |
17805 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ |
17806 | |
17807 | static HOST_WIDE_INT |
17808 | ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) |
17809 | { |
17810 | if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST |
17811 | || TREE_CODE (exp) == INTEGER_CST) |
17812 | { |
17813 | machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); |
17814 | HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); |
17815 | return MAX (mode_align, align); |
17816 | } |
17817 | else if (!optimize_size && TREE_CODE (exp) == STRING_CST |
17818 | && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) |
17819 | return BITS_PER_WORD; |
17820 | |
17821 | return align; |
17822 | } |
17823 | |
17824 | /* Implement TARGET_EMPTY_RECORD_P. */ |
17825 | |
17826 | static bool |
17827 | ix86_is_empty_record (const_tree type) |
17828 | { |
17829 | if (!TARGET_64BIT) |
17830 | return false; |
17831 | return default_is_empty_record (type); |
17832 | } |
17833 | |
17834 | /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ |
17835 | |
17836 | static void |
17837 | ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) |
17838 | { |
17839 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
17840 | |
17841 | if (!cum->warn_empty) |
17842 | return; |
17843 | |
17844 | if (!TYPE_EMPTY_P (type)) |
17845 | return; |
17846 | |
17847 | /* Don't warn if the function isn't visible outside of the TU. */ |
17848 | if (cum->decl && !TREE_PUBLIC (cum->decl)) |
17849 | return; |
17850 | |
17851 | const_tree ctx = get_ultimate_context (cum->decl); |
17852 | if (ctx != NULL_TREE |
17853 | && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) |
17854 | return; |
17855 | |
17856 | /* If the actual size of the type is zero, then there is no change |
17857 | in how objects of this size are passed. */ |
17858 | if (int_size_in_bytes (type) == 0) |
17859 | return; |
17860 | |
17861 | warning (OPT_Wabi, "empty class %qT parameter passing ABI " |
17862 | "changes in %<-fabi-version=12%> (GCC 8)" , type); |
17863 | |
17864 | /* Only warn once. */ |
17865 | cum->warn_empty = false; |
17866 | } |
17867 | |
17868 | /* This hook returns name of multilib ABI. */ |
17869 | |
17870 | static const char * |
17871 | ix86_get_multilib_abi_name (void) |
17872 | { |
17873 | if (!(TARGET_64BIT_P (ix86_isa_flags))) |
17874 | return "i386" ; |
17875 | else if (TARGET_X32_P (ix86_isa_flags)) |
17876 | return "x32" ; |
17877 | else |
17878 | return "x86_64" ; |
17879 | } |
17880 | |
17881 | /* Compute the alignment for a variable for Intel MCU psABI. TYPE is |
17882 | the data type, and ALIGN is the alignment that the object would |
17883 | ordinarily have. */ |
17884 | |
17885 | static int |
17886 | iamcu_alignment (tree type, int align) |
17887 | { |
17888 | machine_mode mode; |
17889 | |
17890 | if (align < 32 || TYPE_USER_ALIGN (type)) |
17891 | return align; |
17892 | |
17893 | /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 |
17894 | bytes. */ |
17895 | type = strip_array_types (type); |
17896 | if (TYPE_ATOMIC (type)) |
17897 | return align; |
17898 | |
17899 | mode = TYPE_MODE (type); |
17900 | switch (GET_MODE_CLASS (mode)) |
17901 | { |
17902 | case MODE_INT: |
17903 | case MODE_COMPLEX_INT: |
17904 | case MODE_COMPLEX_FLOAT: |
17905 | case MODE_FLOAT: |
17906 | case MODE_DECIMAL_FLOAT: |
17907 | return 32; |
17908 | default: |
17909 | return align; |
17910 | } |
17911 | } |
17912 | |
17913 | /* Compute the alignment for a static variable. |
17914 | TYPE is the data type, and ALIGN is the alignment that |
17915 | the object would ordinarily have. The value of this function is used |
17916 | instead of that alignment to align the object. */ |
17917 | |
int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data= can disable the optimization entirely (abi) or cap
     the cache-line bump at a word (compat).  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  /* Intel MCU psABI caps scalar alignment at 32 bits.  */
  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Bump large constant-size aggregates up to the compat and
     cache-line limits computed above.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* Everything below is an optimization, not an ABI requirement.  */
  if (!opt)
    return align;

  /* Give DFmode and 128-bit-class data natural SSE-friendly alignment,
     whether it appears as an array element, a complex component, the
     first record field, or a scalar.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
18013 | |
18014 | /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */ |
18015 | static void |
18016 | ix86_lower_local_decl_alignment (tree decl) |
18017 | { |
18018 | unsigned int new_align = ix86_local_alignment (decl, VOIDmode, |
18019 | DECL_ALIGN (decl), true); |
18020 | if (new_align < DECL_ALIGN (decl)) |
18021 | SET_DECL_ALIGN (decl, new_align); |
18022 | } |
18023 | |
18024 | /* Compute the alignment for a local variable or a stack slot. EXP is |
18025 | the data type or decl itself, MODE is the widest mode available and |
18026 | ALIGN is the alignment that the object would ordinarily have. The |
18027 | value of this macro is used instead of that alignment to align the |
18028 | object. */ |
18029 | |
unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be either a decl (take its type) or a type itself.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (x: wi::to_wide (TYPE_SIZE (type)), y: 128)
	  && align < 128)
	return 128;
    }

  /* Give DFmode and 128-bit-class data natural SSE-friendly alignment,
     mirroring the corresponding checks in ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
18137 | |
18138 | /* Compute the minimum required alignment for dynamic stack realignment |
18139 | purposes for a local variable, parameter or a stack slot. EXP is |
18140 | the data type or decl itself, MODE is its mode and ALIGN is the |
18141 | alignment that the object would ordinarily have. */ |
18142 | |
18143 | unsigned int |
18144 | ix86_minimum_alignment (tree exp, machine_mode mode, |
18145 | unsigned int align) |
18146 | { |
18147 | tree type, decl; |
18148 | |
18149 | if (exp && DECL_P (exp)) |
18150 | { |
18151 | type = TREE_TYPE (exp); |
18152 | decl = exp; |
18153 | } |
18154 | else |
18155 | { |
18156 | type = exp; |
18157 | decl = NULL; |
18158 | } |
18159 | |
18160 | if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) |
18161 | return align; |
18162 | |
18163 | /* Don't do dynamic stack realignment for long long objects with |
18164 | -mpreferred-stack-boundary=2. */ |
18165 | if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) |
18166 | && (!type || (!TYPE_USER_ALIGN (type) |
18167 | && !TYPE_ATOMIC (strip_array_types (type)))) |
18168 | && (!decl || !DECL_USER_ALIGN (decl))) |
18169 | { |
18170 | gcc_checking_assert (!TARGET_STV); |
18171 | return 32; |
18172 | } |
18173 | |
18174 | return align; |
18175 | } |
18176 | |
18177 | /* Find a location for the static chain incoming to a nested function. |
18178 | This is a register, unless all free registers are used by arguments. */ |
18179 | |
static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      /* FNDECL_OR_TYPE may be either the function decl or its type.  */
      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (type: fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (type: fntype, decl: fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      /* Record (once, before reload) that this function reads
		 its static chain from the stack.  */
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      /* The incoming chain lives in the stack slot at
		 arg_pointer - 8, where the trampoline (or the alternate
		 entry point) pushed it.  */
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
18253 | |
18254 | /* Emit RTL insns to initialize the variable parts of a trampoline. |
18255 | FNDECL is the decl of the target address; M_TRAMP is a MEM for |
18256 | the trampoline, and CHAIN_VALUE is an RTX for the static chain |
18257 | to be passed to the target function. */ |
18258 | |
static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  /* Whether CET branch protection requires an ENDBR at the entry.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* Little-endian bytes 0x41 0xbb: movl $imm32, %r11d.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* Little-endian bytes 0x49 0xbb: movabsq $imm64, %r11.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;	/* movl $imm32, %r10d.  */
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;	/* movabsq $imm64, %r10.  */
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl_or_type: fndecl, incoming_p: true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;	/* movl $imm32, %eax.  */
	    case CX_REG:
	      opcode = 0xb9; break;	/* movl $imm32, %ecx.  */
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;	/* pushl $imm32.  */

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* 0xe9: jmp rel32.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (decl: fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
18405 | |
18406 | static bool |
18407 | ix86_allocate_stack_slots_for_args (void) |
18408 | { |
18409 | /* Naked functions should not allocate stack slots for arguments. */ |
18410 | return !ix86_function_naked (fn: current_function_decl); |
18411 | } |
18412 | |
18413 | static bool |
18414 | ix86_warn_func_return (tree decl) |
18415 | { |
18416 | /* Naked functions are implemented entirely in assembly, including the |
18417 | return sequence, so suppress warnings about this. */ |
18418 | return !ix86_function_naked (fn: decl); |
18419 | } |
18420 | |
18421 | /* Return the shift count of a vector by scalar shift builtin second argument |
18422 | ARG1. */ |
18423 | static tree |
18424 | ix86_vector_shift_count (tree arg1) |
18425 | { |
18426 | if (tree_fits_uhwi_p (arg1)) |
18427 | return arg1; |
18428 | else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) |
18429 | { |
18430 | /* The count argument is weird, passed in as various 128-bit |
18431 | (or 64-bit) vectors, the low 64 bits from it are the count. */ |
18432 | unsigned char buf[16]; |
18433 | int len = native_encode_expr (arg1, buf, 16); |
18434 | if (len == 0) |
18435 | return NULL_TREE; |
18436 | tree t = native_interpret_expr (uint64_type_node, buf, len); |
18437 | if (t && tree_fits_uhwi_p (t)) |
18438 | return t; |
18439 | } |
18440 | return NULL_TREE; |
18441 | } |
18442 | |
18443 | /* Return true if arg_mask is all ones, ELEMS is elements number of |
18444 | corresponding vector. */ |
18445 | static bool |
18446 | ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask) |
18447 | { |
18448 | if (TREE_CODE (arg_mask) != INTEGER_CST) |
18449 | return false; |
18450 | |
18451 | unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask); |
18452 | if (elems == HOST_BITS_PER_WIDE_INT) |
18453 | return mask == HOST_WIDE_INT_M1U; |
18454 | if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) |
18455 | return false; |
18456 | |
18457 | return true; |
18458 | } |
18459 | |
/* Implement TARGET_FOLD_BUILTIN.  Try to fold a call to the x86
   machine-dependent builtin FNDECL with the N_ARGS arguments ARGS into
   a constant tree (or into one of the arguments); return the folded
   tree, or NULL_TREE when no folding is possible.  IGNORE is unused.  */
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl);
      enum rtx_code rcode;
      bool is_vshift;
      unsigned HOST_WIDE_INT mask;

      switch (fn_code)
	{
	/* __builtin_cpu_is / __builtin_cpu_supports: fold_builtin_cpu
	   does all the work.  */
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	/* __float128 NaN builtins: parse the significand string into a
	   (quiet or signaling) NaN constant of the builtin's return
	   type.  */
	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	/* __float128 infinity builtins always fold to +Inf of the
	   return type.  */
	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	/* Trailing-zero-count builtins on a constant.  Unlike plain
	   ctz, tzcnt is defined for a zero input and returns the
	   operand width in that case.  The 16-bit variants first narrow
	   the argument to unsigned short.  */
	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	/* Leading-zero-count builtins on a constant; like tzcnt above,
	   lzcnt of zero is defined and yields the operand width.  */
	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	/* BEXTR bit-field extract: bits 0-7 of the control operand give
	   the start position, bits 8-15 the field length.  A start past
	   the operand width or a zero length yields 0 regardless of the
	   source value; otherwise the source must also be constant.  */
	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      start &= 0xff;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len > prec)
		len = prec;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* BZHI: zero all bits at or above index IDX (the low byte of
	   the second operand).  IDX >= width is the identity; IDX == 0
	   yields 0 without needing the source to be constant.  */
	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (idx == 0)
		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* PDEP parallel bit deposit: scatter the low bits of SRC into
	   the positions of the set bits of MASK, simulated here bit by
	   bit (M walks mask positions, K the consumed source bits).  */
	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* PEXT parallel bit extract: the inverse of PDEP — gather the
	   bits of SRC selected by MASK into the low bits of the
	   result.  */
	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* movmsk/pmovmskb on a constant vector: collect the sign bit of
	   each element (integer or float) into bit I of the result.  */
	case IX86_BUILTIN_MOVMSKPS:
	case IX86_BUILTIN_PMOVMSKB:
	case IX86_BUILTIN_MOVMSKPD:
	case IX86_BUILTIN_PMOVMSKB128:
	case IX86_BUILTIN_MOVMSKPD256:
	case IX86_BUILTIN_MOVMSKPS256:
	case IX86_BUILTIN_PMOVMSKB256:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == VECTOR_CST)
	    {
	      HOST_WIDE_INT res = 0;
	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
		{
		  tree e = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
		    {
		      if (wi::neg_p (x: wi::to_wide (t: e)))
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
		    {
		      if (TREE_REAL_CST (e).sign)
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else
		    return NULL_TREE;
		}
	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	/* Vector shift builtins.  The whole family funnels into
	   do_shift below; RCODE selects the operation and IS_VSHIFT
	   whether the count is per-element (the PS?LLV/PSRAV/PSRLV
	   forms) or a single scalar count.  */
	case IX86_BUILTIN_PSLLD:
	case IX86_BUILTIN_PSLLD128:
	case IX86_BUILTIN_PSLLD128_MASK:
	case IX86_BUILTIN_PSLLD256:
	case IX86_BUILTIN_PSLLD256_MASK:
	case IX86_BUILTIN_PSLLD512:
	case IX86_BUILTIN_PSLLDI:
	case IX86_BUILTIN_PSLLDI128:
	case IX86_BUILTIN_PSLLDI128_MASK:
	case IX86_BUILTIN_PSLLDI256:
	case IX86_BUILTIN_PSLLDI256_MASK:
	case IX86_BUILTIN_PSLLDI512:
	case IX86_BUILTIN_PSLLQ:
	case IX86_BUILTIN_PSLLQ128:
	case IX86_BUILTIN_PSLLQ128_MASK:
	case IX86_BUILTIN_PSLLQ256:
	case IX86_BUILTIN_PSLLQ256_MASK:
	case IX86_BUILTIN_PSLLQ512:
	case IX86_BUILTIN_PSLLQI:
	case IX86_BUILTIN_PSLLQI128:
	case IX86_BUILTIN_PSLLQI128_MASK:
	case IX86_BUILTIN_PSLLQI256:
	case IX86_BUILTIN_PSLLQI256_MASK:
	case IX86_BUILTIN_PSLLQI512:
	case IX86_BUILTIN_PSLLW:
	case IX86_BUILTIN_PSLLW128:
	case IX86_BUILTIN_PSLLW128_MASK:
	case IX86_BUILTIN_PSLLW256:
	case IX86_BUILTIN_PSLLW256_MASK:
	case IX86_BUILTIN_PSLLW512_MASK:
	case IX86_BUILTIN_PSLLWI:
	case IX86_BUILTIN_PSLLWI128:
	case IX86_BUILTIN_PSLLWI128_MASK:
	case IX86_BUILTIN_PSLLWI256:
	case IX86_BUILTIN_PSLLWI256_MASK:
	case IX86_BUILTIN_PSLLWI512_MASK:
	  rcode = ASHIFT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRAD:
	case IX86_BUILTIN_PSRAD128:
	case IX86_BUILTIN_PSRAD128_MASK:
	case IX86_BUILTIN_PSRAD256:
	case IX86_BUILTIN_PSRAD256_MASK:
	case IX86_BUILTIN_PSRAD512:
	case IX86_BUILTIN_PSRADI:
	case IX86_BUILTIN_PSRADI128:
	case IX86_BUILTIN_PSRADI128_MASK:
	case IX86_BUILTIN_PSRADI256:
	case IX86_BUILTIN_PSRADI256_MASK:
	case IX86_BUILTIN_PSRADI512:
	case IX86_BUILTIN_PSRAQ128_MASK:
	case IX86_BUILTIN_PSRAQ256_MASK:
	case IX86_BUILTIN_PSRAQ512:
	case IX86_BUILTIN_PSRAQI128_MASK:
	case IX86_BUILTIN_PSRAQI256_MASK:
	case IX86_BUILTIN_PSRAQI512:
	case IX86_BUILTIN_PSRAW:
	case IX86_BUILTIN_PSRAW128:
	case IX86_BUILTIN_PSRAW128_MASK:
	case IX86_BUILTIN_PSRAW256:
	case IX86_BUILTIN_PSRAW256_MASK:
	case IX86_BUILTIN_PSRAW512:
	case IX86_BUILTIN_PSRAWI:
	case IX86_BUILTIN_PSRAWI128:
	case IX86_BUILTIN_PSRAWI128_MASK:
	case IX86_BUILTIN_PSRAWI256:
	case IX86_BUILTIN_PSRAWI256_MASK:
	case IX86_BUILTIN_PSRAWI512:
	  rcode = ASHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSRLD:
	case IX86_BUILTIN_PSRLD128:
	case IX86_BUILTIN_PSRLD128_MASK:
	case IX86_BUILTIN_PSRLD256:
	case IX86_BUILTIN_PSRLD256_MASK:
	case IX86_BUILTIN_PSRLD512:
	case IX86_BUILTIN_PSRLDI:
	case IX86_BUILTIN_PSRLDI128:
	case IX86_BUILTIN_PSRLDI128_MASK:
	case IX86_BUILTIN_PSRLDI256:
	case IX86_BUILTIN_PSRLDI256_MASK:
	case IX86_BUILTIN_PSRLDI512:
	case IX86_BUILTIN_PSRLQ:
	case IX86_BUILTIN_PSRLQ128:
	case IX86_BUILTIN_PSRLQ128_MASK:
	case IX86_BUILTIN_PSRLQ256:
	case IX86_BUILTIN_PSRLQ256_MASK:
	case IX86_BUILTIN_PSRLQ512:
	case IX86_BUILTIN_PSRLQI:
	case IX86_BUILTIN_PSRLQI128:
	case IX86_BUILTIN_PSRLQI128_MASK:
	case IX86_BUILTIN_PSRLQI256:
	case IX86_BUILTIN_PSRLQI256_MASK:
	case IX86_BUILTIN_PSRLQI512:
	case IX86_BUILTIN_PSRLW:
	case IX86_BUILTIN_PSRLW128:
	case IX86_BUILTIN_PSRLW128_MASK:
	case IX86_BUILTIN_PSRLW256:
	case IX86_BUILTIN_PSRLW256_MASK:
	case IX86_BUILTIN_PSRLW512:
	case IX86_BUILTIN_PSRLWI:
	case IX86_BUILTIN_PSRLWI128:
	case IX86_BUILTIN_PSRLWI128_MASK:
	case IX86_BUILTIN_PSRLWI256:
	case IX86_BUILTIN_PSRLWI256_MASK:
	case IX86_BUILTIN_PSRLWI512:
	  rcode = LSHIFTRT;
	  is_vshift = false;
	  goto do_shift;
	case IX86_BUILTIN_PSLLVV16HI:
	case IX86_BUILTIN_PSLLVV16SI:
	case IX86_BUILTIN_PSLLVV2DI:
	case IX86_BUILTIN_PSLLVV2DI_MASK:
	case IX86_BUILTIN_PSLLVV32HI:
	case IX86_BUILTIN_PSLLVV4DI:
	case IX86_BUILTIN_PSLLVV4DI_MASK:
	case IX86_BUILTIN_PSLLVV4SI:
	case IX86_BUILTIN_PSLLVV4SI_MASK:
	case IX86_BUILTIN_PSLLVV8DI:
	case IX86_BUILTIN_PSLLVV8HI:
	case IX86_BUILTIN_PSLLVV8SI:
	case IX86_BUILTIN_PSLLVV8SI_MASK:
	  rcode = ASHIFT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRAVQ128:
	case IX86_BUILTIN_PSRAVQ256:
	case IX86_BUILTIN_PSRAVV16HI:
	case IX86_BUILTIN_PSRAVV16SI:
	case IX86_BUILTIN_PSRAVV32HI:
	case IX86_BUILTIN_PSRAVV4SI:
	case IX86_BUILTIN_PSRAVV4SI_MASK:
	case IX86_BUILTIN_PSRAVV8DI:
	case IX86_BUILTIN_PSRAVV8HI:
	case IX86_BUILTIN_PSRAVV8SI:
	case IX86_BUILTIN_PSRAVV8SI_MASK:
	  rcode = ASHIFTRT;
	  is_vshift = true;
	  goto do_shift;
	case IX86_BUILTIN_PSRLVV16HI:
	case IX86_BUILTIN_PSRLVV16SI:
	case IX86_BUILTIN_PSRLVV2DI:
	case IX86_BUILTIN_PSRLVV2DI_MASK:
	case IX86_BUILTIN_PSRLVV32HI:
	case IX86_BUILTIN_PSRLVV4DI:
	case IX86_BUILTIN_PSRLVV4DI_MASK:
	case IX86_BUILTIN_PSRLVV4SI:
	case IX86_BUILTIN_PSRLVV4SI_MASK:
	case IX86_BUILTIN_PSRLVV8DI:
	case IX86_BUILTIN_PSRLVV8HI:
	case IX86_BUILTIN_PSRLVV8SI:
	case IX86_BUILTIN_PSRLVV8SI_MASK:
	  rcode = LSHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	do_shift:
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      /* This is masked shift.  */
	      if (!tree_fits_uhwi_p (args[n_args - 1])
		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
		break;
	      mask = tree_to_uhwi (args[n_args - 1]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      /* Force bits above the element count on, so an all-ones
		 low part compares equal to HOST_WIDE_INT_M1U below.  */
	      mask |= HOST_WIDE_INT_M1U << elems;
	      /* A partial mask merges in elements of the pass-through
		 operand, which must then also be constant.  */
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
		break;
	      /* All-zeros mask: the result is just the pass-through.  */
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		return args[n_args - 2];
	    }
	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  /* For per-element shifts TEM is a dummy non-zero count; the
	     real counts are read per element below.  */
	  if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (arg1: args[1])))
	    {
	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
	      unsigned HOST_WIDE_INT prec
		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
		return args[0];
	      if (count >= prec)
		{
		  /* Arithmetic right shift saturates the count to
		     prec-1; other over-wide shifts yield zero.  */
		  if (rcode == ASHIFTRT)
		    count = prec - 1;
		  else if (mask == HOST_WIDE_INT_M1U)
		    return build_zero_cst (TREE_TYPE (args[0]));
		}
	      tree countt = NULL_TREE;
	      if (!is_vshift)
		{
		  if (count >= prec)
		    countt = integer_zero_node;
		  else
		    countt = build_int_cst (integer_type_node, count);
		}
	      tree_vector_builder builder;
	      /* Masked or per-element shifts need every element spelled
		 out; otherwise reuse args[0]'s encoding.  */
	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
		builder.new_vector (TREE_TYPE (args[0]),
				    npatterns: TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				    nelts_per_pattern: 1);
	      else
		builder.new_unary_operation (TREE_TYPE (args[0]), vec: args[0],
					     allow_stepped_p: false);
	      unsigned int cnt = builder.encoded_nelts ();
	      for (unsigned int i = 0; i < cnt; ++i)
		{
		  tree elt = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
		    return NULL_TREE;
		  tree type = TREE_TYPE (elt);
		  /* Logical right shift must be done on the unsigned
		     type so no sign bits are shifted in.  */
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (unsigned_type_for (type), elt);
		  if (is_vshift)
		    {
		      countt = VECTOR_CST_ELT (args[1], i);
		      if (TREE_CODE (countt) != INTEGER_CST
			  || TREE_OVERFLOW (countt))
			return NULL_TREE;
		      /* Out-of-range per-element counts: saturate for
			 arithmetic right shift, zero otherwise.  */
		      if (wi::neg_p (x: wi::to_wide (t: countt))
			  || wi::to_widest (t: countt) >= prec)
			{
			  if (rcode == ASHIFTRT)
			    countt = build_int_cst (TREE_TYPE (countt),
						    prec - 1);
			  else
			    {
			      elt = build_zero_cst (TREE_TYPE (elt));
			      countt = build_zero_cst (TREE_TYPE (countt));
			    }
			}
		    }
		  else if (count >= prec)
		    elt = build_zero_cst (TREE_TYPE (elt));
		  elt = const_binop (rcode == ASHIFT
				     ? LSHIFT_EXPR : RSHIFT_EXPR,
				     TREE_TYPE (elt), elt, countt);
		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
		    return NULL_TREE;
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (type, elt);
		  /* Element not selected by the mask: take it from the
		     pass-through operand instead.  */
		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
		    {
		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
		      if (TREE_CODE (elt) != INTEGER_CST
			  || TREE_OVERFLOW (elt))
			return NULL_TREE;
		    }
		  builder.quick_push (obj: elt);
		}
	      return builder.build ();
	    }
	  break;

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
18926 | |
18927 | /* Fold a MD builtin (use ix86_fold_builtin for folding into |
18928 | constant) in GIMPLE. */ |
18929 | |
18930 | bool |
18931 | ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) |
18932 | { |
18933 | gimple *stmt = gsi_stmt (i: *gsi), *g; |
18934 | gimple_seq stmts = NULL; |
18935 | tree fndecl = gimple_call_fndecl (gs: stmt); |
18936 | gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); |
18937 | int n_args = gimple_call_num_args (gs: stmt); |
18938 | enum ix86_builtins fn_code |
18939 | = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (decl: fndecl); |
18940 | tree decl = NULL_TREE; |
18941 | tree arg0, arg1, arg2; |
18942 | enum rtx_code rcode; |
18943 | enum tree_code tcode; |
18944 | unsigned HOST_WIDE_INT count; |
18945 | bool is_vshift; |
18946 | unsigned HOST_WIDE_INT elems; |
18947 | location_t loc; |
18948 | |
18949 | /* Don't fold when there's isa mismatch. */ |
18950 | if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL)) |
18951 | return false; |
18952 | |
18953 | switch (fn_code) |
18954 | { |
18955 | case IX86_BUILTIN_TZCNT32: |
18956 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZ); |
18957 | goto fold_tzcnt_lzcnt; |
18958 | |
18959 | case IX86_BUILTIN_TZCNT64: |
18960 | decl = builtin_decl_implicit (fncode: BUILT_IN_CTZLL); |
18961 | goto fold_tzcnt_lzcnt; |
18962 | |
18963 | case IX86_BUILTIN_LZCNT32: |
18964 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZ); |
18965 | goto fold_tzcnt_lzcnt; |
18966 | |
18967 | case IX86_BUILTIN_LZCNT64: |
18968 | decl = builtin_decl_implicit (fncode: BUILT_IN_CLZLL); |
18969 | goto fold_tzcnt_lzcnt; |
18970 | |
18971 | fold_tzcnt_lzcnt: |
18972 | gcc_assert (n_args == 1); |
18973 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
18974 | if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (gs: stmt)) |
18975 | { |
18976 | int prec = TYPE_PRECISION (TREE_TYPE (arg0)); |
18977 | /* If arg0 is provably non-zero, optimize into generic |
18978 | __builtin_c[tl]z{,ll} function the middle-end handles |
18979 | better. */ |
18980 | if (!expr_not_equal_to (t: arg0, wi::zero (precision: prec))) |
18981 | return false; |
18982 | |
18983 | loc = gimple_location (g: stmt); |
18984 | g = gimple_build_call (decl, 1, arg0); |
18985 | gimple_set_location (g, location: loc); |
18986 | tree lhs = make_ssa_name (integer_type_node); |
18987 | gimple_call_set_lhs (gs: g, lhs); |
18988 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
18989 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), NOP_EXPR, lhs); |
18990 | gimple_set_location (g, location: loc); |
18991 | gsi_replace (gsi, g, false); |
18992 | return true; |
18993 | } |
18994 | break; |
18995 | |
18996 | case IX86_BUILTIN_BZHI32: |
18997 | case IX86_BUILTIN_BZHI64: |
18998 | gcc_assert (n_args == 2); |
18999 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19000 | if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (gs: stmt)) |
19001 | { |
19002 | unsigned int idx = tree_to_uhwi (arg1) & 0xff; |
19003 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19004 | if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) |
19005 | break; |
19006 | loc = gimple_location (g: stmt); |
19007 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19008 | gimple_set_location (g, location: loc); |
19009 | gsi_replace (gsi, g, false); |
19010 | return true; |
19011 | } |
19012 | break; |
19013 | |
19014 | case IX86_BUILTIN_PDEP32: |
19015 | case IX86_BUILTIN_PDEP64: |
19016 | case IX86_BUILTIN_PEXT32: |
19017 | case IX86_BUILTIN_PEXT64: |
19018 | gcc_assert (n_args == 2); |
19019 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19020 | if (integer_all_onesp (arg1) && gimple_call_lhs (gs: stmt)) |
19021 | { |
19022 | loc = gimple_location (g: stmt); |
19023 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19024 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19025 | gimple_set_location (g, location: loc); |
19026 | gsi_replace (gsi, g, false); |
19027 | return true; |
19028 | } |
19029 | break; |
19030 | |
19031 | case IX86_BUILTIN_PBLENDVB256: |
19032 | case IX86_BUILTIN_BLENDVPS256: |
19033 | case IX86_BUILTIN_BLENDVPD256: |
19034 | /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower |
19035 | to scalar operations and not combined back. */ |
19036 | if (!TARGET_AVX2) |
19037 | break; |
19038 | |
19039 | /* FALLTHRU. */ |
19040 | case IX86_BUILTIN_BLENDVPD: |
19041 | /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2, |
19042 | w/o sse4.2, it's veclowered to scalar operations and |
19043 | not combined back. */ |
19044 | if (!TARGET_SSE4_2) |
19045 | break; |
19046 | /* FALLTHRU. */ |
19047 | case IX86_BUILTIN_PBLENDVB128: |
19048 | case IX86_BUILTIN_BLENDVPS: |
19049 | gcc_assert (n_args == 3); |
19050 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19051 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19052 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
19053 | if (gimple_call_lhs (gs: stmt)) |
19054 | { |
19055 | loc = gimple_location (g: stmt); |
19056 | tree type = TREE_TYPE (arg2); |
19057 | if (VECTOR_FLOAT_TYPE_P (type)) |
19058 | { |
19059 | tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode |
19060 | ? intSI_type_node : intDI_type_node; |
19061 | type = get_same_sized_vectype (itype, type); |
19062 | } |
19063 | else |
19064 | type = signed_type_for (type); |
19065 | arg2 = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type, ops: arg2); |
19066 | tree zero_vec = build_zero_cst (type); |
19067 | tree cmp_type = truth_type_for (type); |
19068 | tree cmp = gimple_build (seq: &stmts, code: LT_EXPR, type: cmp_type, ops: arg2, ops: zero_vec); |
19069 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19070 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19071 | VEC_COND_EXPR, cmp, |
19072 | arg1, arg0); |
19073 | gimple_set_location (g, location: loc); |
19074 | gsi_replace (gsi, g, false); |
19075 | } |
19076 | else |
19077 | gsi_replace (gsi, gimple_build_nop (), false); |
19078 | return true; |
19079 | |
19080 | |
19081 | case IX86_BUILTIN_PCMPEQB128: |
19082 | case IX86_BUILTIN_PCMPEQW128: |
19083 | case IX86_BUILTIN_PCMPEQD128: |
19084 | case IX86_BUILTIN_PCMPEQQ: |
19085 | case IX86_BUILTIN_PCMPEQB256: |
19086 | case IX86_BUILTIN_PCMPEQW256: |
19087 | case IX86_BUILTIN_PCMPEQD256: |
19088 | case IX86_BUILTIN_PCMPEQQ256: |
19089 | tcode = EQ_EXPR; |
19090 | goto do_cmp; |
19091 | |
19092 | case IX86_BUILTIN_PCMPGTB128: |
19093 | case IX86_BUILTIN_PCMPGTW128: |
19094 | case IX86_BUILTIN_PCMPGTD128: |
19095 | case IX86_BUILTIN_PCMPGTQ: |
19096 | case IX86_BUILTIN_PCMPGTB256: |
19097 | case IX86_BUILTIN_PCMPGTW256: |
19098 | case IX86_BUILTIN_PCMPGTD256: |
19099 | case IX86_BUILTIN_PCMPGTQ256: |
19100 | tcode = GT_EXPR; |
19101 | |
19102 | do_cmp: |
19103 | gcc_assert (n_args == 2); |
19104 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19105 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19106 | if (gimple_call_lhs (gs: stmt)) |
19107 | { |
19108 | loc = gimple_location (g: stmt); |
19109 | tree type = TREE_TYPE (arg0); |
19110 | tree zero_vec = build_zero_cst (type); |
19111 | tree minus_one_vec = build_minus_one_cst (type); |
19112 | tree cmp_type = truth_type_for (type); |
19113 | tree cmp = gimple_build (seq: &stmts, code: tcode, type: cmp_type, ops: arg0, ops: arg1); |
19114 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19115 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19116 | VEC_COND_EXPR, cmp, |
19117 | minus_one_vec, zero_vec); |
19118 | gimple_set_location (g, location: loc); |
19119 | gsi_replace (gsi, g, false); |
19120 | } |
19121 | else |
19122 | gsi_replace (gsi, gimple_build_nop (), false); |
19123 | return true; |
19124 | |
19125 | case IX86_BUILTIN_PSLLD: |
19126 | case IX86_BUILTIN_PSLLD128: |
19127 | case IX86_BUILTIN_PSLLD128_MASK: |
19128 | case IX86_BUILTIN_PSLLD256: |
19129 | case IX86_BUILTIN_PSLLD256_MASK: |
19130 | case IX86_BUILTIN_PSLLD512: |
19131 | case IX86_BUILTIN_PSLLDI: |
19132 | case IX86_BUILTIN_PSLLDI128: |
19133 | case IX86_BUILTIN_PSLLDI128_MASK: |
19134 | case IX86_BUILTIN_PSLLDI256: |
19135 | case IX86_BUILTIN_PSLLDI256_MASK: |
19136 | case IX86_BUILTIN_PSLLDI512: |
19137 | case IX86_BUILTIN_PSLLQ: |
19138 | case IX86_BUILTIN_PSLLQ128: |
19139 | case IX86_BUILTIN_PSLLQ128_MASK: |
19140 | case IX86_BUILTIN_PSLLQ256: |
19141 | case IX86_BUILTIN_PSLLQ256_MASK: |
19142 | case IX86_BUILTIN_PSLLQ512: |
19143 | case IX86_BUILTIN_PSLLQI: |
19144 | case IX86_BUILTIN_PSLLQI128: |
19145 | case IX86_BUILTIN_PSLLQI128_MASK: |
19146 | case IX86_BUILTIN_PSLLQI256: |
19147 | case IX86_BUILTIN_PSLLQI256_MASK: |
19148 | case IX86_BUILTIN_PSLLQI512: |
19149 | case IX86_BUILTIN_PSLLW: |
19150 | case IX86_BUILTIN_PSLLW128: |
19151 | case IX86_BUILTIN_PSLLW128_MASK: |
19152 | case IX86_BUILTIN_PSLLW256: |
19153 | case IX86_BUILTIN_PSLLW256_MASK: |
19154 | case IX86_BUILTIN_PSLLW512_MASK: |
19155 | case IX86_BUILTIN_PSLLWI: |
19156 | case IX86_BUILTIN_PSLLWI128: |
19157 | case IX86_BUILTIN_PSLLWI128_MASK: |
19158 | case IX86_BUILTIN_PSLLWI256: |
19159 | case IX86_BUILTIN_PSLLWI256_MASK: |
19160 | case IX86_BUILTIN_PSLLWI512_MASK: |
19161 | rcode = ASHIFT; |
19162 | is_vshift = false; |
19163 | goto do_shift; |
19164 | case IX86_BUILTIN_PSRAD: |
19165 | case IX86_BUILTIN_PSRAD128: |
19166 | case IX86_BUILTIN_PSRAD128_MASK: |
19167 | case IX86_BUILTIN_PSRAD256: |
19168 | case IX86_BUILTIN_PSRAD256_MASK: |
19169 | case IX86_BUILTIN_PSRAD512: |
19170 | case IX86_BUILTIN_PSRADI: |
19171 | case IX86_BUILTIN_PSRADI128: |
19172 | case IX86_BUILTIN_PSRADI128_MASK: |
19173 | case IX86_BUILTIN_PSRADI256: |
19174 | case IX86_BUILTIN_PSRADI256_MASK: |
19175 | case IX86_BUILTIN_PSRADI512: |
19176 | case IX86_BUILTIN_PSRAQ128_MASK: |
19177 | case IX86_BUILTIN_PSRAQ256_MASK: |
19178 | case IX86_BUILTIN_PSRAQ512: |
19179 | case IX86_BUILTIN_PSRAQI128_MASK: |
19180 | case IX86_BUILTIN_PSRAQI256_MASK: |
19181 | case IX86_BUILTIN_PSRAQI512: |
19182 | case IX86_BUILTIN_PSRAW: |
19183 | case IX86_BUILTIN_PSRAW128: |
19184 | case IX86_BUILTIN_PSRAW128_MASK: |
19185 | case IX86_BUILTIN_PSRAW256: |
19186 | case IX86_BUILTIN_PSRAW256_MASK: |
19187 | case IX86_BUILTIN_PSRAW512: |
19188 | case IX86_BUILTIN_PSRAWI: |
19189 | case IX86_BUILTIN_PSRAWI128: |
19190 | case IX86_BUILTIN_PSRAWI128_MASK: |
19191 | case IX86_BUILTIN_PSRAWI256: |
19192 | case IX86_BUILTIN_PSRAWI256_MASK: |
19193 | case IX86_BUILTIN_PSRAWI512: |
19194 | rcode = ASHIFTRT; |
19195 | is_vshift = false; |
19196 | goto do_shift; |
19197 | case IX86_BUILTIN_PSRLD: |
19198 | case IX86_BUILTIN_PSRLD128: |
19199 | case IX86_BUILTIN_PSRLD128_MASK: |
19200 | case IX86_BUILTIN_PSRLD256: |
19201 | case IX86_BUILTIN_PSRLD256_MASK: |
19202 | case IX86_BUILTIN_PSRLD512: |
19203 | case IX86_BUILTIN_PSRLDI: |
19204 | case IX86_BUILTIN_PSRLDI128: |
19205 | case IX86_BUILTIN_PSRLDI128_MASK: |
19206 | case IX86_BUILTIN_PSRLDI256: |
19207 | case IX86_BUILTIN_PSRLDI256_MASK: |
19208 | case IX86_BUILTIN_PSRLDI512: |
19209 | case IX86_BUILTIN_PSRLQ: |
19210 | case IX86_BUILTIN_PSRLQ128: |
19211 | case IX86_BUILTIN_PSRLQ128_MASK: |
19212 | case IX86_BUILTIN_PSRLQ256: |
19213 | case IX86_BUILTIN_PSRLQ256_MASK: |
19214 | case IX86_BUILTIN_PSRLQ512: |
19215 | case IX86_BUILTIN_PSRLQI: |
19216 | case IX86_BUILTIN_PSRLQI128: |
19217 | case IX86_BUILTIN_PSRLQI128_MASK: |
19218 | case IX86_BUILTIN_PSRLQI256: |
19219 | case IX86_BUILTIN_PSRLQI256_MASK: |
19220 | case IX86_BUILTIN_PSRLQI512: |
19221 | case IX86_BUILTIN_PSRLW: |
19222 | case IX86_BUILTIN_PSRLW128: |
19223 | case IX86_BUILTIN_PSRLW128_MASK: |
19224 | case IX86_BUILTIN_PSRLW256: |
19225 | case IX86_BUILTIN_PSRLW256_MASK: |
19226 | case IX86_BUILTIN_PSRLW512: |
19227 | case IX86_BUILTIN_PSRLWI: |
19228 | case IX86_BUILTIN_PSRLWI128: |
19229 | case IX86_BUILTIN_PSRLWI128_MASK: |
19230 | case IX86_BUILTIN_PSRLWI256: |
19231 | case IX86_BUILTIN_PSRLWI256_MASK: |
19232 | case IX86_BUILTIN_PSRLWI512: |
19233 | rcode = LSHIFTRT; |
19234 | is_vshift = false; |
19235 | goto do_shift; |
19236 | case IX86_BUILTIN_PSLLVV16HI: |
19237 | case IX86_BUILTIN_PSLLVV16SI: |
19238 | case IX86_BUILTIN_PSLLVV2DI: |
19239 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
19240 | case IX86_BUILTIN_PSLLVV32HI: |
19241 | case IX86_BUILTIN_PSLLVV4DI: |
19242 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
19243 | case IX86_BUILTIN_PSLLVV4SI: |
19244 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
19245 | case IX86_BUILTIN_PSLLVV8DI: |
19246 | case IX86_BUILTIN_PSLLVV8HI: |
19247 | case IX86_BUILTIN_PSLLVV8SI: |
19248 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
19249 | rcode = ASHIFT; |
19250 | is_vshift = true; |
19251 | goto do_shift; |
19252 | case IX86_BUILTIN_PSRAVQ128: |
19253 | case IX86_BUILTIN_PSRAVQ256: |
19254 | case IX86_BUILTIN_PSRAVV16HI: |
19255 | case IX86_BUILTIN_PSRAVV16SI: |
19256 | case IX86_BUILTIN_PSRAVV32HI: |
19257 | case IX86_BUILTIN_PSRAVV4SI: |
19258 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
19259 | case IX86_BUILTIN_PSRAVV8DI: |
19260 | case IX86_BUILTIN_PSRAVV8HI: |
19261 | case IX86_BUILTIN_PSRAVV8SI: |
19262 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
19263 | rcode = ASHIFTRT; |
19264 | is_vshift = true; |
19265 | goto do_shift; |
19266 | case IX86_BUILTIN_PSRLVV16HI: |
19267 | case IX86_BUILTIN_PSRLVV16SI: |
19268 | case IX86_BUILTIN_PSRLVV2DI: |
19269 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
19270 | case IX86_BUILTIN_PSRLVV32HI: |
19271 | case IX86_BUILTIN_PSRLVV4DI: |
19272 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
19273 | case IX86_BUILTIN_PSRLVV4SI: |
19274 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
19275 | case IX86_BUILTIN_PSRLVV8DI: |
19276 | case IX86_BUILTIN_PSRLVV8HI: |
19277 | case IX86_BUILTIN_PSRLVV8SI: |
19278 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
19279 | rcode = LSHIFTRT; |
19280 | is_vshift = true; |
19281 | goto do_shift; |
19282 | |
19283 | do_shift: |
19284 | gcc_assert (n_args >= 2); |
19285 | if (!gimple_call_lhs (gs: stmt)) |
19286 | break; |
19287 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19288 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19289 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19290 | /* For masked shift, only optimize if the mask is all ones. */ |
19291 | if (n_args > 2 |
19292 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19293 | break; |
19294 | if (is_vshift) |
19295 | { |
19296 | if (TREE_CODE (arg1) != VECTOR_CST) |
19297 | break; |
19298 | count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); |
19299 | if (integer_zerop (arg1)) |
19300 | count = 0; |
19301 | else if (rcode == ASHIFTRT) |
19302 | break; |
19303 | else |
19304 | for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) |
19305 | { |
19306 | tree elt = VECTOR_CST_ELT (arg1, i); |
19307 | if (!wi::neg_p (x: wi::to_wide (t: elt)) |
19308 | && wi::to_widest (t: elt) < count) |
19309 | return false; |
19310 | } |
19311 | } |
19312 | else |
19313 | { |
19314 | arg1 = ix86_vector_shift_count (arg1); |
19315 | if (!arg1) |
19316 | break; |
19317 | count = tree_to_uhwi (arg1); |
19318 | } |
19319 | if (count == 0) |
19320 | { |
19321 | /* Just return the first argument for shift by 0. */ |
19322 | loc = gimple_location (g: stmt); |
19323 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19324 | gimple_set_location (g, location: loc); |
19325 | gsi_replace (gsi, g, false); |
19326 | return true; |
19327 | } |
19328 | if (rcode != ASHIFTRT |
19329 | && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) |
19330 | { |
19331 | /* For shift counts equal or greater than precision, except for |
19332 | arithmetic right shift the result is zero. */ |
19333 | loc = gimple_location (g: stmt); |
19334 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19335 | build_zero_cst (TREE_TYPE (arg0))); |
19336 | gimple_set_location (g, location: loc); |
19337 | gsi_replace (gsi, g, false); |
19338 | return true; |
19339 | } |
19340 | break; |
19341 | |
19342 | case IX86_BUILTIN_SHUFPD512: |
19343 | case IX86_BUILTIN_SHUFPS512: |
19344 | case IX86_BUILTIN_SHUFPD: |
19345 | case IX86_BUILTIN_SHUFPD256: |
19346 | case IX86_BUILTIN_SHUFPS: |
19347 | case IX86_BUILTIN_SHUFPS256: |
19348 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19349 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19350 | /* This is masked shuffle. Only optimize if the mask is all ones. */ |
19351 | if (n_args > 3 |
19352 | && !ix86_masked_all_ones (elems, |
19353 | arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19354 | break; |
19355 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
19356 | if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt)) |
19357 | { |
19358 | unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2); |
19359 | /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */ |
19360 | if (shuffle_mask > 255) |
19361 | return false; |
19362 | |
19363 | machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))); |
19364 | loc = gimple_location (g: stmt); |
19365 | tree itype = (imode == E_DFmode |
19366 | ? long_long_integer_type_node : integer_type_node); |
19367 | tree vtype = build_vector_type (itype, elems); |
19368 | tree_vector_builder elts (vtype, elems, 1); |
19369 | |
19370 | |
19371 | /* Transform integer shuffle_mask to vector perm_mask which |
19372 | is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */ |
19373 | for (unsigned i = 0; i != elems; i++) |
19374 | { |
19375 | unsigned sel_idx; |
19376 | /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6]) |
		 provide 2 select controls for each element of the
19378 | destination. */ |
19379 | if (imode == E_DFmode) |
19380 | sel_idx = (i & 1) * elems + (i & ~1) |
19381 | + ((shuffle_mask >> i) & 1); |
19382 | else |
19383 | { |
19384 | /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select |
19385 | controls for each element of the destination. */ |
19386 | unsigned j = i % 4; |
19387 | sel_idx = ((i >> 1) & 1) * elems + (i & ~3) |
19388 | + ((shuffle_mask >> 2 * j) & 3); |
19389 | } |
19390 | elts.quick_push (obj: build_int_cst (itype, sel_idx)); |
19391 | } |
19392 | |
19393 | tree perm_mask = elts.build (); |
19394 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19395 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19396 | VEC_PERM_EXPR, |
19397 | arg0, arg1, perm_mask); |
19398 | gimple_set_location (g, location: loc); |
19399 | gsi_replace (gsi, g, false); |
19400 | return true; |
19401 | } |
19402 | // Do not error yet, the constant could be propagated later? |
19403 | break; |
19404 | |
19405 | case IX86_BUILTIN_PABSB: |
19406 | case IX86_BUILTIN_PABSW: |
19407 | case IX86_BUILTIN_PABSD: |
19408 | /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ |
19409 | if (!TARGET_MMX_WITH_SSE) |
19410 | break; |
19411 | /* FALLTHRU. */ |
19412 | case IX86_BUILTIN_PABSB128: |
19413 | case IX86_BUILTIN_PABSB256: |
19414 | case IX86_BUILTIN_PABSB512: |
19415 | case IX86_BUILTIN_PABSW128: |
19416 | case IX86_BUILTIN_PABSW256: |
19417 | case IX86_BUILTIN_PABSW512: |
19418 | case IX86_BUILTIN_PABSD128: |
19419 | case IX86_BUILTIN_PABSD256: |
19420 | case IX86_BUILTIN_PABSD512: |
19421 | case IX86_BUILTIN_PABSQ128: |
19422 | case IX86_BUILTIN_PABSQ256: |
19423 | case IX86_BUILTIN_PABSQ512: |
19424 | case IX86_BUILTIN_PABSB128_MASK: |
19425 | case IX86_BUILTIN_PABSB256_MASK: |
19426 | case IX86_BUILTIN_PABSW128_MASK: |
19427 | case IX86_BUILTIN_PABSW256_MASK: |
19428 | case IX86_BUILTIN_PABSD128_MASK: |
19429 | case IX86_BUILTIN_PABSD256_MASK: |
19430 | gcc_assert (n_args >= 1); |
19431 | if (!gimple_call_lhs (gs: stmt)) |
19432 | break; |
19433 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19434 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19435 | /* For masked ABS, only optimize if the mask is all ones. */ |
19436 | if (n_args > 1 |
19437 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19438 | break; |
19439 | { |
19440 | tree utype, ures, vce; |
19441 | utype = unsigned_type_for (TREE_TYPE (arg0)); |
19442 | /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR |
	   instead of ABS_EXPR to handle overflow case (TYPE_MIN).  */
19444 | ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0); |
19445 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19446 | loc = gimple_location (g: stmt); |
19447 | vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); |
19448 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19449 | VIEW_CONVERT_EXPR, vce); |
19450 | gsi_replace (gsi, g, false); |
19451 | } |
19452 | return true; |
19453 | |
19454 | default: |
19455 | break; |
19456 | } |
19457 | |
19458 | return false; |
19459 | } |
19460 | |
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  Return a decl for the SVML
   variant of scalar math function FN taking TYPE_IN and returning
   TYPE_OUT, or NULL_TREE if no suitable routine exists.  */

tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Input and output vectors must have the same element mode and the
     same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (node: type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      /* Only the 128-bit variants are provided: 2 doubles or
	 4 floats.  */
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* Build the SVML routine name: "vmls<Func>4" for SFmode or
     "vmld<Func>2" for DFmode, where <Func> is the scalar builtin
     name with the "__builtin_" prefix (10 chars) stripped.  log is
     special-cased since its SVML routines are named after "Ln".  */
  if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOGF)
    strcpy (dest: name, src: "vmlsLn4" );
  else if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOG)
    strcpy (dest: name, src: "vmldLn2" );
  else if (n == 4)
    {
      sprintf (s: name, format: "vmls%s" , bname+10);
      /* Replace the trailing 'f' of the float builtin name with the
	 vector length.  */
      name[strlen (s: name)-1] = '4';
    }
  else
    sprintf (s: name, format: "vmld%s2" , bname+10);

  /* Convert to uppercase.  Clearing bit 0x20 upcases an ASCII
     lowercase letter; name[4] is the first letter of <Func>.  */
  name[4] &= ~0x20;

  /* Count the scalar builtin's arguments to choose between the unary
     and binary vector function type.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  /* The library routine is a pure function of its arguments.  */
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
19553 | |
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  Return a decl for the ACML
   variant of scalar math function FN taking TYPE_IN and returning
   TYPE_OUT, or NULL_TREE if no suitable routine exists.  */

tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  /* Name template: positions 4-5 receive the element kind and vector
     length ("d2" or "s4"); the scalar function name is appended at
     position 7, e.g. "__vrd2_sin".  */
  char name[20] = "__vr.._" ;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Input and output vectors must have the same element mode and the
     same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (node: type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (node: type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      /* Only 2xDF and 4xSF variants exist.  */
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  /* Append the scalar builtin name with its "__builtin_" prefix
     (10 characters) stripped.  */
  sprintf (s: name + 7, format: "%s" , bname+10);

  /* Count the scalar builtin's arguments to choose between the unary
     and binary vector function type.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  /* The library routine is a pure function of its arguments.  */
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
19632 | |
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;
  const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));

  /* Scatter stores require AVX512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* 512-bit vectors additionally require EVEX512 support.  */
  if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
    return NULL_TREE;

  /* Respect the per-vector-length scatter tuning knobs.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
	 ? !TARGET_USE_SCATTER_4PARTS
	 : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  /* The index must be a 32-bit or 64-bit integer (or pointer).  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* Pick the builtin by data vector mode and index element width;
     vector lengths below 512 bits need the AVX512VL variants.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
	return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
	return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
	return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
	return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
	return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
	return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
	return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (c: code);
}
19747 | |
19748 | /* Return true if it is safe to use the rsqrt optabs to optimize |
19749 | 1.0/sqrt. */ |
19750 | |
19751 | static bool |
19752 | use_rsqrt_p (machine_mode mode) |
19753 | { |
19754 | return ((mode == HFmode |
19755 | || (TARGET_SSE && TARGET_SSE_MATH)) |
19756 | && flag_finite_math_only |
19757 | && !flag_trapping_math |
19758 | && flag_unsafe_math_optimizations); |
19759 | } |
19760 | |
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  /* One selection bit per element, at bit position i.  */
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low lane's selectors are encoded in the imm8.  */
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  Each selector occupies
	 nelt/2 bits: 2 bits per SFmode element, 1 per DFmode.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
19861 | |
19862 | /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by |
19863 | the expansion functions to turn the parallel back into a mask. |
19864 | The return value is 0 for no match and the imm8+1 for a match. */ |
19865 | |
19866 | int |
19867 | avx_vperm2f128_parallel (rtx par, machine_mode mode) |
19868 | { |
19869 | unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; |
19870 | unsigned mask = 0; |
19871 | unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ |
19872 | |
19873 | if (XVECLEN (par, 0) != (int) nelt) |
19874 | return 0; |
19875 | |
19876 | /* Validate that all of the elements are constants, and not totally |
19877 | out of range. Copy the data into an integral array to make the |
19878 | subsequent checks easier. */ |
19879 | for (i = 0; i < nelt; ++i) |
19880 | { |
19881 | rtx er = XVECEXP (par, 0, i); |
19882 | unsigned HOST_WIDE_INT ei; |
19883 | |
19884 | if (!CONST_INT_P (er)) |
19885 | return 0; |
19886 | ei = INTVAL (er); |
19887 | if (ei >= 2 * nelt) |
19888 | return 0; |
19889 | ipar[i] = ei; |
19890 | } |
19891 | |
19892 | /* Validate that the halves of the permute are halves. */ |
19893 | for (i = 0; i < nelt2 - 1; ++i) |
19894 | if (ipar[i] + 1 != ipar[i + 1]) |
19895 | return 0; |
19896 | for (i = nelt2; i < nelt - 1; ++i) |
19897 | if (ipar[i] + 1 != ipar[i + 1]) |
19898 | return 0; |
19899 | |
19900 | /* Reconstruct the mask. */ |
19901 | for (i = 0; i < 2; ++i) |
19902 | { |
19903 | unsigned e = ipar[i * nelt2]; |
19904 | if (e % nelt2) |
19905 | return 0; |
19906 | e /= nelt2; |
19907 | mask |= e << (i * 4); |
19908 | } |
19909 | |
19910 | /* Make sure success has a non-zero value by adding one. */ |
19911 | return mask + 1; |
19912 | } |
19913 | |
19914 | /* Return a mask of VPTERNLOG operands that do not affect output. */ |
19915 | |
19916 | int |
19917 | vpternlog_redundant_operand_mask (rtx pternlog_imm) |
19918 | { |
19919 | int mask = 0; |
19920 | int imm8 = INTVAL (pternlog_imm); |
19921 | |
19922 | if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F)) |
19923 | mask |= 1; |
19924 | if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) |
19925 | mask |= 2; |
19926 | if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) |
19927 | mask |= 4; |
19928 | |
19929 | return mask; |
19930 | } |
19931 | |
/* Eliminate false dependencies on operands that do not affect output
   by substituting other operands of a VPTERNLOG.  */

void
substitute_vpternlog_operands (rtx *operands)
{
  /* Bit k set in MASK means input operand k+1 does not affect the
     result (see vpternlog_redundant_operand_mask); operands[4] is the
     ternlog immediate.  */
  int mask = vpternlog_redundant_operand_mask (pternlog_imm: operands[4]);

  if (mask & 1) /* The first operand is redundant.  */
    operands[1] = operands[2];

  if (mask & 2) /* The second operand is redundant.  */
    operands[2] = operands[1];

  if (mask & 4) /* The third operand is redundant.  */
    operands[3] = operands[1];
  else if (REG_P (operands[3]))
    {
      /* The third operand is live and a register: prefer it as the
	 replacement for the redundant operands, breaking any
	 dependency on their previous values.  */
      if (mask & 1)
	operands[1] = operands[3];
      if (mask & 2)
	operands[2] = operands[3];
    }
}
19956 | |
19957 | /* Return a register priority for hard reg REGNO. */ |
19958 | static int |
19959 | ix86_register_priority (int hard_regno) |
19960 | { |
19961 | /* ebp and r13 as the base always wants a displacement, r12 as the |
19962 | base always wants an index. So discourage their usage in an |
19963 | address. */ |
19964 | if (hard_regno == R12_REG || hard_regno == R13_REG) |
19965 | return 0; |
19966 | if (hard_regno == BP_REG) |
19967 | return 1; |
19968 | /* New x86-64 int registers result in bigger code size. Discourage them. */ |
19969 | if (REX_INT_REGNO_P (hard_regno)) |
19970 | return 2; |
19971 | if (REX2_INT_REGNO_P (hard_regno)) |
19972 | return 2; |
19973 | /* New x86-64 SSE registers result in bigger code size. Discourage them. */ |
19974 | if (REX_SSE_REGNO_P (hard_regno)) |
19975 | return 2; |
19976 | if (EXT_REX_SSE_REGNO_P (hard_regno)) |
19977 | return 1; |
19978 | /* Usage of AX register results in smaller code. Prefer it. */ |
19979 | if (hard_regno == AX_REG) |
19980 | return 4; |
19981 | return 3; |
19982 | } |
19983 | |
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	}

      /* Anything else goes to memory via the constant pool.  */
      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
	  && TARGET_INTER_UNIT_MOVES_TO_VEC
	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
	return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
	return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else if (MASK_CLASS_P (regclass))
	return regclass;
      else
	return NO_REGS;
    }

  return regclass;
}
20069 | |
20070 | /* Discourage putting floating-point values in SSE registers unless |
20071 | SSE math is being used, and likewise for the 387 registers. */ |
20072 | static reg_class_t |
20073 | ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) |
20074 | { |
20075 | /* Restrict the output reload class to the register bank that we are doing |
20076 | math on. If we would like not to return a subset of CLASS, reject this |
20077 | alternative: if reload cannot do this, it will still use its choice. */ |
20078 | machine_mode mode = GET_MODE (x); |
20079 | if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) |
20080 | return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; |
20081 | |
20082 | if (IS_STACK_MODE (mode)) |
20083 | return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; |
20084 | |
20085 | return regclass; |
20086 | } |
20087 | |
/* Implement TARGET_SECONDARY_RELOAD.  Return the class of an
   intermediate register needed to move X (IN_P says which direction)
   in MODE with register class RCLASS, or NO_REGS; a reload insn code
   and extra cost may be recorded in SRI instead.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.
     16-bit SSE stores need SSE4.1; without it go through a
     general register.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
20179 | |
20180 | /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ |
20181 | |
20182 | static bool |
20183 | ix86_class_likely_spilled_p (reg_class_t rclass) |
20184 | { |
20185 | switch (rclass) |
20186 | { |
20187 | case AREG: |
20188 | case DREG: |
20189 | case CREG: |
20190 | case BREG: |
20191 | case AD_REGS: |
20192 | case SIREG: |
20193 | case DIREG: |
20194 | case SSE_FIRST_REG: |
20195 | case FP_TOP_REG: |
20196 | case FP_SECOND_REG: |
20197 | return true; |
20198 | |
20199 | default: |
20200 | break; |
20201 | } |
20202 | |
20203 | return false; |
20204 | } |
20205 | |
/* Return true if a set of DST by the expression SRC should be allowed.
   This prevents complex sets of likely_spilled hard regs before reload,
   where reload has limited freedom to fix things up.  */

bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
{
  /* Avoid complex sets of likely_spilled hard registers before reload.
     Reject only when DST is a likely-spilled hard register and SRC is
     neither a plain register/memory nor a constant that a move
     instruction can materialize directly.  */
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && !REG_P (src) && !MEM_P (src)
      /* Directly loadable constants are fine: standard SSE constants
	 for vector destinations, valid immediates for scalar ones.  */
      && !(VECTOR_MODE_P (GET_MODE (dst))
	   ? standard_sse_constant_p (x: src, GET_MODE (dst))
	   : x86_64_immediate_operand (src, GET_MODE (dst)))
      && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
      /* After reload the allocator can no longer be confused; allow all.  */
      && !reload_completed)
    return false;
  return true;
}
20223 | |
20224 | /* If we are copying between registers from different register sets |
20225 | (e.g. FP and integer), we may need a memory location. |
20226 | |
20227 | The function can't work reliably when one of the CLASSES is a class |
20228 | containing registers from multiple sets. We avoid this by never combining |
20229 | different sets in a single alternative in the machine description. |
20230 | Ensure that this constraint holds to avoid unexpected surprises. |
20231 | |
20232 | When STRICT is false, we are being called from REGISTER_MOVE_COST, |
20233 | so do not enforce these sanity checks. |
20234 | |
20235 | To optimize register_move_cost performance, define inline variant. */ |
20236 | |
static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  /* LRA can generate reloads involving NO_REGS; those never need a
     secondary memory location.  */
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* Classes mixing registers from different sets (e.g. general + SSE)
     cannot be analyzed reliably; conservatively say memory is needed,
     and assert this never happens when STRICT checking is on (except
     during LRA, which may query such classes).  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> non-x87 moves always go through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* Direct SSE moves exist only to/from general registers.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;
    }

  return false;
}
20305 | |
20306 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ |
20307 | |
20308 | static bool |
20309 | ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, |
20310 | reg_class_t class2) |
20311 | { |
20312 | return inline_secondary_memory_needed (mode, class1, class2, strict: true); |
20313 | } |
20314 | |
20315 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. |
20316 | |
20317 | get_secondary_mem widens integral modes to BITS_PER_WORD. |
20318 | There is no need to emit full 64 bit move on 64 bit targets |
20319 | for integral modes that can be moved using 32 bit move. */ |
20320 | |
20321 | static machine_mode |
20322 | ix86_secondary_memory_needed_mode (machine_mode mode) |
20323 | { |
20324 | if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) |
20325 | return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); |
20326 | return mode; |
20327 | } |
20328 | |
20329 | /* Implement the TARGET_CLASS_MAX_NREGS hook. |
20330 | |
20331 | On the 80386, this is the size of MODE in words, |
20332 | except in the FP regs, where a single reg is always enough. */ |
20333 | |
20334 | static unsigned char |
20335 | ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) |
20336 | { |
20337 | if (MAYBE_INTEGER_CLASS_P (rclass)) |
20338 | { |
20339 | if (mode == XFmode) |
20340 | return (TARGET_64BIT ? 2 : 3); |
20341 | else if (mode == XCmode) |
20342 | return (TARGET_64BIT ? 4 : 6); |
20343 | else |
20344 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20345 | } |
20346 | else |
20347 | { |
20348 | if (COMPLEX_MODE_P (mode)) |
20349 | return 2; |
20350 | else |
20351 | return 1; |
20352 | } |
20353 | } |
20354 | |
20355 | /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
20356 | |
20357 | static bool |
20358 | ix86_can_change_mode_class (machine_mode from, machine_mode to, |
20359 | reg_class_t regclass) |
20360 | { |
20361 | if (from == to) |
20362 | return true; |
20363 | |
20364 | /* x87 registers can't do subreg at all, as all values are reformatted |
20365 | to extended precision. */ |
20366 | if (MAYBE_FLOAT_CLASS_P (regclass)) |
20367 | return false; |
20368 | |
20369 | if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) |
20370 | { |
20371 | /* Vector registers do not support QI or HImode loads. If we don't |
20372 | disallow a change to these modes, reload will assume it's ok to |
20373 | drop the subreg from (subreg:SI (reg:HI 100) 0). This affects |
20374 | the vec_dupv4hi pattern. |
20375 | NB: SSE2 can load 16bit data to sse register via pinsrw. */ |
20376 | int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4; |
20377 | if (GET_MODE_SIZE (from) < mov_size |
20378 | || GET_MODE_SIZE (to) < mov_size) |
20379 | return false; |
20380 | } |
20381 | |
20382 | return true; |
20383 | } |
20384 | |
20385 | /* Return index of MODE in the sse load/store tables. */ |
20386 | |
20387 | static inline int |
20388 | sse_store_index (machine_mode mode) |
20389 | { |
20390 | /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store |
20391 | costs to processor_costs, which requires changes to all entries in |
20392 | processor cost table. */ |
20393 | if (mode == E_HFmode) |
20394 | mode = E_SFmode; |
20395 | |
20396 | switch (GET_MODE_SIZE (mode)) |
20397 | { |
20398 | case 4: |
20399 | return 0; |
20400 | case 8: |
20401 | return 1; |
20402 | case 16: |
20403 | return 2; |
20404 | case 32: |
20405 | return 3; |
20406 | case 64: |
20407 | return 4; |
20408 | default: |
20409 | return -1; |
20410 | } |
20411 | } |
20412 | |
20413 | /* Return the cost of moving data of mode M between a |
20414 | register and memory. A value of 2 is the default; this cost is |
20415 | relative to those in `REGISTER_MOVE_COST'. |
20416 | |
20417 | This function is used extensively by register_move_cost that is used to |
20418 | build tables at startup. Make it inline in this case. |
20419 | When IN is 2, return maximum of in and out move cost. |
20420 | |
20421 | If moving between registers and memory is more expensive than |
20422 | between two registers, you should define this macro to express the |
20423 | relative cost. |
20424 | |
20425 | Model also increased moving costs of QImode registers in non |
20426 | Q_REGS classes. |
20427 | */ |
20428 | static inline int |
20429 | inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) |
20430 | { |
20431 | int cost; |
20432 | |
20433 | if (FLOAT_CLASS_P (regclass)) |
20434 | { |
20435 | int index; |
20436 | switch (mode) |
20437 | { |
20438 | case E_SFmode: |
20439 | index = 0; |
20440 | break; |
20441 | case E_DFmode: |
20442 | index = 1; |
20443 | break; |
20444 | case E_XFmode: |
20445 | index = 2; |
20446 | break; |
20447 | default: |
20448 | return 100; |
20449 | } |
20450 | if (in == 2) |
20451 | return MAX (ix86_cost->hard_register.fp_load [index], |
20452 | ix86_cost->hard_register.fp_store [index]); |
20453 | return in ? ix86_cost->hard_register.fp_load [index] |
20454 | : ix86_cost->hard_register.fp_store [index]; |
20455 | } |
20456 | if (SSE_CLASS_P (regclass)) |
20457 | { |
20458 | int index = sse_store_index (mode); |
20459 | if (index == -1) |
20460 | return 100; |
20461 | if (in == 2) |
20462 | return MAX (ix86_cost->hard_register.sse_load [index], |
20463 | ix86_cost->hard_register.sse_store [index]); |
20464 | return in ? ix86_cost->hard_register.sse_load [index] |
20465 | : ix86_cost->hard_register.sse_store [index]; |
20466 | } |
20467 | if (MASK_CLASS_P (regclass)) |
20468 | { |
20469 | int index; |
20470 | switch (GET_MODE_SIZE (mode)) |
20471 | { |
20472 | case 1: |
20473 | index = 0; |
20474 | break; |
20475 | case 2: |
20476 | index = 1; |
20477 | break; |
20478 | /* DImode loads and stores assumed to cost the same as SImode. */ |
20479 | case 4: |
20480 | case 8: |
20481 | index = 2; |
20482 | break; |
20483 | default: |
20484 | return 100; |
20485 | } |
20486 | |
20487 | if (in == 2) |
20488 | return MAX (ix86_cost->hard_register.mask_load[index], |
20489 | ix86_cost->hard_register.mask_store[index]); |
20490 | return in ? ix86_cost->hard_register.mask_load[2] |
20491 | : ix86_cost->hard_register.mask_store[2]; |
20492 | } |
20493 | if (MMX_CLASS_P (regclass)) |
20494 | { |
20495 | int index; |
20496 | switch (GET_MODE_SIZE (mode)) |
20497 | { |
20498 | case 4: |
20499 | index = 0; |
20500 | break; |
20501 | case 8: |
20502 | index = 1; |
20503 | break; |
20504 | default: |
20505 | return 100; |
20506 | } |
20507 | if (in == 2) |
20508 | return MAX (ix86_cost->hard_register.mmx_load [index], |
20509 | ix86_cost->hard_register.mmx_store [index]); |
20510 | return in ? ix86_cost->hard_register.mmx_load [index] |
20511 | : ix86_cost->hard_register.mmx_store [index]; |
20512 | } |
20513 | switch (GET_MODE_SIZE (mode)) |
20514 | { |
20515 | case 1: |
20516 | if (Q_CLASS_P (regclass) || TARGET_64BIT) |
20517 | { |
20518 | if (!in) |
20519 | return ix86_cost->hard_register.int_store[0]; |
20520 | if (TARGET_PARTIAL_REG_DEPENDENCY |
20521 | && optimize_function_for_speed_p (cfun)) |
20522 | cost = ix86_cost->hard_register.movzbl_load; |
20523 | else |
20524 | cost = ix86_cost->hard_register.int_load[0]; |
20525 | if (in == 2) |
20526 | return MAX (cost, ix86_cost->hard_register.int_store[0]); |
20527 | return cost; |
20528 | } |
20529 | else |
20530 | { |
20531 | if (in == 2) |
20532 | return MAX (ix86_cost->hard_register.movzbl_load, |
20533 | ix86_cost->hard_register.int_store[0] + 4); |
20534 | if (in) |
20535 | return ix86_cost->hard_register.movzbl_load; |
20536 | else |
20537 | return ix86_cost->hard_register.int_store[0] + 4; |
20538 | } |
20539 | break; |
20540 | case 2: |
20541 | { |
20542 | int cost; |
20543 | if (in == 2) |
20544 | cost = MAX (ix86_cost->hard_register.int_load[1], |
20545 | ix86_cost->hard_register.int_store[1]); |
20546 | else |
20547 | cost = in ? ix86_cost->hard_register.int_load[1] |
20548 | : ix86_cost->hard_register.int_store[1]; |
20549 | |
20550 | if (mode == E_HFmode) |
20551 | { |
20552 | /* Prefer SSE over GPR for HFmode. */ |
20553 | int sse_cost; |
20554 | int index = sse_store_index (mode); |
20555 | if (in == 2) |
20556 | sse_cost = MAX (ix86_cost->hard_register.sse_load[index], |
20557 | ix86_cost->hard_register.sse_store[index]); |
20558 | else |
20559 | sse_cost = (in |
20560 | ? ix86_cost->hard_register.sse_load [index] |
20561 | : ix86_cost->hard_register.sse_store [index]); |
20562 | if (sse_cost >= cost) |
20563 | cost = sse_cost + 1; |
20564 | } |
20565 | return cost; |
20566 | } |
20567 | default: |
20568 | if (in == 2) |
20569 | cost = MAX (ix86_cost->hard_register.int_load[2], |
20570 | ix86_cost->hard_register.int_store[2]); |
20571 | else if (in) |
20572 | cost = ix86_cost->hard_register.int_load[2]; |
20573 | else |
20574 | cost = ix86_cost->hard_register.int_store[2]; |
20575 | /* Multiply with the number of GPR moves needed. */ |
20576 | return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20577 | } |
20578 | } |
20579 | |
20580 | static int |
20581 | ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) |
20582 | { |
20583 | return inline_memory_move_cost (mode, regclass: (enum reg_class) regclass, in: in ? 1 : 0); |
20584 | } |
20585 | |
20586 | |
20587 | /* Return the cost of moving data from a register in class CLASS1 to |
20588 | one in class CLASS2. |
20589 | |
20590 | It is not required that the cost always equal 2 when FROM is the same as TO; |
20591 | on some machines it is expensive to move between registers if they are not |
20592 | general registers. */ |
20593 | |
static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, strict: false))
    {
      int cost = 1;

      /* IN == 2 requests the max of load and store cost for each class.  */
      cost += inline_memory_move_cost (mode, regclass: class1, in: 2);
      cost += inline_memory_move_cost (mode, regclass: class2, in: 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
	  && TARGET_MEMORY_MISMATCH_STALL
	  && targetm.class_max_nregs (class1, mode)
	     > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between MMX and non-MMX units require secondary memory,
     so they should have been handled above.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    gcc_unreachable ();

  /* GPR <-> SSE moves, direction-dependent cost.  */
  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return (SSE_CLASS_P (class1)
	    ? ix86_cost->hard_register.sse_to_integer
	    : ix86_cost->hard_register.integer_to_sse);

  /* Moves between mask register and GPR.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      return (MASK_CLASS_P (class1)
	      ? ix86_cost->hard_register.mask_to_integer
	      : ix86_cost->hard_register.integer_to_mask);
    }
  /* Moving between mask registers.  */
  if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
    return ix86_cost->hard_register.mask_move;

  /* Intra-set moves: cost depends only on the register set and, for
     SSE, on the vector width.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->hard_register.fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      if (GET_MODE_BITSIZE (mode) <= 128)
	return ix86_cost->hard_register.xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
	return ix86_cost->hard_register.ymm_move;
      return ix86_cost->hard_register.zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->hard_register.mmx_move;
  /* Default: plain GPR <-> GPR move.  */
  return 2;
}
20664 | |
20665 | /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in |
20666 | words of a value of mode MODE but can be less for certain modes in |
20667 | special long registers. |
20668 | |
20669 | Actually there are no two word move instructions for consecutive |
20670 | registers. And only registers 0-3 may have mov byte instructions |
20671 | applied to them. */ |
20672 | |
20673 | static unsigned int |
20674 | ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) |
20675 | { |
20676 | if (GENERAL_REGNO_P (regno)) |
20677 | { |
20678 | if (mode == XFmode) |
20679 | return TARGET_64BIT ? 2 : 3; |
20680 | if (mode == XCmode) |
20681 | return TARGET_64BIT ? 4 : 6; |
20682 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
20683 | } |
20684 | if (COMPLEX_MODE_P (mode)) |
20685 | return 2; |
20686 | /* Register pair for mask registers. */ |
20687 | if (mode == P2QImode || mode == P2HImode) |
20688 | return 2; |
20689 | if (mode == V64SFmode || mode == V64SImode) |
20690 | return 4; |
20691 | return 1; |
20692 | } |
20693 | |
20694 | /* Implement REGMODE_NATURAL_SIZE(MODE). */ |
20695 | unsigned int |
20696 | ix86_regmode_natural_size (machine_mode mode) |
20697 | { |
20698 | if (mode == P2HImode || mode == P2QImode) |
20699 | return GET_MODE_SIZE (mode) / 2; |
20700 | return UNITS_PER_WORD; |
20701 | } |
20702 | |
20703 | /* Implement TARGET_HARD_REGNO_MODE_OK. */ |
20704 | |
static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  /* x87 stack registers take only FP modes.  */
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      /* Scalar mask widths depend on the AVX-512 feature level.  */
      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && mode == SImode)
	      || (TARGET_AVX512BW && TARGET_EVEX512 && mode == DImode));
    }

  /* Partial-int modes (P2QI/P2HI) are only valid in mask registers,
     handled above.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* For AVX-5124FMAPS or AVX-5124VNNIW
	 allow V64SF and V64SI modes for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && (mode == V64SFmode || mode == V64SImode)
	  && MOD4_SSE_REGNO_P (regno))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer.  */
      if (TARGET_SSE2 && mode == HImode)
	return true;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  /* The remaining registers are general-purpose.  */
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
20812 | |
20813 | /* Implement TARGET_INSN_CALLEE_ABI. */ |
20814 | |
20815 | const predefined_function_abi & |
20816 | ix86_insn_callee_abi (const rtx_insn *insn) |
20817 | { |
20818 | unsigned int abi_id = 0; |
20819 | rtx pat = PATTERN (insn); |
20820 | if (vzeroupper_pattern (pat, VOIDmode)) |
20821 | abi_id = ABI_VZEROUPPER; |
20822 | |
20823 | return function_abis[abi_id]; |
20824 | } |
20825 | |
20826 | /* Initialize function_abis with corresponding abi_id, |
20827 | currently only handle vzeroupper. */ |
20828 | void |
20829 | ix86_initialize_callee_abi (unsigned int abi_id) |
20830 | { |
20831 | gcc_assert (abi_id == ABI_VZEROUPPER); |
20832 | predefined_function_abi &vzeroupper_abi = function_abis[abi_id]; |
20833 | if (!vzeroupper_abi.initialized_p ()) |
20834 | { |
20835 | HARD_REG_SET full_reg_clobbers; |
20836 | CLEAR_HARD_REG_SET (set&: full_reg_clobbers); |
20837 | vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers); |
20838 | } |
20839 | } |
20840 | |
20841 | void |
20842 | ix86_expand_avx_vzeroupper (void) |
20843 | { |
20844 | /* Initialize vzeroupper_abi here. */ |
20845 | ix86_initialize_callee_abi (ABI_VZEROUPPER); |
20846 | rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ()); |
20847 | /* Return false for non-local goto in can_nonlocal_goto. */ |
20848 | make_reg_eh_region_note (insn, ecf_flags: 0, INT_MIN); |
20849 | /* Flag used for call_insn indicates it's a fake call. */ |
20850 | RTX_FLAG (insn, used) = 1; |
20851 | } |
20852 | |
20853 | |
20854 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that |
20855 | saves SSE registers across calls is Win64 (thus no need to check the |
20856 | current ABI here), and with AVX enabled Win64 only guarantees that |
20857 | the low 16 bytes are saved. */ |
20858 | |
20859 | static bool |
20860 | ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno, |
20861 | machine_mode mode) |
20862 | { |
20863 | /* Special ABI for vzeroupper which only clobber higher part of sse regs. */ |
20864 | if (abi_id == ABI_VZEROUPPER) |
20865 | return (GET_MODE_SIZE (mode) > 16 |
20866 | && ((TARGET_64BIT && REX_SSE_REGNO_P (regno)) |
20867 | || LEGACY_SSE_REGNO_P (regno))); |
20868 | |
20869 | return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; |
20870 | } |
20871 | |
20872 | /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a |
20873 | tieable integer mode. */ |
20874 | |
20875 | static bool |
20876 | ix86_tieable_integer_mode_p (machine_mode mode) |
20877 | { |
20878 | switch (mode) |
20879 | { |
20880 | case E_HImode: |
20881 | case E_SImode: |
20882 | return true; |
20883 | |
20884 | case E_QImode: |
20885 | return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; |
20886 | |
20887 | case E_DImode: |
20888 | return TARGET_64BIT; |
20889 | |
20890 | default: |
20891 | return false; |
20892 | } |
20893 | } |
20894 | |
20895 | /* Implement TARGET_MODES_TIEABLE_P. |
20896 | |
20897 | Return true if MODE1 is accessible in a register that can hold MODE2 |
20898 | without copying. That is, all register classes that can hold MODE2 |
20899 | can also hold MODE1. */ |
20900 | |
20901 | static bool |
20902 | ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) |
20903 | { |
20904 | if (mode1 == mode2) |
20905 | return true; |
20906 | |
20907 | if (ix86_tieable_integer_mode_p (mode: mode1) |
20908 | && ix86_tieable_integer_mode_p (mode: mode2)) |
20909 | return true; |
20910 | |
20911 | /* MODE2 being XFmode implies fp stack or general regs, which means we |
20912 | can tie any smaller floating point modes to it. Note that we do not |
20913 | tie this with TFmode. */ |
20914 | if (mode2 == XFmode) |
20915 | return mode1 == SFmode || mode1 == DFmode; |
20916 | |
20917 | /* MODE2 being DFmode implies fp stack, general or sse regs, which means |
20918 | that we can tie it with SFmode. */ |
20919 | if (mode2 == DFmode) |
20920 | return mode1 == SFmode; |
20921 | |
20922 | /* If MODE2 is only appropriate for an SSE register, then tie with |
20923 | any other mode acceptable to SSE registers. */ |
20924 | if (GET_MODE_SIZE (mode2) == 64 |
20925 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
20926 | return (GET_MODE_SIZE (mode1) == 64 |
20927 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
20928 | if (GET_MODE_SIZE (mode2) == 32 |
20929 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
20930 | return (GET_MODE_SIZE (mode1) == 32 |
20931 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
20932 | if (GET_MODE_SIZE (mode2) == 16 |
20933 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode2)) |
20934 | return (GET_MODE_SIZE (mode1) == 16 |
20935 | && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode: mode1)); |
20936 | |
20937 | /* If MODE2 is appropriate for an MMX register, then tie |
20938 | with any other mode acceptable to MMX registers. */ |
20939 | if (GET_MODE_SIZE (mode2) == 8 |
20940 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode2)) |
20941 | return (GET_MODE_SIZE (mode1) == 8 |
20942 | && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode: mode1)); |
20943 | |
20944 | /* SCmode and DImode can be tied. */ |
20945 | if ((mode1 == E_SCmode && mode2 == E_DImode) |
20946 | || (mode1 == E_DImode && mode2 == E_SCmode)) |
20947 | return TARGET_64BIT; |
20948 | |
20949 | /* [SD]Cmode and V2[SD]Fmode modes can be tied. */ |
20950 | if ((mode1 == E_SCmode && mode2 == E_V2SFmode) |
20951 | || (mode1 == E_V2SFmode && mode2 == E_SCmode) |
20952 | || (mode1 == E_DCmode && mode2 == E_V2DFmode) |
20953 | || (mode1 == E_V2DFmode && mode2 == E_DCmode)) |
20954 | return true; |
20955 | |
20956 | return false; |
20957 | } |
20958 | |
20959 | /* Return the cost of moving between two registers of mode MODE. */ |
20960 | |
20961 | static int |
20962 | ix86_set_reg_reg_cost (machine_mode mode) |
20963 | { |
20964 | unsigned int units = UNITS_PER_WORD; |
20965 | |
20966 | switch (GET_MODE_CLASS (mode)) |
20967 | { |
20968 | default: |
20969 | break; |
20970 | |
20971 | case MODE_CC: |
20972 | units = GET_MODE_SIZE (CCmode); |
20973 | break; |
20974 | |
20975 | case MODE_FLOAT: |
20976 | if ((TARGET_SSE && mode == TFmode) |
20977 | || (TARGET_80387 && mode == XFmode) |
20978 | || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) |
20979 | || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) |
20980 | units = GET_MODE_SIZE (mode); |
20981 | break; |
20982 | |
20983 | case MODE_COMPLEX_FLOAT: |
20984 | if ((TARGET_SSE && mode == TCmode) |
20985 | || (TARGET_80387 && mode == XCmode) |
20986 | || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) |
20987 | || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) |
20988 | units = GET_MODE_SIZE (mode); |
20989 | break; |
20990 | |
20991 | case MODE_VECTOR_INT: |
20992 | case MODE_VECTOR_FLOAT: |
20993 | if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) |
20994 | || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
20995 | || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
20996 | || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
20997 | || ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
20998 | && VALID_MMX_REG_MODE (mode))) |
20999 | units = GET_MODE_SIZE (mode); |
21000 | } |
21001 | |
21002 | /* Return the cost of moving between two registers of mode MODE, |
21003 | assuming that the move will be in pieces of at most UNITS bytes. */ |
21004 | return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); |
21005 | } |
21006 | |
21007 | /* Return cost of vector operation in MODE given that scalar version has |
21008 | COST. */ |
21009 | |
21010 | static int |
21011 | ix86_vec_cost (machine_mode mode, int cost) |
21012 | { |
21013 | if (!VECTOR_MODE_P (mode)) |
21014 | return cost; |
21015 | |
21016 | if (GET_MODE_BITSIZE (mode) == 128 |
21017 | && TARGET_SSE_SPLIT_REGS) |
21018 | return cost * GET_MODE_BITSIZE (mode) / 64; |
21019 | else if (GET_MODE_BITSIZE (mode) > 128 |
21020 | && TARGET_AVX256_SPLIT_REGS) |
21021 | return cost * GET_MODE_BITSIZE (mode) / 128; |
21022 | else if (GET_MODE_BITSIZE (mode) > 256 |
21023 | && TARGET_AVX512_SPLIT_REGS) |
21024 | return cost * GET_MODE_BITSIZE (mode) / 256; |
21025 | return cost; |
21026 | } |
21027 | |
21028 | /* Return cost of vec_widen_<s>mult_hi/lo_<mode>, |
21029 | vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */ |
21030 | static int |
21031 | ix86_widen_mult_cost (const struct processor_costs *cost, |
21032 | enum machine_mode mode, bool uns_p) |
21033 | { |
21034 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); |
21035 | int = 0; |
21036 | int basic_cost = 0; |
21037 | switch (mode) |
21038 | { |
21039 | case V8HImode: |
21040 | case V16HImode: |
21041 | if (!uns_p || mode == V16HImode) |
21042 | extra_cost = cost->sse_op * 2; |
21043 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21044 | break; |
21045 | case V4SImode: |
21046 | case V8SImode: |
21047 | /* pmulhw/pmullw can be used. */ |
21048 | basic_cost = cost->mulss * 2 + cost->sse_op * 2; |
21049 | break; |
21050 | case V2DImode: |
21051 | /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, |
21052 | require extra 4 mul, 4 add, 4 cmp and 2 shift. */ |
21053 | if (!TARGET_SSE4_1 && !uns_p) |
21054 | extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4 |
21055 | + cost->sse_op * 2; |
21056 | /* Fallthru. */ |
21057 | case V4DImode: |
21058 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21059 | break; |
21060 | default: |
21061 | /* Not implemented. */ |
21062 | return 100; |
21063 | } |
21064 | return ix86_vec_cost (mode, cost: basic_cost + extra_cost); |
21065 | } |
21066 | |
21067 | /* Return cost of multiplication in MODE. */ |
21068 | |
21069 | static int |
21070 | ix86_multiplication_cost (const struct processor_costs *cost, |
21071 | enum machine_mode mode) |
21072 | { |
21073 | machine_mode inner_mode = mode; |
21074 | if (VECTOR_MODE_P (mode)) |
21075 | inner_mode = GET_MODE_INNER (mode); |
21076 | |
21077 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21078 | return inner_mode == DFmode ? cost->mulsd : cost->mulss; |
21079 | else if (X87_FLOAT_MODE_P (mode)) |
21080 | return cost->fmul; |
21081 | else if (FLOAT_MODE_P (mode)) |
21082 | return ix86_vec_cost (mode, |
21083 | cost: inner_mode == DFmode ? cost->mulsd : cost->mulss); |
21084 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21085 | { |
21086 | int nmults, nops; |
21087 | /* Cost of reading the memory. */ |
21088 | int ; |
21089 | |
21090 | switch (mode) |
21091 | { |
21092 | case V4QImode: |
21093 | case V8QImode: |
21094 | /* Partial V*QImode is emulated with 4-6 insns. */ |
21095 | nmults = 1; |
21096 | nops = 3; |
21097 | extra = 0; |
21098 | |
21099 | if (TARGET_AVX512BW && TARGET_AVX512VL) |
21100 | ; |
21101 | else if (TARGET_AVX2) |
21102 | nops += 2; |
21103 | else if (TARGET_XOP) |
21104 | extra += cost->sse_load[2]; |
21105 | else |
21106 | { |
21107 | nops += 1; |
21108 | extra += cost->sse_load[2]; |
21109 | } |
21110 | goto do_qimode; |
21111 | |
21112 | case V16QImode: |
21113 | /* V*QImode is emulated with 4-11 insns. */ |
21114 | nmults = 1; |
21115 | nops = 3; |
21116 | extra = 0; |
21117 | |
21118 | if (TARGET_AVX2 && !TARGET_PREFER_AVX128) |
21119 | { |
21120 | if (!(TARGET_AVX512BW && TARGET_AVX512VL)) |
21121 | nops += 3; |
21122 | } |
21123 | else if (TARGET_XOP) |
21124 | { |
21125 | nmults += 1; |
21126 | nops += 2; |
21127 | extra += cost->sse_load[2]; |
21128 | } |
21129 | else |
21130 | { |
21131 | nmults += 1; |
21132 | nops += 4; |
21133 | extra += cost->sse_load[2]; |
21134 | } |
21135 | goto do_qimode; |
21136 | |
21137 | case V32QImode: |
21138 | nmults = 1; |
21139 | nops = 3; |
21140 | extra = 0; |
21141 | |
21142 | if (!TARGET_AVX512BW || TARGET_PREFER_AVX256) |
21143 | { |
21144 | nmults += 1; |
21145 | nops += 4; |
21146 | extra += cost->sse_load[3] * 2; |
21147 | } |
21148 | goto do_qimode; |
21149 | |
21150 | case V64QImode: |
21151 | nmults = 2; |
21152 | nops = 9; |
21153 | extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2; |
21154 | |
21155 | do_qimode: |
21156 | return ix86_vec_cost (mode, cost: cost->mulss * nmults |
21157 | + cost->sse_op * nops) + extra; |
21158 | |
21159 | case V4SImode: |
21160 | /* pmulld is used in this case. No emulation is needed. */ |
21161 | if (TARGET_SSE4_1) |
21162 | goto do_native; |
21163 | /* V4SImode is emulated with 7 insns. */ |
21164 | else |
21165 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 5); |
21166 | |
21167 | case V2DImode: |
21168 | case V4DImode: |
21169 | /* vpmullq is used in this case. No emulation is needed. */ |
21170 | if (TARGET_AVX512DQ && TARGET_AVX512VL) |
21171 | goto do_native; |
21172 | /* V*DImode is emulated with 6-8 insns. */ |
21173 | else if (TARGET_XOP && mode == V2DImode) |
21174 | return ix86_vec_cost (mode, cost: cost->mulss * 2 + cost->sse_op * 4); |
21175 | /* FALLTHRU */ |
21176 | case V8DImode: |
21177 | /* vpmullq is used in this case. No emulation is needed. */ |
21178 | if (TARGET_AVX512DQ && mode == V8DImode) |
21179 | goto do_native; |
21180 | else |
21181 | return ix86_vec_cost (mode, cost: cost->mulss * 3 + cost->sse_op * 5); |
21182 | |
21183 | default: |
21184 | do_native: |
21185 | return ix86_vec_cost (mode, cost: cost->mulss); |
21186 | } |
21187 | } |
21188 | else |
21189 | return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); |
21190 | } |
21191 | |
21192 | /* Return cost of multiplication in MODE. */ |
21193 | |
21194 | static int |
21195 | ix86_division_cost (const struct processor_costs *cost, |
21196 | enum machine_mode mode) |
21197 | { |
21198 | machine_mode inner_mode = mode; |
21199 | if (VECTOR_MODE_P (mode)) |
21200 | inner_mode = GET_MODE_INNER (mode); |
21201 | |
21202 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21203 | return inner_mode == DFmode ? cost->divsd : cost->divss; |
21204 | else if (X87_FLOAT_MODE_P (mode)) |
21205 | return cost->fdiv; |
21206 | else if (FLOAT_MODE_P (mode)) |
21207 | return ix86_vec_cost (mode, |
21208 | cost: inner_mode == DFmode ? cost->divsd : cost->divss); |
21209 | else |
21210 | return cost->divide[MODE_INDEX (mode)]; |
21211 | } |
21212 | |
21213 | /* Return cost of shift in MODE. |
21214 | If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. |
21215 | AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE |
21216 | if op1 is a result of subreg. |
21217 | |
21218 | SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ |
21219 | |
21220 | static int |
21221 | ix86_shift_rotate_cost (const struct processor_costs *cost, |
21222 | enum rtx_code code, |
21223 | enum machine_mode mode, bool constant_op1, |
21224 | HOST_WIDE_INT op1_val, |
21225 | bool and_in_op1, |
21226 | bool shift_and_truncate, |
21227 | bool *skip_op0, bool *skip_op1) |
21228 | { |
21229 | if (skip_op0) |
21230 | *skip_op0 = *skip_op1 = false; |
21231 | |
21232 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21233 | { |
21234 | int count; |
21235 | /* Cost of reading the memory. */ |
21236 | int ; |
21237 | |
21238 | switch (mode) |
21239 | { |
21240 | case V4QImode: |
21241 | case V8QImode: |
21242 | if (TARGET_AVX2) |
21243 | /* Use vpbroadcast. */ |
21244 | extra = cost->sse_op; |
21245 | else |
21246 | extra = cost->sse_load[2]; |
21247 | |
21248 | if (constant_op1) |
21249 | { |
21250 | if (code == ASHIFTRT) |
21251 | { |
21252 | count = 4; |
21253 | extra *= 2; |
21254 | } |
21255 | else |
21256 | count = 2; |
21257 | } |
21258 | else if (TARGET_AVX512BW && TARGET_AVX512VL) |
21259 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
21260 | else if (TARGET_SSE4_1) |
21261 | count = 5; |
21262 | else if (code == ASHIFTRT) |
21263 | count = 6; |
21264 | else |
21265 | count = 5; |
21266 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
21267 | |
21268 | case V16QImode: |
21269 | if (TARGET_XOP) |
21270 | { |
21271 | /* For XOP we use vpshab, which requires a broadcast of the |
21272 | value to the variable shift insn. For constants this |
21273 | means a V16Q const in mem; even when we can perform the |
21274 | shift with one insn set the cost to prefer paddb. */ |
21275 | if (constant_op1) |
21276 | { |
21277 | extra = cost->sse_load[2]; |
21278 | return ix86_vec_cost (mode, cost: cost->sse_op) + extra; |
21279 | } |
21280 | else |
21281 | { |
21282 | count = (code == ASHIFT) ? 3 : 4; |
21283 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
21284 | } |
21285 | } |
21286 | /* FALLTHRU */ |
21287 | case V32QImode: |
21288 | if (TARGET_AVX2) |
21289 | /* Use vpbroadcast. */ |
21290 | extra = cost->sse_op; |
21291 | else |
21292 | extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; |
21293 | |
21294 | if (constant_op1) |
21295 | { |
21296 | if (code == ASHIFTRT) |
21297 | { |
21298 | count = 4; |
21299 | extra *= 2; |
21300 | } |
21301 | else |
21302 | count = 2; |
21303 | } |
21304 | else if (TARGET_AVX512BW |
21305 | && ((mode == V32QImode && !TARGET_PREFER_AVX256) |
21306 | || (mode == V16QImode && TARGET_AVX512VL |
21307 | && !TARGET_PREFER_AVX128))) |
21308 | return ix86_vec_cost (mode, cost: cost->sse_op * 4); |
21309 | else if (TARGET_AVX2 |
21310 | && mode == V16QImode && !TARGET_PREFER_AVX128) |
21311 | count = 6; |
21312 | else if (TARGET_SSE4_1) |
21313 | count = 9; |
21314 | else if (code == ASHIFTRT) |
21315 | count = 10; |
21316 | else |
21317 | count = 9; |
21318 | return ix86_vec_cost (mode, cost: cost->sse_op * count) + extra; |
21319 | |
21320 | case V2DImode: |
21321 | case V4DImode: |
21322 | /* V*DImode arithmetic right shift is emulated. */ |
21323 | if (code == ASHIFTRT && !TARGET_AVX512VL) |
21324 | { |
21325 | if (constant_op1) |
21326 | { |
21327 | if (op1_val == 63) |
21328 | count = TARGET_SSE4_2 ? 1 : 2; |
21329 | else if (TARGET_XOP) |
21330 | count = 2; |
21331 | else if (TARGET_SSE4_1) |
21332 | count = 3; |
21333 | else |
21334 | count = 4; |
21335 | } |
21336 | else if (TARGET_XOP) |
21337 | count = 3; |
21338 | else if (TARGET_SSE4_2) |
21339 | count = 4; |
21340 | else |
21341 | count = 5; |
21342 | |
21343 | return ix86_vec_cost (mode, cost: cost->sse_op * count); |
21344 | } |
21345 | /* FALLTHRU */ |
21346 | default: |
21347 | return ix86_vec_cost (mode, cost: cost->sse_op); |
21348 | } |
21349 | } |
21350 | |
21351 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21352 | { |
21353 | if (constant_op1) |
21354 | { |
21355 | if (op1_val > 32) |
21356 | return cost->shift_const + COSTS_N_INSNS (2); |
21357 | else |
21358 | return cost->shift_const * 2; |
21359 | } |
21360 | else |
21361 | { |
21362 | if (and_in_op1) |
21363 | return cost->shift_var * 2; |
21364 | else |
21365 | return cost->shift_var * 6 + COSTS_N_INSNS (2); |
21366 | } |
21367 | } |
21368 | else |
21369 | { |
21370 | if (constant_op1) |
21371 | return cost->shift_const; |
21372 | else if (shift_and_truncate) |
21373 | { |
21374 | if (skip_op0) |
21375 | *skip_op0 = *skip_op1 = true; |
21376 | /* Return the cost after shift-and truncation. */ |
21377 | return cost->shift_var; |
21378 | } |
21379 | else |
21380 | return cost->shift_var; |
21381 | } |
21382 | } |
21383 | |
21384 | /* Compute a (partial) cost for rtx X. Return true if the complete |
21385 | cost has been computed, and false if subexpressions should be |
21386 | scanned. In either case, *TOTAL contains the cost result. */ |
21387 | |
21388 | static bool |
21389 | ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
21390 | int *total, bool speed) |
21391 | { |
21392 | rtx mask; |
21393 | enum rtx_code code = GET_CODE (x); |
21394 | enum rtx_code outer_code = (enum rtx_code) outer_code_i; |
21395 | const struct processor_costs *cost |
21396 | = speed ? ix86_tune_cost : &ix86_size_cost; |
21397 | int src_cost; |
21398 | |
21399 | switch (code) |
21400 | { |
21401 | case SET: |
21402 | if (register_operand (SET_DEST (x), VOIDmode) |
21403 | && register_operand (SET_SRC (x), VOIDmode)) |
21404 | { |
21405 | *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); |
21406 | return true; |
21407 | } |
21408 | |
21409 | if (register_operand (SET_SRC (x), VOIDmode)) |
21410 | /* Avoid potentially incorrect high cost from rtx_costs |
21411 | for non-tieable SUBREGs. */ |
21412 | src_cost = 0; |
21413 | else |
21414 | { |
21415 | src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); |
21416 | |
21417 | if (CONSTANT_P (SET_SRC (x))) |
21418 | /* Constant costs assume a base value of COSTS_N_INSNS (1) and add |
21419 | a small value, possibly zero for cheap constants. */ |
21420 | src_cost += COSTS_N_INSNS (1); |
21421 | } |
21422 | |
21423 | *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); |
21424 | return true; |
21425 | |
21426 | case CONST_INT: |
21427 | case CONST: |
21428 | case LABEL_REF: |
21429 | case SYMBOL_REF: |
21430 | if (x86_64_immediate_operand (x, VOIDmode)) |
21431 | *total = 0; |
21432 | else |
21433 | *total = 1; |
21434 | return true; |
21435 | |
21436 | case CONST_DOUBLE: |
21437 | if (IS_STACK_MODE (mode)) |
21438 | switch (standard_80387_constant_p (x)) |
21439 | { |
21440 | case -1: |
21441 | case 0: |
21442 | break; |
21443 | case 1: /* 0.0 */ |
21444 | *total = 1; |
21445 | return true; |
21446 | default: /* Other constants */ |
21447 | *total = 2; |
21448 | return true; |
21449 | } |
21450 | /* FALLTHRU */ |
21451 | |
21452 | case CONST_VECTOR: |
21453 | switch (standard_sse_constant_p (x, pred_mode: mode)) |
21454 | { |
21455 | case 0: |
21456 | break; |
21457 | case 1: /* 0: xor eliminates false dependency */ |
21458 | *total = 0; |
21459 | return true; |
21460 | default: /* -1: cmp contains false dependency */ |
21461 | *total = 1; |
21462 | return true; |
21463 | } |
21464 | /* FALLTHRU */ |
21465 | |
21466 | case CONST_WIDE_INT: |
21467 | /* Fall back to (MEM (SYMBOL_REF)), since that's where |
21468 | it'll probably end up. Add a penalty for size. */ |
21469 | *total = (COSTS_N_INSNS (1) |
21470 | + (!TARGET_64BIT && flag_pic) |
21471 | + (GET_MODE_SIZE (mode) <= 4 |
21472 | ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); |
21473 | return true; |
21474 | |
21475 | case ZERO_EXTEND: |
21476 | /* The zero extensions is often completely free on x86_64, so make |
21477 | it as cheap as possible. */ |
21478 | if (TARGET_64BIT && mode == DImode |
21479 | && GET_MODE (XEXP (x, 0)) == SImode) |
21480 | *total = 1; |
21481 | else if (TARGET_ZERO_EXTEND_WITH_AND) |
21482 | *total = cost->add; |
21483 | else |
21484 | *total = cost->movzx; |
21485 | return false; |
21486 | |
21487 | case SIGN_EXTEND: |
21488 | *total = cost->movsx; |
21489 | return false; |
21490 | |
21491 | case ASHIFT: |
21492 | if (SCALAR_INT_MODE_P (mode) |
21493 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD |
21494 | && CONST_INT_P (XEXP (x, 1))) |
21495 | { |
21496 | HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
21497 | if (value == 1) |
21498 | { |
21499 | *total = cost->add; |
21500 | return false; |
21501 | } |
21502 | if ((value == 2 || value == 3) |
21503 | && cost->lea <= cost->shift_const) |
21504 | { |
21505 | *total = cost->lea; |
21506 | return false; |
21507 | } |
21508 | } |
21509 | /* FALLTHRU */ |
21510 | |
21511 | case ROTATE: |
21512 | case ASHIFTRT: |
21513 | case LSHIFTRT: |
21514 | case ROTATERT: |
21515 | bool skip_op0, skip_op1; |
21516 | *total = ix86_shift_rotate_cost (cost, code, mode, |
21517 | CONSTANT_P (XEXP (x, 1)), |
21518 | CONST_INT_P (XEXP (x, 1)) |
21519 | ? INTVAL (XEXP (x, 1)) : -1, |
21520 | GET_CODE (XEXP (x, 1)) == AND, |
21521 | SUBREG_P (XEXP (x, 1)) |
21522 | && GET_CODE (XEXP (XEXP (x, 1), |
21523 | 0)) == AND, |
21524 | skip_op0: &skip_op0, skip_op1: &skip_op1); |
21525 | if (skip_op0 || skip_op1) |
21526 | { |
21527 | if (!skip_op0) |
21528 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
21529 | if (!skip_op1) |
21530 | *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); |
21531 | return true; |
21532 | } |
21533 | return false; |
21534 | |
21535 | case FMA: |
21536 | { |
21537 | rtx sub; |
21538 | |
21539 | gcc_assert (FLOAT_MODE_P (mode)); |
21540 | gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); |
21541 | |
21542 | *total = ix86_vec_cost (mode, |
21543 | GET_MODE_INNER (mode) == SFmode |
21544 | ? cost->fmass : cost->fmasd); |
21545 | *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); |
21546 | |
21547 | /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ |
21548 | sub = XEXP (x, 0); |
21549 | if (GET_CODE (sub) == NEG) |
21550 | sub = XEXP (sub, 0); |
21551 | *total += rtx_cost (sub, mode, FMA, 0, speed); |
21552 | |
21553 | sub = XEXP (x, 2); |
21554 | if (GET_CODE (sub) == NEG) |
21555 | sub = XEXP (sub, 0); |
21556 | *total += rtx_cost (sub, mode, FMA, 2, speed); |
21557 | return true; |
21558 | } |
21559 | |
21560 | case MULT: |
21561 | if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) |
21562 | { |
21563 | rtx op0 = XEXP (x, 0); |
21564 | rtx op1 = XEXP (x, 1); |
21565 | int nbits; |
21566 | if (CONST_INT_P (XEXP (x, 1))) |
21567 | { |
21568 | unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
21569 | for (nbits = 0; value != 0; value &= value - 1) |
21570 | nbits++; |
21571 | } |
21572 | else |
21573 | /* This is arbitrary. */ |
21574 | nbits = 7; |
21575 | |
21576 | /* Compute costs correctly for widening multiplication. */ |
21577 | if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) |
21578 | && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 |
21579 | == GET_MODE_SIZE (mode)) |
21580 | { |
21581 | int is_mulwiden = 0; |
21582 | machine_mode inner_mode = GET_MODE (op0); |
21583 | |
21584 | if (GET_CODE (op0) == GET_CODE (op1)) |
21585 | is_mulwiden = 1, op1 = XEXP (op1, 0); |
21586 | else if (CONST_INT_P (op1)) |
21587 | { |
21588 | if (GET_CODE (op0) == SIGN_EXTEND) |
21589 | is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) |
21590 | == INTVAL (op1); |
21591 | else |
21592 | is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); |
21593 | } |
21594 | |
21595 | if (is_mulwiden) |
21596 | op0 = XEXP (op0, 0), mode = GET_MODE (op0); |
21597 | } |
21598 | |
21599 | int mult_init; |
21600 | // Double word multiplication requires 3 mults and 2 adds. |
21601 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21602 | { |
21603 | mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)] |
21604 | + 2 * cost->add; |
21605 | nbits *= 3; |
21606 | } |
21607 | else mult_init = cost->mult_init[MODE_INDEX (mode)]; |
21608 | |
21609 | *total = (mult_init |
21610 | + nbits * cost->mult_bit |
21611 | + rtx_cost (op0, mode, outer_code, opno, speed) |
21612 | + rtx_cost (op1, mode, outer_code, opno, speed)); |
21613 | |
21614 | return true; |
21615 | } |
21616 | *total = ix86_multiplication_cost (cost, mode); |
21617 | return false; |
21618 | |
21619 | case DIV: |
21620 | case UDIV: |
21621 | case MOD: |
21622 | case UMOD: |
21623 | *total = ix86_division_cost (cost, mode); |
21624 | return false; |
21625 | |
21626 | case PLUS: |
21627 | if (GET_MODE_CLASS (mode) == MODE_INT |
21628 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
21629 | { |
21630 | if (GET_CODE (XEXP (x, 0)) == PLUS |
21631 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
21632 | && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
21633 | && CONSTANT_P (XEXP (x, 1))) |
21634 | { |
21635 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); |
21636 | if (val == 2 || val == 4 || val == 8) |
21637 | { |
21638 | *total = cost->lea; |
21639 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21640 | outer_code, opno, speed); |
21641 | *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, |
21642 | outer_code, opno, speed); |
21643 | *total += rtx_cost (XEXP (x, 1), mode, |
21644 | outer_code, opno, speed); |
21645 | return true; |
21646 | } |
21647 | } |
21648 | else if (GET_CODE (XEXP (x, 0)) == MULT |
21649 | && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
21650 | { |
21651 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); |
21652 | if (val == 2 || val == 4 || val == 8) |
21653 | { |
21654 | *total = cost->lea; |
21655 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21656 | outer_code, opno, speed); |
21657 | *total += rtx_cost (XEXP (x, 1), mode, |
21658 | outer_code, opno, speed); |
21659 | return true; |
21660 | } |
21661 | } |
21662 | else if (GET_CODE (XEXP (x, 0)) == PLUS) |
21663 | { |
21664 | rtx op = XEXP (XEXP (x, 0), 0); |
21665 | |
21666 | /* Add with carry, ignore the cost of adding a carry flag. */ |
21667 | if (ix86_carry_flag_operator (op, mode) |
21668 | || ix86_carry_flag_unset_operator (op, mode)) |
21669 | *total = cost->add; |
21670 | else |
21671 | { |
21672 | *total = cost->lea; |
21673 | *total += rtx_cost (op, mode, |
21674 | outer_code, opno, speed); |
21675 | } |
21676 | |
21677 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21678 | outer_code, opno, speed); |
21679 | *total += rtx_cost (XEXP (x, 1), mode, |
21680 | outer_code, opno, speed); |
21681 | return true; |
21682 | } |
21683 | } |
21684 | /* FALLTHRU */ |
21685 | |
21686 | case MINUS: |
21687 | /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ |
21688 | if (GET_MODE_CLASS (mode) == MODE_INT |
21689 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD |
21690 | && GET_CODE (XEXP (x, 0)) == MINUS |
21691 | && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode) |
21692 | || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode))) |
21693 | { |
21694 | *total = cost->add; |
21695 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21696 | outer_code, opno, speed); |
21697 | *total += rtx_cost (XEXP (x, 1), mode, |
21698 | outer_code, opno, speed); |
21699 | return true; |
21700 | } |
21701 | |
21702 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21703 | *total = cost->addss; |
21704 | else if (X87_FLOAT_MODE_P (mode)) |
21705 | *total = cost->fadd; |
21706 | else if (FLOAT_MODE_P (mode)) |
21707 | *total = ix86_vec_cost (mode, cost: cost->addss); |
21708 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21709 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21710 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21711 | *total = cost->add * 2; |
21712 | else |
21713 | *total = cost->add; |
21714 | return false; |
21715 | |
21716 | case IOR: |
21717 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21718 | || SSE_FLOAT_MODE_P (mode)) |
21719 | { |
21720 | /* (ior (not ...) ...) can be a single insn in AVX512. */ |
21721 | if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F |
21722 | && ((TARGET_EVEX512 |
21723 | && GET_MODE_SIZE (mode) == 64) |
21724 | || (TARGET_AVX512VL |
21725 | && (GET_MODE_SIZE (mode) == 32 |
21726 | || GET_MODE_SIZE (mode) == 16)))) |
21727 | { |
21728 | rtx right = GET_CODE (XEXP (x, 1)) != NOT |
21729 | ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0); |
21730 | |
21731 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21732 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21733 | outer_code, opno, speed) |
21734 | + rtx_cost (right, mode, outer_code, opno, speed); |
21735 | return true; |
21736 | } |
21737 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21738 | } |
21739 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21740 | *total = cost->add * 2; |
21741 | else |
21742 | *total = cost->add; |
21743 | return false; |
21744 | |
21745 | case XOR: |
21746 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21747 | || SSE_FLOAT_MODE_P (mode)) |
21748 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21749 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21750 | *total = cost->add * 2; |
21751 | else |
21752 | *total = cost->add; |
21753 | return false; |
21754 | |
21755 | case AND: |
21756 | if (address_no_seg_operand (x, mode)) |
21757 | { |
21758 | *total = cost->lea; |
21759 | return true; |
21760 | } |
21761 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
21762 | || SSE_FLOAT_MODE_P (mode)) |
21763 | { |
21764 | /* pandn is a single instruction. */ |
21765 | if (GET_CODE (XEXP (x, 0)) == NOT) |
21766 | { |
21767 | rtx right = XEXP (x, 1); |
21768 | |
21769 | /* (and (not ...) (not ...)) can be a single insn in AVX512. */ |
21770 | if (GET_CODE (right) == NOT && TARGET_AVX512F |
21771 | && ((TARGET_EVEX512 |
21772 | && GET_MODE_SIZE (mode) == 64) |
21773 | || (TARGET_AVX512VL |
21774 | && (GET_MODE_SIZE (mode) == 32 |
21775 | || GET_MODE_SIZE (mode) == 16)))) |
21776 | right = XEXP (right, 0); |
21777 | |
21778 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21779 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21780 | outer_code, opno, speed) |
21781 | + rtx_cost (right, mode, outer_code, opno, speed); |
21782 | return true; |
21783 | } |
21784 | else if (GET_CODE (XEXP (x, 1)) == NOT) |
21785 | { |
21786 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21787 | + rtx_cost (XEXP (x, 0), mode, |
21788 | outer_code, opno, speed) |
21789 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21790 | outer_code, opno, speed); |
21791 | return true; |
21792 | } |
21793 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21794 | } |
21795 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21796 | { |
21797 | if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
21798 | { |
21799 | *total = cost->add * 2 |
21800 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21801 | outer_code, opno, speed) |
21802 | + rtx_cost (XEXP (x, 1), mode, |
21803 | outer_code, opno, speed); |
21804 | return true; |
21805 | } |
21806 | else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT) |
21807 | { |
21808 | *total = cost->add * 2 |
21809 | + rtx_cost (XEXP (x, 0), mode, |
21810 | outer_code, opno, speed) |
21811 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21812 | outer_code, opno, speed); |
21813 | return true; |
21814 | } |
21815 | *total = cost->add * 2; |
21816 | } |
21817 | else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
21818 | { |
21819 | *total = cost->add |
21820 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21821 | outer_code, opno, speed) |
21822 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
21823 | return true; |
21824 | } |
21825 | else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT) |
21826 | { |
21827 | *total = cost->add |
21828 | + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
21829 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
21830 | outer_code, opno, speed); |
21831 | return true; |
21832 | } |
21833 | else |
21834 | *total = cost->add; |
21835 | return false; |
21836 | |
21837 | case NOT: |
21838 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21839 | { |
21840 | /* (not (xor ...)) can be a single insn in AVX512. */ |
21841 | if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F |
21842 | && ((TARGET_EVEX512 |
21843 | && GET_MODE_SIZE (mode) == 64) |
21844 | || (TARGET_AVX512VL |
21845 | && (GET_MODE_SIZE (mode) == 32 |
21846 | || GET_MODE_SIZE (mode) == 16)))) |
21847 | { |
21848 | *total = ix86_vec_cost (mode, cost: cost->sse_op) |
21849 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
21850 | outer_code, opno, speed) |
21851 | + rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
21852 | outer_code, opno, speed); |
21853 | return true; |
21854 | } |
21855 | |
21856 | // vnot is pxor -1. |
21857 | *total = ix86_vec_cost (mode, cost: cost->sse_op) + 1; |
21858 | } |
21859 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21860 | *total = cost->add * 2; |
21861 | else |
21862 | *total = cost->add; |
21863 | return false; |
21864 | |
21865 | case NEG: |
21866 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21867 | *total = cost->sse_op; |
21868 | else if (X87_FLOAT_MODE_P (mode)) |
21869 | *total = cost->fchs; |
21870 | else if (FLOAT_MODE_P (mode)) |
21871 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21872 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21873 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21874 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
21875 | *total = cost->add * 3; |
21876 | else |
21877 | *total = cost->add; |
21878 | return false; |
21879 | |
21880 | case COMPARE: |
21881 | rtx op0, op1; |
21882 | op0 = XEXP (x, 0); |
21883 | op1 = XEXP (x, 1); |
21884 | if (GET_CODE (op0) == ZERO_EXTRACT |
21885 | && XEXP (op0, 1) == const1_rtx |
21886 | && CONST_INT_P (XEXP (op0, 2)) |
21887 | && op1 == const0_rtx) |
21888 | { |
21889 | /* This kind of construct is implemented using test[bwl]. |
21890 | Treat it as if we had an AND. */ |
21891 | mode = GET_MODE (XEXP (op0, 0)); |
21892 | *total = (cost->add |
21893 | + rtx_cost (XEXP (op0, 0), mode, outer_code, |
21894 | opno, speed) |
21895 | + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); |
21896 | return true; |
21897 | } |
21898 | |
21899 | if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1)) |
21900 | { |
21901 | /* This is an overflow detection, count it as a normal compare. */ |
21902 | *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed); |
21903 | return true; |
21904 | } |
21905 | |
21906 | rtx geu; |
21907 | /* Match x |
21908 | (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) |
21909 | (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */ |
21910 | if (mode == CCCmode |
21911 | && GET_CODE (op0) == NEG |
21912 | && GET_CODE (geu = XEXP (op0, 0)) == GEU |
21913 | && REG_P (XEXP (geu, 0)) |
21914 | && (GET_MODE (XEXP (geu, 0)) == CCCmode |
21915 | || GET_MODE (XEXP (geu, 0)) == CCmode) |
21916 | && REGNO (XEXP (geu, 0)) == FLAGS_REG |
21917 | && XEXP (geu, 1) == const0_rtx |
21918 | && GET_CODE (op1) == LTU |
21919 | && REG_P (XEXP (op1, 0)) |
21920 | && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) |
21921 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
21922 | && XEXP (op1, 1) == const0_rtx) |
21923 | { |
21924 | /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */ |
21925 | *total = 0; |
21926 | return true; |
21927 | } |
21928 | /* Match x |
21929 | (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) |
21930 | (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */ |
21931 | if (mode == CCCmode |
21932 | && GET_CODE (op0) == NEG |
21933 | && GET_CODE (XEXP (op0, 0)) == LTU |
21934 | && REG_P (XEXP (XEXP (op0, 0), 0)) |
21935 | && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode |
21936 | && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG |
21937 | && XEXP (XEXP (op0, 0), 1) == const0_rtx |
21938 | && GET_CODE (op1) == GEU |
21939 | && REG_P (XEXP (op1, 0)) |
21940 | && GET_MODE (XEXP (op1, 0)) == CCCmode |
21941 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
21942 | && XEXP (op1, 1) == const0_rtx) |
21943 | { |
21944 | /* This is *x86_cmc. */ |
21945 | if (!speed) |
21946 | *total = COSTS_N_BYTES (1); |
21947 | else if (TARGET_SLOW_STC) |
21948 | *total = COSTS_N_INSNS (2); |
21949 | else |
21950 | *total = COSTS_N_INSNS (1); |
21951 | return true; |
21952 | } |
21953 | |
21954 | if (SCALAR_INT_MODE_P (GET_MODE (op0)) |
21955 | && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
21956 | { |
21957 | if (op1 == const0_rtx) |
21958 | *total = cost->add |
21959 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed); |
21960 | else |
21961 | *total = 3*cost->add |
21962 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed) |
21963 | + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed); |
21964 | return true; |
21965 | } |
21966 | |
21967 | /* The embedded comparison operand is completely free. */ |
21968 | if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx) |
21969 | *total = 0; |
21970 | |
21971 | return false; |
21972 | |
21973 | case FLOAT_EXTEND: |
21974 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21975 | *total = 0; |
21976 | else |
21977 | *total = ix86_vec_cost (mode, cost: cost->addss); |
21978 | return false; |
21979 | |
21980 | case FLOAT_TRUNCATE: |
21981 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21982 | *total = cost->fadd; |
21983 | else |
21984 | *total = ix86_vec_cost (mode, cost: cost->addss); |
21985 | return false; |
21986 | |
21987 | case ABS: |
21988 | /* SSE requires memory load for the constant operand. It may make |
21989 | sense to account for this. Of course the constant operand may or |
21990 | may not be reused. */ |
21991 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
21992 | *total = cost->sse_op; |
21993 | else if (X87_FLOAT_MODE_P (mode)) |
21994 | *total = cost->fabs; |
21995 | else if (FLOAT_MODE_P (mode)) |
21996 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
21997 | return false; |
21998 | |
21999 | case SQRT: |
22000 | if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) |
22001 | *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; |
22002 | else if (X87_FLOAT_MODE_P (mode)) |
22003 | *total = cost->fsqrt; |
22004 | else if (FLOAT_MODE_P (mode)) |
22005 | *total = ix86_vec_cost (mode, |
22006 | cost: mode == SFmode ? cost->sqrtss : cost->sqrtsd); |
22007 | return false; |
22008 | |
22009 | case UNSPEC: |
22010 | if (XINT (x, 1) == UNSPEC_TP) |
22011 | *total = 0; |
22012 | else if (XINT (x, 1) == UNSPEC_VTERNLOG) |
22013 | { |
22014 | *total = cost->sse_op; |
22015 | return true; |
22016 | } |
22017 | else if (XINT (x, 1) == UNSPEC_PTEST) |
22018 | { |
22019 | *total = cost->sse_op; |
22020 | rtx test_op0 = XVECEXP (x, 0, 0); |
22021 | if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) |
22022 | return false; |
22023 | if (GET_CODE (test_op0) == AND) |
22024 | { |
22025 | rtx and_op0 = XEXP (test_op0, 0); |
22026 | if (GET_CODE (and_op0) == NOT) |
22027 | and_op0 = XEXP (and_op0, 0); |
22028 | *total += rtx_cost (and_op0, GET_MODE (and_op0), |
22029 | AND, 0, speed) |
22030 | + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), |
22031 | AND, 1, speed); |
22032 | } |
22033 | else |
22034 | *total = rtx_cost (test_op0, GET_MODE (test_op0), |
22035 | UNSPEC, 0, speed); |
22036 | return true; |
22037 | } |
22038 | return false; |
22039 | |
22040 | case VEC_SELECT: |
22041 | case VEC_CONCAT: |
22042 | case VEC_DUPLICATE: |
22043 | /* ??? Assume all of these vector manipulation patterns are |
22044 | recognizable. In which case they all pretty much have the |
22045 | same cost. */ |
22046 | *total = cost->sse_op; |
22047 | return true; |
22048 | case VEC_MERGE: |
22049 | mask = XEXP (x, 2); |
22050 | /* This is masked instruction, assume the same cost, |
22051 | as nonmasked variant. */ |
22052 | if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) |
22053 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); |
22054 | else |
22055 | *total = cost->sse_op; |
22056 | return true; |
22057 | |
22058 | case MEM: |
22059 | /* An insn that accesses memory is slightly more expensive |
22060 | than one that does not. */ |
22061 | if (speed) |
22062 | *total += 1; |
22063 | return false; |
22064 | |
22065 | case ZERO_EXTRACT: |
22066 | if (XEXP (x, 1) == const1_rtx |
22067 | && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND |
22068 | && GET_MODE (XEXP (x, 2)) == SImode |
22069 | && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode) |
22070 | { |
22071 | /* Ignore cost of zero extension and masking of last argument. */ |
22072 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22073 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22074 | *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed); |
22075 | return true; |
22076 | } |
22077 | return false; |
22078 | |
22079 | case IF_THEN_ELSE: |
22080 | if (TARGET_XOP |
22081 | && VECTOR_MODE_P (mode) |
22082 | && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32)) |
22083 | { |
22084 | /* vpcmov. */ |
22085 | *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6); |
22086 | if (!REG_P (XEXP (x, 0))) |
22087 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22088 | if (!REG_P (XEXP (x, 1))) |
22089 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22090 | if (!REG_P (XEXP (x, 2))) |
22091 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
22092 | return true; |
22093 | } |
22094 | else if (TARGET_CMOVE |
22095 | && SCALAR_INT_MODE_P (mode) |
22096 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
22097 | { |
22098 | /* cmov. */ |
22099 | *total = COSTS_N_INSNS (1); |
22100 | if (!REG_P (XEXP (x, 0))) |
22101 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22102 | if (!REG_P (XEXP (x, 1))) |
22103 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
22104 | if (!REG_P (XEXP (x, 2))) |
22105 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
22106 | return true; |
22107 | } |
22108 | return false; |
22109 | |
22110 | default: |
22111 | return false; |
22112 | } |
22113 | } |
22114 | |
#if TARGET_MACHO

/* Counter used to build a unique local label name for each stub.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Derived names; the +32 leaves room for the prefixes/suffixes the
     GEN_* macros append.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Name of the lazy pointer the stub jumps through.  */
  sprintf (lazy_ptr_name, "L%d$lz" , label);

  /* Select the stub section matching the codegen style: AT&T-style,
     pure-PIC, or plain lazy stub.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n" , stub);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* Five one-byte hlt insns fill the stub body.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n" );
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n" ,
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n" );
    }
  else
    fprintf (file, "\tjmp\t*%s\n" , lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n" , binder_name);

  /* The binder pushes the address of the lazy pointer and transfers to
     the dynamic linker's binding helper.  */
  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n" , lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n" );
    }
  else
    fprintf (file, "\tpushl\t$%s\n" , lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n" , file);

  /* N.B.  Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* Emit the lazy pointer itself, initially pointing at the binder.  */
  fprintf (file, "%s:\n" , lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n" , symbol_name);
  fprintf (file, ASM_LONG "%s\n" , binder_name);
}
#endif /* TARGET_MACHO */
22207 | |
22208 | /* Order the registers for register allocator. */ |
22209 | |
22210 | void |
22211 | x86_order_regs_for_local_alloc (void) |
22212 | { |
22213 | int pos = 0; |
22214 | int i; |
22215 | |
22216 | /* First allocate the local general purpose registers. */ |
22217 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
22218 | if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (regno: i)) |
22219 | reg_alloc_order [pos++] = i; |
22220 | |
22221 | /* Global general purpose registers. */ |
22222 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
22223 | if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (regno: i)) |
22224 | reg_alloc_order [pos++] = i; |
22225 | |
22226 | /* x87 registers come first in case we are doing FP math |
22227 | using them. */ |
22228 | if (!TARGET_SSE_MATH) |
22229 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
22230 | reg_alloc_order [pos++] = i; |
22231 | |
22232 | /* SSE registers. */ |
22233 | for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) |
22234 | reg_alloc_order [pos++] = i; |
22235 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
22236 | reg_alloc_order [pos++] = i; |
22237 | |
22238 | /* Extended REX SSE registers. */ |
22239 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
22240 | reg_alloc_order [pos++] = i; |
22241 | |
22242 | /* Mask register. */ |
22243 | for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) |
22244 | reg_alloc_order [pos++] = i; |
22245 | |
22246 | /* x87 registers. */ |
22247 | if (TARGET_SSE_MATH) |
22248 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
22249 | reg_alloc_order [pos++] = i; |
22250 | |
22251 | for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) |
22252 | reg_alloc_order [pos++] = i; |
22253 | |
22254 | /* Initialize the rest of array as we do not allocate some registers |
22255 | at all. */ |
22256 | while (pos < FIRST_PSEUDO_REGISTER) |
22257 | reg_alloc_order [pos++] = 0; |
22258 | } |
22259 | |
22260 | static bool |
22261 | ix86_ms_bitfield_layout_p (const_tree record_type) |
22262 | { |
22263 | return ((TARGET_MS_BITFIELD_LAYOUT |
22264 | && !lookup_attribute (attr_name: "gcc_struct" , TYPE_ATTRIBUTES (record_type))) |
22265 | || lookup_attribute (attr_name: "ms_struct" , TYPE_ATTRIBUTES (record_type))); |
22266 | } |
22267 | |
22268 | /* Returns an expression indicating where the this parameter is |
22269 | located on entry to the FUNCTION. */ |
22270 | |
22271 | static rtx |
22272 | x86_this_parameter (tree function) |
22273 | { |
22274 | tree type = TREE_TYPE (function); |
22275 | bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; |
22276 | int nregs; |
22277 | |
22278 | if (TARGET_64BIT) |
22279 | { |
22280 | const int *parm_regs; |
22281 | |
22282 | if (ix86_function_type_abi (fntype: type) == MS_ABI) |
22283 | parm_regs = x86_64_ms_abi_int_parameter_registers; |
22284 | else |
22285 | parm_regs = x86_64_int_parameter_registers; |
22286 | return gen_rtx_REG (Pmode, parm_regs[aggr]); |
22287 | } |
22288 | |
22289 | nregs = ix86_function_regparm (type, decl: function); |
22290 | |
22291 | if (nregs > 0 && !stdarg_p (type)) |
22292 | { |
22293 | int regno; |
22294 | unsigned int ccvt = ix86_get_callcvt (type); |
22295 | |
22296 | if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
22297 | regno = aggr ? DX_REG : CX_REG; |
22298 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
22299 | { |
22300 | regno = CX_REG; |
22301 | if (aggr) |
22302 | return gen_rtx_MEM (SImode, |
22303 | plus_constant (Pmode, stack_pointer_rtx, 4)); |
22304 | } |
22305 | else |
22306 | { |
22307 | regno = AX_REG; |
22308 | if (aggr) |
22309 | { |
22310 | regno = DX_REG; |
22311 | if (nregs == 1) |
22312 | return gen_rtx_MEM (SImode, |
22313 | plus_constant (Pmode, |
22314 | stack_pointer_rtx, 4)); |
22315 | } |
22316 | } |
22317 | return gen_rtx_REG (SImode, regno); |
22318 | } |
22319 | |
22320 | return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, |
22321 | aggr ? 8 : 4)); |
22322 | } |
22323 | |
22324 | /* Determine whether x86_output_mi_thunk can succeed. */ |
22325 | |
22326 | static bool |
22327 | x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, |
22328 | const_tree function) |
22329 | { |
22330 | /* 64-bit can handle anything. */ |
22331 | if (TARGET_64BIT) |
22332 | return true; |
22333 | |
22334 | /* For 32-bit, everything's fine if we have one free register. */ |
22335 | if (ix86_function_regparm (TREE_TYPE (function), decl: function) < 3) |
22336 | return true; |
22337 | |
22338 | /* Need a free register for vcall_offset. */ |
22339 | if (vcall_offset) |
22340 | return false; |
22341 | |
22342 | /* Need a free register for GOT references. */ |
22343 | if (flag_pic && !targetm.binds_local_p (function)) |
22344 | return false; |
22345 | |
22346 | /* Otherwise ok. */ |
22347 | return true; |
22348 | } |
22349 | |
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  /* Saved so the global can be restored before returning.  */
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Choose a scratch register that the calling convention does not use
     for argument passing.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      /* The 32-bit PIC path below builds its own GOT-based call; clear
	 -mforce-indirect-call for the duration (restored at the end).  */
      if (flag_pic)
	flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      /* DELTA is not usable as an immediate operand; load it
		 into the scratch register first.  */
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (code: PLUS, Pmode, dst: delta_dst, src: delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into the scratch register.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, addr: vcall_addr, strict: true))
	{
	  /* The offset does not form a legitimate address; materialize
	     it in %r11 and use a register-register sum instead.  */
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (code: PLUS, Pmode, dst: this_reg, src: vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  /* Form the address of the target function to tail-call.  */
  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  /* Non-local 64-bit PIC target: reference it through the GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit PIC: set up %ecx as PIC register and form a
	     GOT-relative memory reference to FUNCTION.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (dest: tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (orig: fnaddr,
					   reg: gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  /* The address is not directly usable as a sibcall operand;
	     move it into the scratch register first.  */
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
22551 | |
/* Implement TARGET_ASM_FILE_START: emit target-specific directives at
   the start of the assembly output file.  */

static void
x86_file_start (void)
{
  default_file_start ();
  /* In 16-bit mode, emit the .code16gcc directive.  */
  if (TARGET_16BIT)
    fputs (s: "\t.code16gcc\n" , stream: asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs (s: "\t.version\t\"01.01\"\n" , stream: asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs (s: "\t.global\t__fltused\n" , stream: asm_out_file);
  /* Switch the assembler to Intel syntax when requested.  */
  if (ix86_asm_dialect == ASM_INTEL)
    fputs (s: "\t.intel_syntax noprefix\n" , stream: asm_out_file);
}
22568 | |
22569 | int |
22570 | x86_field_alignment (tree type, int computed) |
22571 | { |
22572 | machine_mode mode; |
22573 | |
22574 | if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) |
22575 | return computed; |
22576 | if (TARGET_IAMCU) |
22577 | return iamcu_alignment (type, align: computed); |
22578 | type = strip_array_types (type); |
22579 | mode = TYPE_MODE (type); |
22580 | if (mode == DFmode || mode == DCmode |
22581 | || GET_MODE_CLASS (mode) == MODE_INT |
22582 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) |
22583 | { |
22584 | if (TYPE_ATOMIC (type) && computed > 32) |
22585 | { |
22586 | static bool warned; |
22587 | |
22588 | if (!warned && warn_psabi) |
22589 | { |
22590 | const char *url |
22591 | = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic" ; |
22592 | |
22593 | warned = true; |
22594 | inform (input_location, "the alignment of %<_Atomic %T%> " |
22595 | "fields changed in %{GCC 11.1%}" , |
22596 | TYPE_MAIN_VARIANT (type), url); |
22597 | } |
22598 | } |
22599 | else |
22600 | return MIN (32, computed); |
22601 | } |
22602 | return computed; |
22603 | } |
22604 | |
22605 | /* Print call to TARGET to FILE. */ |
22606 | |
22607 | static void |
22608 | x86_print_call_or_nop (FILE *file, const char *target) |
22609 | { |
22610 | if (flag_nop_mcount || !strcmp (s1: target, s2: "nop" )) |
22611 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
22612 | fprintf (stream: file, format: "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n" ); |
22613 | else |
22614 | fprintf (stream: file, format: "1:\tcall\t%s\n" , target); |
22615 | } |
22616 | |
22617 | static bool |
22618 | current_fentry_name (const char **name) |
22619 | { |
22620 | tree attr = lookup_attribute (attr_name: "fentry_name" , |
22621 | DECL_ATTRIBUTES (current_function_decl)); |
22622 | if (!attr) |
22623 | return false; |
22624 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
22625 | return true; |
22626 | } |
22627 | |
22628 | static bool |
22629 | current_fentry_section (const char **name) |
22630 | { |
22631 | tree attr = lookup_attribute (attr_name: "fentry_section" , |
22632 | DECL_ATTRIBUTES (current_function_decl)); |
22633 | if (!attr) |
22634 | return false; |
22635 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
22636 | return true; |
22637 | } |
22638 | |
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* Re-emit anything queued at the function entrance (ENDBR and/or the
     patchable area), since the profiler call is placed before them.  */
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (stream: file, format: "\t%s\n" , TARGET_64BIT ? "endbr64" : "endbr32" );
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  /* Resolve the profiler entry point: per-function attribute first,
     then -mfentry-name=, then the -mfentry default, else MCOUNT_NAME.  */
  const char *mcount_name = MCOUNT_NAME;

  if (current_fentry_name (name: &mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n" , LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF)
	{
	  /* The call sequence depends on the code model.  */
	  switch (ix86_cmodel)
	    {
	    case CM_LARGE:
	      /* NB: R10 is caller-saved.  Although it can be used as a
		 static chain register, it is preserved when calling
		 mcount for nested functions.  */
	      fprintf (stream: file, format: "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n" ,
		       mcount_name);
	      break;
	    case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
	      /* Compute the mcount address via _GLOBAL_OFFSET_TABLE_ and
		 a @PLTOFF offset, then call through %r10.  */
	      fprintf (stream: file, format: "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n" );
	      fprintf (stream: file, format: "\tleaq\t1b(%%rip), %%r10\n" );
	      fprintf (stream: file, format: "\taddq\t%%r11, %%r10\n" );
	      fprintf (stream: file, format: "\tmovabsq\t$%s@PLTOFF, %%r11\n" , mcount_name);
	      fprintf (stream: file, format: "\taddq\t%%r11, %%r10\n" );
	      fprintf (stream: file, format: "\tcall\t*%%r10\n" );
#else
	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported" );
#endif
	      break;
	    case CM_SMALL_PIC:
	    case CM_MEDIUM_PIC:
	      if (!ix86_direct_extern_access)
		{
		  /* Call mcount indirectly through its GOT entry.  */
		  fprintf (stream: file, format: "1:\tcall\t*%s@GOTPCREL(%%rip)\n" , mcount_name);
		  break;
		}
	      /* fall through */
	    default:
	      x86_print_call_or_nop (file, target: mcount_name);
	      break;
	    }
	}
      else
	x86_print_call_or_nop (file, target: mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n" ,
	       LPREFIX, labelno);
#endif
      /* 32-bit PIC: call mcount through the GOT, %ebx holding the GOT
	 pointer.  */
      fprintf (stream: file, format: "1:\tcall\t*%s@GOT(%%ebx)\n" , mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n" ,
	       LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, target: mcount_name);
    }

  /* Record the call site in a __mcount_loc-style section when requested
     by -mrecord-mcount or a "fentry_section" attribute.  */
  if (flag_record_mcount
      || lookup_attribute (attr_name: "fentry_section" ,
			   DECL_ATTRIBUTES (current_function_decl)))
    {
      const char *sname = "__mcount_loc" ;

      if (current_fentry_section (name: &sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (stream: file, format: "\t.section %s, \"a\",@progbits\n" , sname);
      fprintf (stream: file, format: "\t.%s 1b\n" , TARGET_64BIT ? "quad" : "long" );
      fprintf (stream: file, format: "\t.previous\n" );
    }
}
22742 | |
22743 | /* We don't have exact information about the insn sizes, but we may assume |
22744 | quite safely that we are informed about all 1 byte insns and memory |
22745 | address sizes. This is enough to eliminate unnecessary padding in |
22746 | 99% of cases. */ |
22747 | |
22748 | int |
22749 | ix86_min_insn_size (rtx_insn *insn) |
22750 | { |
22751 | int l = 0, len; |
22752 | |
22753 | if (!INSN_P (insn) || !active_insn_p (insn)) |
22754 | return 0; |
22755 | |
22756 | /* Discard alignments we've emit and jump instructions. */ |
22757 | if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
22758 | && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
22759 | return 0; |
22760 | |
22761 | /* Important case - calls are always 5 bytes. |
22762 | It is common to have many calls in the row. */ |
22763 | if (CALL_P (insn) |
22764 | && symbolic_reference_mentioned_p (op: PATTERN (insn)) |
22765 | && !SIBLING_CALL_P (insn)) |
22766 | return 5; |
22767 | len = get_attr_length (insn); |
22768 | if (len <= 1) |
22769 | return 1; |
22770 | |
22771 | /* For normal instructions we rely on get_attr_length being exact, |
22772 | with a few exceptions. */ |
22773 | if (!JUMP_P (insn)) |
22774 | { |
22775 | enum attr_type type = get_attr_type (insn); |
22776 | |
22777 | switch (type) |
22778 | { |
22779 | case TYPE_MULTI: |
22780 | if (GET_CODE (PATTERN (insn)) == ASM_INPUT |
22781 | || asm_noperands (PATTERN (insn)) >= 0) |
22782 | return 0; |
22783 | break; |
22784 | case TYPE_OTHER: |
22785 | case TYPE_FCMP: |
22786 | break; |
22787 | default: |
22788 | /* Otherwise trust get_attr_length. */ |
22789 | return len; |
22790 | } |
22791 | |
22792 | l = get_attr_length_address (insn); |
22793 | if (l < 4 && symbolic_reference_mentioned_p (op: PATTERN (insn))) |
22794 | l = 4; |
22795 | } |
22796 | if (l) |
22797 | return 1+l; |
22798 | else |
22799 | return 2; |
22800 | } |
22801 | |
22802 | #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN |
22803 | |
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (stream: dump_file, format: "Label %i with max_skip %i\n" ,
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* The alignment may skip up to MAX_SKIP bytes; retire insns
		 from the front of the window until the remainder is sure
		 to fit in the 16-byte page.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (insn: start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (insn: start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n" ,
		 INSN_UID (insn), min_size);
      /* Only jumps and calls (other than asm goto) count toward the
	 window's jump total.  */
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Shrink the window from the front until it holds at most 3 jumps.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (insn: start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (insn: start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (stream: dump_file, format: "Interval %i to %i has %i bytes\n" ,
		 INSN_UID (insn: start), INSN_UID (insn), nbytes);

      /* Four jumps could share a 16-byte page: pad so that INSN ends on
	 a 16-byte boundary, pushing it into the next page.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (stream: dump_file, format: "Padding insn %i by %i bytes!\n" ,
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
22902 | #endif |
22903 | |
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Every predecessor of the exit block may end in a return that
     needs the mispredict workaround.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Only blocks actually ending in a return are interesting, and
	 the padding is not worth it when optimizing the block for size.  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Scan backwards for the nearest active insn or label.  */
      for (prev = PREV_INSN (insn: ret); prev; prev = PREV_INSN (insn: prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* The return is directly preceded by a label, i.e. it is a
	     jump target: replace it when any non-fallthru edge of
	     nonzero frequency can branch straight to it.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* Also replace when the return directly follows a conditional
	     jump or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Emit the long form of the return and delete the short one.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
22959 | |
22960 | /* Count the minimum number of instructions in BB. Return 4 if the |
22961 | number of instructions >= 4. */ |
22962 | |
22963 | static int |
22964 | ix86_count_insn_bb (basic_block bb) |
22965 | { |
22966 | rtx_insn *insn; |
22967 | int insn_count = 0; |
22968 | |
22969 | /* Count number of instructions in this block. Return 4 if the number |
22970 | of instructions >= 4. */ |
22971 | FOR_BB_INSNS (bb, insn) |
22972 | { |
22973 | /* Only happen in exit blocks. */ |
22974 | if (JUMP_P (insn) |
22975 | && ANY_RETURN_P (PATTERN (insn))) |
22976 | break; |
22977 | |
22978 | if (NONDEBUG_INSN_P (insn) |
22979 | && GET_CODE (PATTERN (insn)) != USE |
22980 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
22981 | { |
22982 | insn_count++; |
22983 | if (insn_count >= 4) |
22984 | return insn_count; |
22985 | } |
22986 | } |
22987 | |
22988 | return insn_count; |
22989 | } |
22990 | |
22991 | |
22992 | /* Count the minimum number of instructions in code path in BB. |
22993 | Return 4 if the number of instructions >= 4. */ |
22994 | |
22995 | static int |
22996 | ix86_count_insn (basic_block bb) |
22997 | { |
22998 | edge e; |
22999 | edge_iterator ei; |
23000 | int min_prev_count; |
23001 | |
23002 | /* Only bother counting instructions along paths with no |
23003 | more than 2 basic blocks between entry and exit. Given |
23004 | that BB has an edge to exit, determine if a predecessor |
23005 | of BB has an edge from entry. If so, compute the number |
23006 | of instructions in the predecessor block. If there |
23007 | happen to be multiple such blocks, compute the minimum. */ |
23008 | min_prev_count = 4; |
23009 | FOR_EACH_EDGE (e, ei, bb->preds) |
23010 | { |
23011 | edge prev_e; |
23012 | edge_iterator prev_ei; |
23013 | |
23014 | if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
23015 | { |
23016 | min_prev_count = 0; |
23017 | break; |
23018 | } |
23019 | FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) |
23020 | { |
23021 | if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
23022 | { |
23023 | int count = ix86_count_insn_bb (bb: e->src); |
23024 | if (count < min_prev_count) |
23025 | min_prev_count = count; |
23026 | break; |
23027 | } |
23028 | } |
23029 | } |
23030 | |
23031 | if (min_prev_count < 4) |
23032 | min_prev_count += ix86_count_insn_bb (bb); |
23033 | |
23034 | return min_prev_count; |
23035 | } |
23036 | |
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  /* Look at every block that returns from the function.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (bb: e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  The nops go before the epilogue so the
		 epilogue itself stays adjacent to the return.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found: pad directly before the return.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
23073 | |
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  /* Examine every block falling into the exit block.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (insn: next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* A single nop separates the throwing insn from the epilogue.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at the first control transfer; the straight-line window
	 this heuristic reasons about ends there.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Split the V2DF load into a low-half load (upper half zeroed)
	 followed by a high-half load of the same memory.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs (s: "Due to potential STLF stall, split instruction:\n",
		 stream: dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs (s: "To:\n", stream: dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Rewrite the original insn in place as the high-half load and
	 force it to be re-recognized.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
23170 | |
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* SEH needs a nop between a throwing insn and the epilogue; see
     ix86_seh_fixup_eh_fallthru.  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      /* Short-function padding and return padding are alternatives;
	 the former takes precedence when the tuning asks for it.  */
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
23197 | |
23198 | /* Return nonzero when QImode register that must be represented via REX prefix |
23199 | is used. */ |
23200 | bool |
23201 | x86_extended_QIreg_mentioned_p (rtx_insn *insn) |
23202 | { |
23203 | int i; |
23204 | extract_insn_cached (insn); |
23205 | for (i = 0; i < recog_data.n_operands; i++) |
23206 | if (GENERAL_REG_P (recog_data.operand[i]) |
23207 | && !QI_REGNO_P (REGNO (recog_data.operand[i]))) |
23208 | return true; |
23209 | return false; |
23210 | } |
23211 | |
23212 | /* Return true when INSN mentions register that must be encoded using REX |
23213 | prefix. */ |
23214 | bool |
23215 | x86_extended_reg_mentioned_p (rtx insn) |
23216 | { |
23217 | subrtx_iterator::array_type array; |
23218 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
23219 | { |
23220 | const_rtx x = *iter; |
23221 | if (REG_P (x) |
23222 | && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)) |
23223 | || REX2_INT_REGNO_P (REGNO (x)))) |
23224 | return true; |
23225 | } |
23226 | return false; |
23227 | } |
23228 | |
23229 | /* Return true when INSN mentions register that must be encoded using REX2 |
23230 | prefix. */ |
23231 | bool |
23232 | x86_extended_rex2reg_mentioned_p (rtx insn) |
23233 | { |
23234 | subrtx_iterator::array_type array; |
23235 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
23236 | { |
23237 | const_rtx x = *iter; |
23238 | if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x))) |
23239 | return true; |
23240 | } |
23241 | return false; |
23242 | } |
23243 | |
23244 | /* Return true when rtx operands mentions register that must be encoded using |
23245 | evex prefix. */ |
23246 | bool |
23247 | x86_evex_reg_mentioned_p (rtx operands[], int nops) |
23248 | { |
23249 | int i; |
23250 | for (i = 0; i < nops; i++) |
23251 | if (EXT_REX_SSE_REG_P (operands[i]) |
23252 | || x86_extended_rex2reg_mentioned_p (insn: operands[i])) |
23253 | return true; |
23254 | return false; |
23255 | } |
23256 | |
23257 | /* If profitable, negate (without causing overflow) integer constant |
23258 | of mode MODE at location LOC. Return true in this case. */ |
23259 | bool |
23260 | x86_maybe_negate_const_int (rtx *loc, machine_mode mode) |
23261 | { |
23262 | HOST_WIDE_INT val; |
23263 | |
23264 | if (!CONST_INT_P (*loc)) |
23265 | return false; |
23266 | |
23267 | switch (mode) |
23268 | { |
23269 | case E_DImode: |
23270 | /* DImode x86_64 constants must fit in 32 bits. */ |
23271 | gcc_assert (x86_64_immediate_operand (*loc, mode)); |
23272 | |
23273 | mode = SImode; |
23274 | break; |
23275 | |
23276 | case E_SImode: |
23277 | case E_HImode: |
23278 | case E_QImode: |
23279 | break; |
23280 | |
23281 | default: |
23282 | gcc_unreachable (); |
23283 | } |
23284 | |
23285 | /* Avoid overflows. */ |
23286 | if (mode_signbit_p (mode, *loc)) |
23287 | return false; |
23288 | |
23289 | val = INTVAL (*loc); |
23290 | |
23291 | /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. |
23292 | Exceptions: -128 encodes smaller than 128, so swap sign and op. */ |
23293 | if ((val < 0 && val != -128) |
23294 | || val == 128) |
23295 | { |
23296 | *loc = GEN_INT (-val); |
23297 | return true; |
23298 | } |
23299 | |
23300 | return false; |
23301 | } |
23302 | |
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the input looks negative when viewed as signed, the plain
     signed conversion would be wrong; branch to the fixup path.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  /* High bit clear: a signed conversion gives the right result.  */
  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* High bit set: compute (in >> 1) | (in & 1) — halving while keeping
     the low bit so the final doubling rounds correctly — convert that,
     then double the result.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
23344 | |
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do no allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
	return N_("invalid conversion from type %<__bf16%> "
		  "without option %<-msse2%>");
      if (from_mode == HFmode)
	return N_("invalid conversion from type %<_Float16%> "
		  "without option %<-msse2%>");
      if (to_mode == BFmode)
	return N_("invalid conversion to type %<__bf16%> "
		  "without option %<-msse2%>");
      if (to_mode == HFmode)
	return N_("invalid conversion to type %<_Float16%> "
		  "without option %<-msse2%>");
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
	 a bitcast.  Note this only warns; the conversion itself is
	 still allowed.  */
      if ((TYPE_MODE (fromtype) == BFmode
	   && TYPE_MODE (totype) == HImode)
	  || (TYPE_MODE (totype) == BFmode
	      && TYPE_MODE (fromtype) == HImode))
	warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
		 "to real %<__bf16%> since GCC 13.1, be careful of "
		 "implicit conversion between %<__bf16%> and %<short%>; "
		 "an explicit bitcast may be needed here");
    }

  /* Conversion allowed.  */
  return NULL;
}
23393 | |
23394 | /* Return the diagnostic message string if the unary operation OP is |
23395 | not permitted on TYPE, NULL otherwise. */ |
23396 | |
23397 | static const char * |
23398 | ix86_invalid_unary_op (int op, const_tree type) |
23399 | { |
23400 | machine_mode mmode = element_mode (type); |
23401 | /* Reject all single-operand operations on BFmode/HFmode except for & |
23402 | when TARGET_SSE2 is not available. */ |
23403 | if (!TARGET_SSE2 && op != ADDR_EXPR) |
23404 | { |
23405 | if (mmode == BFmode) |
23406 | return N_("operation not permitted on type %<__bf16%> " |
23407 | "without option %<-msse2%>" ); |
23408 | if (mmode == HFmode) |
23409 | return N_("operation not permitted on type %<_Float16%> " |
23410 | "without option %<-msse2%>" ); |
23411 | } |
23412 | |
23413 | /* Operation allowed. */ |
23414 | return NULL; |
23415 | } |
23416 | |
23417 | /* Return the diagnostic message string if the binary operation OP is |
23418 | not permitted on TYPE1 and TYPE2, NULL otherwise. */ |
23419 | |
23420 | static const char * |
23421 | ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, |
23422 | const_tree type2) |
23423 | { |
23424 | machine_mode type1_mode = element_mode (type1); |
23425 | machine_mode type2_mode = element_mode (type2); |
23426 | /* Reject all 2-operand operations on BFmode or HFmode |
23427 | when TARGET_SSE2 is not available. */ |
23428 | if (!TARGET_SSE2) |
23429 | { |
23430 | if (type1_mode == BFmode || type2_mode == BFmode) |
23431 | return N_("operation not permitted on type %<__bf16%> " |
23432 | "without option %<-msse2%>" ); |
23433 | |
23434 | if (type1_mode == HFmode || type2_mode == HFmode) |
23435 | return N_("operation not permitted on type %<_Float16%> " |
23436 | "without option %<-msse2%>" ); |
23437 | } |
23438 | |
23439 | /* Operation allowed. */ |
23440 | return NULL; |
23441 | } |
23442 | |
23443 | |
23444 | /* Target hook for scalar_mode_supported_p. */ |
23445 | static bool |
23446 | ix86_scalar_mode_supported_p (scalar_mode mode) |
23447 | { |
23448 | if (DECIMAL_FLOAT_MODE_P (mode)) |
23449 | return default_decimal_float_supported_p (); |
23450 | else if (mode == TFmode) |
23451 | return true; |
23452 | else if (mode == HFmode || mode == BFmode) |
23453 | return true; |
23454 | else |
23455 | return default_scalar_mode_supported_p (mode); |
23456 | } |
23457 | |
23458 | /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE |
23459 | if MODE is HFmode, and punt to the generic implementation otherwise. */ |
23460 | |
23461 | static bool |
23462 | ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode) |
23463 | { |
23464 | /* NB: Always return TRUE for HFmode so that the _Float16 type will |
23465 | be defined by the C front-end for AVX512FP16 intrinsics. We will |
23466 | issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't |
23467 | enabled. */ |
23468 | return ((mode == HFmode || mode == BFmode) |
23469 | ? true |
23470 | : default_libgcc_floating_mode_supported_p (mode)); |
23471 | } |
23472 | |
23473 | /* Implements target hook vector_mode_supported_p. */ |
23474 | static bool |
23475 | ix86_vector_mode_supported_p (machine_mode mode) |
23476 | { |
23477 | /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be |
23478 | either. */ |
23479 | if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode) |
23480 | return false; |
23481 | if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
23482 | return true; |
23483 | if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
23484 | return true; |
23485 | if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
23486 | return true; |
23487 | if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) |
23488 | return true; |
23489 | if ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
23490 | && VALID_MMX_REG_MODE (mode)) |
23491 | return true; |
23492 | if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE) |
23493 | && VALID_MMX_REG_MODE_3DNOW (mode)) |
23494 | return true; |
23495 | if (mode == V2QImode) |
23496 | return true; |
23497 | return false; |
23498 | } |
23499 | |
23500 | /* Target hook for c_mode_for_suffix. */ |
23501 | static machine_mode |
23502 | ix86_c_mode_for_suffix (char suffix) |
23503 | { |
23504 | if (suffix == 'q') |
23505 | return TFmode; |
23506 | if (suffix == 'w') |
23507 | return XFmode; |
23508 | |
23509 | return VOIDmode; |
23510 | } |
23511 | |
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
*/

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints contain no GPR references; skip.  */
      if (startswith (str: cur, prefix: "=@cc"))
	continue;

      int len = strlen (s: cur);
      auto_vec<char> buf;

      /* Rebuild the constraint string character by character into BUF.  */
      for (int j = 0; j < len; j++)
	{
	  switch (cur[j])
	    {
	    case 'g':
	      /* "g" has no single 'j'-prefixed form; expand to "jrjmi".  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'r');
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: 'm');
	      buf.safe_push (obj: 'i');
	      break;
	    case 'r':
	    case 'm':
	    case '<':
	    case '>':
	    case 'o':
	    case 'V':
	    case 'p':
	      /* Single-letter constraints simply gain the 'j' prefix.  */
	      buf.safe_push (obj: 'j');
	      buf.safe_push (obj: cur[j]);
	      break;
	    case 'B':
	      if (cur[j + 1] == 'm')
		{
		  /* "Bm" maps to "ja".  */
		  buf.safe_push (obj: 'j');
		  buf.safe_push (obj: 'a');
		  j++;
		}
	      else
		{
		  /* Other "B?" constraints are copied verbatim.  */
		  buf.safe_push (obj: cur[j]);
		  buf.safe_push (obj: cur[j + 1]);
		  j++;
		}
	      break;
	    case 'T':
	    case 'Y':
	    case 'W':
	    case 'j':
	      /* Two-letter constraint classes pass through unchanged,
		 consuming their second character.  */
	      buf.safe_push (obj: cur[j]);
	      buf.safe_push (obj: cur[j + 1]);
	      j++;
	      break;
	    default:
	      buf.safe_push (obj: cur[j]);
	      break;
	    }
	}
      buf.safe_push (obj: '\0');
      /* NOTE(review): the xstrdup'ed string is never freed here —
	 presumably constraint strings live for the whole compilation;
	 confirm against callers.  */
      constraints[i] = xstrdup (buf.address ());
    }
}
23598 | |
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		    vec<machine_mode> & /*input_modes*/,
		    vec<const char *> &constraints, vec<rtx> &clobbers,
		    HARD_REG_SET &clobbered_regs, location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* Under APX, restrict GPR constraints to the gpr16 subset unless the
     user explicitly allowed gpr32 in inline asm.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      /* Only "=@cc<cond>" outputs are processed here.  */
      if (!startswith (str: con, prefix: "=@cc"))
	continue;
      con += 4;
      if (strchr (s: con, c: ',') != NULL)
	{
	  error_at (loc, "alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      /* A leading 'n' inverts the condition.  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      /* Map the condition string to a flags-register mode and an RTL
	 comparison code; code remains UNKNOWN on no match.  */
      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      /* Build the comparison of the flags register against zero that
	 extracts this condition.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error_at (loc, "invalid type for %<asm%> flag output");
	  continue;
	}

      /* Store the condition into the user's output, going through a
	 QImode temporary (zero-extended) for wider outputs.  */
      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (obj: gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (set&: clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
23747 | |
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Symbols placed in large data (per ix86_in_large_data_p) are marked
     so they are addressed with the far-address form.  */
  if (ix86_in_large_data_p (exp: decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
23758 | |
23759 | /* Worker function for REVERSE_CONDITION. */ |
23760 | |
23761 | enum rtx_code |
23762 | ix86_reverse_condition (enum rtx_code code, machine_mode mode) |
23763 | { |
23764 | return (mode == CCFPmode |
23765 | ? reverse_condition_maybe_unordered (code) |
23766 | : reverse_condition (code)); |
23767 | } |
23768 | |
23769 | /* Output code to perform an x87 FP register move, from OPERANDS[1] |
23770 | to OPERANDS[0]. */ |
23771 | |
23772 | const char * |
23773 | output_387_reg_move (rtx_insn *insn, rtx *operands) |
23774 | { |
23775 | if (REG_P (operands[0])) |
23776 | { |
23777 | if (REG_P (operands[1]) |
23778 | && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
23779 | { |
23780 | if (REGNO (operands[0]) == FIRST_STACK_REG) |
23781 | return output_387_ffreep (operands, opno: 0); |
23782 | return "fstp\t%y0" ; |
23783 | } |
23784 | if (STACK_TOP_P (operands[0])) |
23785 | return "fld%Z1\t%y1" ; |
23786 | return "fst\t%y0" ; |
23787 | } |
23788 | else if (MEM_P (operands[0])) |
23789 | { |
23790 | gcc_assert (REG_P (operands[1])); |
23791 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
23792 | return "fstp%Z0\t%y0" ; |
23793 | else |
23794 | { |
23795 | /* There is no non-popping store to memory for XFmode. |
23796 | So if we need one, follow the store with a load. */ |
23797 | if (GET_MODE (operands[0]) == XFmode) |
23798 | return "fstp%Z0\t%y0\n\tfld%Z0\t%y0" ; |
23799 | else |
23800 | return "fst%Z0\t%y0" ; |
23801 | } |
23802 | } |
23803 | else |
23804 | gcc_unreachable(); |
23805 | } |
23806 | #ifdef TARGET_SOLARIS |
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  /* The native Solaris assembler needs dedicated COMDAT handling.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  /* Everything else takes the generic ELF section directive.  */
  default_elf_asm_named_section (name, flags, decl);
}
23843 | #endif /* TARGET_SOLARIS */ |
23844 | |
23845 | /* Return the mangling of TYPE if it is an extended fundamental type. */ |
23846 | |
23847 | static const char * |
23848 | ix86_mangle_type (const_tree type) |
23849 | { |
23850 | type = TYPE_MAIN_VARIANT (type); |
23851 | |
23852 | if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE |
23853 | && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) |
23854 | return NULL; |
23855 | |
23856 | if (type == float128_type_node || type == float64x_type_node) |
23857 | return NULL; |
23858 | |
23859 | switch (TYPE_MODE (type)) |
23860 | { |
23861 | case E_BFmode: |
23862 | return "DF16b" ; |
23863 | case E_HFmode: |
23864 | /* _Float16 is "DF16_". |
23865 | Align with clang's decision in https://reviews.llvm.org/D33719. */ |
23866 | return "DF16_" ; |
23867 | case E_TFmode: |
23868 | /* __float128 is "g". */ |
23869 | return "g" ; |
23870 | case E_XFmode: |
23871 | /* "long double" or __float80 is "e". */ |
23872 | return "e" ; |
23873 | default: |
23874 | return NULL; |
23875 | } |
23876 | } |
23877 | |
23878 | /* Create C++ tinfo symbols for only conditionally available fundamental |
23879 | types. */ |
23880 | |
23881 | static void |
23882 | ix86_emit_support_tinfos (emit_support_tinfos_callback callback) |
23883 | { |
23884 | extern tree ix86_float16_type_node; |
23885 | extern tree ix86_bf16_type_node; |
23886 | |
23887 | if (!TARGET_SSE2) |
23888 | { |
23889 | if (!float16_type_node) |
23890 | float16_type_node = ix86_float16_type_node; |
23891 | if (!bfloat16_type_node) |
23892 | bfloat16_type_node = ix86_bf16_type_node; |
23893 | callback (float16_type_node); |
23894 | callback (bfloat16_type_node); |
23895 | float16_type_node = NULL_TREE; |
23896 | bfloat16_type_node = NULL_TREE; |
23897 | } |
23898 | } |
23899 | |
/* GC-rooted cache for the external VAR_DECL built by
   ix86_stack_protect_guard when an explicit guard symbol is given,
   so the decl is created only once.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;
23901 | |
/* Return the tree expression from which the stack-protector canary is
   loaded.  With TARGET_SSP_TLS_GUARD the access is built as a volatile
   load in the address space of the selected guard segment register;
   otherwise defer to the generic __stack_chk_guard handling.  */
static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      /* Qualify the type with the guard register's address space so the
	 load goes through the right segment override.  */
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* An explicit guard symbol was requested: build (and cache in
	     ix86_tls_stack_chk_guard_decl) an external, volatile VAR_DECL
	     for it.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* No symbol given: access the guard as
	     *(type *)guard_offset in the qualified address space.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
23955 | |
23956 | /* For 32-bit code we can save PIC register setup by using |
23957 | __stack_chk_fail_local hidden function instead of calling |
23958 | __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC |
23959 | register, so it is better to call __stack_chk_fail directly. */ |
23960 | |
23961 | static tree ATTRIBUTE_UNUSED |
23962 | ix86_stack_protect_fail (void) |
23963 | { |
23964 | return TARGET_64BIT |
23965 | ? default_external_stack_protect_fail () |
23966 | : default_hidden_stack_protect_fail (); |
23967 | } |
23968 | |
23969 | /* Select a format to encode pointers in exception handling data. CODE |
23970 | is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is |
23971 | true if the symbol may be affected by dynamic relocations. |
23972 | |
23973 | ??? All x86 object file formats are capable of representing this. |
23974 | After all, the relocation needed is the same as for the call insn. |
23975 | Whether or not a particular assembler allows us to enter such, I |
23976 | guess we'll have to see. */ |
23977 | |
23978 | int |
23979 | asm_preferred_eh_data_format (int code, int global) |
23980 | { |
23981 | /* PE-COFF is effectively always -fPIC because of the .reloc section. */ |
23982 | if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access) |
23983 | { |
23984 | int type = DW_EH_PE_sdata8; |
23985 | if (ptr_mode == SImode |
23986 | || ix86_cmodel == CM_SMALL_PIC |
23987 | || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) |
23988 | type = DW_EH_PE_sdata4; |
23989 | return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; |
23990 | } |
23991 | |
23992 | if (ix86_cmodel == CM_SMALL |
23993 | || (ix86_cmodel == CM_MEDIUM && code)) |
23994 | return DW_EH_PE_udata4; |
23995 | |
23996 | return DW_EH_PE_absptr; |
23997 | } |
23998 | |
/* Implement targetm.vectorize.builtin_vectorization_cost.

   Return the cost of operation TYPE_OF_COST for vector type VECTYPE
   (may be NULL; the misalignment argument is unused).  Load/store
   table entries are relative to a register move costing 2, hence the
   COSTS_N_INSNS (...) / 2 rescaling below.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype, int)
{
  bool fp = false;
  /* TImode stands in when no vector type is supplied.  */
  machine_mode mode = TImode;
  int index;
  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
      case scalar_stmt:
        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
	/* load/store costs are relative to register move which is 2. Recompute
 	   it to COSTS_N_INSNS so everything have same base.  */
        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			      : ix86_cost->int_load [2]) / 2;

      case scalar_store:
        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			      : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
        return ix86_vec_cost (mode,
			      cost: fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
        return ix86_vec_cost (mode, cost: ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
      case unaligned_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      case vector_gather_load:
	/* Fixed cost plus a per-element cost scaled by the lane count.  */
        return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->gather_static
				  + ix86_cost->gather_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case vector_scatter_store:
        return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->scatter_static
				  + ix86_cost->scatter_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_vec_cost (mode, cost: ix86_cost->sse_op);

      case vec_construct:
	{
	  int n = TYPE_VECTOR_SUBPARTS (node: vectype);
	  /* N - 1 element inserts into an SSE vector, the possible
	     GPR -> XMM move is accounted for in add_stmt_cost.  */
	  if (GET_MODE_BITSIZE (mode) <= 128)
	    return (n - 1) * ix86_cost->sse_op;
	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
	  else if (GET_MODE_BITSIZE (mode) == 256)
	    return ((n - 2) * ix86_cost->sse_op
		    + ix86_vec_cost (mode, cost: ix86_cost->addss));
	  /* One vinserti64x4 and two vinserti128 for combining SSE
	     and AVX256 vectors to AVX512.  */
	  else if (GET_MODE_BITSIZE (mode) == 512)
	    return ((n - 4) * ix86_cost->sse_op
		    + 3 * ix86_vec_cost (mode, cost: ix86_cost->addss));
	  gcc_unreachable ();
	}

      default:
        gcc_unreachable ();
    }
}
24113 | |
24114 | |
24115 | /* This function returns the calling abi specific va_list type node. |
24116 | It returns the FNDECL specific va_list type. */ |
24117 | |
24118 | static tree |
24119 | ix86_fn_abi_va_list (tree fndecl) |
24120 | { |
24121 | if (!TARGET_64BIT) |
24122 | return va_list_type_node; |
24123 | gcc_assert (fndecl != NULL_TREE); |
24124 | |
24125 | if (ix86_function_abi (fndecl: (const_tree) fndecl) == MS_ABI) |
24126 | return ms_va_list_type_node; |
24127 | else |
24128 | return sysv_va_list_type_node; |
24129 | } |
24130 | |
24131 | /* Returns the canonical va_list type specified by TYPE. If there |
24132 | is no valid TYPE provided, it return NULL_TREE. */ |
24133 | |
24134 | static tree |
24135 | ix86_canonical_va_list_type (tree type) |
24136 | { |
24137 | if (TARGET_64BIT) |
24138 | { |
24139 | if (lookup_attribute (attr_name: "ms_abi va_list" , TYPE_ATTRIBUTES (type))) |
24140 | return ms_va_list_type_node; |
24141 | |
24142 | if ((TREE_CODE (type) == ARRAY_TYPE |
24143 | && integer_zerop (array_type_nelts (type))) |
24144 | || POINTER_TYPE_P (type)) |
24145 | { |
24146 | tree elem_type = TREE_TYPE (type); |
24147 | if (TREE_CODE (elem_type) == RECORD_TYPE |
24148 | && lookup_attribute (attr_name: "sysv_abi va_list" , |
24149 | TYPE_ATTRIBUTES (elem_type))) |
24150 | return sysv_va_list_type_node; |
24151 | } |
24152 | |
24153 | return NULL_TREE; |
24154 | } |
24155 | |
24156 | return std_canonical_va_list_type (type); |
24157 | } |
24158 | |
24159 | /* Iterate through the target-specific builtin types for va_list. |
24160 | IDX denotes the iterator, *PTREE is set to the result type of |
24161 | the va_list builtin, and *PNAME to its internal type. |
24162 | Returns zero if there is no element for this index, otherwise |
24163 | IDX should be increased upon the next call. |
24164 | Note, do not iterate a base builtin's name like __builtin_va_list. |
24165 | Used from c_common_nodes_and_builtins. */ |
24166 | |
24167 | static int |
24168 | ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
24169 | { |
24170 | if (TARGET_64BIT) |
24171 | { |
24172 | switch (idx) |
24173 | { |
24174 | default: |
24175 | break; |
24176 | |
24177 | case 0: |
24178 | *ptree = ms_va_list_type_node; |
24179 | *pname = "__builtin_ms_va_list" ; |
24180 | return 1; |
24181 | |
24182 | case 1: |
24183 | *ptree = sysv_va_list_type_node; |
24184 | *pname = "__builtin_sysv_va_list" ; |
24185 | return 1; |
24186 | } |
24187 | } |
24188 | |
24189 | return 0; |
24190 | } |
24191 | |
24192 | #undef TARGET_SCHED_DISPATCH |
24193 | #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch |
24194 | #undef TARGET_SCHED_DISPATCH_DO |
24195 | #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch |
24196 | #undef TARGET_SCHED_REASSOCIATION_WIDTH |
24197 | #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width |
24198 | #undef TARGET_SCHED_REORDER |
24199 | #define TARGET_SCHED_REORDER ix86_atom_sched_reorder |
24200 | #undef TARGET_SCHED_ADJUST_PRIORITY |
24201 | #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority |
24202 | #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK |
24203 | #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ |
24204 | ix86_dependencies_evaluation_hook |
24205 | |
24206 | |
24207 | /* Implementation of reassociation_width target hook used by |
24208 | reassoc phase to identify parallelism level in reassociated |
24209 | tree. Statements tree_code is passed in OPC. Arguments type |
24210 | is passed in MODE. */ |
24211 | |
24212 | static int |
24213 | ix86_reassociation_width (unsigned int op, machine_mode mode) |
24214 | { |
24215 | int width = 1; |
24216 | /* Vector part. */ |
24217 | if (VECTOR_MODE_P (mode)) |
24218 | { |
24219 | int div = 1; |
24220 | if (INTEGRAL_MODE_P (mode)) |
24221 | width = ix86_cost->reassoc_vec_int; |
24222 | else if (FLOAT_MODE_P (mode)) |
24223 | width = ix86_cost->reassoc_vec_fp; |
24224 | |
24225 | if (width == 1) |
24226 | return 1; |
24227 | |
24228 | /* Integer vector instructions execute in FP unit |
24229 | and can execute 3 additions and one multiplication per cycle. */ |
24230 | if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 |
24231 | || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4) |
24232 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
24233 | return 1; |
24234 | |
24235 | /* Account for targets that splits wide vectors into multiple parts. */ |
24236 | if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) |
24237 | div = GET_MODE_BITSIZE (mode) / 256; |
24238 | else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) |
24239 | div = GET_MODE_BITSIZE (mode) / 128; |
24240 | else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) |
24241 | div = GET_MODE_BITSIZE (mode) / 64; |
24242 | width = (width + div - 1) / div; |
24243 | } |
24244 | /* Scalar part. */ |
24245 | else if (INTEGRAL_MODE_P (mode)) |
24246 | width = ix86_cost->reassoc_int; |
24247 | else if (FLOAT_MODE_P (mode)) |
24248 | width = ix86_cost->reassoc_fp; |
24249 | |
24250 | /* Avoid using too many registers in 32bit mode. */ |
24251 | if (!TARGET_64BIT && width > 2) |
24252 | width = 2; |
24253 | return width; |
24254 | } |
24255 | |
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Return the preferred vector mode for scalar MODE: 512-bit vectors
   require the relevant AVX512 ISA plus TARGET_EVEX512 and no
   prefer-256 tuning; 256-bit vectors require AVX and no prefer-128
   tuning; otherwise a 128-bit vector (or word_mode when no suitable
   vector ISA exists).  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_HFmode:
      /* _Float16 vectors need AVX512FP16; the sub-512-bit modes also
	 need AVX512VL.  */
      if (TARGET_AVX512FP16)
	{
	  if (TARGET_AVX512VL)
	    {
	      if (TARGET_PREFER_AVX128)
		return V8HFmode;
	      else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
		return V16HFmode;
	    }
	  if (TARGET_EVEX512)
	    return V32HFmode;
	}
      return word_mode;

    case E_SFmode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
24335 | |
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  /* NOTE(review): the push order appears to encode the order in which
     the vectorizer tries the modes (note how the "all" variants demote
     the widest mode) -- confirm against the
     autovectorize_vector_modes hook documentation.  */
  if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && TARGET_EVEX512 && all)
    {
      /* prefer-256 tuning, but ALL requested: still offer 512-bit,
	 after the narrower modes.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* 64-bit and 32-bit vectors via SSE registers.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return 0;
}
24376 | |
/* Implementation of targetm.vectorize.get_mask_mode.

   For DATA_MODE return either a scalar integer mask mode (one bit per
   lane, for AVX512-style kmask comparisons) or a vector mask mode with
   the same layout as DATA_MODE.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
	 to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
	  && GET_MODE_INNER (data_mode) == E_HFmode))
    {
      /* QI/HI element masks additionally need AVX512BW.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (size: nunits);
    }

  /* Vector mask case: an integer vector with the same element size and
     lane count as DATA_MODE.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (size: elem_size * BITS_PER_UNIT);

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
24407 | |
24408 | |
24409 | |
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* The leading "0 &&" keeps the SSE-spill path deliberately disabled
     (see the PR list above); the hook currently always returns NO_REGS.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
24428 | |
24429 | /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation, |
24430 | but returns a lower bound. */ |
24431 | |
24432 | static unsigned int |
24433 | ix86_max_noce_ifcvt_seq_cost (edge e) |
24434 | { |
24435 | bool predictable_p = predictable_edge_p (e); |
24436 | if (predictable_p) |
24437 | { |
24438 | if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost)) |
24439 | return param_max_rtl_if_conversion_predictable_cost; |
24440 | } |
24441 | else |
24442 | { |
24443 | if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost)) |
24444 | return param_max_rtl_if_conversion_unpredictable_cost; |
24445 | } |
24446 | |
24447 | return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2); |
24448 | } |
24449 | |
24450 | /* Return true if SEQ is a good candidate as a replacement for the |
24451 | if-convertible sequence described in IF_INFO. */ |
24452 | |
24453 | static bool |
24454 | ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) |
24455 | { |
24456 | if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p) |
24457 | { |
24458 | int cmov_cnt = 0; |
24459 | /* Punt if SEQ contains more than one CMOV or FCMOV instruction. |
24460 | Maybe we should allow even more conditional moves as long as they |
24461 | are used far enough not to stall the CPU, or also consider |
24462 | IF_INFO->TEST_BB succ edge probabilities. */ |
24463 | for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) |
24464 | { |
24465 | rtx set = single_set (insn); |
24466 | if (!set) |
24467 | continue; |
24468 | if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) |
24469 | continue; |
24470 | rtx src = SET_SRC (set); |
24471 | machine_mode mode = GET_MODE (src); |
24472 | if (GET_MODE_CLASS (mode) != MODE_INT |
24473 | && GET_MODE_CLASS (mode) != MODE_FLOAT) |
24474 | continue; |
24475 | if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) |
24476 | || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2)))) |
24477 | continue; |
24478 | /* insn is CMOV or FCMOV. */ |
24479 | if (++cmov_cnt > 1) |
24480 | return false; |
24481 | } |
24482 | } |
24483 | return default_noce_conversion_profitable_p (seq, if_info); |
24484 | } |
24485 | |
/* x86-specific vector costs.  Accumulates per-location costs via
   add_stmt_cost and applies final adjustments in finish_cost.  */
class ix86_vector_costs : public vector_costs
{
  using vector_costs::vector_costs;

  /* Record the cost of COUNT statements of kind KIND; see the
     definition below.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  /* Final cost adjustments before comparison with the scalar costs.  */
  void finish_cost (const vector_costs *) override;
};
24497 | |
24498 | /* Implement targetm.vectorize.create_costs. */ |
24499 | |
24500 | static vector_costs * |
24501 | ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) |
24502 | { |
24503 | return new ix86_vector_costs (vinfo, costing_for_scalar); |
24504 | } |
24505 | |
/* Implement vector_costs::add_stmt_cost.  Record the cost of COUNT
   statements of kind KIND at location WHERE, returning the amount
   added to m_costs[WHERE].  Where possible a target-specific cost is
   computed from the statement's tree code or combined function;
   otherwise ix86_builtin_vectorization_cost provides the estimate.  */
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
				  stmt_vec_info stmt_info, slp_tree node,
				  tree vectype, int misalign,
				  vect_cost_model_location where)
{
  unsigned retval = 0;
  bool scalar_p
    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
  /* -1 means no target-specific cost has been computed yet.  */
  int stmt_cost = - 1;

  bool fp = false;
  machine_mode mode = scalar_p ? SImode : TImode;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
      if (scalar_p)
	mode = TYPE_MODE (TREE_TYPE (vectype));
    }

  /* Refine the cost for GIMPLE assignments whose rhs code we know how
     to price directly.  */
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (g: stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (gs: stmt_info->stmt);
      /*machine_mode inner_mode = mode;
      if (VECTOR_MODE_P (mode))
	inner_mode = GET_MODE_INNER (mode);*/

      switch (subcode)
	{
	case PLUS_EXPR:
	case POINTER_PLUS_EXPR:
	case MINUS_EXPR:
	  if (kind == scalar_stmt)
	    {
	      if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
		stmt_cost = ix86_cost->addss;
	      else if (X87_FLOAT_MODE_P (mode))
		stmt_cost = ix86_cost->fadd;
	      else
	        stmt_cost = ix86_cost->add;
	    }
	  else
	    stmt_cost = ix86_vec_cost (mode, cost: fp ? ix86_cost->addss
				       : ix86_cost->sse_op);
	  break;

	case MULT_EXPR:
	  /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
	     take it as MULT_EXPR.  */
	case MULT_HIGHPART_EXPR:
	  stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	  break;
	  /* There's no direct instruction for WIDEN_MULT_EXPR,
	     take emulation into account.  */
	case WIDEN_MULT_EXPR:
	  stmt_cost = ix86_widen_mult_cost (cost: ix86_cost, mode,
					    TYPE_UNSIGNED (vectype));
	  break;

	case NEGATE_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	    stmt_cost = ix86_cost->sse_op;
	  else if (X87_FLOAT_MODE_P (mode))
	    stmt_cost = ix86_cost->fchs;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case RDIV_EXPR:
	case ROUND_MOD_EXPR:
	case EXACT_DIV_EXPR:
	  stmt_cost = ix86_division_cost (cost: ix86_cost, mode);
	  break;

	case RSHIFT_EXPR:
	case LSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  {
	    tree op1 = gimple_assign_rhs1 (gs: stmt_info->stmt);
	    tree op2 = gimple_assign_rhs2 (gs: stmt_info->stmt);
	    stmt_cost = ix86_shift_rotate_cost
			   (cost: ix86_cost,
			    code: (subcode == RSHIFT_EXPR
			     && !TYPE_UNSIGNED (TREE_TYPE (op1)))
			    ? ASHIFTRT : LSHIFTRT, mode,
			    TREE_CODE (op2) == INTEGER_CST,
			    op1_val: cst_and_fits_in_hwi (op2)
			    ? int_cst_value (op2) : -1,
			    and_in_op1: false, shift_and_truncate: false, NULL, NULL);
	  }
	  break;
	case NOP_EXPR:
	  /* Only sign-conversions are free.  */
	  if (tree_nop_conversion_p
	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
	    stmt_cost = 0;
	  break;

	case BIT_IOR_EXPR:
	case ABS_EXPR:
	case ABSU_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	case BIT_NOT_EXPR:
	  if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	    stmt_cost = ix86_cost->sse_op;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, cost: ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	default:
	  break;
	}
    }

  /* Likewise for internal/builtin calls we can price directly.  */
  combined_fn cfn;
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt
      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
    switch (cfn)
      {
      case CFN_FMA:
	stmt_cost = ix86_vec_cost (mode,
				   cost: mode == SFmode ? ix86_cost->fmass
				   : ix86_cost->fmasd);
	break;
      case CFN_MULH:
	stmt_cost = ix86_multiplication_cost (cost: ix86_cost, mode);
	break;
      default:
	break;
      }

  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && stmt_info
      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
      && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
	   && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
	       != INTEGER_CST))
	  || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
    {
      stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (node: vectype) + 1);
    }
  else if ((kind == vec_construct || kind == scalar_to_vec)
	   && node
	   && SLP_TREE_DEF_TYPE (node) == vect_external_def
	   && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
    {
      /* Charge an extra GPR->XMM transfer for each distinct external
	 scalar operand that does not come straight from memory or a
	 vector extract.  TREE_VISITED is used as a scratch "already
	 counted" marker and reset afterwards.  */
      stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);
      unsigned i;
      tree op;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	{
	  if (TREE_CODE (op) != SSA_NAME
	      || TREE_VISITED (op))
	    continue;
	  TREE_VISITED (op) = 1;
	  gimple *def = SSA_NAME_DEF_STMT (op);
	  tree tem;
	  if (is_gimple_assign (gs: def)
	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
	      && ((tem = gimple_assign_rhs1 (gs: def)), true)
	      && TREE_CODE (tem) == SSA_NAME
	      /* A sign-change expands to nothing.  */
	      && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
					TREE_TYPE (tem)))
	    def = SSA_NAME_DEF_STMT (tem);
	  /* When the component is loaded from memory we can directly
	     move it to a vector register, otherwise we have to go
	     via a GPR or via vpinsr which involves similar cost.
	     Likewise with a BIT_FIELD_REF extracting from a vector
	     register we can hope to avoid using a GPR.  */
	  if (!is_gimple_assign (gs: def)
	      || ((!gimple_assign_load_p (def)
		   || (!TARGET_SSE4_1
		       && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
		  && (gimple_assign_rhs_code (gs: def) != BIT_FIELD_REF
		      || !VECTOR_TYPE_P (TREE_TYPE
				(TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
	    stmt_cost += ix86_cost->sse_to_integer;
	}
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	if (TREE_CODE (op) == SSA_NAME)
	  TREE_VISITED (op) = 0;
    }
  /* Fall back to the generic estimate when nothing above applied.  */
  if (stmt_cost == -1)
    stmt_cost = ix86_builtin_vectorization_cost (type_of_cost: kind, vectype, misalign);

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instruction per tick, but has in order SIMD pipeline.  */
  if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
       || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
      && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  m_costs[where] += retval;

  return retval;
}
24747 | |
24748 | void |
24749 | ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) |
24750 | { |
24751 | loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: m_vinfo); |
24752 | if (loop_vinfo && !m_costing_for_scalar) |
24753 | { |
24754 | /* We are currently not asking the vectorizer to compare costs |
24755 | between different vector mode sizes. When using predication |
24756 | that will end up always choosing the prefered mode size even |
24757 | if there's a smaller mode covering all lanes. Test for this |
24758 | situation and artificially reject the larger mode attempt. |
24759 | ??? We currently lack masked ops for sub-SSE sized modes, |
24760 | so we could restrict this rejection to AVX and AVX512 modes |
24761 | but error on the safe side for now. */ |
24762 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
24763 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
24764 | && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
24765 | && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()) |
24766 | > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo)))) |
24767 | m_costs[vect_body] = INT_MAX; |
24768 | } |
24769 | |
24770 | vector_costs::finish_cost (scalar_costs); |
24771 | } |
24772 | |
24773 | /* Validate target specific memory model bits in VAL. */ |
24774 | |
24775 | static unsigned HOST_WIDE_INT |
24776 | ix86_memmodel_check (unsigned HOST_WIDE_INT val) |
24777 | { |
24778 | enum memmodel model = memmodel_from_int (val); |
24779 | bool strong; |
24780 | |
24781 | if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE |
24782 | |MEMMODEL_MASK) |
24783 | || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) |
24784 | { |
24785 | warning (OPT_Winvalid_memory_model, |
24786 | "unknown architecture specific memory model" ); |
24787 | return MEMMODEL_SEQ_CST; |
24788 | } |
24789 | strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); |
24790 | if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) |
24791 | { |
24792 | warning (OPT_Winvalid_memory_model, |
24793 | "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger " |
24794 | "memory model" ); |
24795 | return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; |
24796 | } |
24797 | if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) |
24798 | { |
24799 | warning (OPT_Winvalid_memory_model, |
24800 | "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger " |
24801 | "memory model" ); |
24802 | return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; |
24803 | } |
24804 | return val; |
24805 | } |
24806 | |
24807 | /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, |
24808 | CLONEI->vecsize_float and if CLONEI->simdlen is 0, also |
24809 | CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, |
24810 | or number of vecsize_mangle variants that should be emitted. */ |
24811 | |
static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num,
					     bool explicit_p)
{
  int ret = 1;

  /* An explicitly requested simdlen must be a power of two in
     [2, 1024]; anything else is rejected (with a warning only when the
     user asked for the clone explicitly).  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported simdlen %wd" , clonei->simdlen.to_constant ());
      return 0;
    }

  /* Only scalar (non-aggregate) integer and float return types are
     supported; void is also fine.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	/* FALLTHRU */
      default:
	if (explicit_p)
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported return type %qT for simd" , ret_type);
	return 0;
      }

  tree t;
  int i;
  /* For definitions (or unprototyped declarations) walk
     DECL_ARGUMENTS, otherwise walk the prototype's TYPE_ARG_TYPES.  */
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      /* Same restriction as for the return type, except uniform
	 arguments are passed as-is and so may have any type.  */
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  /* FALLTHRU */
	default:
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported argument type %qT for simd" , arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  Choose the widest ISA currently enabled.  */
      if (TARGET_AVX512F && TARGET_EVEX512)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported functions get all four ISA variants; NUM selects
	 which one this call describes.  */
      clonei->vecsize_mangle = "bcde" [num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Per-ISA vector register widths: 'b' SSE, 'c' AVX, 'd' AVX2,
     'e' AVX512 (the latter uses mask registers).  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* No explicit simdlen: derive it from how many BASE_TYPE
	 elements fit in the chosen vector size.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
			/ GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  if (explicit_p)
	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
			"unsupported simdlen %wd" ,
			clonei->simdlen.to_constant ());
	  return 0;
	}
    }
  return ret;
}
24965 | |
24966 | /* If SIMD clone NODE can't be used in a vectorized loop |
24967 | in current function, return -1, otherwise return a badness of using it |
24968 | (0 if it is most desirable from vecsize_mangle point of view, 1 |
24969 | slightly less desirable, etc.). */ |
24970 | |
24971 | static int |
24972 | ix86_simd_clone_usable (struct cgraph_node *node) |
24973 | { |
24974 | switch (node->simdclone->vecsize_mangle) |
24975 | { |
24976 | case 'b': |
24977 | if (!TARGET_SSE2) |
24978 | return -1; |
24979 | if (!TARGET_AVX) |
24980 | return 0; |
24981 | return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1; |
24982 | case 'c': |
24983 | if (!TARGET_AVX) |
24984 | return -1; |
24985 | return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0; |
24986 | case 'd': |
24987 | if (!TARGET_AVX2) |
24988 | return -1; |
24989 | return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0; |
24990 | case 'e': |
24991 | if (!TARGET_AVX512F || !TARGET_EVEX512) |
24992 | return -1; |
24993 | return 0; |
24994 | default: |
24995 | gcc_unreachable (); |
24996 | } |
24997 | } |
24998 | |
24999 | /* This function adjusts the unroll factor based on |
25000 | the hardware capabilities. For ex, bdver3 has |
25001 | a loop buffer which makes unrolling of smaller |
25002 | loops less important. This function decides the |
25003 | unroll factor using number of memory references |
25004 | (value 32 is used) as a heuristic. */ |
25005 | |
25006 | static unsigned |
25007 | ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) |
25008 | { |
25009 | basic_block *bbs; |
25010 | rtx_insn *insn; |
25011 | unsigned i; |
25012 | unsigned mem_count = 0; |
25013 | |
25014 | /* Unroll small size loop when unroll factor is not explicitly |
25015 | specified. */ |
25016 | if (ix86_unroll_only_small_loops && !loop->unroll) |
25017 | { |
25018 | if (loop->ninsns <= ix86_cost->small_unroll_ninsns) |
25019 | return MIN (nunroll, ix86_cost->small_unroll_factor); |
25020 | else |
25021 | return 1; |
25022 | } |
25023 | |
25024 | if (!TARGET_ADJUST_UNROLL) |
25025 | return nunroll; |
25026 | |
25027 | /* Count the number of memory references within the loop body. |
25028 | This value determines the unrolling factor for bdver3 and bdver4 |
25029 | architectures. */ |
25030 | subrtx_iterator::array_type array; |
25031 | bbs = get_loop_body (loop); |
25032 | for (i = 0; i < loop->num_nodes; i++) |
25033 | FOR_BB_INSNS (bbs[i], insn) |
25034 | if (NONDEBUG_INSN_P (insn)) |
25035 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
25036 | if (const_rtx x = *iter) |
25037 | if (MEM_P (x)) |
25038 | { |
25039 | machine_mode mode = GET_MODE (x); |
25040 | unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
25041 | if (n_words > 4) |
25042 | mem_count += 2; |
25043 | else |
25044 | mem_count += 1; |
25045 | } |
25046 | free (ptr: bbs); |
25047 | |
25048 | if (mem_count && mem_count <=32) |
25049 | return MIN (nunroll, 32 / mem_count); |
25050 | |
25051 | return nunroll; |
25052 | } |
25053 | |
25054 | |
25055 | /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ |
25056 | |
25057 | static bool |
25058 | ix86_float_exceptions_rounding_supported_p (void) |
25059 | { |
25060 | /* For x87 floating point with standard excess precision handling, |
25061 | there is no adddf3 pattern (since x87 floating point only has |
25062 | XFmode operations) so the default hook implementation gets this |
25063 | wrong. */ |
25064 | return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH); |
25065 | } |
25066 | |
25067 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ |
25068 | |
25069 | static void |
25070 | ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) |
25071 | { |
25072 | if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH)) |
25073 | return; |
25074 | tree exceptions_var = create_tmp_var_raw (integer_type_node); |
25075 | if (TARGET_80387) |
25076 | { |
25077 | tree fenv_index_type = build_index_type (size_int (6)); |
25078 | tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type); |
25079 | tree fenv_var = create_tmp_var_raw (fenv_type); |
25080 | TREE_ADDRESSABLE (fenv_var) = 1; |
25081 | tree fenv_ptr = build_pointer_type (fenv_type); |
25082 | tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); |
25083 | fenv_addr = fold_convert (ptr_type_node, fenv_addr); |
25084 | tree fnstenv = get_ix86_builtin (c: IX86_BUILTIN_FNSTENV); |
25085 | tree fldenv = get_ix86_builtin (c: IX86_BUILTIN_FLDENV); |
25086 | tree fnstsw = get_ix86_builtin (c: IX86_BUILTIN_FNSTSW); |
25087 | tree fnclex = get_ix86_builtin (c: IX86_BUILTIN_FNCLEX); |
25088 | tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); |
25089 | tree hold_fnclex = build_call_expr (fnclex, 0); |
25090 | fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, |
25091 | NULL_TREE, NULL_TREE); |
25092 | *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, |
25093 | hold_fnclex); |
25094 | *clear = build_call_expr (fnclex, 0); |
25095 | tree sw_var = create_tmp_var_raw (short_unsigned_type_node); |
25096 | tree fnstsw_call = build_call_expr (fnstsw, 0); |
25097 | tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var, |
25098 | fnstsw_call, NULL_TREE, NULL_TREE); |
25099 | tree exceptions_x87 = fold_convert (integer_type_node, sw_var); |
25100 | tree update_mod = build4 (TARGET_EXPR, integer_type_node, |
25101 | exceptions_var, exceptions_x87, |
25102 | NULL_TREE, NULL_TREE); |
25103 | *update = build2 (COMPOUND_EXPR, integer_type_node, |
25104 | sw_mod, update_mod); |
25105 | tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr); |
25106 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); |
25107 | } |
25108 | if (TARGET_SSE && TARGET_SSE_MATH) |
25109 | { |
25110 | tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); |
25111 | tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); |
25112 | tree stmxcsr = get_ix86_builtin (c: IX86_BUILTIN_STMXCSR); |
25113 | tree ldmxcsr = get_ix86_builtin (c: IX86_BUILTIN_LDMXCSR); |
25114 | tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); |
25115 | tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node, |
25116 | mxcsr_orig_var, stmxcsr_hold_call, |
25117 | NULL_TREE, NULL_TREE); |
25118 | tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node, |
25119 | mxcsr_orig_var, |
25120 | build_int_cst (unsigned_type_node, 0x1f80)); |
25121 | hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val, |
25122 | build_int_cst (unsigned_type_node, 0xffffffc0)); |
25123 | tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node, |
25124 | mxcsr_mod_var, hold_mod_val, |
25125 | NULL_TREE, NULL_TREE); |
25126 | tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
25127 | tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node, |
25128 | hold_assign_orig, hold_assign_mod); |
25129 | hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all, |
25130 | ldmxcsr_hold_call); |
25131 | if (*hold) |
25132 | *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all); |
25133 | else |
25134 | *hold = hold_all; |
25135 | tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
25136 | if (*clear) |
25137 | *clear = build2 (COMPOUND_EXPR, void_type_node, *clear, |
25138 | ldmxcsr_clear_call); |
25139 | else |
25140 | *clear = ldmxcsr_clear_call; |
25141 | tree stxmcsr_update_call = build_call_expr (stmxcsr, 0); |
25142 | tree exceptions_sse = fold_convert (integer_type_node, |
25143 | stxmcsr_update_call); |
25144 | if (*update) |
25145 | { |
25146 | tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node, |
25147 | exceptions_var, exceptions_sse); |
25148 | tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node, |
25149 | exceptions_var, exceptions_mod); |
25150 | *update = build2 (COMPOUND_EXPR, integer_type_node, *update, |
25151 | exceptions_assign); |
25152 | } |
25153 | else |
25154 | *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var, |
25155 | exceptions_sse, NULL_TREE, NULL_TREE); |
25156 | tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var); |
25157 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
25158 | ldmxcsr_update_call); |
25159 | } |
25160 | tree atomic_feraiseexcept |
25161 | = builtin_decl_implicit (fncode: BUILT_IN_ATOMIC_FERAISEEXCEPT); |
25162 | tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, |
25163 | 1, exceptions_var); |
25164 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
25165 | atomic_feraiseexcept_call); |
25166 | } |
25167 | |
25168 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
25169 | /* For i386, common symbol is local only for non-PIE binaries. For |
25170 | x86-64, common symbol is local only for non-PIE binaries or linker |
25171 | supports copy reloc in PIE binaries. */ |
25172 | |
25173 | static bool |
25174 | ix86_binds_local_p (const_tree exp) |
25175 | { |
25176 | bool direct_extern_access |
25177 | = (ix86_direct_extern_access |
25178 | && !(VAR_OR_FUNCTION_DECL_P (exp) |
25179 | && lookup_attribute (attr_name: "nodirect_extern_access" , |
25180 | DECL_ATTRIBUTES (exp)))); |
25181 | if (!direct_extern_access) |
25182 | ix86_has_no_direct_extern_access = true; |
25183 | return default_binds_local_p_3 (exp, flag_shlib != 0, true, |
25184 | direct_extern_access, |
25185 | (direct_extern_access |
25186 | && (!flag_pic |
25187 | || (TARGET_64BIT |
25188 | && HAVE_LD_PIE_COPYRELOC != 0)))); |
25189 | } |
25190 | |
25191 | /* If flag_pic or ix86_direct_extern_access is false, then neither |
25192 | local nor global relocs should be placed in readonly memory. */ |
25193 | |
25194 | static int |
25195 | ix86_reloc_rw_mask (void) |
25196 | { |
25197 | return (flag_pic || !ix86_direct_extern_access) ? 3 : 0; |
25198 | } |
25199 | #endif |
25200 | |
25201 | /* Return true iff ADDR can be used as a symbolic base address. */ |
25202 | |
25203 | static bool |
25204 | symbolic_base_address_p (rtx addr) |
25205 | { |
25206 | if (GET_CODE (addr) == SYMBOL_REF) |
25207 | return true; |
25208 | |
25209 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF) |
25210 | return true; |
25211 | |
25212 | return false; |
25213 | } |
25214 | |
25215 | /* Return true iff ADDR can be used as a base address. */ |
25216 | |
25217 | static bool |
25218 | base_address_p (rtx addr) |
25219 | { |
25220 | if (REG_P (addr)) |
25221 | return true; |
25222 | |
25223 | if (symbolic_base_address_p (addr)) |
25224 | return true; |
25225 | |
25226 | return false; |
25227 | } |
25228 | |
25229 | /* If MEM is in the form of [(base+symbase)+offset], extract the three |
25230 | parts of address and set to BASE, SYMBASE and OFFSET, otherwise |
25231 | return false. */ |
25232 | |
25233 | static bool |
25234 | (rtx mem, rtx *base, rtx *symbase, rtx *offset) |
25235 | { |
25236 | rtx addr; |
25237 | |
25238 | gcc_assert (MEM_P (mem)); |
25239 | |
25240 | addr = XEXP (mem, 0); |
25241 | |
25242 | if (GET_CODE (addr) == CONST) |
25243 | addr = XEXP (addr, 0); |
25244 | |
25245 | if (base_address_p (addr)) |
25246 | { |
25247 | *base = addr; |
25248 | *symbase = const0_rtx; |
25249 | *offset = const0_rtx; |
25250 | return true; |
25251 | } |
25252 | |
25253 | if (GET_CODE (addr) == PLUS |
25254 | && base_address_p (XEXP (addr, 0))) |
25255 | { |
25256 | rtx addend = XEXP (addr, 1); |
25257 | |
25258 | if (GET_CODE (addend) == CONST) |
25259 | addend = XEXP (addend, 0); |
25260 | |
25261 | if (CONST_INT_P (addend)) |
25262 | { |
25263 | *base = XEXP (addr, 0); |
25264 | *symbase = const0_rtx; |
25265 | *offset = addend; |
25266 | return true; |
25267 | } |
25268 | |
25269 | /* Also accept REG + symbolic ref, with or without a CONST_INT |
25270 | offset. */ |
25271 | if (REG_P (XEXP (addr, 0))) |
25272 | { |
25273 | if (symbolic_base_address_p (addr: addend)) |
25274 | { |
25275 | *base = XEXP (addr, 0); |
25276 | *symbase = addend; |
25277 | *offset = const0_rtx; |
25278 | return true; |
25279 | } |
25280 | |
25281 | if (GET_CODE (addend) == PLUS |
25282 | && symbolic_base_address_p (XEXP (addend, 0)) |
25283 | && CONST_INT_P (XEXP (addend, 1))) |
25284 | { |
25285 | *base = XEXP (addr, 0); |
25286 | *symbase = XEXP (addend, 0); |
25287 | *offset = XEXP (addend, 1); |
25288 | return true; |
25289 | } |
25290 | } |
25291 | } |
25292 | |
25293 | return false; |
25294 | } |
25295 | |
25296 | /* Given OPERANDS of consecutive load/store, check if we can merge |
25297 | them into move multiple. LOAD is true if they are load instructions. |
25298 | MODE is the mode of memory operands. */ |
25299 | |
25300 | bool |
25301 | ix86_operands_ok_for_move_multiple (rtx *operands, bool load, |
25302 | machine_mode mode) |
25303 | { |
25304 | HOST_WIDE_INT offval_1, offval_2, msize; |
25305 | rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, |
25306 | symbase_1, symbase_2, offset_1, offset_2; |
25307 | |
25308 | if (load) |
25309 | { |
25310 | mem_1 = operands[1]; |
25311 | mem_2 = operands[3]; |
25312 | reg_1 = operands[0]; |
25313 | reg_2 = operands[2]; |
25314 | } |
25315 | else |
25316 | { |
25317 | mem_1 = operands[0]; |
25318 | mem_2 = operands[2]; |
25319 | reg_1 = operands[1]; |
25320 | reg_2 = operands[3]; |
25321 | } |
25322 | |
25323 | gcc_assert (REG_P (reg_1) && REG_P (reg_2)); |
25324 | |
25325 | if (REGNO (reg_1) != REGNO (reg_2)) |
25326 | return false; |
25327 | |
25328 | /* Check if the addresses are in the form of [base+offset]. */ |
25329 | if (!extract_base_offset_in_addr (mem: mem_1, base: &base_1, symbase: &symbase_1, offset: &offset_1)) |
25330 | return false; |
25331 | if (!extract_base_offset_in_addr (mem: mem_2, base: &base_2, symbase: &symbase_2, offset: &offset_2)) |
25332 | return false; |
25333 | |
25334 | /* Check if the bases are the same. */ |
25335 | if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2)) |
25336 | return false; |
25337 | |
25338 | offval_1 = INTVAL (offset_1); |
25339 | offval_2 = INTVAL (offset_2); |
25340 | msize = GET_MODE_SIZE (mode); |
25341 | /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ |
25342 | if (offval_1 + msize != offval_2) |
25343 | return false; |
25344 | |
25345 | return true; |
25346 | } |
25347 | |
25348 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ |
25349 | |
25350 | static bool |
25351 | ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, |
25352 | optimization_type opt_type) |
25353 | { |
25354 | switch (op) |
25355 | { |
25356 | case asin_optab: |
25357 | case acos_optab: |
25358 | case log1p_optab: |
25359 | case exp_optab: |
25360 | case exp10_optab: |
25361 | case exp2_optab: |
25362 | case expm1_optab: |
25363 | case ldexp_optab: |
25364 | case scalb_optab: |
25365 | case round_optab: |
25366 | case lround_optab: |
25367 | return opt_type == OPTIMIZE_FOR_SPEED; |
25368 | |
25369 | case rint_optab: |
25370 | if (SSE_FLOAT_MODE_P (mode1) |
25371 | && TARGET_SSE_MATH |
25372 | && !flag_trapping_math |
25373 | && !TARGET_SSE4_1 |
25374 | && mode1 != HFmode) |
25375 | return opt_type == OPTIMIZE_FOR_SPEED; |
25376 | return true; |
25377 | |
25378 | case floor_optab: |
25379 | case ceil_optab: |
25380 | case btrunc_optab: |
25381 | if (((SSE_FLOAT_MODE_P (mode1) |
25382 | && TARGET_SSE_MATH |
25383 | && TARGET_SSE4_1) |
25384 | || mode1 == HFmode) |
25385 | && !flag_trapping_math) |
25386 | return true; |
25387 | return opt_type == OPTIMIZE_FOR_SPEED; |
25388 | |
25389 | case rsqrt_optab: |
25390 | return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode: mode1); |
25391 | |
25392 | default: |
25393 | return true; |
25394 | } |
25395 | } |
25396 | |
25397 | /* Address space support. |
25398 | |
25399 | This is not "far pointers" in the 16-bit sense, but an easy way |
25400 | to use %fs and %gs segment prefixes. Therefore: |
25401 | |
25402 | (a) All address spaces have the same modes, |
     (b) All address spaces have the same address forms,
25404 | (c) While %fs and %gs are technically subsets of the generic |
25405 | address space, they are probably not subsets of each other. |
25406 | (d) Since we have no access to the segment base register values |
25407 | without resorting to a system call, we cannot convert a |
25408 | non-default address space to a default address space. |
25409 | Therefore we do not claim %fs or %gs are subsets of generic. |
25410 | |
25411 | Therefore we can (mostly) use the default hooks. */ |
25412 | |
25413 | /* All use of segmentation is assumed to make address 0 valid. */ |
25414 | |
25415 | static bool |
25416 | ix86_addr_space_zero_address_valid (addr_space_t as) |
25417 | { |
25418 | return as != ADDR_SPACE_GENERIC; |
25419 | } |
25420 | |
25421 | static void |
25422 | ix86_init_libfuncs (void) |
25423 | { |
25424 | if (TARGET_64BIT) |
25425 | { |
25426 | set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4" ); |
25427 | set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4" ); |
25428 | } |
25429 | else |
25430 | { |
25431 | set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4" ); |
25432 | set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4" ); |
25433 | } |
25434 | |
25435 | #if TARGET_MACHO |
25436 | darwin_rename_builtins (); |
25437 | #endif |
25438 | } |
25439 | |
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return TARGET_AVX512FP16
	     ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	     : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      else if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  /* Pure x87 math computes in long double; pure SSE2 math
	     computes in the nominal type.  */
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    case EXCESS_PRECISION_TYPE_FLOAT16:
      /* 16-bit excess precision requires SSE math; reject it when x87
	 could be chosen instead.  */
      if (TARGET_80387
	  && !(TARGET_SSE_MATH && TARGET_SSE))
	error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>" );
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
25495 | |
25496 | /* Return true if _BitInt(N) is supported and fill its details into *INFO. */ |
25497 | bool |
25498 | ix86_bitint_type_info (int n, struct bitint_info *info) |
25499 | { |
25500 | if (!TARGET_64BIT) |
25501 | return false; |
25502 | if (n <= 8) |
25503 | info->limb_mode = QImode; |
25504 | else if (n <= 16) |
25505 | info->limb_mode = HImode; |
25506 | else if (n <= 32) |
25507 | info->limb_mode = SImode; |
25508 | else |
25509 | info->limb_mode = DImode; |
25510 | info->big_endian = false; |
25511 | info->extended = false; |
25512 | return true; |
25513 | } |
25514 | |
25515 | /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that |
25516 | decrements by exactly 2 no matter what the position was, there is no pushb. |
25517 | |
25518 | But as CIE data alignment factor on this arch is -4 for 32bit targets |
25519 | and -8 for 64bit targets, we need to make sure all stack pointer adjustments |
25520 | are in multiple of 4 for 32bit targets and 8 for 64bit targets. */ |
25521 | |
25522 | poly_int64 |
25523 | ix86_push_rounding (poly_int64 bytes) |
25524 | { |
25525 | return ROUND_UP (bytes, UNITS_PER_WORD); |
25526 | } |
25527 | |
/* Use 8 bits of metadata starting at bit 48 for LAM_U48,
   6 bits of metadata starting at bit 57 for LAM_U57.  */
25530 | #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \ |
25531 | ? 48 \ |
25532 | : (ix86_lam_type == lam_u57 ? 57 : 0)) |
25533 | #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \ |
25534 | ? 8 \ |
25535 | : (ix86_lam_type == lam_u57 ? 6 : 0)) |
25536 | |
25537 | /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */ |
25538 | bool |
25539 | ix86_memtag_can_tag_addresses () |
25540 | { |
25541 | return ix86_lam_type != lam_none && TARGET_LP64; |
25542 | } |
25543 | |
25544 | /* Implement TARGET_MEMTAG_TAG_SIZE. */ |
25545 | unsigned char |
25546 | ix86_memtag_tag_size () |
25547 | { |
25548 | return IX86_HWASAN_TAG_SIZE; |
25549 | } |
25550 | |
/* Implement TARGET_MEMTAG_SET_TAG.  Insert TAG into the tag bit-field
   of UNTAGGED and return the tagged pointer, preferring TARGET as the
   result register.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Shift the tag up into its bit position, then OR it into the
     pointer.  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
25571 | |
25572 | /* Implement TARGET_MEMTAG_EXTRACT_TAG. */ |
25573 | rtx |
25574 | (rtx tagged_pointer, rtx target) |
25575 | { |
25576 | rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer, |
25577 | GEN_INT (IX86_HWASAN_SHIFT), target, |
25578 | /* unsignedp = */0, |
25579 | OPTAB_DIRECT); |
25580 | rtx ret = gen_reg_rtx (QImode); |
25581 | /* Mask off bit63 when LAM_U57. */ |
25582 | if (ix86_lam_type == lam_u57) |
25583 | { |
25584 | unsigned HOST_WIDE_INT and_imm |
25585 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
25586 | emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag), |
25587 | gen_int_mode (and_imm, QImode))); |
25588 | } |
25589 | else |
25590 | emit_move_insn (ret, gen_lowpart (QImode, tag)); |
25591 | return ret; |
25592 | } |
25593 | |
25594 | /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */ |
25595 | rtx |
25596 | ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target) |
25597 | { |
25598 | /* Leave bit63 alone. */ |
25599 | rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) |
25600 | + (HOST_WIDE_INT_1U << 63) - 1), |
25601 | Pmode); |
25602 | rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer, |
25603 | tag_mask, target, true, |
25604 | OPTAB_DIRECT); |
25605 | gcc_assert (untagged_base); |
25606 | return untagged_base; |
25607 | } |
25608 | |
25609 | /* Implement TARGET_MEMTAG_ADD_TAG. */ |
25610 | rtx |
25611 | ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset) |
25612 | { |
25613 | rtx base_tag = gen_reg_rtx (QImode); |
25614 | rtx base_addr = gen_reg_rtx (Pmode); |
25615 | rtx tagged_addr = gen_reg_rtx (Pmode); |
25616 | rtx new_tag = gen_reg_rtx (QImode); |
25617 | unsigned HOST_WIDE_INT and_imm |
25618 | = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1; |
25619 | |
25620 | /* When there's "overflow" in tag adding, |
25621 | need to mask the most significant bit off. */ |
25622 | emit_move_insn (base_tag, ix86_memtag_extract_tag (tagged_pointer: base, NULL_RTX)); |
25623 | emit_move_insn (base_addr, |
25624 | ix86_memtag_untagged_pointer (tagged_pointer: base, NULL_RTX)); |
25625 | emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode))); |
25626 | emit_move_insn (new_tag, base_tag); |
25627 | emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode))); |
25628 | emit_move_insn (tagged_addr, |
25629 | ix86_memtag_set_tag (untagged: base_addr, tag: new_tag, NULL_RTX)); |
25630 | return plus_constant (Pmode, tagged_addr, offset); |
25631 | } |
25632 | |
25633 | /* Target-specific selftests. */ |
25634 | |
25635 | #if CHECKING_P |
25636 | |
25637 | namespace selftest { |
25638 | |
/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  /* Hard regs 0 and 1 are "ax" and "dx" respectively in this port's
     compact dumps; verify they print by name rather than number.  */
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)" , gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)" , gen_raw_REG (SImode, 1));
}
25647 | |
25648 | /* Test dumping an insn with repeated references to the same SCRATCH, |
25649 | to verify the rtx_reuse code. */ |
25650 | |
25651 | static void |
25652 | ix86_test_dumping_memory_blockage () |
25653 | { |
25654 | set_new_first_and_last_insn (NULL, NULL); |
25655 | |
25656 | rtx pat = gen_memory_blockage (); |
25657 | rtx_reuse_manager r; |
25658 | r.preprocess (x: pat); |
25659 | |
25660 | /* Verify that the repeated references to the SCRATCH show use |
25661 | reuse IDS. The first should be prefixed with a reuse ID, |
25662 | and the second should be dumped as a "reuse_rtx" of that ID. |
25663 | The expected string assumes Pmode == DImode. */ |
25664 | if (Pmode == DImode) |
25665 | ASSERT_RTL_DUMP_EQ_WITH_REUSE |
25666 | ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n" |
25667 | " (unspec:BLK [\n" |
25668 | " (mem/v:BLK (reuse_rtx 0) [0 A8])\n" |
25669 | " ] UNSPEC_MEMORY_BLOCKAGE)))\n" , pat, &r); |
25670 | } |
25671 | |
25672 | /* Verify loading an RTL dump; specifically a dump of copying |
25673 | a param on x86_64 from a hard reg into the frame. |
25674 | This test is target-specific since the dump contains target-specific |
25675 | hard reg names. */ |
25676 | |
25677 | static void |
25678 | ix86_test_loading_dump_fragment_1 () |
25679 | { |
25680 | rtl_dump_test t (SELFTEST_LOCATION, |
25681 | locate_file (path: "x86_64/copy-hard-reg-into-frame.rtl" )); |
25682 | |
25683 | rtx_insn *insn = get_insn_by_uid (uid: 1); |
25684 | |
25685 | /* The block structure and indentation here is purely for |
25686 | readability; it mirrors the structure of the rtx. */ |
25687 | tree mem_expr; |
25688 | { |
25689 | rtx pat = PATTERN (insn); |
25690 | ASSERT_EQ (SET, GET_CODE (pat)); |
25691 | { |
25692 | rtx dest = SET_DEST (pat); |
25693 | ASSERT_EQ (MEM, GET_CODE (dest)); |
25694 | /* Verify the "/c" was parsed. */ |
25695 | ASSERT_TRUE (RTX_FLAG (dest, call)); |
25696 | ASSERT_EQ (SImode, GET_MODE (dest)); |
25697 | { |
25698 | rtx addr = XEXP (dest, 0); |
25699 | ASSERT_EQ (PLUS, GET_CODE (addr)); |
25700 | ASSERT_EQ (DImode, GET_MODE (addr)); |
25701 | { |
25702 | rtx lhs = XEXP (addr, 0); |
25703 | /* Verify that the "frame" REG was consolidated. */ |
25704 | ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs); |
25705 | } |
25706 | { |
25707 | rtx rhs = XEXP (addr, 1); |
25708 | ASSERT_EQ (CONST_INT, GET_CODE (rhs)); |
25709 | ASSERT_EQ (-4, INTVAL (rhs)); |
25710 | } |
25711 | } |
25712 | /* Verify the "[1 i+0 S4 A32]" was parsed. */ |
25713 | ASSERT_EQ (1, MEM_ALIAS_SET (dest)); |
25714 | /* "i" should have been handled by synthesizing a global int |
25715 | variable named "i". */ |
25716 | mem_expr = MEM_EXPR (dest); |
25717 | ASSERT_NE (mem_expr, NULL); |
25718 | ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr)); |
25719 | ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr)); |
25720 | ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr))); |
25721 | ASSERT_STREQ ("i" , IDENTIFIER_POINTER (DECL_NAME (mem_expr))); |
25722 | /* "+0". */ |
25723 | ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest)); |
25724 | ASSERT_EQ (0, MEM_OFFSET (dest)); |
25725 | /* "S4". */ |
25726 | ASSERT_EQ (4, MEM_SIZE (dest)); |
25727 | /* "A32. */ |
25728 | ASSERT_EQ (32, MEM_ALIGN (dest)); |
25729 | } |
25730 | { |
25731 | rtx src = SET_SRC (pat); |
25732 | ASSERT_EQ (REG, GET_CODE (src)); |
25733 | ASSERT_EQ (SImode, GET_MODE (src)); |
25734 | ASSERT_EQ (5, REGNO (src)); |
25735 | tree reg_expr = REG_EXPR (src); |
25736 | /* "i" here should point to the same var as for the MEM_EXPR. */ |
25737 | ASSERT_EQ (reg_expr, mem_expr); |
25738 | } |
25739 | } |
25740 | } |
25741 | |
25742 | /* Verify that the RTL loader copes with a call_insn dump. |
25743 | This test is target-specific since the dump contains a target-specific |
25744 | hard reg name. */ |
25745 | |
25746 | static void |
25747 | ix86_test_loading_call_insn () |
25748 | { |
25749 | /* The test dump includes register "xmm0", where requires TARGET_SSE |
25750 | to exist. */ |
25751 | if (!TARGET_SSE) |
25752 | return; |
25753 | |
25754 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/call-insn.rtl" )); |
25755 | |
25756 | rtx_insn *insn = get_insns (); |
25757 | ASSERT_EQ (CALL_INSN, GET_CODE (insn)); |
25758 | |
25759 | /* "/j". */ |
25760 | ASSERT_TRUE (RTX_FLAG (insn, jump)); |
25761 | |
25762 | rtx pat = PATTERN (insn); |
25763 | ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat))); |
25764 | |
25765 | /* Verify REG_NOTES. */ |
25766 | { |
25767 | /* "(expr_list:REG_CALL_DECL". */ |
25768 | ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn))); |
25769 | rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn)); |
25770 | ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0)); |
25771 | |
25772 | /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */ |
25773 | rtx_expr_list *note1 = note0->next (); |
25774 | ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1)); |
25775 | |
25776 | ASSERT_EQ (NULL, note1->next ()); |
25777 | } |
25778 | |
25779 | /* Verify CALL_INSN_FUNCTION_USAGE. */ |
25780 | { |
25781 | /* "(expr_list:DF (use (reg:DF 21 xmm0))". */ |
25782 | rtx_expr_list *usage |
25783 | = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn)); |
25784 | ASSERT_EQ (EXPR_LIST, GET_CODE (usage)); |
25785 | ASSERT_EQ (DFmode, GET_MODE (usage)); |
25786 | ASSERT_EQ (USE, GET_CODE (usage->element ())); |
25787 | ASSERT_EQ (NULL, usage->next ()); |
25788 | } |
25789 | } |
25790 | |
25791 | /* Verify that the RTL loader copes a dump from print_rtx_function. |
25792 | This test is target-specific since the dump contains target-specific |
25793 | hard reg names. */ |
25794 | |
25795 | static void |
25796 | ix86_test_loading_full_dump () |
25797 | { |
25798 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/times-two.rtl" )); |
25799 | |
25800 | ASSERT_STREQ ("times_two" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
25801 | |
25802 | rtx_insn *insn_1 = get_insn_by_uid (uid: 1); |
25803 | ASSERT_EQ (NOTE, GET_CODE (insn_1)); |
25804 | |
25805 | rtx_insn *insn_7 = get_insn_by_uid (uid: 7); |
25806 | ASSERT_EQ (INSN, GET_CODE (insn_7)); |
25807 | ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7))); |
25808 | |
25809 | rtx_insn *insn_15 = get_insn_by_uid (uid: 15); |
25810 | ASSERT_EQ (INSN, GET_CODE (insn_15)); |
25811 | ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15))); |
25812 | |
25813 | /* Verify crtl->return_rtx. */ |
25814 | ASSERT_EQ (REG, GET_CODE (crtl->return_rtx)); |
25815 | ASSERT_EQ (0, REGNO (crtl->return_rtx)); |
25816 | ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx)); |
25817 | } |
25818 | |
25819 | /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns. |
25820 | In particular, verify that it correctly loads the 2nd operand. |
25821 | This test is target-specific since these are machine-specific |
25822 | operands (and enums). */ |
25823 | |
25824 | static void |
25825 | ix86_test_loading_unspec () |
25826 | { |
25827 | rtl_dump_test t (SELFTEST_LOCATION, locate_file (path: "x86_64/unspec.rtl" )); |
25828 | |
25829 | ASSERT_STREQ ("test_unspec" , IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
25830 | |
25831 | ASSERT_TRUE (cfun); |
25832 | |
25833 | /* Test of an UNSPEC. */ |
25834 | rtx_insn *insn = get_insns (); |
25835 | ASSERT_EQ (INSN, GET_CODE (insn)); |
25836 | rtx set = single_set (insn); |
25837 | ASSERT_NE (NULL, set); |
25838 | rtx dst = SET_DEST (set); |
25839 | ASSERT_EQ (MEM, GET_CODE (dst)); |
25840 | rtx src = SET_SRC (set); |
25841 | ASSERT_EQ (UNSPEC, GET_CODE (src)); |
25842 | ASSERT_EQ (BLKmode, GET_MODE (src)); |
25843 | ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1)); |
25844 | |
25845 | rtx v0 = XVECEXP (src, 0, 0); |
25846 | |
25847 | /* Verify that the two uses of the first SCRATCH have pointer |
25848 | equality. */ |
25849 | rtx scratch_a = XEXP (dst, 0); |
25850 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_a)); |
25851 | |
25852 | rtx scratch_b = XEXP (v0, 0); |
25853 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_b)); |
25854 | |
25855 | ASSERT_EQ (scratch_a, scratch_b); |
25856 | |
25857 | /* Verify that the two mems are thus treated as equal. */ |
25858 | ASSERT_TRUE (rtx_equal_p (dst, v0)); |
25859 | |
25860 | /* Verify that the insn is recognized. */ |
25861 | ASSERT_NE(-1, recog_memoized (insn)); |
25862 | |
25863 | /* Test of an UNSPEC_VOLATILE, which has its own enum values. */ |
25864 | insn = NEXT_INSN (insn); |
25865 | ASSERT_EQ (INSN, GET_CODE (insn)); |
25866 | |
25867 | set = single_set (insn); |
25868 | ASSERT_NE (NULL, set); |
25869 | |
25870 | src = SET_SRC (set); |
25871 | ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src)); |
25872 | ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1)); |
25873 | } |
25874 | |
/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  /* Tests of the compact RTL dumper.  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
25890 | |
25891 | } // namespace selftest |
25892 | |
25893 | #endif /* CHECKING_P */ |
25894 | |
25895 | /* Initialize the GCC target structure. */ |
25896 | #undef TARGET_RETURN_IN_MEMORY |
25897 | #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
25898 | |
25899 | #undef TARGET_LEGITIMIZE_ADDRESS |
25900 | #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address |
25901 | |
25902 | #undef TARGET_ATTRIBUTE_TABLE |
25903 | #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
25904 | #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P |
25905 | #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true |
25906 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
25907 | # undef TARGET_MERGE_DECL_ATTRIBUTES |
25908 | # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
25909 | #endif |
25910 | |
25911 | #undef TARGET_INVALID_CONVERSION |
25912 | #define TARGET_INVALID_CONVERSION ix86_invalid_conversion |
25913 | |
25914 | #undef TARGET_INVALID_UNARY_OP |
25915 | #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op |
25916 | |
25917 | #undef TARGET_INVALID_BINARY_OP |
25918 | #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op |
25919 | |
25920 | #undef TARGET_COMP_TYPE_ATTRIBUTES |
25921 | #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes |
25922 | |
25923 | #undef TARGET_INIT_BUILTINS |
25924 | #define TARGET_INIT_BUILTINS ix86_init_builtins |
25925 | #undef TARGET_BUILTIN_DECL |
25926 | #define TARGET_BUILTIN_DECL ix86_builtin_decl |
25927 | #undef TARGET_EXPAND_BUILTIN |
25928 | #define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
25929 | |
25930 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION |
25931 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ |
25932 | ix86_builtin_vectorized_function |
25933 | |
25934 | #undef TARGET_VECTORIZE_BUILTIN_GATHER |
25935 | #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather |
25936 | |
25937 | #undef TARGET_VECTORIZE_BUILTIN_SCATTER |
25938 | #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter |
25939 | |
25940 | #undef TARGET_BUILTIN_RECIPROCAL |
25941 | #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal |
25942 | |
25943 | #undef TARGET_ASM_FUNCTION_EPILOGUE |
25944 | #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue |
25945 | |
25946 | #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY |
25947 | #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \ |
25948 | ix86_print_patchable_function_entry |
25949 | |
25950 | #undef TARGET_ENCODE_SECTION_INFO |
25951 | #ifndef SUBTARGET_ENCODE_SECTION_INFO |
25952 | #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info |
25953 | #else |
25954 | #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO |
25955 | #endif |
25956 | |
25957 | #undef TARGET_ASM_OPEN_PAREN |
25958 | #define TARGET_ASM_OPEN_PAREN "" |
25959 | #undef TARGET_ASM_CLOSE_PAREN |
25960 | #define TARGET_ASM_CLOSE_PAREN "" |
25961 | |
25962 | #undef TARGET_ASM_BYTE_OP |
25963 | #define TARGET_ASM_BYTE_OP ASM_BYTE |
25964 | |
25965 | #undef TARGET_ASM_ALIGNED_HI_OP |
25966 | #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
25967 | #undef TARGET_ASM_ALIGNED_SI_OP |
25968 | #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG |
25969 | #ifdef ASM_QUAD |
25970 | #undef TARGET_ASM_ALIGNED_DI_OP |
25971 | #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD |
25972 | #endif |
25973 | |
25974 | #undef TARGET_PROFILE_BEFORE_PROLOGUE |
25975 | #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue |
25976 | |
25977 | #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME |
25978 | #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name |
25979 | |
25980 | #undef TARGET_ASM_UNALIGNED_HI_OP |
25981 | #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP |
25982 | #undef TARGET_ASM_UNALIGNED_SI_OP |
25983 | #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP |
25984 | #undef TARGET_ASM_UNALIGNED_DI_OP |
25985 | #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP |
25986 | |
25987 | #undef TARGET_PRINT_OPERAND |
25988 | #define TARGET_PRINT_OPERAND ix86_print_operand |
25989 | #undef TARGET_PRINT_OPERAND_ADDRESS |
25990 | #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address |
25991 | #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
25992 | #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p |
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
25995 | |
25996 | #undef TARGET_SCHED_INIT_GLOBAL |
25997 | #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global |
25998 | #undef TARGET_SCHED_ADJUST_COST |
25999 | #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost |
26000 | #undef TARGET_SCHED_ISSUE_RATE |
26001 | #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate |
26002 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
26003 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
26004 | ia32_multipass_dfa_lookahead |
26005 | #undef TARGET_SCHED_MACRO_FUSION_P |
26006 | #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p |
26007 | #undef TARGET_SCHED_MACRO_FUSION_PAIR_P |
26008 | #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p |
26009 | |
26010 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
26011 | #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall |
26012 | |
26013 | #undef TARGET_MEMMODEL_CHECK |
26014 | #define TARGET_MEMMODEL_CHECK ix86_memmodel_check |
26015 | |
26016 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
26017 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv |
26018 | |
26019 | #ifdef HAVE_AS_TLS |
26020 | #undef TARGET_HAVE_TLS |
26021 | #define TARGET_HAVE_TLS true |
26022 | #endif |
26023 | #undef TARGET_CANNOT_FORCE_CONST_MEM |
26024 | #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem |
26025 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P |
26026 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true |
26027 | |
26028 | #undef TARGET_DELEGITIMIZE_ADDRESS |
26029 | #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address |
26030 | |
26031 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P |
26032 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p |
26033 | |
26034 | #undef TARGET_MS_BITFIELD_LAYOUT_P |
26035 | #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p |
26036 | |
26037 | #if TARGET_MACHO |
26038 | #undef TARGET_BINDS_LOCAL_P |
26039 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p |
26040 | #else |
26041 | #undef TARGET_BINDS_LOCAL_P |
26042 | #define TARGET_BINDS_LOCAL_P ix86_binds_local_p |
26043 | #endif |
26044 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
26045 | #undef TARGET_BINDS_LOCAL_P |
26046 | #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p |
26047 | #endif |
26048 | |
26049 | #undef TARGET_ASM_OUTPUT_MI_THUNK |
26050 | #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk |
26051 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
26052 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk |
26053 | |
26054 | #undef TARGET_ASM_FILE_START |
26055 | #define TARGET_ASM_FILE_START x86_file_start |
26056 | |
26057 | #undef TARGET_OPTION_OVERRIDE |
26058 | #define TARGET_OPTION_OVERRIDE ix86_option_override |
26059 | |
26060 | #undef TARGET_REGISTER_MOVE_COST |
26061 | #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost |
26062 | #undef TARGET_MEMORY_MOVE_COST |
26063 | #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost |
26064 | #undef TARGET_RTX_COSTS |
26065 | #define TARGET_RTX_COSTS ix86_rtx_costs |
26066 | #undef TARGET_ADDRESS_COST |
26067 | #define TARGET_ADDRESS_COST ix86_address_cost |
26068 | |
26069 | #undef TARGET_OVERLAP_OP_BY_PIECES_P |
26070 | #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true |
26071 | |
26072 | #undef TARGET_FLAGS_REGNUM |
26073 | #define TARGET_FLAGS_REGNUM FLAGS_REG |
26074 | #undef TARGET_FIXED_CONDITION_CODE_REGS |
26075 | #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs |
26076 | #undef TARGET_CC_MODES_COMPATIBLE |
26077 | #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible |
26078 | |
26079 | #undef TARGET_MACHINE_DEPENDENT_REORG |
26080 | #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg |
26081 | |
26082 | #undef TARGET_BUILD_BUILTIN_VA_LIST |
26083 | #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list |
26084 | |
26085 | #undef TARGET_FOLD_BUILTIN |
26086 | #define TARGET_FOLD_BUILTIN ix86_fold_builtin |
26087 | |
26088 | #undef TARGET_GIMPLE_FOLD_BUILTIN |
26089 | #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin |
26090 | |
26091 | #undef TARGET_COMPARE_VERSION_PRIORITY |
26092 | #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority |
26093 | |
26094 | #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY |
26095 | #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ |
26096 | ix86_generate_version_dispatcher_body |
26097 | |
26098 | #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER |
26099 | #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ |
26100 | ix86_get_function_versions_dispatcher |
26101 | |
26102 | #undef TARGET_ENUM_VA_LIST_P |
26103 | #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list |
26104 | |
26105 | #undef TARGET_FN_ABI_VA_LIST |
26106 | #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list |
26107 | |
26108 | #undef TARGET_CANONICAL_VA_LIST_TYPE |
26109 | #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type |
26110 | |
26111 | #undef TARGET_EXPAND_BUILTIN_VA_START |
26112 | #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start |
26113 | |
26114 | #undef TARGET_MD_ASM_ADJUST |
26115 | #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust |
26116 | |
26117 | #undef TARGET_C_EXCESS_PRECISION |
26118 | #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision |
26119 | #undef TARGET_C_BITINT_TYPE_INFO |
26120 | #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info |
26121 | #undef TARGET_PROMOTE_PROTOTYPES |
26122 | #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true |
26123 | #undef TARGET_PUSH_ARGUMENT |
26124 | #define TARGET_PUSH_ARGUMENT ix86_push_argument |
26125 | #undef TARGET_SETUP_INCOMING_VARARGS |
26126 | #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs |
26127 | #undef TARGET_MUST_PASS_IN_STACK |
26128 | #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack |
26129 | #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
26130 | #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args |
26131 | #undef TARGET_FUNCTION_ARG_ADVANCE |
26132 | #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance |
26133 | #undef TARGET_FUNCTION_ARG |
26134 | #define TARGET_FUNCTION_ARG ix86_function_arg |
26135 | #undef TARGET_INIT_PIC_REG |
26136 | #define TARGET_INIT_PIC_REG ix86_init_pic_reg |
26137 | #undef TARGET_USE_PSEUDO_PIC_REG |
26138 | #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg |
26139 | #undef TARGET_FUNCTION_ARG_BOUNDARY |
26140 | #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary |
26141 | #undef TARGET_PASS_BY_REFERENCE |
26142 | #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference |
26143 | #undef TARGET_INTERNAL_ARG_POINTER |
26144 | #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer |
26145 | #undef TARGET_UPDATE_STACK_BOUNDARY |
26146 | #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary |
26147 | #undef TARGET_GET_DRAP_RTX |
26148 | #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx |
26149 | #undef TARGET_STRICT_ARGUMENT_NAMING |
26150 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true |
26151 | #undef TARGET_STATIC_CHAIN |
26152 | #define TARGET_STATIC_CHAIN ix86_static_chain |
26153 | #undef TARGET_TRAMPOLINE_INIT |
26154 | #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init |
26155 | #undef TARGET_RETURN_POPS_ARGS |
26156 | #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args |
26157 | |
26158 | #undef TARGET_WARN_FUNC_RETURN |
26159 | #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return |
26160 | |
26161 | #undef TARGET_LEGITIMATE_COMBINED_INSN |
26162 | #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn |
26163 | |
26164 | #undef TARGET_ASAN_SHADOW_OFFSET |
26165 | #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset |
26166 | |
26167 | #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
26168 | #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg |
26169 | |
26170 | #undef TARGET_SCALAR_MODE_SUPPORTED_P |
26171 | #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p |
26172 | |
26173 | #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P |
26174 | #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ |
26175 | ix86_libgcc_floating_mode_supported_p |
26176 | |
26177 | #undef TARGET_VECTOR_MODE_SUPPORTED_P |
26178 | #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p |
26179 | |
26180 | #undef TARGET_C_MODE_FOR_SUFFIX |
26181 | #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix |
26182 | |
26183 | #ifdef HAVE_AS_TLS |
26184 | #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
26185 | #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel |
26186 | #endif |
26187 | |
26188 | #ifdef SUBTARGET_INSERT_ATTRIBUTES |
26189 | #undef TARGET_INSERT_ATTRIBUTES |
26190 | #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES |
26191 | #endif |
26192 | |
26193 | #undef TARGET_MANGLE_TYPE |
26194 | #define TARGET_MANGLE_TYPE ix86_mangle_type |
26195 | |
26196 | #undef TARGET_EMIT_SUPPORT_TINFOS |
26197 | #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos |
26198 | |
26199 | #undef TARGET_STACK_PROTECT_GUARD |
26200 | #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard |
26201 | |
26202 | #if !TARGET_MACHO |
26203 | #undef TARGET_STACK_PROTECT_FAIL |
26204 | #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail |
26205 | #endif |
26206 | |
26207 | #undef TARGET_FUNCTION_VALUE |
26208 | #define TARGET_FUNCTION_VALUE ix86_function_value |
26209 | |
26210 | #undef TARGET_FUNCTION_VALUE_REGNO_P |
26211 | #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p |
26212 | |
26213 | #undef TARGET_ZERO_CALL_USED_REGS |
26214 | #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs |
26215 | |
26216 | #undef TARGET_PROMOTE_FUNCTION_MODE |
26217 | #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode |
26218 | |
26219 | #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE |
26220 | #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change |
26221 | |
26222 | #undef TARGET_MEMBER_TYPE_FORCES_BLK |
26223 | #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk |
26224 | |
26225 | #undef TARGET_INSTANTIATE_DECLS |
26226 | #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls |
26227 | |
26228 | #undef TARGET_SECONDARY_RELOAD |
26229 | #define TARGET_SECONDARY_RELOAD ix86_secondary_reload |
26230 | #undef TARGET_SECONDARY_MEMORY_NEEDED |
26231 | #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed |
26232 | #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE |
26233 | #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode |
26234 | |
26235 | #undef TARGET_CLASS_MAX_NREGS |
26236 | #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs |
26237 | |
26238 | #undef TARGET_PREFERRED_RELOAD_CLASS |
26239 | #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class |
26240 | #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS |
26241 | #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class |
26242 | #undef TARGET_CLASS_LIKELY_SPILLED_P |
26243 | #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p |
26244 | |
26245 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST |
26246 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ |
26247 | ix86_builtin_vectorization_cost |
26248 | #undef TARGET_VECTORIZE_VEC_PERM_CONST |
26249 | #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const |
26250 | #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE |
26251 | #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ |
26252 | ix86_preferred_simd_mode |
26253 | #undef TARGET_VECTORIZE_SPLIT_REDUCTION |
26254 | #define TARGET_VECTORIZE_SPLIT_REDUCTION \ |
26255 | ix86_split_reduction |
26256 | #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES |
26257 | #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ |
26258 | ix86_autovectorize_vector_modes |
26259 | #undef TARGET_VECTORIZE_GET_MASK_MODE |
26260 | #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode |
26261 | #undef TARGET_VECTORIZE_CREATE_COSTS |
26262 | #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs |
26263 | |
26264 | #undef TARGET_SET_CURRENT_FUNCTION |
26265 | #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function |
26266 | |
26267 | #undef TARGET_OPTION_VALID_ATTRIBUTE_P |
26268 | #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p |
26269 | |
26270 | #undef TARGET_OPTION_SAVE |
26271 | #define TARGET_OPTION_SAVE ix86_function_specific_save |
26272 | |
26273 | #undef TARGET_OPTION_RESTORE |
26274 | #define TARGET_OPTION_RESTORE ix86_function_specific_restore |
26275 | |
26276 | #undef TARGET_OPTION_POST_STREAM_IN |
26277 | #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in |
26278 | |
26279 | #undef TARGET_OPTION_PRINT |
26280 | #define TARGET_OPTION_PRINT ix86_function_specific_print |
26281 | |
26282 | #undef TARGET_OPTION_FUNCTION_VERSIONS |
26283 | #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions |
26284 | |
26285 | #undef TARGET_CAN_INLINE_P |
26286 | #define TARGET_CAN_INLINE_P ix86_can_inline_p |
26287 | |
26288 | #undef TARGET_LEGITIMATE_ADDRESS_P |
26289 | #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p |
26290 | |
26291 | #undef TARGET_REGISTER_PRIORITY |
26292 | #define TARGET_REGISTER_PRIORITY ix86_register_priority |
26293 | |
26294 | #undef TARGET_REGISTER_USAGE_LEVELING_P |
26295 | #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true |
26296 | |
26297 | #undef TARGET_LEGITIMATE_CONSTANT_P |
26298 | #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p |
26299 | |
26300 | #undef TARGET_COMPUTE_FRAME_LAYOUT |
26301 | #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout |
26302 | |
26303 | #undef TARGET_FRAME_POINTER_REQUIRED |
26304 | #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required |
26305 | |
26306 | #undef TARGET_CAN_ELIMINATE |
26307 | #define TARGET_CAN_ELIMINATE ix86_can_eliminate |
26308 | |
#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26311 | |
26312 | #undef TARGET_ASM_CODE_END |
26313 | #define TARGET_ASM_CODE_END ix86_code_end |
26314 | |
26315 | #undef TARGET_CONDITIONAL_REGISTER_USAGE |
26316 | #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage |
26317 | |
26318 | #undef TARGET_CANONICALIZE_COMPARISON |
26319 | #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison |
26320 | |
26321 | #undef TARGET_LOOP_UNROLL_ADJUST |
26322 | #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust |
26323 | |
26324 | /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */ |
26325 | #undef TARGET_SPILL_CLASS |
26326 | #define TARGET_SPILL_CLASS ix86_spill_class |
26327 | |
26328 | #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN |
26329 | #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ |
26330 | ix86_simd_clone_compute_vecsize_and_simdlen |
26331 | |
26332 | #undef TARGET_SIMD_CLONE_ADJUST |
26333 | #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust |
26334 | |
26335 | #undef TARGET_SIMD_CLONE_USABLE |
26336 | #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable |
26337 | |
26338 | #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA |
26339 | #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa |
26340 | |
26341 | #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P |
26342 | #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ |
26343 | ix86_float_exceptions_rounding_supported_p |
26344 | |
26345 | #undef TARGET_MODE_EMIT |
26346 | #define TARGET_MODE_EMIT ix86_emit_mode_set |
26347 | |
26348 | #undef TARGET_MODE_NEEDED |
26349 | #define TARGET_MODE_NEEDED ix86_mode_needed |
26350 | |
26351 | #undef TARGET_MODE_AFTER |
26352 | #define TARGET_MODE_AFTER ix86_mode_after |
26353 | |
26354 | #undef TARGET_MODE_ENTRY |
26355 | #define TARGET_MODE_ENTRY ix86_mode_entry |
26356 | |
26357 | #undef TARGET_MODE_EXIT |
26358 | #define TARGET_MODE_EXIT ix86_mode_exit |
26359 | |
26360 | #undef TARGET_MODE_PRIORITY |
26361 | #define TARGET_MODE_PRIORITY ix86_mode_priority |
26362 | |
26363 | #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS |
26364 | #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true |
26365 | |
26366 | #undef TARGET_OFFLOAD_OPTIONS |
26367 | #define TARGET_OFFLOAD_OPTIONS \ |
26368 | ix86_offload_options |
26369 | |
26370 | #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT |
26371 | #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512 |
26372 | |
26373 | #undef TARGET_OPTAB_SUPPORTED_P |
26374 | #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p |
26375 | |
26376 | #undef TARGET_HARD_REGNO_SCRATCH_OK |
26377 | #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok |
26378 | |
26379 | #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS |
26380 | #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST |
26381 | |
26382 | #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID |
26383 | #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid |
26384 | |
26385 | #undef TARGET_INIT_LIBFUNCS |
26386 | #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs |
26387 | |
26388 | #undef TARGET_EXPAND_DIVMOD_LIBFUNC |
26389 | #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc |
26390 | |
26391 | #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST |
26392 | #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost |
26393 | |
26394 | #undef TARGET_NOCE_CONVERSION_PROFITABLE_P |
26395 | #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p |
26396 | |
26397 | #undef TARGET_HARD_REGNO_NREGS |
26398 | #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs |
26399 | #undef TARGET_HARD_REGNO_MODE_OK |
26400 | #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok |
26401 | |
26402 | #undef TARGET_MODES_TIEABLE_P |
26403 | #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p |
26404 | |
26405 | #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED |
26406 | #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ |
26407 | ix86_hard_regno_call_part_clobbered |
26408 | |
26409 | #undef TARGET_INSN_CALLEE_ABI |
26410 | #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi |
26411 | |
26412 | #undef TARGET_CAN_CHANGE_MODE_CLASS |
26413 | #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class |
26414 | |
26415 | #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT |
26416 | #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment |
26417 | |
26418 | #undef TARGET_STATIC_RTX_ALIGNMENT |
26419 | #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment |
26420 | #undef TARGET_CONSTANT_ALIGNMENT |
26421 | #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment |
26422 | |
26423 | #undef TARGET_EMPTY_RECORD_P |
26424 | #define TARGET_EMPTY_RECORD_P ix86_is_empty_record |
26425 | |
26426 | #undef TARGET_WARN_PARAMETER_PASSING_ABI |
26427 | #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi |
26428 | |
26429 | #undef TARGET_GET_MULTILIB_ABI_NAME |
26430 | #define TARGET_GET_MULTILIB_ABI_NAME \ |
26431 | ix86_get_multilib_abi_name |
26432 | |
26433 | #undef TARGET_IFUNC_REF_LOCAL_OK |
26434 | #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok |
26435 | |
26436 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
26437 | # undef TARGET_ASM_RELOC_RW_MASK |
26438 | # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask |
26439 | #endif |
26440 | |
26441 | #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES |
26442 | #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses |
26443 | |
26444 | #undef TARGET_MEMTAG_ADD_TAG |
26445 | #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag |
26446 | |
26447 | #undef TARGET_MEMTAG_SET_TAG |
26448 | #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag |
26449 | |
26450 | #undef TARGET_MEMTAG_EXTRACT_TAG |
26451 | #define ix86_memtag_extract_tag |
26452 | |
26453 | #undef TARGET_MEMTAG_UNTAGGED_POINTER |
26454 | #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer |
26455 | |
26456 | #undef TARGET_MEMTAG_TAG_SIZE |
26457 | #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size |
26458 | |
26459 | static bool |
26460 | ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) |
26461 | { |
26462 | #ifdef OPTION_GLIBC |
26463 | if (OPTION_GLIBC) |
26464 | return (built_in_function)fcode == BUILT_IN_MEMPCPY; |
26465 | else |
26466 | return false; |
26467 | #else |
26468 | return false; |
26469 | #endif |
26470 | } |
26471 | |
/* Register the hook defined just above.  */
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
26474 | |
/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  Return the maximum error
   in ulps of the libm function CFN in mode MODE; BOUNDARY_P is true
   when asking about values near the boundaries of the valid result
   range.  Returning ~0U means the error is unbounded/unknown.  */

static unsigned
ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
			      bool boundary_p)
{
  /* Whether we are compiling against glibc; without OPTION_GLIBC we
     cannot tell, so conservatively assume not.  */
#ifdef OPTION_GLIBC
  bool glibc_p = OPTION_GLIBC;
#else
  bool glibc_p = false;
#endif
  if (glibc_p)
    {
      /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
      unsigned int libmvec_ret = 0;
      /* This flag combination is what -ffast-math (minus a few
	 unrelated flags) implies, i.e. when libmvec may be used.  */
      if (!flag_trapping_math
	  && flag_unsafe_math_optimizations
	  && flag_finite_math_only
	  && !flag_signed_zeros
	  && !flag_errno_math)
	switch (cfn)
	  {
	  /* Only sin/cos (and their _FN variants) have vectorized
	     libmvec counterparts whose error we account for here.  */
	  CASE_CFN_COS:
	  CASE_CFN_COS_FN:
	  CASE_CFN_SIN:
	  CASE_CFN_SIN_FN:
	    if (!boundary_p)
	      {
		/* With non-default rounding modes, libmvec provides
		   complete garbage in results.  E.g.
		   _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
		   returns 0.00333309174f rather than 1.40129846e-45f.  */
		if (flag_rounding_math)
		  return ~0U;
		/* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
		   claims libmvec maximum error is 4ulps.
		   My own random testing indicates 2ulps for SFmode and
		   0.5ulps for DFmode, but let's go with the 4ulps.  */
		libmvec_ret = 4;
	      }
	    break;
	  default:
	    break;
	  }
      /* The final bound is the worse of the scalar glibc bound and the
	 libmvec bound, since the vectorizer may pick either form.  */
      unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
							      boundary_p);
      return MAX (ret, libmvec_ret);
    }
  return default_libm_function_max_error (cfn, mode, boundary_p);
}
26523 | |
/* Register the hook defined just above.  */
#undef TARGET_LIBM_FUNCTION_MAX_ERROR
#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

/* The global target-hook vector; TARGET_INITIALIZER expands to an
   initializer built from all the TARGET_* macros defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots generated by gengtype for this file.  */
#include "gt-i386.h"