1 | /* Subroutines used for code generation on IA-32. |
---|---|
2 | Copyright (C) 1988-2025 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by |
8 | the Free Software Foundation; either version 3, or (at your option) |
9 | any later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #define INCLUDE_STRING |
21 | #define IN_TARGET_CODE 1 |
22 | |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "memmodel.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "cfgloop.h" |
33 | #include "df.h" |
34 | #include "tm_p.h" |
35 | #include "stringpool.h" |
36 | #include "expmed.h" |
37 | #include "optabs.h" |
38 | #include "regs.h" |
39 | #include "emit-rtl.h" |
40 | #include "recog.h" |
41 | #include "cgraph.h" |
42 | #include "diagnostic.h" |
43 | #include "cfgbuild.h" |
44 | #include "alias.h" |
45 | #include "fold-const.h" |
46 | #include "attribs.h" |
47 | #include "calls.h" |
48 | #include "stor-layout.h" |
49 | #include "varasm.h" |
50 | #include "output.h" |
51 | #include "insn-attr.h" |
52 | #include "flags.h" |
53 | #include "except.h" |
54 | #include "explow.h" |
55 | #include "expr.h" |
56 | #include "cfgrtl.h" |
57 | #include "common/common-target.h" |
58 | #include "langhooks.h" |
59 | #include "reload.h" |
60 | #include "gimplify.h" |
61 | #include "dwarf2.h" |
62 | #include "tm-constrs.h" |
63 | #include "cselib.h" |
64 | #include "sched-int.h" |
65 | #include "opts.h" |
66 | #include "tree-pass.h" |
67 | #include "context.h" |
68 | #include "pass_manager.h" |
69 | #include "target-globals.h" |
70 | #include "gimple-iterator.h" |
71 | #include "gimple-fold.h" |
72 | #include "tree-vectorizer.h" |
73 | #include "shrink-wrap.h" |
74 | #include "builtins.h" |
75 | #include "rtl-iter.h" |
76 | #include "tree-iterator.h" |
77 | #include "dbgcnt.h" |
78 | #include "case-cfn-macros.h" |
79 | #include "dojump.h" |
80 | #include "fold-const-call.h" |
81 | #include "tree-vrp.h" |
82 | #include "tree-ssanames.h" |
83 | #include "selftest.h" |
84 | #include "selftest-rtl.h" |
85 | #include "print-rtl.h" |
86 | #include "intl.h" |
87 | #include "ifcvt.h" |
88 | #include "symbol-summary.h" |
89 | #include "sreal.h" |
90 | #include "ipa-cp.h" |
91 | #include "ipa-prop.h" |
92 | #include "ipa-fnsummary.h" |
93 | #include "wide-int-bitmask.h" |
94 | #include "tree-vector-builder.h" |
95 | #include "debug.h" |
96 | #include "dwarf2out.h" |
97 | #include "i386-options.h" |
98 | #include "i386-builtins.h" |
99 | #include "i386-expand.h" |
100 | #include "i386-features.h" |
101 | #include "function-abi.h" |
102 | #include "rtl-error.h" |
103 | #include "gimple-pretty-print.h" |
104 | |
105 | /* This file should be included last. */ |
106 | #include "target-def.h" |
107 | |
108 | static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); |
109 | static void ix86_emit_restore_reg_using_pop (rtx, bool = false); |
110 | |
111 | |
112 | #ifndef CHECK_STACK_LIMIT |
113 | #define CHECK_STACK_LIMIT (-1) |
114 | #endif |
115 | |
116 | /* Return index of given mode in mult and division cost tables. */ |
117 | #define MODE_INDEX(mode) \ |
118 | ((mode) == QImode ? 0 \ |
119 | : (mode) == HImode ? 1 \ |
120 | : (mode) == SImode ? 2 \ |
121 | : (mode) == DImode ? 3 \ |
122 | : 4) |
123 | |
124 | |
125 | /* Set by -mtune. */ |
126 | const struct processor_costs *ix86_tune_cost = NULL; |
127 | |
128 | /* Set by -mtune or -Os. */ |
129 | const struct processor_costs *ix86_cost = NULL; |
130 | |
131 | /* In case the average insn count for single function invocation is |
132 | lower than this constant, emit fast (but longer) prologue and |
133 | epilogue code. */ |
134 | #define FAST_PROLOGUE_INSN_COUNT 20 |
135 | |
136 | /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ |
137 | static const char *const qi_reg_name[] = QI_REGISTER_NAMES; |
138 | static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; |
139 | static const char *const hi_reg_name[] = HI_REGISTER_NAMES; |
140 | |
141 | /* Array of the smallest class containing reg number REGNO, indexed by |
142 | REGNO. Used by REGNO_REG_CLASS in i386.h. */ |
143 | |
144 | enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = |
145 | { |
146 | /* ax, dx, cx, bx */ |
147 | AREG, DREG, CREG, BREG, |
148 | /* si, di, bp, sp */ |
149 | SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, |
150 | /* FP registers */ |
151 | FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, |
152 | FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, |
153 | /* arg pointer, flags, fpsr, frame */ |
154 | NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS, |
155 | /* SSE registers */ |
156 | SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, |
157 | SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
158 | /* MMX registers */ |
159 | MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
160 | MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
161 | /* REX registers */ |
162 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
163 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
164 | /* SSE REX registers */ |
165 | SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
166 | SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
167 | /* AVX-512 SSE registers */ |
168 | ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
169 | ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
170 | ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
171 | ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
172 | /* Mask registers. */ |
173 | ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS, |
174 | MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS, |
175 | /* REX2 registers */ |
176 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
177 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
178 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
179 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
180 | }; |
181 | |
182 | /* The "default" register map used in 32bit mode. */ |
183 | |
184 | unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] = |
185 | { |
186 | /* general regs */ |
187 | 0, 2, 1, 3, 6, 7, 4, 5, |
188 | /* fp regs */ |
189 | 12, 13, 14, 15, 16, 17, 18, 19, |
190 | /* arg, flags, fpsr, frame */ |
191 | IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
192 | IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
193 | /* SSE */ |
194 | 21, 22, 23, 24, 25, 26, 27, 28, |
195 | /* MMX */ |
196 | 29, 30, 31, 32, 33, 34, 35, 36, |
197 | /* extended integer registers */ |
198 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
199 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
200 | /* extended sse registers */ |
201 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
202 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
203 | /* AVX-512 registers 16-23 */ |
204 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
205 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
206 | /* AVX-512 registers 24-31 */ |
207 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
208 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
209 | /* Mask registers */ |
210 | 93, 94, 95, 96, 97, 98, 99, 100 |
211 | }; |
212 | |
213 | /* The "default" register map used in 64bit mode. */ |
214 | |
215 | unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] = |
216 | { |
217 | /* general regs */ |
218 | 0, 1, 2, 3, 4, 5, 6, 7, |
219 | /* fp regs */ |
220 | 33, 34, 35, 36, 37, 38, 39, 40, |
221 | /* arg, flags, fpsr, frame */ |
222 | IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
223 | IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
224 | /* SSE */ |
225 | 17, 18, 19, 20, 21, 22, 23, 24, |
226 | /* MMX */ |
227 | 41, 42, 43, 44, 45, 46, 47, 48, |
228 | /* extended integer registers */ |
229 | 8, 9, 10, 11, 12, 13, 14, 15, |
230 | /* extended SSE registers */ |
231 | 25, 26, 27, 28, 29, 30, 31, 32, |
232 | /* AVX-512 registers 16-23 */ |
233 | 67, 68, 69, 70, 71, 72, 73, 74, |
234 | /* AVX-512 registers 24-31 */ |
235 | 75, 76, 77, 78, 79, 80, 81, 82, |
236 | /* Mask registers */ |
237 | 118, 119, 120, 121, 122, 123, 124, 125, |
238 | /* rex2 extend interger registers */ |
239 | 130, 131, 132, 133, 134, 135, 136, 137, |
240 | 138, 139, 140, 141, 142, 143, 144, 145 |
241 | }; |
242 | |
243 | /* Define the register numbers to be used in Dwarf debugging information. |
244 | The SVR4 reference port C compiler uses the following register numbers |
245 | in its Dwarf output code: |
246 | 0 for %eax (gcc regno = 0) |
247 | 1 for %ecx (gcc regno = 2) |
248 | 2 for %edx (gcc regno = 1) |
249 | 3 for %ebx (gcc regno = 3) |
250 | 4 for %esp (gcc regno = 7) |
251 | 5 for %ebp (gcc regno = 6) |
252 | 6 for %esi (gcc regno = 4) |
253 | 7 for %edi (gcc regno = 5) |
254 | The following three DWARF register numbers are never generated by |
255 | the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
256 | believed these numbers have these meanings. |
257 | 8 for %eip (no gcc equivalent) |
258 | 9 for %eflags (gcc regno = 17) |
259 | 10 for %trapno (no gcc equivalent) |
260 | It is not at all clear how we should number the FP stack registers |
261 | for the x86 architecture. If the version of SDB on x86/svr4 were |
262 | a bit less brain dead with respect to floating-point then we would |
263 | have a precedent to follow with respect to DWARF register numbers |
264 | for x86 FP registers, but the SDB on x86/svr4 was so completely |
265 | broken with respect to FP registers that it is hardly worth thinking |
266 | of it as something to strive for compatibility with. |
267 | The version of x86/svr4 SDB I had does (partially) |
268 | seem to believe that DWARF register number 11 is associated with |
269 | the x86 register %st(0), but that's about all. Higher DWARF |
270 | register numbers don't seem to be associated with anything in |
271 | particular, and even for DWARF regno 11, SDB only seemed to under- |
272 | stand that it should say that a variable lives in %st(0) (when |
273 | asked via an `=' command) if we said it was in DWARF regno 11, |
274 | but SDB still printed garbage when asked for the value of the |
275 | variable in question (via a `/' command). |
276 | (Also note that the labels SDB printed for various FP stack regs |
277 | when doing an `x' command were all wrong.) |
278 | Note that these problems generally don't affect the native SVR4 |
279 | C compiler because it doesn't allow the use of -O with -g and |
280 | because when it is *not* optimizing, it allocates a memory |
281 | location for each floating-point variable, and the memory |
282 | location is what gets described in the DWARF AT_location |
283 | attribute for the variable in question. |
284 | Regardless of the severe mental illness of the x86/svr4 SDB, we |
285 | do something sensible here and we use the following DWARF |
286 | register numbers. Note that these are all stack-top-relative |
287 | numbers. |
288 | 11 for %st(0) (gcc regno = 8) |
289 | 12 for %st(1) (gcc regno = 9) |
290 | 13 for %st(2) (gcc regno = 10) |
291 | 14 for %st(3) (gcc regno = 11) |
292 | 15 for %st(4) (gcc regno = 12) |
293 | 16 for %st(5) (gcc regno = 13) |
294 | 17 for %st(6) (gcc regno = 14) |
295 | 18 for %st(7) (gcc regno = 15) |
296 | */ |
297 | unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] = |
298 | { |
299 | /* general regs */ |
300 | 0, 2, 1, 3, 6, 7, 5, 4, |
301 | /* fp regs */ |
302 | 11, 12, 13, 14, 15, 16, 17, 18, |
303 | /* arg, flags, fpsr, frame */ |
304 | IGNORED_DWARF_REGNUM, 9, |
305 | IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
306 | /* SSE registers */ |
307 | 21, 22, 23, 24, 25, 26, 27, 28, |
308 | /* MMX registers */ |
309 | 29, 30, 31, 32, 33, 34, 35, 36, |
310 | /* extended integer registers */ |
311 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
312 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
313 | /* extended sse registers */ |
314 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
315 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
316 | /* AVX-512 registers 16-23 */ |
317 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
318 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
319 | /* AVX-512 registers 24-31 */ |
320 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
321 | INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
322 | /* Mask registers */ |
323 | 93, 94, 95, 96, 97, 98, 99, 100 |
324 | }; |
325 | |
326 | /* Define parameter passing and return registers. */ |
327 | |
328 | static int const x86_64_int_parameter_registers[6] = |
329 | { |
330 | DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG |
331 | }; |
332 | |
333 | static int const x86_64_ms_abi_int_parameter_registers[4] = |
334 | { |
335 | CX_REG, DX_REG, R8_REG, R9_REG |
336 | }; |
337 | |
338 | static int const x86_64_int_return_registers[4] = |
339 | { |
340 | AX_REG, DX_REG, DI_REG, SI_REG |
341 | }; |
342 | |
343 | /* Define the structure for the machine field in struct function. */ |
344 | |
345 | struct GTY(()) stack_local_entry { |
346 | unsigned short mode; |
347 | unsigned short n; |
348 | rtx rtl; |
349 | struct stack_local_entry *next; |
350 | }; |
351 | |
352 | /* Which cpu are we scheduling for. */ |
353 | enum attr_cpu ix86_schedule; |
354 | |
355 | /* Which cpu are we optimizing for. */ |
356 | enum processor_type ix86_tune; |
357 | |
358 | /* Which instruction set architecture to use. */ |
359 | enum processor_type ix86_arch; |
360 | |
361 | /* True if processor has SSE prefetch instruction. */ |
362 | unsigned char ix86_prefetch_sse; |
363 | |
364 | /* Preferred alignment for stack boundary in bits. */ |
365 | unsigned int ix86_preferred_stack_boundary; |
366 | |
367 | /* Alignment for incoming stack boundary in bits specified at |
368 | command line. */ |
369 | unsigned int ix86_user_incoming_stack_boundary; |
370 | |
371 | /* Default alignment for incoming stack boundary in bits. */ |
372 | unsigned int ix86_default_incoming_stack_boundary; |
373 | |
374 | /* Alignment for incoming stack boundary in bits. */ |
375 | unsigned int ix86_incoming_stack_boundary; |
376 | |
377 | /* True if there is no direct access to extern symbols. */ |
378 | bool ix86_has_no_direct_extern_access; |
379 | |
380 | /* Calling abi specific va_list type nodes. */ |
381 | tree sysv_va_list_type_node; |
382 | tree ms_va_list_type_node; |
383 | |
384 | /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ |
385 | char internal_label_prefix[16]; |
386 | int internal_label_prefix_len; |
387 | |
388 | /* Fence to use after loop using movnt. */ |
389 | tree x86_mfence; |
390 | |
391 | /* Register class used for passing given 64bit part of the argument. |
392 | These represent classes as documented by the PS ABI, with the exception |
393 | of SSESF, SSEDF classes, that are basically SSE class, just gcc will |
394 | use SF or DFmode move instead of DImode to avoid reformatting penalties. |
395 | |
396 | Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves |
397 | whenever possible (upper half does contain padding). */ |
398 | enum x86_64_reg_class |
399 | { |
400 | X86_64_NO_CLASS, |
401 | X86_64_INTEGER_CLASS, |
402 | X86_64_INTEGERSI_CLASS, |
403 | X86_64_SSE_CLASS, |
404 | X86_64_SSEHF_CLASS, |
405 | X86_64_SSESF_CLASS, |
406 | X86_64_SSEDF_CLASS, |
407 | X86_64_SSEUP_CLASS, |
408 | X86_64_X87_CLASS, |
409 | X86_64_X87UP_CLASS, |
410 | X86_64_COMPLEX_X87_CLASS, |
411 | X86_64_MEMORY_CLASS |
412 | }; |
413 | |
414 | #define MAX_CLASSES 8 |
415 | |
416 | /* Table of constants used by fldpi, fldln2, etc.... */ |
417 | static REAL_VALUE_TYPE ext_80387_constants_table [5]; |
418 | static bool ext_80387_constants_init; |
419 | |
420 | |
421 | static rtx ix86_function_value (const_tree, const_tree, bool); |
422 | static bool ix86_function_value_regno_p (const unsigned int); |
423 | static unsigned int ix86_function_arg_boundary (machine_mode, |
424 | const_tree); |
425 | static rtx ix86_static_chain (const_tree, bool); |
426 | static int ix86_function_regparm (const_tree, const_tree); |
427 | static void ix86_compute_frame_layout (void); |
428 | static tree ix86_canonical_va_list_type (tree); |
429 | static unsigned int split_stack_prologue_scratch_regno (void); |
430 | static bool i386_asm_output_addr_const_extra (FILE *, rtx); |
431 | |
432 | static bool ix86_can_inline_p (tree, tree); |
433 | static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
434 | |
435 | typedef enum ix86_flags_cc |
436 | { |
437 | X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB, |
438 | X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE, |
439 | X86_CCS, X86_CCNS, X86_CCP, X86_CCNP, |
440 | X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE |
441 | } ix86_cc; |
442 | |
443 | static const char *ix86_ccmp_dfv_mapping[] = |
444 | { |
445 | "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}", |
446 | "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}", |
447 | "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}", |
448 | "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}" |
449 | }; |
450 | |
451 | |
452 | /* Whether -mtune= or -march= were specified */ |
453 | int ix86_tune_defaulted; |
454 | int ix86_arch_specified; |
455 | |
456 | /* Return true if a red-zone is in use. We can't use red-zone when |
457 | there are local indirect jumps, like "indirect_jump" or "tablejump", |
458 | which jumps to another place in the function, since "call" in the |
459 | indirect thunk pushes the return address onto stack, destroying |
460 | red-zone. |
461 | |
462 | NB: Don't use red-zone for functions with no_caller_saved_registers |
463 | and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. |
464 | |
465 | TODO: If we can reserve the first 2 WORDs, for PUSH and, another |
466 | for CALL, in red-zone, we can allow local indirect jumps with |
467 | indirect thunk. */ |
468 | |
469 | bool |
470 | ix86_using_red_zone (void) |
471 | { |
472 | return (TARGET_RED_ZONE |
473 | && !TARGET_64BIT_MS_ABI |
474 | && (!TARGET_APX_EGPR |
475 | || (cfun->machine->call_saved_registers |
476 | != TYPE_NO_CALLER_SAVED_REGISTERS)) |
477 | && (!cfun->machine->has_local_indirect_jump |
478 | || cfun->machine->indirect_branch_type == indirect_branch_keep)); |
479 | } |
480 | |
481 | /* Return true, if profiling code should be emitted before |
482 | prologue. Otherwise it returns false. |
483 | Note: For x86 with "hotfix" it is sorried. */ |
484 | static bool |
485 | ix86_profile_before_prologue (void) |
486 | { |
487 | return flag_fentry != 0; |
488 | } |
489 | |
490 | /* Update register usage after having seen the compiler flags. */ |
491 | |
492 | static void |
493 | ix86_conditional_register_usage (void) |
494 | { |
495 | int i, c_mask; |
496 | |
497 | /* If there are no caller-saved registers, preserve all registers. |
498 | except fixed_regs and registers used for function return value |
499 | since aggregate_value_p checks call_used_regs[regno] on return |
500 | value. */ |
501 | if (cfun |
502 | && (cfun->machine->call_saved_registers |
503 | == TYPE_NO_CALLER_SAVED_REGISTERS)) |
504 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
505 | if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) |
506 | call_used_regs[i] = 0; |
507 | |
508 | /* For 32-bit targets, disable the REX registers. */ |
509 | if (! TARGET_64BIT) |
510 | { |
511 | for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) |
512 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
513 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
514 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
515 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
516 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
517 | } |
518 | |
519 | /* See the definition of CALL_USED_REGISTERS in i386.h. */ |
520 | c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); |
521 | |
522 | CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); |
523 | |
524 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
525 | { |
526 | /* Set/reset conditionally defined registers from |
527 | CALL_USED_REGISTERS initializer. */ |
528 | if (call_used_regs[i] > 1) |
529 | call_used_regs[i] = !!(call_used_regs[i] & c_mask); |
530 | |
531 | /* Calculate registers of CLOBBERED_REGS register set |
532 | as call used registers from GENERAL_REGS register set. */ |
533 | if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], bit: i) |
534 | && call_used_regs[i]) |
535 | SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], bit: i); |
536 | } |
537 | |
538 | /* If MMX is disabled, disable the registers. */ |
539 | if (! TARGET_MMX) |
540 | accessible_reg_set &= ~reg_class_contents[MMX_REGS]; |
541 | |
542 | /* If SSE is disabled, disable the registers. */ |
543 | if (! TARGET_SSE) |
544 | accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS]; |
545 | |
546 | /* If the FPU is disabled, disable the registers. */ |
547 | if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
548 | accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; |
549 | |
550 | /* If AVX512F is disabled, disable the registers. */ |
551 | if (! TARGET_AVX512F) |
552 | { |
553 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
554 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
555 | |
556 | accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS]; |
557 | } |
558 | |
559 | /* If APX is disabled, disable the registers. */ |
560 | if (! (TARGET_APX_EGPR && TARGET_64BIT)) |
561 | { |
562 | for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++) |
563 | CLEAR_HARD_REG_BIT (accessible_reg_set, bit: i); |
564 | } |
565 | } |
566 | |
567 | /* Canonicalize a comparison from one we don't have to one we do have. */ |
568 | |
569 | static void |
570 | ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
571 | bool op0_preserve_value) |
572 | { |
573 | /* The order of operands in x87 ficom compare is forced by combine in |
574 | simplify_comparison () function. Float operator is treated as RTX_OBJ |
575 | with a precedence over other operators and is always put in the first |
576 | place. Swap condition and operands to match ficom instruction. */ |
577 | if (!op0_preserve_value |
578 | && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) |
579 | { |
580 | enum rtx_code scode = swap_condition ((enum rtx_code) *code); |
581 | |
582 | /* We are called only for compares that are split to SAHF instruction. |
583 | Ensure that we have setcc/jcc insn for the swapped condition. */ |
584 | if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) |
585 | { |
586 | std::swap (a&: *op0, b&: *op1); |
587 | *code = (int) scode; |
588 | return; |
589 | } |
590 | } |
591 | |
592 | /* Swap operands of GTU comparison to canonicalize |
593 | addcarry/subborrow comparison. */ |
594 | if (!op0_preserve_value |
595 | && *code == GTU |
596 | && GET_CODE (*op0) == PLUS |
597 | && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode) |
598 | && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND |
599 | && GET_CODE (*op1) == ZERO_EXTEND) |
600 | { |
601 | std::swap (a&: *op0, b&: *op1); |
602 | *code = (int) swap_condition ((enum rtx_code) *code); |
603 | return; |
604 | } |
605 | } |
606 | |
607 | /* Hook to determine if one function can safely inline another. */ |
608 | |
609 | static bool |
610 | ix86_can_inline_p (tree caller, tree callee) |
611 | { |
612 | tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); |
613 | tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); |
614 | |
615 | /* Changes of those flags can be tolerated for always inlines. Lets hope |
616 | user knows what he is doing. */ |
617 | unsigned HOST_WIDE_INT always_inline_safe_mask |
618 | = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS |
619 | | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD |
620 | | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD |
621 | | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS |
622 | | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE |
623 | | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER |
624 | | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); |
625 | |
626 | |
627 | if (!callee_tree) |
628 | callee_tree = target_option_default_node; |
629 | if (!caller_tree) |
630 | caller_tree = target_option_default_node; |
631 | if (callee_tree == caller_tree) |
632 | return true; |
633 | |
634 | struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); |
635 | struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); |
636 | bool ret = false; |
637 | bool always_inline |
638 | = (DECL_DISREGARD_INLINE_LIMITS (callee) |
639 | && lookup_attribute (attr_name: "always_inline", |
640 | DECL_ATTRIBUTES (callee))); |
641 | |
642 | /* If callee only uses GPRs, ignore MASK_80387. */ |
643 | if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags)) |
644 | always_inline_safe_mask |= MASK_80387; |
645 | |
646 | cgraph_node *callee_node = cgraph_node::get (decl: callee); |
647 | /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 |
648 | function can inline a SSE2 function but a SSE2 function can't inline |
649 | a SSE4 function. */ |
650 | if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) |
651 | != callee_opts->x_ix86_isa_flags) |
652 | || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) |
653 | != callee_opts->x_ix86_isa_flags2)) |
654 | ret = false; |
655 | |
656 | /* See if we have the same non-isa options. */ |
657 | else if ((!always_inline |
658 | && caller_opts->x_target_flags != callee_opts->x_target_flags) |
659 | || (caller_opts->x_target_flags & ~always_inline_safe_mask) |
660 | != (callee_opts->x_target_flags & ~always_inline_safe_mask)) |
661 | ret = false; |
662 | |
663 | else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath |
664 | /* If the calle doesn't use FP expressions differences in |
665 | ix86_fpmath can be ignored. We are called from FEs |
666 | for multi-versioning call optimization, so beware of |
667 | ipa_fn_summaries not available. */ |
668 | && (! ipa_fn_summaries |
669 | || ipa_fn_summaries->get (node: callee_node) == NULL |
670 | || ipa_fn_summaries->get (node: callee_node)->fp_expressions)) |
671 | ret = false; |
672 | |
673 | /* At this point we cannot identify whether arch or tune setting |
674 | comes from target attribute or not. So the most conservative way |
675 | is to allow the callee that uses default arch and tune string to |
676 | be inlined. */ |
677 | else if (!strcmp (s1: callee_opts->x_ix86_arch_string, s2: "x86-64") |
678 | && !strcmp (s1: callee_opts->x_ix86_tune_string, s2: "generic")) |
679 | ret = true; |
680 | |
681 | /* See if arch, tune, etc. are the same. As previous ISA flags already |
682 | checks if callee's ISA is subset of caller's, do not block |
683 | always_inline attribute for callee even it has different arch. */ |
684 | else if (!always_inline && caller_opts->arch != callee_opts->arch) |
685 | ret = false; |
686 | |
687 | else if (!always_inline && caller_opts->tune != callee_opts->tune) |
688 | ret = false; |
689 | |
690 | else if (!always_inline |
691 | && caller_opts->branch_cost != callee_opts->branch_cost) |
692 | ret = false; |
693 | |
694 | else |
695 | ret = true; |
696 | |
697 | return ret; |
698 | } |
699 | |
700 | /* Return true if this goes in large data/bss. */ |
701 | |
702 | static bool |
703 | ix86_in_large_data_p (tree exp) |
704 | { |
705 | if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC |
706 | && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC) |
707 | return false; |
708 | |
709 | if (exp == NULL_TREE) |
710 | return false; |
711 | |
712 | /* Functions are never large data. */ |
713 | if (TREE_CODE (exp) == FUNCTION_DECL) |
714 | return false; |
715 | |
716 | /* Automatic variables are never large data. */ |
717 | if (VAR_P (exp) && !is_global_var (t: exp)) |
718 | return false; |
719 | |
720 | if (VAR_P (exp) && DECL_SECTION_NAME (exp)) |
721 | { |
722 | const char *section = DECL_SECTION_NAME (exp); |
723 | if (strcmp (s1: section, s2: ".ldata") == 0 |
724 | || strcmp (s1: section, s2: ".lbss") == 0) |
725 | return true; |
726 | return false; |
727 | } |
728 | else |
729 | { |
730 | HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
731 | |
732 | /* If this is an incomplete type with size 0, then we can't put it |
733 | in data because it might be too big when completed. Also, |
734 | int_size_in_bytes returns -1 if size can vary or is larger than |
735 | an integer in which case also it is safer to assume that it goes in |
736 | large data. */ |
737 | if (size <= 0 || size > ix86_section_threshold) |
738 | return true; |
739 | } |
740 | |
741 | return false; |
742 | } |
743 | |
744 | /* i386-specific section flag to mark large sections. */ |
745 | #define SECTION_LARGE SECTION_MACH_DEP |
746 | |
747 | /* Switch to the appropriate section for output of DECL. |
748 | DECL is either a `VAR_DECL' node or a constant of some sort. |
749 | RELOC indicates whether forming the initial value of DECL requires |
750 | link-time relocations. */ |
751 | |
752 | ATTRIBUTE_UNUSED static section * |
753 | x86_64_elf_select_section (tree decl, int reloc, |
754 | unsigned HOST_WIDE_INT align) |
755 | { |
756 | if (ix86_in_large_data_p (exp: decl)) |
757 | { |
758 | const char *sname = NULL; |
759 | unsigned int flags = SECTION_WRITE | SECTION_LARGE; |
760 | switch (categorize_decl_for_section (decl, reloc)) |
761 | { |
762 | case SECCAT_DATA: |
763 | sname = ".ldata"; |
764 | break; |
765 | case SECCAT_DATA_REL: |
766 | sname = ".ldata.rel"; |
767 | break; |
768 | case SECCAT_DATA_REL_LOCAL: |
769 | sname = ".ldata.rel.local"; |
770 | break; |
771 | case SECCAT_DATA_REL_RO: |
772 | sname = ".ldata.rel.ro"; |
773 | break; |
774 | case SECCAT_DATA_REL_RO_LOCAL: |
775 | sname = ".ldata.rel.ro.local"; |
776 | break; |
777 | case SECCAT_BSS: |
778 | sname = ".lbss"; |
779 | flags |= SECTION_BSS; |
780 | break; |
781 | case SECCAT_RODATA: |
782 | case SECCAT_RODATA_MERGE_STR: |
783 | case SECCAT_RODATA_MERGE_STR_INIT: |
784 | case SECCAT_RODATA_MERGE_CONST: |
785 | sname = ".lrodata"; |
786 | flags &= ~SECTION_WRITE; |
787 | break; |
788 | case SECCAT_SRODATA: |
789 | case SECCAT_SDATA: |
790 | case SECCAT_SBSS: |
791 | gcc_unreachable (); |
792 | case SECCAT_TEXT: |
793 | case SECCAT_TDATA: |
794 | case SECCAT_TBSS: |
795 | /* We don't split these for medium model. Place them into |
796 | default sections and hope for best. */ |
797 | break; |
798 | } |
799 | if (sname) |
800 | { |
801 | /* We might get called with string constants, but get_named_section |
802 | doesn't like them as they are not DECLs. Also, we need to set |
803 | flags in that case. */ |
804 | if (!DECL_P (decl)) |
805 | return get_section (sname, flags, NULL); |
806 | return get_named_section (decl, sname, reloc); |
807 | } |
808 | } |
809 | return default_elf_select_section (decl, reloc, align); |
810 | } |
811 | |
812 | /* Select a set of attributes for section NAME based on the properties |
813 | of DECL and whether or not RELOC indicates that DECL's initializer |
814 | might contain runtime relocations. */ |
815 | |
816 | static unsigned int ATTRIBUTE_UNUSED |
817 | x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
818 | { |
819 | unsigned int flags = default_section_type_flags (decl, name, reloc); |
820 | |
821 | if (ix86_in_large_data_p (exp: decl)) |
822 | flags |= SECTION_LARGE; |
823 | |
824 | if (decl == NULL_TREE |
825 | && (strcmp (s1: name, s2: ".ldata.rel.ro") == 0 |
826 | || strcmp (s1: name, s2: ".ldata.rel.ro.local") == 0)) |
827 | flags |= SECTION_RELRO; |
828 | |
829 | if (strcmp (s1: name, s2: ".lbss") == 0 |
830 | || startswith (str: name, prefix: ".lbss.") |
831 | || startswith (str: name, prefix: ".gnu.linkonce.lb.")) |
832 | flags |= SECTION_BSS; |
833 | |
834 | return flags; |
835 | } |
836 | |
837 | /* Build up a unique section name, expressed as a |
838 | STRING_CST node, and assign it to DECL_SECTION_NAME (decl). |
839 | RELOC indicates whether the initial value of EXP requires |
840 | link-time relocations. */ |
841 | |
842 | static void ATTRIBUTE_UNUSED |
843 | x86_64_elf_unique_section (tree decl, int reloc) |
844 | { |
845 | if (ix86_in_large_data_p (exp: decl)) |
846 | { |
847 | const char *prefix = NULL; |
848 | /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ |
849 | bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; |
850 | |
851 | switch (categorize_decl_for_section (decl, reloc)) |
852 | { |
853 | case SECCAT_DATA: |
854 | case SECCAT_DATA_REL: |
855 | case SECCAT_DATA_REL_LOCAL: |
856 | case SECCAT_DATA_REL_RO: |
857 | case SECCAT_DATA_REL_RO_LOCAL: |
858 | prefix = one_only ? ".ld": ".ldata"; |
859 | break; |
860 | case SECCAT_BSS: |
861 | prefix = one_only ? ".lb": ".lbss"; |
862 | break; |
863 | case SECCAT_RODATA: |
864 | case SECCAT_RODATA_MERGE_STR: |
865 | case SECCAT_RODATA_MERGE_STR_INIT: |
866 | case SECCAT_RODATA_MERGE_CONST: |
867 | prefix = one_only ? ".lr": ".lrodata"; |
868 | break; |
869 | case SECCAT_SRODATA: |
870 | case SECCAT_SDATA: |
871 | case SECCAT_SBSS: |
872 | gcc_unreachable (); |
873 | case SECCAT_TEXT: |
874 | case SECCAT_TDATA: |
875 | case SECCAT_TBSS: |
876 | /* We don't split these for medium model. Place them into |
877 | default sections and hope for best. */ |
878 | break; |
879 | } |
880 | if (prefix) |
881 | { |
882 | const char *name, *linkonce; |
883 | char *string; |
884 | |
885 | name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); |
886 | name = targetm.strip_name_encoding (name); |
887 | |
888 | /* If we're using one_only, then there needs to be a .gnu.linkonce |
889 | prefix to the section name. */ |
890 | linkonce = one_only ? ".gnu.linkonce": ""; |
891 | |
892 | string = ACONCAT ((linkonce, prefix, ".", name, NULL)); |
893 | |
894 | set_decl_section_name (decl, string); |
895 | return; |
896 | } |
897 | } |
898 | default_unique_section (decl, reloc); |
899 | } |
900 | |
901 | #ifdef COMMON_ASM_OP |
902 | |
903 | #ifndef LARGECOMM_SECTION_ASM_OP |
904 | #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" |
905 | #endif |
906 | |
907 | /* This says how to output assembler code to declare an |
908 | uninitialized external linkage data object. |
909 | |
910 | For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for |
911 | large objects. */ |
912 | void |
913 | x86_elf_aligned_decl_common (FILE *file, tree decl, |
914 | const char *name, unsigned HOST_WIDE_INT size, |
915 | unsigned align) |
916 | { |
917 | if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC |
918 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
919 | && size > (unsigned int)ix86_section_threshold) |
920 | { |
921 | switch_to_section (get_named_section (decl, ".lbss", 0)); |
922 | fputs (LARGECOMM_SECTION_ASM_OP, stream: file); |
923 | } |
924 | else |
925 | fputs (COMMON_ASM_OP, stream: file); |
926 | assemble_name (file, name); |
927 | fprintf (stream: file, format: ","HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", |
928 | size, align / BITS_PER_UNIT); |
929 | } |
930 | #endif |
931 | |
932 | /* Utility function for targets to use in implementing |
933 | ASM_OUTPUT_ALIGNED_BSS. */ |
934 | |
935 | void |
936 | x86_output_aligned_bss (FILE *file, tree decl, const char *name, |
937 | unsigned HOST_WIDE_INT size, unsigned align) |
938 | { |
939 | if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC |
940 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
941 | && size > (unsigned int)ix86_section_threshold) |
942 | switch_to_section (get_named_section (decl, ".lbss", 0)); |
943 | else |
944 | switch_to_section (bss_section); |
945 | ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); |
946 | #ifdef ASM_DECLARE_OBJECT_NAME |
947 | last_assemble_variable_decl = decl; |
948 | ASM_DECLARE_OBJECT_NAME (file, name, decl); |
949 | #else |
950 | /* Standard thing is just output label for the object. */ |
951 | ASM_OUTPUT_LABEL (file, name); |
952 | #endif /* ASM_DECLARE_OBJECT_NAME */ |
953 | ASM_OUTPUT_SKIP (file, size ? size : 1); |
954 | } |
955 | |
956 | /* Decide whether we must probe the stack before any space allocation |
957 | on this target. It's essentially TARGET_STACK_PROBE except when |
958 | -fstack-check causes the stack to be already probed differently. */ |
959 | |
960 | bool |
961 | ix86_target_stack_probe (void) |
962 | { |
963 | /* Do not probe the stack twice if static stack checking is enabled. */ |
964 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
965 | return false; |
966 | |
967 | return TARGET_STACK_PROBE; |
968 | } |
969 | |
970 | /* Decide whether we can make a sibling call to a function. DECL is the |
971 | declaration of the function being targeted by the call and EXP is the |
972 | CALL_EXPR representing the call. */ |
973 | |
974 | static bool |
975 | ix86_function_ok_for_sibcall (tree decl, tree exp) |
976 | { |
977 | tree type, decl_or_type; |
978 | rtx a, b; |
979 | bool bind_global = decl && !targetm.binds_local_p (decl); |
980 | |
981 | if (ix86_function_naked (fn: current_function_decl)) |
982 | return false; |
983 | |
984 | /* Sibling call isn't OK if there are no caller-saved registers |
985 | since all registers must be preserved before return. */ |
986 | if (cfun->machine->call_saved_registers |
987 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
988 | return false; |
989 | |
990 | /* If we are generating position-independent code, we cannot sibcall |
991 | optimize direct calls to global functions, as the PLT requires |
992 | %ebx be live. (Darwin does not have a PLT.) */ |
993 | if (!TARGET_MACHO |
994 | && !TARGET_64BIT |
995 | && flag_pic |
996 | && flag_plt |
997 | && bind_global) |
998 | return false; |
999 | |
1000 | /* If we need to align the outgoing stack, then sibcalling would |
1001 | unalign the stack, which may break the called function. */ |
1002 | if (ix86_minimum_incoming_stack_boundary (true) |
1003 | < PREFERRED_STACK_BOUNDARY) |
1004 | return false; |
1005 | |
1006 | if (decl) |
1007 | { |
1008 | decl_or_type = decl; |
1009 | type = TREE_TYPE (decl); |
1010 | } |
1011 | else |
1012 | { |
1013 | /* We're looking at the CALL_EXPR, we need the type of the function. */ |
1014 | type = CALL_EXPR_FN (exp); /* pointer expression */ |
1015 | type = TREE_TYPE (type); /* pointer type */ |
1016 | type = TREE_TYPE (type); /* function type */ |
1017 | decl_or_type = type; |
1018 | } |
1019 | |
1020 | /* Sibling call isn't OK if callee has no callee-saved registers |
1021 | and the calling function has callee-saved registers. */ |
1022 | if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS |
1023 | && (cfun->machine->call_saved_registers |
1024 | != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP) |
1025 | && lookup_attribute (attr_name: "no_callee_saved_registers", |
1026 | TYPE_ATTRIBUTES (type))) |
1027 | return false; |
1028 | |
1029 | /* If outgoing reg parm stack space changes, we cannot do sibcall. */ |
1030 | if ((OUTGOING_REG_PARM_STACK_SPACE (type) |
1031 | != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl))) |
1032 | || (REG_PARM_STACK_SPACE (decl_or_type) |
1033 | != REG_PARM_STACK_SPACE (current_function_decl))) |
1034 | { |
1035 | maybe_complain_about_tail_call (exp, |
1036 | "inconsistent size of stack space" |
1037 | " allocated for arguments which are" |
1038 | " passed in registers"); |
1039 | return false; |
1040 | } |
1041 | |
1042 | /* Check that the return value locations are the same. Like |
1043 | if we are returning floats on the 80387 register stack, we cannot |
1044 | make a sibcall from a function that doesn't return a float to a |
1045 | function that does or, conversely, from a function that does return |
1046 | a float to a function that doesn't; the necessary stack adjustment |
1047 | would not be executed. This is also the place we notice |
1048 | differences in the return value ABI. Note that it is ok for one |
1049 | of the functions to have void return type as long as the return |
1050 | value of the other is passed in a register. */ |
1051 | a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); |
1052 | b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
1053 | cfun->decl, false); |
1054 | if (STACK_REG_P (a) || STACK_REG_P (b)) |
1055 | { |
1056 | if (!rtx_equal_p (a, b)) |
1057 | return false; |
1058 | } |
1059 | else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
1060 | ; |
1061 | else if (!rtx_equal_p (a, b)) |
1062 | return false; |
1063 | |
1064 | if (TARGET_64BIT) |
1065 | { |
1066 | /* The SYSV ABI has more call-clobbered registers; |
1067 | disallow sibcalls from MS to SYSV. */ |
1068 | if (cfun->machine->call_abi == MS_ABI |
1069 | && ix86_function_type_abi (type) == SYSV_ABI) |
1070 | return false; |
1071 | } |
1072 | else |
1073 | { |
1074 | /* If this call is indirect, we'll need to be able to use a |
1075 | call-clobbered register for the address of the target function. |
1076 | Make sure that all such registers are not used for passing |
1077 | parameters. Note that DLLIMPORT functions and call to global |
1078 | function via GOT slot are indirect. */ |
1079 | if (!decl |
1080 | || (bind_global && flag_pic && !flag_plt) |
1081 | || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) |
1082 | || flag_force_indirect_call) |
1083 | { |
1084 | /* Check if regparm >= 3 since arg_reg_available is set to |
1085 | false if regparm == 0. If regparm is 1 or 2, there is |
1086 | always a call-clobbered register available. |
1087 | |
1088 | ??? The symbol indirect call doesn't need a call-clobbered |
1089 | register. But we don't know if this is a symbol indirect |
1090 | call or not here. */ |
1091 | if (ix86_function_regparm (type, decl) >= 3 |
1092 | && !cfun->machine->arg_reg_available) |
1093 | return false; |
1094 | } |
1095 | } |
1096 | |
1097 | if (decl && ix86_use_pseudo_pic_reg ()) |
1098 | { |
1099 | /* When PIC register is used, it must be restored after ifunc |
1100 | function returns. */ |
1101 | cgraph_node *node = cgraph_node::get (decl); |
1102 | if (node && node->ifunc_resolver) |
1103 | return false; |
1104 | } |
1105 | |
1106 | /* Disable sibcall if callee has indirect_return attribute and |
1107 | caller doesn't since callee will return to the caller's caller |
1108 | via an indirect jump. */ |
1109 | if (((flag_cf_protection & (CF_RETURN | CF_BRANCH)) |
1110 | == (CF_RETURN | CF_BRANCH)) |
1111 | && lookup_attribute (attr_name: "indirect_return", TYPE_ATTRIBUTES (type)) |
1112 | && !lookup_attribute (attr_name: "indirect_return", |
1113 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))) |
1114 | return false; |
1115 | |
1116 | /* Otherwise okay. That also includes certain types of indirect calls. */ |
1117 | return true; |
1118 | } |
1119 | |
1120 | /* This function determines from TYPE the calling-convention. */ |
1121 | |
1122 | unsigned int |
1123 | ix86_get_callcvt (const_tree type) |
1124 | { |
1125 | unsigned int ret = 0; |
1126 | bool is_stdarg; |
1127 | tree attrs; |
1128 | |
1129 | if (TARGET_64BIT) |
1130 | return IX86_CALLCVT_CDECL; |
1131 | |
1132 | attrs = TYPE_ATTRIBUTES (type); |
1133 | if (attrs != NULL_TREE) |
1134 | { |
1135 | if (lookup_attribute (attr_name: "cdecl", list: attrs)) |
1136 | ret |= IX86_CALLCVT_CDECL; |
1137 | else if (lookup_attribute (attr_name: "stdcall", list: attrs)) |
1138 | ret |= IX86_CALLCVT_STDCALL; |
1139 | else if (lookup_attribute (attr_name: "fastcall", list: attrs)) |
1140 | ret |= IX86_CALLCVT_FASTCALL; |
1141 | else if (lookup_attribute (attr_name: "thiscall", list: attrs)) |
1142 | ret |= IX86_CALLCVT_THISCALL; |
1143 | |
1144 | /* Regparam isn't allowed for thiscall and fastcall. */ |
1145 | if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) |
1146 | { |
1147 | if (lookup_attribute (attr_name: "regparm", list: attrs)) |
1148 | ret |= IX86_CALLCVT_REGPARM; |
1149 | if (lookup_attribute (attr_name: "sseregparm", list: attrs)) |
1150 | ret |= IX86_CALLCVT_SSEREGPARM; |
1151 | } |
1152 | |
1153 | if (IX86_BASE_CALLCVT(ret) != 0) |
1154 | return ret; |
1155 | } |
1156 | |
1157 | is_stdarg = stdarg_p (type); |
1158 | if (TARGET_RTD && !is_stdarg) |
1159 | return IX86_CALLCVT_STDCALL | ret; |
1160 | |
1161 | if (ret != 0 |
1162 | || is_stdarg |
1163 | || TREE_CODE (type) != METHOD_TYPE |
1164 | || ix86_function_type_abi (type) != MS_ABI) |
1165 | return IX86_CALLCVT_CDECL | ret; |
1166 | |
1167 | return IX86_CALLCVT_THISCALL; |
1168 | } |
1169 | |
1170 | /* Return 0 if the attributes for two types are incompatible, 1 if they |
1171 | are compatible, and 2 if they are nearly compatible (which causes a |
1172 | warning to be generated). */ |
1173 | |
1174 | static int |
1175 | ix86_comp_type_attributes (const_tree type1, const_tree type2) |
1176 | { |
1177 | unsigned int ccvt1, ccvt2; |
1178 | |
1179 | if (TREE_CODE (type1) != FUNCTION_TYPE |
1180 | && TREE_CODE (type1) != METHOD_TYPE) |
1181 | return 1; |
1182 | |
1183 | ccvt1 = ix86_get_callcvt (type: type1); |
1184 | ccvt2 = ix86_get_callcvt (type: type2); |
1185 | if (ccvt1 != ccvt2) |
1186 | return 0; |
1187 | if (ix86_function_regparm (type1, NULL) |
1188 | != ix86_function_regparm (type2, NULL)) |
1189 | return 0; |
1190 | |
1191 | if (lookup_attribute (attr_name: "no_callee_saved_registers", |
1192 | TYPE_ATTRIBUTES (type1)) |
1193 | != lookup_attribute (attr_name: "no_callee_saved_registers", |
1194 | TYPE_ATTRIBUTES (type2))) |
1195 | return 0; |
1196 | |
1197 | return 1; |
1198 | } |
1199 | |
1200 | /* Return the regparm value for a function with the indicated TYPE and DECL. |
1201 | DECL may be NULL when calling function indirectly |
1202 | or considering a libcall. */ |
1203 | |
1204 | static int |
1205 | ix86_function_regparm (const_tree type, const_tree decl) |
1206 | { |
1207 | tree attr; |
1208 | int regparm; |
1209 | unsigned int ccvt; |
1210 | |
1211 | if (TARGET_64BIT) |
1212 | return (ix86_function_type_abi (type) == SYSV_ABI |
1213 | ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); |
1214 | ccvt = ix86_get_callcvt (type); |
1215 | regparm = ix86_regparm; |
1216 | |
1217 | if ((ccvt & IX86_CALLCVT_REGPARM) != 0) |
1218 | { |
1219 | attr = lookup_attribute (attr_name: "regparm", TYPE_ATTRIBUTES (type)); |
1220 | if (attr) |
1221 | { |
1222 | regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
1223 | return regparm; |
1224 | } |
1225 | } |
1226 | else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
1227 | return 2; |
1228 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
1229 | return 1; |
1230 | |
1231 | /* Use register calling convention for local functions when possible. */ |
1232 | if (decl |
1233 | && TREE_CODE (decl) == FUNCTION_DECL) |
1234 | { |
1235 | cgraph_node *target = cgraph_node::get (decl); |
1236 | if (target) |
1237 | target = target->function_symbol (); |
1238 | |
1239 | /* Caller and callee must agree on the calling convention, so |
1240 | checking here just optimize means that with |
1241 | __attribute__((optimize (...))) caller could use regparm convention |
1242 | and callee not, or vice versa. Instead look at whether the callee |
1243 | is optimized or not. */ |
1244 | if (target && opt_for_fn (target->decl, optimize) |
1245 | && !(profile_flag && !flag_fentry)) |
1246 | { |
1247 | if (target->local && target->can_change_signature) |
1248 | { |
1249 | int local_regparm, globals = 0, regno; |
1250 | |
1251 | /* Make sure no regparm register is taken by a |
1252 | fixed register variable. */ |
1253 | for (local_regparm = 0; local_regparm < REGPARM_MAX; |
1254 | local_regparm++) |
1255 | if (fixed_regs[local_regparm]) |
1256 | break; |
1257 | |
1258 | /* We don't want to use regparm(3) for nested functions as |
1259 | these use a static chain pointer in the third argument. */ |
1260 | if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) |
1261 | local_regparm = 2; |
1262 | |
1263 | /* Save a register for the split stack. */ |
1264 | if (flag_split_stack) |
1265 | { |
1266 | if (local_regparm == 3) |
1267 | local_regparm = 2; |
1268 | else if (local_regparm == 2 |
1269 | && DECL_STATIC_CHAIN (target->decl)) |
1270 | local_regparm = 1; |
1271 | } |
1272 | |
1273 | /* Each fixed register usage increases register pressure, |
1274 | so less registers should be used for argument passing. |
1275 | This functionality can be overriden by an explicit |
1276 | regparm value. */ |
1277 | for (regno = AX_REG; regno <= DI_REG; regno++) |
1278 | if (fixed_regs[regno]) |
1279 | globals++; |
1280 | |
1281 | local_regparm |
1282 | = globals < local_regparm ? local_regparm - globals : 0; |
1283 | |
1284 | if (local_regparm > regparm) |
1285 | regparm = local_regparm; |
1286 | } |
1287 | } |
1288 | } |
1289 | |
1290 | return regparm; |
1291 | } |
1292 | |
1293 | /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and |
1294 | DFmode (2) arguments in SSE registers for a function with the |
1295 | indicated TYPE and DECL. DECL may be NULL when calling function |
1296 | indirectly or considering a libcall. Return -1 if any FP parameter |
1297 | should be rejected by error. This is used in siutation we imply SSE |
1298 | calling convetion but the function is called from another function with |
1299 | SSE disabled. Otherwise return 0. */ |
1300 | |
1301 | static int |
1302 | ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) |
1303 | { |
1304 | gcc_assert (!TARGET_64BIT); |
1305 | |
1306 | /* Use SSE registers to pass SFmode and DFmode arguments if requested |
1307 | by the sseregparm attribute. */ |
1308 | if (TARGET_SSEREGPARM |
1309 | || (type && lookup_attribute (attr_name: "sseregparm", TYPE_ATTRIBUTES (type)))) |
1310 | { |
1311 | if (!TARGET_SSE) |
1312 | { |
1313 | if (warn) |
1314 | { |
1315 | if (decl) |
1316 | error ("calling %qD with attribute sseregparm without " |
1317 | "SSE/SSE2 enabled", decl); |
1318 | else |
1319 | error ("calling %qT with attribute sseregparm without " |
1320 | "SSE/SSE2 enabled", type); |
1321 | } |
1322 | return 0; |
1323 | } |
1324 | |
1325 | return 2; |
1326 | } |
1327 | |
1328 | if (!decl) |
1329 | return 0; |
1330 | |
1331 | cgraph_node *target = cgraph_node::get (decl); |
1332 | if (target) |
1333 | target = target->function_symbol (); |
1334 | |
1335 | /* For local functions, pass up to SSE_REGPARM_MAX SFmode |
1336 | (and DFmode for SSE2) arguments in SSE registers. */ |
1337 | if (target |
1338 | /* TARGET_SSE_MATH */ |
1339 | && (target_opts_for_fn (fndecl: target->decl)->x_ix86_fpmath & FPMATH_SSE) |
1340 | && opt_for_fn (target->decl, optimize) |
1341 | && !(profile_flag && !flag_fentry)) |
1342 | { |
1343 | if (target->local && target->can_change_signature) |
1344 | { |
1345 | /* Refuse to produce wrong code when local function with SSE enabled |
1346 | is called from SSE disabled function. |
1347 | FIXME: We need a way to detect these cases cross-ltrans partition |
1348 | and avoid using SSE calling conventions on local functions called |
1349 | from function with SSE disabled. For now at least delay the |
1350 | warning until we know we are going to produce wrong code. |
1351 | See PR66047 */ |
1352 | if (!TARGET_SSE && warn) |
1353 | return -1; |
1354 | return TARGET_SSE2_P (target_opts_for_fn (target->decl) |
1355 | ->x_ix86_isa_flags) ? 2 : 1; |
1356 | } |
1357 | } |
1358 | |
1359 | return 0; |
1360 | } |
1361 | |
1362 | /* Return true if EAX is live at the start of the function. Used by |
1363 | ix86_expand_prologue to determine if we need special help before |
1364 | calling allocate_stack_worker. */ |
1365 | |
1366 | static bool |
1367 | ix86_eax_live_at_start_p (void) |
1368 | { |
1369 | /* Cheat. Don't bother working forward from ix86_function_regparm |
1370 | to the function type to whether an actual argument is located in |
1371 | eax. Instead just look at cfg info, which is still close enough |
1372 | to correct at this point. This gives false positives for broken |
1373 | functions that might use uninitialized data that happens to be |
1374 | allocated in eax, but who cares? */ |
1375 | return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); |
1376 | } |
1377 | |
1378 | static bool |
1379 | ix86_keep_aggregate_return_pointer (tree fntype) |
1380 | { |
1381 | tree attr; |
1382 | |
1383 | if (!TARGET_64BIT) |
1384 | { |
1385 | attr = lookup_attribute (attr_name: "callee_pop_aggregate_return", |
1386 | TYPE_ATTRIBUTES (fntype)); |
1387 | if (attr) |
1388 | return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); |
1389 | |
1390 | /* For 32-bit MS-ABI the default is to keep aggregate |
1391 | return pointer. */ |
1392 | if (ix86_function_type_abi (fntype) == MS_ABI) |
1393 | return true; |
1394 | } |
1395 | return KEEP_AGGREGATE_RETURN_POINTER != 0; |
1396 | } |
1397 | |
1398 | /* Value is the number of bytes of arguments automatically |
1399 | popped when returning from a subroutine call. |
1400 | FUNDECL is the declaration node of the function (as a tree), |
1401 | FUNTYPE is the data type of the function (as a tree), |
1402 | or for a library call it is an identifier node for the subroutine name. |
1403 | SIZE is the number of bytes of arguments passed on the stack. |
1404 | |
1405 | On the 80386, the RTD insn may be used to pop them if the number |
1406 | of args is fixed, but if the number is variable then the caller |
1407 | must pop them all. RTD can't be used for library calls now |
1408 | because the library is compiled with the Unix compiler. |
1409 | Use of RTD is a selectable option, since it is incompatible with |
1410 | standard Unix calling sequences. If the option is not selected, |
1411 | the caller must always pop the args. |
1412 | |
1413 | The attribute stdcall is equivalent to RTD on a per module basis. */ |
1414 | |
1415 | static poly_int64 |
1416 | ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) |
1417 | { |
1418 | unsigned int ccvt; |
1419 | |
1420 | /* None of the 64-bit ABIs pop arguments. */ |
1421 | if (TARGET_64BIT) |
1422 | return 0; |
1423 | |
1424 | ccvt = ix86_get_callcvt (type: funtype); |
1425 | |
1426 | if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL |
1427 | | IX86_CALLCVT_THISCALL)) != 0 |
1428 | && ! stdarg_p (funtype)) |
1429 | return size; |
1430 | |
1431 | /* Lose any fake structure return argument if it is passed on the stack. */ |
1432 | if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
1433 | && !ix86_keep_aggregate_return_pointer (fntype: funtype)) |
1434 | { |
1435 | int nregs = ix86_function_regparm (type: funtype, decl: fundecl); |
1436 | if (nregs == 0) |
1437 | return GET_MODE_SIZE (Pmode); |
1438 | } |
1439 | |
1440 | return 0; |
1441 | } |
1442 | |
1443 | /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ |
1444 | |
1445 | static bool |
1446 | ix86_legitimate_combined_insn (rtx_insn *insn) |
1447 | { |
1448 | int i; |
1449 | |
1450 | /* Check operand constraints in case hard registers were propagated |
1451 | into insn pattern. This check prevents combine pass from |
1452 | generating insn patterns with invalid hard register operands. |
1453 | These invalid insns can eventually confuse reload to error out |
1454 | with a spill failure. See also PRs 46829 and 46843. */ |
1455 | |
1456 | gcc_assert (INSN_CODE (insn) >= 0); |
1457 | |
1458 | extract_insn (insn); |
1459 | preprocess_constraints (insn); |
1460 | |
1461 | int n_operands = recog_data.n_operands; |
1462 | int n_alternatives = recog_data.n_alternatives; |
1463 | for (i = 0; i < n_operands; i++) |
1464 | { |
1465 | rtx op = recog_data.operand[i]; |
1466 | machine_mode mode = GET_MODE (op); |
1467 | const operand_alternative *op_alt; |
1468 | int offset = 0; |
1469 | bool win; |
1470 | int j; |
1471 | |
1472 | /* A unary operator may be accepted by the predicate, but it |
1473 | is irrelevant for matching constraints. */ |
1474 | if (UNARY_P (op)) |
1475 | op = XEXP (op, 0); |
1476 | |
1477 | if (SUBREG_P (op)) |
1478 | { |
1479 | if (REG_P (SUBREG_REG (op)) |
1480 | && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) |
1481 | offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), |
1482 | GET_MODE (SUBREG_REG (op)), |
1483 | SUBREG_BYTE (op), |
1484 | GET_MODE (op)); |
1485 | op = SUBREG_REG (op); |
1486 | } |
1487 | |
1488 | if (!(REG_P (op) && HARD_REGISTER_P (op))) |
1489 | continue; |
1490 | |
1491 | op_alt = recog_op_alt; |
1492 | |
1493 | /* Operand has no constraints, anything is OK. */ |
1494 | win = !n_alternatives; |
1495 | |
1496 | alternative_mask preferred = get_preferred_alternatives (insn); |
1497 | for (j = 0; j < n_alternatives; j++, op_alt += n_operands) |
1498 | { |
1499 | if (!TEST_BIT (preferred, j)) |
1500 | continue; |
1501 | if (op_alt[i].anything_ok |
1502 | || (op_alt[i].matches != -1 |
1503 | && operands_match_p |
1504 | (recog_data.operand[i], |
1505 | recog_data.operand[op_alt[i].matches])) |
1506 | || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) |
1507 | { |
1508 | win = true; |
1509 | break; |
1510 | } |
1511 | } |
1512 | |
1513 | if (!win) |
1514 | return false; |
1515 | } |
1516 | |
1517 | return true; |
1518 | } |
1519 | |
1520 | /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ |
1521 | |
1522 | static unsigned HOST_WIDE_INT |
1523 | ix86_asan_shadow_offset (void) |
1524 | { |
1525 | return SUBTARGET_SHADOW_OFFSET; |
1526 | } |
1527 | |
1528 | /* Argument support functions. */ |
1529 | |
1530 | /* Return true when register may be used to pass function parameters. */ |
1531 | bool |
1532 | ix86_function_arg_regno_p (int regno) |
1533 | { |
1534 | int i; |
1535 | enum calling_abi call_abi; |
1536 | const int *parm_regs; |
1537 | |
1538 | if (TARGET_SSE && SSE_REGNO_P (regno) |
1539 | && regno < FIRST_SSE_REG + SSE_REGPARM_MAX) |
1540 | return true; |
1541 | |
1542 | if (!TARGET_64BIT) |
1543 | return (regno < REGPARM_MAX |
1544 | || (TARGET_MMX && MMX_REGNO_P (regno) |
1545 | && regno < FIRST_MMX_REG + MMX_REGPARM_MAX)); |
1546 | |
1547 | /* TODO: The function should depend on current function ABI but |
1548 | builtins.cc would need updating then. Therefore we use the |
1549 | default ABI. */ |
1550 | call_abi = ix86_cfun_abi (); |
1551 | |
1552 | /* RAX is used as hidden argument to va_arg functions. */ |
1553 | if (call_abi == SYSV_ABI && regno == AX_REG) |
1554 | return true; |
1555 | |
1556 | if (call_abi == MS_ABI) |
1557 | parm_regs = x86_64_ms_abi_int_parameter_registers; |
1558 | else |
1559 | parm_regs = x86_64_int_parameter_registers; |
1560 | |
1561 | for (i = 0; i < (call_abi == MS_ABI |
1562 | ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) |
1563 | if (regno == parm_regs[i]) |
1564 | return true; |
1565 | return false; |
1566 | } |
1567 | |
1568 | /* Return true if we do not know how to pass ARG solely in registers. */ |
1569 | |
1570 | static bool |
1571 | ix86_must_pass_in_stack (const function_arg_info &arg) |
1572 | { |
1573 | if (must_pass_in_stack_var_size_or_pad (arg)) |
1574 | return true; |
1575 | |
1576 | /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
1577 | The layout_type routine is crafty and tries to trick us into passing |
1578 | currently unsupported vector types on the stack by using TImode. */ |
1579 | return (!TARGET_64BIT && arg.mode == TImode |
1580 | && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); |
1581 | } |
1582 | |
1583 | /* Return the size, in bytes, of the area reserved for arguments passed |
1584 | in registers for the function represented by FNDECL, depending on the |
1585 | ABI used. */ |
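/* For example, under the 64-bit MS ABI every caller reserves a 32-byte
   "shadow" area above the return address in which the callee may spill the
   four register arguments (RCX, RDX, R8, R9); the SysV ABIs reserve no such
   area, hence the 0 below.  */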
1586 | int |
1587 | ix86_reg_parm_stack_space (const_tree fndecl) |
1588 | { |
1589 | enum calling_abi call_abi = SYSV_ABI; |
1590 | if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
1591 | call_abi = ix86_function_abi (fndecl); |
1592 | else |
1593 | call_abi = ix86_function_type_abi (fndecl); |
1594 | if (TARGET_64BIT && call_abi == MS_ABI) |
1595 | return 32; |
1596 | return 0; |
1597 | } |
1598 | |
1599 | /* We add this as a workaround in order to use libc_has_function |
1600 | hook in i386.md. */ |
1601 | bool |
1602 | ix86_libc_has_function (enum function_class fn_class) |
1603 | { |
1604 | return targetm.libc_has_function (fn_class, NULL_TREE); |
1605 | } |
1606 | |
1607 | /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, |
1608 | specifying the call ABI used. */ |
1609 | enum calling_abi |
1610 | ix86_function_type_abi (const_tree fntype) |
1611 | { |
1612 | enum calling_abi abi = ix86_abi; |
1613 | |
1614 | if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) |
1615 | return abi; |
1616 | |
1617 | if (abi == SYSV_ABI |
1618 | && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) |
1619 | { |
1620 | static int warned; |
1621 | if (TARGET_X32 && !warned) |
1622 | { |
1623 | error ("X32 does not support %<ms_abi%> attribute"); |
1624 | warned = 1; |
1625 | } |
1626 | |
1627 | abi = MS_ABI; |
1628 | } |
1629 | else if (abi == MS_ABI |
1630 | && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) |
1631 | abi = SYSV_ABI; |
1632 | |
1633 | return abi; |
1634 | } |
1635 | |
1636 | enum calling_abi |
1637 | ix86_function_abi (const_tree fndecl) |
1638 | { |
1639 | return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; |
1640 | } |
1641 | |
1642 | /* Return SYSV_ABI or MS_ABI, depending on cfun, |
1643 | specifying the call ABI used. */ |
1644 | enum calling_abi |
1645 | ix86_cfun_abi (void) |
1646 | { |
1647 | return cfun ? cfun->machine->call_abi : ix86_abi; |
1648 | } |
1649 | |
1650 | bool |
1651 | ix86_function_ms_hook_prologue (const_tree fn) |
1652 | { |
1653 | if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) |
1654 | { |
1655 | if (decl_function_context (fn) != NULL_TREE) |
1656 | error_at (DECL_SOURCE_LOCATION (fn), |
1657 | "%<ms_hook_prologue%> attribute is not compatible " |
1658 | "with nested function"); |
1659 | else |
1660 | return true; |
1661 | } |
1662 | return false; |
1663 | } |
1664 | |
1665 | bool |
1666 | ix86_function_naked (const_tree fn) |
1667 | { |
1668 | if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) |
1669 | return true; |
1670 | |
1671 | return false; |
1672 | } |
1673 | |
1674 | /* Write the extra assembler code needed to declare a function properly. */ |
1675 | |
1676 | void |
1677 | ix86_asm_output_function_label (FILE *out_file, const char *fname, |
1678 | tree decl) |
1679 | { |
1680 | bool is_ms_hook = ix86_function_ms_hook_prologue (decl); |
1681 | |
1682 | if (cfun) |
1683 | cfun->machine->function_label_emitted = true; |
1684 | |
1685 | if (is_ms_hook) |
1686 | { |
1687 | int i, filler_count = (TARGET_64BIT ? 32 : 16); |
1688 | unsigned int filler_cc = 0xcccccccc; |
1689 | |
1690 | for (i = 0; i < filler_count; i += 4) |
1691 | fprintf (out_file, ASM_LONG " %#x\n", filler_cc); |
1692 | } |
1693 | |
1694 | #ifdef SUBTARGET_ASM_UNWIND_INIT |
1695 | SUBTARGET_ASM_UNWIND_INIT (out_file); |
1696 | #endif |
1697 | |
1698 | assemble_function_label_raw (out_file, fname); |
1699 | |
1700 | /* Output magic byte marker, if hot-patch attribute is set. */ |
1701 | if (is_ms_hook) |
1702 | { |
1703 | if (TARGET_64BIT) |
1704 | { |
1705 | /* leaq [%rsp + 0], %rsp */ |
1706 | fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", |
1707 | out_file); |
1708 | } |
1709 | else |
1710 | { |
1711 | /* movl.s %edi, %edi |
1712 | push %ebp |
1713 | movl.s %esp, %ebp */ |
1714 | fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file); |
1715 | } |
1716 | } |
1717 | } |
1718 | |
1719 | /* Output a user-defined label. In AT&T syntax, registers are prefixed |
1720 | with %, so labels require no punctuation. In Intel syntax, registers |
1721 | are unprefixed, so labels may clash with registers or other operators, |
1722 | and require quoting. */ |
1723 | void |
1724 | ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label) |
1725 | { |
1726 | if (ASSEMBLER_DIALECT == ASM_ATT) |
1727 | fprintf (file, "%s%s", prefix, label); |
1728 | else |
1729 | fprintf (file, "\"%s%s\"", prefix, label); |
1730 | } |
1731 | |
1732 | /* Implementation of the call ABI switching target hook. The call |
1733 | register sets specific to FNDECL are set up. See also |
1734 | ix86_conditional_register_usage for more details. */ |
1735 | void |
1736 | ix86_call_abi_override (const_tree fndecl) |
1737 | { |
1738 | cfun->machine->call_abi = ix86_function_abi (fndecl); |
1739 | } |
1740 | |
1741 | /* Return 1 if pseudo register should be created and used to hold |
1742 | GOT address for PIC code. */ |
1743 | bool |
1744 | ix86_use_pseudo_pic_reg (void) |
1745 | { |
1746 | if ((TARGET_64BIT |
1747 | && (ix86_cmodel == CM_SMALL_PIC |
1748 | || TARGET_PECOFF)) |
1749 | || !flag_pic) |
1750 | return false; |
1751 | return true; |
1752 | } |
1753 | |
1754 | /* Initialize large model PIC register. */ |
1755 | |
1756 | static void |
1757 | ix86_init_large_pic_reg (unsigned int tmp_regno) |
1758 | { |
1759 | rtx_code_label *label; |
1760 | rtx tmp_reg; |
1761 | |
1762 | gcc_assert (Pmode == DImode); |
1763 | label = gen_label_rtx (); |
1764 | emit_label (label); |
1765 | LABEL_PRESERVE_P (label) = 1; |
1766 | tmp_reg = gen_rtx_REG (Pmode, tmp_regno); |
1767 | gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); |
1768 | emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, |
1769 | label)); |
1770 | emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); |
1771 | emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg)); |
1772 | const char *name = LABEL_NAME (label); |
1773 | PUT_CODE (label, NOTE); |
1774 | NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; |
1775 | NOTE_DELETED_LABEL_NAME (label) = name; |
1776 | } |
1777 | |
1778 | /* Create and initialize PIC register if required. */ |
1779 | static void |
1780 | ix86_init_pic_reg (void) |
1781 | { |
1782 | edge entry_edge; |
1783 | rtx_insn *seq; |
1784 | |
1785 | if (!ix86_use_pseudo_pic_reg ()) |
1786 | return; |
1787 | |
1788 | start_sequence (); |
1789 | |
1790 | if (TARGET_64BIT) |
1791 | { |
1792 | if (ix86_cmodel == CM_LARGE_PIC) |
1793 | ix86_init_large_pic_reg (R11_REG); |
1794 | else |
1795 | emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); |
1796 | } |
1797 | else |
1798 | { |
1799 | /* If there is a future mcount call in the function, it is more profitable |
1800 | to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */ |
1801 | rtx reg = crtl->profile |
1802 | ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) |
1803 | : pic_offset_table_rtx; |
1804 | rtx_insn *insn = emit_insn (gen_set_got (reg)); |
1805 | RTX_FRAME_RELATED_P (insn) = 1; |
1806 | if (crtl->profile) |
1807 | emit_move_insn (pic_offset_table_rtx, reg); |
1808 | add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
1809 | } |
1810 | |
1811 | seq = end_sequence (); |
1812 | |
1813 | entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1814 | insert_insn_on_edge (seq, entry_edge); |
1815 | commit_one_edge_insertion (entry_edge); |
1816 | } |
1817 | |
1818 | /* Initialize a variable CUM of type CUMULATIVE_ARGS |
1819 | for a call to a function whose data type is FNTYPE. |
1820 | For a library call, FNTYPE is 0. */ |
1821 | |
1822 | void |
1823 | init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
1824 | tree fntype, /* tree ptr for function decl */ |
1825 | rtx libname, /* SYMBOL_REF of library name or 0 */ |
1826 | tree fndecl, |
1827 | int caller) |
1828 | { |
1829 | struct cgraph_node *local_info_node = NULL; |
1830 | struct cgraph_node *target = NULL; |
1831 | |
1832 | /* Set silent_p to false to raise an error for invalid calls when |
1833 | expanding function body. */ |
1834 | cfun->machine->silent_p = false; |
1835 | |
1836 | memset (cum, 0, sizeof (*cum)); |
1837 | |
1838 | if (fndecl) |
1839 | { |
1840 | target = cgraph_node::get (fndecl); |
1841 | if (target) |
1842 | { |
1843 | target = target->function_symbol (); |
1844 | local_info_node = cgraph_node::local_info_node (target->decl); |
1845 | cum->call_abi = ix86_function_abi (target->decl); |
1846 | } |
1847 | else |
1848 | cum->call_abi = ix86_function_abi (fndecl); |
1849 | } |
1850 | else |
1851 | cum->call_abi = ix86_function_type_abi (fntype); |
1852 | |
1853 | cum->caller = caller; |
1854 | |
1855 | /* Set up the number of registers to use for passing arguments. */ |
1856 | cum->nregs = ix86_regparm; |
1857 | if (TARGET_64BIT) |
1858 | { |
1859 | cum->nregs = (cum->call_abi == SYSV_ABI |
1860 | ? X86_64_REGPARM_MAX |
1861 | : X86_64_MS_REGPARM_MAX); |
1862 | } |
1863 | if (TARGET_SSE) |
1864 | { |
1865 | cum->sse_nregs = SSE_REGPARM_MAX; |
1866 | if (TARGET_64BIT) |
1867 | { |
1868 | cum->sse_nregs = (cum->call_abi == SYSV_ABI |
1869 | ? X86_64_SSE_REGPARM_MAX |
1870 | : X86_64_MS_SSE_REGPARM_MAX); |
1871 | } |
1872 | } |
1873 | if (TARGET_MMX) |
1874 | cum->mmx_nregs = MMX_REGPARM_MAX; |
1875 | cum->warn_avx512f = true; |
1876 | cum->warn_avx = true; |
1877 | cum->warn_sse = true; |
1878 | cum->warn_mmx = true; |
1879 | |
1880 | /* Because type might mismatch in between caller and callee, we need to |
1881 | use actual type of function for local calls. |
1882 | FIXME: cgraph_analyze can be told to actually record if function uses |
1883 | va_start so for local functions maybe_vaarg can be made aggressive |
1884 | helping K&R code. |
1885 | FIXME: once the typesystem is fixed, we won't need this code anymore. */ |
1886 | if (local_info_node && local_info_node->local |
1887 | && local_info_node->can_change_signature) |
1888 | fntype = TREE_TYPE (target->decl); |
1889 | cum->stdarg = stdarg_p (fntype); |
1890 | cum->maybe_vaarg = (fntype |
1891 | ? (!prototype_p (fntype) || stdarg_p (fntype)) |
1892 | : !libname); |
1893 | |
1894 | cum->decl = fndecl; |
1895 | |
1896 | cum->warn_empty = !warn_abi || cum->stdarg; |
1897 | if (!cum->warn_empty && fntype) |
1898 | { |
1899 | function_args_iterator iter; |
1900 | tree argtype; |
1901 | bool seen_empty_type = false; |
1902 | FOREACH_FUNCTION_ARGS (fntype, argtype, iter) |
1903 | { |
1904 | if (argtype == error_mark_node || VOID_TYPE_P (argtype)) |
1905 | break; |
1906 | if (TYPE_EMPTY_P (argtype)) |
1907 | seen_empty_type = true; |
1908 | else if (seen_empty_type) |
1909 | { |
1910 | cum->warn_empty = true; |
1911 | break; |
1912 | } |
1913 | } |
1914 | } |
1915 | |
1916 | if (!TARGET_64BIT) |
1917 | { |
1918 | /* If there are variable arguments, then we won't pass anything |
1919 | in registers in 32-bit mode. */ |
1920 | if (stdarg_p (fntype)) |
1921 | { |
1922 | cum->nregs = 0; |
1923 | /* Since, in 32-bit, variable arguments are always passed on the |
1924 | stack, there is a scratch register available for an indirect |
1925 | sibcall. */ |
1926 | cfun->machine->arg_reg_available = true; |
1927 | cum->sse_nregs = 0; |
1928 | cum->mmx_nregs = 0; |
1929 | cum->warn_avx512f = false; |
1930 | cum->warn_avx = false; |
1931 | cum->warn_sse = false; |
1932 | cum->warn_mmx = false; |
1933 | return; |
1934 | } |
1935 | |
1936 | /* Use ecx and edx registers if function has fastcall attribute, |
1937 | else look for regparm information. */ |
1938 | if (fntype) |
1939 | { |
1940 | unsigned int ccvt = ix86_get_callcvt (fntype); |
1941 | if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
1942 | { |
1943 | cum->nregs = 1; |
1944 | cum->fastcall = 1; /* Same first register as in fastcall. */ |
1945 | } |
1946 | else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
1947 | { |
1948 | cum->nregs = 2; |
1949 | cum->fastcall = 1; |
1950 | } |
1951 | else |
1952 | cum->nregs = ix86_function_regparm (fntype, fndecl); |
1953 | } |
1954 | |
1955 | /* Set up the number of SSE registers used for passing SFmode |
1956 | and DFmode arguments. Warn for mismatching ABI. */ |
1957 | cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); |
1958 | } |
1959 | |
1960 | cfun->machine->arg_reg_available = (cum->nregs > 0); |
1961 | } |
1962 | |
1963 | /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
1964 | But in the case of vector types, it is some vector mode. |
1965 | |
1966 | When we have only some of our vector isa extensions enabled, then there |
1967 | are some modes for which vector_mode_supported_p is false. For these |
1968 | modes, the generic vector support in gcc will choose some non-vector mode |
1969 | in order to implement the type. By computing the natural mode, we'll |
1970 | select the proper ABI location for the operand and not depend on whatever |
1971 | the middle-end decides to do with these vector types. |
1972 | |
1973 | The middle-end can't deal with vector types > 16 bytes. In this |
1974 | case, we return the original mode and warn ABI change if CUM isn't |
1975 | NULL. |
1976 | |
1977 | If INT_RETURN is true, warn ABI change if the vector mode isn't |
1978 | available for function return value. */ |
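/* A rough illustration (an assumed example, not a case lifted from this
   file): an argument of type "int __attribute__ ((vector_size (32)))" has
   natural mode V8SImode and, with AVX enabled, is passed in a YMM register.
   Without AVX the 32-byte vector cannot live in a register, so TYPE_MODE is
   returned unchanged and the -Wpsabi warning in the function below is
   emitted.  */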
1979 | |
1980 | static machine_mode |
1981 | type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, |
1982 | bool in_return) |
1983 | { |
1984 | machine_mode mode = TYPE_MODE (type); |
1985 | |
1986 | if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode)) |
1987 | { |
1988 | HOST_WIDE_INT size = int_size_in_bytes (type); |
1989 | if ((size == 8 || size == 16 || size == 32 || size == 64) |
1990 | /* ??? Generic code allows us to create width 1 vectors. Ignore. */ |
1991 | && TYPE_VECTOR_SUBPARTS (type) > 1) |
1992 | { |
1993 | machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); |
1994 | |
1995 | /* There are no XFmode vector modes ... */ |
1996 | if (innermode == XFmode) |
1997 | return mode; |
1998 | |
1999 | /* ... and no decimal float vector modes. */ |
2000 | if (DECIMAL_FLOAT_MODE_P (innermode)) |
2001 | return mode; |
2002 | |
2003 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type))) |
2004 | mode = MIN_MODE_VECTOR_FLOAT; |
2005 | else |
2006 | mode = MIN_MODE_VECTOR_INT; |
2007 | |
2008 | /* Get the mode which has this inner mode and number of units. */ |
2009 | FOR_EACH_MODE_FROM (mode, mode) |
2010 | if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) |
2011 | && GET_MODE_INNER (mode) == innermode) |
2012 | { |
2013 | if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) |
2014 | { |
2015 | static bool warnedavx512f; |
2016 | static bool warnedavx512f_ret; |
2017 | |
2018 | if (cum && cum->warn_avx512f && !warnedavx512f) |
2019 | { |
2020 | if (warning (OPT_Wpsabi, "AVX512F vector argument " |
2021 | "without AVX512F enabled changes the ABI")) |
2022 | warnedavx512f = true; |
2023 | } |
2024 | else if (in_return && !warnedavx512f_ret) |
2025 | { |
2026 | if (warning (OPT_Wpsabi, "AVX512F vector return " |
2027 | "without AVX512F enabled changes the ABI")) |
2028 | warnedavx512f_ret = true; |
2029 | } |
2030 | |
2031 | return TYPE_MODE (type); |
2032 | } |
2033 | else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) |
2034 | { |
2035 | static bool warnedavx; |
2036 | static bool warnedavx_ret; |
2037 | |
2038 | if (cum && cum->warn_avx && !warnedavx) |
2039 | { |
2040 | if (warning (OPT_Wpsabi, "AVX vector argument " |
2041 | "without AVX enabled changes the ABI")) |
2042 | warnedavx = true; |
2043 | } |
2044 | else if (in_return && !warnedavx_ret) |
2045 | { |
2046 | if (warning (OPT_Wpsabi, "AVX vector return " |
2047 | "without AVX enabled changes the ABI")) |
2048 | warnedavx_ret = true; |
2049 | } |
2050 | |
2051 | return TYPE_MODE (type); |
2052 | } |
2053 | else if (((size == 8 && TARGET_64BIT) || size == 16) |
2054 | && !TARGET_SSE |
2055 | && !TARGET_IAMCU) |
2056 | { |
2057 | static bool warnedsse; |
2058 | static bool warnedsse_ret; |
2059 | |
2060 | if (cum && cum->warn_sse && !warnedsse) |
2061 | { |
2062 | if (warning (OPT_Wpsabi, "SSE vector argument " |
2063 | "without SSE enabled changes the ABI")) |
2064 | warnedsse = true; |
2065 | } |
2066 | else if (!TARGET_64BIT && in_return && !warnedsse_ret) |
2067 | { |
2068 | if (warning (OPT_Wpsabi, "SSE vector return " |
2069 | "without SSE enabled changes the ABI")) |
2070 | warnedsse_ret = true; |
2071 | } |
2072 | } |
2073 | else if ((size == 8 && !TARGET_64BIT) |
2074 | && (!cfun |
2075 | || cfun->machine->func_type == TYPE_NORMAL) |
2076 | && !TARGET_MMX |
2077 | && !TARGET_IAMCU) |
2078 | { |
2079 | static bool warnedmmx; |
2080 | static bool warnedmmx_ret; |
2081 | |
2082 | if (cum && cum->warn_mmx && !warnedmmx) |
2083 | { |
2084 | if (warning (OPT_Wpsabi, "MMX vector argument " |
2085 | "without MMX enabled changes the ABI")) |
2086 | warnedmmx = true; |
2087 | } |
2088 | else if (in_return && !warnedmmx_ret) |
2089 | { |
2090 | if (warning (OPT_Wpsabi, "MMX vector return " |
2091 | "without MMX enabled changes the ABI")) |
2092 | warnedmmx_ret = true; |
2093 | } |
2094 | } |
2095 | return mode; |
2096 | } |
2097 | |
2098 | gcc_unreachable (); |
2099 | } |
2100 | } |
2101 | |
2102 | return mode; |
2103 | } |
2104 | |
2105 | /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
2106 | this may not agree with the mode that the type system has chosen for the |
2107 | register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
2108 | go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
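/* A sketch of the BLKmode case (register number assumed for illustration):
   if the natural mode is V4SFmode and the register is XMM0, the result is
   roughly
     (parallel [(expr_list (reg:V4SF xmm0) (const_int 0))])
   i.e. a one-element PARALLEL telling the middle end that the value lives
   in XMM0 starting at byte offset 0 of the argument.  */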
2109 | |
2110 | static rtx |
2111 | gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, |
2112 | unsigned int regno) |
2113 | { |
2114 | rtx tmp; |
2115 | |
2116 | if (orig_mode != BLKmode) |
2117 | tmp = gen_rtx_REG (orig_mode, regno); |
2118 | else |
2119 | { |
2120 | tmp = gen_rtx_REG (mode, regno); |
2121 | tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
2122 | tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
2123 | } |
2124 | |
2125 | return tmp; |
2126 | } |
2127 | |
2128 | /* x86-64 register passing implementation. See x86-64 ABI for details. Goal |
2129 | of this code is to classify each 8bytes of incoming argument by the register |
2130 | class and assign registers accordingly. */ |
2131 | |
2132 | /* Return the union class of CLASS1 and CLASS2. |
2133 | See the x86-64 PS ABI for details. */ |
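/* For instance, classifying "struct { int i; float f; }" puts both fields
   into the same eightbyte: the int contributes an integer class, the float
   an SSE class, and rule #4 merges them to X86_64_INTEGER_CLASS, so the
   whole 8-byte struct is passed in a single general-purpose register.  */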
2134 | |
2135 | static enum x86_64_reg_class |
2136 | merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
2137 | { |
2138 | /* Rule #1: If both classes are equal, this is the resulting class. */ |
2139 | if (class1 == class2) |
2140 | return class1; |
2141 | |
2142 | /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
2143 | the other class. */ |
2144 | if (class1 == X86_64_NO_CLASS) |
2145 | return class2; |
2146 | if (class2 == X86_64_NO_CLASS) |
2147 | return class1; |
2148 | |
2149 | /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
2150 | if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
2151 | return X86_64_MEMORY_CLASS; |
2152 | |
2153 | /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
2154 | if ((class1 == X86_64_INTEGERSI_CLASS |
2155 | && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) |
2156 | || (class2 == X86_64_INTEGERSI_CLASS |
2157 | && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) |
2158 | return X86_64_INTEGERSI_CLASS; |
2159 | if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
2160 | || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
2161 | return X86_64_INTEGER_CLASS; |
2162 | |
2163 | /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
2164 | MEMORY is used. */ |
2165 | if (class1 == X86_64_X87_CLASS |
2166 | || class1 == X86_64_X87UP_CLASS |
2167 | || class1 == X86_64_COMPLEX_X87_CLASS |
2168 | || class2 == X86_64_X87_CLASS |
2169 | || class2 == X86_64_X87UP_CLASS |
2170 | || class2 == X86_64_COMPLEX_X87_CLASS) |
2171 | return X86_64_MEMORY_CLASS; |
2172 | |
2173 | /* Rule #6: Otherwise class SSE is used. */ |
2174 | return X86_64_SSE_CLASS; |
2175 | } |
2176 | |
2177 | /* Classify the argument of type TYPE and mode MODE. |
2178 | CLASSES will be filled by the register class used to pass each word |
2179 | of the operand. The number of words is returned. In case the parameter |
2180 | should be passed in memory, 0 is returned. As a special case for zero |
2181 | sized containers, classes[0] will be NO_CLASS and 1 is returned. |
2182 | |
2183 | BIT_OFFSET is used internally for handling records and specifies the |
2184 | offset in bits modulo 512 to avoid overflow cases. |
2185 | |
2186 | See the x86-64 PS ABI for details. |
2187 | */ |
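/* A worked example: "struct { double d; int i; }" occupies 16 bytes, i.e.
   two eightbytes.  The first eightbyte classifies as X86_64_SSEDF_CLASS and
   the second as an integer class, so 2 is returned and the struct ends up
   being passed in one SSE register and one integer register.  */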
2188 | |
2189 | static int |
2190 | classify_argument (machine_mode mode, const_tree type, |
2191 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset, |
2192 | int &zero_width_bitfields) |
2193 | { |
2194 | HOST_WIDE_INT bytes |
2195 | = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
2196 | int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); |
2197 | |
2198 | /* Variable sized entities are always passed/returned in memory. */ |
2199 | if (bytes < 0) |
2200 | return 0; |
2201 | |
2202 | if (mode != VOIDmode) |
2203 | { |
2204 | /* The value of "named" doesn't matter. */ |
2205 | function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true); |
2206 | if (targetm.calls.must_pass_in_stack (arg)) |
2207 | return 0; |
2208 | } |
2209 | |
2210 | if (type && (AGGREGATE_TYPE_P (type) |
2211 | || (TREE_CODE (type) == BITINT_TYPE && words > 1))) |
2212 | { |
2213 | int i; |
2214 | tree field; |
2215 | enum x86_64_reg_class subclasses[MAX_CLASSES]; |
2216 | |
2217 | /* On x86-64 we pass structures larger than 64 bytes on the stack. */ |
2218 | if (bytes > 64) |
2219 | return 0; |
2220 | |
2221 | for (i = 0; i < words; i++) |
2222 | classes[i] = X86_64_NO_CLASS; |
2223 | |
2224 | /* Zero sized arrays or structures are NO_CLASS. We return 0 to |
2225 | signal the memory class, so handle it as a special case. */ |
2226 | if (!words) |
2227 | { |
2228 | classes[0] = X86_64_NO_CLASS; |
2229 | return 1; |
2230 | } |
2231 | |
2232 | /* Classify each field of record and merge classes. */ |
2233 | switch (TREE_CODE (type)) |
2234 | { |
2235 | case RECORD_TYPE: |
2236 | /* And now merge the fields of structure. */ |
2237 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
2238 | { |
2239 | if (TREE_CODE (field) == FIELD_DECL) |
2240 | { |
2241 | int num; |
2242 | |
2243 | if (TREE_TYPE (field) == error_mark_node) |
2244 | continue; |
2245 | |
2246 | /* Bitfields are always classified as integer. Handle them |
2247 | early, since later code would consider them to be |
2248 | misaligned integers. */ |
2249 | if (DECL_BIT_FIELD (field)) |
2250 | { |
2251 | if (integer_zerop (DECL_SIZE (field))) |
2252 | { |
2253 | if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field)) |
2254 | continue; |
2255 | if (zero_width_bitfields != 2) |
2256 | { |
2257 | zero_width_bitfields = 1; |
2258 | continue; |
2259 | } |
2260 | } |
2261 | for (i = (int_bit_position (field) |
2262 | + (bit_offset % 64)) / 8 / 8; |
2263 | i < ((int_bit_position (field) + (bit_offset % 64)) |
2264 | + tree_to_shwi (DECL_SIZE (field)) |
2265 | + 63) / 8 / 8; i++) |
2266 | classes[i] |
2267 | = merge_classes (X86_64_INTEGER_CLASS, classes[i]); |
2268 | } |
2269 | else |
2270 | { |
2271 | int pos; |
2272 | |
2273 | type = TREE_TYPE (field); |
2274 | |
2275 | /* Flexible array member is ignored. */ |
2276 | if (TYPE_MODE (type) == BLKmode |
2277 | && TREE_CODE (type) == ARRAY_TYPE |
2278 | && TYPE_SIZE (type) == NULL_TREE |
2279 | && TYPE_DOMAIN (type) != NULL_TREE |
2280 | && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) |
2281 | == NULL_TREE)) |
2282 | { |
2283 | static bool warned; |
2284 | |
2285 | if (!warned && warn_psabi) |
2286 | { |
2287 | warned = true; |
2288 | inform (input_location, |
2289 | "the ABI of passing struct with" |
2290 | " a flexible array member has" |
2291 | " changed in GCC 4.4"); |
2292 | } |
2293 | continue; |
2294 | } |
2295 | num = classify_argument (TYPE_MODE (type), type, |
2296 | subclasses, |
2297 | (int_bit_position (field) |
2298 | + bit_offset) % 512, |
2299 | zero_width_bitfields); |
2300 | if (!num) |
2301 | return 0; |
2302 | pos = (int_bit_position (field) |
2303 | + (bit_offset % 64)) / 8 / 8; |
2304 | for (i = 0; i < num && (i + pos) < words; i++) |
2305 | classes[i + pos] |
2306 | = merge_classes (subclasses[i], classes[i + pos]); |
2307 | } |
2308 | } |
2309 | } |
2310 | break; |
2311 | |
2312 | case ARRAY_TYPE: |
2313 | /* Arrays are handled as small records. */ |
2314 | { |
2315 | int num; |
2316 | num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
2317 | TREE_TYPE (type), subclasses, bit_offset, |
2318 | zero_width_bitfields); |
2319 | if (!num) |
2320 | return 0; |
2321 | |
2322 | /* The partial classes are now full classes. */ |
2323 | if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) |
2324 | subclasses[0] = X86_64_SSE_CLASS; |
2325 | if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2) |
2326 | subclasses[0] = X86_64_SSE_CLASS; |
2327 | if (subclasses[0] == X86_64_INTEGERSI_CLASS |
2328 | && !((bit_offset % 64) == 0 && bytes == 4)) |
2329 | subclasses[0] = X86_64_INTEGER_CLASS; |
2330 | |
2331 | for (i = 0; i < words; i++) |
2332 | classes[i] = subclasses[i % num]; |
2333 | |
2334 | break; |
2335 | } |
2336 | case UNION_TYPE: |
2337 | case QUAL_UNION_TYPE: |
2338 | /* Unions are similar to RECORD_TYPE but offset is always 0. |
2339 | */ |
2340 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
2341 | { |
2342 | if (TREE_CODE (field) == FIELD_DECL) |
2343 | { |
2344 | int num; |
2345 | |
2346 | if (TREE_TYPE (field) == error_mark_node) |
2347 | continue; |
2348 | |
2349 | num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
2350 | TREE_TYPE (field), subclasses, |
2351 | bit_offset, zero_width_bitfields); |
2352 | if (!num) |
2353 | return 0; |
2354 | for (i = 0; i < num && i < words; i++) |
2355 | classes[i] = merge_classes (subclasses[i], classes[i]); |
2356 | } |
2357 | } |
2358 | break; |
2359 | |
2360 | case BITINT_TYPE: |
2361 | /* _BitInt(N) for N > 64 is passed as structure containing |
2362 | (N + 63) / 64 64-bit elements. */ |
2363 | if (words > 2) |
2364 | return 0; |
2365 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2366 | return 2; |
2367 | |
2368 | default: |
2369 | gcc_unreachable (); |
2370 | } |
2371 | |
2372 | if (words > 2) |
2373 | { |
2374 | /* When size > 16 bytes, if the first one isn't |
2375 | X86_64_SSE_CLASS or any other ones aren't |
2376 | X86_64_SSEUP_CLASS, everything should be passed in |
2377 | memory. */ |
2378 | if (classes[0] != X86_64_SSE_CLASS) |
2379 | return 0; |
2380 | |
2381 | for (i = 1; i < words; i++) |
2382 | if (classes[i] != X86_64_SSEUP_CLASS) |
2383 | return 0; |
2384 | } |
2385 | |
2386 | /* Final merger cleanup. */ |
2387 | for (i = 0; i < words; i++) |
2388 | { |
2389 | /* If one class is MEMORY, everything should be passed in |
2390 | memory. */ |
2391 | if (classes[i] == X86_64_MEMORY_CLASS) |
2392 | return 0; |
2393 | |
2394 | /* The X86_64_SSEUP_CLASS should be always preceded by |
2395 | X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ |
2396 | if (classes[i] == X86_64_SSEUP_CLASS |
2397 | && classes[i - 1] != X86_64_SSE_CLASS |
2398 | && classes[i - 1] != X86_64_SSEUP_CLASS) |
2399 | { |
2400 | /* The first one should never be X86_64_SSEUP_CLASS. */ |
2401 | gcc_assert (i != 0); |
2402 | classes[i] = X86_64_SSE_CLASS; |
2403 | } |
2404 | |
2405 | /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, |
2406 | everything should be passed in memory. */ |
2407 | if (classes[i] == X86_64_X87UP_CLASS |
2408 | && (classes[i - 1] != X86_64_X87_CLASS)) |
2409 | { |
2410 | static bool warned; |
2411 | |
2412 | /* The first one should never be X86_64_X87UP_CLASS. */ |
2413 | gcc_assert (i != 0); |
2414 | if (!warned && warn_psabi) |
2415 | { |
2416 | warned = true; |
2417 | inform (input_location, |
2418 | "the ABI of passing union with %<long double%>" |
2419 | " has changed in GCC 4.4"); |
2420 | } |
2421 | return 0; |
2422 | } |
2423 | } |
2424 | return words; |
2425 | } |
2426 | |
2427 | /* Compute alignment needed. We align all types to natural boundaries with |
2428 | exception of XFmode that is aligned to 64bits. */ |
2429 | if (mode != VOIDmode && mode != BLKmode) |
2430 | { |
2431 | int mode_alignment = GET_MODE_BITSIZE (mode); |
2432 | |
2433 | if (mode == XFmode) |
2434 | mode_alignment = 128; |
2435 | else if (mode == XCmode) |
2436 | mode_alignment = 256; |
2437 | if (COMPLEX_MODE_P (mode)) |
2438 | mode_alignment /= 2; |
2439 | /* Misaligned fields are always returned in memory. */ |
2440 | if (bit_offset % mode_alignment) |
2441 | return 0; |
2442 | } |
2443 | |
2444 | /* for V1xx modes, just use the base mode */ |
2445 | if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
2446 | && GET_MODE_UNIT_SIZE (mode) == bytes) |
2447 | mode = GET_MODE_INNER (mode); |
2448 | |
2449 | /* Classification of atomic types. */ |
2450 | switch (mode) |
2451 | { |
2452 | case E_SDmode: |
2453 | case E_DDmode: |
2454 | classes[0] = X86_64_SSE_CLASS; |
2455 | return 1; |
2456 | case E_TDmode: |
2457 | classes[0] = X86_64_SSE_CLASS; |
2458 | classes[1] = X86_64_SSEUP_CLASS; |
2459 | return 2; |
2460 | case E_DImode: |
2461 | case E_SImode: |
2462 | case E_HImode: |
2463 | case E_QImode: |
2464 | case E_CSImode: |
2465 | case E_CHImode: |
2466 | case E_CQImode: |
2467 | { |
2468 | int size = bit_offset + (int) GET_MODE_BITSIZE (mode); |
2469 | |
2470 | /* Analyze last 128 bits only. */ |
2471 | size = (size - 1) & 0x7f; |
2472 | |
2473 | if (size < 32) |
2474 | { |
2475 | classes[0] = X86_64_INTEGERSI_CLASS; |
2476 | return 1; |
2477 | } |
2478 | else if (size < 64) |
2479 | { |
2480 | classes[0] = X86_64_INTEGER_CLASS; |
2481 | return 1; |
2482 | } |
2483 | else if (size < 64+32) |
2484 | { |
2485 | classes[0] = X86_64_INTEGER_CLASS; |
2486 | classes[1] = X86_64_INTEGERSI_CLASS; |
2487 | return 2; |
2488 | } |
2489 | else if (size < 64+64) |
2490 | { |
2491 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2492 | return 2; |
2493 | } |
2494 | else |
2495 | gcc_unreachable (); |
2496 | } |
2497 | case E_CDImode: |
2498 | case E_TImode: |
2499 | classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
2500 | return 2; |
2501 | case E_COImode: |
2502 | case E_OImode: |
2503 | /* OImode shouldn't be used directly. */ |
2504 | gcc_unreachable (); |
2505 | case E_CTImode: |
2506 | return 0; |
2507 | case E_HFmode: |
2508 | case E_BFmode: |
2509 | if (!(bit_offset % 64)) |
2510 | classes[0] = X86_64_SSEHF_CLASS; |
2511 | else |
2512 | classes[0] = X86_64_SSE_CLASS; |
2513 | return 1; |
2514 | case E_SFmode: |
2515 | if (!(bit_offset % 64)) |
2516 | classes[0] = X86_64_SSESF_CLASS; |
2517 | else |
2518 | classes[0] = X86_64_SSE_CLASS; |
2519 | return 1; |
2520 | case E_DFmode: |
2521 | classes[0] = X86_64_SSEDF_CLASS; |
2522 | return 1; |
2523 | case E_XFmode: |
2524 | classes[0] = X86_64_X87_CLASS; |
2525 | classes[1] = X86_64_X87UP_CLASS; |
2526 | return 2; |
2527 | case E_TFmode: |
2528 | classes[0] = X86_64_SSE_CLASS; |
2529 | classes[1] = X86_64_SSEUP_CLASS; |
2530 | return 2; |
2531 | case E_HCmode: |
2532 | case E_BCmode: |
2533 | classes[0] = X86_64_SSE_CLASS; |
2534 | if (!(bit_offset % 64)) |
2535 | return 1; |
2536 | else |
2537 | { |
2538 | classes[1] = X86_64_SSEHF_CLASS; |
2539 | return 2; |
2540 | } |
2541 | case E_SCmode: |
2542 | classes[0] = X86_64_SSE_CLASS; |
2543 | if (!(bit_offset % 64)) |
2544 | return 1; |
2545 | else |
2546 | { |
2547 | static bool warned; |
2548 | |
2549 | if (!warned && warn_psabi) |
2550 | { |
2551 | warned = true; |
2552 | inform (input_location, |
2553 | "the ABI of passing structure with %<complex float%>" |
2554 | " member has changed in GCC 4.4"); |
2555 | } |
2556 | classes[1] = X86_64_SSESF_CLASS; |
2557 | return 2; |
2558 | } |
2559 | case E_DCmode: |
2560 | classes[0] = X86_64_SSEDF_CLASS; |
2561 | classes[1] = X86_64_SSEDF_CLASS; |
2562 | return 2; |
2563 | case E_XCmode: |
2564 | classes[0] = X86_64_COMPLEX_X87_CLASS; |
2565 | return 1; |
2566 | case E_TCmode: |
2567 | /* This mode is larger than 16 bytes. */ |
2568 | return 0; |
2569 | case E_V8SFmode: |
2570 | case E_V8SImode: |
2571 | case E_V32QImode: |
2572 | case E_V16HFmode: |
2573 | case E_V16BFmode: |
2574 | case E_V16HImode: |
2575 | case E_V4DFmode: |
2576 | case E_V4DImode: |
2577 | classes[0] = X86_64_SSE_CLASS; |
2578 | classes[1] = X86_64_SSEUP_CLASS; |
2579 | classes[2] = X86_64_SSEUP_CLASS; |
2580 | classes[3] = X86_64_SSEUP_CLASS; |
2581 | return 4; |
2582 | case E_V8DFmode: |
2583 | case E_V16SFmode: |
2584 | case E_V32HFmode: |
2585 | case E_V32BFmode: |
2586 | case E_V8DImode: |
2587 | case E_V16SImode: |
2588 | case E_V32HImode: |
2589 | case E_V64QImode: |
2590 | classes[0] = X86_64_SSE_CLASS; |
2591 | classes[1] = X86_64_SSEUP_CLASS; |
2592 | classes[2] = X86_64_SSEUP_CLASS; |
2593 | classes[3] = X86_64_SSEUP_CLASS; |
2594 | classes[4] = X86_64_SSEUP_CLASS; |
2595 | classes[5] = X86_64_SSEUP_CLASS; |
2596 | classes[6] = X86_64_SSEUP_CLASS; |
2597 | classes[7] = X86_64_SSEUP_CLASS; |
2598 | return 8; |
2599 | case E_V4SFmode: |
2600 | case E_V4SImode: |
2601 | case E_V16QImode: |
2602 | case E_V8HImode: |
2603 | case E_V8HFmode: |
2604 | case E_V8BFmode: |
2605 | case E_V2DFmode: |
2606 | case E_V2DImode: |
2607 | classes[0] = X86_64_SSE_CLASS; |
2608 | classes[1] = X86_64_SSEUP_CLASS; |
2609 | return 2; |
2610 | case E_V1TImode: |
2611 | case E_V1DImode: |
2612 | case E_V2SFmode: |
2613 | case E_V2SImode: |
2614 | case E_V4HImode: |
2615 | case E_V4HFmode: |
2616 | case E_V4BFmode: |
2617 | case E_V2HFmode: |
2618 | case E_V2BFmode: |
2619 | case E_V8QImode: |
2620 | classes[0] = X86_64_SSE_CLASS; |
2621 | return 1; |
2622 | case E_BLKmode: |
2623 | case E_VOIDmode: |
2624 | return 0; |
2625 | default: |
2626 | gcc_assert (VECTOR_MODE_P (mode)); |
2627 | |
2628 | if (bytes > 16) |
2629 | return 0; |
2630 | |
2631 | gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
2632 | |
2633 | if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
2634 | classes[0] = X86_64_INTEGERSI_CLASS; |
2635 | else |
2636 | classes[0] = X86_64_INTEGER_CLASS; |
2637 | classes[1] = X86_64_INTEGER_CLASS; |
2638 | return 1 + (bytes > 8); |
2639 | } |
2640 | } |
2641 | |
2642 | /* Wrapper around classify_argument with the extra zero_width_bitfields |
2643 | argument, to diagnose GCC 12.1 ABI differences for C. */ |
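/* For example, with something like "struct { float x; int : 0; float y; }"
   the C front end used to classify the zero-width bit-field as integer,
   which pulled the whole struct into a general-purpose register; ignoring
   the bit-field (as C++ always did, and as GCC 12.1 and later do for C)
   leaves both floats in the SSE class, so the struct is passed in an SSE
   register instead.  */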
2644 | |
2645 | static int |
2646 | classify_argument (machine_mode mode, const_tree type, |
2647 | enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
2648 | { |
2649 | int zero_width_bitfields = 0; |
2650 | static bool warned = false; |
2651 | int n = classify_argument (mode, type, classes, bit_offset, |
2652 | zero_width_bitfields); |
2653 | if (!zero_width_bitfields || warned || !warn_psabi) |
2654 | return n; |
2655 | enum x86_64_reg_class alt_classes[MAX_CLASSES]; |
2656 | zero_width_bitfields = 2; |
2657 | if (classify_argument (mode, type, alt_classes, bit_offset, |
2658 | zero_width_bitfields) != n) |
2659 | zero_width_bitfields = 3; |
2660 | else |
2661 | for (int i = 0; i < n; i++) |
2662 | if (classes[i] != alt_classes[i]) |
2663 | { |
2664 | zero_width_bitfields = 3; |
2665 | break; |
2666 | } |
2667 | if (zero_width_bitfields == 3) |
2668 | { |
2669 | warned = true; |
2670 | const char *url |
2671 | = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields"; |
2672 | |
2673 | inform (input_location, |
2674 | "the ABI of passing C structures with zero-width bit-fields" |
2675 | " has changed in GCC %{12.1%}", url); |
2676 | } |
2677 | return n; |
2678 | } |
2679 | |
2680 | /* Examine the argument and set the number of registers required in each |
2681 | class. Return true iff the parameter should be passed in memory. */ |
2682 | |
2683 | static bool |
2684 | examine_argument (machine_mode mode, const_tree type, int in_return, |
2685 | int *int_nregs, int *sse_nregs) |
2686 | { |
2687 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
2688 | int n = classify_argument (mode, type, regclass, 0); |
2689 | |
2690 | *int_nregs = 0; |
2691 | *sse_nregs = 0; |
2692 | |
2693 | if (!n) |
2694 | return true; |
2695 | for (n--; n >= 0; n--) |
2696 | switch (regclass[n]) |
2697 | { |
2698 | case X86_64_INTEGER_CLASS: |
2699 | case X86_64_INTEGERSI_CLASS: |
2700 | (*int_nregs)++; |
2701 | break; |
2702 | case X86_64_SSE_CLASS: |
2703 | case X86_64_SSEHF_CLASS: |
2704 | case X86_64_SSESF_CLASS: |
2705 | case X86_64_SSEDF_CLASS: |
2706 | (*sse_nregs)++; |
2707 | break; |
2708 | case X86_64_NO_CLASS: |
2709 | case X86_64_SSEUP_CLASS: |
2710 | break; |
2711 | case X86_64_X87_CLASS: |
2712 | case X86_64_X87UP_CLASS: |
2713 | case X86_64_COMPLEX_X87_CLASS: |
2714 | if (!in_return) |
2715 | return true; |
2716 | break; |
2717 | case X86_64_MEMORY_CLASS: |
2718 | gcc_unreachable (); |
2719 | } |
2720 | |
2721 | return false; |
2722 | } |
2723 | |
2724 | /* Construct container for the argument used by GCC interface. See |
2725 | FUNCTION_ARG for the detailed description. */ |
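/* Continuing the classify_argument example above (register numbers assumed
   for illustration), for "struct { double d; int i; }" this builds roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   i.e. the first eightbyte is passed in XMM0 and the second in RDI.  */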
2726 | |
2727 | static rtx |
2728 | construct_container (machine_mode mode, machine_mode orig_mode, |
2729 | const_tree type, int in_return, int nintregs, int nsseregs, |
2730 | const int *intreg, int sse_regno) |
2731 | { |
2732 | /* The following variables hold the static issued_error state. */ |
2733 | static bool issued_sse_arg_error; |
2734 | static bool issued_sse_ret_error; |
2735 | static bool issued_x87_ret_error; |
2736 | |
2737 | machine_mode tmpmode; |
2738 | int bytes |
2739 | = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
2740 | enum x86_64_reg_class regclass[MAX_CLASSES]; |
2741 | int n; |
2742 | int i; |
2743 | int nexps = 0; |
2744 | int needed_sseregs, needed_intregs; |
2745 | rtx exp[MAX_CLASSES]; |
2746 | rtx ret; |
2747 | |
2748 | n = classify_argument (mode, type, regclass, 0); |
2749 | if (!n) |
2750 | return NULL; |
2751 | if (examine_argument (mode, type, in_return, &needed_intregs, |
2752 | &needed_sseregs)) |
2753 | return NULL; |
2754 | if (needed_intregs > nintregs || needed_sseregs > nsseregs) |
2755 | return NULL; |
2756 | |
2757 | /* We allowed the user to turn off SSE for kernel mode. Don't crash if |
2758 | some less clueful developer tries to use floating-point anyway. */ |
2759 | if (needed_sseregs |
2760 | && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2))) |
2761 | { |
2762 | /* Return early if we shouldn't raise an error for invalid |
2763 | calls. */ |
2764 | if (cfun != NULL && cfun->machine->silent_p) |
2765 | return NULL; |
2766 | if (in_return) |
2767 | { |
2768 | if (!issued_sse_ret_error) |
2769 | { |
2770 | if (VALID_SSE2_TYPE_MODE (mode)) |
2771 | error ("SSE register return with SSE2 disabled"); |
2772 | else |
2773 | error ("SSE register return with SSE disabled"); |
2774 | issued_sse_ret_error = true; |
2775 | } |
2776 | } |
2777 | else if (!issued_sse_arg_error) |
2778 | { |
2779 | if (VALID_SSE2_TYPE_MODE (mode)) |
2780 | error ("SSE register argument with SSE2 disabled"); |
2781 | else |
2782 | error ("SSE register argument with SSE disabled"); |
2783 | issued_sse_arg_error = true; |
2784 | } |
2785 | return NULL; |
2786 | } |
2787 | |
2788 | /* Likewise, error if the ABI requires us to return values in the |
2789 | x87 registers and the user specified -mno-80387. */ |
2790 | if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) |
2791 | for (i = 0; i < n; i++) |
2792 | if (regclass[i] == X86_64_X87_CLASS |
2793 | || regclass[i] == X86_64_X87UP_CLASS |
2794 | || regclass[i] == X86_64_COMPLEX_X87_CLASS) |
2795 | { |
2796 | /* Return early if we shouldn't raise an error for invalid |
2797 | calls. */ |
2798 | if (cfun != NULL && cfun->machine->silent_p) |
2799 | return NULL; |
2800 | if (!issued_x87_ret_error) |
2801 | { |
2802 | error ("x87 register return with x87 disabled"); |
2803 | issued_x87_ret_error = true; |
2804 | } |
2805 | return NULL; |
2806 | } |
2807 | |
2808 | /* First construct simple cases. Avoid SCmode, since we want to use |
2809 | single register to pass this type. */ |
2810 | if (n == 1 && mode != SCmode && mode != HCmode) |
2811 | switch (regclass[0]) |
2812 | { |
2813 | case X86_64_INTEGER_CLASS: |
2814 | case X86_64_INTEGERSI_CLASS: |
2815 | return gen_rtx_REG (mode, intreg[0]); |
2816 | case X86_64_SSE_CLASS: |
2817 | case X86_64_SSEHF_CLASS: |
2818 | case X86_64_SSESF_CLASS: |
2819 | case X86_64_SSEDF_CLASS: |
2820 | if (mode != BLKmode) |
2821 | return gen_reg_or_parallel (mode, orig_mode, |
2822 | GET_SSE_REGNO (sse_regno)); |
2823 | break; |
2824 | case X86_64_X87_CLASS: |
2825 | case X86_64_COMPLEX_X87_CLASS: |
2826 | return gen_rtx_REG (mode, FIRST_STACK_REG); |
2827 | case X86_64_NO_CLASS: |
2828 | /* Zero sized array, struct or class. */ |
2829 | return NULL; |
2830 | default: |
2831 | gcc_unreachable (); |
2832 | } |
2833 | if (n == 2 |
2834 | && regclass[0] == X86_64_SSE_CLASS |
2835 | && regclass[1] == X86_64_SSEUP_CLASS |
2836 | && mode != BLKmode) |
2837 | return gen_reg_or_parallel (mode, orig_mode, |
2838 | GET_SSE_REGNO (sse_regno)); |
2839 | if (n == 4 |
2840 | && regclass[0] == X86_64_SSE_CLASS |
2841 | && regclass[1] == X86_64_SSEUP_CLASS |
2842 | && regclass[2] == X86_64_SSEUP_CLASS |
2843 | && regclass[3] == X86_64_SSEUP_CLASS |
2844 | && mode != BLKmode) |
2845 | return gen_reg_or_parallel (mode, orig_mode, |
2846 | GET_SSE_REGNO (sse_regno)); |
2847 | if (n == 8 |
2848 | && regclass[0] == X86_64_SSE_CLASS |
2849 | && regclass[1] == X86_64_SSEUP_CLASS |
2850 | && regclass[2] == X86_64_SSEUP_CLASS |
2851 | && regclass[3] == X86_64_SSEUP_CLASS |
2852 | && regclass[4] == X86_64_SSEUP_CLASS |
2853 | && regclass[5] == X86_64_SSEUP_CLASS |
2854 | && regclass[6] == X86_64_SSEUP_CLASS |
2855 | && regclass[7] == X86_64_SSEUP_CLASS |
2856 | && mode != BLKmode) |
2857 | return gen_reg_or_parallel (mode, orig_mode, |
2858 | GET_SSE_REGNO (sse_regno)); |
2859 | if (n == 2 |
2860 | && regclass[0] == X86_64_X87_CLASS |
2861 | && regclass[1] == X86_64_X87UP_CLASS) |
2862 | return gen_rtx_REG (XFmode, FIRST_STACK_REG); |
2863 | |
2864 | if (n == 2 |
2865 | && regclass[0] == X86_64_INTEGER_CLASS |
2866 | && regclass[1] == X86_64_INTEGER_CLASS |
2867 | && (mode == CDImode || mode == TImode || mode == BLKmode) |
2868 | && intreg[0] + 1 == intreg[1]) |
2869 | { |
2870 | if (mode == BLKmode) |
2871 | { |
2872 | /* Use TImode for BLKmode values in 2 integer registers. */ |
2873 | exp[0] = gen_rtx_EXPR_LIST (VOIDmode, |
2874 | gen_rtx_REG (TImode, intreg[0]), |
2875 | GEN_INT (0)); |
2876 | ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); |
2877 | XVECEXP (ret, 0, 0) = exp[0]; |
2878 | return ret; |
2879 | } |
2880 | else |
2881 | return gen_rtx_REG (mode, intreg[0]); |
2882 | } |
2883 | |
2884 | /* Otherwise figure out the entries of the PARALLEL. */ |
2885 | for (i = 0; i < n; i++) |
2886 | { |
2887 | int pos; |
2888 | |
2889 | switch (regclass[i]) |
2890 | { |
2891 | case X86_64_NO_CLASS: |
2892 | break; |
2893 | case X86_64_INTEGER_CLASS: |
2894 | case X86_64_INTEGERSI_CLASS: |
2895 | /* Merge TImodes on aligned occasions here too. */ |
2896 | if (i * 8 + 8 > bytes) |
2897 | { |
2898 | unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; |
2899 | if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) |
2900 | /* We've requested 24 bytes we |
2901 | don't have a mode for. Use DImode. */ |
2902 | tmpmode = DImode; |
2903 | } |
2904 | else if (regclass[i] == X86_64_INTEGERSI_CLASS) |
2905 | tmpmode = SImode; |
2906 | else |
2907 | tmpmode = DImode; |
2908 | exp [nexps++] |
2909 | = gen_rtx_EXPR_LIST (VOIDmode, |
2910 | gen_rtx_REG (tmpmode, *intreg), |
2911 | GEN_INT (i*8)); |
2912 | intreg++; |
2913 | break; |
2914 | case X86_64_SSEHF_CLASS: |
2915 | tmpmode = (mode == BFmode ? BFmode : HFmode); |
2916 | exp [nexps++] |
2917 | = gen_rtx_EXPR_LIST (VOIDmode, |
2918 | gen_rtx_REG (tmpmode, |
2919 | GET_SSE_REGNO (sse_regno)), |
2920 | GEN_INT (i*8)); |
2921 | sse_regno++; |
2922 | break; |
2923 | case X86_64_SSESF_CLASS: |
2924 | exp [nexps++] |
2925 | = gen_rtx_EXPR_LIST (VOIDmode, |
2926 | gen_rtx_REG (SFmode, |
2927 | GET_SSE_REGNO (sse_regno)), |
2928 | GEN_INT (i*8)); |
2929 | sse_regno++; |
2930 | break; |
2931 | case X86_64_SSEDF_CLASS: |
2932 | exp [nexps++] |
2933 | = gen_rtx_EXPR_LIST (VOIDmode, |
2934 | gen_rtx_REG (DFmode, |
2935 | GET_SSE_REGNO (sse_regno)), |
2936 | GEN_INT (i*8)); |
2937 | sse_regno++; |
2938 | break; |
2939 | case X86_64_SSE_CLASS: |
2940 | pos = i; |
2941 | switch (n) |
2942 | { |
2943 | case 1: |
2944 | tmpmode = DImode; |
2945 | break; |
2946 | case 2: |
2947 | if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) |
2948 | { |
2949 | tmpmode = TImode; |
2950 | i++; |
2951 | } |
2952 | else |
2953 | tmpmode = DImode; |
2954 | break; |
2955 | case 4: |
2956 | gcc_assert (i == 0 |
2957 | && regclass[1] == X86_64_SSEUP_CLASS |
2958 | && regclass[2] == X86_64_SSEUP_CLASS |
2959 | && regclass[3] == X86_64_SSEUP_CLASS); |
2960 | tmpmode = OImode; |
2961 | i += 3; |
2962 | break; |
2963 | case 8: |
2964 | gcc_assert (i == 0 |
2965 | && regclass[1] == X86_64_SSEUP_CLASS |
2966 | && regclass[2] == X86_64_SSEUP_CLASS |
2967 | && regclass[3] == X86_64_SSEUP_CLASS |
2968 | && regclass[4] == X86_64_SSEUP_CLASS |
2969 | && regclass[5] == X86_64_SSEUP_CLASS |
2970 | && regclass[6] == X86_64_SSEUP_CLASS |
2971 | && regclass[7] == X86_64_SSEUP_CLASS); |
2972 | tmpmode = XImode; |
2973 | i += 7; |
2974 | break; |
2975 | default: |
2976 | gcc_unreachable (); |
2977 | } |
2978 | exp [nexps++] |
2979 | = gen_rtx_EXPR_LIST (VOIDmode, |
2980 | gen_rtx_REG (tmpmode, |
2981 | GET_SSE_REGNO (sse_regno)), |
2982 | GEN_INT (pos*8)); |
2983 | sse_regno++; |
2984 | break; |
2985 | default: |
2986 | gcc_unreachable (); |
2987 | } |
2988 | } |
2989 | |
2990 | /* Empty aligned struct, union or class. */ |
2991 | if (nexps == 0) |
2992 | return NULL; |
2993 | |
2994 | ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); |
2995 | for (i = 0; i < nexps; i++) |
2996 | XVECEXP (ret, 0, i) = exp [i]; |
2997 | return ret; |
2998 | } |
2999 | |
3000 | /* Update the data in CUM to advance over an argument of mode MODE |
3001 | and data type TYPE. (TYPE is null for libcalls where that information |
3002 | may not be available.) |
3003 | |
3004 | Return the number of integer registers advanced over. */ |
3005 | |
3006 | static int |
3007 | function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, |
3008 | const_tree type, HOST_WIDE_INT bytes, |
3009 | HOST_WIDE_INT words) |
3010 | { |
3011 | int res = 0; |
3012 | bool error_p = false; |
3013 | |
3014 | if (TARGET_IAMCU) |
3015 | { |
3016 | /* Intel MCU psABI passes scalars and aggregates no larger than 8 |
3017 | bytes in registers. */ |
3018 | if (!VECTOR_MODE_P (mode) && bytes <= 8) |
3019 | goto pass_in_reg; |
3020 | return res; |
3021 | } |
3022 | |
3023 | switch (mode) |
3024 | { |
3025 | default: |
3026 | break; |
3027 | |
3028 | case E_BLKmode: |
3029 | if (bytes < 0) |
3030 | break; |
3031 | /* FALLTHRU */ |
3032 | |
3033 | case E_DImode: |
3034 | case E_SImode: |
3035 | case E_HImode: |
3036 | case E_QImode: |
3037 | pass_in_reg: |
3038 | cum->words += words; |
3039 | cum->nregs -= words; |
3040 | cum->regno += words; |
3041 | if (cum->nregs >= 0) |
3042 | res = words; |
3043 | if (cum->nregs <= 0) |
3044 | { |
3045 | cum->nregs = 0; |
3046 | cfun->machine->arg_reg_available = false; |
3047 | cum->regno = 0; |
3048 | } |
3049 | break; |
3050 | |
3051 | case E_OImode: |
3052 | /* OImode shouldn't be used directly. */ |
3053 | gcc_unreachable (); |
3054 | |
3055 | case E_DFmode: |
3056 | if (cum->float_in_sse == -1) |
3057 | error_p = true; |
3058 | if (cum->float_in_sse < 2) |
3059 | break; |
3060 | /* FALLTHRU */ |
3061 | case E_SFmode: |
3062 | if (cum->float_in_sse == -1) |
3063 | error_p = true; |
3064 | if (cum->float_in_sse < 1) |
3065 | break; |
3066 | /* FALLTHRU */ |
3067 | |
3068 | case E_V16HFmode: |
3069 | case E_V16BFmode: |
3070 | case E_V8SFmode: |
3071 | case E_V8SImode: |
3072 | case E_V64QImode: |
3073 | case E_V32HImode: |
3074 | case E_V16SImode: |
3075 | case E_V8DImode: |
3076 | case E_V32HFmode: |
3077 | case E_V32BFmode: |
3078 | case E_V16SFmode: |
3079 | case E_V8DFmode: |
3080 | case E_V32QImode: |
3081 | case E_V16HImode: |
3082 | case E_V4DFmode: |
3083 | case E_V4DImode: |
3084 | case E_TImode: |
3085 | case E_V16QImode: |
3086 | case E_V8HImode: |
3087 | case E_V4SImode: |
3088 | case E_V2DImode: |
3089 | case E_V8HFmode: |
3090 | case E_V8BFmode: |
3091 | case E_V4SFmode: |
3092 | case E_V2DFmode: |
3093 | if (!type || !AGGREGATE_TYPE_P (type)) |
3094 | { |
3095 | cum->sse_words += words; |
3096 | cum->sse_nregs -= 1; |
3097 | cum->sse_regno += 1; |
3098 | if (cum->sse_nregs <= 0) |
3099 | { |
3100 | cum->sse_nregs = 0; |
3101 | cum->sse_regno = 0; |
3102 | } |
3103 | } |
3104 | break; |
3105 | |
3106 | case E_V8QImode: |
3107 | case E_V4HImode: |
3108 | case E_V4HFmode: |
3109 | case E_V4BFmode: |
3110 | case E_V2SImode: |
3111 | case E_V2SFmode: |
3112 | case E_V1TImode: |
3113 | case E_V1DImode: |
3114 | if (!type || !AGGREGATE_TYPE_P (type)) |
3115 | { |
3116 | cum->mmx_words += words; |
3117 | cum->mmx_nregs -= 1; |
3118 | cum->mmx_regno += 1; |
3119 | if (cum->mmx_nregs <= 0) |
3120 | { |
3121 | cum->mmx_nregs = 0; |
3122 | cum->mmx_regno = 0; |
3123 | } |
3124 | } |
3125 | break; |
3126 | } |
3127 | if (error_p) |
3128 | { |
3129 | cum->float_in_sse = 0; |
3130 | error ("calling %qD with SSE calling convention without " |
3131 | "SSE/SSE2 enabled", cum->decl); |
3132 | sorry ("this is a GCC bug that can be worked around by adding " |
3133 | "attribute used to function called"); |
3134 | } |
3135 | |
3136 | return res; |
3137 | } |
3138 | |
3139 | static int |
3140 | function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, |
3141 | const_tree type, HOST_WIDE_INT words, bool named) |
3142 | { |
3143 | int int_nregs, sse_nregs; |
3144 | |
3145 | /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ |
3146 | if (!named && (VALID_AVX512F_REG_MODE (mode) |
3147 | || VALID_AVX256_REG_MODE (mode))) |
3148 | return 0; |
3149 | |
3150 | if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) |
3151 | && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
3152 | { |
3153 | cum->nregs -= int_nregs; |
3154 | cum->sse_nregs -= sse_nregs; |
3155 | cum->regno += int_nregs; |
3156 | cum->sse_regno += sse_nregs; |
3157 | return int_nregs; |
3158 | } |
3159 | else |
3160 | { |
3161 | int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
3162 | cum->words = ROUND_UP (cum->words, align); |
3163 | cum->words += words; |
3164 | return 0; |
3165 | } |
3166 | } |
3167 | |
3168 | static int |
3169 | function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
3170 | HOST_WIDE_INT words) |
3171 | { |
3172 | /* Otherwise, this should be passed indirect. */ |
3173 | gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
3174 | |
3175 | cum->words += words; |
3176 | if (cum->nregs > 0) |
3177 | { |
3178 | cum->nregs -= 1; |
3179 | cum->regno += 1; |
3180 | return 1; |
3181 | } |
3182 | return 0; |
3183 | } |
3184 | |
3185 | /* Update the data in CUM to advance over argument ARG. */ |
3186 | |
3187 | static void |
3188 | ix86_function_arg_advance (cumulative_args_t cum_v, |
3189 | const function_arg_info &arg) |
3190 | { |
3191 | CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
3192 | machine_mode mode = arg.mode; |
3193 | HOST_WIDE_INT bytes, words; |
3194 | int nregs; |
3195 | |
3196 | /* The argument of interrupt handler is a special case and is |
3197 | handled in ix86_function_arg. */ |
3198 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3199 | return; |
3200 | |
3201 | bytes = arg.promoted_size_in_bytes (); |
3202 | words = CEIL (bytes, UNITS_PER_WORD); |
3203 | |
3204 | if (arg.type) |
3205 | mode = type_natural_mode (arg.type, NULL, false); |
3206 | |
3207 | if (TARGET_64BIT) |
3208 | { |
3209 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3210 | |
3211 | if (call_abi == MS_ABI) |
3212 | nregs = function_arg_advance_ms_64 (cum, bytes, words); |
3213 | else |
3214 | nregs = function_arg_advance_64 (cum, mode, arg.type, words, |
3215 | arg.named); |
3216 | } |
3217 | else |
3218 | nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words); |
3219 | |
3220 | if (!nregs) |
3221 | { |
3222 | /* Track if there are outgoing arguments on stack. */ |
3223 | if (cum->caller) |
3224 | cfun->machine->outgoing_args_on_stack = true; |
3225 | } |
3226 | } |
3227 | |
3228 | /* Define where to put the arguments to a function. |
3229 | Value is zero to push the argument on the stack, |
3230 | or a hard register in which to store the argument. |
3231 | |
3232 | MODE is the argument's machine mode. |
3233 | TYPE is the data type of the argument (as a tree). |
3234 | This is null for libcalls where that information may |
3235 | not be available. |
3236 | CUM is a variable of type CUMULATIVE_ARGS which gives info about |
3237 | the preceding args and about the function being called. |
3238 | NAMED is nonzero if this argument is a named parameter |
3239 | (otherwise it is an extra parameter matching an ellipsis). */ |
3240 | |
3241 | static rtx |
3242 | function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, |
3243 | machine_mode orig_mode, const_tree type, |
3244 | HOST_WIDE_INT bytes, HOST_WIDE_INT words) |
3245 | { |
3246 | bool error_p = false; |
3247 | |
3248 | /* Avoid the AL settings for the Unix64 ABI. */ |
3249 | if (mode == VOIDmode) |
3250 | return constm1_rtx; |
3251 | |
3252 | if (TARGET_IAMCU) |
3253 | { |
3254 | /* Intel MCU psABI passes scalars and aggregates no larger than 8 |
3255 | bytes in registers. */ |
3256 | if (!VECTOR_MODE_P (mode) && bytes <= 8) |
3257 | goto pass_in_reg; |
3258 | return NULL_RTX; |
3259 | } |
3260 | |
3261 | switch (mode) |
3262 | { |
3263 | default: |
3264 | break; |
3265 | |
3266 | case E_BLKmode: |
3267 | if (bytes < 0) |
3268 | break; |
3269 | /* FALLTHRU */ |
3270 | case E_DImode: |
3271 | case E_SImode: |
3272 | case E_HImode: |
3273 | case E_QImode: |
3274 | pass_in_reg: |
3275 | if (words <= cum->nregs) |
3276 | { |
3277 | int regno = cum->regno; |
3278 | |
3279 | /* Fastcall allocates the first two DWORD (SImode) or |
3280 | smaller arguments to ECX and EDX if they aren't |
3281 | aggregate types. */ |
3282 | if (cum->fastcall) |
3283 | { |
3284 | if (mode == BLKmode |
3285 | || mode == DImode |
3286 | || (type && AGGREGATE_TYPE_P (type))) |
3287 | break; |
3288 | |
3289 | /* ECX, not EAX, is the first allocated register. */ |
3290 | if (regno == AX_REG) |
3291 | regno = CX_REG; |
3292 | } |
3293 | return gen_rtx_REG (mode, regno); |
3294 | } |
3295 | break; |
3296 | |
3297 | case E_DFmode: |
3298 | if (cum->float_in_sse == -1) |
3299 | error_p = true; |
3300 | if (cum->float_in_sse < 2) |
3301 | break; |
3302 | /* FALLTHRU */ |
3303 | case E_SFmode: |
3304 | if (cum->float_in_sse == -1) |
3305 | error_p = true; |
3306 | if (cum->float_in_sse < 1) |
3307 | break; |
3308 | /* FALLTHRU */ |
3309 | case E_TImode: |
3310 | /* In 32bit, we pass TImode in xmm registers. */ |
3311 | case E_V16QImode: |
3312 | case E_V8HImode: |
3313 | case E_V4SImode: |
3314 | case E_V2DImode: |
3315 | case E_V8HFmode: |
3316 | case E_V8BFmode: |
3317 | case E_V4SFmode: |
3318 | case E_V2DFmode: |
3319 | if (!type || !AGGREGATE_TYPE_P (type)) |
3320 | { |
3321 | if (cum->sse_nregs) |
3322 | return gen_reg_or_parallel (mode, orig_mode, |
3323 | regno: cum->sse_regno + FIRST_SSE_REG); |
3324 | } |
3325 | break; |
3326 | |
3327 | case E_OImode: |
3328 | case E_XImode: |
3329 | /* OImode and XImode shouldn't be used directly. */ |
3330 | gcc_unreachable (); |
3331 | |
3332 | case E_V64QImode: |
3333 | case E_V32HImode: |
3334 | case E_V16SImode: |
3335 | case E_V8DImode: |
3336 | case E_V32HFmode: |
3337 | case E_V32BFmode: |
3338 | case E_V16SFmode: |
3339 | case E_V8DFmode: |
3340 | case E_V16HFmode: |
3341 | case E_V16BFmode: |
3342 | case E_V8SFmode: |
3343 | case E_V8SImode: |
3344 | case E_V32QImode: |
3345 | case E_V16HImode: |
3346 | case E_V4DFmode: |
3347 | case E_V4DImode: |
3348 | if (!type || !AGGREGATE_TYPE_P (type)) |
3349 | { |
3350 | if (cum->sse_nregs) |
3351 | return gen_reg_or_parallel (mode, orig_mode, |
3352 | regno: cum->sse_regno + FIRST_SSE_REG); |
3353 | } |
3354 | break; |
3355 | |
3356 | case E_V8QImode: |
3357 | case E_V4HImode: |
3358 | case E_V4HFmode: |
3359 | case E_V4BFmode: |
3360 | case E_V2SImode: |
3361 | case E_V2SFmode: |
3362 | case E_V1TImode: |
3363 | case E_V1DImode: |
3364 | if (!type || !AGGREGATE_TYPE_P (type)) |
3365 | { |
3366 | if (cum->mmx_nregs) |
3367 | return gen_reg_or_parallel (mode, orig_mode, |
3368 | regno: cum->mmx_regno + FIRST_MMX_REG); |
3369 | } |
3370 | break; |
3371 | } |
3372 | if (error_p) |
3373 | { |
3374 | cum->float_in_sse = 0; |
3375 | error ("calling %qD with SSE calling convention without " |
3376 | "SSE/SSE2 enabled", cum->decl); |
3377 | sorry ("this is a GCC bug that can be worked around by adding " |
3378 | "attribute used to function called"); |
3379 | } |
3380 | |
3381 | return NULL_RTX; |
3382 | } |
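
/* For illustration, a sketch of the fastcall path above, assuming the
   hypothetical 32-bit declaration

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   The first two SImode (or smaller) non-aggregate arguments go in ECX and
   EDX (note the AX_REG -> CX_REG switch above), so a ends up in ECX, b in
   EDX, and c is pushed on the stack.  */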
3383 | |
3384 | static rtx |
3385 | function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3386 | machine_mode orig_mode, const_tree type, bool named) |
3387 | { |
3388 | /* Handle a hidden AL argument containing number of registers |
3389 | for varargs x86-64 functions. */ |
3390 | if (mode == VOIDmode) |
3391 | return GEN_INT (cum->maybe_vaarg |
3392 | ? (cum->sse_nregs < 0 |
3393 | ? X86_64_SSE_REGPARM_MAX |
3394 | : cum->sse_regno) |
3395 | : -1); |
3396 | |
3397 | switch (mode) |
3398 | { |
3399 | default: |
3400 | break; |
3401 | |
3402 | case E_V16HFmode: |
3403 | case E_V16BFmode: |
3404 | case E_V8SFmode: |
3405 | case E_V8SImode: |
3406 | case E_V32QImode: |
3407 | case E_V16HImode: |
3408 | case E_V4DFmode: |
3409 | case E_V4DImode: |
3410 | case E_V32HFmode: |
3411 | case E_V32BFmode: |
3412 | case E_V16SFmode: |
3413 | case E_V16SImode: |
3414 | case E_V64QImode: |
3415 | case E_V32HImode: |
3416 | case E_V8DFmode: |
3417 | case E_V8DImode: |
3418 | /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */ |
3419 | if (!named) |
3420 | return NULL; |
3421 | break; |
3422 | } |
3423 | |
3424 | return construct_container (mode, orig_mode, type, in_return: 0, nintregs: cum->nregs, |
3425 | nsseregs: cum->sse_nregs, |
3426 | intreg: &x86_64_int_parameter_registers [cum->regno], |
3427 | sse_regno: cum->sse_regno); |
3428 | } |
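
/* For illustration, a sketch of the SysV x86-64 classification performed by
   construct_container above, assuming the hypothetical type

       struct s { double d; long l; };      16 bytes, two eightbytes

   The first eightbyte is classified SSE and the second INTEGER, so the
   value is passed in one XMM register plus one general register, expressed
   as a PARALLEL of two EXPR_LIST entries with offsets 0 and 8.  */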
3429 | |
3430 | static rtx |
3431 | function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
3432 | machine_mode orig_mode, bool named, const_tree type, |
3433 | HOST_WIDE_INT bytes) |
3434 | { |
3435 | unsigned int regno; |
3436 | |
3437 | /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call. |
3438 | We use the value -2 to specify that the current function call is MS ABI. */ |
3439 | if (mode == VOIDmode) |
3440 | return GEN_INT (-2); |
3441 | |
3442 | /* If we've run out of registers, it goes on the stack. */ |
3443 | if (cum->nregs == 0) |
3444 | return NULL_RTX; |
3445 | |
3446 | regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; |
3447 | |
3448 | /* Only floating point modes are passed in anything but integer regs. */ |
3449 | if (TARGET_SSE && (mode == SFmode || mode == DFmode)) |
3450 | { |
3451 | if (named) |
3452 | { |
3453 | if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) |
3454 | regno = cum->regno + FIRST_SSE_REG; |
3455 | } |
3456 | else |
3457 | { |
3458 | rtx t1, t2; |
3459 | |
3460 | /* Unnamed floating parameters are passed in both the |
3461 | SSE and integer registers. */ |
3462 | t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); |
3463 | t2 = gen_rtx_REG (mode, regno); |
3464 | t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); |
3465 | t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); |
3466 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); |
3467 | } |
3468 | } |
3469 | /* Handle aggregate types passed in registers. */ |
3470 | if (orig_mode == BLKmode) |
3471 | { |
3472 | if (bytes > 0 && bytes <= 8) |
3473 | mode = (bytes > 4 ? DImode : SImode); |
3474 | if (mode == BLKmode) |
3475 | mode = DImode; |
3476 | } |
3477 | |
3478 | return gen_reg_or_parallel (mode, orig_mode, regno); |
3479 | } |
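
/* For illustration: with a hypothetical variadic callee

       void f (const char *fmt, ...);    f ("%f", 2.5);

   the unnamed double 2.5 occupies the second argument slot, and the
   PARALLEL built above passes it in both XMM1 and RDX, since under the
   MS ABI the callee may pick an unnamed floating value up from either
   register set.  */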
3480 | |
3481 | /* Return where to put the arguments to a function. |
3482 | Return zero to push the argument on the stack, or a hard register in which to store the argument. |
3483 | |
3484 | ARG describes the argument while CUM gives information about the |
3485 | preceding args and about the function being called. */ |
3486 | |
3487 | static rtx |
3488 | ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) |
3489 | { |
3490 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3491 | machine_mode mode = arg.mode; |
3492 | HOST_WIDE_INT bytes, words; |
3493 | rtx reg; |
3494 | |
3495 | if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
3496 | { |
3497 | gcc_assert (arg.type != NULL_TREE); |
3498 | if (POINTER_TYPE_P (arg.type)) |
3499 | { |
3500 | /* This is the pointer argument. */ |
3501 | gcc_assert (TYPE_MODE (arg.type) == ptr_mode); |
3502 | /* It is at -WORD(AP) in the current frame in interrupt and |
3503 | exception handlers. */ |
3504 | reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); |
3505 | } |
3506 | else |
3507 | { |
3508 | gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION |
3509 | && TREE_CODE (arg.type) == INTEGER_TYPE |
3510 | && TYPE_MODE (arg.type) == word_mode); |
3511 | /* The error code is the word-mode integer argument at |
3512 | -2 * WORD(AP) in the current frame of the exception |
3513 | handler. */ |
3514 | reg = gen_rtx_MEM (word_mode, |
3515 | plus_constant (Pmode, |
3516 | arg_pointer_rtx, |
3517 | -2 * UNITS_PER_WORD)); |
3518 | } |
3519 | return reg; |
3520 | } |
3521 | |
3522 | bytes = arg.promoted_size_in_bytes (); |
3523 | words = CEIL (bytes, UNITS_PER_WORD); |
3524 | |
3525 | /* To simplify the code below, represent vector types with a vector mode |
3526 | even if MMX/SSE are not active. */ |
3527 | if (arg.type && VECTOR_TYPE_P (arg.type)) |
3528 | mode = type_natural_mode (type: arg.type, cum, in_return: false); |
3529 | |
3530 | if (TARGET_64BIT) |
3531 | { |
3532 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3533 | |
3534 | if (call_abi == MS_ABI) |
3535 | reg = function_arg_ms_64 (cum, mode, orig_mode: arg.mode, named: arg.named, |
3536 | type: arg.type, bytes); |
3537 | else |
3538 | reg = function_arg_64 (cum, mode, orig_mode: arg.mode, type: arg.type, named: arg.named); |
3539 | } |
3540 | else |
3541 | reg = function_arg_32 (cum, mode, orig_mode: arg.mode, type: arg.type, bytes, words); |
3542 | |
3543 | /* Track if there are outgoing arguments on stack. */ |
3544 | if (reg == NULL_RTX && cum->caller) |
3545 | cfun->machine->outgoing_args_on_stack = true; |
3546 | |
3547 | return reg; |
3548 | } |
3549 | |
3550 | /* A C expression that indicates when an argument must be passed by |
3551 | reference. If nonzero for an argument, a copy of that argument is |
3552 | made in memory and a pointer to the argument is passed instead of |
3553 | the argument itself. The pointer is passed in whatever way is |
3554 | appropriate for passing a pointer to that type. */ |
3555 | |
3556 | static bool |
3557 | ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) |
3558 | { |
3559 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
3560 | |
3561 | if (TARGET_64BIT) |
3562 | { |
3563 | enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
3564 | |
3565 | /* See Windows x64 Software Convention. */ |
3566 | if (call_abi == MS_ABI) |
3567 | { |
3568 | HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); |
3569 | |
3570 | if (tree type = arg.type) |
3571 | { |
3572 | /* Arrays are passed by reference. */ |
3573 | if (TREE_CODE (type) == ARRAY_TYPE) |
3574 | return true; |
3575 | |
3576 | if (RECORD_OR_UNION_TYPE_P (type)) |
3577 | { |
3578 | /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
3579 | are passed by reference. */ |
3580 | msize = int_size_in_bytes (type); |
3581 | } |
3582 | } |
3583 | |
3584 | /* __m128 is passed by reference. */ |
3585 | return msize != 1 && msize != 2 && msize != 4 && msize != 8; |
3586 | } |
3587 | else if (arg.type && int_size_in_bytes (arg.type) == -1) |
3588 | return true; |
3589 | } |
3590 | |
3591 | return false; |
3592 | } |
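
/* For illustration: under the MS x64 rules above, a hypothetical
   struct s { char c[3]; } (size 3) or an __m128 (size 16) is passed by
   reference, because only sizes 1, 2, 4 and 8 are passed by value; under
   the SysV x86-64 rules, only types whose size cannot be determined at
   compile time (int_size_in_bytes == -1) take this path.  */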
3593 | |
3594 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3595 | passing ABI. XXX: This function is obsolete and is only used for |
3596 | checking psABI compatibility with previous versions of GCC. */ |
3597 | |
3598 | static bool |
3599 | ix86_compat_aligned_value_p (const_tree type) |
3600 | { |
3601 | machine_mode mode = TYPE_MODE (type); |
3602 | if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
3603 | || mode == TDmode |
3604 | || mode == TFmode |
3605 | || mode == TCmode) |
3606 | && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
3607 | return true; |
3608 | if (TYPE_ALIGN (type) < 128) |
3609 | return false; |
3610 | |
3611 | if (AGGREGATE_TYPE_P (type)) |
3612 | { |
3613 | /* Walk the aggregates recursively. */ |
3614 | switch (TREE_CODE (type)) |
3615 | { |
3616 | case RECORD_TYPE: |
3617 | case UNION_TYPE: |
3618 | case QUAL_UNION_TYPE: |
3619 | { |
3620 | tree field; |
3621 | |
3622 | /* Walk all the structure fields. */ |
3623 | for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
3624 | { |
3625 | if (TREE_CODE (field) == FIELD_DECL |
3626 | && ix86_compat_aligned_value_p (TREE_TYPE (field))) |
3627 | return true; |
3628 | } |
3629 | break; |
3630 | } |
3631 | |
3632 | case ARRAY_TYPE: |
3633 | /* Just in case some languages pass arrays by value. */ |
3634 | if (ix86_compat_aligned_value_p (TREE_TYPE (type))) |
3635 | return true; |
3636 | break; |
3637 | |
3638 | default: |
3639 | gcc_unreachable (); |
3640 | } |
3641 | } |
3642 | return false; |
3643 | } |
3644 | |
3645 | /* Return the alignment boundary for MODE and TYPE with alignment ALIGN. |
3646 | XXX: This function is obsolete and is only used for checking psABI |
3647 | compatibility with previous versions of GCC. */ |
3648 | |
3649 | static unsigned int |
3650 | ix86_compat_function_arg_boundary (machine_mode mode, |
3651 | const_tree type, unsigned int align) |
3652 | { |
3653 | /* In 32bit, only _Decimal128 and __float128 are aligned to their |
3654 | natural boundaries. */ |
3655 | if (!TARGET_64BIT && mode != TDmode && mode != TFmode) |
3656 | { |
3657 | /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
3658 | make an exception for SSE modes since these require 128bit |
3659 | alignment. |
3660 | |
3661 | The handling here differs from field_alignment. ICC aligns MMX |
3662 | arguments to 4 byte boundaries, while structure fields are aligned |
3663 | to 8 byte boundaries. */ |
3664 | if (!type) |
3665 | { |
3666 | if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) |
3667 | align = PARM_BOUNDARY; |
3668 | } |
3669 | else |
3670 | { |
3671 | if (!ix86_compat_aligned_value_p (type)) |
3672 | align = PARM_BOUNDARY; |
3673 | } |
3674 | } |
3675 | if (align > BIGGEST_ALIGNMENT) |
3676 | align = BIGGEST_ALIGNMENT; |
3677 | return align; |
3678 | } |
3679 | |
3680 | /* Return true when TYPE should be 128bit aligned for 32bit argument |
3681 | passing ABI. */ |
3682 | |
3683 | static bool |
3684 | ix86_contains_aligned_value_p (const_tree type) |
3685 | { |
3686 | machine_mode mode = TYPE_MODE (type); |
3687 | |
3688 | if (mode == XFmode || mode == XCmode) |
3689 | return false; |
3690 | |
3691 | if (TYPE_ALIGN (type) < 128) |
3692 | return false; |
3693 | |
3694 | if (AGGREGATE_TYPE_P (type)) |
3695 | { |
3696 | /* Walk the aggregates recursively. */ |
3697 | switch (TREE_CODE (type)) |
3698 | { |
3699 | case RECORD_TYPE: |
3700 | case UNION_TYPE: |
3701 | case QUAL_UNION_TYPE: |
3702 | { |
3703 | tree field; |
3704 | |
3705 | /* Walk all the structure fields. */ |
3706 | for (field = TYPE_FIELDS (type); |
3707 | field; |
3708 | field = DECL_CHAIN (field)) |
3709 | { |
3710 | if (TREE_CODE (field) == FIELD_DECL |
3711 | && ix86_contains_aligned_value_p (TREE_TYPE (field))) |
3712 | return true; |
3713 | } |
3714 | break; |
3715 | } |
3716 | |
3717 | case ARRAY_TYPE: |
3718 | /* Just in case some languages pass arrays by value. */ |
3719 | if (ix86_contains_aligned_value_p (TREE_TYPE (type))) |
3720 | return true; |
3721 | break; |
3722 | |
3723 | default: |
3724 | gcc_unreachable (); |
3725 | } |
3726 | } |
3727 | else |
3728 | return TYPE_ALIGN (type) >= 128; |
3729 | |
3730 | return false; |
3731 | } |
3732 | |
3733 | /* Gives the alignment boundary, in bits, of an argument with the |
3734 | specified mode and type. */ |
3735 | |
3736 | static unsigned int |
3737 | ix86_function_arg_boundary (machine_mode mode, const_tree type) |
3738 | { |
3739 | unsigned int align; |
3740 | if (type) |
3741 | { |
3742 | /* Since the main variant type is used for the call, convert TYPE |
3743 | to its main variant. */ |
3744 | type = TYPE_MAIN_VARIANT (type); |
3745 | align = TYPE_ALIGN (type); |
3746 | if (TYPE_EMPTY_P (type)) |
3747 | return PARM_BOUNDARY; |
3748 | } |
3749 | else |
3750 | align = GET_MODE_ALIGNMENT (mode); |
3751 | if (align < PARM_BOUNDARY) |
3752 | align = PARM_BOUNDARY; |
3753 | else |
3754 | { |
3755 | static bool warned; |
3756 | unsigned int saved_align = align; |
3757 | |
3758 | if (!TARGET_64BIT) |
3759 | { |
3760 | /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ |
3761 | if (!type) |
3762 | { |
3763 | if (mode == XFmode || mode == XCmode) |
3764 | align = PARM_BOUNDARY; |
3765 | } |
3766 | else if (!ix86_contains_aligned_value_p (type)) |
3767 | align = PARM_BOUNDARY; |
3768 | |
3769 | if (align < 128) |
3770 | align = PARM_BOUNDARY; |
3771 | } |
3772 | |
3773 | if (warn_psabi |
3774 | && !warned |
3775 | && align != ix86_compat_function_arg_boundary (mode, type, |
3776 | align: saved_align)) |
3777 | { |
3778 | warned = true; |
3779 | inform (input_location, |
3780 | "the ABI for passing parameters with %d-byte" |
3781 | " alignment has changed in GCC 4.6", |
3782 | align / BITS_PER_UNIT); |
3783 | } |
3784 | } |
3785 | |
3786 | return align; |
3787 | } |
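
/* For illustration, assuming the hypothetical 32-bit prototype

       void f (int i, __m128 v);

   i is aligned to PARM_BOUNDARY (32 bits) while v, whose type alignment is
   128 bits, keeps the 128-bit boundary computed above (and may trigger the
   -Wpsabi note when the answer differs from the pre-GCC 4.6 computation).  */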
3788 | |
3789 | /* Return true if N is a possible register number of function value. */ |
3790 | |
3791 | static bool |
3792 | ix86_function_value_regno_p (const unsigned int regno) |
3793 | { |
3794 | switch (regno) |
3795 | { |
3796 | case AX_REG: |
3797 | return true; |
3798 | case DX_REG: |
3799 | return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); |
3800 | case DI_REG: |
3801 | case SI_REG: |
3802 | return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; |
3803 | |
3804 | /* Complex values are returned in %st(0)/%st(1) pair. */ |
3805 | case ST0_REG: |
3806 | case ST1_REG: |
3807 | /* TODO: The function should depend on current function ABI but |
3808 | builtins.cc would need updating then. Therefore we use the |
3809 | default ABI. */ |
3810 | if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) |
3811 | return false; |
3812 | return TARGET_FLOAT_RETURNS_IN_80387; |
3813 | |
3814 | /* Complex values are returned in %xmm0/%xmm1 pair. */ |
3815 | case XMM0_REG: |
3816 | case XMM1_REG: |
3817 | return TARGET_SSE; |
3818 | |
3819 | case MM0_REG: |
3820 | if (TARGET_MACHO || TARGET_64BIT) |
3821 | return false; |
3822 | return TARGET_MMX; |
3823 | } |
3824 | |
3825 | return false; |
3826 | } |
3827 | |
3828 | /* Check whether the register REGNO should be zeroed on X86. |
3829 | When ALL_SSE_ZEROED is true, all SSE registers have been zeroed |
3830 | together, so there is no need to zero them again. |
3831 | When NEED_ZERO_MMX is true, MMX registers should be cleared. */ |
3832 | |
3833 | static bool |
3834 | zero_call_used_regno_p (const unsigned int regno, |
3835 | bool all_sse_zeroed, |
3836 | bool need_zero_mmx) |
3837 | { |
3838 | return GENERAL_REGNO_P (regno) |
3839 | || (!all_sse_zeroed && SSE_REGNO_P (regno)) |
3840 | || MASK_REGNO_P (regno) |
3841 | || (need_zero_mmx && MMX_REGNO_P (regno)); |
3842 | } |
3843 | |
3844 | /* Return the machine_mode that is used to zero register REGNO. */ |
3845 | |
3846 | static machine_mode |
3847 | zero_call_used_regno_mode (const unsigned int regno) |
3848 | { |
3849 | /* NB: We only need to zero the lower 32 bits for integer registers |
3850 | and the lower 128 bits for vector registers since the destinations |
3851 | are zero-extended to the full register width. */ |
3852 | if (GENERAL_REGNO_P (regno)) |
3853 | return SImode; |
3854 | else if (SSE_REGNO_P (regno)) |
3855 | return V4SFmode; |
3856 | else if (MASK_REGNO_P (regno)) |
3857 | return HImode; |
3858 | else if (MMX_REGNO_P (regno)) |
3859 | return V2SImode; |
3860 | else |
3861 | gcc_unreachable (); |
3862 | } |
3863 | |
3864 | /* Generate a rtx to zero all vector registers together if possible, |
3865 | otherwise, return NULL. */ |
3866 | |
3867 | static rtx |
3868 | zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs) |
3869 | { |
3870 | if (!TARGET_AVX) |
3871 | return NULL; |
3872 | |
3873 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3874 | if ((LEGACY_SSE_REGNO_P (regno) |
3875 | || (TARGET_64BIT |
3876 | && (REX_SSE_REGNO_P (regno) |
3877 | || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno))))) |
3878 | && !TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3879 | return NULL; |
3880 | |
3881 | return gen_avx_vzeroall (); |
3882 | } |
3883 | |
3884 | /* Generate insns to zero all st registers together. |
3885 | Return true when zeroing instructions are generated. |
3886 | Assume the number of st registers that are zeroed is num_of_st, |
3887 | we will emit the following sequence to zero them together: |
3888 | fldz; \ |
3889 | fldz; \ |
3890 | ... |
3891 | fldz; \ |
3892 | fstp %%st(0); \ |
3893 | fstp %%st(0); \ |
3894 | ... |
3895 | fstp %%st(0); |
3896 | i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear |
3897 | the stack and mark the stack slots empty. |
3898 | |
3899 | How to compute the num_of_st: |
3900 | There is no direct mapping from stack registers to hard register |
3901 | numbers. If one stack register needs to be cleared, we don't know |
3902 | where in the stack the value remains. So, if any stack register |
3903 | needs to be cleared, the whole stack should be cleared. However, |
3904 | x87 stack registers that hold the return value should be excluded. |
3905 | x87 returns in the top (two for complex values) register, so |
3906 | num_of_st should be 7/6 when x87 returns, otherwise it will be 8. |
3907 | Return the value of num_of_st. */ |
3908 | |
3909 | |
3910 | static int |
3911 | zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs) |
3912 | { |
3913 | |
3914 | /* If the FPU is disabled, no need to zero all st registers. */ |
3915 | if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
3916 | return 0; |
3917 | |
3918 | unsigned int num_of_st = 0; |
3919 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3920 | if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno)) |
3921 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3922 | { |
3923 | num_of_st++; |
3924 | break; |
3925 | } |
3926 | |
3927 | if (num_of_st == 0) |
3928 | return 0; |
3929 | |
3930 | bool return_with_x87 = false; |
3931 | return_with_x87 = (crtl->return_rtx |
3932 | && (STACK_REG_P (crtl->return_rtx))); |
3933 | |
3934 | bool complex_return = false; |
3935 | complex_return = (crtl->return_rtx |
3936 | && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx))); |
3937 | |
3938 | if (return_with_x87) |
3939 | if (complex_return) |
3940 | num_of_st = 6; |
3941 | else |
3942 | num_of_st = 7; |
3943 | else |
3944 | num_of_st = 8; |
3945 | |
3946 | rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG); |
3947 | for (unsigned int i = 0; i < num_of_st; i++) |
3948 | emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode))); |
3949 | |
3950 | for (unsigned int i = 0; i < num_of_st; i++) |
3951 | { |
3952 | rtx insn; |
3953 | insn = emit_insn (gen_rtx_SET (st_reg, st_reg)); |
3954 | add_reg_note (insn, REG_DEAD, st_reg); |
3955 | } |
3956 | return num_of_st; |
3957 | } |
3958 | |
3959 | |
3960 | /* When the routine exits in MMX mode, if any ST register needs |
3961 | to be zeroed, we should clear all MMX registers except the |
3962 | RET_MMX_REGNO that holds the return value. */ |
3963 | static bool |
3964 | zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs, |
3965 | unsigned int ret_mmx_regno) |
3966 | { |
3967 | bool need_zero_all_mm = false; |
3968 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
3969 | if (STACK_REGNO_P (regno) |
3970 | && TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
3971 | { |
3972 | need_zero_all_mm = true; |
3973 | break; |
3974 | } |
3975 | |
3976 | if (!need_zero_all_mm) |
3977 | return false; |
3978 | |
3979 | machine_mode mode = V2SImode; |
3980 | for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++) |
3981 | if (regno != ret_mmx_regno) |
3982 | { |
3983 | rtx reg = gen_rtx_REG (mode, regno); |
3984 | emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode))); |
3985 | } |
3986 | return true; |
3987 | } |
3988 | |
3989 | /* TARGET_ZERO_CALL_USED_REGS. */ |
3990 | /* Generate a sequence of instructions that zero registers specified by |
3991 | NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually |
3992 | zeroed. */ |
3993 | static HARD_REG_SET |
3994 | ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs) |
3995 | { |
3996 | HARD_REG_SET zeroed_hardregs; |
3997 | bool all_sse_zeroed = false; |
3998 | int all_st_zeroed_num = 0; |
3999 | bool all_mm_zeroed = false; |
4000 | |
4001 | CLEAR_HARD_REG_SET (set&: zeroed_hardregs); |
4002 | |
4003 | /* First, let's see whether we can zero all vector registers together. */ |
4004 | rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs); |
4005 | if (zero_all_vec_insn) |
4006 | { |
4007 | emit_insn (zero_all_vec_insn); |
4008 | all_sse_zeroed = true; |
4009 | } |
4010 | |
4011 | /* MMX and x87 (st) registers share the same register set; we should |
4012 | follow these rules to clear them: |
4013 | MMX exit mode x87 exit mode |
4014 | -------------|----------------------|--------------- |
4015 | uses x87 reg | clear all MMX | clear all x87 |
4016 | uses MMX reg | clear individual MMX | clear all x87 |
4017 | x87 + MMX | clear all MMX | clear all x87 |
4018 | |
4019 | First, we should decide which mode (MMX mode or x87 mode) the function |
4020 | exits with. */ |
4021 | |
4022 | bool exit_with_mmx_mode = (crtl->return_rtx |
4023 | && (MMX_REG_P (crtl->return_rtx))); |
4024 | |
4025 | if (!exit_with_mmx_mode) |
4026 | /* x87 exit mode, we should zero all st registers together. */ |
4027 | { |
4028 | all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs); |
4029 | |
4030 | if (all_st_zeroed_num > 0) |
4031 | for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++) |
4032 | /* x87 stack registers that hold the return value should be excluded. |
4033 | x87 returns in the top (two for complex values) register. */ |
4034 | if (all_st_zeroed_num == 8 |
4035 | || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx)) |
4036 | || (all_st_zeroed_num == 6 |
4037 | && (regno == (REGNO (crtl->return_rtx) + 1))))) |
4038 | SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno); |
4039 | } |
4040 | else |
4041 | /* MMX exit mode, check whether we can zero all mm registers. */ |
4042 | { |
4043 | unsigned int exit_mmx_regno = REGNO (crtl->return_rtx); |
4044 | all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs, |
4045 | ret_mmx_regno: exit_mmx_regno); |
4046 | if (all_mm_zeroed) |
4047 | for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++) |
4048 | if (regno != exit_mmx_regno) |
4049 | SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno); |
4050 | } |
4051 | |
4052 | /* Now, generate instructions to zero all the other registers. */ |
4053 | |
4054 | for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
4055 | { |
4056 | if (!TEST_HARD_REG_BIT (set: need_zeroed_hardregs, bit: regno)) |
4057 | continue; |
4058 | if (!zero_call_used_regno_p (regno, all_sse_zeroed, |
4059 | need_zero_mmx: exit_with_mmx_mode && !all_mm_zeroed)) |
4060 | continue; |
4061 | |
4062 | SET_HARD_REG_BIT (set&: zeroed_hardregs, bit: regno); |
4063 | |
4064 | machine_mode mode = zero_call_used_regno_mode (regno); |
4065 | |
4066 | rtx reg = gen_rtx_REG (mode, regno); |
4067 | rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode)); |
4068 | |
4069 | switch (mode) |
4070 | { |
4071 | case E_SImode: |
4072 | if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) |
4073 | { |
4074 | rtx clob = gen_rtx_CLOBBER (VOIDmode, |
4075 | gen_rtx_REG (CCmode, |
4076 | FLAGS_REG)); |
4077 | tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, |
4078 | tmp, |
4079 | clob)); |
4080 | } |
4081 | /* FALLTHRU. */ |
4082 | |
4083 | case E_V4SFmode: |
4084 | case E_HImode: |
4085 | case E_V2SImode: |
4086 | emit_insn (tmp); |
4087 | break; |
4088 | |
4089 | default: |
4090 | gcc_unreachable (); |
4091 | } |
4092 | } |
4093 | return zeroed_hardregs; |
4094 | } |
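
/* For illustration: with -fzero-call-used-regs=used, the loop above emits
   something like the following for a function that only clobbered EAX and
   XMM0 (a rough sketch of the expected assembly, not a verbatim dump):

       xorl    %eax, %eax      SImode SET wrapped with a FLAGS_REG clobber
       pxor    %xmm0, %xmm0    V4SFmode SET of CONST0_RTX

   When AVX is enabled and every SSE register needs clearing, the single
   vzeroall from zero_all_vector_registers replaces the per-register SSE
   moves.  */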
4095 | |
4096 | /* Define how to find the value returned by a function. |
4097 | VALTYPE is the data type of the value (as a tree). |
4098 | If the precise function being called is known, FUNC is its FUNCTION_DECL; |
4099 | otherwise, FUNC is 0. */ |
4100 | |
4101 | static rtx |
4102 | function_value_32 (machine_mode orig_mode, machine_mode mode, |
4103 | const_tree fntype, const_tree fn) |
4104 | { |
4105 | unsigned int regno; |
4106 | |
4107 | /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where |
4108 | we normally prevent this case when mmx is not available. However |
4109 | some ABIs may require the result to be returned like DImode. */ |
4110 | if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) |
4111 | regno = FIRST_MMX_REG; |
4112 | |
4113 | /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where |
4114 | we prevent this case when sse is not available. However some ABIs |
4115 | may require the result to be returned like integer TImode. */ |
4116 | else if (mode == TImode |
4117 | || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) |
4118 | regno = FIRST_SSE_REG; |
4119 | |
4120 | /* 32-byte vector modes in %ymm0. */ |
4121 | else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) |
4122 | regno = FIRST_SSE_REG; |
4123 | |
4124 | /* 64-byte vector modes in %zmm0. */ |
4125 | else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) |
4126 | regno = FIRST_SSE_REG; |
4127 | |
4128 | /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ |
4129 | else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) |
4130 | regno = FIRST_FLOAT_REG; |
4131 | else |
4132 | /* Most things go in %eax. */ |
4133 | regno = AX_REG; |
4134 | |
4135 | /* Return __bf16/_Float16/_Complex _Float16 in an SSE register. */ |
4136 | if (mode == HFmode || mode == BFmode) |
4137 | { |
4138 | if (!TARGET_SSE2) |
4139 | { |
4140 | error ("SSE register return with SSE2 disabled"); |
4141 | regno = AX_REG; |
4142 | } |
4143 | else |
4144 | regno = FIRST_SSE_REG; |
4145 | } |
4146 | |
4147 | if (mode == HCmode) |
4148 | { |
4149 | if (!TARGET_SSE2) |
4150 | error ("SSE register return with SSE2 disabled"); |
4151 | |
4152 | rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1)); |
4153 | XVECEXP (ret, 0, 0) |
4154 | = gen_rtx_EXPR_LIST (VOIDmode, |
4155 | gen_rtx_REG (SImode, |
4156 | TARGET_SSE2 ? FIRST_SSE_REG : AX_REG), |
4157 | GEN_INT (0)); |
4158 | return ret; |
4159 | } |
4160 | |
4161 | /* Override FP return register with %xmm0 for local functions when |
4162 | SSE math is enabled or for functions with sseregparm attribute. */ |
4163 | if ((fn || fntype) && (mode == SFmode || mode == DFmode)) |
4164 | { |
4165 | int sse_level = ix86_function_sseregparm (type: fntype, decl: fn, warn: false); |
4166 | if (sse_level == -1) |
4167 | { |
4168 | error ("calling %qD with SSE calling convention without " |
4169 | "SSE/SSE2 enabled", fn); |
4170 | sorry ("this is a GCC bug that can be worked around by adding " |
4171 | "attribute used to function called"); |
4172 | } |
4173 | else if ((sse_level >= 1 && mode == SFmode) |
4174 | || (sse_level == 2 && mode == DFmode)) |
4175 | regno = FIRST_SSE_REG; |
4176 | } |
4177 | |
4178 | /* OImode shouldn't be used directly. */ |
4179 | gcc_assert (mode != OImode); |
4180 | |
4181 | return gen_rtx_REG (orig_mode, regno); |
4182 | } |
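
/* For illustration, assuming the hypothetical 32-bit functions

       double f (void);                               returns in %st(0)
       double __attribute__ ((sseregparm)) g (void);  returns in %xmm0

   f takes the TARGET_FLOAT_RETURNS_IN_80387 path above, while g hits the
   sse_level == 2 && mode == DFmode override at the end.  */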
4183 | |
4184 | static rtx |
4185 | function_value_64 (machine_mode orig_mode, machine_mode mode, |
4186 | const_tree valtype) |
4187 | { |
4188 | rtx ret; |
4189 | |
4190 | /* Handle libcalls, which don't provide a type node. */ |
4191 | if (valtype == NULL) |
4192 | { |
4193 | unsigned int regno; |
4194 | |
4195 | switch (mode) |
4196 | { |
4197 | case E_BFmode: |
4198 | case E_HFmode: |
4199 | case E_HCmode: |
4200 | case E_SFmode: |
4201 | case E_SCmode: |
4202 | case E_DFmode: |
4203 | case E_DCmode: |
4204 | case E_TFmode: |
4205 | case E_SDmode: |
4206 | case E_DDmode: |
4207 | case E_TDmode: |
4208 | regno = FIRST_SSE_REG; |
4209 | break; |
4210 | case E_XFmode: |
4211 | case E_XCmode: |
4212 | regno = FIRST_FLOAT_REG; |
4213 | break; |
4214 | case E_TCmode: |
4215 | return NULL; |
4216 | default: |
4217 | regno = AX_REG; |
4218 | } |
4219 | |
4220 | return gen_rtx_REG (mode, regno); |
4221 | } |
4222 | else if (POINTER_TYPE_P (valtype)) |
4223 | { |
4224 | /* Pointers are always returned in word_mode. */ |
4225 | mode = word_mode; |
4226 | } |
4227 | |
4228 | ret = construct_container (mode, orig_mode, type: valtype, in_return: 1, |
4229 | X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, |
4230 | intreg: x86_64_int_return_registers, sse_regno: 0); |
4231 | |
4232 | /* For zero sized structures, construct_container returns NULL, but we |
4233 | need to keep the rest of the compiler happy by returning a meaningful value. */ |
4234 | if (!ret) |
4235 | ret = gen_rtx_REG (orig_mode, AX_REG); |
4236 | |
4237 | return ret; |
4238 | } |
4239 | |
4240 | static rtx |
4241 | function_value_ms_32 (machine_mode orig_mode, machine_mode mode, |
4242 | const_tree fntype, const_tree fn, const_tree valtype) |
4243 | { |
4244 | unsigned int regno; |
4245 | |
4246 | /* Floating point return values in %st(0) |
4247 | (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ |
4248 | if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 |
4249 | && (GET_MODE_SIZE (mode) > 8 |
4250 | || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) |
4251 | { |
4252 | regno = FIRST_FLOAT_REG; |
4253 | return gen_rtx_REG (orig_mode, regno); |
4254 | } |
4255 | else |
4256 | return function_value_32(orig_mode, mode, fntype,fn); |
4257 | } |
4258 | |
4259 | static rtx |
4260 | function_value_ms_64 (machine_mode orig_mode, machine_mode mode, |
4261 | const_tree valtype) |
4262 | { |
4263 | unsigned int regno = AX_REG; |
4264 | |
4265 | if (TARGET_SSE) |
4266 | { |
4267 | switch (GET_MODE_SIZE (mode)) |
4268 | { |
4269 | case 16: |
4270 | if (valtype != NULL_TREE |
4271 | && !VECTOR_INTEGER_TYPE_P (valtype) |
4273 | && !INTEGRAL_TYPE_P (valtype) |
4274 | && !VECTOR_FLOAT_TYPE_P (valtype)) |
4275 | break; |
4276 | if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
4277 | && !COMPLEX_MODE_P (mode)) |
4278 | regno = FIRST_SSE_REG; |
4279 | break; |
4280 | case 8: |
4281 | case 4: |
4282 | if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) |
4283 | break; |
4284 | if (mode == SFmode || mode == DFmode) |
4285 | regno = FIRST_SSE_REG; |
4286 | break; |
4287 | default: |
4288 | break; |
4289 | } |
4290 | } |
4291 | return gen_rtx_REG (orig_mode, regno); |
4292 | } |
4293 | |
4294 | static rtx |
4295 | ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, |
4296 | machine_mode orig_mode, machine_mode mode) |
4297 | { |
4298 | const_tree fn, fntype; |
4299 | |
4300 | fn = NULL_TREE; |
4301 | if (fntype_or_decl && DECL_P (fntype_or_decl)) |
4302 | fn = fntype_or_decl; |
4303 | fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
4304 | |
4305 | if (ix86_function_type_abi (fntype) == MS_ABI) |
4306 | { |
4307 | if (TARGET_64BIT) |
4308 | return function_value_ms_64 (orig_mode, mode, valtype); |
4309 | else |
4310 | return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); |
4311 | } |
4312 | else if (TARGET_64BIT) |
4313 | return function_value_64 (orig_mode, mode, valtype); |
4314 | else |
4315 | return function_value_32 (orig_mode, mode, fntype, fn); |
4316 | } |
4317 | |
4318 | static rtx |
4319 | ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) |
4320 | { |
4321 | machine_mode mode, orig_mode; |
4322 | |
4323 | orig_mode = TYPE_MODE (valtype); |
4324 | mode = type_natural_mode (type: valtype, NULL, in_return: true); |
4325 | return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); |
4326 | } |
4327 | |
4328 | /* Pointer function arguments and return values are promoted to |
4329 | word_mode for normal functions. */ |
4330 | |
4331 | static machine_mode |
4332 | ix86_promote_function_mode (const_tree type, machine_mode mode, |
4333 | int *punsignedp, const_tree fntype, |
4334 | int for_return) |
4335 | { |
4336 | if (cfun->machine->func_type == TYPE_NORMAL |
4337 | && type != NULL_TREE |
4338 | && POINTER_TYPE_P (type)) |
4339 | { |
4340 | *punsignedp = POINTERS_EXTEND_UNSIGNED; |
4341 | return word_mode; |
4342 | } |
4343 | return default_promote_function_mode (type, mode, punsignedp, fntype, |
4344 | for_return); |
4345 | } |
4346 | |
4347 | /* Return true if a structure, union or array with MODE containing FIELD |
4348 | should be accessed using BLKmode. */ |
4349 | |
4350 | static bool |
4351 | ix86_member_type_forces_blk (const_tree field, machine_mode mode) |
4352 | { |
4353 | /* Union with XFmode must be in BLKmode. */ |
4354 | return (mode == XFmode |
4355 | && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE |
4356 | || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); |
4357 | } |
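
/* For illustration: a hypothetical union u { long double x; int i; } whose
   TYPE_MODE would otherwise be XFmode is forced into BLKmode by the check
   above.  */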
4358 | |
4359 | rtx |
4360 | ix86_libcall_value (machine_mode mode) |
4361 | { |
4362 | return ix86_function_value_1 (NULL, NULL, orig_mode: mode, mode); |
4363 | } |
4364 | |
4365 | /* Return true iff type is returned in memory. */ |
4366 | |
4367 | static bool |
4368 | ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
4369 | { |
4370 | const machine_mode mode = type_natural_mode (type, NULL, in_return: true); |
4371 | HOST_WIDE_INT size; |
4372 | |
4373 | if (TARGET_64BIT) |
4374 | { |
4375 | if (ix86_function_type_abi (fntype) == MS_ABI) |
4376 | { |
4377 | size = int_size_in_bytes (type); |
4378 | |
4379 | /* __m128 is returned in xmm0. */ |
4380 | if ((!type || VECTOR_INTEGER_TYPE_P (type) |
4381 | || INTEGRAL_TYPE_P (type) |
4382 | || VECTOR_FLOAT_TYPE_P (type)) |
4383 | && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
4384 | && !COMPLEX_MODE_P (mode) |
4385 | && (GET_MODE_SIZE (mode) == 16 || size == 16)) |
4386 | return false; |
4387 | |
4388 | /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */ |
4389 | return size != 1 && size != 2 && size != 4 && size != 8; |
4390 | } |
4391 | else |
4392 | { |
4393 | int needed_intregs, needed_sseregs; |
4394 | |
4395 | return examine_argument (mode, type, in_return: 1, |
4396 | int_nregs: &needed_intregs, sse_nregs: &needed_sseregs); |
4397 | } |
4398 | } |
4399 | else |
4400 | { |
4401 | size = int_size_in_bytes (type); |
4402 | |
4403 | /* Intel MCU psABI returns scalars and aggregates no larger than 8 |
4404 | bytes in registers. */ |
4405 | if (TARGET_IAMCU) |
4406 | return VECTOR_MODE_P (mode) || size < 0 || size > 8; |
4407 | |
4408 | if (mode == BLKmode) |
4409 | return true; |
4410 | |
4411 | if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) |
4412 | return false; |
4413 | |
4414 | if (VECTOR_MODE_P (mode) || mode == TImode) |
4415 | { |
4416 | /* User-created vectors small enough to fit in EAX. */ |
4417 | if (size < 8) |
4418 | return false; |
4419 | |
4420 | /* Unless the ABI prescribes otherwise, |
4421 | MMX/3dNow values are returned in MM0 if available. */ |
4422 | |
4423 | if (size == 8) |
4424 | return TARGET_VECT8_RETURNS || !TARGET_MMX; |
4425 | |
4426 | /* SSE values are returned in XMM0 if available. */ |
4427 | if (size == 16) |
4428 | return !TARGET_SSE; |
4429 | |
4430 | /* AVX values are returned in YMM0 if available. */ |
4431 | if (size == 32) |
4432 | return !TARGET_AVX; |
4433 | |
4434 | /* AVX512F values are returned in ZMM0 if available. */ |
4435 | if (size == 64) |
4436 | return !TARGET_AVX512F; |
4437 | } |
4438 | |
4439 | if (mode == XFmode) |
4440 | return false; |
4441 | |
4442 | if (size > 12) |
4443 | return true; |
4444 | |
4445 | /* OImode shouldn't be used directly. */ |
4446 | gcc_assert (mode != OImode); |
4447 | |
4448 | return false; |
4449 | } |
4450 | } |
4451 | |
4452 | /* Implement TARGET_PUSH_ARGUMENT. */ |
4453 | |
4454 | static bool |
4455 | ix86_push_argument (unsigned int npush) |
4456 | { |
4457 | /* If SSE2 is available, use a vector move to put a large argument onto |
4458 | the stack. NB: In 32-bit mode, use an 8-byte vector move. */ |
4459 | return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8)) |
4460 | && TARGET_PUSH_ARGS |
4461 | && !ACCUMULATE_OUTGOING_ARGS); |
4462 | } |
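
/* For illustration: on x86-64 with SSE2, a 16-byte argument that has to go
   in memory is stored into the argument block with a vector move instead of
   being pushed, while 8-byte and smaller arguments may still use push; in
   32-bit mode the cutoff above is 8 bytes.  */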
4463 | |
4464 | |
4465 | /* Create the va_list data type. */ |
4466 | |
4467 | static tree |
4468 | ix86_build_builtin_va_list_64 (void) |
4469 | { |
4470 | tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; |
4471 | |
4472 | record = lang_hooks.types.make_type (RECORD_TYPE); |
4473 | type_decl = build_decl (BUILTINS_LOCATION, |
4474 | TYPE_DECL, get_identifier ("__va_list_tag"), record); |
4475 | |
4476 | f_gpr = build_decl (BUILTINS_LOCATION, |
4477 | FIELD_DECL, get_identifier ("gp_offset"), |
4478 | unsigned_type_node); |
4479 | f_fpr = build_decl (BUILTINS_LOCATION, |
4480 | FIELD_DECL, get_identifier ("fp_offset"), |
4481 | unsigned_type_node); |
4482 | f_ovf = build_decl (BUILTINS_LOCATION, |
4483 | FIELD_DECL, get_identifier ("overflow_arg_area"), |
4484 | ptr_type_node); |
4485 | f_sav = build_decl (BUILTINS_LOCATION, |
4486 | FIELD_DECL, get_identifier ("reg_save_area"), |
4487 | ptr_type_node); |
4488 | |
4489 | va_list_gpr_counter_field = f_gpr; |
4490 | va_list_fpr_counter_field = f_fpr; |
4491 | |
4492 | DECL_FIELD_CONTEXT (f_gpr) = record; |
4493 | DECL_FIELD_CONTEXT (f_fpr) = record; |
4494 | DECL_FIELD_CONTEXT (f_ovf) = record; |
4495 | DECL_FIELD_CONTEXT (f_sav) = record; |
4496 | |
4497 | TYPE_STUB_DECL (record) = type_decl; |
4498 | TYPE_NAME (record) = type_decl; |
4499 | TYPE_FIELDS (record) = f_gpr; |
4500 | DECL_CHAIN (f_gpr) = f_fpr; |
4501 | DECL_CHAIN (f_fpr) = f_ovf; |
4502 | DECL_CHAIN (f_ovf) = f_sav; |
4503 | |
4504 | layout_type (record); |
4505 | |
4506 | TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), |
4507 | NULL_TREE, TYPE_ATTRIBUTES (record)); |
4508 | |
4509 | /* The correct type is an array type of one element. */ |
4510 | return build_array_type (record, build_index_type (size_zero_node)); |
4511 | } |
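
/* For reference, the record built above corresponds to the SysV psABI's
   C-level definition (a sketch; the field names match the FIELD_DECLs
   created here):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

   and the returned type is the one-element array __va_list_tag[1].  */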
4512 | |
4513 | /* Setup the builtin va_list data type and for 64-bit the additional |
4514 | calling convention specific va_list data types. */ |
4515 | |
4516 | static tree |
4517 | ix86_build_builtin_va_list (void) |
4518 | { |
4519 | if (TARGET_64BIT) |
4520 | { |
4521 | /* Initialize ABI specific va_list builtin types. |
4522 | |
4523 | In lto1, we can encounter two va_list types: |
4524 | - one as a result of the type-merge across TUs, and |
4525 | - the one constructed here. |
4526 | These two types will not have the same TYPE_MAIN_VARIANT, and therefore |
4527 | a type identity check in canonical_va_list_type based on |
4528 | TYPE_MAIN_VARIANT (which we used to have) will not work. |
4529 | Instead, we tag each va_list_type_node with its unique attribute, and |
4530 | look for the attribute in the type identity check in |
4531 | canonical_va_list_type. |
4532 | |
4533 | Tagging sysv_va_list_type_node directly with the attribute is |
4534 | problematic since it's an array of one record, which will degrade into a |
4535 | pointer to the record when used as a parameter (see build_va_arg comments for |
4536 | an example), dropping the attribute in the process. So we tag the |
4537 | record instead. */ |
4538 | |
4539 | /* For SYSV_ABI we use an array of one record. */ |
4540 | sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); |
4541 | |
4542 | /* For MS_ABI we use plain pointer to argument area. */ |
4543 | tree char_ptr_type = build_pointer_type (char_type_node); |
4544 | tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, |
4545 | TYPE_ATTRIBUTES (char_ptr_type)); |
4546 | ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); |
4547 | |
4548 | return ((ix86_abi == MS_ABI) |
4549 | ? ms_va_list_type_node |
4550 | : sysv_va_list_type_node); |
4551 | } |
4552 | else |
4553 | { |
4554 | /* For i386 we use plain pointer to argument area. */ |
4555 | return build_pointer_type (char_type_node); |
4556 | } |
4557 | } |
4558 | |
4559 | /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ |
4560 | |
4561 | static void |
4562 | setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
4563 | { |
4564 | rtx save_area, mem; |
4565 | alias_set_type set; |
4566 | int i, max; |
4567 | |
4568 | /* GPR size of varargs save area. */ |
4569 | if (cfun->va_list_gpr_size) |
4570 | ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; |
4571 | else |
4572 | ix86_varargs_gpr_size = 0; |
4573 | |
4574 | /* FPR size of varargs save area. We don't need it if we don't pass |
4575 | anything in SSE registers. */ |
4576 | if (TARGET_SSE && cfun->va_list_fpr_size) |
4577 | ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; |
4578 | else |
4579 | ix86_varargs_fpr_size = 0; |
4580 | |
4581 | if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) |
4582 | return; |
4583 | |
4584 | save_area = frame_pointer_rtx; |
4585 | set = get_varargs_alias_set (); |
4586 | |
4587 | max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; |
4588 | if (max > X86_64_REGPARM_MAX) |
4589 | max = X86_64_REGPARM_MAX; |
4590 | |
4591 | for (i = cum->regno; i < max; i++) |
4592 | { |
4593 | mem = gen_rtx_MEM (word_mode, |
4594 | plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); |
4595 | MEM_NOTRAP_P (mem) = 1; |
4596 | set_mem_alias_set (mem, set); |
4597 | emit_move_insn (mem, |
4598 | gen_rtx_REG (word_mode, |
4599 | x86_64_int_parameter_registers[i])); |
4600 | } |
4601 | |
4602 | if (ix86_varargs_fpr_size) |
4603 | { |
4604 | machine_mode smode; |
4605 | rtx_code_label *label; |
4606 | rtx test; |
4607 | |
4608 | /* Now emit code to save SSE registers. The AX parameter contains the |
4609 | number of SSE parameter registers used to call this function, though all |
4610 | we actually check here is the zero/non-zero status. */ |
4611 | |
4612 | label = gen_label_rtx (); |
4613 | test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); |
4614 | emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), |
4615 | label)); |
4616 | |
4617 | /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if |
4618 | we used movdqa (i.e. TImode) instead? Perhaps even better would |
4619 | be if we could determine the real mode of the data, via a hook |
4620 | into pass_stdarg. Ignore all that for now. */ |
4621 | smode = V4SFmode; |
4622 | if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) |
4623 | crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); |
4624 | |
4625 | max = cum->sse_regno + cfun->va_list_fpr_size / 16; |
4626 | if (max > X86_64_SSE_REGPARM_MAX) |
4627 | max = X86_64_SSE_REGPARM_MAX; |
4628 | |
4629 | for (i = cum->sse_regno; i < max; ++i) |
4630 | { |
4631 | mem = plus_constant (Pmode, save_area, |
4632 | i * 16 + ix86_varargs_gpr_size); |
4633 | mem = gen_rtx_MEM (smode, mem); |
4634 | MEM_NOTRAP_P (mem) = 1; |
4635 | set_mem_alias_set (mem, set); |
4636 | set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); |
4637 | |
4638 | emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i))); |
4639 | } |
4640 | |
4641 | emit_label (label); |
4642 | } |
4643 | } |
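
/* For illustration, the varargs register save area laid out above, relative
   to frame_pointer_rtx (a sketch for the common case where both sizes take
   their maxima):

       offset   0 .. 47    6 general registers (rdi, rsi, rdx, rcx, r8, r9),
                           8 bytes each (ix86_varargs_gpr_size)
       offset  48 .. 175   8 SSE registers, 16 bytes each, saved only when
                           AL is non-zero (ix86_varargs_fpr_size)  */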
4644 | |
4645 | static void |
4646 | setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
4647 | { |
4648 | alias_set_type set = get_varargs_alias_set (); |
4649 | int i; |
4650 | |
4651 | /* Reset to zero, as a sysv va_arg might have been used |
4652 | before. */ |
4653 | ix86_varargs_gpr_size = 0; |
4654 | ix86_varargs_fpr_size = 0; |
4655 | |
4656 | for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
4657 | { |
4658 | rtx reg, mem; |
4659 | |
4660 | mem = gen_rtx_MEM (Pmode, |
4661 | plus_constant (Pmode, virtual_incoming_args_rtx, |
4662 | i * UNITS_PER_WORD)); |
4663 | MEM_NOTRAP_P (mem) = 1; |
4664 | set_mem_alias_set (mem, set); |
4665 | |
4666 | reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); |
4667 | emit_move_insn (mem, reg); |
4668 | } |
4669 | } |
4670 | |
4671 | static void |
4672 | ix86_setup_incoming_varargs (cumulative_args_t cum_v, |
4673 | const function_arg_info &arg, |
4674 | int *, int no_rtl) |
4675 | { |
4676 | CUMULATIVE_ARGS *cum = get_cumulative_args (arg: cum_v); |
4677 | CUMULATIVE_ARGS next_cum; |
4678 | tree fntype; |
4679 | |
4680 | /* This argument doesn't appear to be used anymore, which is good, |
4681 | because the old code here didn't suppress rtl generation. */ |
4682 | gcc_assert (!no_rtl); |
4683 | |
4684 | if (!TARGET_64BIT) |
4685 | return; |
4686 | |
4687 | fntype = TREE_TYPE (current_function_decl); |
4688 | |
4689 | /* For varargs, we do not want to skip the dummy va_dcl argument. |
4690 | For stdargs, we do want to skip the last named argument. */ |
4691 | next_cum = *cum; |
4692 | if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) |
4693 | || arg.type != NULL_TREE) |
4694 | && stdarg_p (fntype)) |
4695 | ix86_function_arg_advance (cum_v: pack_cumulative_args (arg: &next_cum), arg); |
4696 | |
4697 | if (cum->call_abi == MS_ABI) |
4698 | setup_incoming_varargs_ms_64 (&next_cum); |
4699 | else |
4700 | setup_incoming_varargs_64 (&next_cum); |
4701 | } |
4702 | |
4703 | /* Checks if TYPE is of kind va_list char *. */ |
4704 | |
4705 | static bool |
4706 | is_va_list_char_pointer (tree type) |
4707 | { |
4708 | tree canonic; |
4709 | |
4710 | /* For 32-bit it is always true. */ |
4711 | if (!TARGET_64BIT) |
4712 | return true; |
4713 | canonic = ix86_canonical_va_list_type (type); |
4714 | return (canonic == ms_va_list_type_node |
4715 | || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
4716 | } |
4717 | |
4718 | /* Implement va_start. */ |
4719 | |
4720 | static void |
4721 | ix86_va_start (tree valist, rtx nextarg) |
4722 | { |
4723 | HOST_WIDE_INT words, n_gpr, n_fpr; |
4724 | tree f_gpr, f_fpr, f_ovf, f_sav; |
4725 | tree gpr, fpr, ovf, sav, t; |
4726 | tree type; |
4727 | rtx ovf_rtx; |
4728 | |
4729 | if (flag_split_stack |
4730 | && cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
4731 | { |
4732 | unsigned int scratch_regno; |
4733 | |
4734 | /* When we are splitting the stack, we can't refer to the stack |
4735 | arguments using internal_arg_pointer, because they may be on |
4736 | the old stack. The split stack prologue will arrange to |
4737 | leave a pointer to the old stack arguments in a scratch |
4738 | register, which we here copy to a pseudo-register. The split |
4739 | stack prologue can't set the pseudo-register directly because |
4740 | it (the prologue) runs before any registers have been saved. */ |
4741 | |
4742 | scratch_regno = split_stack_prologue_scratch_regno (); |
4743 | if (scratch_regno != INVALID_REGNUM) |
4744 | { |
4745 | rtx reg; |
4746 | rtx_insn *seq; |
4747 | |
4748 | reg = gen_reg_rtx (Pmode); |
4749 | cfun->machine->split_stack_varargs_pointer = reg; |
4750 | |
4751 | start_sequence (); |
4752 | emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); |
4753 | seq = end_sequence (); |
4754 | |
4755 | push_topmost_sequence (); |
4756 | emit_insn_after (seq, entry_of_function ()); |
4757 | pop_topmost_sequence (); |
4758 | } |
4759 | } |
4760 | |
4761 | /* Only 64bit target needs something special. */ |
4762 | if (is_va_list_char_pointer (TREE_TYPE (valist))) |
4763 | { |
4764 | if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
4765 | std_expand_builtin_va_start (valist, nextarg); |
4766 | else |
4767 | { |
4768 | rtx va_r, next; |
4769 | |
4770 | va_r = expand_expr (exp: valist, NULL_RTX, VOIDmode, modifier: EXPAND_WRITE); |
4771 | next = expand_binop (ptr_mode, add_optab, |
4772 | cfun->machine->split_stack_varargs_pointer, |
4773 | crtl->args.arg_offset_rtx, |
4774 | NULL_RTX, 0, OPTAB_LIB_WIDEN); |
4775 | convert_move (va_r, next, 0); |
4776 | } |
4777 | return; |
4778 | } |
4779 | |
4780 | f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); |
4781 | f_fpr = DECL_CHAIN (f_gpr); |
4782 | f_ovf = DECL_CHAIN (f_fpr); |
4783 | f_sav = DECL_CHAIN (f_ovf); |
4784 | |
4785 | valist = build_simple_mem_ref (valist); |
4786 | TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); |
4787 | /* The following should be folded into the MEM_REF offset. */ |
4788 | gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), |
4789 | f_gpr, NULL_TREE); |
4790 | fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), |
4791 | f_fpr, NULL_TREE); |
4792 | ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), |
4793 | f_ovf, NULL_TREE); |
4794 | sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), |
4795 | f_sav, NULL_TREE); |
4796 | |
4797 | /* Count number of gp and fp argument registers used. */ |
4798 | words = crtl->args.info.words; |
4799 | n_gpr = crtl->args.info.regno; |
4800 | n_fpr = crtl->args.info.sse_regno; |
4801 | |
4802 | if (cfun->va_list_gpr_size) |
4803 | { |
4804 | type = TREE_TYPE (gpr); |
4805 | t = build2 (MODIFY_EXPR, type, |
4806 | gpr, build_int_cst (type, n_gpr * 8)); |
4807 | TREE_SIDE_EFFECTS (t) = 1; |
4808 | expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL); |
4809 | } |
4810 | |
4811 | if (TARGET_SSE && cfun->va_list_fpr_size) |
4812 | { |
4813 | type = TREE_TYPE (fpr); |
4814 | t = build2 (MODIFY_EXPR, type, fpr, |
4815 | build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); |
4816 | TREE_SIDE_EFFECTS (t) = 1; |
4817 | expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL); |
4818 | } |
4819 | |
4820 | /* Find the overflow area. */ |
4821 | type = TREE_TYPE (ovf); |
4822 | if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
4823 | ovf_rtx = crtl->args.internal_arg_pointer; |
4824 | else |
4825 | ovf_rtx = cfun->machine->split_stack_varargs_pointer; |
4826 | t = make_tree (type, ovf_rtx); |
4827 | if (words != 0) |
4828 | t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); |
4829 | |
4830 | t = build2 (MODIFY_EXPR, type, ovf, t); |
4831 | TREE_SIDE_EFFECTS (t) = 1; |
4832 | expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL); |
4833 | |
4834 | if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) |
4835 | { |
4836 | /* Find the register save area. |
4837 | The function prologue saves it right above the stack frame. */ |
4838 | type = TREE_TYPE (sav); |
4839 | t = make_tree (type, frame_pointer_rtx); |
4840 | if (!ix86_varargs_gpr_size) |
4841 | t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); |
4842 | |
4843 | t = build2 (MODIFY_EXPR, type, sav, t); |
4844 | TREE_SIDE_EFFECTS (t) = 1; |
4845 | expand_expr (exp: t, const0_rtx, VOIDmode, modifier: EXPAND_NORMAL); |
4846 | } |
4847 | } |
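
/* For illustration: for a hypothetical SysV x86-64 function

       int f (int a, ...);

   that used one general register and no SSE registers for its named
   arguments, the code above initializes the va_list roughly as

       gp_offset         = 1 * 8  = 8
       fp_offset         = 0 * 16 + 8 * X86_64_REGPARM_MAX = 48
       overflow_arg_area = address of the first stack-passed argument
       reg_save_area     = the block set up in setup_incoming_varargs_64  */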
4848 | |
4849 | /* Implement va_arg. */ |
4850 | |
4851 | static tree |
4852 | ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
4853 | gimple_seq *post_p) |
4854 | { |
4855 | static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; |
4856 | tree f_gpr, f_fpr, f_ovf, f_sav; |
4857 | tree gpr, fpr, ovf, sav, t; |
4858 | int size, rsize; |
4859 | tree lab_false, lab_over = NULL_TREE; |
4860 | tree addr, t2; |
4861 | rtx container; |
4862 | int indirect_p = 0; |
4863 | tree ptrtype; |
4864 | machine_mode nat_mode; |
4865 | unsigned int arg_boundary; |
4866 | unsigned int type_align; |
4867 | |
4868 | /* Only 64bit target needs something special. */ |
4869 | if (is_va_list_char_pointer (TREE_TYPE (valist))) |
4870 | return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
4871 | |
4872 | f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); |
4873 | f_fpr = DECL_CHAIN (f_gpr); |
4874 | f_ovf = DECL_CHAIN (f_fpr); |
4875 | f_sav = DECL_CHAIN (f_ovf); |
4876 | |
4877 | gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), |
4878 | valist, f_gpr, NULL_TREE); |
4879 | |
4880 | fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); |
4881 | ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); |
4882 | sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); |
4883 | |
4884 | indirect_p = pass_va_arg_by_reference (type); |
4885 | if (indirect_p) |
4886 | type = build_pointer_type (type); |
4887 | size = arg_int_size_in_bytes (type); |
4888 | rsize = CEIL (size, UNITS_PER_WORD); |
4889 | |
4890 | nat_mode = type_natural_mode (type, NULL, in_return: false); |
4891 | switch (nat_mode) |
4892 | { |
4893 | case E_V16HFmode: |
4894 | case E_V16BFmode: |
4895 | case E_V8SFmode: |
4896 | case E_V8SImode: |
4897 | case E_V32QImode: |
4898 | case E_V16HImode: |
4899 | case E_V4DFmode: |
4900 | case E_V4DImode: |
4901 | case E_V32HFmode: |
4902 | case E_V32BFmode: |
4903 | case E_V16SFmode: |
4904 | case E_V16SImode: |
4905 | case E_V64QImode: |
4906 | case E_V32HImode: |
4907 | case E_V8DFmode: |
4908 | case E_V8DImode: |
4909 |       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */ |
4910 | if (!TARGET_64BIT_MS_ABI) |
4911 | { |
4912 | container = NULL; |
4913 | break; |
4914 | } |
4915 | /* FALLTHRU */ |
4916 | |
4917 | default: |
4918 |       container = construct_container (nat_mode, TYPE_MODE (type), |
4919 | 				       type, 0, X86_64_REGPARM_MAX, |
4920 | 				       X86_64_SSE_REGPARM_MAX, intreg, |
4921 | 				       0); |
4922 | break; |
4923 | } |
4924 | |
4925 | /* Pull the value out of the saved registers. */ |
4926 | |
4927 | addr = create_tmp_var (ptr_type_node, "addr"); |
4928 | type_align = TYPE_ALIGN (type); |
4929 | |
4930 | if (container) |
4931 | { |
4932 | int needed_intregs, needed_sseregs; |
4933 | bool need_temp; |
4934 | tree int_addr, sse_addr; |
4935 | |
4936 | lab_false = create_artificial_label (UNKNOWN_LOCATION); |
4937 | lab_over = create_artificial_label (UNKNOWN_LOCATION); |
4938 | |
4939 |       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); |
4940 | |
4941 | bool container_in_reg = false; |
4942 | if (REG_P (container)) |
4943 | container_in_reg = true; |
4944 | else if (GET_CODE (container) == PARALLEL |
4945 | && GET_MODE (container) == BLKmode |
4946 | && XVECLEN (container, 0) == 1) |
4947 | { |
4948 | /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST |
4949 | expression in a TImode register. In this case, temp isn't |
4950 | needed. Otherwise, the TImode variable will be put in the |
4951 | GPR save area which guarantees only 8-byte alignment. */ |
4952 | rtx x = XVECEXP (container, 0, 0); |
4953 | if (GET_CODE (x) == EXPR_LIST |
4954 | && REG_P (XEXP (x, 0)) |
4955 | && XEXP (x, 1) == const0_rtx) |
4956 | container_in_reg = true; |
4957 | } |
4958 | |
4959 | need_temp = (!container_in_reg |
4960 | && ((needed_intregs && TYPE_ALIGN (type) > 64) |
4961 | || TYPE_ALIGN (type) > 128)); |
4962 | |
4963 |     /* If we are passing a structure, verify that it forms a consecutive |
4964 |        block in the register save area.  If not, we need to do moves. */ |
4965 | if (!need_temp && !container_in_reg) |
4966 | { |
4967 | /* Verify that all registers are strictly consecutive */ |
4968 | if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) |
4969 | { |
4970 | int i; |
4971 | |
4972 | for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
4973 | { |
4974 | rtx slot = XVECEXP (container, 0, i); |
4975 | if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i |
4976 | || INTVAL (XEXP (slot, 1)) != i * 16) |
4977 | need_temp = true; |
4978 | } |
4979 | } |
4980 | else |
4981 | { |
4982 | int i; |
4983 | |
4984 | for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
4985 | { |
4986 | rtx slot = XVECEXP (container, 0, i); |
4987 | if (REGNO (XEXP (slot, 0)) != (unsigned int) i |
4988 | || INTVAL (XEXP (slot, 1)) != i * 8) |
4989 | need_temp = true; |
4990 | } |
4991 | } |
4992 | } |
4993 | if (!need_temp) |
4994 | { |
4995 | int_addr = addr; |
4996 | sse_addr = addr; |
4997 | } |
4998 | else |
4999 | { |
5000 | int_addr = create_tmp_var (ptr_type_node, "int_addr"); |
5001 | sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); |
5002 | } |
5003 | |
5004 | /* First ensure that we fit completely in registers. */ |
5005 | if (needed_intregs) |
5006 | { |
5007 | t = build_int_cst (TREE_TYPE (gpr), |
5008 | (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); |
5009 | t = build2 (GE_EXPR, boolean_type_node, gpr, t); |
5010 | t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
5011 | t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
5012 | gimplify_and_add (t, pre_p); |
5013 | } |
5014 | if (needed_sseregs) |
5015 | { |
5016 | t = build_int_cst (TREE_TYPE (fpr), |
5017 | (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 |
5018 | + X86_64_REGPARM_MAX * 8); |
5019 | t = build2 (GE_EXPR, boolean_type_node, fpr, t); |
5020 | t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
5021 | t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
5022 | gimplify_and_add (t, pre_p); |
5023 | } |
5024 | |
5025 | /* Compute index to start of area used for integer regs. */ |
5026 | if (needed_intregs) |
5027 | { |
5028 | /* int_addr = gpr + sav; */ |
5029 | t = fold_build_pointer_plus (sav, gpr); |
5030 | gimplify_assign (int_addr, t, pre_p); |
5031 | } |
5032 | if (needed_sseregs) |
5033 | { |
5034 | /* sse_addr = fpr + sav; */ |
5035 | t = fold_build_pointer_plus (sav, fpr); |
5036 | gimplify_assign (sse_addr, t, pre_p); |
5037 | } |
5038 | if (need_temp) |
5039 | { |
5040 | int i, prev_size = 0; |
5041 | tree temp = create_tmp_var (type, "va_arg_tmp"); |
5042 | TREE_ADDRESSABLE (temp) = 1; |
5043 | |
5044 | /* addr = &temp; */ |
5045 | t = build1 (ADDR_EXPR, build_pointer_type (type), temp); |
5046 | gimplify_assign (addr, t, pre_p); |
5047 | |
5048 | for (i = 0; i < XVECLEN (container, 0); i++) |
5049 | { |
5050 | rtx slot = XVECEXP (container, 0, i); |
5051 | rtx reg = XEXP (slot, 0); |
5052 | machine_mode mode = GET_MODE (reg); |
5053 | tree piece_type; |
5054 | tree addr_type; |
5055 | tree daddr_type; |
5056 | tree src_addr, src; |
5057 | int src_offset; |
5058 | tree dest_addr, dest; |
5059 | int cur_size = GET_MODE_SIZE (mode); |
5060 | |
5061 | gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); |
5062 | prev_size = INTVAL (XEXP (slot, 1)); |
5063 | if (prev_size + cur_size > size) |
5064 | { |
5065 | cur_size = size - prev_size; |
5066 | unsigned int nbits = cur_size * BITS_PER_UNIT; |
5067 | 		  if (!int_mode_for_size (nbits, 1).exists (&mode)) |
5068 | mode = QImode; |
5069 | } |
5070 | piece_type = lang_hooks.types.type_for_mode (mode, 1); |
5071 | if (mode == GET_MODE (reg)) |
5072 | addr_type = build_pointer_type (piece_type); |
5073 | else |
5074 | addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
5075 | true); |
5076 | daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
5077 | true); |
5078 | |
5079 | if (SSE_REGNO_P (REGNO (reg))) |
5080 | { |
5081 | src_addr = sse_addr; |
5082 | src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; |
5083 | } |
5084 | else |
5085 | { |
5086 | src_addr = int_addr; |
5087 | src_offset = REGNO (reg) * 8; |
5088 | } |
5089 | src_addr = fold_convert (addr_type, src_addr); |
5090 | src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); |
5091 | |
5092 | dest_addr = fold_convert (daddr_type, addr); |
5093 | dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); |
5094 | if (cur_size == GET_MODE_SIZE (mode)) |
5095 | { |
5096 | src = build_va_arg_indirect_ref (src_addr); |
5097 | dest = build_va_arg_indirect_ref (dest_addr); |
5098 | |
5099 | gimplify_assign (dest, src, pre_p); |
5100 | } |
5101 | else |
5102 | { |
5103 | tree copy |
5104 | 		      = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), |
5105 | 3, dest_addr, src_addr, |
5106 | size_int (cur_size)); |
5107 | gimplify_and_add (copy, pre_p); |
5108 | } |
5109 | prev_size += cur_size; |
5110 | } |
5111 | } |
5112 | |
5113 | if (needed_intregs) |
5114 | { |
5115 | t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, |
5116 | build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); |
5117 | gimplify_assign (gpr, t, pre_p); |
5118 | /* The GPR save area guarantees only 8-byte alignment. */ |
5119 | if (!need_temp) |
5120 | type_align = MIN (type_align, 64); |
5121 | } |
5122 | |
5123 | if (needed_sseregs) |
5124 | { |
5125 | t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, |
5126 | build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); |
5127 | gimplify_assign (unshare_expr (fpr), t, pre_p); |
5128 | } |
5129 | |
5130 |       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); |
5131 | |
5132 |       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); |
5133 | } |
5134 | |
5135 | /* ... otherwise out of the overflow area. */ |
5136 | |
5137 | /* When we align parameter on stack for caller, if the parameter |
5138 |   /* When the caller aligns a parameter on the stack, a parameter whose |
5139 |      alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT is only aligned at |
5140 |      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the |
5141 |      caller. */ |
5142 | if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) |
5143 | arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; |
5144 | |
5145 | /* Care for on-stack alignment if needed. */ |
5146 | if (arg_boundary <= 64 || size == 0) |
5147 | t = ovf; |
5148 | else |
5149 | { |
5150 | HOST_WIDE_INT align = arg_boundary / 8; |
5151 | t = fold_build_pointer_plus_hwi (ovf, align - 1); |
5152 | t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, |
5153 | build_int_cst (TREE_TYPE (t), -align)); |
5154 | } |
5155 | |
5156 | gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); |
5157 | gimplify_assign (addr, t, pre_p); |
5158 | |
5159 | t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); |
5160 | gimplify_assign (unshare_expr (ovf), t, pre_p); |
5161 | |
5162 | if (container) |
5163 |     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); |
5164 | |
5165 | type = build_aligned_type (type, type_align); |
5166 | ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); |
5167 | addr = fold_convert (ptrtype, addr); |
5168 | |
5169 | if (indirect_p) |
5170 | addr = build_va_arg_indirect_ref (addr); |
5171 | return build_va_arg_indirect_ref (addr); |
5172 | } |
5173 | |
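/* Roughly, for a small integer argument the GIMPLE built above behaves
   like the following source-level sketch (SysV x86-64 register counts;
   names are illustrative only):

     if (ap->gp_offset >= 6 * 8)
       goto overflow;			// lab_false
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;				// lab_over
   overflow:
     addr = ap->overflow_arg_area;	// aligned first if needed
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(TYPE *) addr;  */
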
5174 | /* Return true if OPNUM's MEM should be matched |
5175 | in movabs* patterns. */ |
5176 | |
5177 | bool |
5178 | ix86_check_movabs (rtx insn, int opnum) |
5179 | { |
5180 | rtx set, mem; |
5181 | |
5182 | set = PATTERN (insn); |
5183 | if (GET_CODE (set) == PARALLEL) |
5184 | set = XVECEXP (set, 0, 0); |
5185 | gcc_assert (GET_CODE (set) == SET); |
5186 | mem = XEXP (set, opnum); |
5187 | while (SUBREG_P (mem)) |
5188 | mem = SUBREG_REG (mem); |
5189 | gcc_assert (MEM_P (mem)); |
5190 | return volatile_ok || !MEM_VOLATILE_P (mem); |
5191 | } |
5192 | |
5193 | /* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */ |
5194 | bool |
5195 | ix86_check_movs (rtx insn, int idx) |
5196 | { |
5197 | rtx pat = PATTERN (insn); |
5198 | gcc_assert (GET_CODE (pat) == PARALLEL); |
5199 | |
5200 | rtx set = XVECEXP (pat, 0, idx); |
5201 | gcc_assert (GET_CODE (set) == SET); |
5202 | |
5203 | rtx dst = SET_DEST (set); |
5204 | gcc_assert (MEM_P (dst)); |
5205 | |
5206 | rtx src = SET_SRC (set); |
5207 | gcc_assert (MEM_P (src)); |
5208 | |
5209 | return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)) |
5210 | && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)) |
5211 | || Pmode == word_mode)); |
5212 | } |
5213 | |
5214 | /* Return false if INSN contains a MEM with a non-default address space. */ |
5215 | bool |
5216 | ix86_check_no_addr_space (rtx insn) |
5217 | { |
5218 | subrtx_var_iterator::array_type array; |
5219 | FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) |
5220 | { |
5221 | rtx x = *iter; |
5222 | if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) |
5223 | return false; |
5224 | } |
5225 | return true; |
5226 | } |
5227 | |
5228 | /* Initialize the table of extra 80387 mathematical constants. */ |
5229 | |
5230 | static void |
5231 | init_ext_80387_constants (void) |
5232 | { |
5233 | static const char * cst[5] = |
5234 | { |
5235 | "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ |
5236 | "0.6931471805599453094286904741849753009", /* 1: fldln2 */ |
5237 | "1.4426950408889634073876517827983434472", /* 2: fldl2e */ |
5238 | "3.3219280948873623478083405569094566090", /* 3: fldl2t */ |
5239 | "3.1415926535897932385128089594061862044", /* 4: fldpi */ |
5240 | }; |
5241 | int i; |
5242 | |
5243 | for (i = 0; i < 5; i++) |
5244 | { |
5245 | real_from_string (&ext_80387_constants_table[i], cst[i]); |
5246 | /* Ensure each constant is rounded to XFmode precision. */ |
5247 | real_convert (&ext_80387_constants_table[i], |
5248 | XFmode, &ext_80387_constants_table[i]); |
5249 | } |
5250 | |
5251 | ext_80387_constants_init = 1; |
5252 | } |
5253 | |
5254 | /* Return non-zero if the constant is something that |
5255 | can be loaded with a special instruction. */ |
5256 | |
5257 | int |
5258 | standard_80387_constant_p (rtx x) |
5259 | { |
5260 | machine_mode mode = GET_MODE (x); |
5261 | |
5262 | const REAL_VALUE_TYPE *r; |
5263 | |
5264 | if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) |
5265 | return -1; |
5266 | |
5267 | if (x == CONST0_RTX (mode)) |
5268 | return 1; |
5269 | if (x == CONST1_RTX (mode)) |
5270 | return 2; |
5271 | |
5272 | r = CONST_DOUBLE_REAL_VALUE (x); |
5273 | |
5274 | /* For XFmode constants, try to find a special 80387 instruction when |
5275 | optimizing for size or on those CPUs that benefit from them. */ |
5276 | if (mode == XFmode |
5277 | && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS) |
5278 | && !flag_rounding_math) |
5279 | { |
5280 | int i; |
5281 | |
5282 | if (! ext_80387_constants_init) |
5283 | init_ext_80387_constants (); |
5284 | |
5285 | for (i = 0; i < 5; i++) |
5286 | if (real_identical (r, &ext_80387_constants_table[i])) |
5287 | return i + 3; |
5288 | } |
5289 | |
5290 | /* Load of the constant -0.0 or -1.0 will be split as |
5291 | fldz;fchs or fld1;fchs sequence. */ |
5292 | if (real_isnegzero (r)) |
5293 | return 8; |
5294 | if (real_identical (r, &dconstm1)) |
5295 | return 9; |
5296 | |
5297 | return 0; |
5298 | } |
5299 | |
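/* For example, with the encoding above and the opcode mapping below:

     CONST0_RTX (XFmode)			-> 1  (fldz)
     CONST1_RTX (XFmode)			-> 2  (fld1)
     an XFmode CONST_DOUBLE equal to pi		-> 7  (fldpi)
     an XFmode CONST_DOUBLE equal to -1.0	-> 9  (fld1; fchs)

   The values 3..7 are only returned when the extended constants are
   enabled (optimizing for size or TARGET_EXT_80387_CONSTANTS) and
   -frounding-math is not in effect.  */
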
5300 | /* Return the opcode of the special instruction to be used to load |
5301 | the constant X. */ |
5302 | |
5303 | const char * |
5304 | standard_80387_constant_opcode (rtx x) |
5305 | { |
5306 | switch (standard_80387_constant_p (x)) |
5307 | { |
5308 | case 1: |
5309 | return "fldz"; |
5310 | case 2: |
5311 | return "fld1"; |
5312 | case 3: |
5313 | return "fldlg2"; |
5314 | case 4: |
5315 | return "fldln2"; |
5316 | case 5: |
5317 | return "fldl2e"; |
5318 | case 6: |
5319 | return "fldl2t"; |
5320 | case 7: |
5321 | return "fldpi"; |
5322 | case 8: |
5323 | case 9: |
5324 | return "#"; |
5325 | default: |
5326 | gcc_unreachable (); |
5327 | } |
5328 | } |
5329 | |
5330 | /* Return the CONST_DOUBLE representing the 80387 constant that is |
5331 | loaded by the specified special instruction. The argument IDX |
5332 | matches the return value from standard_80387_constant_p. */ |
5333 | |
5334 | rtx |
5335 | standard_80387_constant_rtx (int idx) |
5336 | { |
5337 | int i; |
5338 | |
5339 | if (! ext_80387_constants_init) |
5340 | init_ext_80387_constants (); |
5341 | |
5342 | switch (idx) |
5343 | { |
5344 | case 3: |
5345 | case 4: |
5346 | case 5: |
5347 | case 6: |
5348 | case 7: |
5349 | i = idx - 3; |
5350 | break; |
5351 | |
5352 | default: |
5353 | gcc_unreachable (); |
5354 | } |
5355 | |
5356 | return const_double_from_real_value (ext_80387_constants_table[i], |
5357 | XFmode); |
5358 | } |
5359 | |
5360 | /* Return 1 if X is all bits 0, 2 if X is all bits 1, |
5361 |    and 3 if X is all bits 1 with zero extension, |
5362 |    in a supported SSE/AVX vector mode. */ |
5363 | |
5364 | int |
5365 | standard_sse_constant_p (rtx x, machine_mode pred_mode) |
5366 | { |
5367 | machine_mode mode; |
5368 | |
5369 | if (!TARGET_SSE) |
5370 | return 0; |
5371 | |
5372 | mode = GET_MODE (x); |
5373 | |
5374 | if (x == const0_rtx || const0_operand (x, mode)) |
5375 | return 1; |
5376 | |
5377 | if (x == constm1_rtx |
5378 | || vector_all_ones_operand (x, mode) |
5379 | || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT |
5380 | || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT) |
5381 | && float_vector_all_ones_operand (x, mode))) |
5382 | { |
5383 | /* VOIDmode integer constant, get mode from the predicate. */ |
5384 | if (mode == VOIDmode) |
5385 | mode = pred_mode; |
5386 | |
5387 | switch (GET_MODE_SIZE (mode)) |
5388 | { |
5389 | case 64: |
5390 | if (TARGET_AVX512F) |
5391 | return 2; |
5392 | break; |
5393 | case 32: |
5394 | if (TARGET_AVX2) |
5395 | return 2; |
5396 | break; |
5397 | case 16: |
5398 | if (TARGET_SSE2) |
5399 | return 2; |
5400 | break; |
5401 | case 0: |
5402 | /* VOIDmode */ |
5403 | gcc_unreachable (); |
5404 | default: |
5405 | break; |
5406 | } |
5407 | } |
5408 | |
5409 | if (vector_all_ones_zero_extend_half_operand (x, mode) |
5410 | || vector_all_ones_zero_extend_quarter_operand (x, mode)) |
5411 | return 3; |
5412 | |
5413 | return 0; |
5414 | } |
5415 | |
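/* For example, a V4SFmode CONST0_RTX yields 1 (loadable with xorps),
   an all-ones V8SImode constant yields 2 when TARGET_AVX2 is enabled
   (loadable with vpcmpeqd), and a constant that is all ones in its low
   half or quarter and zero elsewhere yields 3.  */
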
5416 | /* Return the opcode of the special instruction to be used to load |
5417 | the constant operands[1] into operands[0]. */ |
5418 | |
5419 | const char * |
5420 | standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) |
5421 | { |
5422 | machine_mode mode; |
5423 | rtx x = operands[1]; |
5424 | |
5425 | gcc_assert (TARGET_SSE); |
5426 | |
5427 | mode = GET_MODE (x); |
5428 | |
5429 | if (x == const0_rtx || const0_operand (x, mode)) |
5430 | { |
5431 | switch (get_attr_mode (insn)) |
5432 | { |
5433 | case MODE_TI: |
5434 | if (!EXT_REX_SSE_REG_P (operands[0])) |
5435 | return "%vpxor\t%0, %d0"; |
5436 | /* FALLTHRU */ |
5437 | case MODE_XI: |
5438 | case MODE_OI: |
5439 | if (EXT_REX_SSE_REG_P (operands[0])) |
5440 | { |
5441 | if (TARGET_AVX512VL) |
5442 | return "vpxord\t%x0, %x0, %x0"; |
5443 | else |
5444 | return "vpxord\t%g0, %g0, %g0"; |
5445 | } |
5446 | return "vpxor\t%x0, %x0, %x0"; |
5447 | |
5448 | case MODE_V2DF: |
5449 | if (!EXT_REX_SSE_REG_P (operands[0])) |
5450 | return "%vxorpd\t%0, %d0"; |
5451 | /* FALLTHRU */ |
5452 | case MODE_V8DF: |
5453 | case MODE_V4DF: |
5454 | if (EXT_REX_SSE_REG_P (operands[0])) |
5455 | { |
5456 | if (TARGET_AVX512DQ) |
5457 | { |
5458 | if (TARGET_AVX512VL) |
5459 | return "vxorpd\t%x0, %x0, %x0"; |
5460 | else |
5461 | return "vxorpd\t%g0, %g0, %g0"; |
5462 | } |
5463 | else |
5464 | { |
5465 | if (TARGET_AVX512VL) |
5466 | return "vpxorq\t%x0, %x0, %x0"; |
5467 | else |
5468 | return "vpxorq\t%g0, %g0, %g0"; |
5469 | } |
5470 | } |
5471 | return "vxorpd\t%x0, %x0, %x0"; |
5472 | |
5473 | case MODE_V4SF: |
5474 | if (!EXT_REX_SSE_REG_P (operands[0])) |
5475 | return "%vxorps\t%0, %d0"; |
5476 | /* FALLTHRU */ |
5477 | case MODE_V16SF: |
5478 | case MODE_V8SF: |
5479 | if (EXT_REX_SSE_REG_P (operands[0])) |
5480 | { |
5481 | if (TARGET_AVX512DQ) |
5482 | { |
5483 | if (TARGET_AVX512VL) |
5484 | return "vxorps\t%x0, %x0, %x0"; |
5485 | else |
5486 | return "vxorps\t%g0, %g0, %g0"; |
5487 | } |
5488 | else |
5489 | { |
5490 | if (TARGET_AVX512VL) |
5491 | return "vpxord\t%x0, %x0, %x0"; |
5492 | else |
5493 | return "vpxord\t%g0, %g0, %g0"; |
5494 | } |
5495 | } |
5496 | return "vxorps\t%x0, %x0, %x0"; |
5497 | |
5498 | default: |
5499 | gcc_unreachable (); |
5500 | } |
5501 | } |
5502 | else if (x == constm1_rtx |
5503 | || vector_all_ones_operand (x, mode) |
5504 | || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT |
5505 | && float_vector_all_ones_operand (x, mode))) |
5506 | { |
5507 | enum attr_mode insn_mode = get_attr_mode (insn); |
5508 | |
5509 | switch (insn_mode) |
5510 | { |
5511 | case MODE_XI: |
5512 | case MODE_V8DF: |
5513 | case MODE_V16SF: |
5514 | gcc_assert (TARGET_AVX512F); |
5515 | return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; |
5516 | |
5517 | case MODE_OI: |
5518 | case MODE_V4DF: |
5519 | case MODE_V8SF: |
5520 | gcc_assert (TARGET_AVX2); |
5521 | /* FALLTHRU */ |
5522 | case MODE_TI: |
5523 | case MODE_V2DF: |
5524 | case MODE_V4SF: |
5525 | gcc_assert (TARGET_SSE2); |
5526 | if (EXT_REX_SSE_REG_P (operands[0])) |
5527 | { |
5528 | if (TARGET_AVX512VL) |
5529 | return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; |
5530 | else |
5531 | return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; |
5532 | } |
5533 | return (TARGET_AVX |
5534 | ? "vpcmpeqd\t%0, %0, %0" |
5535 | : "pcmpeqd\t%0, %0"); |
5536 | |
5537 | default: |
5538 | gcc_unreachable (); |
5539 | } |
5540 | } |
5541 | else if (vector_all_ones_zero_extend_half_operand (x, mode)) |
5542 | { |
5543 | if (GET_MODE_SIZE (mode) == 64) |
5544 | { |
5545 | gcc_assert (TARGET_AVX512F); |
5546 | return "vpcmpeqd\t%t0, %t0, %t0"; |
5547 | } |
5548 | else if (GET_MODE_SIZE (mode) == 32) |
5549 | { |
5550 | gcc_assert (TARGET_AVX); |
5551 | return "vpcmpeqd\t%x0, %x0, %x0"; |
5552 | } |
5553 | gcc_unreachable (); |
5554 | } |
5555 | else if (vector_all_ones_zero_extend_quarter_operand (x, mode)) |
5556 | { |
5557 | gcc_assert (TARGET_AVX512F); |
5558 | return "vpcmpeqd\t%x0, %x0, %x0"; |
5559 | } |
5560 | |
5561 | gcc_unreachable (); |
5562 | } |
5563 | |
5564 | /* Returns true if INSN can be transformed from a memory load |
5565 | to a supported FP constant load. */ |
5566 | |
5567 | bool |
5568 | ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) |
5569 | { |
5570 | rtx src = find_constant_src (insn); |
5571 | |
5572 | gcc_assert (REG_P (dst)); |
5573 | |
5574 | if (src == NULL |
5575 | || (SSE_REGNO_P (REGNO (dst)) |
5576 | 	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1) |
5577 | || (!TARGET_AVX512VL |
5578 | && EXT_REX_SSE_REGNO_P (REGNO (dst)) |
5579 | 	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1) |
5580 | || (STACK_REGNO_P (REGNO (dst)) |
5581 | 	  && standard_80387_constant_p (src) < 1)) |
5582 | return false; |
5583 | |
5584 | return true; |
5585 | } |
5586 | |
5587 | /* Predicate for pre-reload splitters with associated instructions, |
5588 | which can match any time before the split1 pass (usually combine), |
5589 | then are unconditionally split in that pass and should not be |
5590 | matched again afterwards. */ |
5591 | |
5592 | bool |
5593 | ix86_pre_reload_split (void) |
5594 | { |
5595 | return (can_create_pseudo_p () |
5596 | && !(cfun->curr_properties & PROP_rtl_split_insns)); |
5597 | } |
5598 | |
5599 | /* Return the opcode of the TYPE_SSEMOV instruction.  To move from |
5600 |    or to xmm16-xmm31/ymm16-ymm31 registers, we either require |
5601 |    TARGET_AVX512VL or the move must be a register-to-register move, |
5602 |    which can be done as a zmm register move. */ |
5603 | |
5604 | static const char * |
5605 | ix86_get_ssemov (rtx *operands, unsigned size, |
5606 | enum attr_mode insn_mode, machine_mode mode) |
5607 | { |
5608 | char buf[128]; |
5609 | bool misaligned_p = (misaligned_operand (operands[0], mode) |
5610 | || misaligned_operand (operands[1], mode)); |
5611 | bool evex_reg_p = (size == 64 |
5612 | || EXT_REX_SSE_REG_P (operands[0]) |
5613 | || EXT_REX_SSE_REG_P (operands[1])); |
5614 | |
5615 | bool egpr_p = (TARGET_APX_EGPR |
5616 | && (x86_extended_rex2reg_mentioned_p (operands[0]) |
5617 | || x86_extended_rex2reg_mentioned_p (operands[1]))); |
5618 | bool egpr_vl = egpr_p && TARGET_AVX512VL; |
5619 | |
5620 | machine_mode scalar_mode; |
5621 | |
5622 | const char *opcode = NULL; |
5623 | enum |
5624 | { |
5625 | opcode_int, |
5626 | opcode_float, |
5627 | opcode_double |
5628 | } type = opcode_int; |
5629 | |
5630 | switch (insn_mode) |
5631 | { |
5632 | case MODE_V16SF: |
5633 | case MODE_V8SF: |
5634 | case MODE_V4SF: |
5635 | scalar_mode = E_SFmode; |
5636 | type = opcode_float; |
5637 | break; |
5638 | case MODE_V8DF: |
5639 | case MODE_V4DF: |
5640 | case MODE_V2DF: |
5641 | scalar_mode = E_DFmode; |
5642 | type = opcode_double; |
5643 | break; |
5644 | case MODE_XI: |
5645 | case MODE_OI: |
5646 | case MODE_TI: |
5647 | scalar_mode = GET_MODE_INNER (mode); |
5648 | break; |
5649 | default: |
5650 | gcc_unreachable (); |
5651 | } |
5652 | |
5653 | /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL, |
5654 | we can only use zmm register move without memory operand. */ |
5655 | if (evex_reg_p |
5656 | && !TARGET_AVX512VL |
5657 | && GET_MODE_SIZE (mode) < 64) |
5658 | { |
5659 | /* NB: Even though ix86_hard_regno_mode_ok doesn't allow |
5660 | xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when |
5661 | AVX512VL is disabled, LRA can still generate reg to |
5662 | reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit |
5663 | modes. */ |
5664 | if (memory_operand (operands[0], mode) |
5665 | || memory_operand (operands[1], mode)) |
5666 | gcc_unreachable (); |
5667 | size = 64; |
5668 | switch (type) |
5669 | { |
5670 | case opcode_int: |
5671 | if (scalar_mode == E_HFmode || scalar_mode == E_BFmode) |
5672 | opcode = (misaligned_p |
5673 | ? (TARGET_AVX512BW ? "vmovdqu16": "vmovdqu64") |
5674 | : "vmovdqa64"); |
5675 | else |
5676 | opcode = misaligned_p ? "vmovdqu32": "vmovdqa32"; |
5677 | break; |
5678 | case opcode_float: |
5679 | opcode = misaligned_p ? "vmovups": "vmovaps"; |
5680 | break; |
5681 | case opcode_double: |
5682 | opcode = misaligned_p ? "vmovupd": "vmovapd"; |
5683 | break; |
5684 | } |
5685 | } |
5686 | else if (SCALAR_FLOAT_MODE_P (scalar_mode)) |
5687 | { |
5688 | switch (scalar_mode) |
5689 | { |
5690 | case E_HFmode: |
5691 | case E_BFmode: |
5692 | if (evex_reg_p || egpr_vl) |
5693 | opcode = (misaligned_p |
5694 | ? (TARGET_AVX512BW |
5695 | ? "vmovdqu16" |
5696 | : "vmovdqu64") |
5697 | : "vmovdqa64"); |
5698 | else if (egpr_p) |
5699 | opcode = (misaligned_p |
5700 | ? (TARGET_AVX512BW |
5701 | ? "vmovdqu16" |
5702 | : "%vmovups") |
5703 | : "%vmovaps"); |
5704 | else |
5705 | opcode = (misaligned_p |
5706 | ? (TARGET_AVX512BW |
5707 | ? "vmovdqu16" |
5708 | : "%vmovdqu") |
5709 | : "%vmovdqa"); |
5710 | break; |
5711 | case E_SFmode: |
5712 | opcode = misaligned_p ? "%vmovups": "%vmovaps"; |
5713 | break; |
5714 | case E_DFmode: |
5715 | opcode = misaligned_p ? "%vmovupd": "%vmovapd"; |
5716 | break; |
5717 | case E_TFmode: |
5718 | if (evex_reg_p || egpr_vl) |
5719 | opcode = misaligned_p ? "vmovdqu64": "vmovdqa64"; |
5720 | else if (egpr_p) |
5721 | opcode = misaligned_p ? "%vmovups": "%vmovaps"; |
5722 | else |
5723 | opcode = misaligned_p ? "%vmovdqu": "%vmovdqa"; |
5724 | break; |
5725 | default: |
5726 | gcc_unreachable (); |
5727 | } |
5728 | } |
5729 | else if (SCALAR_INT_MODE_P (scalar_mode)) |
5730 | { |
5731 | switch (scalar_mode) |
5732 | { |
5733 | case E_QImode: |
5734 | if (evex_reg_p || egpr_vl) |
5735 | opcode = (misaligned_p |
5736 | ? (TARGET_AVX512BW |
5737 | ? "vmovdqu8" |
5738 | : "vmovdqu64") |
5739 | : "vmovdqa64"); |
5740 | else if (egpr_p) |
5741 | opcode = (misaligned_p |
5742 | ? (TARGET_AVX512BW |
5743 | ? "vmovdqu8" |
5744 | : "%vmovups") |
5745 | : "%vmovaps"); |
5746 | else |
5747 | opcode = (misaligned_p |
5748 | ? (TARGET_AVX512BW |
5749 | ? "vmovdqu8" |
5750 | : "%vmovdqu") |
5751 | : "%vmovdqa"); |
5752 | break; |
5753 | case E_HImode: |
5754 | if (evex_reg_p || egpr_vl) |
5755 | opcode = (misaligned_p |
5756 | ? (TARGET_AVX512BW |
5757 | ? "vmovdqu16" |
5758 | : "vmovdqu64") |
5759 | : "vmovdqa64"); |
5760 | else if (egpr_p) |
5761 | opcode = (misaligned_p |
5762 | ? (TARGET_AVX512BW |
5763 | ? "vmovdqu16" |
5764 | : "%vmovups") |
5765 | : "%vmovaps"); |
5766 | else |
5767 | opcode = (misaligned_p |
5768 | ? (TARGET_AVX512BW |
5769 | ? "vmovdqu16" |
5770 | : "%vmovdqu") |
5771 | : "%vmovdqa"); |
5772 | break; |
5773 | case E_SImode: |
5774 | if (evex_reg_p || egpr_vl) |
5775 | opcode = misaligned_p ? "vmovdqu32": "vmovdqa32"; |
5776 | else if (egpr_p) |
5777 | opcode = misaligned_p ? "%vmovups": "%vmovaps"; |
5778 | else |
5779 | opcode = misaligned_p ? "%vmovdqu": "%vmovdqa"; |
5780 | break; |
5781 | case E_DImode: |
5782 | case E_TImode: |
5783 | case E_OImode: |
5784 | if (evex_reg_p || egpr_vl) |
5785 | opcode = misaligned_p ? "vmovdqu64": "vmovdqa64"; |
5786 | else if (egpr_p) |
5787 | opcode = misaligned_p ? "%vmovups": "%vmovaps"; |
5788 | else |
5789 | opcode = misaligned_p ? "%vmovdqu": "%vmovdqa"; |
5790 | break; |
5791 | case E_XImode: |
5792 | opcode = misaligned_p ? "vmovdqu64": "vmovdqa64"; |
5793 | break; |
5794 | default: |
5795 | gcc_unreachable (); |
5796 | } |
5797 | } |
5798 | else |
5799 | gcc_unreachable (); |
5800 | |
5801 | switch (size) |
5802 | { |
5803 | case 64: |
5804 |       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}", |
5805 | opcode); |
5806 | break; |
5807 | case 32: |
5808 |       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}", |
5809 | opcode); |
5810 | break; |
5811 | case 16: |
5812 |       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}", |
5813 | opcode); |
5814 | break; |
5815 | default: |
5816 | gcc_unreachable (); |
5817 | } |
5818 | output_asm_insn (buf, operands); |
5819 | return ""; |
5820 | } |
5821 | |
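/* As an example of the selection above, a misaligned V16SFmode move
   (size 64, opcode_float) is emitted as

     vmovups	{%g1, %g0|%g0, %g1}

   while an aligned TImode move of a vector with QImode elements between
   ordinary xmm registers uses %vmovdqa with %x-sized operands.  */
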
5822 | /* Return the template of the TYPE_SSEMOV instruction to move |
5823 | operands[1] into operands[0]. */ |
5824 | |
5825 | const char * |
5826 | ix86_output_ssemov (rtx_insn *insn, rtx *operands) |
5827 | { |
5828 | machine_mode mode = GET_MODE (operands[0]); |
5829 | if (get_attr_type (insn) != TYPE_SSEMOV |
5830 | || mode != GET_MODE (operands[1])) |
5831 | gcc_unreachable (); |
5832 | |
5833 | enum attr_mode insn_mode = get_attr_mode (insn); |
5834 | |
5835 | switch (insn_mode) |
5836 | { |
5837 | case MODE_XI: |
5838 | case MODE_V8DF: |
5839 | case MODE_V16SF: |
5840 |       return ix86_get_ssemov (operands, 64, insn_mode, mode); |
5841 | |
5842 | case MODE_OI: |
5843 | case MODE_V4DF: |
5844 | case MODE_V8SF: |
5845 |       return ix86_get_ssemov (operands, 32, insn_mode, mode); |
5846 | |
5847 | case MODE_TI: |
5848 | case MODE_V2DF: |
5849 | case MODE_V4SF: |
5850 |       return ix86_get_ssemov (operands, 16, insn_mode, mode); |
5851 | |
5852 | case MODE_DI: |
5853 | /* Handle broken assemblers that require movd instead of movq. */ |
5854 | if (GENERAL_REG_P (operands[0])) |
5855 | { |
5856 | if (HAVE_AS_IX86_INTERUNIT_MOVQ) |
5857 | return "%vmovq\t{%1, %q0|%q0, %1}"; |
5858 | else |
5859 | return "%vmovd\t{%1, %q0|%q0, %1}"; |
5860 | } |
5861 | else if (GENERAL_REG_P (operands[1])) |
5862 | { |
5863 | if (HAVE_AS_IX86_INTERUNIT_MOVQ) |
5864 | return "%vmovq\t{%q1, %0|%0, %q1}"; |
5865 | else |
5866 | return "%vmovd\t{%q1, %0|%0, %q1}"; |
5867 | } |
5868 | else |
5869 | return "%vmovq\t{%1, %0|%0, %1}"; |
5870 | |
5871 | case MODE_SI: |
5872 | if (GENERAL_REG_P (operands[0])) |
5873 | return "%vmovd\t{%1, %k0|%k0, %1}"; |
5874 | else if (GENERAL_REG_P (operands[1])) |
5875 | return "%vmovd\t{%k1, %0|%0, %k1}"; |
5876 | else |
5877 | return "%vmovd\t{%1, %0|%0, %1}"; |
5878 | |
5879 | case MODE_HI: |
5880 | if (GENERAL_REG_P (operands[0])) |
5881 | return "vmovw\t{%1, %k0|%k0, %1}"; |
5882 | else if (GENERAL_REG_P (operands[1])) |
5883 | return "vmovw\t{%k1, %0|%0, %k1}"; |
5884 | else |
5885 | return "vmovw\t{%1, %0|%0, %1}"; |
5886 | |
5887 | case MODE_DF: |
5888 | if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) |
5889 | return "vmovsd\t{%d1, %0|%0, %d1}"; |
5890 | else |
5891 | return "%vmovsd\t{%1, %0|%0, %1}"; |
5892 | |
5893 | case MODE_SF: |
5894 | if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) |
5895 | return "vmovss\t{%d1, %0|%0, %d1}"; |
5896 | else |
5897 | return "%vmovss\t{%1, %0|%0, %1}"; |
5898 | |
5899 | case MODE_HF: |
5900 | case MODE_BF: |
5901 | if (REG_P (operands[0]) && REG_P (operands[1])) |
5902 | return "vmovsh\t{%d1, %0|%0, %d1}"; |
5903 | else |
5904 | return "vmovsh\t{%1, %0|%0, %1}"; |
5905 | |
5906 | case MODE_V1DF: |
5907 | gcc_assert (!TARGET_AVX); |
5908 | return "movlpd\t{%1, %0|%0, %1}"; |
5909 | |
5910 | case MODE_V2SF: |
5911 | if (TARGET_AVX && REG_P (operands[0])) |
5912 | return "vmovlps\t{%1, %d0|%d0, %1}"; |
5913 | else |
5914 | return "%vmovlps\t{%1, %0|%0, %1}"; |
5915 | |
5916 | default: |
5917 | gcc_unreachable (); |
5918 | } |
5919 | } |
5920 | |
5921 | /* Returns true if OP contains a symbol reference */ |
5922 | |
5923 | bool |
5924 | symbolic_reference_mentioned_p (rtx op) |
5925 | { |
5926 | const char *fmt; |
5927 | int i; |
5928 | |
5929 | if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) |
5930 | return true; |
5931 | |
5932 | fmt = GET_RTX_FORMAT (GET_CODE (op)); |
5933 | for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
5934 | { |
5935 | if (fmt[i] == 'E') |
5936 | { |
5937 | int j; |
5938 | |
5939 | for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
5940 | if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
5941 | return true; |
5942 | } |
5943 | |
5944 | else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
5945 | return true; |
5946 | } |
5947 | |
5948 | return false; |
5949 | } |
5950 | |
5951 | /* Return true if it is appropriate to emit `ret' instructions in the |
5952 | body of a function. Do this only if the epilogue is simple, needing a |
5953 | couple of insns. Prior to reloading, we can't tell how many registers |
5954 | must be saved, so return false then. Return false if there is no frame |
5955 | marker to de-allocate. */ |
5956 | |
5957 | bool |
5958 | ix86_can_use_return_insn_p (void) |
5959 | { |
5960 |   if (ix86_function_ms_hook_prologue (current_function_decl)) |
5961 | return false; |
5962 | |
5963 |   if (ix86_function_naked (current_function_decl)) |
5964 | return false; |
5965 | |
5966 | /* Don't use `ret' instruction in interrupt handler. */ |
5967 | if (! reload_completed |
5968 | || frame_pointer_needed |
5969 | || cfun->machine->func_type != TYPE_NORMAL) |
5970 | return 0; |
5971 | |
5972 | /* Don't allow more than 32k pop, since that's all we can do |
5973 | with one instruction. */ |
5974 | if (crtl->args.pops_args && crtl->args.size >= 32768) |
5975 | return 0; |
5976 | |
5977 | struct ix86_frame &frame = cfun->machine->frame; |
5978 | return (frame.stack_pointer_offset == UNITS_PER_WORD |
5979 | && (frame.nregs + frame.nsseregs) == 0); |
5980 | } |
5981 | |
5982 | /* Return the stack frame size.  get_frame_size () returns the stack |
5983 |    slots used during compilation, which may be optimized out later.  If |
5984 |    a stack frame is needed, stack_frame_required should be true. */ |
5985 | |
5986 | static HOST_WIDE_INT |
5987 | ix86_get_frame_size (void) |
5988 | { |
5989 | if (cfun->machine->stack_frame_required) |
5990 | return get_frame_size (); |
5991 | else |
5992 | return 0; |
5993 | } |
5994 | |
5995 | /* Value should be nonzero if functions must have frame pointers. |
5996 | Zero means the frame pointer need not be set up (and parms may |
5997 | be accessed via the stack pointer) in functions that seem suitable. */ |
5998 | |
5999 | static bool |
6000 | ix86_frame_pointer_required (void) |
6001 | { |
6002 | /* If we accessed previous frames, then the generated code expects |
6003 | to be able to access the saved ebp value in our frame. */ |
6004 | if (cfun->machine->accesses_prev_frame) |
6005 | return true; |
6006 | |
6007 |   /* Several x86 OSes need a frame pointer for other reasons, |
6008 | usually pertaining to setjmp. */ |
6009 | if (SUBTARGET_FRAME_POINTER_REQUIRED) |
6010 | return true; |
6011 | |
6012 |   /* For older 32-bit runtimes setjmp requires a valid frame pointer. */ |
6013 | if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) |
6014 | return true; |
6015 | |
6016 |   /* Win64 SEH: very large frames need a frame pointer, as the maximum |
6017 |      stack allocation is 4GB. */ |
6018 | if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) |
6019 | return true; |
6020 | |
6021 |   /* SSE saves require a frame pointer when the stack is misaligned. */ |
6022 | if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) |
6023 | return true; |
6024 | |
6025 | /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER |
6026 | turns off the frame pointer by default. Turn it back on now if |
6027 | we've not got a leaf function. */ |
6028 | if (TARGET_OMIT_LEAF_FRAME_POINTER |
6029 | && (!crtl->is_leaf |
6030 | || ix86_current_function_calls_tls_descriptor)) |
6031 | return true; |
6032 | |
6033 |   /* Several versions of mcount for the x86 assume that there is a |
6034 | frame, so we cannot allow profiling without a frame pointer. */ |
6035 | if (crtl->profile && !flag_fentry) |
6036 | return true; |
6037 | |
6038 | return false; |
6039 | } |
6040 | |
6041 | /* Record that the current function accesses previous call frames. */ |
6042 | |
6043 | void |
6044 | ix86_setup_frame_addresses (void) |
6045 | { |
6046 | cfun->machine->accesses_prev_frame = 1; |
6047 | } |
6048 | |
6049 | #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) |
6050 | # define USE_HIDDEN_LINKONCE 1 |
6051 | #else |
6052 | # define USE_HIDDEN_LINKONCE 0 |
6053 | #endif |
6054 | |
6055 | /* Label count for call and return thunks. It is used to make unique |
6056 | labels in call and return thunks. */ |
6057 | static int indirectlabelno; |
6058 | |
6059 | /* True if call thunk function is needed. */ |
6060 | static bool indirect_thunk_needed = false; |
6061 | |
6062 | /* Bit mask of integer registers that contain the branch target, used |
6063 | by call thunk functions. */ |
6064 | static HARD_REG_SET indirect_thunks_used; |
6065 | |
6066 | /* True if return thunk function is needed. */ |
6067 | static bool indirect_return_needed = false; |
6068 | |
6069 | /* True if return thunk function via CX is needed. */ |
6070 | static bool indirect_return_via_cx; |
6071 | |
6072 | #ifndef INDIRECT_LABEL |
6073 | # define INDIRECT_LABEL "LIND" |
6074 | #endif |
6075 | |
6076 | /* Indicate what prefix is needed for an indirect branch. */ |
6077 | enum indirect_thunk_prefix |
6078 | { |
6079 | indirect_thunk_prefix_none, |
6080 | indirect_thunk_prefix_nt |
6081 | }; |
6082 | |
6083 | /* Return the prefix needed for an indirect branch INSN. */ |
6084 | |
6085 | enum indirect_thunk_prefix |
6086 | indirect_thunk_need_prefix (rtx_insn *insn) |
6087 | { |
6088 | enum indirect_thunk_prefix need_prefix; |
6089 | if ((cfun->machine->indirect_branch_type |
6090 | == indirect_branch_thunk_extern) |
6091 | && ix86_notrack_prefixed_insn_p (insn)) |
6092 | { |
6093 | /* NOTRACK prefix is only used with external thunk so that it |
6094 | can be properly updated to support CET at run-time. */ |
6095 | need_prefix = indirect_thunk_prefix_nt; |
6096 | } |
6097 | else |
6098 | need_prefix = indirect_thunk_prefix_none; |
6099 | return need_prefix; |
6100 | } |
6101 | |
6102 | /* Fills in the label name that should be used for the indirect thunk. */ |
6103 | |
6104 | static void |
6105 | indirect_thunk_name (char name[32], unsigned int regno, |
6106 | enum indirect_thunk_prefix need_prefix, |
6107 | bool ret_p) |
6108 | { |
6109 | if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) |
6110 | gcc_unreachable (); |
6111 | |
6112 | if (USE_HIDDEN_LINKONCE) |
6113 | { |
6114 | const char *prefix; |
6115 | |
6116 | if (need_prefix == indirect_thunk_prefix_nt |
6117 | && regno != INVALID_REGNUM) |
6118 | { |
6119 | /* NOTRACK prefix is only used with external thunk via |
6120 | register so that NOTRACK prefix can be added to indirect |
6121 | branch via register to support CET at run-time. */ |
6122 | prefix = "_nt"; |
6123 | } |
6124 | else |
6125 | prefix = ""; |
6126 | |
6127 | const char *ret = ret_p ? "return": "indirect"; |
6128 | |
6129 | if (regno != INVALID_REGNUM) |
6130 | { |
6131 | const char *reg_prefix; |
6132 | if (LEGACY_INT_REGNO_P (regno)) |
6133 | reg_prefix = TARGET_64BIT ? "r": "e"; |
6134 | else |
6135 | reg_prefix = ""; |
6136 | 	  sprintf (name, "__x86_%s_thunk%s_%s%s", |
6137 | ret, prefix, reg_prefix, reg_names[regno]); |
6138 | } |
6139 | else |
6140 | 	sprintf (name, "__x86_%s_thunk%s", ret, prefix); |
6141 | } |
6142 | else |
6143 | { |
6144 | if (regno != INVALID_REGNUM) |
6145 | ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); |
6146 | else |
6147 | { |
6148 | if (ret_p) |
6149 | ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); |
6150 | else |
6151 | ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); |
6152 | } |
6153 | } |
6154 | } |
6155 | |
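/* With USE_HIDDEN_LINKONCE the names built above look like

     __x86_indirect_thunk		function address on the stack
     __x86_indirect_thunk_rax		function address in %rax
     __x86_indirect_thunk_nt_rax	NOTRACK variant via %rax
     __x86_return_thunk			thunk used for function return

   Otherwise local labels (LIT, LITR or LRT) are generated instead.  */
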
6156 | /* Output a call and return thunk for indirect branch. If REGNO != -1, |
6157 | the function address is in REGNO and the call and return thunk looks like: |
6158 | |
6159 | call L2 |
6160 | L1: |
6161 | pause |
6162 | lfence |
6163 | jmp L1 |
6164 | L2: |
6165 | mov %REG, (%sp) |
6166 | ret |
6167 | |
6168 | Otherwise, the function address is on the top of stack and the |
6169 | call and return thunk looks like: |
6170 | |
6171 | call L2 |
6172 | L1: |
6173 | pause |
6174 | lfence |
6175 | jmp L1 |
6176 | L2: |
6177 | lea WORD_SIZE(%sp), %sp |
6178 | ret |
6179 | */ |
6180 | |
6181 | static void |
6182 | output_indirect_thunk (unsigned int regno) |
6183 | { |
6184 | char indirectlabel1[32]; |
6185 | char indirectlabel2[32]; |
6186 | |
6187 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, |
6188 | indirectlabelno++); |
6189 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, |
6190 | indirectlabelno++); |
6191 | |
6192 | /* Call */ |
6193 |   fputs ("\tcall\t", asm_out_file); |
6194 | assemble_name_raw (asm_out_file, indirectlabel2); |
6195 |   fputc ('\n', asm_out_file); |
6196 | |
6197 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
6198 | |
6199 |   /* AMD and Intel CPUs each prefer a different instruction as the loop |
6200 |      filler.  Using both pause + lfence is a compromise. */ |
6201 |   fprintf (asm_out_file, "\tpause\n\tlfence\n"); |
6202 | |
6203 | /* Jump. */ |
6204 |   fputs ("\tjmp\t", asm_out_file); |
6205 | assemble_name_raw (asm_out_file, indirectlabel1); |
6206 |   fputc ('\n', asm_out_file); |
6207 | |
6208 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
6209 | |
6210 | /* The above call insn pushed a word to stack. Adjust CFI info. */ |
6211 | if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ()) |
6212 | { |
6213 | if (! dwarf2out_do_cfi_asm ()) |
6214 | { |
6215 | dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> (); |
6216 | xcfi->dw_cfi_opc = DW_CFA_advance_loc4; |
6217 | xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2); |
6218 | 	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); |
6219 | } |
6220 | dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> (); |
6221 | xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset; |
6222 | xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD; |
6223 |       vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); |
6224 |       dwarf2out_emit_cfi (xcfi); |
6225 | } |
6226 | |
6227 | if (regno != INVALID_REGNUM) |
6228 | { |
6229 | /* MOV. */ |
6230 | rtx xops[2]; |
6231 | xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); |
6232 | xops[1] = gen_rtx_REG (word_mode, regno); |
6233 | output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); |
6234 | } |
6235 | else |
6236 | { |
6237 | /* LEA. */ |
6238 | rtx xops[2]; |
6239 | xops[0] = stack_pointer_rtx; |
6240 | xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
6241 | output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); |
6242 | } |
6243 | |
6244 |   fputs ("\tret\n", asm_out_file); |
6245 | if ((ix86_harden_sls & harden_sls_return)) |
6246 |     fputs ("\tint3\n", asm_out_file); |
6247 | } |
6248 | |
6249 | /* Output a function with a call and return thunk for an indirect branch. |
6250 | If REGNO != INVALID_REGNUM, the function address is in REGNO. |
6251 | Otherwise, the function address is on the top of stack. Thunk is |
6252 | used for function return if RET_P is true. */ |
6253 | |
6254 | static void |
6255 | output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix, |
6256 | unsigned int regno, bool ret_p) |
6257 | { |
6258 | char name[32]; |
6259 | tree decl; |
6260 | |
6261 | /* Create __x86_indirect_thunk. */ |
6262 | indirect_thunk_name (name, regno, need_prefix, ret_p); |
6263 | decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, |
6264 | get_identifier (name), |
6265 | build_function_type_list (void_type_node, NULL_TREE)); |
6266 | DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, |
6267 | NULL_TREE, void_type_node); |
6268 | TREE_PUBLIC (decl) = 1; |
6269 | TREE_STATIC (decl) = 1; |
6270 | DECL_IGNORED_P (decl) = 1; |
6271 | |
6272 | #if TARGET_MACHO |
6273 | if (TARGET_MACHO) |
6274 | { |
6275 | switch_to_section (darwin_sections[picbase_thunk_section]); |
6276 | fputs ("\t.weak_definition\t", asm_out_file); |
6277 | assemble_name (asm_out_file, name); |
6278 | fputs ("\n\t.private_extern\t", asm_out_file); |
6279 | assemble_name (asm_out_file, name); |
6280 | putc ('\n', asm_out_file); |
6281 | ASM_OUTPUT_LABEL (asm_out_file, name); |
6282 | DECL_WEAK (decl) = 1; |
6283 | } |
6284 | else |
6285 | #endif |
6286 | if (USE_HIDDEN_LINKONCE) |
6287 | { |
6288 | cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); |
6289 | |
6290 | targetm.asm_out.unique_section (decl, 0); |
6291 | switch_to_section (get_named_section (decl, NULL, 0)); |
6292 | |
6293 | targetm.asm_out.globalize_label (asm_out_file, name); |
6294 |       fputs ("\t.hidden\t", asm_out_file); |
6295 | assemble_name (asm_out_file, name); |
6296 |       putc ('\n', asm_out_file); |
6297 | ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); |
6298 | } |
6299 | else |
6300 | { |
6301 | switch_to_section (text_section); |
6302 | ASM_OUTPUT_LABEL (asm_out_file, name); |
6303 | } |
6304 | |
6305 | DECL_INITIAL (decl) = make_node (BLOCK); |
6306 | current_function_decl = decl; |
6307 | allocate_struct_function (decl, false); |
6308 | init_function_start (decl); |
6309 | /* We're about to hide the function body from callees of final_* by |
6310 | emitting it directly; tell them we're a thunk, if they care. */ |
6311 | cfun->is_thunk = true; |
6312 | first_function_block_is_cold = false; |
6313 | /* Make sure unwind info is emitted for the thunk if needed. */ |
6314 | final_start_function (emit_barrier (), asm_out_file, 1); |
6315 | |
6316 | output_indirect_thunk (regno); |
6317 | |
6318 | final_end_function (); |
6319 | init_insn_lengths (); |
6320 | free_after_compilation (cfun); |
6321 | set_cfun (NULL); |
6322 | current_function_decl = NULL; |
6323 | } |
6324 | |
6325 | static int pic_labels_used; |
6326 | |
6327 | /* Fills in the label name that should be used for a pc thunk for |
6328 | the given register. */ |
6329 | |
6330 | static void |
6331 | get_pc_thunk_name (char name[32], unsigned int regno) |
6332 | { |
6333 | gcc_assert (!TARGET_64BIT); |
6334 | |
6335 | if (USE_HIDDEN_LINKONCE) |
6336 |     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); |
6337 | else |
6338 | ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); |
6339 | } |
6340 | |
6341 | |
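/* With USE_HIDDEN_LINKONCE the thunk for %ebx is named
   __x86.get_pc_thunk.bx and, as emitted by ix86_code_end below,
   its body is simply

     movl	(%esp), %ebx
     ret

   i.e. it copies the return address of the call into the register.  */
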
6342 | /* This function generates code for -fpic that loads %ebx with |
6343 | the return address of the caller and then returns. */ |
6344 | |
6345 | static void |
6346 | ix86_code_end (void) |
6347 | { |
6348 | rtx xops[2]; |
6349 | unsigned int regno; |
6350 | |
6351 | if (indirect_return_needed) |
6352 |     output_indirect_thunk_function (indirect_thunk_prefix_none, |
6353 | 				    INVALID_REGNUM, true); |
6354 |   if (indirect_return_via_cx) |
6355 |     output_indirect_thunk_function (indirect_thunk_prefix_none, |
6356 | 				    CX_REG, true); |
6357 |   if (indirect_thunk_needed) |
6358 |     output_indirect_thunk_function (indirect_thunk_prefix_none, |
6359 | 				    INVALID_REGNUM, false); |
6360 | |
6361 | for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) |
6362 | { |
6363 |       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno)) |
6364 | 	output_indirect_thunk_function (indirect_thunk_prefix_none, |
6365 | 					regno, false); |
6366 | } |
6367 | |
6368 | for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++) |
6369 | { |
6370 |       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno)) |
6371 | 	output_indirect_thunk_function (indirect_thunk_prefix_none, |
6372 | 					regno, false); |
6373 | } |
6374 | |
6375 | for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) |
6376 | { |
6377 | char name[32]; |
6378 | tree decl; |
6379 | |
6380 |       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno)) |
6381 | 	output_indirect_thunk_function (indirect_thunk_prefix_none, |
6382 | 					regno, false); |
6383 | |
6384 | if (!(pic_labels_used & (1 << regno))) |
6385 | continue; |
6386 | |
6387 | get_pc_thunk_name (name, regno); |
6388 | |
6389 | decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, |
6390 | get_identifier (name), |
6391 | build_function_type_list (void_type_node, NULL_TREE)); |
6392 | DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, |
6393 | NULL_TREE, void_type_node); |
6394 | TREE_PUBLIC (decl) = 1; |
6395 | TREE_STATIC (decl) = 1; |
6396 | DECL_IGNORED_P (decl) = 1; |
6397 | |
6398 | #if TARGET_MACHO |
6399 | if (TARGET_MACHO) |
6400 | { |
6401 | switch_to_section (darwin_sections[picbase_thunk_section]); |
6402 | fputs ("\t.weak_definition\t", asm_out_file); |
6403 | assemble_name (asm_out_file, name); |
6404 | fputs ("\n\t.private_extern\t", asm_out_file); |
6405 | assemble_name (asm_out_file, name); |
6406 | putc ('\n', asm_out_file); |
6407 | ASM_OUTPUT_LABEL (asm_out_file, name); |
6408 | DECL_WEAK (decl) = 1; |
6409 | } |
6410 | else |
6411 | #endif |
6412 | if (USE_HIDDEN_LINKONCE) |
6413 | { |
6414 | cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); |
6415 | |
6416 | targetm.asm_out.unique_section (decl, 0); |
6417 | switch_to_section (get_named_section (decl, NULL, 0)); |
6418 | |
6419 | targetm.asm_out.globalize_label (asm_out_file, name); |
6420 | 	  fputs ("\t.hidden\t", asm_out_file); |
6421 | assemble_name (asm_out_file, name); |
6422 | 	  putc ('\n', asm_out_file); |
6423 | ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); |
6424 | } |
6425 | else |
6426 | { |
6427 | switch_to_section (text_section); |
6428 | ASM_OUTPUT_LABEL (asm_out_file, name); |
6429 | } |
6430 | |
6431 | DECL_INITIAL (decl) = make_node (BLOCK); |
6432 | current_function_decl = decl; |
6433 | allocate_struct_function (decl, false); |
6434 | init_function_start (decl); |
6435 | /* We're about to hide the function body from callees of final_* by |
6436 | emitting it directly; tell them we're a thunk, if they care. */ |
6437 | cfun->is_thunk = true; |
6438 | first_function_block_is_cold = false; |
6439 | /* Make sure unwind info is emitted for the thunk if needed. */ |
6440 | final_start_function (emit_barrier (), asm_out_file, 1); |
6441 | |
6442 | /* Pad stack IP move with 4 instructions (two NOPs count |
6443 | as one instruction). */ |
6444 | if (TARGET_PAD_SHORT_FUNCTION) |
6445 | { |
6446 | int i = 8; |
6447 | |
6448 | while (i--) |
6449 | 	    fputs ("\tnop\n", asm_out_file); |
6450 | } |
6451 | |
6452 | xops[0] = gen_rtx_REG (Pmode, regno); |
6453 | xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); |
6454 | output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); |
6455 |       fputs ("\tret\n", asm_out_file); |
6456 | final_end_function (); |
6457 | init_insn_lengths (); |
6458 | free_after_compilation (cfun); |
6459 | set_cfun (NULL); |
6460 | current_function_decl = NULL; |
6461 | } |
6462 | |
6463 | if (flag_split_stack) |
6464 | file_end_indicate_split_stack (); |
6465 | } |
6466 | |
6467 | /* Emit code for the SET_GOT patterns. */ |
6468 | |
6469 | const char * |
6470 | output_set_got (rtx dest, rtx label) |
6471 | { |
6472 | rtx xops[3]; |
6473 | |
6474 | xops[0] = dest; |
6475 | |
6476 | if (TARGET_VXWORKS_RTP && flag_pic) |
6477 | { |
6478 | /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ |
6479 | xops[2] = gen_rtx_MEM (Pmode, |
6480 | gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); |
6481 | output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); |
6482 | |
6483 | /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. |
6484 | Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as |
6485 | an unadorned address. */ |
6486 | xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); |
6487 | SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; |
6488 | output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); |
6489 | return ""; |
6490 | } |
6491 | |
6492 | xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); |
6493 | |
6494 | if (flag_pic) |
6495 | { |
6496 | char name[32]; |
6497 | get_pc_thunk_name (name, REGNO (dest)); |
6498 | pic_labels_used |= 1 << REGNO (dest); |
6499 | |
6500 | xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); |
6501 | xops[2] = gen_rtx_MEM (QImode, xops[2]); |
6502 | output_asm_insn ("%!call\t%X2", xops); |
6503 | |
6504 | #if TARGET_MACHO |
6505 | /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. |
6506 | This is what will be referenced by the Mach-O PIC subsystem. */ |
6507 | if (machopic_should_output_picbase_label () || !label) |
6508 | ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); |
6509 | |
6510 | /* When we are restoring the pic base at the site of a nonlocal label, |
6511 | and we decided to emit the pic base above, we will still output a |
6512 | local label used for calculating the correction offset (even though |
6513 | the offset will be 0 in that case). */ |
6514 | if (label) |
6515 | targetm.asm_out.internal_label (asm_out_file, "L", |
6516 | CODE_LABEL_NUMBER (label)); |
6517 | #endif |
6518 | } |
6519 | else |
6520 | { |
6521 | if (TARGET_MACHO) |
6522 | /* We don't need a pic base, we're not producing pic. */ |
6523 | gcc_unreachable (); |
6524 | |
6525 | xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); |
6526 | output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); |
6527 | targetm.asm_out.internal_label (asm_out_file, "L", |
6528 | CODE_LABEL_NUMBER (XEXP (xops[2], 0))); |
6529 | } |
6530 | |
6531 | if (!TARGET_MACHO) |
6532 | output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); |
6533 | |
6534 | return ""; |
6535 | } |
6536 | |
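/* In the common 32-bit -fpic case the sequence emitted above looks like
   (AT&T syntax)

     call	__x86.get_pc_thunk.bx
     addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   with the thunk loading the return address into %ebx.  Without -fpic a
   mov from a local label is used instead of the thunk call.  */
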
6537 | /* Generate a "push" pattern for input ARG. */ |
6538 | |
6539 | rtx |
6540 | gen_push (rtx arg, bool ppx_p) |
6541 | { |
6542 | struct machine_function *m = cfun->machine; |
6543 | |
6544 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6545 | m->fs.cfa_offset += UNITS_PER_WORD; |
6546 | m->fs.sp_offset += UNITS_PER_WORD; |
6547 | |
6548 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6549 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6550 | |
6551 | rtx stack = gen_rtx_MEM (word_mode, |
6552 | gen_rtx_PRE_DEC (Pmode, |
6553 | stack_pointer_rtx)); |
6554 | return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg); |
6555 | } |
6556 | |
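/* The push generated above is a plain SET through a pre-decrement of the
   stack pointer; on x86-64, pushing %rbx yields roughly

     (set (mem:DI (pre_dec:DI (reg:DI sp)))
	  (reg:DI bx))

   with the CFA and SP offsets in cfun->machine->fs adjusted to match.  */
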
6557 | rtx |
6558 | gen_pushfl (void) |
6559 | { |
6560 | struct machine_function *m = cfun->machine; |
6561 | rtx flags, mem; |
6562 | |
6563 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6564 | m->fs.cfa_offset += UNITS_PER_WORD; |
6565 | m->fs.sp_offset += UNITS_PER_WORD; |
6566 | |
6567 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
6568 | |
6569 | mem = gen_rtx_MEM (word_mode, |
6570 | gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); |
6571 | |
6572 |   return gen_pushfl2 (word_mode, mem, flags); |
6573 | } |
6574 | |
6575 | /* Generate a "pop" pattern for input ARG. */ |
6576 | |
6577 | rtx |
6578 | gen_pop (rtx arg, bool ppx_p) |
6579 | { |
6580 | if (REG_P (arg) && GET_MODE (arg) != word_mode) |
6581 | arg = gen_rtx_REG (word_mode, REGNO (arg)); |
6582 | |
6583 | rtx stack = gen_rtx_MEM (word_mode, |
6584 | gen_rtx_POST_INC (Pmode, |
6585 | stack_pointer_rtx)); |
6586 | |
6587 | return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack); |
6588 | } |
6589 | |
6590 | rtx |
6591 | gen_popfl (void) |
6592 | { |
6593 | rtx flags, mem; |
6594 | |
6595 | flags = gen_rtx_REG (CCmode, FLAGS_REG); |
6596 | |
6597 | mem = gen_rtx_MEM (word_mode, |
6598 | gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); |
6599 | |
6600 |   return gen_popfl1 (word_mode, flags, mem); |
6601 | } |
6602 | |
6603 | /* Generate a "push2" pattern for input ARG. */ |
6604 | rtx |
6605 | gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) |
6606 | { |
6607 | struct machine_function *m = cfun->machine; |
6608 | const int offset = UNITS_PER_WORD * 2; |
6609 | |
6610 | if (m->fs.cfa_reg == stack_pointer_rtx) |
6611 | m->fs.cfa_offset += offset; |
6612 | m->fs.sp_offset += offset; |
6613 | |
6614 | if (REG_P (reg1) && GET_MODE (reg1) != word_mode) |
6615 | reg1 = gen_rtx_REG (word_mode, REGNO (reg1)); |
6616 | |
6617 | if (REG_P (reg2) && GET_MODE (reg2) != word_mode) |
6618 | reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); |
6619 | |
6620 | return ppx_p ? gen_push2p_di (mem, reg1, reg2) |
6621 | : gen_push2_di (mem, reg1, reg2); |
6622 | } |
6623 | |
6624 | /* Return >= 0 if there is an unused call-clobbered register available |
6625 | for the entire function. */ |
6626 | |
6627 | static unsigned int |
6628 | ix86_select_alt_pic_regnum (void) |
6629 | { |
6630 | if (ix86_use_pseudo_pic_reg ()) |
6631 | return INVALID_REGNUM; |
6632 | |
6633 | if (crtl->is_leaf |
6634 | && !crtl->profile |
6635 | && !ix86_current_function_calls_tls_descriptor) |
6636 | { |
6637 | int i, drap; |
6638 | /* Can't use the same register for both PIC and DRAP. */ |
6639 | if (crtl->drap_reg) |
6640 | drap = REGNO (crtl->drap_reg); |
6641 | else |
6642 | drap = -1; |
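| /* Hard registers 0-2 are AX, DX and CX; scan them starting from CX. */ |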
6643 | for (i = 2; i >= 0; --i) |
6644 | if (i != drap && !df_regs_ever_live_p (i)) |
6645 | return i; |
6646 | } |
6647 | |
6648 | return INVALID_REGNUM; |
6649 | } |
6650 | |
6651 | /* Return true if REGNO is used by the epilogue. */ |
6652 | |
6653 | bool |
6654 | ix86_epilogue_uses (int regno) |
6655 | { |
6656 | /* If there are no caller-saved registers, we preserve all registers, |
6657 | except for MMX and x87 registers which aren't supported when saving |
6658 | and restoring registers. Don't explicitly save SP register since |
6659 | it is always preserved. */ |
6660 | return (epilogue_completed |
6661 | && (cfun->machine->call_saved_registers |
6662 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
6663 | && !fixed_regs[regno] |
6664 | && !STACK_REGNO_P (regno) |
6665 | && !MMX_REGNO_P (regno)); |
6666 | } |
6667 | |
6668 | /* Return nonzero if register REGNO can be used as a scratch register |
6669 | in peephole2. */ |
6670 | |
6671 | static bool |
6672 | ix86_hard_regno_scratch_ok (unsigned int regno) |
6673 | { |
6674 | /* If there are no caller-saved registers, we can't use any register |
6675 | as a scratch register after epilogue and use REGNO as scratch |
6676 | register only if it has been used before to avoid saving and |
6677 | restoring it. */ |
6678 | return ((cfun->machine->call_saved_registers |
6679 | != TYPE_NO_CALLER_SAVED_REGISTERS) |
6680 | || (!epilogue_completed |
6681 | && df_regs_ever_live_p (regno))); |
6682 | } |
6683 | |
6684 | /* Return TRUE if we need to save REGNO. */ |
6685 | |
6686 | bool |
6687 | ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) |
6688 | { |
6689 | rtx reg; |
6690 | |
6691 | switch (cfun->machine->call_saved_registers) |
6692 | { |
6693 | case TYPE_DEFAULT_CALL_SAVED_REGISTERS: |
6694 | break; |
6695 | |
6696 | case TYPE_NO_CALLER_SAVED_REGISTERS: |
6697 | /* If there are no caller-saved registers, we preserve all |
6698 | registers, except for MMX and x87 registers which aren't |
6699 | supported when saving and restoring registers. Don't |
6700 | explicitly save SP register since it is always preserved. |
6701 | |
6702 | Don't preserve registers used for function return value. */ |
6703 | reg = crtl->return_rtx; |
6704 | if (reg) |
6705 | { |
6706 | unsigned int i = REGNO (reg); |
6707 | unsigned int nregs = REG_NREGS (reg); |
6708 | while (nregs-- > 0) |
6709 | if ((i + nregs) == regno) |
6710 | return false; |
6711 | } |
6712 | |
6713 | return (df_regs_ever_live_p (regno) |
6714 | && !fixed_regs[regno] |
6715 | && !STACK_REGNO_P (regno) |
6716 | && !MMX_REGNO_P (regno) |
6717 | && (regno != HARD_FRAME_POINTER_REGNUM |
6718 | || !frame_pointer_needed)); |
6719 | |
6720 | case TYPE_NO_CALLEE_SAVED_REGISTERS: |
6721 | return false; |
6722 | |
6723 | case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP: |
6724 | if (regno != HARD_FRAME_POINTER_REGNUM) |
6725 | return false; |
6726 | break; |
6727 | } |
6728 | |
6729 | if (regno == REAL_PIC_OFFSET_TABLE_REGNUM |
6730 | && pic_offset_table_rtx) |
6731 | { |
6732 | if (ix86_use_pseudo_pic_reg ()) |
6733 | { |
6734 | /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to |
6735 | _mcount in prologue. */ |
6736 | if (!TARGET_64BIT && flag_pic && crtl->profile) |
6737 | return true; |
6738 | } |
6739 | else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) |
6740 | || crtl->profile |
6741 | || crtl->calls_eh_return |
6742 | || crtl->uses_const_pool |
6743 | || cfun->has_nonlocal_label) |
6744 | return ix86_select_alt_pic_regnum () == INVALID_REGNUM; |
6745 | } |
6746 | |
6747 | if (crtl->calls_eh_return && maybe_eh_return) |
6748 | { |
6749 | unsigned i; |
6750 | for (i = 0; ; i++) |
6751 | { |
6752 | unsigned test = EH_RETURN_DATA_REGNO (i); |
6753 | if (test == INVALID_REGNUM) |
6754 | break; |
6755 | if (test == regno) |
6756 | return true; |
6757 | } |
6758 | } |
6759 | |
6760 | if (ignore_outlined && cfun->machine->call_ms2sysv) |
6761 | { |
6762 | unsigned count = cfun->machine->call_ms2sysv_extra_regs |
6763 | + xlogue_layout::MIN_REGS; |
6764 | if (xlogue_layout::is_stub_managed_reg (regno, count)) |
6765 | return false; |
6766 | } |
6767 | |
6768 | if (crtl->drap_reg |
6769 | && regno == REGNO (crtl->drap_reg) |
6770 | && !cfun->machine->no_drap_save_restore) |
6771 | return true; |
6772 | |
6773 | return (df_regs_ever_live_p (regno) |
6774 | && !call_used_or_fixed_reg_p (regno) |
6775 | && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); |
6776 | } |
6777 | |
6778 | /* Return the number of saved general purpose registers. */ |
6779 | |
6780 | static int |
6781 | ix86_nsaved_regs (void) |
6782 | { |
6783 | int nregs = 0; |
6784 | int regno; |
6785 | |
6786 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6787 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
6788 | nregs ++; |
6789 | return nregs; |
6790 | } |
6791 | |
6792 | /* Return number of saved SSE registers. */ |
6793 | |
6794 | static int |
6795 | ix86_nsaved_sseregs (void) |
6796 | { |
6797 | int nregs = 0; |
6798 | int regno; |
6799 | |
6800 | if (!TARGET_64BIT_MS_ABI) |
6801 | return 0; |
6802 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
6803 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
6804 | nregs ++; |
6805 | return nregs; |
6806 | } |
6807 | |
6808 | /* Given FROM and TO register numbers, say whether this elimination is |
6809 | allowed. If stack alignment is needed, we can only replace argument |
6810 | pointer with hard frame pointer, or replace frame pointer with stack |
6811 | pointer. Otherwise, frame pointer elimination is automatically |
6812 | handled and all other eliminations are valid. */ |
6813 | |
6814 | static bool |
6815 | ix86_can_eliminate (const int from, const int to) |
6816 | { |
6817 | if (stack_realign_fp) |
6818 | return ((from == ARG_POINTER_REGNUM |
6819 | && to == HARD_FRAME_POINTER_REGNUM) |
6820 | || (from == FRAME_POINTER_REGNUM |
6821 | && to == STACK_POINTER_REGNUM)); |
6822 | else |
6823 | return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
6824 | } |
6825 | |
6826 | /* Return the offset between two registers, one to be eliminated, and the other |
6827 | its replacement, at the start of a routine. */ |
6828 | |
6829 | HOST_WIDE_INT |
6830 | ix86_initial_elimination_offset (int from, int to) |
6831 | { |
6832 | struct ix86_frame &frame = cfun->machine->frame; |
6833 | |
6834 | if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
6835 | return frame.hard_frame_pointer_offset; |
6836 | else if (from == FRAME_POINTER_REGNUM |
6837 | && to == HARD_FRAME_POINTER_REGNUM) |
6838 | return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
6839 | else |
6840 | { |
6841 | gcc_assert (to == STACK_POINTER_REGNUM); |
6842 | |
6843 | if (from == ARG_POINTER_REGNUM) |
6844 | return frame.stack_pointer_offset; |
6845 | |
6846 | gcc_assert (from == FRAME_POINTER_REGNUM); |
6847 | return frame.stack_pointer_offset - frame.frame_pointer_offset; |
6848 | } |
6849 | } |
6850 | |
6851 | /* Emits a warning for unsupported msabi to sysv pro/epilogues. */ |
6852 | void |
6853 | warn_once_call_ms2sysv_xlogues (const char *feature) |
6854 | { |
6855 | static bool warned_once = false; |
6856 | if (!warned_once) |
6857 | { |
6858 | warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", |
6859 | feature); |
6860 | warned_once = true; |
6861 | } |
6862 | } |
6863 | |
6864 | /* Return the probing interval for -fstack-clash-protection. */ |
6865 | |
6866 | static HOST_WIDE_INT |
6867 | get_probe_interval (void) |
6868 | { |
6869 | if (flag_stack_clash_protection) |
6870 | return (HOST_WIDE_INT_1U |
6871 | << param_stack_clash_protection_probe_interval); |
6872 | else |
6873 | return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); |
6874 | } |
6875 | |
6876 | /* When using -fsplit-stack, the allocation routines set a field in |
6877 | the TCB to the bottom of the stack plus this much space, measured |
6878 | in bytes. */ |
6879 | |
6880 | #define SPLIT_STACK_AVAILABLE 256 |
6881 | |
6882 | /* Return true if push2/pop2 can be generated. */ |
6883 | |
6884 | static bool |
6885 | ix86_can_use_push2pop2 (void) |
6886 | { |
6887 | /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */ |
6888 | unsigned int incoming_stack_boundary |
6889 | = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary |
6890 | ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); |
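| /* Boundaries are measured in bits, so 128 corresponds to 16 bytes. */ |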
6891 | return incoming_stack_boundary % 128 == 0; |
6892 | } |
6893 | |
6894 | /* Helper function to determine whether push2/pop2 can be used in prologue or |
6895 | epilogue for register save/restore. */ |
6896 | static bool |
6897 | ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) |
6898 | { |
6899 | if (!ix86_can_use_push2pop2 ()) |
6900 | return false; |
6901 | int aligned = cfun->machine->fs.sp_offset % 16 == 0; |
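| /* Require at least one push2 pair: two saves when the stack pointer is |
| already 16-byte aligned, otherwise three, since one single push is |
| emitted first to reach the alignment (see ix86_emit_save_regs). */ |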
6902 | return TARGET_APX_PUSH2POP2 |
6903 | && !cfun->machine->frame.save_regs_using_mov |
6904 | && cfun->machine->func_type == TYPE_NORMAL |
6905 | && (nregs + aligned) >= 3; |
6906 | } |
6907 | |
6908 | /* Fill the ix86_frame structure describing the frame of the current function. */ |
6909 | |
6910 | static void |
6911 | ix86_compute_frame_layout (void) |
6912 | { |
6913 | struct ix86_frame *frame = &cfun->machine->frame; |
6914 | struct machine_function *m = cfun->machine; |
6915 | unsigned HOST_WIDE_INT stack_alignment_needed; |
6916 | HOST_WIDE_INT offset; |
6917 | unsigned HOST_WIDE_INT preferred_alignment; |
6918 | HOST_WIDE_INT size = ix86_get_frame_size (); |
6919 | HOST_WIDE_INT to_allocate; |
6920 | |
6921 | /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit |
6922 | * ms_abi functions that call a sysv function. We now need to prune away |
6923 | * cases where it should be disabled. */ |
6924 | if (TARGET_64BIT && m->call_ms2sysv) |
6925 | { |
6926 | gcc_assert (TARGET_64BIT_MS_ABI); |
6927 | gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); |
6928 | gcc_assert (!TARGET_SEH); |
6929 | gcc_assert (TARGET_SSE); |
6930 | gcc_assert (!ix86_using_red_zone ()); |
6931 | |
6932 | if (crtl->calls_eh_return) |
6933 | { |
6934 | gcc_assert (!reload_completed); |
6935 | m->call_ms2sysv = false; |
6936 | warn_once_call_ms2sysv_xlogues ("__builtin_eh_return"); |
6937 | } |
6938 | |
6939 | else if (ix86_static_chain_on_stack) |
6940 | { |
6941 | gcc_assert (!reload_completed); |
6942 | m->call_ms2sysv = false; |
6943 | warn_once_call_ms2sysv_xlogues ("static call chains"); |
6944 | } |
6945 | |
6946 | /* Finally, compute which registers the stub will manage. */ |
6947 | else |
6948 | { |
6949 | unsigned count = xlogue_layout::count_stub_managed_regs (); |
6950 | m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; |
6951 | m->call_ms2sysv_pad_in = 0; |
6952 | } |
6953 | } |
6954 | |
6955 | frame->nregs = ix86_nsaved_regs (); |
6956 | frame->nsseregs = ix86_nsaved_sseregs (); |
6957 | |
6958 | /* The 64-bit MS ABI seems to require the stack alignment to always be 16, |
6959 | except for function prologues, leaf functions and when the default |
6960 | incoming stack boundary is overridden at the command line or via the |
6961 | force_align_arg_pointer attribute. |
6962 | |
6963 | Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants |
6964 | at call sites, including profile function calls. |
6965 | |
6966 | For APX push2/pop2, the stack also requires 128b alignment. */ |
6967 | if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs) |
6968 | && crtl->preferred_stack_boundary < 128) |
6969 | || (((TARGET_64BIT_MS_ABI || TARGET_MACHO) |
6970 | && crtl->preferred_stack_boundary < 128) |
6971 | && (!crtl->is_leaf || cfun->calls_alloca != 0 |
6972 | || ix86_current_function_calls_tls_descriptor |
6973 | || (TARGET_MACHO && crtl->profile) |
6974 | || ix86_incoming_stack_boundary < 128))) |
6975 | { |
6976 | crtl->preferred_stack_boundary = 128; |
6977 | if (crtl->stack_alignment_needed < 128) |
6978 | crtl->stack_alignment_needed = 128; |
6979 | } |
6980 | |
6981 | stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; |
6982 | preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; |
6983 | |
6984 | gcc_assert (!size || stack_alignment_needed); |
6985 | gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); |
6986 | gcc_assert (preferred_alignment <= stack_alignment_needed); |
6987 | |
6988 | /* The only ABI saving SSE regs should be 64-bit ms_abi. */ |
6989 | gcc_assert (TARGET_64BIT || !frame->nsseregs); |
6990 | if (TARGET_64BIT && m->call_ms2sysv) |
6991 | { |
6992 | gcc_assert (stack_alignment_needed >= 16); |
6993 | gcc_assert (!frame->nsseregs); |
6994 | } |
6995 | |
6996 | /* For SEH we have to limit the amount of code movement into the prologue. |
6997 | At present we do this via a BLOCKAGE, at which point there's very little |
6998 | scheduling that can be done, which means that there's very little point |
6999 | in doing anything except PUSHs. */ |
7000 | if (TARGET_SEH) |
7001 | m->use_fast_prologue_epilogue = false; |
7002 | else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) |
7003 | { |
7004 | int count = frame->nregs; |
7005 | struct cgraph_node *node = cgraph_node::get (current_function_decl); |
7006 | |
7007 | /* The fast prologue uses move instead of push to save registers. This |
7008 | is significantly longer, but also executes faster as modern hardware |
7009 | can execute the moves in parallel, but can't do that for push/pop. |
7010 | |
7011 | Be careful about choosing what prologue to emit: When function takes |
7012 | many instructions to execute we may use slow version as well as in |
7013 | case function is known to be outside hot spot (this is known with |
7014 | feedback only). Weight the size of function by number of registers |
7015 | to save as it is cheap to use one or two push instructions but very |
7016 | slow to use many of them. |
7017 | |
7018 | Calling this hook multiple times with the same frame requirements |
7019 | must produce the same layout, since the RA might otherwise be |
7020 | unable to reach a fixed point or might fail its final sanity checks. |
7021 | This means that once we've assumed that a function does or doesn't |
7022 | have a particular size, we have to stick to that assumption |
7023 | regardless of how the function has changed since. */ |
7024 | if (count) |
7025 | count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; |
7026 | if (node->frequency < NODE_FREQUENCY_NORMAL |
7027 | || (flag_branch_probabilities |
7028 | && node->frequency < NODE_FREQUENCY_HOT)) |
7029 | m->use_fast_prologue_epilogue = false; |
7030 | else |
7031 | { |
7032 | if (count != frame->expensive_count) |
7033 | { |
7034 | frame->expensive_count = count; |
7035 | frame->expensive_p = expensive_function_p (count); |
7036 | } |
7037 | m->use_fast_prologue_epilogue = !frame->expensive_p; |
7038 | } |
7039 | } |
7040 | |
7041 | frame->save_regs_using_mov |
7042 | = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue; |
7043 | |
7044 | /* Skip return address and error code in exception handler. */ |
7045 | offset = INCOMING_FRAME_SP_OFFSET; |
7046 | |
7047 | /* Skip pushed static chain. */ |
7048 | if (ix86_static_chain_on_stack) |
7049 | offset += UNITS_PER_WORD; |
7050 | |
7051 | /* Skip saved base pointer. */ |
7052 | if (frame_pointer_needed) |
7053 | offset += UNITS_PER_WORD; |
7054 | frame->hfp_save_offset = offset; |
7055 | |
7056 | /* The traditional frame pointer location is at the top of the frame. */ |
7057 | frame->hard_frame_pointer_offset = offset; |
7058 | |
7059 | /* Register save area */ |
7060 | offset += frame->nregs * UNITS_PER_WORD; |
7061 | frame->reg_save_offset = offset; |
7062 | |
7063 | /* Calculate the size of the va-arg area (not including padding, if any). */ |
7064 | frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; |
7065 | |
7066 | /* Also adjust stack_realign_offset for the largest alignment of |
7067 | stack slot actually used. */ |
7068 | if (stack_realign_fp |
7069 | || (cfun->machine->max_used_stack_alignment != 0 |
7070 | && (offset % cfun->machine->max_used_stack_alignment) != 0)) |
7071 | { |
7072 | /* We may need a 16-byte aligned stack for the remainder of the |
7073 | register save area, but the stack frame for the local function |
7074 | may require a greater alignment if using AVX/2/512. In order |
7075 | to avoid wasting space, we first calculate the space needed for |
7076 | the rest of the register saves, add that to the stack pointer, |
7077 | and then realign the stack to the boundary of the start of the |
7078 | frame for the local function. */ |
7079 | HOST_WIDE_INT space_needed = 0; |
7080 | HOST_WIDE_INT sse_reg_space_needed = 0; |
7081 | |
7082 | if (TARGET_64BIT) |
7083 | { |
7084 | if (m->call_ms2sysv) |
7085 | { |
7086 | m->call_ms2sysv_pad_in = 0; |
7087 | space_needed = xlogue_layout::get_instance ().get_stack_space_used (); |
7088 | } |
7089 | |
7090 | else if (frame->nsseregs) |
7091 | /* The only ABI that has saved SSE registers (Win64) also has a |
7092 | 16-byte aligned default stack. However, many programs violate |
7093 | the ABI, and Wine64 forces stack realignment to compensate. */ |
7094 | space_needed = frame->nsseregs * 16; |
7095 | |
7096 | sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); |
7097 | |
7098 | /* 64-bit frame->va_arg_size should always be a multiple of 16, but |
7099 | rounding to be pedantic. */ |
7100 | space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); |
7101 | } |
7102 | else |
7103 | space_needed = frame->va_arg_size; |
7104 | |
7105 | /* Record the allocation size required prior to the realignment AND. */ |
7106 | frame->stack_realign_allocate = space_needed; |
7107 | |
7108 | /* The re-aligned stack starts at frame->stack_realign_offset. Values |
7109 | before this point are not directly comparable with values below |
7110 | this point. Use sp_valid_at to determine if the stack pointer is |
7111 | valid for a given offset, fp_valid_at for the frame pointer, or |
7112 | choose_baseaddr to have a base register chosen for you. |
7113 | |
7114 | Note that the result of (frame->stack_realign_offset |
7115 | & (stack_alignment_needed - 1)) may not equal zero. */ |
7116 | offset = ROUND_UP (offset + space_needed, stack_alignment_needed); |
7117 | frame->stack_realign_offset = offset - space_needed; |
7118 | frame->sse_reg_save_offset = frame->stack_realign_offset |
7119 | + sse_reg_space_needed; |
7120 | } |
7121 | else |
7122 | { |
7123 | frame->stack_realign_offset = offset; |
7124 | |
7125 | if (TARGET_64BIT && m->call_ms2sysv) |
7126 | { |
7127 | m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); |
7128 | offset += xlogue_layout::get_instance ().get_stack_space_used (); |
7129 | } |
7130 | |
7131 | /* Align and set SSE register save area. */ |
7132 | else if (frame->nsseregs) |
7133 | { |
7134 | /* If the incoming stack boundary is at least 16 bytes, or DRAP is |
7135 | required and the DRAP re-alignment boundary is at least 16 bytes, |
7136 | then we want the SSE register save area properly aligned. */ |
7137 | if (ix86_incoming_stack_boundary >= 128 |
7138 | || (stack_realign_drap && stack_alignment_needed >= 16)) |
7139 | offset = ROUND_UP (offset, 16); |
7140 | offset += frame->nsseregs * 16; |
7141 | } |
7142 | frame->sse_reg_save_offset = offset; |
7143 | offset += frame->va_arg_size; |
7144 | } |
7145 | |
7146 | /* Align start of frame for local function. When a function call |
7147 | is removed, it may become a leaf function. But if arguments may |
7148 | be passed on the stack, we need to align the stack when there is no |
7149 | tail call. */ |
7150 | if (m->call_ms2sysv |
7151 | || frame->va_arg_size != 0 |
7152 | || size != 0 |
7153 | || !crtl->is_leaf |
7154 | || (!crtl->tail_call_emit |
7155 | && cfun->machine->outgoing_args_on_stack) |
7156 | || cfun->calls_alloca |
7157 | || ix86_current_function_calls_tls_descriptor) |
7158 | offset = ROUND_UP (offset, stack_alignment_needed); |
7159 | |
7160 | /* Frame pointer points here. */ |
7161 | frame->frame_pointer_offset = offset; |
7162 | |
7163 | offset += size; |
7164 | |
7165 | /* Add outgoing arguments area. Can be skipped if we eliminated |
7166 | all the function calls as dead code. |
7167 | Skipping is however impossible when function calls alloca. Alloca |
7168 | expander assumes that last crtl->outgoing_args_size |
7169 | of stack frame are unused. */ |
7170 | if (ACCUMULATE_OUTGOING_ARGS |
7171 | && (!crtl->is_leaf || cfun->calls_alloca |
7172 | || ix86_current_function_calls_tls_descriptor)) |
7173 | { |
7174 | offset += crtl->outgoing_args_size; |
7175 | frame->outgoing_arguments_size = crtl->outgoing_args_size; |
7176 | } |
7177 | else |
7178 | frame->outgoing_arguments_size = 0; |
7179 | |
7180 | /* Align stack boundary. Only needed if we're calling another function |
7181 | or using alloca. */ |
7182 | if (!crtl->is_leaf || cfun->calls_alloca |
7183 | || ix86_current_function_calls_tls_descriptor) |
7184 | offset = ROUND_UP (offset, preferred_alignment); |
7185 | |
7186 | /* We've reached end of stack frame. */ |
7187 | frame->stack_pointer_offset = offset; |
7188 | |
7189 | /* Size prologue needs to allocate. */ |
7190 | to_allocate = offset - frame->sse_reg_save_offset; |
7191 | |
7192 | if ((!to_allocate && frame->nregs <= 1) |
7193 | || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) |
7194 | /* If static stack checking is enabled and done with probes, |
7195 | the registers need to be saved before allocating the frame. */ |
7196 | || flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
7197 | /* If stack clash probing needs a loop, then it needs a |
7198 | scratch register. But the returned register is only guaranteed |
7199 | to be safe to use after register saves are complete. So if |
7200 | stack clash protections are enabled and the allocated frame is |
7201 | larger than the probe interval, then use pushes to save |
7202 | callee saved registers. */ |
7203 | || (flag_stack_clash_protection |
7204 | && !ix86_target_stack_probe () |
7205 | && to_allocate > get_probe_interval ())) |
7206 | frame->save_regs_using_mov = false; |
7207 | |
7208 | if (ix86_using_red_zone () |
7209 | && crtl->sp_is_unchanging |
7210 | && crtl->is_leaf |
7211 | && !cfun->machine->asm_redzone_clobber_seen |
7212 | && !ix86_pc_thunk_call_expanded |
7213 | && !ix86_current_function_calls_tls_descriptor) |
7214 | { |
7215 | frame->red_zone_size = to_allocate; |
7216 | if (frame->save_regs_using_mov) |
7217 | frame->red_zone_size += frame->nregs * UNITS_PER_WORD; |
7218 | if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) |
7219 | frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; |
7220 | } |
7221 | else |
7222 | frame->red_zone_size = 0; |
7223 | frame->stack_pointer_offset -= frame->red_zone_size; |
7224 | |
7225 | /* The SEH frame pointer location is near the bottom of the frame. |
7226 | This is enforced by the fact that the difference between the |
7227 | stack pointer and the frame pointer is limited to 240 bytes in |
7228 | the unwind data structure. */ |
7229 | if (TARGET_SEH) |
7230 | { |
7231 | /* Force the frame pointer to point at or below the lowest register save |
7232 | area, see the SEH code in config/i386/winnt.cc for the rationale. */ |
7233 | frame->hard_frame_pointer_offset = frame->sse_reg_save_offset; |
7234 | |
7235 | /* If we can leave the frame pointer where it is, do so; however return |
7236 | the establisher frame for __builtin_frame_address (0) or else if the |
7237 | frame overflows the SEH maximum frame size. |
7238 | |
7239 | Note that the value returned by __builtin_frame_address (0) is quite |
7240 | constrained, because setjmp is piggybacked on the SEH machinery with |
7241 | recent versions of MinGW: |
7242 | |
7243 | # elif defined(__SEH__) |
7244 | # if defined(__aarch64__) || defined(_ARM64_) |
7245 | # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry()) |
7246 | # elif (__MINGW_GCC_VERSION < 40702) |
7247 | # define setjmp(BUF) _setjmp((BUF), mingw_getsp()) |
7248 | # else |
7249 | # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0)) |
7250 | # endif |
7251 | |
7252 | and the second argument passed to _setjmp, if not null, is forwarded |
7253 | to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has |
7254 | built an ExceptionRecord on the fly describing the setjmp buffer). */ |
7255 | const HOST_WIDE_INT diff |
7256 | = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; |
7257 | if (diff <= 255 && !crtl->accesses_prior_frames) |
7258 | { |
7259 | /* The resulting diff will be a multiple of 16 lower than 255, |
7260 | i.e. at most 240 as required by the unwind data structure. */ |
7261 | frame->hard_frame_pointer_offset += (diff & 15); |
7262 | } |
7263 | else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames) |
7264 | { |
7265 | /* Ideally we'd determine what portion of the local stack frame |
7266 | (within the constraint of the lowest 240) is most heavily used. |
7267 | But without that complication, simply bias the frame pointer |
7268 | by 128 bytes so as to maximize the amount of the local stack |
7269 | frame that is addressable with 8-bit offsets. */ |
7270 | frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; |
7271 | } |
7272 | else |
7273 | frame->hard_frame_pointer_offset = frame->hfp_save_offset; |
7274 | } |
7275 | } |
7276 | |
7277 | /* This is semi-inlined memory_address_length, but simplified |
7278 | since we know that we're always dealing with reg+offset, and |
7279 | to avoid having to create and discard all that rtl. */ |
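| /* The result is the displacement size in bytes (0, 1 or 4) plus one |
| when a SIB byte is required; e.g. 8(%rsp) needs a 1-byte displacement |
| and a SIB byte, for a length of 2. */ |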
7280 | |
7281 | static inline int |
7282 | choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) |
7283 | { |
7284 | int len = 4; |
7285 | |
7286 | if (offset == 0) |
7287 | { |
7288 | /* EBP and R13 cannot be encoded without an offset. */ |
7289 | len = (regno == BP_REG || regno == R13_REG); |
7290 | } |
7291 | else if (IN_RANGE (offset, -128, 127)) |
7292 | len = 1; |
7293 | |
7294 | /* ESP and R12 must be encoded with a SIB byte. */ |
7295 | if (regno == SP_REG || regno == R12_REG) |
7296 | len++; |
7297 | |
7298 | return len; |
7299 | } |
7300 | |
7301 | /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in |
7302 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7303 | |
7304 | static bool |
7305 | sp_valid_at (HOST_WIDE_INT cfa_offset) |
7306 | { |
7307 | const struct machine_frame_state &fs = cfun->machine->fs; |
7308 | if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) |
7309 | { |
7310 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7311 | gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); |
7312 | return false; |
7313 | } |
7314 | return fs.sp_valid; |
7315 | } |
7316 | |
7317 | /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in |
7318 | the frame save area. The register is saved at CFA - CFA_OFFSET. */ |
7319 | |
7320 | static inline bool |
7321 | fp_valid_at (HOST_WIDE_INT cfa_offset) |
7322 | { |
7323 | const struct machine_frame_state &fs = cfun->machine->fs; |
7324 | if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) |
7325 | { |
7326 | /* Validate that the cfa_offset isn't in a "no-man's land". */ |
7327 | gcc_assert (cfa_offset >= fs.sp_realigned_offset); |
7328 | return false; |
7329 | } |
7330 | return fs.fp_valid; |
7331 | } |
7332 | |
7333 | /* Choose a base register based upon alignment requested, speed and/or |
7334 | size. */ |
7335 | |
7336 | static void |
7337 | choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, |
7338 | HOST_WIDE_INT &base_offset, |
7339 | unsigned int align_requested, unsigned int *align) |
7340 | { |
7341 | const struct machine_function *m = cfun->machine; |
7342 | unsigned int hfp_align; |
7343 | unsigned int drap_align; |
7344 | unsigned int sp_align; |
7345 | bool hfp_ok = fp_valid_at (cfa_offset); |
7346 | bool drap_ok = m->fs.drap_valid; |
7347 | bool sp_ok = sp_valid_at (cfa_offset); |
7348 | |
7349 | hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; |
7350 | |
7351 | /* Filter out any registers that don't meet the requested alignment |
7352 | criteria. */ |
7353 | if (align_requested) |
7354 | { |
7355 | if (m->fs.realigned) |
7356 | hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; |
7357 | /* SEH unwind code does not currently support REG_CFA_EXPRESSION |
7358 | notes (which we would need to use a realigned stack pointer), |
7359 | so disable on SEH targets. */ |
7360 | else if (m->fs.sp_realigned) |
7361 | sp_align = crtl->stack_alignment_needed; |
7362 | |
7363 | hfp_ok = hfp_ok && hfp_align >= align_requested; |
7364 | drap_ok = drap_ok && drap_align >= align_requested; |
7365 | sp_ok = sp_ok && sp_align >= align_requested; |
7366 | } |
7367 | |
7368 | if (m->use_fast_prologue_epilogue) |
7369 | { |
7370 | /* Choose the base register most likely to allow the most scheduling |
7371 | opportunities. Generally FP is valid throughout the function, |
7372 | while DRAP must be reloaded within the epilogue. But choose either |
7373 | over the SP due to increased encoding size. */ |
7374 | |
7375 | if (hfp_ok) |
7376 | { |
7377 | base_reg = hard_frame_pointer_rtx; |
7378 | base_offset = m->fs.fp_offset - cfa_offset; |
7379 | } |
7380 | else if (drap_ok) |
7381 | { |
7382 | base_reg = crtl->drap_reg; |
7383 | base_offset = 0 - cfa_offset; |
7384 | } |
7385 | else if (sp_ok) |
7386 | { |
7387 | base_reg = stack_pointer_rtx; |
7388 | base_offset = m->fs.sp_offset - cfa_offset; |
7389 | } |
7390 | } |
7391 | else |
7392 | { |
7393 | HOST_WIDE_INT toffset; |
7394 | int len = 16, tlen; |
7395 | |
7396 | /* Choose the base register with the smallest address encoding. |
7397 | With a tie, choose FP > DRAP > SP. */ |
7398 | if (sp_ok) |
7399 | { |
7400 | base_reg = stack_pointer_rtx; |
7401 | base_offset = m->fs.sp_offset - cfa_offset; |
7402 | len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); |
7403 | } |
7404 | if (drap_ok) |
7405 | { |
7406 | toffset = 0 - cfa_offset; |
7407 | tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); |
7408 | if (tlen <= len) |
7409 | { |
7410 | base_reg = crtl->drap_reg; |
7411 | base_offset = toffset; |
7412 | len = tlen; |
7413 | } |
7414 | } |
7415 | if (hfp_ok) |
7416 | { |
7417 | toffset = m->fs.fp_offset - cfa_offset; |
7418 | tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); |
7419 | if (tlen <= len) |
7420 | { |
7421 | base_reg = hard_frame_pointer_rtx; |
7422 | base_offset = toffset; |
7423 | } |
7424 | } |
7425 | } |
7426 | |
7427 | /* Set the align return value. */ |
7428 | if (align) |
7429 | { |
7430 | if (base_reg == stack_pointer_rtx) |
7431 | *align = sp_align; |
7432 | else if (base_reg == crtl->drap_reg) |
7433 | *align = drap_align; |
7434 | else if (base_reg == hard_frame_pointer_rtx) |
7435 | *align = hfp_align; |
7436 | } |
7437 | } |
7438 | |
7439 | /* Return an RTX that points to CFA_OFFSET within the stack frame and |
7440 | the alignment of address. If ALIGN is non-null, it should point to |
7441 | an alignment value (in bits) that is preferred or zero and will |
7442 | recieve the alignment of the base register that was selected, |
7443 | irrespective of rather or not CFA_OFFSET is a multiple of that |
7444 | alignment value. If it is possible for the base register offset to be |
7445 | non-immediate then SCRATCH_REGNO should specify a scratch register to |
7446 | use. |
7447 | |
7448 | The valid base registers are taken from CFUN->MACHINE->FS. */ |
7449 | |
7450 | static rtx |
7451 | choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, |
7452 | unsigned int scratch_regno = INVALID_REGNUM) |
7453 | { |
7454 | rtx base_reg = NULL; |
7455 | HOST_WIDE_INT base_offset = 0; |
7456 | |
7457 | /* If a specific alignment is requested, try to get a base register |
7458 | with that alignment first. */ |
7459 | if (align && *align) |
7460 | choose_basereg (cfa_offset, base_reg, base_offset, *align, align); |
7461 | |
7462 | if (!base_reg) |
7463 | choose_basereg (cfa_offset, base_reg, base_offset, 0, align); |
7464 | |
7465 | gcc_assert (base_reg != NULL); |
7466 | |
7467 | rtx base_offset_rtx = GEN_INT (base_offset); |
7468 | |
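| /* If the offset is too large for an address displacement (it does not |
| fit in a sign-extended 32-bit immediate), load it into the |
| caller-supplied scratch register instead. */ |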
7469 | if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) |
7470 | { |
7471 | gcc_assert (scratch_regno != INVALID_REGNUM); |
7472 | |
7473 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
7474 | emit_move_insn (scratch_reg, base_offset_rtx); |
7475 | |
7476 | return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); |
7477 | } |
7478 | |
7479 | return plus_constant (Pmode, base_reg, base_offset); |
7480 | } |
7481 | |
7482 | /* Emit code to save registers in the prologue. */ |
7483 | |
7484 | static void |
7485 | ix86_emit_save_regs (void) |
7486 | { |
7487 | int regno; |
7488 | rtx_insn *insn; |
7489 | bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return; |
7490 | |
7491 | if (!TARGET_APX_PUSH2POP2 |
7492 | || !ix86_can_use_push2pop2 () |
7493 | || cfun->machine->func_type != TYPE_NORMAL) |
7494 | { |
7495 | for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) |
7496 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
7497 | { |
7498 | insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), |
7499 | use_ppx)); |
7500 | RTX_FRAME_RELATED_P (insn) = 1; |
7501 | } |
7502 | } |
7503 | else |
7504 | { |
7505 | int regno_list[2]; |
7506 | regno_list[0] = regno_list[1] = -1; |
7507 | int loaded_regnum = 0; |
7508 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
7509 | |
7510 | for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) |
7511 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
7512 | { |
7513 | if (aligned) |
7514 | { |
7515 | regno_list[loaded_regnum++] = regno; |
7516 | if (loaded_regnum == 2) |
7517 | { |
7518 | gcc_assert (regno_list[0] != -1 |
7519 | && regno_list[1] != -1 |
7520 | && regno_list[0] != regno_list[1]); |
7521 | const int offset = UNITS_PER_WORD * 2; |
7522 | rtx mem = gen_rtx_MEM (TImode, |
7523 | gen_rtx_PRE_DEC (Pmode, |
7524 | stack_pointer_rtx)); |
7525 | insn = emit_insn (gen_push2 (mem, |
7526 | gen_rtx_REG (word_mode, |
7527 | regno_list[0]), |
7528 | gen_rtx_REG (word_mode, |
7529 | regno_list[1]), |
7530 | use_ppx)); |
7531 | RTX_FRAME_RELATED_P (insn) = 1; |
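| /* Spell out the effect of the push2 insn for the unwind info: the |
| stack pointer adjustment plus the two word-sized register stores. */ |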
7532 | rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); |
7533 | |
7534 | for (int i = 0; i < 2; i++) |
7535 | { |
7536 | rtx dwarf_reg = gen_rtx_REG (word_mode, |
7537 | regno_list[i]); |
7538 | rtx sp_offset = plus_constant (Pmode, |
7539 | stack_pointer_rtx, |
7540 | + UNITS_PER_WORD |
7541 | * (1 - i)); |
7542 | rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, |
7543 | sp_offset), |
7544 | dwarf_reg); |
7545 | RTX_FRAME_RELATED_P (tmp) = 1; |
7546 | XVECEXP (dwarf, 0, i + 1) = tmp; |
7547 | } |
7548 | rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, |
7549 | plus_constant (Pmode, |
7550 | stack_pointer_rtx, |
7551 | -offset)); |
7552 | RTX_FRAME_RELATED_P (sp_tmp) = 1; |
7553 | XVECEXP (dwarf, 0, 0) = sp_tmp; |
7554 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
7555 | |
7556 | loaded_regnum = 0; |
7557 | regno_list[0] = regno_list[1] = -1; |
7558 | } |
7559 | } |
7560 | else |
7561 | { |
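| /* A single word-sized push re-establishes 16-byte alignment, so the |
| remaining saves can be emitted as push2 pairs. */ |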
7562 | insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), |
7563 | use_ppx)); |
7564 | RTX_FRAME_RELATED_P (insn) = 1; |
7565 | aligned = true; |
7566 | } |
7567 | } |
7568 | if (loaded_regnum == 1) |
7569 | { |
7570 | insn = emit_insn (gen_push (gen_rtx_REG (word_mode, |
7571 | regno_list[0]), |
7572 | use_ppx)); |
7573 | RTX_FRAME_RELATED_P (insn) = 1; |
7574 | } |
7575 | } |
7576 | } |
7577 | |
7578 | /* Emit a single register save at CFA - CFA_OFFSET. */ |
7579 | |
7580 | static void |
7581 | ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, |
7582 | HOST_WIDE_INT cfa_offset) |
7583 | { |
7584 | struct machine_function *m = cfun->machine; |
7585 | rtx reg = gen_rtx_REG (mode, regno); |
7586 | rtx mem, addr, base, insn; |
7587 | unsigned int align = GET_MODE_ALIGNMENT (mode); |
7588 | |
7589 | addr = choose_baseaddr (cfa_offset, &align); |
7590 | mem = gen_frame_mem (mode, addr); |
7591 | |
7592 | /* The location alignment depends upon the base register. */ |
7593 | align = MIN (GET_MODE_ALIGNMENT (mode), align); |
7594 | gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); |
7595 | set_mem_align (mem, align); |
7596 | |
7597 | insn = emit_insn (gen_rtx_SET (mem, reg)); |
7598 | RTX_FRAME_RELATED_P (insn) = 1; |
7599 | |
7600 | base = addr; |
7601 | if (GET_CODE (base) == PLUS) |
7602 | base = XEXP (base, 0); |
7603 | gcc_checking_assert (REG_P (base)); |
7604 | |
7605 | /* When saving registers into a re-aligned local stack frame, avoid |
7606 | any tricky guessing by dwarf2out. */ |
7607 | if (m->fs.realigned) |
7608 | { |
7609 | gcc_checking_assert (stack_realign_drap); |
7610 | |
7611 | if (regno == REGNO (crtl->drap_reg)) |
7612 | { |
7613 | /* A bit of a hack. We force the DRAP register to be saved in |
7614 | the re-aligned stack frame, which provides us with a copy |
7615 | of the CFA that will last past the prologue. Install it. */ |
7616 | gcc_checking_assert (cfun->machine->fs.fp_valid); |
7617 | addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
7618 | cfun->machine->fs.fp_offset - cfa_offset); |
7619 | mem = gen_rtx_MEM (mode, addr); |
7620 | add_reg_note (insn, REG_CFA_DEF_CFA, mem); |
7621 | } |
7622 | else |
7623 | { |
7624 | /* The frame pointer is a stable reference within the |
7625 | aligned frame. Use it. */ |
7626 | gcc_checking_assert (cfun->machine->fs.fp_valid); |
7627 | addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
7628 | cfun->machine->fs.fp_offset - cfa_offset); |
7629 | mem = gen_rtx_MEM (mode, addr); |
7630 | add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); |
7631 | } |
7632 | } |
7633 | |
7634 | else if (base == stack_pointer_rtx && m->fs.sp_realigned |
7635 | && cfa_offset >= m->fs.sp_realigned_offset) |
7636 | { |
7637 | gcc_checking_assert (stack_realign_fp); |
7638 | add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); |
7639 | } |
7640 | |
7641 | /* The memory may not be relative to the current CFA register, |
7642 | which means that we may need to generate a new pattern for |
7643 | use by the unwind info. */ |
7644 | else if (base != m->fs.cfa_reg) |
7645 | { |
7646 | addr = plus_constant (Pmode, m->fs.cfa_reg, |
7647 | m->fs.cfa_offset - cfa_offset); |
7648 | mem = gen_rtx_MEM (mode, addr); |
7649 | add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); |
7650 | } |
7651 | } |
7652 | |
7653 | /* Emit code to save registers using MOV insns. |
7654 | First register is stored at CFA - CFA_OFFSET. */ |
7655 | static void |
7656 | ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7657 | { |
7658 | unsigned int regno; |
7659 | |
7660 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7661 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
7662 | { |
7663 | ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); |
7664 | cfa_offset -= UNITS_PER_WORD; |
7665 | } |
7666 | } |
7667 | |
7668 | /* Emit code to save SSE registers using MOV insns. |
7669 | First register is stored at CFA - CFA_OFFSET. */ |
7670 | static void |
7671 | ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) |
7672 | { |
7673 | unsigned int regno; |
7674 | |
7675 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
7676 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) |
7677 | { |
7678 | ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); |
7679 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
7680 | } |
7681 | } |
7682 | |
7683 | static GTY(()) rtx queued_cfa_restores; |
7684 | |
7685 | /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack |
7686 | manipulation insn. The value is on the stack at CFA - CFA_OFFSET. |
7687 | Don't add the note if the previously saved value will be left untouched |
7688 | within stack red-zone till return, as unwinders can find the same value |
7689 | in the register and on the stack. */ |
7690 | |
7691 | static void |
7692 | ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) |
7693 | { |
7694 | if (!crtl->shrink_wrapped |
7695 | && cfa_offset <= cfun->machine->fs.red_zone_offset) |
7696 | return; |
7697 | |
7698 | if (insn) |
7699 | { |
7700 | add_reg_note (insn, REG_CFA_RESTORE, reg); |
7701 | RTX_FRAME_RELATED_P (insn) = 1; |
7702 | } |
7703 | else |
7704 | queued_cfa_restores |
7705 | = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
7706 | } |
7707 | |
7708 | /* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
7709 | |
7710 | static void |
7711 | ix86_add_queued_cfa_restore_notes (rtx insn) |
7712 | { |
7713 | rtx last; |
7714 | if (!queued_cfa_restores) |
7715 | return; |
7716 | for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
7717 | ; |
7718 | XEXP (last, 1) = REG_NOTES (insn); |
7719 | REG_NOTES (insn) = queued_cfa_restores; |
7720 | queued_cfa_restores = NULL_RTX; |
7721 | RTX_FRAME_RELATED_P (insn) = 1; |
7722 | } |
7723 | |
7724 | /* Expand prologue or epilogue stack adjustment. |
7725 | The pattern exists to put a dependency on all ebp-based memory accesses. |
7726 | STYLE should be negative if instructions should be marked as frame related, |
7727 | zero if %r11 register is live and cannot be freely used and positive |
7728 | otherwise. */ |
7729 | |
7730 | static rtx |
7731 | pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, |
7732 | int style, bool set_cfa) |
7733 | { |
7734 | struct machine_function *m = cfun->machine; |
7735 | rtx addend = offset; |
7736 | rtx insn; |
7737 | bool add_frame_related_expr = false; |
7738 | |
7739 | if (!x86_64_immediate_operand (offset, Pmode)) |
7740 | { |
7741 | /* r11 is used by indirect sibcall return as well, set before the |
7742 | epilogue and used after the epilogue. */ |
7743 | if (style) |
7744 | addend = gen_rtx_REG (Pmode, R11_REG); |
7745 | else |
7746 | { |
7747 | gcc_assert (src != hard_frame_pointer_rtx |
7748 | && dest != hard_frame_pointer_rtx); |
7749 | addend = hard_frame_pointer_rtx; |
7750 | } |
7751 | emit_insn (gen_rtx_SET (addend, offset)); |
7752 | if (style < 0) |
7753 | add_frame_related_expr = true; |
7754 | } |
7755 | |
7756 | insn = emit_insn (gen_pro_epilogue_adjust_stack_add |
7757 | (Pmode, dest, src, addend)); |
7758 | if (style >= 0) |
7759 | ix86_add_queued_cfa_restore_notes (insn); |
7760 | |
7761 | if (set_cfa) |
7762 | { |
7763 | rtx r; |
7764 | |
7765 | gcc_assert (m->fs.cfa_reg == src); |
7766 | m->fs.cfa_offset += INTVAL (offset); |
7767 | m->fs.cfa_reg = dest; |
7768 | |
7769 | r = gen_rtx_PLUS (Pmode, src, offset); |
7770 | r = gen_rtx_SET (dest, r); |
7771 | add_reg_note (insn, REG_CFA_ADJUST_CFA, r); |
7772 | RTX_FRAME_RELATED_P (insn) = 1; |
7773 | } |
7774 | else if (style < 0) |
7775 | { |
7776 | RTX_FRAME_RELATED_P (insn) = 1; |
7777 | if (add_frame_related_expr) |
7778 | { |
7779 | rtx r = gen_rtx_PLUS (Pmode, src, offset); |
7780 | r = gen_rtx_SET (dest, r); |
7781 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); |
7782 | } |
7783 | } |
7784 | |
7785 | if (dest == stack_pointer_rtx) |
7786 | { |
7787 | HOST_WIDE_INT ooffset = m->fs.sp_offset; |
7788 | bool valid = m->fs.sp_valid; |
7789 | bool realigned = m->fs.sp_realigned; |
7790 | |
7791 | if (src == hard_frame_pointer_rtx) |
7792 | { |
7793 | valid = m->fs.fp_valid; |
7794 | realigned = false; |
7795 | ooffset = m->fs.fp_offset; |
7796 | } |
7797 | else if (src == crtl->drap_reg) |
7798 | { |
7799 | valid = m->fs.drap_valid; |
7800 | realigned = false; |
7801 | ooffset = 0; |
7802 | } |
7803 | else |
7804 | { |
7805 | /* Else there are two possibilities: SP itself, which we set |
7806 | up as the default above. Or EH_RETURN_STACKADJ_RTX, which is |
7807 | taken care of by hand along the eh_return path. */ |
7808 | gcc_checking_assert (src == stack_pointer_rtx |
7809 | || offset == const0_rtx); |
7810 | } |
7811 | |
7812 | m->fs.sp_offset = ooffset - INTVAL (offset); |
7813 | m->fs.sp_valid = valid; |
7814 | m->fs.sp_realigned = realigned; |
7815 | } |
7816 | return insn; |
7817 | } |
7818 | |
7819 | /* Find an available register to be used as dynamic realign argument |
7820 | pointer register. Such a register will be written in the prologue and |
7821 | used in begin of body, so it must not be |
7822 | 1. parameter passing register. |
7823 | 2. GOT pointer. |
7824 | We reuse static-chain register if it is available. Otherwise, we |
7825 | use DI for i386 and R13 for x86-64. We chose R13 since it has |
7826 | shorter encoding. |
7827 | |
7828 | Return: the regno of chosen register. */ |
7829 | |
7830 | static unsigned int |
7831 | find_drap_reg (void) |
7832 | { |
7833 | tree decl = cfun->decl; |
7834 | |
7835 | /* Always use callee-saved register if there are no caller-saved |
7836 | registers. */ |
7837 | if (TARGET_64BIT) |
7838 | { |
7839 | /* Use R13 for a nested function or a function that needs a static |
7840 | chain. Since a function with a tail call may use any caller-saved |
7841 | registers in its epilogue, the DRAP must not use a caller-saved |
7842 | register in that case. */ |
7843 | if (DECL_STATIC_CHAIN (decl) |
7844 | || (cfun->machine->call_saved_registers |
7845 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
7846 | || crtl->tail_call_emit) |
7847 | return R13_REG; |
7848 | |
7849 | return R10_REG; |
7850 | } |
7851 | else |
7852 | { |
7853 | /* Use DI for a nested function or a function that needs a static |
7854 | chain. Since a function with a tail call may use any caller-saved |
7855 | registers in its epilogue, the DRAP must not use a caller-saved |
7856 | register in that case. */ |
7857 | if (DECL_STATIC_CHAIN (decl) |
7858 | || (cfun->machine->call_saved_registers |
7859 | == TYPE_NO_CALLER_SAVED_REGISTERS) |
7860 | || crtl->tail_call_emit |
7861 | || crtl->calls_eh_return) |
7862 | return DI_REG; |
7863 | |
7864 | /* Reuse static chain register if it isn't used for parameter |
7865 | passing. */ |
7866 | if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) |
7867 | { |
7868 | unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); |
7869 | if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) |
7870 | return CX_REG; |
7871 | } |
7872 | return DI_REG; |
7873 | } |
7874 | } |
7875 | |
7876 | /* Return minimum incoming stack alignment. */ |
7877 | |
7878 | static unsigned int |
7879 | ix86_minimum_incoming_stack_boundary (bool sibcall) |
7880 | { |
7881 | unsigned int incoming_stack_boundary; |
7882 | |
7883 | /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ |
7884 | if (cfun->machine->func_type != TYPE_NORMAL) |
7885 | incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; |
7886 | /* Prefer the one specified at command line. */ |
7887 | else if (ix86_user_incoming_stack_boundary) |
7888 | incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
7889 | /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack |
7890 | boundary when -mstackrealign is used, this isn't a sibcall check |
7891 | and the estimated stack alignment is 128 bits. */ |
7892 | else if (!sibcall |
7893 | && ix86_force_align_arg_pointer |
7894 | && crtl->stack_alignment_estimated == 128) |
7895 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7896 | else |
7897 | incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
7898 | |
7899 | /* Incoming stack alignment can be changed on individual functions |
7900 | via force_align_arg_pointer attribute. We use the smallest |
7901 | incoming stack boundary. */ |
7902 | if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
7903 | && lookup_attribute ("force_align_arg_pointer", |
7904 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
7905 | incoming_stack_boundary = MIN_STACK_BOUNDARY; |
7906 | |
7907 | /* The incoming stack frame has to be aligned at least at |
7908 | parm_stack_boundary. */ |
7909 | if (incoming_stack_boundary < crtl->parm_stack_boundary) |
7910 | incoming_stack_boundary = crtl->parm_stack_boundary; |
7911 | |
7912 | /* Stack at entrance of main is aligned by runtime. We use the |
7913 | smallest incoming stack boundary. */ |
7914 | if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
7915 | && DECL_NAME (current_function_decl) |
7916 | && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
7917 | && DECL_FILE_SCOPE_P (current_function_decl)) |
7918 | incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
7919 | |
7920 | return incoming_stack_boundary; |
7921 | } |
7922 | |
7923 | /* Update incoming stack boundary and estimated stack alignment. */ |
7924 | |
7925 | static void |
7926 | ix86_update_stack_boundary (void) |
7927 | { |
7928 | ix86_incoming_stack_boundary |
7929 | = ix86_minimum_incoming_stack_boundary (false); |
7930 | |
7931 | /* x86_64 vararg needs 16byte stack alignment for register save area. */ |
7932 | if (TARGET_64BIT |
7933 | && cfun->stdarg |
7934 | && crtl->stack_alignment_estimated < 128) |
7935 | crtl->stack_alignment_estimated = 128; |
7936 | |
7937 | /* __tls_get_addr needs to be called with 16-byte aligned stack. */ |
7938 | if (ix86_tls_descriptor_calls_expanded_in_cfun |
7939 | && crtl->preferred_stack_boundary < 128) |
7940 | crtl->preferred_stack_boundary = 128; |
7941 | |
7942 | /* For 32-bit MS ABI, both the incoming and preferred stack boundaries |
7943 | are 32 bits, but if force_align_arg_pointer is specified, it should |
7944 | prefer 128 bits for a backward-compatibility reason, which is also |
7945 | what the doc suggests. */ |
7946 | if (lookup_attribute ("force_align_arg_pointer", |
7947 | TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) |
7948 | && crtl->preferred_stack_boundary < 128) |
7949 | crtl->preferred_stack_boundary = 128; |
7950 | } |
7951 | |
7952 | /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is |
7953 | needed or an rtx for DRAP otherwise. */ |
7954 | |
7955 | static rtx |
7956 | ix86_get_drap_rtx (void) |
7957 | { |
7958 | /* We must use DRAP if there are outgoing arguments on the stack or |
7959 | the stack pointer register is clobbered by an asm statement and |
7960 | ACCUMULATE_OUTGOING_ARGS is false. */ |
7961 | if (ix86_force_drap |
7962 | || ((cfun->machine->outgoing_args_on_stack |
7963 | || crtl->sp_is_clobbered_by_asm) |
7964 | && !ACCUMULATE_OUTGOING_ARGS)) |
7965 | crtl->need_drap = true; |
7966 | |
7967 | if (stack_realign_drap) |
7968 | { |
7969 | /* Assign DRAP to vDRAP and return vDRAP. */ |
7970 | unsigned int regno = find_drap_reg (); |
7971 | rtx drap_vreg; |
7972 | rtx arg_ptr; |
7973 | rtx_insn *seq, *insn; |
7974 | |
7975 | arg_ptr = gen_rtx_REG (Pmode, regno); |
7976 | crtl->drap_reg = arg_ptr; |
7977 | |
7978 | start_sequence (); |
7979 | drap_vreg = copy_to_reg (arg_ptr); |
7980 | seq = end_sequence (); |
7981 | |
7982 | insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); |
7983 | if (!optimize) |
7984 | { |
7985 | add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); |
7986 | RTX_FRAME_RELATED_P (insn) = 1; |
7987 | } |
7988 | return drap_vreg; |
7989 | } |
7990 | else |
7991 | return NULL; |
7992 | } |
7993 | |
7994 | /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ |
7995 | |
7996 | static rtx |
7997 | ix86_internal_arg_pointer (void) |
7998 | { |
7999 | return virtual_incoming_args_rtx; |
8000 | } |
8001 | |
8002 | struct scratch_reg { |
8003 | rtx reg; |
8004 | bool saved; |
8005 | }; |
8006 | |
8007 | /* Return a short-lived scratch register for use on function entry. |
8008 | In 32-bit mode, it is valid only after the registers are saved |
8009 | in the prologue. This register must be released by means of |
8010 | release_scratch_register_on_entry once it is dead. */ |
8011 | |
8012 | static void |
8013 | get_scratch_register_on_entry (struct scratch_reg *sr) |
8014 | { |
8015 | int regno; |
8016 | |
8017 | sr->saved = false; |
8018 | |
8019 | if (TARGET_64BIT) |
8020 | { |
8021 | /* We always use R11 in 64-bit mode. */ |
8022 | regno = R11_REG; |
8023 | } |
8024 | else |
8025 | { |
8026 | tree decl = current_function_decl, fntype = TREE_TYPE (decl); |
8027 | bool fastcall_p |
8028 | = lookup_attribute (attr_name: "fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
8029 | bool thiscall_p |
8030 | = lookup_attribute (attr_name: "thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
8031 | bool static_chain_p = DECL_STATIC_CHAIN (decl); |
8032 | int regparm = ix86_function_regparm (fntype, decl); |
8033 | int drap_regno |
8034 | = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; |
8035 | |
8036 | /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax |
8037 | for the static chain register. */ |
8038 | if ((regparm < 1 || (fastcall_p && !static_chain_p)) |
8039 | && drap_regno != AX_REG) |
8040 | regno = AX_REG; |
8041 | /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx |
8042 | for the static chain register. */ |
8043 | else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) |
8044 | regno = AX_REG; |
8045 | else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) |
8046 | regno = DX_REG; |
8047 | /* ecx is the static chain register. */ |
8048 | else if (regparm < 3 && !fastcall_p && !thiscall_p |
8049 | && !static_chain_p |
8050 | && drap_regno != CX_REG) |
8051 | regno = CX_REG; |
8052 | else if (ix86_save_reg (BX_REG, true, false)) |
8053 | regno = BX_REG; |
8054 | /* esi is the static chain register. */ |
8055 | else if (!(regparm == 3 && static_chain_p) |
8056 | && ix86_save_reg (SI_REG, true, false)) |
8057 | regno = SI_REG; |
8058 | else if (ix86_save_reg (DI_REG, true, false)) |
8059 | regno = DI_REG; |
8060 | else |
8061 | { |
8062 | regno = (drap_regno == AX_REG ? DX_REG : AX_REG); |
8063 | sr->saved = true; |
8064 | } |
8065 | } |
8066 | |
8067 | sr->reg = gen_rtx_REG (Pmode, regno); |
8068 | if (sr->saved) |
8069 | { |
8070 | rtx_insn *insn = emit_insn (gen_push (sr->reg)); |
8071 | RTX_FRAME_RELATED_P (insn) = 1; |
8072 | } |
8073 | } |
8074 | |
8075 | /* Release a scratch register obtained from the preceding function. |
8076 | |
8077 | If RELEASE_VIA_POP is true, we just pop the register off the stack |
8078 | to release it. This is what non-Linux systems use with -fstack-check. |
8079 | |
8080 | Otherwise we use OFFSET to locate the saved register and the |
8081 | allocated stack space becomes part of the local frame and is |
8082 | deallocated by the epilogue. */ |
8083 | |
8084 | static void |
8085 | release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, |
8086 | bool release_via_pop) |
8087 | { |
8088 | if (sr->saved) |
8089 | { |
8090 | if (release_via_pop) |
8091 | { |
8092 | struct machine_function *m = cfun->machine; |
8093 | rtx x, insn = emit_insn (gen_pop (sr->reg)); |
8094 | |
8095 | /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */ |
8096 | RTX_FRAME_RELATED_P (insn) = 1; |
8097 | x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
8098 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8099 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); |
8100 | m->fs.sp_offset -= UNITS_PER_WORD; |
8101 | } |
8102 | else |
8103 | { |
8104 | rtx x = plus_constant (Pmode, stack_pointer_rtx, offset); |
8105 | x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); |
8106 | emit_insn (x); |
8107 | } |
8108 | } |
8109 | } |
8110 | |
8111 | /* Emit code to adjust the stack pointer by SIZE bytes while probing it. |
8112 | |
8113 | If INT_REGISTERS_SAVED is true, then integer registers have already been |
8114 | pushed on the stack. |
8115 | |
8116 | If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope |
8117 | beyond SIZE bytes. |
8118 | |
8119 | This assumes no knowledge of the current probing state, i.e. it is never |
8120 | allowed to allocate more than PROBE_INTERVAL bytes of stack space without |
8121 | a suitable probe. */ |
8122 | |
8123 | static void |
8124 | ix86_adjust_stack_and_probe (HOST_WIDE_INT size, |
8125 | const bool int_registers_saved, |
8126 | const bool protection_area) |
8127 | { |
8128 | struct machine_function *m = cfun->machine; |
8129 | |
8130 | /* If this function does not statically allocate stack space, then |
8131 | no probes are needed. */ |
8132 | if (!size) |
8133 | { |
8134 | /* However, the allocation of space via pushes for register |
8135 | saves could be viewed as allocating space, but without the |
8136 | need to probe. */ |
8137 | if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) |
8138 | dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); |
8139 | else |
8140 | dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); |
8141 | return; |
8142 | } |
8143 | |
8144 | /* If we are a noreturn function, then we have to consider the |
8145 | possibility that we're called via a jump rather than a call. |
8146 | |
8147 | Thus we don't have the implicit probe generated by saving the |
8148 | return address into the stack at the call. Thus, the stack |
8149 | pointer could be anywhere in the guard page. The safe thing |
8150 | to do is emit a probe now. |
8151 | |
8152 | The probe can be avoided if we have already emitted any callee |
8153 | register saves into the stack or have a frame pointer (which will |
8154 | have been saved as well). Those saves will function as implicit |
8155 | probes. |
8156 | |
8157 | ?!? This should be revamped to work like aarch64 and s390 where |
8158 | we track the offset from the most recent probe. Normally that |
8159 | offset would be zero. For a noreturn function we would reset |
8160 | it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then |
8161 | we just probe when we cross PROBE_INTERVAL. */ |
8162 | if (TREE_THIS_VOLATILE (cfun->decl) |
8163 | && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) |
8164 | { |
8165 | /* We can safely use any register here since we're just going to push |
8166 | its value and immediately pop it back. But we do try and avoid |
8167 | argument passing registers so as not to introduce dependencies in |
8168 | the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ |
8169 | rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); |
8170 | rtx_insn *insn_push = emit_insn (gen_push (dummy_reg)); |
8171 | rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg)); |
8172 | m->fs.sp_offset -= UNITS_PER_WORD; |
8173 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8174 | { |
8175 | m->fs.cfa_offset -= UNITS_PER_WORD; |
8176 | rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
8177 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8178 | add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); |
8179 | RTX_FRAME_RELATED_P (insn_push) = 1; |
8180 | x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
8181 | x = gen_rtx_SET (stack_pointer_rtx, x); |
8182 | add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); |
8183 | RTX_FRAME_RELATED_P (insn_pop) = 1; |
8184 | } |
8185 | emit_insn (gen_blockage ()); |
8186 | } |
8187 | |
8188 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
8189 | const int dope = 4 * UNITS_PER_WORD; |
8190 | |
8191 | /* If there is protection area, take it into account in the size. */ |
8192 | if (protection_area) |
8193 | size += probe_interval + dope; |
8194 | |
8195 | /* If we allocate less than the size of the guard statically, |
8196 | then no probing is necessary, but we do need to allocate |
8197 | the stack. */ |
8198 | else if (size < (1 << param_stack_clash_protection_guard_size)) |
8199 | { |
8200 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8201 | GEN_INT (-size), -1, |
8202 | m->fs.cfa_reg == stack_pointer_rtx); |
8203 | dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); |
8204 | return; |
8205 | } |
8206 | |
8207 | /* We're allocating a large enough stack frame that we need to |
8208 | emit probes. Either emit them inline or in a loop depending |
8209 | on the size. */ |
8210 | if (size <= 4 * probe_interval) |
8211 | { |
8212 | HOST_WIDE_INT i; |
8213 | for (i = probe_interval; i <= size; i += probe_interval) |
8214 | { |
8215 | /* Allocate PROBE_INTERVAL bytes. */ |
8216 | rtx insn |
8217 | = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8218 | GEN_INT (-probe_interval), -1, |
8219 | m->fs.cfa_reg == stack_pointer_rtx); |
8220 | add_reg_note (insn, REG_STACK_CHECK, const0_rtx); |
8221 | |
8222 | /* And probe at *sp. */ |
8223 | emit_stack_probe (stack_pointer_rtx); |
8224 | emit_insn (gen_blockage ()); |
8225 | } |
8226 | |
8227 | /* We need to allocate space for the residual, but we do not need |
8228 | to probe the residual... */ |
8229 | HOST_WIDE_INT residual = (i - probe_interval - size); |
8230 | if (residual) |
8231 | { |
8232 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8233 | GEN_INT (residual), -1, |
8234 | m->fs.cfa_reg == stack_pointer_rtx); |
8235 | |
8236 | /* ...except if there is a protection area to maintain. */ |
8237 | if (protection_area) |
8238 | emit_stack_probe (stack_pointer_rtx); |
8239 | } |
8240 | |
8241 | dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); |
8242 | } |
8243 | else |
8244 | { |
8245 | /* We expect the GP registers to be saved when probes are used |
8246 | as the probing sequences might need a scratch register and |
8247 | the routine to allocate one assumes the integer registers |
8248 | have already been saved. */ |
8249 | gcc_assert (int_registers_saved); |
8250 | |
8251 | struct scratch_reg sr; |
8252 | get_scratch_register_on_entry (&sr); |
8253 | |
8254 | /* If we needed to save a register, then account for any space |
8255 | that was pushed (we are not going to pop the register when |
8256 | we do the restore). */ |
8257 | if (sr.saved) |
8258 | size -= UNITS_PER_WORD; |
8259 | |
8260 | /* Step 1: round SIZE down to a multiple of the interval. */ |
8261 | HOST_WIDE_INT rounded_size = size & -probe_interval; |
8262 | |
8263 | /* Step 2: compute final value of the loop counter. Use lea if |
8264 | possible. */ |
8265 | rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); |
8266 | rtx insn; |
8267 | if (address_no_seg_operand (addr, Pmode)) |
8268 | insn = emit_insn (gen_rtx_SET (sr.reg, addr)); |
8269 | else |
8270 | { |
8271 | emit_move_insn (sr.reg, GEN_INT (-rounded_size)); |
8272 | insn = emit_insn (gen_rtx_SET (sr.reg, |
8273 | gen_rtx_PLUS (Pmode, sr.reg, |
8274 | stack_pointer_rtx))); |
8275 | } |
8276 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8277 | { |
8278 | add_reg_note (insn, REG_CFA_DEF_CFA, |
8279 | plus_constant (Pmode, sr.reg, |
8280 | m->fs.cfa_offset + rounded_size)); |
8281 | RTX_FRAME_RELATED_P (insn) = 1; |
8282 | } |
8283 | |
8284 | /* Step 3: the loop. */ |
8285 | rtx size_rtx = GEN_INT (rounded_size); |
8286 | insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, |
8287 | size_rtx)); |
8288 | if (m->fs.cfa_reg == stack_pointer_rtx) |
8289 | { |
8290 | m->fs.cfa_offset += rounded_size; |
8291 | add_reg_note (insn, REG_CFA_DEF_CFA, |
8292 | plus_constant (Pmode, stack_pointer_rtx, |
8293 | m->fs.cfa_offset)); |
8294 | RTX_FRAME_RELATED_P (insn) = 1; |
8295 | } |
8296 | m->fs.sp_offset += rounded_size; |
8297 | emit_insn (gen_blockage ()); |
8298 | |
8299 | /* Step 4: adjust SP if we cannot assert at compile-time that SIZE |
8300 | is equal to ROUNDED_SIZE. */ |
8301 | |
8302 | if (size != rounded_size) |
8303 | { |
8304 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8305 | GEN_INT (rounded_size - size), -1, |
8306 | m->fs.cfa_reg == stack_pointer_rtx); |
8307 | |
8308 | if (protection_area) |
8309 | emit_stack_probe (stack_pointer_rtx); |
8310 | } |
8311 | |
8312 | dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); |
8313 | |
8314 | /* This does not deallocate the space reserved for the scratch |
8315 | register. That will be deallocated in the epilogue. */ |
8316 | release_scratch_register_on_entry (&sr, size, false); |
8317 | } |
8318 | |
8319 | /* Adjust back to account for the protection area. */ |
8320 | if (protection_area) |
8321 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8322 | GEN_INT (probe_interval + dope), -1, |
8323 | m->fs.cfa_reg == stack_pointer_rtx); |
8324 | |
8325 | /* Make sure nothing is scheduled before we are done. */ |
8326 | emit_insn (gen_blockage ()); |
8327 | } |
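/* As a rough illustration of the loop case above (a sketch only -- the
   scratch register, label and interval depend on the target, the ABI and
   --param stack-clash-protection-probe-interval), a 64-bit frame needing
   0x9000 bytes with a 4 KiB probe interval comes out along the lines of:

	leaq	-0x8000(%rsp), %r11	# Step 2: loop bound in the scratch reg
   .LPSRL0:
	subq	$0x1000, %rsp		# Step 3: allocate one interval...
	orq	$0, (%rsp)		# ...and probe it
	cmpq	%r11, %rsp
	jne	.LPSRL0
	subq	$0x1000, %rsp		# Step 4: unprobed residual allocation

   i.e. SIZE is rounded down to a multiple of the interval, the loop
   allocates and probes one interval at a time, and any residual is
   allocated (and, with a protection area, probed) afterwards.  */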
8328 | |
8329 | /* Adjust the stack pointer up to REG while probing it. */ |
8330 | |
8331 | const char * |
8332 | output_adjust_stack_and_probe (rtx reg) |
8333 | { |
8334 | static int labelno = 0; |
8335 | char loop_lab[32]; |
8336 | rtx xops[2]; |
8337 | |
8338 | ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); |
8339 | |
8340 | /* Loop. */ |
8341 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
8342 | |
8343 | /* SP = SP + PROBE_INTERVAL. */ |
8344 | xops[0] = stack_pointer_rtx; |
8345 | xops[1] = GEN_INT (get_probe_interval ()); |
8346 | output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); |
8347 | |
8348 | /* Probe at SP. */ |
8349 | xops[1] = const0_rtx; |
8350 | output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); |
8351 | |
8352 | /* Test if SP == LAST_ADDR. */ |
8353 | xops[0] = stack_pointer_rtx; |
8354 | xops[1] = reg; |
8355 | output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); |
8356 | |
8357 | /* Branch. */ |
8358 | fputs ("\tjne\t", asm_out_file); |
8359 | assemble_name_raw (asm_out_file, loop_lab); |
8360 | fputc ('\n', asm_out_file); |
8361 | |
8362 | return ""; |
8363 | } |
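/* The {AT&T|Intel} brace groups in the templates above select between the
   two assembler dialects.  With the default 4 KiB probe interval and, say,
   %rax holding LAST_ADDR, one iteration of the emitted loop reads (AT&T
   syntax, as a sketch):

   .LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%rax, %rsp
	jne	.LPSRL0  */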
8364 | |
8365 | /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, |
8366 | inclusive. These are offsets from the current stack pointer. |
8367 | |
8368 | INT_REGISTERS_SAVED is true if integer registers have already been |
8369 | pushed on the stack. */ |
8370 | |
8371 | static void |
8372 | ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, |
8373 | const bool int_registers_saved) |
8374 | { |
8375 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
8376 | |
8377 | /* See if we have a constant small number of probes to generate. If so, |
8378 | that's the easy case. The run-time loop is made up of 6 insns in the |
8379 | generic case while the compile-time loop is made up of n insns for n # |
8380 | of intervals. */ |
8381 | if (size <= 6 * probe_interval) |
8382 | { |
8383 | HOST_WIDE_INT i; |
8384 | |
8385 | /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until |
8386 | it exceeds SIZE. If only one probe is needed, this will not |
8387 | generate any code. Then probe at FIRST + SIZE. */ |
8388 | for (i = probe_interval; i < size; i += probe_interval) |
8389 | emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
8390 | -(first + i))); |
8391 | |
8392 | emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
8393 | -(first + size))); |
8394 | } |
8395 | |
8396 | /* Otherwise, do the same as above, but in a loop. Note that we must be |
8397 | extra careful with variables wrapping around because we might be at |
8398 | the very top (or the very bottom) of the address space and we have |
8399 | to be able to handle this case properly; in particular, we use an |
8400 | equality test for the loop condition. */ |
8401 | else |
8402 | { |
8403 | /* We expect the GP registers to be saved when probes are used |
8404 | as the probing sequences might need a scratch register and |
8405 | the routine to allocate one assumes the integer registers |
8406 | have already been saved. */ |
8407 | gcc_assert (int_registers_saved); |
8408 | |
8409 | HOST_WIDE_INT rounded_size, last; |
8410 | struct scratch_reg sr; |
8411 | |
8412 | get_scratch_register_on_entry (&sr); |
8413 | |
8414 | |
8415 | /* Step 1: round SIZE to the previous multiple of the interval. */ |
8416 | |
8417 | rounded_size = ROUND_DOWN (size, probe_interval); |
8418 | |
8419 | |
8420 | /* Step 2: compute initial and final value of the loop counter. */ |
8421 | |
8422 | /* TEST_OFFSET = FIRST. */ |
8423 | emit_move_insn (sr.reg, GEN_INT (-first)); |
8424 | |
8425 | /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ |
8426 | last = first + rounded_size; |
8427 | |
8428 | |
8429 | /* Step 3: the loop |
8430 | |
8431 | do |
8432 | { |
8433 | TEST_ADDR = TEST_ADDR + PROBE_INTERVAL |
8434 | probe at TEST_ADDR |
8435 | } |
8436 | while (TEST_ADDR != LAST_ADDR) |
8437 | |
8438 | probes at FIRST + N * PROBE_INTERVAL for values of N from 1 |
8439 | until it is equal to ROUNDED_SIZE. */ |
8440 | |
8441 | emit_insn |
8442 | (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last))); |
8443 | |
8444 | |
8445 | /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time |
8446 | that SIZE is equal to ROUNDED_SIZE. */ |
8447 | |
8448 | if (size != rounded_size) |
8449 | emit_stack_probe (plus_constant (Pmode, |
8450 | gen_rtx_PLUS (Pmode, |
8451 | stack_pointer_rtx, |
8452 | sr.reg), |
8453 | rounded_size - size)); |
8454 | |
8455 | release_scratch_register_on_entry (&sr, size, true); |
8456 | } |
8457 | |
8458 | /* Make sure nothing is scheduled before we are done. */ |
8459 | emit_insn (gen_blockage ()); |
8460 | } |
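/* A small worked example of the inline case (hypothetical numbers, just to
   illustrate the loop bounds): with FIRST = 16384, SIZE = 8192 and a 4 KiB
   probe interval, the first loop emits a single probe at sp - 20480 and the
   trailing probe lands at sp - 24576, i.e. at FIRST + SIZE.  No stack
   pointer adjustment is made; the offsets are applied directly to the
   probe addresses.  */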
8461 | |
8462 | /* Probe a range of stack addresses from REG to END, inclusive. These are |
8463 | offsets from the current stack pointer. */ |
8464 | |
8465 | const char * |
8466 | output_probe_stack_range (rtx reg, rtx end) |
8467 | { |
8468 | static int labelno = 0; |
8469 | char loop_lab[32]; |
8470 | rtx xops[3]; |
8471 | |
8472 | ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); |
8473 | |
8474 | /* Loop. */ |
8475 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
8476 | |
8477 | /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ |
8478 | xops[0] = reg; |
8479 | xops[1] = GEN_INT (get_probe_interval ()); |
8480 | output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); |
8481 | |
8482 | /* Probe at TEST_ADDR. */ |
8483 | xops[0] = stack_pointer_rtx; |
8484 | xops[1] = reg; |
8485 | xops[2] = const0_rtx; |
8486 | output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); |
8487 | |
8488 | /* Test if TEST_ADDR == LAST_ADDR. */ |
8489 | xops[0] = reg; |
8490 | xops[1] = end; |
8491 | output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); |
8492 | |
8493 | /* Branch. */ |
8494 | fputs ("\tjne\t", asm_out_file); |
8495 | assemble_name_raw (asm_out_file, loop_lab); |
8496 | fputc ('\n', asm_out_file); |
8497 | |
8498 | return ""; |
8499 | } |
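/* Unlike output_adjust_stack_and_probe, the loop emitted here never moves
   the stack pointer: REG holds a (negative) offset and each probe is
   addressed as sp + REG.  Together with the initialisation emitted by the
   caller, and using hypothetical FIRST = 16384 and ROUNDED_SIZE = 8192 with
   a 4 KiB interval, the sequence reads roughly (AT&T syntax, %rax as REG):

	movq	$-16384, %rax		# REG = -FIRST
   .LPSRL1:
	subq	$4096, %rax
	orq	$0, (%rsp,%rax)
	cmpq	$-24576, %rax		# -(FIRST + ROUNDED_SIZE)
	jne	.LPSRL1  */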
8500 | |
8501 | /* Data passed to ix86_update_stack_alignment. */ |
8502 | struct stack_access_data |
8503 | { |
8504 | /* The stack access register. */ |
8505 | const_rtx reg; |
8506 | /* Pointer to stack alignment. */ |
8507 | unsigned int *stack_alignment; |
8508 | }; |
8509 | |
8510 | /* Update the maximum stack slot alignment from memory alignment in PAT. */ |
8511 | |
8512 | static void |
8513 | ix86_update_stack_alignment (rtx, const_rtx pat, void *data) |
8514 | { |
8515 | /* This insn may reference a stack slot. Update the maximum stack slot |
8516 | alignment if the memory is referenced by the stack access register. */ |
8517 | stack_access_data *p = (stack_access_data *) data; |
8518 | |
8519 | subrtx_iterator::array_type array; |
8520 | FOR_EACH_SUBRTX (iter, array, pat, ALL) |
8521 | { |
8522 | auto op = *iter; |
8523 | if (MEM_P (op)) |
8524 | { |
8525 | if (reg_mentioned_p (p->reg, XEXP (op, 0))) |
8526 | { |
8527 | unsigned int alignment = MEM_ALIGN (op); |
8528 | |
8529 | if (alignment > *p->stack_alignment) |
8530 | *p->stack_alignment = alignment; |
8531 | break; |
8532 | } |
8533 | else |
8534 | iter.skip_subrtxes (); |
8535 | } |
8536 | } |
8537 | } |
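/* For instance (an illustrative pattern only), if DATA->reg is the stack
   pointer and PAT contains

	(set (mem:V4SF (plus:DI (reg:DI sp) (const_int 16)) [0 S16 A128]) ...)

   then the MEM_ALIGN of 128 bits raises *P->STACK_ALIGNMENT to 128 if it
   was smaller; memory whose address does not mention DATA->reg is not
   looked into.  */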
8538 | |
8539 | /* Helper function for ix86_find_all_reg_uses. */ |
8540 | |
8541 | static void |
8542 | ix86_find_all_reg_uses_1 (HARD_REG_SET ®set, |
8543 | rtx set, unsigned int regno, |
8544 | auto_bitmap &worklist) |
8545 | { |
8546 | rtx dest = SET_DEST (set); |
8547 | |
8548 | if (!REG_P (dest)) |
8549 | return; |
8550 | |
8551 | /* Reject non-Pmode modes. */ |
8552 | if (GET_MODE (dest) != Pmode) |
8553 | return; |
8554 | |
8555 | unsigned int dst_regno = REGNO (dest); |
8556 | |
8557 | if (TEST_HARD_REG_BIT (regset, dst_regno)) |
8558 | return; |
8559 | |
8560 | const_rtx src = SET_SRC (set); |
8561 | |
8562 | subrtx_iterator::array_type array; |
8563 | FOR_EACH_SUBRTX (iter, array, src, ALL) |
8564 | { |
8565 | auto op = *iter; |
8566 | |
8567 | if (MEM_P (op)) |
8568 | iter.skip_subrtxes (); |
8569 | |
8570 | if (REG_P (op) && REGNO (op) == regno) |
8571 | { |
8572 | /* Add this register to register set. */ |
8573 | add_to_hard_reg_set (regs: ®set, Pmode, regno: dst_regno); |
8574 | bitmap_set_bit (worklist, dst_regno); |
8575 | break; |
8576 | } |
8577 | } |
8578 | } |
8579 | |
8580 | /* Find all registers defined using register REGNO. */ |
8581 | |
8582 | static void |
8583 | ix86_find_all_reg_uses (HARD_REG_SET ®set, |
8584 | unsigned int regno, auto_bitmap &worklist) |
8585 | { |
8586 | for (df_ref ref = DF_REG_USE_CHAIN (regno); |
8587 | ref != NULL; |
8588 | ref = DF_REF_NEXT_REG (ref)) |
8589 | { |
8590 | if (DF_REF_IS_ARTIFICIAL (ref)) |
8591 | continue; |
8592 | |
8593 | rtx_insn *insn = DF_REF_INSN (ref); |
8594 | |
8595 | if (!NONJUMP_INSN_P (insn)) |
8596 | continue; |
8597 | |
8598 | unsigned int ref_regno = DF_REF_REGNO (ref); |
8599 | |
8600 | rtx set = single_set (insn); |
8601 | if (set) |
8602 | { |
8603 | ix86_find_all_reg_uses_1 (regset, set, |
8604 | ref_regno, worklist); |
8605 | continue; |
8606 | } |
8607 | |
8608 | rtx pat = PATTERN (insn); |
8609 | if (GET_CODE (pat) != PARALLEL) |
8610 | continue; |
8611 | |
8612 | for (int i = 0; i < XVECLEN (pat, 0); i++) |
8613 | { |
8614 | rtx exp = XVECEXP (pat, 0, i); |
8615 | |
8616 | if (GET_CODE (exp) == SET) |
8617 | ix86_find_all_reg_uses_1 (regset, exp, |
8618 | ref_regno, worklist); |
8619 | } |
8620 | } |
8621 | } |
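/* A small example of how the closure built by the two functions above
   grows (hypothetical insns, for illustration only):

	(set (reg:DI ax) (plus:DI (reg:DI sp) (const_int 64)))
	(set (reg:DI cx) (reg:DI ax))
	(set (reg:DI dx) (mem:DI (reg:DI sp)))

   Starting from the stack pointer, the first insn adds %rax to REGSET and
   the worklist, and processing %rax then adds %rcx; %rdx is not added
   because the use of %rsp sits underneath a MEM, whose sub-rtxes are
   skipped.  */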
8622 | |
8623 | /* Set stack_frame_required to false if stack frame isn't required. |
8624 | Update STACK_ALIGNMENT to the largest alignment, in bits, of stack |
8625 | slot used if stack frame is required and CHECK_STACK_SLOT is true. */ |
8626 | |
8627 | static void |
8628 | ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, |
8629 | bool check_stack_slot) |
8630 | { |
8631 | HARD_REG_SET set_up_by_prologue, prologue_used; |
8632 | basic_block bb; |
8633 | |
8634 | CLEAR_HARD_REG_SET (prologue_used); |
8635 | CLEAR_HARD_REG_SET (set_up_by_prologue); |
8636 | add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); |
8637 | add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); |
8638 | add_to_hard_reg_set (&set_up_by_prologue, Pmode, |
8639 | HARD_FRAME_POINTER_REGNUM); |
8640 | |
8641 | bool require_stack_frame = false; |
8642 | |
8643 | FOR_EACH_BB_FN (bb, cfun) |
8644 | { |
8645 | rtx_insn *insn; |
8646 | FOR_BB_INSNS (bb, insn) |
8647 | if (NONDEBUG_INSN_P (insn) |
8648 | && requires_stack_frame_p (insn, prologue_used, |
8649 | set_up_by_prologue)) |
8650 | { |
8651 | require_stack_frame = true; |
8652 | break; |
8653 | } |
8654 | } |
8655 | |
8656 | cfun->machine->stack_frame_required = require_stack_frame; |
8657 | |
8658 | /* Stop if we don't need to check stack slot. */ |
8659 | if (!check_stack_slot) |
8660 | return; |
8661 | |
8662 | /* The preferred stack alignment is the minimum stack alignment. */ |
8663 | if (stack_alignment > crtl->preferred_stack_boundary) |
8664 | stack_alignment = crtl->preferred_stack_boundary; |
8665 | |
8666 | HARD_REG_SET stack_slot_access; |
8667 | CLEAR_HARD_REG_SET (stack_slot_access); |
8668 | |
8669 | /* A stack slot can be accessed by the stack pointer, the frame pointer, |
8670 | or registers defined from the stack pointer or frame pointer. */ |
8671 | auto_bitmap worklist; |
8672 | |
8673 | add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM); |
8674 | bitmap_set_bit (worklist, STACK_POINTER_REGNUM); |
8675 | |
8676 | if (frame_pointer_needed) |
8677 | { |
8678 | add_to_hard_reg_set (&stack_slot_access, Pmode, |
8679 | HARD_FRAME_POINTER_REGNUM); |
8680 | bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); |
8681 | } |
8682 | |
8683 | unsigned int regno; |
8684 | |
8685 | do |
8686 | { |
8687 | regno = bitmap_clear_first_set_bit (worklist); |
8688 | ix86_find_all_reg_uses (stack_slot_access, regno, worklist); |
8689 | } |
8690 | while (!bitmap_empty_p (worklist)); |
8691 | |
8692 | hard_reg_set_iterator hrsi; |
8693 | stack_access_data data; |
8694 | |
8695 | data.stack_alignment = &stack_alignment; |
8696 | |
8697 | EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi) |
8698 | for (df_ref ref = DF_REG_USE_CHAIN (regno); |
8699 | ref != NULL; |
8700 | ref = DF_REF_NEXT_REG (ref)) |
8701 | { |
8702 | if (DF_REF_IS_ARTIFICIAL (ref)) |
8703 | continue; |
8704 | |
8705 | rtx_insn *insn = DF_REF_INSN (ref); |
8706 | |
8707 | if (!NONJUMP_INSN_P (insn)) |
8708 | continue; |
8709 | |
8710 | data.reg = DF_REF_REG (ref); |
8711 | note_stores (insn, ix86_update_stack_alignment, &data); |
8712 | } |
8713 | } |
8714 | |
8715 | /* Finalize stack_realign_needed and frame_pointer_needed flags, which |
8716 | will guide prologue/epilogue to be generated in correct form. */ |
8717 | |
8718 | static void |
8719 | ix86_finalize_stack_frame_flags (void) |
8720 | { |
8721 | /* Check if stack realignment is really needed after reload, and |
8722 | store the result in cfun. */ |
8723 | unsigned int incoming_stack_boundary |
8724 | = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary |
8725 | ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); |
8726 | unsigned int stack_alignment |
8727 | = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor |
8728 | ? crtl->max_used_stack_slot_alignment |
8729 | : crtl->stack_alignment_needed); |
8730 | unsigned int stack_realign |
8731 | = (incoming_stack_boundary < stack_alignment); |
8732 | bool recompute_frame_layout_p = false; |
8733 | |
8734 | if (crtl->stack_realign_finalized) |
8735 | { |
8736 | /* After stack_realign_needed is finalized, we can no longer |
8737 | change it. */ |
8738 | gcc_assert (crtl->stack_realign_needed == stack_realign); |
8739 | return; |
8740 | } |
8741 | |
8742 | /* It is always safe to compute max_used_stack_alignment. We |
8743 | compute it only if a 128-bit aligned load/store may be generated |
8744 | on a misaligned stack slot, which would lead to a segfault. */ |
8745 | bool check_stack_slot |
8746 | = (stack_realign || crtl->max_used_stack_slot_alignment >= 128); |
8747 | ix86_find_max_used_stack_alignment (stack_alignment, |
8748 | check_stack_slot); |
8749 | |
8750 | /* If the only reason for frame_pointer_needed is that we conservatively |
8751 | assumed stack realignment might be needed or -fno-omit-frame-pointer |
8752 | is used, but in the end nothing that needed the stack alignment had |
8753 | been spilled and there was no stack access, clear frame_pointer_needed and say we |
8754 | don't need stack realignment. |
8755 | |
8756 | When a vector register is used for piecewise move and store, we don't |
8757 | increase stack_alignment_needed as there is no register spill for |
8758 | piecewise move and store. Since stack_realign_needed is set to true |
8759 | by checking stack_alignment_estimated which is updated by pseudo |
8760 | vector register usage, we also need to check stack_realign_needed to |
8761 | eliminate frame pointer. */ |
8762 | if ((stack_realign |
8763 | || (!flag_omit_frame_pointer && optimize) |
8764 | || crtl->stack_realign_needed) |
8765 | && frame_pointer_needed |
8766 | && crtl->is_leaf |
8767 | && crtl->sp_is_unchanging |
8768 | && !ix86_current_function_calls_tls_descriptor |
8769 | && !crtl->accesses_prior_frames |
8770 | && !cfun->calls_alloca |
8771 | && !crtl->calls_eh_return |
8772 | /* See ira_setup_eliminable_regset for the rationale. */ |
8773 | && !(STACK_CHECK_MOVING_SP |
8774 | && flag_stack_check |
8775 | && flag_exceptions |
8776 | && cfun->can_throw_non_call_exceptions) |
8777 | && !ix86_frame_pointer_required () |
8778 | && ix86_get_frame_size () == 0 |
8779 | && ix86_nsaved_sseregs () == 0 |
8780 | && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) |
8781 | { |
8782 | if (cfun->machine->stack_frame_required) |
8783 | { |
8784 | /* Stack frame is required. If stack alignment needed is less |
8785 | than incoming stack boundary, don't realign stack. */ |
8786 | stack_realign = incoming_stack_boundary < stack_alignment; |
8787 | if (!stack_realign) |
8788 | { |
8789 | crtl->max_used_stack_slot_alignment |
8790 | = incoming_stack_boundary; |
8791 | crtl->stack_alignment_needed |
8792 | = incoming_stack_boundary; |
8793 | /* Also update preferred_stack_boundary for leaf |
8794 | functions. */ |
8795 | crtl->preferred_stack_boundary |
8796 | = incoming_stack_boundary; |
8797 | } |
8798 | } |
8799 | else |
8800 | { |
8801 | /* If drap has been set, but it actually isn't live at the |
8802 | start of the function, there is no reason to set it up. */ |
8803 | if (crtl->drap_reg) |
8804 | { |
8805 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
8806 | if (! REGNO_REG_SET_P (DF_LR_IN (bb), |
8807 | REGNO (crtl->drap_reg))) |
8808 | { |
8809 | crtl->drap_reg = NULL_RTX; |
8810 | crtl->need_drap = false; |
8811 | } |
8812 | } |
8813 | else |
8814 | cfun->machine->no_drap_save_restore = true; |
8815 | |
8816 | frame_pointer_needed = false; |
8817 | stack_realign = false; |
8818 | crtl->max_used_stack_slot_alignment = incoming_stack_boundary; |
8819 | crtl->stack_alignment_needed = incoming_stack_boundary; |
8820 | crtl->stack_alignment_estimated = incoming_stack_boundary; |
8821 | if (crtl->preferred_stack_boundary > incoming_stack_boundary) |
8822 | crtl->preferred_stack_boundary = incoming_stack_boundary; |
8823 | df_finish_pass (true); |
8824 | df_scan_alloc (NULL); |
8825 | df_scan_blocks (); |
8826 | df_compute_regs_ever_live (true); |
8827 | df_analyze (); |
8828 | |
8829 | if (flag_var_tracking) |
8830 | { |
8831 | /* Since frame pointer is no longer available, replace it with |
8832 | stack pointer - UNITS_PER_WORD in debug insns. */ |
8833 | df_ref ref, next; |
8834 | for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); |
8835 | ref; ref = next) |
8836 | { |
8837 | next = DF_REF_NEXT_REG (ref); |
8838 | if (!DF_REF_INSN_INFO (ref)) |
8839 | continue; |
8840 | |
8841 | /* Make sure the next ref is for a different instruction, |
8842 | so that we're not affected by the rescan. */ |
8843 | rtx_insn *insn = DF_REF_INSN (ref); |
8844 | while (next && DF_REF_INSN (next) == insn) |
8845 | next = DF_REF_NEXT_REG (next); |
8846 | |
8847 | if (DEBUG_INSN_P (insn)) |
8848 | { |
8849 | bool changed = false; |
8850 | for (; ref != next; ref = DF_REF_NEXT_REG (ref)) |
8851 | { |
8852 | rtx *loc = DF_REF_LOC (ref); |
8853 | if (*loc == hard_frame_pointer_rtx) |
8854 | { |
8855 | *loc = plus_constant (Pmode, |
8856 | stack_pointer_rtx, |
8857 | -UNITS_PER_WORD); |
8858 | changed = true; |
8859 | } |
8860 | } |
8861 | if (changed) |
8862 | df_insn_rescan (insn); |
8863 | } |
8864 | } |
8865 | } |
8866 | |
8867 | recompute_frame_layout_p = true; |
8868 | } |
8869 | } |
8870 | else if (crtl->max_used_stack_slot_alignment >= 128 |
8871 | && cfun->machine->stack_frame_required) |
8872 | { |
8873 | /* We don't need to realign the stack. max_used_stack_alignment is |
8874 | used to decide how the stack frame should be aligned. This is |
8875 | independent of any psABI and of 32-bit vs 64-bit. */ |
8876 | cfun->machine->max_used_stack_alignment |
8877 | = stack_alignment / BITS_PER_UNIT; |
8878 | } |
8879 | |
8880 | if (crtl->stack_realign_needed != stack_realign) |
8881 | recompute_frame_layout_p = true; |
8882 | crtl->stack_realign_needed = stack_realign; |
8883 | crtl->stack_realign_finalized = true; |
8884 | if (recompute_frame_layout_p) |
8885 | ix86_compute_frame_layout (); |
8886 | } |
8887 | |
8888 | /* Delete SET_GOT right after entry block if it is allocated to reg. */ |
8889 | |
8890 | static void |
8891 | ix86_elim_entry_set_got (rtx reg) |
8892 | { |
8893 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
8894 | rtx_insn *c_insn = BB_HEAD (bb); |
8895 | if (!NONDEBUG_INSN_P (c_insn)) |
8896 | c_insn = next_nonnote_nondebug_insn (c_insn); |
8897 | if (c_insn && NONJUMP_INSN_P (c_insn)) |
8898 | { |
8899 | rtx pat = PATTERN (c_insn); |
8900 | if (GET_CODE (pat) == PARALLEL) |
8901 | { |
8902 | rtx set = XVECEXP (pat, 0, 0); |
8903 | if (GET_CODE (set) == SET |
8904 | && GET_CODE (SET_SRC (set)) == UNSPEC |
8905 | && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT |
8906 | && REGNO (SET_DEST (set)) == REGNO (reg)) |
8907 | delete_insn (c_insn); |
8908 | } |
8909 | } |
8910 | } |
8911 | |
8912 | static rtx |
8913 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) |
8914 | { |
8915 | rtx addr, mem; |
8916 | |
8917 | if (offset) |
8918 | addr = plus_constant (Pmode, frame_reg, offset); |
8919 | mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); |
8920 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); |
8921 | } |
8922 | |
8923 | static inline rtx |
8924 | gen_frame_load (rtx reg, rtx frame_reg, int offset) |
8925 | { |
8926 | return gen_frame_set (reg, frame_reg, offset, false); |
8927 | } |
8928 | |
8929 | static inline rtx |
8930 | gen_frame_store (rtx reg, rtx frame_reg, int offset) |
8931 | { |
8932 | return gen_frame_set (reg, frame_reg, offset, true); |
8933 | } |
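/* For illustration, gen_frame_store (reg, rax, -16) with REG an SSE register
   produces RTL along the lines of

	(set (mem:V4SF (plus:DI (reg:DI ax) (const_int -16)))
	     (reg:V4SF xmm6))

   and gen_frame_load simply swaps the two operands.  (A sketch; the actual
   mode and register come from the caller.)  */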
8934 | |
8935 | static void |
8936 | ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame) |
8937 | { |
8938 | struct machine_function *m = cfun->machine; |
8939 | const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS |
8940 | + m->call_ms2sysv_extra_regs; |
8941 | rtvec v = rtvec_alloc (ncregs + 1); |
8942 | unsigned int align, i, vi = 0; |
8943 | rtx_insn *insn; |
8944 | rtx sym, addr; |
8945 | rtx rax = gen_rtx_REG (word_mode, AX_REG); |
8946 | const class xlogue_layout &xlogue = xlogue_layout::get_instance (); |
8947 | |
8948 | /* AL should only be live with sysv_abi. */ |
8949 | gcc_assert (!ix86_eax_live_at_start_p ()); |
8950 | gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset); |
8951 | |
8952 | /* Set up RAX as the stub's base pointer. We use stack_realign_offset, which |
8953 | is valid whether or not we've actually realigned the stack. */ |
8954 | align = GET_MODE_ALIGNMENT (V4SFmode); |
8955 | addr = choose_baseaddr (frame.stack_realign_offset |
8956 | + xlogue.get_stub_ptr_offset (), &align, AX_REG); |
8957 | gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); |
8958 | |
8959 | emit_insn (gen_rtx_SET (rax, addr)); |
8960 | |
8961 | /* Get the stub symbol. */ |
8962 | sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP |
8963 | : XLOGUE_STUB_SAVE); |
8964 | RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); |
8965 | |
8966 | for (i = 0; i < ncregs; ++i) |
8967 | { |
8968 | const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); |
8969 | rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode), |
8970 | r.regno); |
8971 | RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset); |
8972 | } |
8973 | |
8974 | gcc_assert (vi == (unsigned)GET_NUM_ELEM (v)); |
8975 | |
8976 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); |
8977 | RTX_FRAME_RELATED_P (insn) = true; |
8978 | } |
8979 | |
8980 | /* Generate and return an insn body to AND X with Y. */ |
8981 | |
8982 | static rtx_insn * |
8983 | gen_and2_insn (rtx x, rtx y) |
8984 | { |
8985 | enum insn_code icode = optab_handler (and_optab, GET_MODE (x)); |
8986 | |
8987 | gcc_assert (insn_operand_matches (icode, 0, x)); |
8988 | gcc_assert (insn_operand_matches (icode, 1, x)); |
8989 | gcc_assert (insn_operand_matches (icode, 2, y)); |
8990 | |
8991 | return GEN_FCN (icode) (x, x, y); |
8992 | } |
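/* The prologue uses this helper to realign the stack pointer; e.g.
   gen_and2_insn (stack_pointer_rtx, GEN_INT (-32)) yields the insn for

	andq	$-32, %rsp

   rounding the stack pointer down to a 32-byte boundary (AT&T syntax,
   64-bit; the alignment value is whatever the caller computed).  */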
8993 | |
8994 | /* Expand the prologue into a bunch of separate insns. */ |
8995 | |
8996 | void |
8997 | ix86_expand_prologue (void) |
8998 | { |
8999 | struct machine_function *m = cfun->machine; |
9000 | rtx insn, t; |
9001 | HOST_WIDE_INT allocate; |
9002 | bool int_registers_saved; |
9003 | bool sse_registers_saved; |
9004 | bool save_stub_call_needed; |
9005 | rtx static_chain = NULL_RTX; |
9006 | |
9007 | ix86_last_zero_store_uid = 0; |
9008 | if (ix86_function_naked (current_function_decl)) |
9009 | { |
9010 | if (flag_stack_usage_info) |
9011 | current_function_static_stack_size = 0; |
9012 | return; |
9013 | } |
9014 | |
9015 | ix86_finalize_stack_frame_flags (); |
9016 | |
9017 | /* DRAP should not coexist with stack_realign_fp */ |
9018 | gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
9019 | |
9020 | memset (&m->fs, 0, sizeof (m->fs)); |
9021 | |
9022 | /* Initialize CFA state for before the prologue. */ |
9023 | m->fs.cfa_reg = stack_pointer_rtx; |
9024 | m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; |
9025 | |
9026 | /* Track SP offset to the CFA. We continue tracking this after we've |
9027 | swapped the CFA register away from SP. In the case of re-alignment |
9028 | this is fudged; we're interested in offsets within the local frame. */ |
9029 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
9030 | m->fs.sp_valid = true; |
9031 | m->fs.sp_realigned = false; |
9032 | |
9033 | const struct ix86_frame &frame = cfun->machine->frame; |
9034 | |
9035 | if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) |
9036 | { |
9037 | /* We should have already generated an error for any use of |
9038 | ms_hook on a nested function. */ |
9039 | gcc_checking_assert (!ix86_static_chain_on_stack); |
9040 | |
9041 | /* Check if profiling is active and we shall use the profiling-before-prologue |
9042 | variant. If so, sorry. */ |
9043 | if (crtl->profile && flag_fentry != 0) |
9044 | sorry ("%<ms_hook_prologue%> attribute is not compatible " |
9045 | "with %<-mfentry%> for 32-bit"); |
9046 | |
9047 | /* In ix86_asm_output_function_label we emitted: |
9048 | 8b ff movl.s %edi,%edi |
9049 | 55 push %ebp |
9050 | 8b ec movl.s %esp,%ebp |
9051 | |
9052 | This matches the hookable function prologue in Win32 API |
9053 | functions in Microsoft Windows XP Service Pack 2 and newer. |
9054 | Wine uses this to enable Windows apps to hook the Win32 API |
9055 | functions provided by Wine. |
9056 | |
9057 | What that means is that we've already set up the frame pointer. */ |
9058 | |
9059 | if (frame_pointer_needed |
9060 | && !(crtl->drap_reg && crtl->stack_realign_needed)) |
9061 | { |
9062 | rtx push, mov; |
9063 | |
9064 | /* We've decided to use the frame pointer already set up. |
9065 | Describe this to the unwinder by pretending that both |
9066 | push and mov insns happen right here. |
9067 | |
9068 | Putting the unwind info here at the end of the ms_hook |
9069 | is done so that we can make absolutely certain we get |
9070 | the required byte sequence at the start of the function, |
9071 | rather than relying on an assembler that can produce |
9072 | the exact encoding required. |
9073 | |
9074 | However it does mean (in the unpatched case) that we have |
9075 | a 1 insn window where the asynchronous unwind info is |
9076 | incorrect. However, if we placed the unwind info at |
9077 | its correct location we would have incorrect unwind info |
9078 | in the patched case. Which is probably all moot since |
9079 | I don't expect Wine generates dwarf2 unwind info for the |
9080 | system libraries that use this feature. */ |
9081 | |
9082 | insn = emit_insn (gen_blockage ()); |
9083 | |
9084 | push = gen_push (hard_frame_pointer_rtx); |
9085 | mov = gen_rtx_SET (hard_frame_pointer_rtx, |
9086 | stack_pointer_rtx); |
9087 | RTX_FRAME_RELATED_P (push) = 1; |
9088 | RTX_FRAME_RELATED_P (mov) = 1; |
9089 | |
9090 | RTX_FRAME_RELATED_P (insn) = 1; |
9091 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9092 | gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); |
9093 | |
9094 | /* Note that gen_push incremented m->fs.cfa_offset, even |
9095 | though we didn't emit the push insn here. */ |
9096 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
9097 | m->fs.fp_offset = m->fs.cfa_offset; |
9098 | m->fs.fp_valid = true; |
9099 | } |
9100 | else |
9101 | { |
9102 | /* The frame pointer is not needed so pop %ebp again. |
9103 | This leaves us with a pristine state. */ |
9104 | emit_insn (gen_pop (hard_frame_pointer_rtx)); |
9105 | } |
9106 | } |
9107 | |
9108 | /* The first insn of a function that accepts its static chain on the |
9109 | stack is to push the register that would be filled in by a direct |
9110 | call. This insn will be skipped by the trampoline. */ |
9111 | else if (ix86_static_chain_on_stack) |
9112 | { |
9113 | static_chain = ix86_static_chain (cfun->decl, false); |
9114 | insn = emit_insn (gen_push (static_chain)); |
9115 | emit_insn (gen_blockage ()); |
9116 | |
9117 | /* We don't want to interpret this push insn as a register save, |
9118 | only as a stack adjustment. The real copy of the register as |
9119 | a save will be done later, if needed. */ |
9120 | t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
9121 | t = gen_rtx_SET (stack_pointer_rtx, t); |
9122 | add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
9123 | RTX_FRAME_RELATED_P (insn) = 1; |
9124 | } |
9125 | |
9126 | /* Emit prologue code to adjust stack alignment and setup DRAP, in case |
9127 | of DRAP is needed and stack realignment is really needed after reload */ |
9128 | if (stack_realign_drap) |
9129 | { |
9130 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
9131 | |
9132 | /* Can't use DRAP in interrupt function. */ |
9133 | if (cfun->machine->func_type != TYPE_NORMAL) |
9134 | sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " |
9135 | "in interrupt service routine. This may be worked " |
9136 | "around by avoiding functions with aggregate return."); |
9137 | |
9138 | /* Only need to push parameter pointer reg if it is caller saved. */ |
9139 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
9140 | { |
9141 | /* Push arg pointer reg */ |
9142 | insn = emit_insn (gen_push (crtl->drap_reg)); |
9143 | RTX_FRAME_RELATED_P (insn) = 1; |
9144 | } |
9145 | |
9146 | /* Grab the argument pointer. */ |
9147 | t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); |
9148 | insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
9149 | RTX_FRAME_RELATED_P (insn) = 1; |
9150 | m->fs.cfa_reg = crtl->drap_reg; |
9151 | m->fs.cfa_offset = 0; |
9152 | |
9153 | /* Align the stack. */ |
9154 | insn = emit_insn (gen_and2_insn (stack_pointer_rtx, |
9155 | GEN_INT (-align_bytes))); |
9156 | RTX_FRAME_RELATED_P (insn) = 1; |
9157 | |
9158 | /* Replicate the return address on the stack so that return |
9159 | address can be reached via (argp - 1) slot. This is needed |
9160 | to implement macro RETURN_ADDR_RTX and intrinsic function |
9161 | expand_builtin_return_addr etc. */ |
9162 | t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); |
9163 | t = gen_frame_mem (word_mode, t); |
9164 | insn = emit_insn (gen_push (t)); |
9165 | RTX_FRAME_RELATED_P (insn) = 1; |
9166 | |
9167 | /* For the purposes of frame and register save area addressing, |
9168 | we've started over with a new frame. */ |
9169 | m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
9170 | m->fs.realigned = true; |
9171 | |
9172 | if (static_chain) |
9173 | { |
9174 | /* Replicate static chain on the stack so that static chain |
9175 | can be reached via (argp - 2) slot. This is needed for |
9176 | nested function with stack realignment. */ |
9177 | insn = emit_insn (gen_push (static_chain)); |
9178 | RTX_FRAME_RELATED_P (insn) = 1; |
9179 | } |
9180 | } |
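/* As a sketch of what the block above emits (64-bit SysV, %r10 as the DRAP
   register, a 32-byte alignment requirement and no static chain -- all
   assumptions for illustration):

	leaq	8(%rsp), %r10		# grab the incoming argument pointer
	andq	$-32, %rsp		# align the stack
	pushq	-8(%r10)		# replicate the return address

   after which the CFA is tracked via the DRAP register and frame
   addressing starts over below the replicated return address.  */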
9181 | |
9182 | int_registers_saved = (frame.nregs == 0); |
9183 | sse_registers_saved = (frame.nsseregs == 0); |
9184 | save_stub_call_needed = (m->call_ms2sysv); |
9185 | gcc_assert (sse_registers_saved || !save_stub_call_needed); |
9186 | |
9187 | if (frame_pointer_needed && !m->fs.fp_valid) |
9188 | { |
9189 | /* Note: AT&T enter does NOT have reversed args. Enter is probably |
9190 | slower on all targets. Also sdb didn't like it. */ |
9191 | insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
9192 | RTX_FRAME_RELATED_P (insn) = 1; |
9193 | |
9194 | if (m->fs.sp_offset == frame.hard_frame_pointer_offset) |
9195 | { |
9196 | insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
9197 | RTX_FRAME_RELATED_P (insn) = 1; |
9198 | |
9199 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9200 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
9201 | m->fs.fp_offset = m->fs.sp_offset; |
9202 | m->fs.fp_valid = true; |
9203 | } |
9204 | } |
9205 | |
9206 | if (!int_registers_saved) |
9207 | { |
9208 | /* If saving registers via PUSH, do so now. */ |
9209 | if (!frame.save_regs_using_mov) |
9210 | { |
9211 | ix86_emit_save_regs (); |
9212 | m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return; |
9213 | int_registers_saved = true; |
9214 | gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
9215 | } |
9216 | |
9217 | /* When using the red zone we may start register saving before allocating |
9218 | the stack frame, saving one cycle of the prologue. However, avoid |
9219 | doing this if we have to probe the stack; at least on x86_64 the |
9220 | stack probe can turn into a call that clobbers a red zone location. */ |
9221 | else if (ix86_using_red_zone () |
9222 | && (! TARGET_STACK_PROBE |
9223 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) |
9224 | { |
9225 | ix86_emit_save_regs_using_mov (frame.reg_save_offset); |
9226 | cfun->machine->red_zone_used = true; |
9227 | int_registers_saved = true; |
9228 | } |
9229 | } |
9230 | |
9231 | if (frame.red_zone_size != 0) |
9232 | cfun->machine->red_zone_used = true; |
9233 | |
9234 | if (stack_realign_fp) |
9235 | { |
9236 | int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
9237 | gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); |
9238 | |
9239 | /* Record last valid frame pointer offset. */ |
9240 | m->fs.sp_realigned_fp_last = frame.reg_save_offset; |
9241 | |
9242 | /* The computation of the size of the re-aligned stack frame means |
9243 | that we must allocate the size of the register save area before |
9244 | performing the actual alignment. Otherwise we cannot guarantee |
9245 | that there's enough storage above the realignment point. */ |
9246 | allocate = frame.reg_save_offset - m->fs.sp_offset |
9247 | + frame.stack_realign_allocate; |
9248 | if (allocate) |
9249 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9250 | GEN_INT (-allocate), -1, false); |
9251 | |
9252 | /* Align the stack. */ |
9253 | emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes))); |
9254 | m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); |
9255 | m->fs.sp_realigned_offset = m->fs.sp_offset |
9256 | - frame.stack_realign_allocate; |
9257 | /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. |
9258 | Beyond this point, stack access should be done via choose_baseaddr or |
9259 | by using sp_valid_at and fp_valid_at to determine the correct base |
9260 | register. Henceforth, any CFA offset should be thought of as logical |
9261 | and not physical. */ |
9262 | gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); |
9263 | gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); |
9264 | m->fs.sp_realigned = true; |
9265 | |
9266 | /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which |
9267 | is needed to describe where a register is saved using a realigned |
9268 | stack pointer, so we need to invalidate the stack pointer for that |
9269 | target. */ |
9270 | if (TARGET_SEH) |
9271 | m->fs.sp_valid = false; |
9272 | |
9273 | /* If SP offset is non-immediate after allocation of the stack frame, |
9274 | then emit SSE saves or stub call prior to allocating the rest of the |
9275 | stack frame. This is less efficient for the out-of-line stub because |
9276 | we can't combine allocations across the call barrier, but it's better |
9277 | than using a scratch register. */ |
9278 | else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset |
9279 | - m->fs.sp_realigned_offset), |
9280 | Pmode)) |
9281 | { |
9282 | if (!sse_registers_saved) |
9283 | { |
9284 | ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); |
9285 | sse_registers_saved = true; |
9286 | } |
9287 | else if (save_stub_call_needed) |
9288 | { |
9289 | ix86_emit_outlined_ms2sysv_save (frame); |
9290 | save_stub_call_needed = false; |
9291 | } |
9292 | } |
9293 | } |
9294 | |
9295 | allocate = frame.stack_pointer_offset - m->fs.sp_offset; |
9296 | |
9297 | if (flag_stack_usage_info) |
9298 | { |
9299 | /* We start to count from ARG_POINTER. */ |
9300 | HOST_WIDE_INT stack_size = frame.stack_pointer_offset; |
9301 | |
9302 | /* If it was realigned, take into account the fake frame. */ |
9303 | if (stack_realign_drap) |
9304 | { |
9305 | if (ix86_static_chain_on_stack) |
9306 | stack_size += UNITS_PER_WORD; |
9307 | |
9308 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
9309 | stack_size += UNITS_PER_WORD; |
9310 | |
9311 | /* This over-estimates by 1 minimal-stack-alignment-unit but |
9312 | mitigates that by counting in the new return address slot. */ |
9313 | current_function_dynamic_stack_size |
9314 | += crtl->stack_alignment_needed / BITS_PER_UNIT; |
9315 | } |
9316 | |
9317 | current_function_static_stack_size = stack_size; |
9318 | } |
9319 | |
9320 | /* On SEH target with very large frame size, allocate an area to save |
9321 | SSE registers (as the very large allocation won't be described). */ |
9322 | if (TARGET_SEH |
9323 | && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE |
9324 | && !sse_registers_saved) |
9325 | { |
9326 | HOST_WIDE_INT sse_size |
9327 | = frame.sse_reg_save_offset - frame.reg_save_offset; |
9328 | |
9329 | gcc_assert (int_registers_saved); |
9330 | |
9331 | /* No need to do stack checking as the area will be immediately |
9332 | written. */ |
9333 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9334 | GEN_INT (-sse_size), -1, |
9335 | m->fs.cfa_reg == stack_pointer_rtx); |
9336 | allocate -= sse_size; |
9337 | ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); |
9338 | sse_registers_saved = true; |
9339 | } |
9340 | |
9341 | /* If stack clash protection is requested, then probe the stack, unless it |
9342 | is already probed on the target. */ |
9343 | if (allocate >= 0 |
9344 | && flag_stack_clash_protection |
9345 | && !ix86_target_stack_probe ()) |
9346 | { |
9347 | ix86_adjust_stack_and_probe (allocate, int_registers_saved, false); |
9348 | allocate = 0; |
9349 | } |
9350 | |
9351 | /* The stack has already been decremented by the instruction calling us |
9352 | so probe if the size is non-negative to preserve the protection area. */ |
9353 | else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
9354 | { |
9355 | const HOST_WIDE_INT probe_interval = get_probe_interval (); |
9356 | |
9357 | if (STACK_CHECK_MOVING_SP) |
9358 | { |
9359 | if (crtl->is_leaf |
9360 | && !cfun->calls_alloca |
9361 | && allocate <= probe_interval) |
9362 | ; |
9363 | |
9364 | else |
9365 | { |
9366 | ix86_adjust_stack_and_probe (allocate, int_registers_saved, true); |
9367 | allocate = 0; |
9368 | } |
9369 | } |
9370 | |
9371 | else |
9372 | { |
9373 | HOST_WIDE_INT size = allocate; |
9374 | |
9375 | if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) |
9376 | size = 0x80000000 - get_stack_check_protect () - 1; |
9377 | |
9378 | if (TARGET_STACK_PROBE) |
9379 | { |
9380 | if (crtl->is_leaf && !cfun->calls_alloca) |
9381 | { |
9382 | if (size > probe_interval) |
9383 | ix86_emit_probe_stack_range (0, size, int_registers_saved); |
9384 | } |
9385 | else |
9386 | ix86_emit_probe_stack_range (0, |
9387 | size + get_stack_check_protect (), |
9388 | int_registers_saved); |
9389 | } |
9390 | else |
9391 | { |
9392 | if (crtl->is_leaf && !cfun->calls_alloca) |
9393 | { |
9394 | if (size > probe_interval |
9395 | && size > get_stack_check_protect ()) |
9396 | ix86_emit_probe_stack_range (get_stack_check_protect (), |
9397 | (size |
9398 | - get_stack_check_protect ()), |
9399 | int_registers_saved); |
9400 | } |
9401 | else |
9402 | ix86_emit_probe_stack_range (get_stack_check_protect (), size, |
9403 | int_registers_saved); |
9404 | } |
9405 | } |
9406 | } |
9407 | |
9408 | if (allocate == 0) |
9409 | ; |
9410 | else if (!ix86_target_stack_probe () |
9411 | || frame.stack_pointer_offset < CHECK_STACK_LIMIT) |
9412 | { |
9413 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
9414 | GEN_INT (-allocate), -1, |
9415 | m->fs.cfa_reg == stack_pointer_rtx); |
9416 | } |
9417 | else |
9418 | { |
9419 | rtx eax = gen_rtx_REG (Pmode, AX_REG); |
9420 | rtx r10 = NULL; |
9421 | const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); |
9422 | bool eax_live = ix86_eax_live_at_start_p (); |
9423 | bool r10_live = false; |
9424 | |
9425 | if (TARGET_64BIT) |
9426 | r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); |
9427 | |
9428 | if (eax_live) |
9429 | { |
9430 | insn = emit_insn (gen_push (eax)); |
9431 | allocate -= UNITS_PER_WORD; |
9432 | /* Note that SEH directives need to continue tracking the stack |
9433 | pointer even after the frame pointer has been set up. */ |
9434 | if (sp_is_cfa_reg || TARGET_SEH) |
9435 | { |
9436 | if (sp_is_cfa_reg) |
9437 | m->fs.cfa_offset += UNITS_PER_WORD; |
9438 | RTX_FRAME_RELATED_P (insn) = 1; |
9439 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9440 | gen_rtx_SET (stack_pointer_rtx, |
9441 | plus_constant (Pmode, |
9442 | stack_pointer_rtx, |
9443 | -UNITS_PER_WORD))); |
9444 | } |
9445 | } |
9446 | |
9447 | if (r10_live) |
9448 | { |
9449 | r10 = gen_rtx_REG (Pmode, R10_REG); |
9450 | insn = emit_insn (gen_push (r10)); |
9451 | allocate -= UNITS_PER_WORD; |
9452 | if (sp_is_cfa_reg || TARGET_SEH) |
9453 | { |
9454 | if (sp_is_cfa_reg) |
9455 | m->fs.cfa_offset += UNITS_PER_WORD; |
9456 | RTX_FRAME_RELATED_P (insn) = 1; |
9457 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9458 | gen_rtx_SET (stack_pointer_rtx, |
9459 | plus_constant (Pmode, |
9460 | stack_pointer_rtx, |
9461 | -UNITS_PER_WORD))); |
9462 | } |
9463 | } |
9464 | |
9465 | emit_move_insn (eax, GEN_INT (allocate)); |
9466 | emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax)); |
9467 | |
9468 | /* Use the fact that AX still contains ALLOCATE. */ |
9469 | insn = emit_insn (gen_pro_epilogue_adjust_stack_sub |
9470 | (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax)); |
9471 | |
9472 | if (sp_is_cfa_reg || TARGET_SEH) |
9473 | { |
9474 | if (sp_is_cfa_reg) |
9475 | m->fs.cfa_offset += allocate; |
9476 | RTX_FRAME_RELATED_P (insn) = 1; |
9477 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
9478 | gen_rtx_SET (stack_pointer_rtx, |
9479 | plus_constant (Pmode, stack_pointer_rtx, |
9480 | -allocate))); |
9481 | } |
9482 | m->fs.sp_offset += allocate; |
9483 | |
9484 | /* Use stack_pointer_rtx for relative addressing so that code works for |
9485 | realigned stack. But this means that we need a blockage to prevent |
9486 | stores based on the frame pointer from being scheduled before. */ |
9487 | if (r10_live && eax_live) |
9488 | { |
9489 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9490 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
9491 | gen_frame_mem (word_mode, t)); |
9492 | t = plus_constant (Pmode, t, UNITS_PER_WORD); |
9493 | emit_move_insn (gen_rtx_REG (word_mode, AX_REG), |
9494 | gen_frame_mem (word_mode, t)); |
9495 | emit_insn (gen_memory_blockage ()); |
9496 | } |
9497 | else if (eax_live || r10_live) |
9498 | { |
9499 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
9500 | emit_move_insn (gen_rtx_REG (word_mode, |
9501 | (eax_live ? AX_REG : R10_REG)), |
9502 | gen_frame_mem (word_mode, t)); |
9503 | emit_insn (gen_memory_blockage ()); |
9504 | } |
9505 | } |
9506 | gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); |
9507 | |
9508 | /* If we haven't already set up the frame pointer, do so now. */ |
9509 | if (frame_pointer_needed && !m->fs.fp_valid) |
9510 | { |
9511 | insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
9512 | GEN_INT (frame.stack_pointer_offset |
9513 | - frame.hard_frame_pointer_offset)); |
9514 | insn = emit_insn (insn); |
9515 | RTX_FRAME_RELATED_P (insn) = 1; |
9516 | add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); |
9517 | |
9518 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9519 | m->fs.cfa_reg = hard_frame_pointer_rtx; |
9520 | m->fs.fp_offset = frame.hard_frame_pointer_offset; |
9521 | m->fs.fp_valid = true; |
9522 | } |
9523 | |
9524 | if (!int_registers_saved) |
9525 | ix86_emit_save_regs_using_mov (frame.reg_save_offset); |
9526 | if (!sse_registers_saved) |
9527 | ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); |
9528 | else if (save_stub_call_needed) |
9529 | ix86_emit_outlined_ms2sysv_save (frame); |
9530 | |
9531 | /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT |
9532 | in PROLOGUE. */ |
9533 | if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) |
9534 | { |
9535 | rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); |
9536 | insn = emit_insn (gen_set_got (pic)); |
9537 | RTX_FRAME_RELATED_P (insn) = 1; |
9538 | add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
9539 | emit_insn (gen_prologue_use (pic)); |
9540 | /* Delete an already emitted SET_GOT if it exists and is allocated to |
9541 | REAL_PIC_OFFSET_TABLE_REGNUM. */ |
9542 | ix86_elim_entry_set_got (pic); |
9543 | } |
9544 | |
9545 | if (crtl->drap_reg && !crtl->stack_realign_needed) |
9546 | { |
9547 | /* vDRAP is set up, but after reload it turns out stack realignment |
9548 | isn't necessary; here we emit the prologue to set up DRAP |
9549 | without the stack realignment adjustment. */ |
9550 | t = choose_baseaddr (0, NULL); |
9551 | emit_insn (gen_rtx_SET (crtl->drap_reg, t)); |
9552 | } |
9553 | |
9554 | /* Prevent instructions from being scheduled into register save push |
9555 | sequence when access to the redzone area is done through frame pointer. |
9556 | The offset between the frame pointer and the stack pointer is calculated |
9557 | relative to the value of the stack pointer at the end of the function |
9558 | prologue, and moving instructions that access redzone area via frame |
9559 | pointer inside push sequence violates this assumption. */ |
9560 | if (frame_pointer_needed && frame.red_zone_size) |
9561 | emit_insn (gen_memory_blockage ()); |
9562 | |
9563 | /* SEH requires that the prologue end within 256 bytes of the start of |
9564 | the function. Prevent instruction schedules that would extend that. |
9565 | Further, prevent alloca modifications to the stack pointer from being |
9566 | combined with prologue modifications. */ |
9567 | if (TARGET_SEH) |
9568 | emit_insn (gen_prologue_use (stack_pointer_rtx)); |
9569 | } |
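/* Putting the common pieces together, for a run-of-the-mill 64-bit function
   that needs a frame pointer, saves one callee-saved register and allocates
   64 bytes of locals (no DRAP, no stack protector, no probing -- an
   illustrative sketch, not a guaranteed output), the prologue expands to
   roughly:

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$64, %rsp

   with the corresponding CFA notes attached so the unwinder can track the
   frame through every step.  */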
9570 | |
9571 | /* Emit code to restore REG using a POP or POPP insn. */ |
9572 | |
9573 | static void |
9574 | ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) |
9575 | { |
9576 | struct machine_function *m = cfun->machine; |
9577 | rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p)); |
9578 | |
9579 | ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); |
9580 | m->fs.sp_offset -= UNITS_PER_WORD; |
9581 | |
9582 | if (m->fs.cfa_reg == crtl->drap_reg |
9583 | && REGNO (reg) == REGNO (crtl->drap_reg)) |
9584 | { |
9585 | /* Previously we'd represented the CFA as an expression |
9586 | like *(%ebp - 8). We've just popped that value from |
9587 | the stack, which means we need to reset the CFA to |
9588 | the drap register. This will remain until we restore |
9589 | the stack pointer. */ |
9590 | add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
9591 | RTX_FRAME_RELATED_P (insn) = 1; |
9592 | |
9593 | /* This means that the DRAP register is valid for addressing too. */ |
9594 | m->fs.drap_valid = true; |
9595 | return; |
9596 | } |
9597 | |
9598 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9599 | { |
9600 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
9601 | x = gen_rtx_SET (stack_pointer_rtx, x); |
9602 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
9603 | RTX_FRAME_RELATED_P (insn) = 1; |
9604 | |
9605 | m->fs.cfa_offset -= UNITS_PER_WORD; |
9606 | } |
9607 | |
9608 | /* When the frame pointer is the CFA, and we pop it, we are |
9609 | swapping back to the stack pointer as the CFA. This happens |
9610 | for stack frames that don't allocate other data, so we assume |
9611 | the stack pointer is now pointing at the return address, i.e. |
9612 | the function entry state, which makes the offset be 1 word. */ |
9613 | if (reg == hard_frame_pointer_rtx) |
9614 | { |
9615 | m->fs.fp_valid = false; |
9616 | if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
9617 | { |
9618 | m->fs.cfa_reg = stack_pointer_rtx; |
9619 | m->fs.cfa_offset -= UNITS_PER_WORD; |
9620 | |
9621 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9622 | plus_constant (Pmode, stack_pointer_rtx, |
9623 | m->fs.cfa_offset)); |
9624 | RTX_FRAME_RELATED_P (insn) = 1; |
9625 | } |
9626 | } |
9627 | } |
9628 | |
9629 | /* Emit code to restore REG1 and REG2 using a POP2 insn. */ |
9630 | static void |
9631 | ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) |
9632 | { |
9633 | struct machine_function *m = cfun->machine; |
9634 | const int offset = UNITS_PER_WORD * 2; |
9635 | rtx_insn *insn; |
9636 | |
9637 | rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, |
9638 | stack_pointer_rtx)); |
9639 | |
9640 | if (ppx_p) |
9641 | insn = emit_insn (gen_pop2p_di (reg1, mem, reg2)); |
9642 | else |
9643 | insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); |
9644 | |
9645 | RTX_FRAME_RELATED_P (insn) = 1; |
9646 | |
9647 | rtx dwarf = NULL_RTX; |
9648 | dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf); |
9649 | dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf); |
9650 | REG_NOTES (insn) = dwarf; |
9651 | m->fs.sp_offset -= offset; |
9652 | |
9653 | if (m->fs.cfa_reg == crtl->drap_reg |
9654 | && (REGNO (reg1) == REGNO (crtl->drap_reg) |
9655 | || REGNO (reg2) == REGNO (crtl->drap_reg))) |
9656 | { |
9657 | /* Previously we'd represented the CFA as an expression |
9658 | like *(%ebp - 8). We've just popped that value from |
9659 | the stack, which means we need to reset the CFA to |
9660 | the drap register. This will remain until we restore |
9661 | the stack pointer. */ |
9662 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9663 | REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2); |
9664 | RTX_FRAME_RELATED_P (insn) = 1; |
9665 | |
9666 | /* This means that the DRAP register is valid for addressing too. */ |
9667 | m->fs.drap_valid = true; |
9668 | return; |
9669 | } |
9670 | |
9671 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9672 | { |
9673 | rtx x = plus_constant (Pmode, stack_pointer_rtx, offset); |
9674 | x = gen_rtx_SET (stack_pointer_rtx, x); |
9675 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
9676 | RTX_FRAME_RELATED_P (insn) = 1; |
9677 | |
9678 | m->fs.cfa_offset -= offset; |
9679 | } |
9680 | |
9681 | /* When the frame pointer is the CFA, and we pop it, we are |
9682 | swapping back to the stack pointer as the CFA. This happens |
9683 | for stack frames that don't allocate other data, so we assume |
9684 | the stack pointer is now pointing at the return address, i.e. |
9685 | the function entry state, which makes the offset be 1 word. */ |
9686 | if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx) |
9687 | { |
9688 | m->fs.fp_valid = false; |
9689 | if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
9690 | { |
9691 | m->fs.cfa_reg = stack_pointer_rtx; |
9692 | m->fs.cfa_offset -= offset; |
9693 | |
9694 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9695 | plus_constant (Pmode, stack_pointer_rtx, |
9696 | m->fs.cfa_offset)); |
9697 | RTX_FRAME_RELATED_P (insn) = 1; |
9698 | } |
9699 | } |
9700 | } |
9701 | |
9702 | /* Emit code to restore saved registers using POP insns. */ |
9703 | |
9704 | static void |
9705 | ix86_emit_restore_regs_using_pop (bool ppx_p) |
9706 | { |
9707 | unsigned int regno; |
9708 | |
9709 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9710 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9711 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), ppx_p); |
9712 | } |
9713 | |
9714 | /* Emit code to restore saved registers using POP2 insns. */ |
9715 | |
9716 | static void |
9717 | ix86_emit_restore_regs_using_pop2 (void) |
9718 | { |
9719 | int regno; |
9720 | int regno_list[2]; |
9721 | regno_list[0] = regno_list[1] = -1; |
9722 | int loaded_regnum = 0; |
9723 | bool aligned = cfun->machine->fs.sp_offset % 16 == 0; |
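| /* POP2 pops two words with a single 16-byte (TImode) stack access, so |
| registers are only paired while the running sp_offset is 16-byte |
| aligned; a leading unpaired register is popped singly below to restore |
| that alignment. */ |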
9724 | |
9725 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9726 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: false, ignore_outlined: true)) |
9727 | { |
9728 | if (aligned) |
9729 | { |
9730 | regno_list[loaded_regnum++] = regno; |
9731 | if (loaded_regnum == 2) |
9732 | { |
9733 | gcc_assert (regno_list[0] != -1 |
9734 | && regno_list[1] != -1 |
9735 | && regno_list[0] != regno_list[1]); |
9736 | |
9737 | ix86_emit_restore_reg_using_pop2 (reg1: gen_rtx_REG (word_mode, |
9738 | regno_list[0]), |
9739 | reg2: gen_rtx_REG (word_mode, |
9740 | regno_list[1]), |
9741 | TARGET_APX_PPX); |
9742 | loaded_regnum = 0; |
9743 | regno_list[0] = regno_list[1] = -1; |
9744 | } |
9745 | } |
9746 | else |
9747 | { |
9748 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno), |
9749 | TARGET_APX_PPX); |
9750 | aligned = true; |
9751 | } |
9752 | } |
9753 | |
9754 | if (loaded_regnum == 1) |
9755 | ix86_emit_restore_reg_using_pop (reg: gen_rtx_REG (word_mode, regno_list[0]), |
9756 | TARGET_APX_PPX); |
9757 | } |
9758 | |
9759 | /* Emit code and notes for the LEAVE instruction. If INSN is non-null, |
9760 | omit the emit and only attach the notes. */ |
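| /* (As a reminder: LEAVE is equivalent to moving the frame pointer into |
| the stack pointer and then popping the saved frame pointer, which is |
| why SP becomes valid and FP invalid in the state tracking below.) */ |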
9761 | |
9762 | static void |
9763 | ix86_emit_leave (rtx_insn *insn) |
9764 | { |
9765 | struct machine_function *m = cfun->machine; |
9766 | |
9767 | if (!insn) |
9768 | insn = emit_insn (gen_leave (arg0: word_mode)); |
9769 | |
9770 | ix86_add_queued_cfa_restore_notes (insn); |
9771 | |
9772 | gcc_assert (m->fs.fp_valid); |
9773 | m->fs.sp_valid = true; |
9774 | m->fs.sp_realigned = false; |
9775 | m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; |
9776 | m->fs.fp_valid = false; |
9777 | |
9778 | if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
9779 | { |
9780 | m->fs.cfa_reg = stack_pointer_rtx; |
9781 | m->fs.cfa_offset = m->fs.sp_offset; |
9782 | |
9783 | add_reg_note (insn, REG_CFA_DEF_CFA, |
9784 | plus_constant (Pmode, stack_pointer_rtx, |
9785 | m->fs.sp_offset)); |
9786 | RTX_FRAME_RELATED_P (insn) = 1; |
9787 | } |
9788 | ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, |
9789 | cfa_offset: m->fs.fp_offset); |
9790 | } |
9791 | |
9792 | /* Emit code to restore saved registers using MOV insns. |
9793 | First register is restored from CFA - CFA_OFFSET. */ |
9794 | static void |
9795 | ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, |
9796 | bool maybe_eh_return) |
9797 | { |
9798 | struct machine_function *m = cfun->machine; |
9799 | unsigned int regno; |
9800 | |
9801 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9802 | if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
9803 | { |
9804 | rtx reg = gen_rtx_REG (word_mode, regno); |
9805 | rtx mem; |
9806 | rtx_insn *insn; |
9807 | |
9808 | mem = choose_baseaddr (cfa_offset, NULL); |
9809 | mem = gen_frame_mem (word_mode, mem); |
9810 | insn = emit_move_insn (reg, mem); |
9811 | |
9812 | if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) |
9813 | { |
9814 | /* Previously we'd represented the CFA as an expression |
9815 | like *(%ebp - 8). We've just popped that value from |
9816 | the stack, which means we need to reset the CFA to |
9817 | the drap register. This will remain until we restore |
9818 | the stack pointer. */ |
9819 | add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
9820 | RTX_FRAME_RELATED_P (insn) = 1; |
9821 | |
9822 | /* This means that the DRAP register is valid for addressing. */ |
9823 | m->fs.drap_valid = true; |
9824 | } |
9825 | else |
9826 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
9827 | |
9828 | cfa_offset -= UNITS_PER_WORD; |
9829 | } |
9830 | } |
9831 | |
9832 | /* Emit code to restore saved SSE registers using MOV insns. |
9833 | First register is restored from CFA - CFA_OFFSET. */ |
9834 | static void |
9835 | ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, |
9836 | bool maybe_eh_return) |
9837 | { |
9838 | unsigned int regno; |
9839 | |
9840 | for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
9841 | if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, ignore_outlined: true)) |
9842 | { |
9843 | rtx reg = gen_rtx_REG (V4SFmode, regno); |
9844 | rtx mem; |
9845 | unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); |
9846 | |
9847 | mem = choose_baseaddr (cfa_offset, align: &align); |
9848 | mem = gen_rtx_MEM (V4SFmode, mem); |
9849 | |
9850 | /* The location alignment depends upon the base register. */ |
9851 | align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); |
9852 | gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); |
9853 | set_mem_align (mem, align); |
9854 | emit_insn (gen_rtx_SET (reg, mem)); |
9855 | |
9856 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset); |
9857 | |
9858 | cfa_offset -= GET_MODE_SIZE (V4SFmode); |
9859 | } |
9860 | } |
9861 | |
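| /* Restore the registers saved by the out-of-line ms2sysv save stub. |
| FRAME describes the frame layout, USE_CALL selects calling the restore |
| stub rather than tail-jumping to it, and STYLE is the epilogue style |
| passed down from ix86_expand_epilogue. RSI is set up below as the |
| stub's base pointer. */ |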
9862 | static void |
9863 | ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame, |
9864 | bool use_call, int style) |
9865 | { |
9866 | struct machine_function *m = cfun->machine; |
9867 | const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS |
9868 | + m->call_ms2sysv_extra_regs; |
9869 | rtvec v; |
9870 | unsigned int elems_needed, align, i, vi = 0; |
9871 | rtx_insn *insn; |
9872 | rtx sym, tmp; |
9873 | rtx rsi = gen_rtx_REG (word_mode, SI_REG); |
9874 | rtx r10 = NULL_RTX; |
9875 | const class xlogue_layout &xlogue = xlogue_layout::get_instance (); |
9876 | HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); |
9877 | HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset; |
9878 | rtx rsi_frame_load = NULL_RTX; |
9879 | HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1; |
9880 | enum xlogue_stub stub; |
9881 | |
9882 | gcc_assert (!m->fs.fp_valid || frame_pointer_needed); |
9883 | |
9884 | /* If using a realigned stack, we should never start with padding. */ |
9885 | gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ()); |
9886 | |
9887 | /* Setup RSI as the stub's base pointer. */ |
9888 | align = GET_MODE_ALIGNMENT (V4SFmode); |
9889 | tmp = choose_baseaddr (cfa_offset: rsi_offset, align: &align, SI_REG); |
9890 | gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); |
9891 | |
9892 | emit_insn (gen_rtx_SET (rsi, tmp)); |
9893 | |
9894 | /* Get a symbol for the stub. */ |
9895 | if (frame_pointer_needed) |
9896 | stub = use_call ? XLOGUE_STUB_RESTORE_HFP |
9897 | : XLOGUE_STUB_RESTORE_HFP_TAIL; |
9898 | else |
9899 | stub = use_call ? XLOGUE_STUB_RESTORE |
9900 | : XLOGUE_STUB_RESTORE_TAIL; |
9901 | sym = xlogue.get_stub_rtx (stub); |
9902 | |
9903 | elems_needed = ncregs; |
9904 | if (use_call) |
9905 | elems_needed += 1; |
9906 | else |
9907 | elems_needed += frame_pointer_needed ? 5 : 3; |
9908 | v = rtvec_alloc (elems_needed); |
9909 | |
9910 | /* We call the epilogue stub when we need to pop incoming args or when |
9911 | we are doing a sibling call as the tail. Otherwise, we emit a jmp to |
9912 | the epilogue stub, and that jmp is the tail call. */ |
9913 | if (use_call) |
9914 | RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); |
9915 | else |
9916 | { |
9917 | RTVEC_ELT (v, vi++) = ret_rtx; |
9918 | RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); |
9919 | if (frame_pointer_needed) |
9920 | { |
9921 | rtx rbp = gen_rtx_REG (DImode, BP_REG); |
9922 | gcc_assert (m->fs.fp_valid); |
9923 | gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); |
9924 | |
9925 | tmp = plus_constant (DImode, rbp, 8); |
9926 | RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp); |
9927 | RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp)); |
9928 | tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); |
9929 | RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp); |
9930 | } |
9931 | else |
9932 | { |
9933 | /* If no hard frame pointer, we set R10 to the SP restore value. */ |
9934 | gcc_assert (!m->fs.fp_valid); |
9935 | gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
9936 | gcc_assert (m->fs.sp_valid); |
9937 | |
9938 | r10 = gen_rtx_REG (DImode, R10_REG); |
9939 | tmp = plus_constant (Pmode, rsi, stub_ptr_offset); |
9940 | emit_insn (gen_rtx_SET (r10, tmp)); |
9941 | |
9942 | RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10); |
9943 | } |
9944 | } |
9945 | |
9946 | /* Generate frame load insns and restore notes. */ |
9947 | for (i = 0; i < ncregs; ++i) |
9948 | { |
9949 | const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i); |
9950 | machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; |
9951 | rtx reg, frame_load; |
9952 | |
9953 | reg = gen_rtx_REG (mode, r.regno); |
9954 | frame_load = gen_frame_load (reg, frame_reg: rsi, offset: r.offset); |
9955 | |
9956 | /* Save RSI frame load insn & note to add last. */ |
9957 | if (r.regno == SI_REG) |
9958 | { |
9959 | gcc_assert (!rsi_frame_load); |
9960 | rsi_frame_load = frame_load; |
9961 | rsi_restore_offset = r.offset; |
9962 | } |
9963 | else |
9964 | { |
9965 | RTVEC_ELT (v, vi++) = frame_load; |
9966 | ix86_add_cfa_restore_note (NULL, reg, cfa_offset: r.offset); |
9967 | } |
9968 | } |
9969 | |
9970 | /* Add RSI frame load & restore note at the end. */ |
9971 | gcc_assert (rsi_frame_load); |
9972 | gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1); |
9973 | RTVEC_ELT (v, vi++) = rsi_frame_load; |
9974 | ix86_add_cfa_restore_note (NULL, reg: gen_rtx_REG (DImode, SI_REG), |
9975 | cfa_offset: rsi_restore_offset); |
9976 | |
9977 | /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */ |
9978 | if (!use_call && !frame_pointer_needed) |
9979 | { |
9980 | gcc_assert (m->fs.sp_valid); |
9981 | gcc_assert (!m->fs.sp_realigned); |
9982 | |
9983 | /* At this point, R10 should point to frame.stack_realign_offset. */ |
9984 | if (m->fs.cfa_reg == stack_pointer_rtx) |
9985 | m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset; |
9986 | m->fs.sp_offset = frame.stack_realign_offset; |
9987 | } |
9988 | |
9989 | gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v)); |
9990 | tmp = gen_rtx_PARALLEL (VOIDmode, v); |
9991 | if (use_call) |
9992 | insn = emit_insn (tmp); |
9993 | else |
9994 | { |
9995 | insn = emit_jump_insn (tmp); |
9996 | JUMP_LABEL (insn) = ret_rtx; |
9997 | |
9998 | if (frame_pointer_needed) |
9999 | ix86_emit_leave (insn); |
10000 | else |
10001 | { |
10002 | /* Need CFA adjust note. */ |
10003 | tmp = gen_rtx_SET (stack_pointer_rtx, r10); |
10004 | add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp); |
10005 | } |
10006 | } |
10007 | |
10008 | RTX_FRAME_RELATED_P (insn) = true; |
10009 | ix86_add_queued_cfa_restore_notes (insn); |
10010 | |
10011 | /* If we're not doing a tail-call, we need to adjust the stack. */ |
10012 | if (use_call && m->fs.sp_valid) |
10013 | { |
10014 | HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset; |
10015 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10016 | GEN_INT (dealloc), style, |
10017 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
10018 | } |
10019 | } |
10020 | |
10021 | /* Restore function stack, frame, and registers. */ |
10022 | |
10023 | void |
10024 | ix86_expand_epilogue (int style) |
10025 | { |
10026 | struct machine_function *m = cfun->machine; |
10027 | struct machine_frame_state frame_state_save = m->fs; |
10028 | bool restore_regs_via_mov; |
10029 | bool using_drap; |
10030 | bool restore_stub_is_tail = false; |
10031 | |
10032 | if (ix86_function_naked (fn: current_function_decl)) |
10033 | { |
10034 | /* The program should not reach this point. */ |
10035 | emit_insn (gen_ud2 ()); |
10036 | return; |
10037 | } |
10038 | |
10039 | ix86_finalize_stack_frame_flags (); |
10040 | const struct ix86_frame &frame = cfun->machine->frame; |
10041 | |
10042 | m->fs.sp_realigned = stack_realign_fp; |
10043 | m->fs.sp_valid = stack_realign_fp |
10044 | || !frame_pointer_needed |
10045 | || crtl->sp_is_unchanging; |
10046 | gcc_assert (!m->fs.sp_valid |
10047 | || m->fs.sp_offset == frame.stack_pointer_offset); |
10048 | |
10049 | /* The FP must be valid if the frame pointer is present. */ |
10050 | gcc_assert (frame_pointer_needed == m->fs.fp_valid); |
10051 | gcc_assert (!m->fs.fp_valid |
10052 | || m->fs.fp_offset == frame.hard_frame_pointer_offset); |
10053 | |
10054 | /* We must have *some* valid pointer to the stack frame. */ |
10055 | gcc_assert (m->fs.sp_valid || m->fs.fp_valid); |
10056 | |
10057 | /* The DRAP is never valid at this point. */ |
10058 | gcc_assert (!m->fs.drap_valid); |
10059 | |
10060 | /* See the comment about red zone and frame |
10061 | pointer usage in ix86_expand_prologue. */ |
10062 | if (frame_pointer_needed && frame.red_zone_size) |
10063 | emit_insn (gen_memory_blockage ()); |
10064 | |
10065 | using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
10066 | gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); |
10067 | |
10068 | /* Determine the CFA offset of the end of the red-zone. */ |
10069 | m->fs.red_zone_offset = 0; |
10070 | if (ix86_using_red_zone () && crtl->args.pops_args < 65536) |
10071 | { |
10072 | /* The red-zone begins below the return address and the error code |
10073 | in the exception handler. */ |
10074 | m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; |
10075 | |
10076 | /* When the register save area is in the aligned portion of |
10077 | the stack, determine the maximum runtime displacement that |
10078 | matches up with the aligned frame. */ |
10079 | if (stack_realign_drap) |
10080 | m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT |
10081 | + UNITS_PER_WORD); |
10082 | } |
10083 | |
10084 | HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; |
10085 | |
10086 | /* Special care must be taken for the normal return case of a function |
10087 | using eh_return: the eax and edx registers are marked as saved, but |
10088 | not restored along this path. Adjust the save location to match. */ |
10089 | if (crtl->calls_eh_return && style != 2) |
10090 | reg_save_offset -= 2 * UNITS_PER_WORD; |
10091 | |
10092 | /* EH_RETURN requires the use of moves to function properly. */ |
10093 | if (crtl->calls_eh_return) |
10094 | restore_regs_via_mov = true; |
10095 | /* SEH requires the use of pops to identify the epilogue. */ |
10096 | else if (TARGET_SEH) |
10097 | restore_regs_via_mov = false; |
10098 | /* If we already saved registers with PUSHP, don't use moves in the epilogue. */ |
10099 | else if (m->fs.apx_ppx_used) |
10100 | restore_regs_via_mov = false; |
10101 | /* If we're only restoring one register and sp cannot be used, then |
10102 | use a move instruction to restore the register, since it's |
10103 | less work than reloading sp and popping the register. */ |
10104 | else if (!sp_valid_at (cfa_offset: frame.hfp_save_offset) && frame.nregs <= 1) |
10105 | restore_regs_via_mov = true; |
10106 | else if (TARGET_EPILOGUE_USING_MOVE |
10107 | && cfun->machine->use_fast_prologue_epilogue |
10108 | && (frame.nregs > 1 |
10109 | || m->fs.sp_offset != reg_save_offset)) |
10110 | restore_regs_via_mov = true; |
10111 | else if (frame_pointer_needed |
10112 | && !frame.nregs |
10113 | && m->fs.sp_offset != reg_save_offset) |
10114 | restore_regs_via_mov = true; |
10115 | else if (frame_pointer_needed |
10116 | && TARGET_USE_LEAVE |
10117 | && cfun->machine->use_fast_prologue_epilogue |
10118 | && frame.nregs == 1) |
10119 | restore_regs_via_mov = true; |
10120 | else |
10121 | restore_regs_via_mov = false; |
10122 | |
10123 | if (restore_regs_via_mov || frame.nsseregs) |
10124 | { |
10125 | /* Ensure that the entire register save area is addressable via |
10126 | the stack pointer, if we will restore SSE regs via sp. */ |
10127 | if (TARGET_64BIT |
10128 | && m->fs.sp_offset > 0x7fffffff |
10129 | && sp_valid_at (cfa_offset: frame.stack_realign_offset + 1) |
10130 | && (frame.nsseregs + frame.nregs) != 0) |
10131 | { |
10132 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10133 | GEN_INT (m->fs.sp_offset |
10134 | - frame.sse_reg_save_offset), |
10135 | style, |
10136 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
10137 | } |
10138 | } |
10139 | |
10140 | /* If there are any SSE registers to restore, then we have to do it |
10141 | via moves, since there's obviously no pop for SSE regs. */ |
10142 | if (frame.nsseregs) |
10143 | ix86_emit_restore_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset, |
10144 | maybe_eh_return: style == 2); |
10145 | |
10146 | if (m->call_ms2sysv) |
10147 | { |
10148 | int pop_incoming_args = crtl->args.pops_args && crtl->args.size; |
10149 | |
10150 | /* We cannot use a tail-call for the stub if: |
10151 | 1. We have to pop incoming args, |
10152 | 2. We have additional int regs to restore, or |
10153 | 3. A sibling call will be the tail-call, or |
10154 | 4. We are emitting an eh_return_internal epilogue. |
10155 | |
10156 |      TODO: Item 4 has not yet been tested! |
10157 | |
10158 | If any of the above are true, we will call the stub rather than |
10159 | jump to it. */ |
10160 | restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); |
10161 | ix86_emit_outlined_ms2sysv_restore (frame, use_call: !restore_stub_is_tail, style); |
10162 | } |
10163 | |
10164 | /* If using an out-of-line stub that is a tail-call, then... */ |
10165 | if (m->call_ms2sysv && restore_stub_is_tail) |
10166 | { |
10167 | /* TODO: paranoid tests. (remove eventually) */ |
10168 | gcc_assert (m->fs.sp_valid); |
10169 | gcc_assert (!m->fs.sp_realigned); |
10170 | gcc_assert (!m->fs.fp_valid); |
10171 | gcc_assert (!m->fs.realigned); |
10172 | gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); |
10173 | gcc_assert (!crtl->drap_reg); |
10174 | gcc_assert (!frame.nregs); |
10175 | } |
10176 | else if (restore_regs_via_mov) |
10177 | { |
10178 | rtx t; |
10179 | |
10180 | if (frame.nregs) |
10181 | ix86_emit_restore_regs_using_mov (cfa_offset: reg_save_offset, maybe_eh_return: style == 2); |
10182 | |
10183 | /* eh_return epilogues need %ecx added to the stack pointer. */ |
10184 | if (style == 2) |
10185 | { |
10186 | rtx sa = EH_RETURN_STACKADJ_RTX; |
10187 | rtx_insn *insn; |
10188 | |
10189 | /* Stack realignment doesn't work with eh_return. */ |
10190 | if (crtl->stack_realign_needed) |
10191 | sorry ("Stack realignment not supported with " |
10192 | "%<__builtin_eh_return%>"); |
10193 | |
10194 | /* regparm nested functions don't work with eh_return. */ |
10195 | if (ix86_static_chain_on_stack) |
10196 | sorry ("regparm nested function not supported with " |
10197 | "%<__builtin_eh_return%>"); |
10198 | |
10199 | if (frame_pointer_needed) |
10200 | { |
10201 | t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
10202 | t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); |
10203 | emit_insn (gen_rtx_SET (sa, t)); |
10204 | |
10205 | /* NB: eh_return epilogues must restore the frame pointer |
10206 | in word_mode since the upper 32 bits of RBP register |
10207 | can have any values. */ |
10208 | t = gen_frame_mem (word_mode, hard_frame_pointer_rtx); |
10209 | rtx frame_reg = gen_rtx_REG (word_mode, |
10210 | HARD_FRAME_POINTER_REGNUM); |
10211 | insn = emit_move_insn (frame_reg, t); |
10212 | |
10213 | /* Note that we use SA as a temporary CFA, as the return |
10214 | address is at the proper place relative to it. We |
10215 | pretend this happens at the FP restore insn because |
10216 | prior to this insn the FP would be stored at the wrong |
10217 | offset relative to SA, and after this insn we have no |
10218 | other reasonable register to use for the CFA. We don't |
10219 | bother resetting the CFA to the SP for the duration of |
10220 | the return insn, unless the control flow instrumentation |
10221 | is done. In this case the SP is used later and we have |
10222 | to reset CFA to SP. */ |
10223 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10224 | plus_constant (Pmode, sa, UNITS_PER_WORD)); |
10225 | ix86_add_queued_cfa_restore_notes (insn); |
10226 | add_reg_note (insn, REG_CFA_RESTORE, frame_reg); |
10227 | RTX_FRAME_RELATED_P (insn) = 1; |
10228 | |
10229 | m->fs.cfa_reg = sa; |
10230 | m->fs.cfa_offset = UNITS_PER_WORD; |
10231 | m->fs.fp_valid = false; |
10232 | |
10233 | pro_epilogue_adjust_stack (stack_pointer_rtx, src: sa, |
10234 | const0_rtx, style, |
10235 | flag_cf_protection); |
10236 | } |
10237 | else |
10238 | { |
10239 | t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
10240 | t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); |
10241 | insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); |
10242 | ix86_add_queued_cfa_restore_notes (insn); |
10243 | |
10244 | gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
10245 | if (m->fs.cfa_offset != UNITS_PER_WORD) |
10246 | { |
10247 | m->fs.cfa_offset = UNITS_PER_WORD; |
10248 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10249 | plus_constant (Pmode, stack_pointer_rtx, |
10250 | UNITS_PER_WORD)); |
10251 | RTX_FRAME_RELATED_P (insn) = 1; |
10252 | } |
10253 | } |
10254 | m->fs.sp_offset = UNITS_PER_WORD; |
10255 | m->fs.sp_valid = true; |
10256 | m->fs.sp_realigned = false; |
10257 | } |
10258 | } |
10259 | else |
10260 | { |
10261 | /* SEH requires that the function end with (1) a stack adjustment |
10262 | if necessary, (2) a sequence of pops, and (3) a return or |
10263 | jump instruction. Prevent insns from the function body from |
10264 | being scheduled into this sequence. */ |
10265 | if (TARGET_SEH) |
10266 | { |
10267 | /* Prevent a catch region from being adjacent to the standard |
10268 | epilogue sequence. Unfortunately neither crtl->uses_eh_lsda |
10269 | nor several other flags that would be interesting to test are |
10270 | set up yet. */ |
10271 | if (flag_non_call_exceptions) |
10272 | emit_insn (gen_nops (const1_rtx)); |
10273 | else |
10274 | emit_insn (gen_blockage ()); |
10275 | } |
10276 | |
10277 | /* First step is to deallocate the stack frame so that we can |
10278 | pop the registers. If the stack pointer was realigned, it needs |
10279 | to be restored now. Also do it on SEH target for very large |
10280 | frame as the emitted instructions aren't allowed by the ABI |
10281 | in epilogues. */ |
10282 | if (!m->fs.sp_valid || m->fs.sp_realigned |
10283 | || (TARGET_SEH |
10284 | && (m->fs.sp_offset - reg_save_offset |
10285 | >= SEH_MAX_FRAME_SIZE))) |
10286 | { |
10287 | pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, |
10288 | GEN_INT (m->fs.fp_offset |
10289 | - reg_save_offset), |
10290 | style, set_cfa: false); |
10291 | } |
10292 | else if (m->fs.sp_offset != reg_save_offset) |
10293 | { |
10294 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10295 | GEN_INT (m->fs.sp_offset |
10296 | - reg_save_offset), |
10297 | style, |
10298 | set_cfa: m->fs.cfa_reg == stack_pointer_rtx); |
10299 | } |
10300 | |
10301 | if (TARGET_APX_PUSH2POP2 |
10302 | && ix86_can_use_push2pop2 () |
10303 | && m->func_type == TYPE_NORMAL) |
10304 | ix86_emit_restore_regs_using_pop2 (); |
10305 | else |
10306 | ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); |
10307 | } |
10308 | |
10309 | /* If we used a frame pointer and haven't already got rid of it, |
10310 | then do so now. */ |
10311 | if (m->fs.fp_valid) |
10312 | { |
10313 | /* If the stack pointer is valid and pointing at the frame |
10314 | pointer store address, then we only need a pop. */ |
10315 | if (sp_valid_at (cfa_offset: frame.hfp_save_offset) |
10316 | && m->fs.sp_offset == frame.hfp_save_offset) |
10317 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
10318 | /* Leave results in shorter dependency chains on CPUs that are |
10319 | able to grok it fast. */ |
10320 | else if (TARGET_USE_LEAVE |
10321 | || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) |
10322 | || !cfun->machine->use_fast_prologue_epilogue) |
10323 | ix86_emit_leave (NULL); |
10324 | else |
10325 | { |
10326 | pro_epilogue_adjust_stack (stack_pointer_rtx, |
10327 | hard_frame_pointer_rtx, |
10328 | const0_rtx, style, set_cfa: !using_drap); |
10329 | ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
10330 | } |
10331 | } |
10332 | |
10333 | if (using_drap) |
10334 | { |
10335 | int param_ptr_offset = UNITS_PER_WORD; |
10336 | rtx_insn *insn; |
10337 | |
10338 | gcc_assert (stack_realign_drap); |
10339 | |
10340 | if (ix86_static_chain_on_stack) |
10341 | param_ptr_offset += UNITS_PER_WORD; |
10342 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
10343 | param_ptr_offset += UNITS_PER_WORD; |
10344 | |
10345 | insn = emit_insn (gen_rtx_SET |
10346 | (stack_pointer_rtx, |
10347 | plus_constant (Pmode, crtl->drap_reg, |
10348 | -param_ptr_offset))); |
10349 | m->fs.cfa_reg = stack_pointer_rtx; |
10350 | m->fs.cfa_offset = param_ptr_offset; |
10351 | m->fs.sp_offset = param_ptr_offset; |
10352 | m->fs.realigned = false; |
10353 | |
10354 | add_reg_note (insn, REG_CFA_DEF_CFA, |
10355 | plus_constant (Pmode, stack_pointer_rtx, |
10356 | param_ptr_offset)); |
10357 | RTX_FRAME_RELATED_P (insn) = 1; |
10358 | |
10359 | if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) |
10360 | ix86_emit_restore_reg_using_pop (crtl->drap_reg); |
10361 | } |
10362 | |
10363 | /* At this point the stack pointer must be valid, and we must have |
10364 | restored all of the registers. We may not have deallocated the |
10365 | entire stack frame. We've delayed this until now because it may |
10366 | be possible to merge the local stack deallocation with the |
10367 | deallocation forced by ix86_static_chain_on_stack. */ |
10368 | gcc_assert (m->fs.sp_valid); |
10369 | gcc_assert (!m->fs.sp_realigned); |
10370 | gcc_assert (!m->fs.fp_valid); |
10371 | gcc_assert (!m->fs.realigned); |
10372 | if (m->fs.sp_offset != UNITS_PER_WORD) |
10373 | { |
10374 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10375 | GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), |
10376 | style, set_cfa: true); |
10377 | } |
10378 | else |
10379 | ix86_add_queued_cfa_restore_notes (insn: get_last_insn ()); |
10380 | |
10381 | /* Sibcall epilogues don't want a return instruction. */ |
10382 | if (style == 0) |
10383 | { |
10384 | m->fs = frame_state_save; |
10385 | return; |
10386 | } |
10387 | |
10388 | if (cfun->machine->func_type != TYPE_NORMAL) |
10389 | emit_jump_insn (gen_interrupt_return ()); |
10390 | else if (crtl->args.pops_args && crtl->args.size) |
10391 | { |
10392 | rtx popc = GEN_INT (crtl->args.pops_args); |
10393 | |
10394 | /* i386 can only pop 64K bytes. If asked to pop more, pop the return |
10395 | address, do an explicit add, and jump indirectly to the caller. */ |
10396 | |
10397 | if (crtl->args.pops_args >= 65536) |
10398 | { |
10399 | rtx ecx = gen_rtx_REG (SImode, CX_REG); |
10400 | rtx_insn *insn; |
10401 | |
10402 | /* There is no "pascal" calling convention in any 64bit ABI. */ |
10403 | gcc_assert (!TARGET_64BIT); |
10404 | |
10405 | insn = emit_insn (gen_pop (arg: ecx)); |
10406 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10407 | m->fs.sp_offset -= UNITS_PER_WORD; |
10408 | |
10409 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10410 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10411 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10412 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10413 | RTX_FRAME_RELATED_P (insn) = 1; |
10414 | |
10415 | pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
10416 | offset: popc, style: -1, set_cfa: true); |
10417 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10418 | } |
10419 | else |
10420 | emit_jump_insn (gen_simple_return_pop_internal (popc)); |
10421 | } |
10422 | else if (!m->call_ms2sysv || !restore_stub_is_tail) |
10423 | { |
10424 | /* In case of a return from EH a simple return cannot be used, |
10425 | as the return address will be compared with a shadow stack |
10426 | return address. Use an indirect jump instead. */ |
10427 | if (style == 2 && flag_cf_protection) |
10428 | { |
10429 | /* Register used in indirect jump must be in word_mode. But |
10430 | Pmode may not be the same as word_mode for x32. */ |
10431 | rtx ecx = gen_rtx_REG (word_mode, CX_REG); |
10432 | rtx_insn *insn; |
10433 | |
10434 | insn = emit_insn (gen_pop (arg: ecx)); |
10435 | m->fs.cfa_offset -= UNITS_PER_WORD; |
10436 | m->fs.sp_offset -= UNITS_PER_WORD; |
10437 | |
10438 | rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
10439 | x = gen_rtx_SET (stack_pointer_rtx, x); |
10440 | add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
10441 | add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); |
10442 | RTX_FRAME_RELATED_P (insn) = 1; |
10443 | |
10444 | emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
10445 | } |
10446 | else |
10447 | emit_jump_insn (gen_simple_return_internal ()); |
10448 | } |
10449 | |
10450 | /* Restore the state back to the state from the prologue, |
10451 | so that it's correct for the next epilogue. */ |
10452 | m->fs = frame_state_save; |
10453 | } |
10454 | |
10455 | /* Reset from the function's potential modifications. */ |
10456 | |
10457 | static void |
10458 | ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) |
10459 | { |
10460 | if (pic_offset_table_rtx |
10461 | && !ix86_use_pseudo_pic_reg ()) |
10462 | SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); |
10463 | |
10464 | if (TARGET_MACHO) |
10465 | { |
10466 | rtx_insn *insn = get_last_insn (); |
10467 | rtx_insn *deleted_debug_label = NULL; |
10468 | |
10469 | /* Mach-O doesn't support labels at the end of objects, so if |
10470 | it looks like we might want one, take special action. |
10471 | First, collect any sequence of deleted debug labels. */ |
10472 | while (insn |
10473 | && NOTE_P (insn) |
10474 | && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) |
10475 | { |
10476 | /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL |
10477 | notes only, instead set their CODE_LABEL_NUMBER to -1, |
10478 | otherwise there would be code generation differences |
10479 | in between -g and -g0. */ |
10480 | if (NOTE_P (insn) && NOTE_KIND (insn) |
10481 | == NOTE_INSN_DELETED_DEBUG_LABEL) |
10482 | deleted_debug_label = insn; |
10483 | insn = PREV_INSN (insn); |
10484 | } |
10485 | |
10486 | /* If we have: |
10487 | label: |
10488 | barrier |
10489 | then this needs to be detected, so skip past the barrier. */ |
10490 | |
10491 | if (insn && BARRIER_P (insn)) |
10492 | insn = PREV_INSN (insn); |
10493 | |
10494 | /* Up to now we've only seen notes or barriers. */ |
10495 | if (insn) |
10496 | { |
10497 | if (LABEL_P (insn) |
10498 | || (NOTE_P (insn) |
10499 | && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) |
10500 | /* Trailing label. */ |
10501 | fputs (s: "\tnop\n", stream: file); |
10502 | else if (cfun && ! cfun->is_thunk) |
10503 | { |
10504 | /* See if we have a completely empty function body, skipping |
10505 | the special case of the picbase thunk emitted as asm. */ |
10506 | while (insn && ! INSN_P (insn)) |
10507 | insn = PREV_INSN (insn); |
10508 | /* If we don't find any insns, we've got an empty function body; |
10509 | I.e. completely empty - without a return or branch. This is |
10510 | taken as the case where a function body has been removed |
10511 | because it contains an inline __builtin_unreachable(). GCC |
10512 | declares that reaching __builtin_unreachable() means UB so |
10513 | we're not obliged to do anything special; however, we want |
10514 | non-zero-sized function bodies. To meet this, and help the |
10515 | user out, let's trap the case. */ |
10516 | if (insn == NULL) |
10517 | fputs (s: "\tud2\n", stream: file); |
10518 | } |
10519 | } |
10520 | else if (deleted_debug_label) |
10521 | for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) |
10522 | if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) |
10523 | CODE_LABEL_NUMBER (insn) = -1; |
10524 | } |
10525 | } |
10526 | |
10527 | /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */ |
10528 | |
10529 | void |
10530 | ix86_print_patchable_function_entry (FILE *file, |
10531 | unsigned HOST_WIDE_INT patch_area_size, |
10532 | bool record_p) |
10533 | { |
10534 | if (cfun->machine->function_label_emitted) |
10535 | { |
10536 | /* NB: When ix86_print_patchable_function_entry is called after |
10537 | the function label has been emitted, we have inserted or queued |
10538 | a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper |
10539 | place. There is nothing to do here. */ |
10540 | return; |
10541 | } |
10542 | |
10543 | default_print_patchable_function_entry (file, patch_area_size, |
10544 | record_p); |
10545 | } |
10546 | |
10547 | /* Output patchable area. NB: default_print_patchable_function_entry |
10548 | isn't available in i386.md. */ |
10549 | |
10550 | void |
10551 | ix86_output_patchable_area (unsigned int patch_area_size, |
10552 | bool record_p) |
10553 | { |
10554 | default_print_patchable_function_entry (asm_out_file, |
10555 | patch_area_size, |
10556 | record_p); |
10557 | } |
10558 | |
10559 | /* Return a scratch register to use in the split stack prologue. The |
10560 | split stack prologue is used for -fsplit-stack. It is the first |
10561 | instructions in the function, even before the regular prologue. |
10562 | The scratch register can be any caller-saved register which is not |
10563 | used for parameters or for the static chain. */ |
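| /* In practice this is %r11 for 64-bit code; for 32-bit code it is chosen |
| among %eax, %ecx and %edx depending on fastcall/thiscall/regparm and on |
| whether a static chain is present, as the logic below spells out. */ |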
10564 | |
10565 | static unsigned int |
10566 | split_stack_prologue_scratch_regno (void) |
10567 | { |
10568 | if (TARGET_64BIT) |
10569 | return R11_REG; |
10570 | else |
10571 | { |
10572 | bool is_fastcall, is_thiscall; |
10573 | int regparm; |
10574 | |
10575 | is_fastcall = (lookup_attribute (attr_name: "fastcall", |
10576 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10577 | != NULL); |
10578 | is_thiscall = (lookup_attribute (attr_name: "thiscall", |
10579 | TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
10580 | != NULL); |
10581 | regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); |
10582 | |
10583 | if (is_fastcall) |
10584 | { |
10585 | if (DECL_STATIC_CHAIN (cfun->decl)) |
10586 | { |
10587 | sorry ("%<-fsplit-stack%> does not support fastcall with " |
10588 | "nested function"); |
10589 | return INVALID_REGNUM; |
10590 | } |
10591 | return AX_REG; |
10592 | } |
10593 | else if (is_thiscall) |
10594 | { |
10595 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10596 | return DX_REG; |
10597 | return AX_REG; |
10598 | } |
10599 | else if (regparm < 3) |
10600 | { |
10601 | if (!DECL_STATIC_CHAIN (cfun->decl)) |
10602 | return CX_REG; |
10603 | else |
10604 | { |
10605 | if (regparm >= 2) |
10606 | { |
10607 | sorry ("%<-fsplit-stack%> does not support 2 register " |
10608 | "parameters for a nested function"); |
10609 | return INVALID_REGNUM; |
10610 | } |
10611 | return DX_REG; |
10612 | } |
10613 | } |
10614 | else |
10615 | { |
10616 | /* FIXME: We could make this work by pushing a register |
10617 | around the addition and comparison. */ |
10618 | sorry ("%<-fsplit-stack%> does not support 3 register parameters"); |
10619 | return INVALID_REGNUM; |
10620 | } |
10621 | } |
10622 | } |
10623 | |
10624 | /* A SYMBOL_REF for the function which allocates new stackspace for |
10625 | -fsplit-stack. */ |
10626 | |
10627 | static GTY(()) rtx split_stack_fn; |
10628 | |
10629 | /* A SYMBOL_REF for the more stack function when using the large |
10630 | model. */ |
10631 | |
10632 | static GTY(()) rtx split_stack_fn_large; |
10633 | |
10634 | /* Return location of the stack guard value in the TLS block. */ |
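| /* The result is a constant MEM in the default TLS segment (e.g. %fs for |
| 64-bit, %gs for 32-bit on GNU/Linux) at TARGET_THREAD_SPLIT_STACK_OFFSET |
| bytes from the thread pointer. */ |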
10635 | |
10636 | rtx |
10637 | ix86_split_stack_guard (void) |
10638 | { |
10639 | int offset; |
10640 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
10641 | rtx r; |
10642 | |
10643 | gcc_assert (flag_split_stack); |
10644 | |
10645 | #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET |
10646 | offset = TARGET_THREAD_SPLIT_STACK_OFFSET; |
10647 | #else |
10648 | gcc_unreachable (); |
10649 | #endif |
10650 | |
10651 | r = GEN_INT (offset); |
10652 | r = gen_const_mem (Pmode, r); |
10653 | set_mem_addr_space (r, as); |
10654 | |
10655 | return r; |
10656 | } |
10657 | |
10658 | /* Handle -fsplit-stack. These are the first instructions in the |
10659 | function, even before the regular prologue. */ |
10660 | |
10661 | void |
10662 | ix86_expand_split_stack_prologue (void) |
10663 | { |
10664 | HOST_WIDE_INT allocate; |
10665 | unsigned HOST_WIDE_INT args_size; |
10666 | rtx_code_label *label; |
10667 | rtx limit, current, allocate_rtx, call_fusage; |
10668 | rtx_insn *call_insn; |
10669 | unsigned int scratch_regno = INVALID_REGNUM; |
10670 | rtx scratch_reg = NULL_RTX; |
10671 | rtx_code_label *varargs_label = NULL; |
10672 | rtx fn; |
10673 | |
10674 | gcc_assert (flag_split_stack && reload_completed); |
10675 | |
10676 | ix86_finalize_stack_frame_flags (); |
10677 | struct ix86_frame &frame = cfun->machine->frame; |
10678 | allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; |
10679 | |
10680 | /* This is the label we will branch to if we have enough stack |
10681 | space. We expect the basic block reordering pass to reverse this |
10682 | branch if optimizing, so that we branch in the unlikely case. */ |
10683 | label = gen_label_rtx (); |
10684 | |
10685 | /* We need to compare the stack pointer minus the frame size with |
10686 | the stack boundary in the TCB. The stack boundary always gives |
10687 | us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we |
10688 | can compare directly. Otherwise we need to do an addition. */ |
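| /* Roughly, for a large 64-bit frame the emitted check looks like |
|   lea  -ALLOCATE(%rsp), %r11 |
|   cmp  %fs:OFFSET, %r11 |
|   jae  .Lhave_enough_stack |
| (register and segment shown for illustration only). */ |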
10689 | |
10690 | limit = ix86_split_stack_guard (); |
10691 | |
10692 | if (allocate >= SPLIT_STACK_AVAILABLE |
10693 | || flag_force_indirect_call) |
10694 | { |
10695 | scratch_regno = split_stack_prologue_scratch_regno (); |
10696 | if (scratch_regno == INVALID_REGNUM) |
10697 | return; |
10698 | } |
10699 | |
10700 | if (allocate >= SPLIT_STACK_AVAILABLE) |
10701 | { |
10702 | rtx offset; |
10703 | |
10704 | /* We need a scratch register to hold the stack pointer minus |
10705 | the required frame size. Since this is the very start of the |
10706 | function, the scratch register can be any caller-saved |
10707 | register which is not used for parameters. */ |
10708 | offset = GEN_INT (- allocate); |
10709 | |
10710 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
10711 | if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) |
10712 | { |
10713 | /* We don't use gen_add in this case because it will |
10714 | want to split to lea, but when not optimizing the insn |
10715 | will not be split after this point. */ |
10716 | emit_insn (gen_rtx_SET (scratch_reg, |
10717 | gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
10718 | offset))); |
10719 | } |
10720 | else |
10721 | { |
10722 | emit_move_insn (scratch_reg, offset); |
10723 | emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx)); |
10724 | } |
10725 | current = scratch_reg; |
10726 | } |
10727 | else |
10728 | current = stack_pointer_rtx; |
10729 | |
10730 | ix86_expand_branch (GEU, current, limit, label); |
10731 | rtx_insn *jump_insn = get_last_insn (); |
10732 | JUMP_LABEL (jump_insn) = label; |
10733 | |
10734 | /* Mark the jump as very likely to be taken. */ |
10735 | add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); |
10736 | |
10737 | if (split_stack_fn == NULL_RTX) |
10738 | { |
10739 | split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); |
10740 | SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; |
10741 | } |
10742 | fn = split_stack_fn; |
10743 | |
10744 | /* Get more stack space. We pass in the desired stack space and the |
10745 | size of the arguments to copy to the new stack. In 32-bit mode |
10746 | we push the parameters; __morestack will return on a new stack |
10747 | anyhow. In 64-bit mode we pass the parameters in r10 and |
10748 | r11. */ |
10749 | allocate_rtx = GEN_INT (allocate); |
10750 | args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0; |
10751 | call_fusage = NULL_RTX; |
10752 | rtx pop = NULL_RTX; |
10753 | if (TARGET_64BIT) |
10754 | { |
10755 | rtx reg10, reg11; |
10756 | |
10757 | reg10 = gen_rtx_REG (DImode, R10_REG); |
10758 | reg11 = gen_rtx_REG (DImode, R11_REG); |
10759 | |
10760 | /* If this function uses a static chain, it will be in %r10. |
10761 | Preserve it across the call to __morestack. */ |
10762 | if (DECL_STATIC_CHAIN (cfun->decl)) |
10763 | { |
10764 | rtx rax; |
10765 | |
10766 | rax = gen_rtx_REG (word_mode, AX_REG); |
10767 | emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); |
10768 | use_reg (fusage: &call_fusage, reg: rax); |
10769 | } |
10770 | |
10771 | if (flag_force_indirect_call |
10772 | || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
10773 | { |
10774 | HOST_WIDE_INT argval; |
10775 | |
10776 | if (split_stack_fn_large == NULL_RTX) |
10777 | { |
10778 | split_stack_fn_large |
10779 | = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); |
10780 | SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; |
10781 | } |
10782 | |
10783 | fn = split_stack_fn_large; |
10784 | |
10785 | if (ix86_cmodel == CM_LARGE_PIC) |
10786 | { |
10787 | rtx_code_label *label; |
10788 | rtx x; |
10789 | |
10790 | gcc_assert (Pmode == DImode); |
10791 | |
10792 | label = gen_label_rtx (); |
10793 | emit_label (label); |
10794 | LABEL_PRESERVE_P (label) = 1; |
10795 | emit_insn (gen_set_rip_rex64 (reg10, label)); |
10796 | emit_insn (gen_set_got_offset_rex64 (reg11, label)); |
10797 | emit_insn (gen_add2_insn (reg10, reg11)); |
10798 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT); |
10799 | x = gen_rtx_CONST (Pmode, x); |
10800 | emit_move_insn (reg11, x); |
10801 | x = gen_rtx_PLUS (Pmode, reg10, reg11); |
10802 | x = gen_const_mem (Pmode, x); |
10803 | fn = copy_to_suggested_reg (x, reg11, Pmode); |
10804 | } |
10805 | else if (ix86_cmodel == CM_LARGE) |
10806 | fn = copy_to_suggested_reg (fn, reg11, Pmode); |
10807 | |
10808 | /* When using the large model we need to load the address |
10809 | into a register, and we've run out of registers. So we |
10810 | switch to a different calling convention, and we call a |
10811 | different function: __morestack_large_model. We pass the |
10812 | argument size in the upper 32 bits of r10 and pass the |
10813 | frame size in the lower 32 bits. */ |
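| /* For example (illustrative values only): args_size == 0x20 and |
| allocate == 0x1000 give argval == 0x0000002000001000 in %r10. */ |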
10814 | gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); |
10815 | gcc_assert ((args_size & 0xffffffff) == args_size); |
10816 | |
10817 | argval = ((args_size << 16) << 16) + allocate; |
10818 | emit_move_insn (reg10, GEN_INT (argval)); |
10819 | } |
10820 | else |
10821 | { |
10822 | emit_move_insn (reg10, allocate_rtx); |
10823 | emit_move_insn (reg11, GEN_INT (args_size)); |
10824 | use_reg (fusage: &call_fusage, reg: reg11); |
10825 | } |
10826 | |
10827 | use_reg (fusage: &call_fusage, reg: reg10); |
10828 | } |
10829 | else |
10830 | { |
10831 | if (flag_force_indirect_call && flag_pic) |
10832 | { |
10833 | rtx x; |
10834 | |
10835 | gcc_assert (Pmode == SImode); |
10836 | |
10837 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
10838 | |
10839 | emit_insn (gen_set_got (scratch_reg)); |
10840 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn), |
10841 | UNSPEC_GOT); |
10842 | x = gen_rtx_CONST (Pmode, x); |
10843 | x = gen_rtx_PLUS (Pmode, scratch_reg, x); |
10844 | x = gen_const_mem (Pmode, x); |
10845 | fn = copy_to_suggested_reg (x, scratch_reg, Pmode); |
10846 | } |
10847 | |
10848 | rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); |
10849 | add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); |
10850 | insn = emit_insn (gen_push (arg: allocate_rtx)); |
10851 | add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); |
10852 | pop = GEN_INT (2 * UNITS_PER_WORD); |
10853 | } |
10854 | |
10855 | if (flag_force_indirect_call && !register_operand (fn, VOIDmode)) |
10856 | { |
10857 | scratch_reg = gen_rtx_REG (word_mode, scratch_regno); |
10858 | |
10859 | if (GET_MODE (fn) != word_mode) |
10860 | fn = gen_rtx_ZERO_EXTEND (word_mode, fn); |
10861 | |
10862 | fn = copy_to_suggested_reg (fn, scratch_reg, word_mode); |
10863 | } |
10864 | |
10865 | call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), |
10866 | GEN_INT (UNITS_PER_WORD), constm1_rtx, |
10867 | pop, false); |
10868 | add_function_usage_to (call_insn, call_fusage); |
10869 | if (!TARGET_64BIT) |
10870 | add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); |
10871 | /* Indicate that this function can't jump to non-local gotos. */ |
10872 | make_reg_eh_region_note_nothrow_nononlocal (call_insn); |
10873 | |
10874 | /* In order to make call/return prediction work right, we now need |
10875 | to execute a return instruction. See |
10876 | libgcc/config/i386/morestack.S for the details on how this works. |
10877 | |
10878 | For flow purposes gcc must not see this as a return |
10879 | instruction--we need control flow to continue at the subsequent |
10880 | label. Therefore, we use an unspec. */ |
10881 | gcc_assert (crtl->args.pops_args < 65536); |
10882 | rtx_insn *ret_insn |
10883 | = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); |
10884 | |
10885 | if ((flag_cf_protection & CF_BRANCH)) |
10886 | { |
10887 | /* Insert ENDBR since __morestack will jump back here via indirect |
10888 | call. */ |
10889 | rtx cet_eb = gen_nop_endbr (); |
10890 | emit_insn_after (cet_eb, ret_insn); |
10891 | } |
10892 | |
10893 | /* If we are in 64-bit mode and this function uses a static chain, |
10894 | we saved %r10 in %rax before calling __morestack. */ |
10895 | if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) |
10896 | emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
10897 | gen_rtx_REG (word_mode, AX_REG)); |
10898 | |
10899 | /* If this function calls va_start, we need to store a pointer to |
10900 | the arguments on the old stack, because they may not have been |
10901 | all copied to the new stack. At this point the old stack can be |
10902 | found at the frame pointer value used by __morestack, because |
10903 | __morestack has set that up before calling back to us. Here we |
10904 | store that pointer in a scratch register, and in |
10905 | ix86_expand_prologue we store the scratch register in a stack |
10906 | slot. */ |
10907 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
10908 | { |
10909 | rtx frame_reg; |
10910 | int words; |
10911 | |
10912 | scratch_regno = split_stack_prologue_scratch_regno (); |
10913 | scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
10914 | frame_reg = gen_rtx_REG (Pmode, BP_REG); |
10915 | |
10916 | /* 64-bit: |
10917 | fp -> old fp value |
10918 | return address within this function |
10919 | return address of caller of this function |
10920 | stack arguments |
10921 | So we add three words to get to the stack arguments. |
10922 | |
10923 | 32-bit: |
10924 | fp -> old fp value |
10925 | return address within this function |
10926 | first argument to __morestack |
10927 | second argument to __morestack |
10928 | return address of caller of this function |
10929 | stack arguments |
10930 | So we add five words to get to the stack arguments. |
10931 | */ |
10932 | words = TARGET_64BIT ? 3 : 5; |
10933 | emit_insn (gen_rtx_SET (scratch_reg, |
10934 | plus_constant (Pmode, frame_reg, |
10935 | words * UNITS_PER_WORD))); |
10936 | |
10937 | varargs_label = gen_label_rtx (); |
10938 | emit_jump_insn (gen_jump (varargs_label)); |
10939 | JUMP_LABEL (get_last_insn ()) = varargs_label; |
10940 | |
10941 | emit_barrier (); |
10942 | } |
10943 | |
10944 | emit_label (label); |
10945 | LABEL_NUSES (label) = 1; |
10946 | |
10947 | /* If this function calls va_start, we now have to set the scratch |
10948 | register for the case where we do not call __morestack. In this |
10949 | case we need to set it based on the stack pointer. */ |
10950 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
10951 | { |
10952 | emit_insn (gen_rtx_SET (scratch_reg, |
10953 | plus_constant (Pmode, stack_pointer_rtx, |
10954 | UNITS_PER_WORD))); |
10955 | |
10956 | emit_label (varargs_label); |
10957 | LABEL_NUSES (varargs_label) = 1; |
10958 | } |
10959 | } |
10960 | |
10961 | /* We may have to tell the dataflow pass that the split stack prologue |
10962 | is initializing a scratch register. */ |
10963 | |
10964 | static void |
10965 | ix86_live_on_entry (bitmap regs) |
10966 | { |
10967 | if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
10968 | { |
10969 | gcc_assert (flag_split_stack); |
10970 | bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); |
10971 | } |
10972 | } |
10973 | |
10974 | /* Extract the parts of an RTL expression that is a valid memory address |
10975 | for an instruction. Return false if the structure of the address is |
10976 | grossly off. */ |
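| /* The accepted shape is the usual x86 form base + index*scale + disp with |
| an optional segment, e.g. (a typical RTL form) |
|   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12)) |
| decomposes into base, index, scale 4 and displacement 12, |
| i.e. 12(%base,%index,4). */ |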
10977 | |
10978 | bool |
10979 | ix86_decompose_address (rtx addr, struct ix86_address *out) |
10980 | { |
10981 | rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
10982 | rtx base_reg, index_reg; |
10983 | HOST_WIDE_INT scale = 1; |
10984 | rtx scale_rtx = NULL_RTX; |
10985 | rtx tmp; |
10986 | addr_space_t seg = ADDR_SPACE_GENERIC; |
10987 | |
10988 | /* Allow zero-extended SImode addresses; |
10989 | they will be emitted with an addr32 prefix. */ |
10990 | if (TARGET_64BIT && GET_MODE (addr) == DImode) |
10991 | { |
10992 | if (GET_CODE (addr) == ZERO_EXTEND |
10993 | && GET_MODE (XEXP (addr, 0)) == SImode) |
10994 | { |
10995 | addr = XEXP (addr, 0); |
10996 | if (CONST_INT_P (addr)) |
10997 | return false; |
10998 | } |
10999 | else if (GET_CODE (addr) == AND) |
11000 | { |
11001 | rtx mask = XEXP (addr, 1); |
11002 | rtx shift_val; |
11003 | |
11004 | if (const_32bit_mask (mask, DImode) |
11005 | /* For an ASHIFT inside an AND, combine will not generate a |
11006 | canonical zero-extend. Merge the AND mask and the shift count |
11007 | to check whether it is a canonical zero-extend. */ |
11008 | || (CONST_INT_P (mask) |
11009 | && GET_CODE (XEXP (addr, 0)) == ASHIFT |
11010 | && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1)) |
11011 | && ((UINTVAL (mask) |
11012 | | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1)) |
11013 | == HOST_WIDE_INT_UC (0xffffffff)))) |
11014 | { |
11015 | addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); |
11016 | if (addr == NULL_RTX) |
11017 | return false; |
11018 | |
11019 | if (CONST_INT_P (addr)) |
11020 | return false; |
11021 | } |
11022 | } |
11023 | } |
11024 | |
11025 | /* Allow SImode subregs of DImode addresses; |
11026 | they will be emitted with an addr32 prefix. */ |
11027 | if (TARGET_64BIT && GET_MODE (addr) == SImode) |
11028 | { |
11029 | if (SUBREG_P (addr) |
11030 | && GET_MODE (SUBREG_REG (addr)) == DImode) |
11031 | { |
11032 | addr = SUBREG_REG (addr); |
11033 | if (CONST_INT_P (addr)) |
11034 | return false; |
11035 | } |
11036 | } |
11037 | |
11038 | if (REG_P (addr)) |
11039 | base = addr; |
11040 | else if (SUBREG_P (addr)) |
11041 | { |
11042 | if (REG_P (SUBREG_REG (addr))) |
11043 | base = addr; |
11044 | else |
11045 | return false; |
11046 | } |
11047 | else if (GET_CODE (addr) == PLUS) |
11048 | { |
11049 | rtx addends[4], op; |
11050 | int n = 0, i; |
11051 | |
11052 | op = addr; |
11053 | do |
11054 | { |
11055 | if (n >= 4) |
11056 | return false; |
11057 | addends[n++] = XEXP (op, 1); |
11058 | op = XEXP (op, 0); |
11059 | } |
11060 | while (GET_CODE (op) == PLUS); |
11061 | if (n >= 4) |
11062 | return false; |
11063 | addends[n] = op; |
11064 | |
11065 | for (i = n; i >= 0; --i) |
11066 | { |
11067 | op = addends[i]; |
11068 | switch (GET_CODE (op)) |
11069 | { |
11070 | case MULT: |
11071 | if (index) |
11072 | return false; |
11073 | index = XEXP (op, 0); |
11074 | scale_rtx = XEXP (op, 1); |
11075 | break; |
11076 | |
11077 | case ASHIFT: |
11078 | if (index) |
11079 | return false; |
11080 | index = XEXP (op, 0); |
11081 | tmp = XEXP (op, 1); |
11082 | if (!CONST_INT_P (tmp)) |
11083 | return false; |
11084 | scale = INTVAL (tmp); |
11085 | if ((unsigned HOST_WIDE_INT) scale > 3) |
11086 | return false; |
11087 | scale = 1 << scale; |
11088 | break; |
11089 | |
11090 | case ZERO_EXTEND: |
11091 | op = XEXP (op, 0); |
11092 | if (GET_CODE (op) != UNSPEC) |
11093 | return false; |
11094 | /* FALLTHRU */ |
11095 | |
11096 | case UNSPEC: |
11097 | if (XINT (op, 1) == UNSPEC_TP |
11098 | && TARGET_TLS_DIRECT_SEG_REFS |
11099 | && seg == ADDR_SPACE_GENERIC) |
11100 | seg = DEFAULT_TLS_SEG_REG; |
11101 | else |
11102 | return false; |
11103 | break; |
11104 | |
11105 | case SUBREG: |
11106 | if (!REG_P (SUBREG_REG (op))) |
11107 | return false; |
11108 | /* FALLTHRU */ |
11109 | |
11110 | case REG: |
11111 | if (!base) |
11112 | base = op; |
11113 | else if (!index) |
11114 | index = op; |
11115 | else |
11116 | return false; |
11117 | break; |
11118 | |
11119 | case CONST: |
11120 | case CONST_INT: |
11121 | case SYMBOL_REF: |
11122 | case LABEL_REF: |
11123 | if (disp) |
11124 | return false; |
11125 | disp = op; |
11126 | break; |
11127 | |
11128 | default: |
11129 | return false; |
11130 | } |
11131 | } |
11132 | } |
11133 | else if (GET_CODE (addr) == MULT) |
11134 | { |
11135 | index = XEXP (addr, 0); /* index*scale */ |
11136 | scale_rtx = XEXP (addr, 1); |
11137 | } |
11138 | else if (GET_CODE (addr) == ASHIFT) |
11139 | { |
11140 | /* We're called for lea too, which implements ashift on occasion. */ |
11141 | index = XEXP (addr, 0); |
11142 | tmp = XEXP (addr, 1); |
11143 | if (!CONST_INT_P (tmp)) |
11144 | return false; |
11145 | scale = INTVAL (tmp); |
11146 | if ((unsigned HOST_WIDE_INT) scale > 3) |
11147 | return false; |
11148 | scale = 1 << scale; |
11149 | } |
11150 | else |
11151 | disp = addr; /* displacement */ |
11152 | |
11153 | if (index) |
11154 | { |
11155 | if (REG_P (index)) |
11156 | ; |
11157 | else if (SUBREG_P (index) |
11158 | && REG_P (SUBREG_REG (index))) |
11159 | ; |
11160 | else |
11161 | return false; |
11162 | } |
11163 | |
11164 | /* Extract the integral value of scale. */ |
11165 | if (scale_rtx) |
11166 | { |
11167 | if (!CONST_INT_P (scale_rtx)) |
11168 | return false; |
11169 | scale = INTVAL (scale_rtx); |
11170 | } |
11171 | |
11172 | base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; |
11173 | index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; |
11174 | |
11175 | /* Avoid useless 0 displacement. */ |
11176 | if (disp == const0_rtx && (base || index)) |
11177 | disp = NULL_RTX; |
11178 | |
11179 | /* Allow the arg pointer and stack pointer as index if there is no scaling. */ |
11180 | if (base_reg && index_reg && scale == 1 |
11181 | && (REGNO (index_reg) == ARG_POINTER_REGNUM |
11182 | || REGNO (index_reg) == FRAME_POINTER_REGNUM |
11183 | || REGNO (index_reg) == SP_REG)) |
11184 | { |
11185 | std::swap (base, index); |
11186 | std::swap (base_reg, index_reg); |
11187 | } |
11188 | |
11189 | /* Special case: %ebp cannot be encoded as a base without a displacement. |
11190 | Similarly %r13. */ |
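/* (In ModR/M encoding, mod=00 with these registers in the base field is
   reused for the disp32-only form, so an explicit zero displacement is
   needed to keep them as a base.)  */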
11191 | if (!disp && base_reg |
11192 | && (REGNO (base_reg) == ARG_POINTER_REGNUM |
11193 | || REGNO (base_reg) == FRAME_POINTER_REGNUM |
11194 | || REGNO (base_reg) == BP_REG |
11195 | || REGNO (base_reg) == R13_REG)) |
11196 | disp = const0_rtx; |
11197 | |
11198 | /* Special case: on K6, [%esi] makes the instruction vector decoded. |
11199 | Avoid this by transforming to [%esi+0]. |
11200 | Reload calls address legitimization without cfun defined, so we need |
11201 | to test cfun for being non-NULL. */ |
11202 | if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun) |
11203 | && base_reg && !index_reg && !disp |
11204 | && REGNO (base_reg) == SI_REG) |
11205 | disp = const0_rtx; |
11206 | |
11207 | /* Special case: encode reg+reg instead of reg*2. */ |
11208 | if (!base && index && scale == 2) |
11209 | base = index, base_reg = index_reg, scale = 1; |
11210 | |
11211 | /* Special case: scaling cannot be encoded without base or displacement. */ |
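/* (A scaled index alone is encoded as a SIB byte with no base, a form
   that always carries a disp32 field, hence the explicit zero.)  */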
11212 | if (!base && !disp && index && scale != 1) |
11213 | disp = const0_rtx; |
11214 | |
11215 | out->base = base; |
11216 | out->index = index; |
11217 | out->disp = disp; |
11218 | out->scale = scale; |
11219 | out->seg = seg; |
11220 | |
11221 | return true; |
11222 | } |
11223 | |
11224 | /* Return cost of the memory address x. |
11225 | For i386, it is better to use a complex address than let gcc copy |
11226 | the address into a reg and make a new pseudo. But not if the address |
11227 | requires two regs - that would mean more pseudos with longer |
11228 | lifetimes. */ |
11229 | static int |
11230 | ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) |
11231 | { |
11232 | struct ix86_address parts; |
11233 | int cost = 1; |
11234 | int ok = ix86_decompose_address (x, &parts); |
11235 | |
11236 | gcc_assert (ok); |
11237 | |
11238 | if (parts.base && SUBREG_P (parts.base)) |
11239 | parts.base = SUBREG_REG (parts.base); |
11240 | if (parts.index && SUBREG_P (parts.index)) |
11241 | parts.index = SUBREG_REG (parts.index); |
11242 | |
11243 | /* Attempt to minimize number of registers in the address by increasing |
11244 | address cost for each used register. We don't increase address cost |
11245 | for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" |
11246 | is not invariant itself it most likely means that base or index is not |
11247 | invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, |
11248 | which is not profitable for x86. */ |
11249 | if (parts.base |
11250 | && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) |
11251 | && (current_pass->type == GIMPLE_PASS |
11252 | || !pic_offset_table_rtx |
11253 | || !REG_P (parts.base) |
11254 | || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) |
11255 | cost++; |
11256 | |
11257 | if (parts.index |
11258 | && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) |
11259 | && (current_pass->type == GIMPLE_PASS |
11260 | || !pic_offset_table_rtx |
11261 | || !REG_P (parts.index) |
11262 | || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) |
11263 | cost++; |
11264 | |
11265 | /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b, |
11266 | since its predecode logic can't detect the length of such instructions |
11267 | and decoding degenerates to the vector decoder. Increase the cost of such |
11268 | addresses here. The penalty is at least 2 cycles. It may be worthwhile |
11269 | to split such addresses or even refuse them altogether. |
11270 | |
11271 | The following addressing modes are affected: |
11272 | [base+scale*index] |
11273 | [scale*index+disp] |
11274 | [base+index] |
11275 | |
11276 | The first and last cases may be avoidable by explicitly coding the zero |
11277 | into the memory address, but I don't have an AMD-K6 machine handy to check |
11278 | this theory. */ |
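/* (All three forms are encoded with mod=00 and a SIB byte, i.e. the
   ModR/M value 00_xxx_100b mentioned above.)  */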
11279 | |
11280 | if (TARGET_CPU_P (K6) |
11281 | && ((!parts.disp && parts.base && parts.index && parts.scale != 1) |
11282 | || (parts.disp && !parts.base && parts.index && parts.scale != 1) |
11283 | || (!parts.disp && parts.base && parts.index && parts.scale == 1))) |
11284 | cost += 10; |
11285 | |
11286 | return cost; |
11287 | } |
11288 | |
11289 | /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as |
11290 | this is used to form addresses to local data when -fPIC is in |
11291 | use. */ |
11292 | |
11293 | static bool |
11294 | darwin_local_data_pic (rtx disp) |
11295 | { |
11296 | return (GET_CODE (disp) == UNSPEC |
11297 | && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); |
11298 | } |
11299 | |
11300 | /* True if the function symbol operand X should be loaded from GOT. |
11301 | If CALL_P is true, X is a call operand. |
11302 | |
11303 | NB: -mno-direct-extern-access doesn't force load from GOT for |
11304 | call. |
11305 | |
11306 | NB: In 32-bit mode, only non-PIC is allowed in inline assembly |
11307 | statements, since a PIC register may not be available at the |
11308 | call site. */ |
11309 | |
11310 | bool |
11311 | ix86_force_load_from_GOT_p (rtx x, bool call_p) |
11312 | { |
11313 | return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X)) |
11314 | && !TARGET_PECOFF && !TARGET_MACHO |
11315 | && (!flag_pic || this_is_asm_operands) |
11316 | && ix86_cmodel != CM_LARGE |
11317 | && ix86_cmodel != CM_LARGE_PIC |
11318 | && GET_CODE (x) == SYMBOL_REF |
11319 | && ((!call_p |
11320 | && (!ix86_direct_extern_access |
11321 | || (SYMBOL_REF_DECL (x) |
11322 | && lookup_attribute ("nodirect_extern_access", |
11323 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))) |
11324 | || (SYMBOL_REF_FUNCTION_P (x) |
11325 | && (!flag_plt |
11326 | || (SYMBOL_REF_DECL (x) |
11327 | && lookup_attribute ("noplt", |
11328 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))) |
11329 | && !SYMBOL_REF_LOCAL_P (x)); |
11330 | } |
11331 | |
11332 | /* Determine if a given RTX is a valid constant. We already know this |
11333 | satisfies CONSTANT_P. */ |
11334 | |
11335 | static bool |
11336 | ix86_legitimate_constant_p (machine_mode mode, rtx x) |
11337 | { |
11338 | switch (GET_CODE (x)) |
11339 | { |
11340 | case CONST: |
11341 | x = XEXP (x, 0); |
11342 | |
11343 | if (GET_CODE (x) == PLUS) |
11344 | { |
11345 | if (!CONST_INT_P (XEXP (x, 1))) |
11346 | return false; |
11347 | x = XEXP (x, 0); |
11348 | } |
11349 | |
11350 | if (TARGET_MACHO && darwin_local_data_pic (x)) |
11351 | return true; |
11352 | |
11353 | /* Only some unspecs are valid as "constants". */ |
11354 | if (GET_CODE (x) == UNSPEC) |
11355 | switch (XINT (x, 1)) |
11356 | { |
11357 | case UNSPEC_GOT: |
11358 | case UNSPEC_GOTOFF: |
11359 | case UNSPEC_PLTOFF: |
11360 | return TARGET_64BIT; |
11361 | case UNSPEC_TPOFF: |
11362 | case UNSPEC_NTPOFF: |
11363 | x = XVECEXP (x, 0, 0); |
11364 | return (GET_CODE (x) == SYMBOL_REF |
11365 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
11366 | case UNSPEC_DTPOFF: |
11367 | x = XVECEXP (x, 0, 0); |
11368 | return (GET_CODE (x) == SYMBOL_REF |
11369 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); |
11370 | case UNSPEC_SECREL32: |
11371 | x = XVECEXP (x, 0, 0); |
11372 | return GET_CODE (x) == SYMBOL_REF; |
11373 | default: |
11374 | return false; |
11375 | } |
11376 | |
11377 | /* We must have drilled down to a symbol. */ |
11378 | if (GET_CODE (x) == LABEL_REF) |
11379 | return true; |
11380 | if (GET_CODE (x) != SYMBOL_REF) |
11381 | return false; |
11382 | /* FALLTHRU */ |
11383 | |
11384 | case SYMBOL_REF: |
11385 | /* TLS symbols are never valid. */ |
11386 | if (SYMBOL_REF_TLS_MODEL (x)) |
11387 | return false; |
11388 | |
11389 | /* DLLIMPORT symbols are never valid. */ |
11390 | if (TARGET_DLLIMPORT_DECL_ATTRIBUTES |
11391 | && SYMBOL_REF_DLLIMPORT_P (x)) |
11392 | return false; |
11393 | |
11394 | #if TARGET_MACHO |
11395 | /* mdynamic-no-pic */ |
11396 | if (MACHO_DYNAMIC_NO_PIC_P) |
11397 | return machopic_symbol_defined_p (x); |
11398 | #endif |
11399 | |
11400 | /* External function address should be loaded |
11401 | via the GOT slot to avoid PLT. */ |
11402 | if (ix86_force_load_from_GOT_p (x)) |
11403 | return false; |
11404 | |
11405 | break; |
11406 | |
11407 | CASE_CONST_SCALAR_INT: |
11408 | if (ix86_endbr_immediate_operand (x, VOIDmode)) |
11409 | return false; |
11410 | |
11411 | switch (mode) |
11412 | { |
11413 | case E_TImode: |
11414 | if (TARGET_64BIT) |
11415 | return true; |
11416 | /* FALLTHRU */ |
11417 | case E_OImode: |
11418 | case E_XImode: |
11419 | if (!standard_sse_constant_p (x, mode) |
11420 | && GET_MODE_SIZE (TARGET_AVX512F |
11421 | ? XImode |
11422 | : (TARGET_AVX |
11423 | ? OImode |
11424 | : (TARGET_SSE2 |
11425 | ? TImode : DImode))) < GET_MODE_SIZE (mode)) |
11426 | return false; |
11427 | default: |
11428 | break; |
11429 | } |
11430 | break; |
11431 | |
11432 | case CONST_VECTOR: |
11433 | if (!standard_sse_constant_p (x, mode)) |
11434 | return false; |
11435 | break; |
11436 | |
11437 | case CONST_DOUBLE: |
11438 | if (mode == E_BFmode) |
11439 | return false; |
11440 | |
11441 | default: |
11442 | break; |
11443 | } |
11444 | |
11445 | /* Otherwise we handle everything else in the move patterns. */ |
11446 | return true; |
11447 | } |
11448 | |
11449 | /* Determine if it's legal to put X into the constant pool. This |
11450 | is not possible for the address of thread-local symbols, which |
11451 | is checked above. */ |
11452 | |
11453 | static bool |
11454 | ix86_cannot_force_const_mem (machine_mode mode, rtx x) |
11455 | { |
11456 | /* We can put any immediate constant in memory. */ |
11457 | switch (GET_CODE (x)) |
11458 | { |
11459 | CASE_CONST_ANY: |
11460 | return false; |
11461 | |
11462 | default: |
11463 | break; |
11464 | } |
11465 | |
11466 | return !ix86_legitimate_constant_p (mode, x); |
11467 | } |
11468 | |
11469 | /* Return a unique alias set for the GOT. */ |
11470 | |
11471 | alias_set_type |
11472 | ix86_GOT_alias_set (void) |
11473 | { |
11474 | static alias_set_type set = -1; |
11475 | if (set == -1) |
11476 | set = new_alias_set (); |
11477 | return set; |
11478 | } |
11479 | |
11480 | /* Nonzero if the constant value X is a legitimate general operand |
11481 | when generating PIC code. It is given that flag_pic is on and |
11482 | that X satisfies CONSTANT_P. */ |
11483 | |
11484 | bool |
11485 | legitimate_pic_operand_p (rtx x) |
11486 | { |
11487 | rtx inner; |
11488 | |
11489 | switch (GET_CODE (x)) |
11490 | { |
11491 | case CONST: |
11492 | inner = XEXP (x, 0); |
11493 | if (GET_CODE (inner) == PLUS |
11494 | && CONST_INT_P (XEXP (inner, 1))) |
11495 | inner = XEXP (inner, 0); |
11496 | |
11497 | /* Only some unspecs are valid as "constants". */ |
11498 | if (GET_CODE (inner) == UNSPEC) |
11499 | switch (XINT (inner, 1)) |
11500 | { |
11501 | case UNSPEC_GOT: |
11502 | case UNSPEC_GOTOFF: |
11503 | case UNSPEC_PLTOFF: |
11504 | return TARGET_64BIT; |
11505 | case UNSPEC_TPOFF: |
11506 | x = XVECEXP (inner, 0, 0); |
11507 | return (GET_CODE (x) == SYMBOL_REF |
11508 | && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
11509 | case UNSPEC_SECREL32: |
11510 | x = XVECEXP (inner, 0, 0); |
11511 | return GET_CODE (x) == SYMBOL_REF; |
11512 | case UNSPEC_MACHOPIC_OFFSET: |
11513 | return legitimate_pic_address_disp_p (x); |
11514 | default: |
11515 | return false; |
11516 | } |
11517 | /* FALLTHRU */ |
11518 | |
11519 | case SYMBOL_REF: |
11520 | case LABEL_REF: |
11521 | return legitimate_pic_address_disp_p (x); |
11522 | |
11523 | default: |
11524 | return true; |
11525 | } |
11526 | } |
11527 | |
11528 | /* Determine if a given CONST RTX is a valid memory displacement |
11529 | in PIC mode. */ |
11530 | |
11531 | bool |
11532 | legitimate_pic_address_disp_p (rtx disp) |
11533 | { |
11534 | bool saw_plus; |
11535 | |
11536 | /* In 64bit mode we can allow direct addresses of symbols and labels |
11537 | when they are not dynamic symbols. */ |
11538 | if (TARGET_64BIT) |
11539 | { |
11540 | rtx op0 = disp, op1; |
11541 | |
11542 | switch (GET_CODE (disp)) |
11543 | { |
11544 | case LABEL_REF: |
11545 | return true; |
11546 | |
11547 | case CONST: |
11548 | if (GET_CODE (XEXP (disp, 0)) != PLUS) |
11549 | break; |
11550 | op0 = XEXP (XEXP (disp, 0), 0); |
11551 | op1 = XEXP (XEXP (disp, 0), 1); |
11552 | if (!CONST_INT_P (op1)) |
11553 | break; |
11554 | if (GET_CODE (op0) == UNSPEC |
11555 | && (XINT (op0, 1) == UNSPEC_DTPOFF |
11556 | || XINT (op0, 1) == UNSPEC_NTPOFF) |
11557 | && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) |
11558 | return true; |
11559 | if (INTVAL (op1) >= 16*1024*1024 |
11560 | || INTVAL (op1) < -16*1024*1024) |
11561 | break; |
11562 | if (GET_CODE (op0) == LABEL_REF) |
11563 | return true; |
11564 | if (GET_CODE (op0) == CONST |
11565 | && GET_CODE (XEXP (op0, 0)) == UNSPEC |
11566 | && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) |
11567 | return true; |
11568 | if (GET_CODE (op0) == UNSPEC |
11569 | && XINT (op0, 1) == UNSPEC_PCREL) |
11570 | return true; |
11571 | if (GET_CODE (op0) != SYMBOL_REF) |
11572 | break; |
11573 | /* FALLTHRU */ |
11574 | |
11575 | case SYMBOL_REF: |
11576 | /* TLS references should always be enclosed in UNSPEC. |
11577 | A dllimported symbol always needs to be resolved. */ |
11578 | if (SYMBOL_REF_TLS_MODEL (op0) |
11579 | || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) |
11580 | return false; |
11581 | |
11582 | if (TARGET_PECOFF) |
11583 | { |
11584 | #if TARGET_PECOFF |
11585 | if (is_imported_p (op0)) |
11586 | return true; |
11587 | #endif |
11588 | |
11589 | if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0)) |
11590 | break; |
11591 | |
11592 | /* Non-external-weak function symbols need to be resolved only |
11593 | for the large model. Non-external symbols don't need to be |
11594 | resolved for large and medium models. For the small model, |
11595 | we don't need to resolve anything here. */ |
11596 | if ((ix86_cmodel != CM_LARGE_PIC |
11597 | && SYMBOL_REF_FUNCTION_P (op0) |
11598 | && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0))) |
11599 | || !SYMBOL_REF_EXTERNAL_P (op0) |
11600 | || ix86_cmodel == CM_SMALL_PIC) |
11601 | return true; |
11602 | } |
11603 | else if (!SYMBOL_REF_FAR_ADDR_P (op0) |
11604 | && (SYMBOL_REF_LOCAL_P (op0) |
11605 | || ((ix86_direct_extern_access |
11606 | && !(SYMBOL_REF_DECL (op0) |
11607 | && lookup_attribute ("nodirect_extern_access", |
11608 | DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0))))) |
11609 | && HAVE_LD_PIE_COPYRELOC |
11610 | && flag_pie |
11611 | && !SYMBOL_REF_WEAK (op0) |
11612 | && !SYMBOL_REF_FUNCTION_P (op0))) |
11613 | && ix86_cmodel != CM_LARGE_PIC) |
11614 | return true; |
11615 | break; |
11616 | |
11617 | default: |
11618 | break; |
11619 | } |
11620 | } |
11621 | if (GET_CODE (disp) != CONST) |
11622 | return false; |
11623 | disp = XEXP (disp, 0); |
11624 | |
11625 | if (TARGET_64BIT) |
11626 | { |
11627 | /* It is not safe to allow PLUS expressions; this limits the allowed |
11628 | distance of GOT references. We should not need these anyway. */ |
11629 | if (GET_CODE (disp) != UNSPEC |
11630 | || (XINT (disp, 1) != UNSPEC_GOTPCREL |
11631 | && XINT (disp, 1) != UNSPEC_GOTOFF |
11632 | && XINT (disp, 1) != UNSPEC_PCREL |
11633 | && XINT (disp, 1) != UNSPEC_PLTOFF)) |
11634 | return false; |
11635 | |
11636 | if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF |
11637 | && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) |
11638 | return false; |
11639 | return true; |
11640 | } |
11641 | |
11642 | saw_plus = false; |
11643 | if (GET_CODE (disp) == PLUS) |
11644 | { |
11645 | if (!CONST_INT_P (XEXP (disp, 1))) |
11646 | return false; |
11647 | disp = XEXP (disp, 0); |
11648 | saw_plus = true; |
11649 | } |
11650 | |
11651 | if (TARGET_MACHO && darwin_local_data_pic (disp)) |
11652 | return true; |
11653 | |
11654 | if (GET_CODE (disp) != UNSPEC) |
11655 | return false; |
11656 | |
11657 | switch (XINT (disp, 1)) |
11658 | { |
11659 | case UNSPEC_GOT: |
11660 | if (saw_plus) |
11661 | return false; |
11662 | /* We need to check for both symbols and labels because VxWorks loads |
11663 | text labels with @GOT rather than @GOTOFF. See gotoff_operand for |
11664 | details. */ |
11665 | return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
11666 | || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); |
11667 | case UNSPEC_GOTOFF: |
11668 | /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. |
11669 | While the ABI also specifies a 32bit relocation, we don't produce |
11670 | it in the small PIC model at all. */ |
11671 | if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
11672 | || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) |
11673 | && !TARGET_64BIT) |
11674 | return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); |
11675 | return false; |
11676 | case UNSPEC_GOTTPOFF: |
11677 | case UNSPEC_GOTNTPOFF: |
11678 | case UNSPEC_INDNTPOFF: |
11679 | if (saw_plus) |
11680 | return false; |
11681 | disp = XVECEXP (disp, 0, 0); |
11682 | return (GET_CODE (disp) == SYMBOL_REF |
11683 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); |
11684 | case UNSPEC_NTPOFF: |
11685 | disp = XVECEXP (disp, 0, 0); |
11686 | return (GET_CODE (disp) == SYMBOL_REF |
11687 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); |
11688 | case UNSPEC_DTPOFF: |
11689 | disp = XVECEXP (disp, 0, 0); |
11690 | return (GET_CODE (disp) == SYMBOL_REF |
11691 | && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); |
11692 | case UNSPEC_SECREL32: |
11693 | disp = XVECEXP (disp, 0, 0); |
11694 | return GET_CODE (disp) == SYMBOL_REF; |
11695 | } |
11696 | |
11697 | return false; |
11698 | } |
11699 | |
11700 | /* Determine if OP is a suitable RTX for an address register. |
11701 | Return the naked register if a register or a register subreg is |
11702 | found, otherwise return NULL_RTX. */ |
11703 | |
11704 | static rtx |
11705 | ix86_validate_address_register (rtx op) |
11706 | { |
11707 | machine_mode mode = GET_MODE (op); |
11708 | |
11709 | /* Only SImode or DImode registers can form the address. */ |
11710 | if (mode != SImode && mode != DImode) |
11711 | return NULL_RTX; |
11712 | |
11713 | if (REG_P (op)) |
11714 | return op; |
11715 | else if (SUBREG_P (op)) |
11716 | { |
11717 | rtx reg = SUBREG_REG (op); |
11718 | |
11719 | if (!REG_P (reg)) |
11720 | return NULL_RTX; |
11721 | |
11722 | mode = GET_MODE (reg); |
11723 | |
11724 | /* Don't allow SUBREGs that span more than a word. It can |
11725 | lead to spill failures when the register is one word out |
11726 | of a two word structure. */ |
11727 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
11728 | return NULL_RTX; |
11729 | |
11730 | /* Allow only SUBREGs of non-eliminable hard registers. */ |
11731 | if (register_no_elim_operand (reg, mode)) |
11732 | return reg; |
11733 | } |
11734 | |
11735 | /* Op is not a register. */ |
11736 | return NULL_RTX; |
11737 | } |
11738 | |
11739 | /* Determine which memory address register set the insn can use. */ |
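/* ADDR_GPR32 is assumed here to mean the full GPR set including the APX
   extended registers, ADDR_GPR16 the first 16 GPRs only, and ADDR_GPR8
   the 8 legacy registers, matching the register classes returned by the
   helpers below.  */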
11740 | |
11741 | static enum attr_addr |
11742 | ix86_memory_address_reg_class (rtx_insn* insn) |
11743 | { |
11744 | /* LRA can do some initialization with a NULL insn; |
11745 | return the maximum register class in this case. */ |
11746 | enum attr_addr addr_rclass = ADDR_GPR32; |
11747 | |
11748 | if (!insn) |
11749 | return addr_rclass; |
11750 | |
11751 | if (asm_noperands (PATTERN (insn)) >= 0 |
11752 | || GET_CODE (PATTERN (insn)) == ASM_INPUT) |
11753 | return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16; |
11754 | |
11755 | /* Return maximum register class for unrecognized instructions. */ |
11756 | if (INSN_CODE (insn) < 0) |
11757 | return addr_rclass; |
11758 | |
11759 | /* Try to recognize the insn before calling get_attr_addr. |
11760 | Save current recog_data and current alternative. */ |
11761 | struct recog_data_d saved_recog_data = recog_data; |
11762 | int saved_alternative = which_alternative; |
11763 | |
11764 | /* Update recog_data for processing of alternatives. */ |
11765 | extract_insn_cached (insn); |
11766 | |
11767 | /* If the current alternative is not set, loop through the enabled |
11768 | alternatives and get the most limited register class. */ |
11769 | if (saved_alternative == -1) |
11770 | { |
11771 | alternative_mask enabled = get_enabled_alternatives (insn); |
11772 | |
11773 | for (int i = 0; i < recog_data.n_alternatives; i++) |
11774 | { |
11775 | if (!TEST_BIT (enabled, i)) |
11776 | continue; |
11777 | |
11778 | which_alternative = i; |
11779 | addr_rclass = MIN (addr_rclass, get_attr_addr (insn)); |
11780 | } |
11781 | } |
11782 | else |
11783 | { |
11784 | which_alternative = saved_alternative; |
11785 | addr_rclass = get_attr_addr (insn); |
11786 | } |
11787 | |
11788 | recog_data = saved_recog_data; |
11789 | which_alternative = saved_alternative; |
11790 | |
11791 | return addr_rclass; |
11792 | } |
11793 | |
11794 | /* Return the memory address register class the insn can use. */ |
11795 | |
11796 | enum reg_class |
11797 | ix86_insn_base_reg_class (rtx_insn* insn) |
11798 | { |
11799 | switch (ix86_memory_address_reg_class (insn)) |
11800 | { |
11801 | case ADDR_GPR8: |
11802 | return LEGACY_GENERAL_REGS; |
11803 | case ADDR_GPR16: |
11804 | return GENERAL_GPR16; |
11805 | case ADDR_GPR32: |
11806 | break; |
11807 | default: |
11808 | gcc_unreachable (); |
11809 | } |
11810 | |
11811 | return BASE_REG_CLASS; |
11812 | } |
11813 | |
11814 | bool |
11815 | ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn) |
11816 | { |
11817 | switch (ix86_memory_address_reg_class (insn)) |
11818 | { |
11819 | case ADDR_GPR8: |
11820 | return LEGACY_INT_REGNO_P (regno); |
11821 | case ADDR_GPR16: |
11822 | return GENERAL_GPR16_REGNO_P (regno); |
11823 | case ADDR_GPR32: |
11824 | break; |
11825 | default: |
11826 | gcc_unreachable (); |
11827 | } |
11828 | |
11829 | return GENERAL_REGNO_P (regno); |
11830 | } |
11831 | |
11832 | enum reg_class |
11833 | ix86_insn_index_reg_class (rtx_insn* insn) |
11834 | { |
11835 | switch (ix86_memory_address_reg_class (insn)) |
11836 | { |
11837 | case ADDR_GPR8: |
11838 | return LEGACY_INDEX_REGS; |
11839 | case ADDR_GPR16: |
11840 | return INDEX_GPR16; |
11841 | case ADDR_GPR32: |
11842 | break; |
11843 | default: |
11844 | gcc_unreachable (); |
11845 | } |
11846 | |
11847 | return INDEX_REG_CLASS; |
11848 | } |
11849 | |
11850 | /* Recognizes RTL expressions that are valid memory addresses for an |
11851 | instruction. The MODE argument is the machine mode for the MEM |
11852 | expression that wants to use this address. |
11853 | |
11854 | It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should |
11855 | convert common non-canonical forms to canonical form so that they will |
11856 | be recognized. */ |
11857 | |
11858 | static bool |
11859 | ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, |
11860 | code_helper = ERROR_MARK) |
11861 | { |
11862 | struct ix86_address parts; |
11863 | rtx base, index, disp; |
11864 | HOST_WIDE_INT scale; |
11865 | addr_space_t seg; |
11866 | |
11867 | if (ix86_decompose_address (addr, &parts) == 0) |
11868 | /* Decomposition failed. */ |
11869 | return false; |
11870 | |
11871 | base = parts.base; |
11872 | index = parts.index; |
11873 | disp = parts.disp; |
11874 | scale = parts.scale; |
11875 | seg = parts.seg; |
11876 | |
11877 | /* Validate base register. */ |
11878 | if (base) |
11879 | { |
11880 | rtx reg = ix86_validate_address_register (base); |
11881 | |
11882 | if (reg == NULL_RTX) |
11883 | return false; |
11884 | |
11885 | unsigned int regno = REGNO (reg); |
11886 | if ((strict && !REGNO_OK_FOR_BASE_P (regno)) |
11887 | || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno))) |
11888 | /* Base is not valid. */ |
11889 | return false; |
11890 | } |
11891 | |
11892 | /* Validate index register. */ |
11893 | if (index) |
11894 | { |
11895 | rtx reg = ix86_validate_address_register (index); |
11896 | |
11897 | if (reg == NULL_RTX) |
11898 | return false; |
11899 | |
11900 | unsigned int regno = REGNO (reg); |
11901 | if ((strict && !REGNO_OK_FOR_INDEX_P (regno)) |
11902 | || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno))) |
11903 | /* Index is not valid. */ |
11904 | return false; |
11905 | } |
11906 | |
11907 | /* Index and base should have the same mode. */ |
11908 | if (base && index |
11909 | && GET_MODE (base) != GET_MODE (index)) |
11910 | return false; |
11911 | |
11912 | /* Address override works only on the (%reg) part of %fs:(%reg). */ |
11913 | if (seg != ADDR_SPACE_GENERIC |
11914 | && ((base && GET_MODE (base) != word_mode) |
11915 | || (index && GET_MODE (index) != word_mode))) |
11916 | return false; |
11917 | |
11918 | /* Validate scale factor. */ |
11919 | if (scale != 1) |
11920 | { |
11921 | if (!index) |
11922 | /* Scale without index. */ |
11923 | return false; |
11924 | |
11925 | if (scale != 2 && scale != 4 && scale != 8) |
11926 | /* Scale is not a valid multiplier. */ |
11927 | return false; |
11928 | } |
11929 | |
11930 | /* Validate displacement. */ |
11931 | if (disp) |
11932 | { |
11933 | if (ix86_endbr_immediate_operand (disp, VOIDmode)) |
11934 | return false; |
11935 | |
11936 | if (GET_CODE (disp) == CONST |
11937 | && GET_CODE (XEXP (disp, 0)) == UNSPEC |
11938 | && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) |
11939 | switch (XINT (XEXP (disp, 0), 1)) |
11940 | { |
11941 | /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit |
11942 | when used. While the ABI also specifies 32bit relocations, we |
11943 | don't produce them at all and use IP relative addressing instead. |
11944 | Allow GOT in 32bit mode for both PIC and non-PIC if the symbol |
11945 | should be loaded via the GOT. */ |
11946 | case UNSPEC_GOT: |
11947 | if (!TARGET_64BIT |
11948 | && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) |
11949 | goto is_legitimate_pic; |
11950 | /* FALLTHRU */ |
11951 | case UNSPEC_GOTOFF: |
11952 | gcc_assert (flag_pic); |
11953 | if (!TARGET_64BIT) |
11954 | goto is_legitimate_pic; |
11955 | |
11956 | /* 64bit address unspec. */ |
11957 | return false; |
11958 | |
11959 | case UNSPEC_GOTPCREL: |
11960 | if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) |
11961 | goto is_legitimate_pic; |
11962 | /* FALLTHRU */ |
11963 | case UNSPEC_PCREL: |
11964 | gcc_assert (flag_pic); |
11965 | goto is_legitimate_pic; |
11966 | |
11967 | case UNSPEC_GOTTPOFF: |
11968 | case UNSPEC_GOTNTPOFF: |
11969 | case UNSPEC_INDNTPOFF: |
11970 | case UNSPEC_NTPOFF: |
11971 | case UNSPEC_DTPOFF: |
11972 | case UNSPEC_SECREL32: |
11973 | break; |
11974 | |
11975 | default: |
11976 | /* Invalid address unspec. */ |
11977 | return false; |
11978 | } |
11979 | |
11980 | else if (SYMBOLIC_CONST (disp) |
11981 | && (flag_pic |
11982 | #if TARGET_MACHO |
11983 | || (MACHOPIC_INDIRECT |
11984 | && !machopic_operand_p (disp)) |
11985 | #endif |
11986 | )) |
11987 | { |
11988 | |
11989 | is_legitimate_pic: |
11990 | if (TARGET_64BIT && (index || base)) |
11991 | { |
11992 | /* foo@dtpoff(%rX) is ok. */ |
11993 | if (GET_CODE (disp) != CONST |
11994 | || GET_CODE (XEXP (disp, 0)) != PLUS |
11995 | || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC |
11996 | || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) |
11997 | || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF |
11998 | && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF |
11999 | && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32)) |
12000 | /* Non-constant pic memory reference. */ |
12001 | return false; |
12002 | } |
12003 | else if ((!TARGET_MACHO || flag_pic) |
12004 | && ! legitimate_pic_address_disp_p (disp)) |
12005 | /* Displacement is an invalid pic construct. */ |
12006 | return false; |
12007 | #if TARGET_MACHO |
12008 | else if (MACHO_DYNAMIC_NO_PIC_P |
12009 | && !ix86_legitimate_constant_p (Pmode, disp)) |
12010 | /* displacment must be referenced via non_lazy_pointer */ |
12011 | return false; |
12012 | #endif |
12013 | |
12014 | /* This code used to verify that a symbolic pic displacement |
12015 | includes the pic_offset_table_rtx register. |
12016 | |
12017 | While this is a good idea, unfortunately these constructs may |
12018 | be created by the "adds using lea" optimization for incorrect |
12019 | code like: |
12020 | |
12021 | int a; |
12022 | int foo(int i) |
12023 | { |
12024 | return *(&a+i); |
12025 | } |
12026 | |
12027 | This code is nonsensical, but results in addressing the |
12028 | GOT table with a pic_offset_table_rtx base. We can't |
12029 | just refuse it easily, since it gets matched by the |
12030 | "addsi3" pattern, which later gets split to an lea when |
12031 | the output register differs from the input. While this |
12032 | could be handled by a separate addsi pattern for this case |
12033 | that never results in an lea, disabling this test seems to |
12034 | be the easier and correct fix for the crash. */ |
12035 | } |
12036 | else if (GET_CODE (disp) != LABEL_REF |
12037 | && !CONST_INT_P (disp) |
12038 | && (GET_CODE (disp) != CONST |
12039 | || !ix86_legitimate_constant_p (Pmode, disp)) |
12040 | && (GET_CODE (disp) != SYMBOL_REF |
12041 | || !ix86_legitimate_constant_p (Pmode, disp))) |
12042 | /* Displacement is not constant. */ |
12043 | return false; |
12044 | else if (TARGET_64BIT |
12045 | && !x86_64_immediate_operand (disp, VOIDmode)) |
12046 | /* Displacement is out of range. */ |
12047 | return false; |
12048 | /* In x32 mode, constant addresses are sign extended to 64bit, so |
12049 | we have to prevent addresses from 0x80000000 to 0xffffffff. */ |
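/* (For example, the constant address 0x80000000 would be sign-extended
   to 0xffffffff80000000 when used in a 64-bit addressing context.)  */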
12050 | else if (TARGET_X32 && !(index || base) |
12051 | && CONST_INT_P (disp) |
12052 | && val_signbit_known_set_p (SImode, INTVAL (disp))) |
12053 | return false; |
12054 | } |
12055 | |
12056 | /* Everything looks valid. */ |
12057 | return true; |
12058 | } |
12059 | |
12060 | /* Determine if a given RTX is a valid constant address. */ |
12061 | |
12062 | bool |
12063 | constant_address_p (rtx x) |
12064 | { |
12065 | return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); |
12066 | } |
12067 | |
12068 | |
12069 | /* Return a legitimate reference for ORIG (an address) using the |
12070 | register REG. If REG is 0, a new pseudo is generated. |
12071 | |
12072 | There are two types of references that must be handled: |
12073 | |
12074 | 1. Global data references must load the address from the GOT, via |
12075 | the PIC reg. An insn is emitted to do this load, and the reg is |
12076 | returned. |
12077 | |
12078 | 2. Static data references, constant pool addresses, and code labels |
12079 | compute the address as an offset from the GOT, whose base is in |
12080 | the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to |
12081 | differentiate them from global data objects. The returned |
12082 | address is the PIC reg + an unspec constant. |
12083 | |
12084 | TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC |
12085 | reg also appears in the address. */ |
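/* A sketch of the usual 32-bit ELF sequences these two cases produce:
     movl sym@GOT(%ebx), %reg    -- case 1, the address is loaded from the GOT
     sym@GOTOFF(%ebx)            -- case 2, an offset from the GOT base
   The exact relocations depend on the target flags handled below.  */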
12086 | |
12087 | rtx |
12088 | legitimize_pic_address (rtx orig, rtx reg) |
12089 | { |
12090 | rtx addr = orig; |
12091 | rtx new_rtx = orig; |
12092 | |
12093 | #if TARGET_MACHO |
12094 | if (TARGET_MACHO && !TARGET_64BIT) |
12095 | { |
12096 | if (reg == 0) |
12097 | reg = gen_reg_rtx (Pmode); |
12098 | /* Use the generic Mach-O PIC machinery. */ |
12099 | return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); |
12100 | } |
12101 | #endif |
12102 | |
12103 | if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
12104 | { |
12105 | #if TARGET_PECOFF |
12106 | rtx tmp = legitimize_pe_coff_symbol (addr, true); |
12107 | if (tmp) |
12108 | return tmp; |
12109 | #endif |
12110 | } |
12111 | |
12112 | if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) |
12113 | new_rtx = addr; |
12114 | else if ((!TARGET_64BIT |
12115 | || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC) |
12116 | && !TARGET_PECOFF |
12117 | && gotoff_operand (addr, Pmode)) |
12118 | { |
12119 | /* This symbol may be referenced via a displacement |
12120 | from the PIC base address (@GOTOFF). */ |
12121 | if (GET_CODE (addr) == CONST) |
12122 | addr = XEXP (addr, 0); |
12123 | |
12124 | if (GET_CODE (addr) == PLUS) |
12125 | { |
12126 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), |
12127 | UNSPEC_GOTOFF); |
12128 | new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); |
12129 | } |
12130 | else |
12131 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); |
12132 | |
12133 | new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
12134 | |
12135 | if (TARGET_64BIT) |
12136 | new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); |
12137 | |
12138 | if (reg != 0) |
12139 | { |
12140 | gcc_assert (REG_P (reg)); |
12141 | new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, |
12142 | new_rtx, reg, 1, OPTAB_DIRECT); |
12143 | } |
12144 | else |
12145 | new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
12146 | } |
12147 | else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) |
12148 | /* We can't always use @GOTOFF for text labels |
12149 | on VxWorks, see gotoff_operand. */ |
12150 | || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) |
12151 | { |
12152 | #if TARGET_PECOFF |
12153 | rtx tmp = legitimize_pe_coff_symbol (addr, true); |
12154 | if (tmp) |
12155 | return tmp; |
12156 | #endif |
12157 | |
12158 | /* For x64 PE-COFF there is no GOT table, |
12159 | so we use the address directly. */ |
12160 | if (TARGET_64BIT && TARGET_PECOFF) |
12161 | { |
12162 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); |
12163 | new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
12164 | } |
12165 | else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) |
12166 | { |
12167 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), |
12168 | UNSPEC_GOTPCREL); |
12169 | new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
12170 | new_rtx = gen_const_mem (Pmode, new_rtx); |
12171 | set_mem_alias_set (new_rtx, GOT_ALIAS_SET); |
12172 | } |
12173 | else |
12174 | { |
12175 | /* This symbol must be referenced via a load |
12176 | from the Global Offset Table (@GOT). */ |
12177 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); |
12178 | new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
12179 | |
12180 | if (TARGET_64BIT) |
12181 | new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); |
12182 | |
12183 | if (reg != 0) |
12184 | { |
12185 | gcc_assert (REG_P (reg)); |
12186 | new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, |
12187 | new_rtx, reg, 1, OPTAB_DIRECT); |
12188 | } |
12189 | else |
12190 | new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
12191 | |
12192 | new_rtx = gen_const_mem (Pmode, new_rtx); |
12193 | set_mem_alias_set (new_rtx, GOT_ALIAS_SET); |
12194 | } |
12195 | |
12196 | new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); |
12197 | } |
12198 | else |
12199 | { |
12200 | if (CONST_INT_P (addr) |
12201 | && !x86_64_immediate_operand (addr, VOIDmode)) |
12202 | new_rtx = copy_to_suggested_reg (addr, reg, Pmode); |
12203 | else if (GET_CODE (addr) == CONST) |
12204 | { |
12205 | addr = XEXP (addr, 0); |
12206 | |
12207 | /* We must match stuff we generate before. Assume the only |
12208 | unspecs that can get here are ours. Not that we could do |
12209 | anything with them anyway.... */ |
12210 | if (GET_CODE (addr) == UNSPEC |
12211 | || (GET_CODE (addr) == PLUS |
12212 | && GET_CODE (XEXP (addr, 0)) == UNSPEC)) |
12213 | return orig; |
12214 | gcc_assert (GET_CODE (addr) == PLUS); |
12215 | } |
12216 | |
12217 | if (GET_CODE (addr) == PLUS) |
12218 | { |
12219 | rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); |
12220 | |
12221 | /* Check first to see if this is a constant |
12222 | offset from a @GOTOFF symbol reference. */ |
12223 | if (!TARGET_PECOFF |
12224 | && gotoff_operand (op0, Pmode) |
12225 | && CONST_INT_P (op1)) |
12226 | { |
12227 | if (!TARGET_64BIT) |
12228 | { |
12229 | new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), |
12230 | UNSPEC_GOTOFF); |
12231 | new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); |
12232 | new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
12233 | |
12234 | if (reg != 0) |
12235 | { |
12236 | gcc_assert (REG_P (reg)); |
12237 | new_rtx = expand_simple_binop (Pmode, PLUS, |
12238 | pic_offset_table_rtx, |
12239 | new_rtx, reg, 1, |
12240 | OPTAB_DIRECT); |
12241 | } |
12242 | else |
12243 | new_rtx |
12244 | = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
12245 | } |
12246 | else |
12247 | { |
12248 | if (INTVAL (op1) < -16*1024*1024 |
12249 | || INTVAL (op1) >= 16*1024*1024) |
12250 | { |
12251 | if (!x86_64_immediate_operand (op1, Pmode)) |
12252 | op1 = force_reg (Pmode, op1); |
12253 | |
12254 | new_rtx |
12255 | = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); |
12256 | } |
12257 | } |
12258 | } |
12259 | else |
12260 | { |
12261 | rtx base = legitimize_pic_address (op0, reg); |
12262 | machine_mode mode = GET_MODE (base); |
12263 | new_rtx |
12264 | = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); |
12265 | |
12266 | if (CONST_INT_P (new_rtx)) |
12267 | { |
12268 | if (INTVAL (new_rtx) < -16*1024*1024 |
12269 | || INTVAL (new_rtx) >= 16*1024*1024) |
12270 | { |
12271 | if (!x86_64_immediate_operand (new_rtx, mode)) |
12272 | new_rtx = force_reg (mode, new_rtx); |
12273 | |
12274 | new_rtx |
12275 | = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); |
12276 | } |
12277 | else |
12278 | new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); |
12279 | } |
12280 | else |
12281 | { |
12282 | /* For %rip addressing, we have to use |
12283 | just disp32, not base nor index. */ |
12284 | if (TARGET_64BIT |
12285 | && (GET_CODE (base) == SYMBOL_REF |
12286 | || GET_CODE (base) == LABEL_REF)) |
12287 | base = force_reg (mode, base); |
12288 | if (GET_CODE (new_rtx) == PLUS |
12289 | && CONSTANT_P (XEXP (new_rtx, 1))) |
12290 | { |
12291 | base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); |
12292 | new_rtx = XEXP (new_rtx, 1); |
12293 | } |
12294 | new_rtx = gen_rtx_PLUS (mode, base, new_rtx); |
12295 | } |
12296 | } |
12297 | } |
12298 | } |
12299 | return new_rtx; |
12300 | } |
12301 | |
12302 | /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
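/* On GNU/Linux targets the thread pointer is the %fs segment base in
   64-bit mode and the %gs base in 32-bit mode; UNSPEC_TP represents
   that value here.  */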
12303 | |
12304 | static rtx |
12305 | get_thread_pointer (machine_mode tp_mode, bool to_reg) |
12306 | { |
12307 | rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
12308 | |
12309 | if (GET_MODE (tp) != tp_mode) |
12310 | { |
12311 | gcc_assert (GET_MODE (tp) == SImode); |
12312 | gcc_assert (tp_mode == DImode); |
12313 | |
12314 | tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); |
12315 | } |
12316 | |
12317 | if (to_reg) |
12318 | tp = copy_to_mode_reg (tp_mode, tp); |
12319 | |
12320 | return tp; |
12321 | } |
12322 | |
12323 | /* Construct the SYMBOL_REF for the _tls_index symbol. */ |
12324 | |
12325 | static GTY(()) rtx ix86_tls_index_symbol; |
12326 | |
12327 | #if TARGET_WIN32_TLS |
12328 | static rtx |
12329 | ix86_tls_index (void) |
12330 | { |
12331 | if (!ix86_tls_index_symbol) |
12332 | ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index"); |
12333 | |
12334 | if (flag_pic) |
12335 | return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL)); |
12336 | else |
12337 | return ix86_tls_index_symbol; |
12338 | } |
12339 | #endif |
12340 | |
12341 | /* Construct the SYMBOL_REF for the tls_get_addr function. */ |
12342 | |
12343 | static GTY(()) rtx ix86_tls_symbol; |
12344 | |
12345 | static rtx |
12346 | ix86_tls_get_addr (void) |
12347 | { |
12348 | if (!ix86_tls_symbol) |
12349 | { |
12350 | const char *sym |
12351 | = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) |
12352 | ? "___tls_get_addr": "__tls_get_addr"); |
12353 | |
12354 | ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); |
12355 | } |
12356 | |
12357 | if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) |
12358 | { |
12359 | rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), |
12360 | UNSPEC_PLTOFF); |
12361 | return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, |
12362 | gen_rtx_CONST (Pmode, unspec)); |
12363 | } |
12364 | |
12365 | return ix86_tls_symbol; |
12366 | } |
12367 | |
12368 | /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
12369 | |
12370 | static GTY(()) rtx ix86_tls_module_base_symbol; |
12371 | |
12372 | rtx |
12373 | ix86_tls_module_base (void) |
12374 | { |
12375 | if (!ix86_tls_module_base_symbol) |
12376 | { |
12377 | ix86_tls_module_base_symbol |
12378 | = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_"); |
12379 | |
12380 | SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) |
12381 | |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; |
12382 | } |
12383 | |
12384 | return ix86_tls_module_base_symbol; |
12385 | } |
12386 | |
12387 | /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is |
12388 | false if we expect this to be used for a memory address and true if |
12389 | we expect to load the address into a register. */ |
12390 | |
12391 | rtx |
12392 | legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) |
12393 | { |
12394 | rtx dest, base, off; |
12395 | rtx pic = NULL_RTX, tp = NULL_RTX; |
12396 | machine_mode tp_mode = Pmode; |
12397 | int type; |
12398 | |
12399 | #if TARGET_WIN32_TLS |
12400 | off = gen_const_mem (SImode, ix86_tls_index ()); |
12401 | set_mem_alias_set (off, GOT_ALIAS_SET); |
12402 | |
12403 | tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44)); |
12404 | set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG); |
12405 | |
12406 | if (TARGET_64BIT) |
12407 | off = convert_to_mode (Pmode, off, 1); |
12408 | |
12409 | base = force_reg (Pmode, off); |
12410 | tp = copy_to_mode_reg (Pmode, tp); |
12411 | |
12412 | tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD)))); |
12413 | set_mem_alias_set (tp, GOT_ALIAS_SET); |
12414 | |
12415 | base = force_reg (Pmode, tp); |
12416 | |
12417 | return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))); |
12418 | #else |
12419 | /* Fall back to the global dynamic model if the toolchain cannot support |
12420 | local dynamic. */ |
12421 | if (TARGET_SUN_TLS && !TARGET_64BIT |
12422 | && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM |
12423 | && model == TLS_MODEL_LOCAL_DYNAMIC) |
12424 | model = TLS_MODEL_GLOBAL_DYNAMIC; |
12425 | |
12426 | switch (model) |
12427 | { |
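/* Global dynamic: the address is computed by a call to the tls_get_addr
   helper (or via the GNU2/TLSDESC sequence when TARGET_GNU2_TLS), as in
   the standard ELF TLS models.  */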
12428 | case TLS_MODEL_GLOBAL_DYNAMIC: |
12429 | if (!TARGET_64BIT) |
12430 | { |
12431 | if (flag_pic && !TARGET_PECOFF) |
12432 | pic = pic_offset_table_rtx; |
12433 | else |
12434 | { |
12435 | pic = gen_reg_rtx (Pmode); |
12436 | emit_insn (gen_set_got (pic)); |
12437 | } |
12438 | } |
12439 | |
12440 | if (TARGET_GNU2_TLS) |
12441 | { |
12442 | dest = gen_reg_rtx (ptr_mode); |
12443 | if (TARGET_64BIT) |
12444 | emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x)); |
12445 | else |
12446 | emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); |
12447 | |
12448 | tp = get_thread_pointer (ptr_mode, true); |
12449 | dest = gen_rtx_PLUS (ptr_mode, tp, dest); |
12450 | if (GET_MODE (dest) != Pmode) |
12451 | dest = gen_rtx_ZERO_EXTEND (Pmode, dest); |
12452 | dest = force_reg (Pmode, dest); |
12453 | |
12454 | if (GET_MODE (x) != Pmode) |
12455 | x = gen_rtx_ZERO_EXTEND (Pmode, x); |
12456 | |
12457 | set_unique_reg_note (get_last_insn (), REG_EQUAL, x); |
12458 | } |
12459 | else |
12460 | { |
12461 | rtx caddr = ix86_tls_get_addr (); |
12462 | |
12463 | dest = gen_reg_rtx (Pmode); |
12464 | if (TARGET_64BIT) |
12465 | { |
12466 | rtx rax = gen_rtx_REG (Pmode, AX_REG); |
12467 | rtx_insn *insns; |
12468 | |
12469 | start_sequence (); |
12470 | emit_call_insn |
12471 | (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); |
12472 | insns = end_sequence (); |
12473 | |
12474 | if (GET_MODE (x) != Pmode) |
12475 | x = gen_rtx_ZERO_EXTEND (Pmode, x); |
12476 | |
12477 | RTL_CONST_CALL_P (insns) = 1; |
12478 | emit_libcall_block (insns, dest, rax, x); |
12479 | } |
12480 | else |
12481 | emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); |
12482 | } |
12483 | break; |
12484 | |
12485 | case TLS_MODEL_LOCAL_DYNAMIC: |
12486 | if (!TARGET_64BIT) |
12487 | { |
12488 | if (flag_pic) |
12489 | pic = pic_offset_table_rtx; |
12490 | else |
12491 | { |
12492 | pic = gen_reg_rtx (Pmode); |
12493 | emit_insn (gen_set_got (pic)); |
12494 | } |
12495 | } |
12496 | |
12497 | if (TARGET_GNU2_TLS) |
12498 | { |
12499 | rtx tmp = ix86_tls_module_base (); |
12500 | |
12501 | base = gen_reg_rtx (ptr_mode); |
12502 | if (TARGET_64BIT) |
12503 | emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp)); |
12504 | else |
12505 | emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); |
12506 | |
12507 | tp = get_thread_pointer (ptr_mode, true); |
12508 | if (GET_MODE (base) != Pmode) |
12509 | base = gen_rtx_ZERO_EXTEND (Pmode, base); |
12510 | base = force_reg (Pmode, base); |
12511 | } |
12512 | else |
12513 | { |
12514 | rtx caddr = ix86_tls_get_addr (); |
12515 | |
12516 | base = gen_reg_rtx (Pmode); |
12517 | if (TARGET_64BIT) |
12518 | { |
12519 | rtx rax = gen_rtx_REG (Pmode, AX_REG); |
12520 | rtx_insn *insns; |
12521 | rtx eqv; |
12522 | |
12523 | start_sequence (); |
12524 | emit_call_insn |
12525 | (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); |
12526 | insns = end_sequence (); |
12527 | |
12528 | /* Attach a unique REG_EQUAL, to allow the RTL optimizers to |
12529 | share the LD_BASE result with other LD model accesses. */ |
12530 | eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), |
12531 | UNSPEC_TLS_LD_BASE); |
12532 | |
12533 | RTL_CONST_CALL_P (insns) = 1; |
12534 | emit_libcall_block (insns, base, rax, eqv); |
12535 | } |
12536 | else |
12537 | emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); |
12538 | } |
12539 | |
12540 | off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); |
12541 | off = gen_rtx_CONST (Pmode, off); |
12542 | |
12543 | dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); |
12544 | |
12545 | if (TARGET_GNU2_TLS) |
12546 | { |
12547 | if (GET_MODE (tp) != Pmode) |
12548 | { |
12549 | dest = lowpart_subreg (ptr_mode, dest, Pmode); |
12550 | dest = gen_rtx_PLUS (ptr_mode, tp, dest); |
12551 | dest = gen_rtx_ZERO_EXTEND (Pmode, dest); |
12552 | } |
12553 | else |
12554 | dest = gen_rtx_PLUS (Pmode, tp, dest); |
12555 | dest = force_reg (Pmode, dest); |
12556 | |
12557 | if (GET_MODE (x) != Pmode) |
12558 | x = gen_rtx_ZERO_EXTEND (Pmode, x); |
12559 | |
12560 | set_unique_reg_note (get_last_insn (), REG_EQUAL, x); |
12561 | } |
12562 | break; |
12563 | |
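/* Initial exec: the thread-pointer-relative offset of X is loaded from
   the GOT (a @gottpoff/@indntpoff style reference) and added to the
   thread pointer, following the usual ELF TLS sequences.  */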
12564 | case TLS_MODEL_INITIAL_EXEC: |
12565 | if (TARGET_64BIT) |
12566 | { |
12567 | /* Generate DImode references to avoid %fs:(%reg32) |
12568 | problems and the linker IE->LE relaxation bug. */ |
12569 | tp_mode = DImode; |
12570 | pic = NULL; |
12571 | type = UNSPEC_GOTNTPOFF; |
12572 | } |
12573 | else if (flag_pic) |
12574 | { |
12575 | pic = pic_offset_table_rtx; |
12576 | type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; |
12577 | } |
12578 | else if (!TARGET_ANY_GNU_TLS) |
12579 | { |
12580 | pic = gen_reg_rtx (Pmode); |
12581 | emit_insn (gen_set_got (pic)); |
12582 | type = UNSPEC_GOTTPOFF; |
12583 | } |
12584 | else |
12585 | { |
12586 | pic = NULL; |
12587 | type = UNSPEC_INDNTPOFF; |
12588 | } |
12589 | |
12590 | off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); |
12591 | off = gen_rtx_CONST (tp_mode, off); |
12592 | if (pic) |
12593 | off = gen_rtx_PLUS (tp_mode, pic, off); |
12594 | off = gen_const_mem (tp_mode, off); |
12595 | set_mem_alias_set (off, GOT_ALIAS_SET); |
12596 | |
12597 | if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
12598 | { |
12599 | base = get_thread_pointer (tp_mode, |
12600 | for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
12601 | off = force_reg (tp_mode, off); |
12602 | dest = gen_rtx_PLUS (tp_mode, base, off); |
12603 | if (tp_mode != Pmode) |
12604 | dest = convert_to_mode (Pmode, dest, 1); |
12605 | } |
12606 | else |
12607 | { |
12608 | base = get_thread_pointer (Pmode, true); |
12609 | dest = gen_reg_rtx (Pmode); |
12610 | emit_insn (gen_sub3_insn (dest, base, off)); |
12611 | } |
12612 | break; |
12613 | |
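/* Local exec: the offset from the thread pointer is a link-time
   constant (@tpoff/@ntpoff), so no GOT access is needed at all, per the
   usual ELF TLS models.  */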
12614 | case TLS_MODEL_LOCAL_EXEC: |
12615 | off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
12616 | (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
12617 | ? UNSPEC_NTPOFF : UNSPEC_TPOFF); |
12618 | off = gen_rtx_CONST (Pmode, off); |
12619 | |
12620 | if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
12621 | { |
12622 | base = get_thread_pointer (Pmode, |
12623 | for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
12624 | return gen_rtx_PLUS (Pmode, base, off); |
12625 | } |
12626 | else |
12627 | { |
12628 | base = get_thread_pointer (Pmode, true); |
12629 | dest = gen_reg_rtx (Pmode); |
12630 | emit_insn (gen_sub3_insn (dest, base, off)); |
12631 | } |
12632 | break; |
12633 | |
12634 | default: |
12635 | gcc_unreachable (); |
12636 | } |
12637 | |
12638 | return dest; |
12639 | #endif |
12640 | } |
12641 | |
12642 | /* Return true if the TLS address requires an insn using integer registers. |
12643 | It's used to prevent KMOV/VMOV in TLS code sequences which require integer |
12644 | MOV instructions; see PR103275. */ |
12645 | bool |
12646 | ix86_gpr_tls_address_pattern_p (rtx mem) |
12647 | { |
12648 | gcc_assert (MEM_P (mem)); |
12649 | |
12650 | rtx addr = XEXP (mem, 0); |
12651 | subrtx_var_iterator::array_type array; |
12652 | FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL) |
12653 | { |
12654 | rtx op = *iter; |
12655 | if (GET_CODE (op) == UNSPEC) |
12656 | switch (XINT (op, 1)) |
12657 | { |
12658 | case UNSPEC_GOTNTPOFF: |
12659 | return true; |
12660 | case UNSPEC_TPOFF: |
12661 | if (!TARGET_64BIT) |
12662 | return true; |
12663 | break; |
12664 | default: |
12665 | break; |
12666 | } |
12667 | } |
12668 | |
12669 | return false; |
12670 | } |
12671 | |
12672 | /* Return true if OP refers to a TLS address. */ |
12673 | bool |
12674 | ix86_tls_address_pattern_p (rtx op) |
12675 | { |
12676 | subrtx_var_iterator::array_type array; |
12677 | FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) |
12678 | { |
12679 | rtx op = *iter; |
12680 | if (MEM_P (op)) |
12681 | { |
12682 | rtx *x = &XEXP (op, 0); |
12683 | while (GET_CODE (*x) == PLUS) |
12684 | { |
12685 | int i; |
12686 | for (i = 0; i < 2; i++) |
12687 | { |
12688 | rtx u = XEXP (*x, i); |
12689 | if (GET_CODE (u) == ZERO_EXTEND) |
12690 | u = XEXP (u, 0); |
12691 | if (GET_CODE (u) == UNSPEC |
12692 | && XINT (u, 1) == UNSPEC_TP) |
12693 | return true; |
12694 | } |
12695 | x = &XEXP (*x, 0); |
12696 | } |
12697 | |
12698 | iter.skip_subrtxes (); |
12699 | } |
12700 | } |
12701 | |
12702 | return false; |
12703 | } |
12704 | |
12705 | /* Rewrite *LOC so that it refers to a default TLS address space. */ |
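/* Concretely, an address of the form (plus (unspec [const0] UNSPEC_TP) X)
   loses the UNSPEC_TP term and the resulting MEM is given the TLS segment
   address space, so the %fs/%gs override performs the thread-pointer
   addition instead.  */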
12706 | static void |
12707 | ix86_rewrite_tls_address_1 (rtx *loc) |
12708 | { |
12709 | subrtx_ptr_iterator::array_type array; |
12710 | FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) |
12711 | { |
12712 | rtx *loc = *iter; |
12713 | if (MEM_P (*loc)) |
12714 | { |
12715 | rtx addr = XEXP (*loc, 0); |
12716 | rtx *x = &addr; |
12717 | while (GET_CODE (*x) == PLUS) |
12718 | { |
12719 | int i; |
12720 | for (i = 0; i < 2; i++) |
12721 | { |
12722 | rtx u = XEXP (*x, i); |
12723 | if (GET_CODE (u) == ZERO_EXTEND) |
12724 | u = XEXP (u, 0); |
12725 | if (GET_CODE (u) == UNSPEC |
12726 | && XINT (u, 1) == UNSPEC_TP) |
12727 | { |
12728 | /* NB: Since address override only applies to the |
12729 | (reg32) part in fs:(reg32), return if address |
12730 | override is used. */ |
12731 | if (Pmode != word_mode |
12732 | && REG_P (XEXP (*x, 1 - i))) |
12733 | return; |
12734 | |
12735 | addr_space_t as = DEFAULT_TLS_SEG_REG; |
12736 | |
12737 | *x = XEXP (*x, 1 - i); |
12738 | |
12739 | *loc = replace_equiv_address_nv (*loc, addr, true); |
12740 | set_mem_addr_space (*loc, as); |
12741 | return; |
12742 | } |
12743 | } |
12744 | x = &XEXP (*x, 0); |
12745 | } |
12746 | |
12747 | iter.skip_subrtxes (); |
12748 | } |
12749 | } |
12750 | } |
12751 | |
12752 | /* Rewrite an instruction pattern involving a TLS address |
12753 | so that it refers to a default TLS address space. */ |
12754 | rtx |
12755 | ix86_rewrite_tls_address (rtx pattern) |
12756 | { |
12757 | pattern = copy_insn (pattern); |
12758 | ix86_rewrite_tls_address_1 (&pattern); |
12759 | return pattern; |
12760 | } |
12761 | |
12762 | /* Try machine-dependent ways of modifying an illegitimate address |
12763 | to be legitimate. If we find one, return the new, valid address. |
12764 | This macro is used in only one place: `memory_address' in explow.cc. |
12765 | |
12766 | OLDX is the address as it was before break_out_memory_refs was called. |
12767 | In some cases it is useful to look at this to decide what needs to be done. |
12768 | |
12769 | It is always safe for this macro to do nothing. It exists to recognize |
12770 | opportunities to optimize the output. |
12771 | |
12772 | For the 80386, we handle X+REG by loading X into a register R and |
12773 | using R+REG. R will go in a general reg and indexing will be used. |
12774 | However, if REG is a broken-out memory address or multiplication, |
12775 | nothing needs to be done because REG can certainly go in a general reg. |
12776 | |
12777 | When -fpic is used, special handling is needed for symbolic references. |
12778 | See comments by legitimize_pic_address in i386.cc for details. */ |
12779 | |
12780 | static rtx |
12781 | ix86_legitimize_address (rtx x, rtx, machine_mode mode) |
12782 | { |
12783 | bool changed = false; |
12784 | unsigned log; |
12785 | |
12786 | log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; |
12787 | if (log) |
12788 | return legitimize_tls_address (x, (enum tls_model) log, false); |
12789 | if (GET_CODE (x) == CONST |
12790 | && GET_CODE (XEXP (x, 0)) == PLUS |
12791 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF |
12792 | && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) |
12793 | { |
12794 | rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), |
12795 | (enum tls_model) log, false); |
12796 | return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); |
12797 | } |
12798 | |
12799 | if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
12800 | { |
12801 | #if TARGET_PECOFF |
12802 | rtx tmp = legitimize_pe_coff_symbol (x, true); |
12803 | if (tmp) |
12804 | return tmp; |
12805 | #endif |
12806 | } |
12807 | |
12808 | if (flag_pic && SYMBOLIC_CONST (x)) |
    return legitimize_pic_address (x, 0);
12810 | |
12811 | #if TARGET_MACHO |
12812 | if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) |
12813 | return machopic_indirect_data_reference (x, 0); |
12814 | #endif |
12815 | |
12816 | /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ |
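/* For example, (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), the form the address decomposition
   code expects.  */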
12817 | if (GET_CODE (x) == ASHIFT |
12818 | && CONST_INT_P (XEXP (x, 1)) |
12819 | && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) |
12820 | { |
12821 | changed = true; |
12822 | log = INTVAL (XEXP (x, 1)); |
12823 | x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), |
12824 | GEN_INT (1 << log)); |
12825 | } |
12826 | |
12827 | if (GET_CODE (x) == PLUS) |
12828 | { |
12829 | /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ |
12830 | |
12831 | if (GET_CODE (XEXP (x, 0)) == ASHIFT |
12832 | && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
12833 | && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) |
12834 | { |
12835 | changed = true; |
12836 | log = INTVAL (XEXP (XEXP (x, 0), 1)); |
12837 | XEXP (x, 0) = gen_rtx_MULT (Pmode, |
12838 | force_reg (Pmode, XEXP (XEXP (x, 0), 0)), |
12839 | GEN_INT (1 << log)); |
12840 | } |
12841 | |
12842 | if (GET_CODE (XEXP (x, 1)) == ASHIFT |
12843 | && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
12844 | && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) |
12845 | { |
12846 | changed = true; |
12847 | log = INTVAL (XEXP (XEXP (x, 1), 1)); |
12848 | XEXP (x, 1) = gen_rtx_MULT (Pmode, |
12849 | force_reg (Pmode, XEXP (XEXP (x, 1), 0)), |
12850 | GEN_INT (1 << log)); |
12851 | } |
12852 | |
12853 | /* Put multiply first if it isn't already. */ |
12854 | if (GET_CODE (XEXP (x, 1)) == MULT) |
12855 | { |
12856 | std::swap (XEXP (x, 0), XEXP (x, 1)); |
12857 | changed = true; |
12858 | } |
12859 | |
12860 | /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) |
12861 | into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be |
12862 | created by virtual register instantiation, register elimination, and |
12863 | similar optimizations. */ |
12864 | if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) |
12865 | { |
12866 | changed = true; |
12867 | x = gen_rtx_PLUS (Pmode, |
12868 | gen_rtx_PLUS (Pmode, XEXP (x, 0), |
12869 | XEXP (XEXP (x, 1), 0)), |
12870 | XEXP (XEXP (x, 1), 1)); |
12871 | } |
12872 | |
12873 | /* Canonicalize |
12874 | (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) |
12875 | into (plus (plus (mult (reg) (const)) (reg)) (const)). */ |
12876 | else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS |
12877 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
12878 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS |
12879 | && CONSTANT_P (XEXP (x, 1))) |
12880 | { |
12881 | rtx constant; |
12882 | rtx other = NULL_RTX; |
12883 | |
12884 | if (CONST_INT_P (XEXP (x, 1))) |
12885 | { |
12886 | constant = XEXP (x, 1); |
12887 | other = XEXP (XEXP (XEXP (x, 0), 1), 1); |
12888 | } |
12889 | else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) |
12890 | { |
12891 | constant = XEXP (XEXP (XEXP (x, 0), 1), 1); |
12892 | other = XEXP (x, 1); |
12893 | } |
12894 | else |
12895 | constant = 0; |
12896 | |
12897 | if (constant) |
12898 | { |
12899 | changed = true; |
12900 | x = gen_rtx_PLUS (Pmode, |
12901 | gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), |
12902 | XEXP (XEXP (XEXP (x, 0), 1), 0)), |
12903 | plus_constant (Pmode, other, |
12904 | INTVAL (constant))); |
12905 | } |
12906 | } |
12907 | |
      if (changed && ix86_legitimate_address_p (mode, x, false))
12909 | return x; |
12910 | |
12911 | if (GET_CODE (XEXP (x, 0)) == MULT) |
12912 | { |
12913 | changed = true; |
12914 | XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); |
12915 | } |
12916 | |
12917 | if (GET_CODE (XEXP (x, 1)) == MULT) |
12918 | { |
12919 | changed = true; |
12920 | XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); |
12921 | } |
12922 | |
12923 | if (changed |
12924 | && REG_P (XEXP (x, 1)) |
12925 | && REG_P (XEXP (x, 0))) |
12926 | return x; |
12927 | |
      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;
12936 | |
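      /* One operand is already a register: force the other one into a
	 fresh Pmode register (zero-extending if necessary) so that the
	 sum is a valid base + index address.  */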
12937 | if (REG_P (XEXP (x, 0))) |
12938 | { |
12939 | rtx temp = gen_reg_rtx (Pmode); |
12940 | rtx val = force_operand (XEXP (x, 1), temp); |
12941 | if (val != temp) |
12942 | { |
12943 | val = convert_to_mode (Pmode, val, 1); |
12944 | emit_move_insn (temp, val); |
12945 | } |
12946 | |
12947 | XEXP (x, 1) = temp; |
12948 | return x; |
12949 | } |
12950 | |
12951 | else if (REG_P (XEXP (x, 1))) |
12952 | { |
12953 | rtx temp = gen_reg_rtx (Pmode); |
12954 | rtx val = force_operand (XEXP (x, 0), temp); |
12955 | if (val != temp) |
12956 | { |
12957 | val = convert_to_mode (Pmode, val, 1); |
12958 | emit_move_insn (temp, val); |
12959 | } |
12960 | |
12961 | XEXP (x, 0) = temp; |
12962 | return x; |
12963 | } |
12964 | } |
12965 | |
12966 | return x; |
12967 | } |
12968 | |
12969 | /* Print an integer constant expression in assembler syntax. Addition |
12970 | and subtraction are the only arithmetic that may appear in these |
12971 | expressions. FILE is the stdio stream to write to, X is the rtx, and |
12972 | CODE is the operand print code from the output string. */ |
12973 | |
12974 | static void |
12975 | output_pic_addr_const (FILE *file, rtx x, int code) |
12976 | { |
12977 | char buf[256]; |
12978 | |
12979 | switch (GET_CODE (x)) |
12980 | { |
12981 | case PC: |
12982 | gcc_assert (flag_pic); |
      putc ('.', file);
12984 | break; |
12985 | |
12986 | case SYMBOL_REF: |
12987 | if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) |
12988 | output_addr_const (file, x); |
12989 | else |
12990 | { |
12991 | const char *name = XSTR (x, 0); |
12992 | |
12993 | /* Mark the decl as referenced so that cgraph will |
12994 | output the function. */ |
12995 | if (SYMBOL_REF_DECL (x)) |
12996 | mark_decl_referenced (SYMBOL_REF_DECL (x)); |
12997 | |
12998 | #if TARGET_MACHO |
12999 | if (MACHOPIC_INDIRECT |
13000 | && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) |
13001 | name = machopic_indirection_name (x, /*stub_p=*/true); |
13002 | #endif |
13003 | assemble_name (file, name); |
13004 | } |
13005 | if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) |
13006 | && code == 'P' && ix86_call_use_plt_p (x)) |
	fputs ("@PLT", file);
13008 | break; |
13009 | |
13010 | case LABEL_REF: |
13011 | x = XEXP (x, 0); |
13012 | /* FALLTHRU */ |
13013 | case CODE_LABEL: |
13014 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); |
13015 | assemble_name (asm_out_file, buf); |
13016 | break; |
13017 | |
13018 | CASE_CONST_SCALAR_INT: |
13019 | output_addr_const (file, x); |
13020 | break; |
13021 | |
13022 | case CONST: |
13023 | /* This used to output parentheses around the expression, |
13024 | but that does not work on the 386 (either ATT or BSD assembler). */ |
13025 | output_pic_addr_const (file, XEXP (x, 0), code); |
13026 | break; |
13027 | |
13028 | case CONST_DOUBLE: |
13029 | /* We can't handle floating point constants; |
13030 | TARGET_PRINT_OPERAND must handle them. */ |
13031 | output_operand_lossage ("floating constant misused"); |
13032 | break; |
13033 | |
13034 | case PLUS: |
13035 | /* Some assemblers need integer constants to appear first. */ |
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
13049 | break; |
13050 | |
    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;
13060 | |
13061 | case UNSPEC: |
13062 | gcc_assert (XVECLEN (x, 0) == 1); |
13063 | output_pic_addr_const (file, XVECEXP (x, 0, 0), code); |
13064 | switch (XINT (x, 1)) |
13065 | { |
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)" : "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
	case UNSPEC_SECREL32:
	  fputs ("@secrel32", file);
13111 | break; |
13112 | #if TARGET_MACHO |
13113 | case UNSPEC_MACHOPIC_OFFSET: |
13114 | putc ('-', file); |
13115 | machopic_output_function_base_name (file); |
13116 | break; |
13117 | #endif |
13118 | default: |
13119 | output_operand_lossage ("invalid UNSPEC as operand"); |
13120 | break; |
13121 | } |
13122 | break; |
13123 | |
13124 | default: |
13125 | output_operand_lossage ("invalid expression as operand"); |
13126 | } |
13127 | } |
13128 | |
13129 | /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
13130 | We need to emit DTP-relative relocations. */ |
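/* For instance, with SIZE == 4 this emits roughly ".long foo@dtpoff"
   (or "foo@secrel32" when TARGET_WIN32_TLS), and for SIZE == 8 the
   upper half is padded with ", 0".  */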
13131 | |
13132 | static void ATTRIBUTE_UNUSED |
13133 | i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
13134 | { |
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
#if TARGET_WIN32_TLS
  fputs ("@secrel32", file);
#else
  fputs ("@dtpoff", file);
#endif
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
13148 | break; |
13149 | default: |
13150 | gcc_unreachable (); |
13151 | } |
13152 | } |
13153 | |
13154 | /* Return true if X is a representation of the PIC register. This copes |
13155 | with calls from ix86_find_base_term, where the register might have |
13156 | been replaced by a cselib value. */ |
13157 | |
13158 | static bool |
13159 | ix86_pic_register_p (rtx x) |
13160 | { |
13161 | if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) |
13162 | return (pic_offset_table_rtx |
13163 | && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); |
13164 | else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) |
13165 | return true; |
13166 | else if (!REG_P (x)) |
13167 | return false; |
13168 | else if (pic_offset_table_rtx) |
13169 | { |
13170 | if (REGNO (x) == REGNO (pic_offset_table_rtx)) |
13171 | return true; |
13172 | if (HARD_REGISTER_P (x) |
13173 | && !HARD_REGISTER_P (pic_offset_table_rtx) |
13174 | && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) |
13175 | return true; |
13176 | return false; |
13177 | } |
13178 | else |
13179 | return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; |
13180 | } |
13181 | |
13182 | /* Helper function for ix86_delegitimize_address. |
13183 | Attempt to delegitimize TLS local-exec accesses. */ |
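/* For example, a local-exec access through the TLS segment register
   whose displacement wraps an UNSPEC_NTPOFF symbol is rewritten back
   to a plain reference to the underlying SYMBOL_REF, with any base,
   index and constant offset re-applied.  */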
13184 | |
13185 | static rtx |
13186 | ix86_delegitimize_tls_address (rtx orig_x) |
13187 | { |
13188 | rtx x = orig_x, unspec; |
13189 | struct ix86_address addr; |
13190 | |
13191 | if (!TARGET_TLS_DIRECT_SEG_REFS) |
13192 | return orig_x; |
13193 | if (MEM_P (x)) |
13194 | x = XEXP (x, 0); |
13195 | if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) |
13196 | return orig_x; |
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
13201 | return orig_x; |
13202 | unspec = XEXP (addr.disp, 0); |
13203 | if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) |
13204 | unspec = XEXP (unspec, 0); |
13205 | if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) |
13206 | return orig_x; |
13207 | x = XVECEXP (unspec, 0, 0); |
13208 | gcc_assert (GET_CODE (x) == SYMBOL_REF); |
13209 | if (unspec != XEXP (addr.disp, 0)) |
13210 | x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); |
13211 | if (addr.index) |
13212 | { |
13213 | rtx idx = addr.index; |
13214 | if (addr.scale != 1) |
13215 | idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); |
13216 | x = gen_rtx_PLUS (Pmode, idx, x); |
13217 | } |
13218 | if (addr.base) |
13219 | x = gen_rtx_PLUS (Pmode, addr.base, x); |
13220 | if (MEM_P (orig_x)) |
13221 | x = replace_equiv_address_nv (orig_x, x); |
13222 | return x; |
13223 | } |
13224 | |
13225 | /* In the name of slightly smaller debug output, and to cater to |
13226 | general assembler lossage, recognize PIC+GOTOFF and turn it back |
13227 | into a direct symbol reference. |
13228 | |
13229 | On Darwin, this is necessary to avoid a crash, because Darwin |
13230 | has a different PIC label for each routine but the DWARF debugging |
13231 | information is not associated with any particular routine, so it's |
13232 | necessary to remove references to the PIC label from RTL stored by |
13233 | the DWARF output code. |
13234 | |
   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As a compile-time memory optimization, we
   avoid allocating rtxes that cannot change the outcome for the
   callers (find_base_value and find_base_term).  */
13240 | |
13241 | static inline rtx |
13242 | ix86_delegitimize_address_1 (rtx x, bool base_term_p) |
13243 | { |
13244 | rtx orig_x = delegitimize_mem_from_attrs (x); |
13245 | /* addend is NULL or some rtx if x is something+GOTOFF where |
13246 | something doesn't include the PIC register. */ |
13247 | rtx addend = NULL_RTX; |
13248 | /* reg_addend is NULL or a multiple of some register. */ |
13249 | rtx reg_addend = NULL_RTX; |
13250 | /* const_addend is NULL or a const_int. */ |
13251 | rtx const_addend = NULL_RTX; |
13252 | /* This is the result, or NULL. */ |
13253 | rtx result = NULL_RTX; |
13254 | |
13255 | x = orig_x; |
13256 | |
13257 | if (MEM_P (x)) |
13258 | x = XEXP (x, 0); |
13259 | |
13260 | if (TARGET_64BIT) |
13261 | { |
13262 | if (GET_CODE (x) == CONST |
13263 | && GET_CODE (XEXP (x, 0)) == PLUS |
13264 | && GET_MODE (XEXP (x, 0)) == Pmode |
13265 | && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
13266 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC |
13267 | && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) |
13268 | { |
13269 | /* find_base_{value,term} only care about MEMs with arg_pointer_rtx |
13270 | base. A CONST can't be arg_pointer_rtx based. */ |
13271 | if (base_term_p && MEM_P (orig_x)) |
13272 | return orig_x; |
13273 | rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); |
13274 | x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); |
13275 | if (MEM_P (orig_x)) |
13276 | x = replace_equiv_address_nv (orig_x, x); |
13277 | return x; |
13278 | } |
13279 | |
13280 | if (GET_CODE (x) == CONST |
13281 | && GET_CODE (XEXP (x, 0)) == UNSPEC |
13282 | && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL |
13283 | || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) |
13284 | && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) |
13285 | { |
13286 | x = XVECEXP (XEXP (x, 0), 0, 0); |
13287 | if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) |
13288 | { |
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13290 | if (x == NULL_RTX) |
13291 | return orig_x; |
13292 | } |
13293 | return x; |
13294 | } |
13295 | |
13296 | if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) |
13297 | return ix86_delegitimize_tls_address (orig_x); |
13298 | |
13299 | /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic |
13300 | and -mcmodel=medium -fpic. */ |
13301 | } |
13302 | |
13303 | if (GET_CODE (x) != PLUS |
13304 | || GET_CODE (XEXP (x, 1)) != CONST) |
13305 | return ix86_delegitimize_tls_address (orig_x); |
13306 | |
13307 | if (ix86_pic_register_p (XEXP (x, 0))) |
13308 | /* %ebx + GOT/GOTOFF */ |
13309 | ; |
13310 | else if (GET_CODE (XEXP (x, 0)) == PLUS) |
13311 | { |
13312 | /* %ebx + %reg * scale + GOT/GOTOFF */ |
13313 | reg_addend = XEXP (x, 0); |
13314 | if (ix86_pic_register_p (XEXP (reg_addend, 0))) |
13315 | reg_addend = XEXP (reg_addend, 1); |
13316 | else if (ix86_pic_register_p (XEXP (reg_addend, 1))) |
13317 | reg_addend = XEXP (reg_addend, 0); |
13318 | else |
13319 | { |
13320 | reg_addend = NULL_RTX; |
13321 | addend = XEXP (x, 0); |
13322 | } |
13323 | } |
13324 | else |
13325 | addend = XEXP (x, 0); |
13326 | |
13327 | x = XEXP (XEXP (x, 1), 0); |
13328 | if (GET_CODE (x) == PLUS |
13329 | && CONST_INT_P (XEXP (x, 1))) |
13330 | { |
13331 | const_addend = XEXP (x, 1); |
13332 | x = XEXP (x, 0); |
13333 | } |
13334 | |
13335 | if (GET_CODE (x) == UNSPEC |
13336 | && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) |
13337 | || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) |
13338 | || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC |
13339 | && !MEM_P (orig_x) && !addend))) |
13340 | result = XVECEXP (x, 0, 0); |
13341 | |
  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13343 | && !MEM_P (orig_x)) |
13344 | result = XVECEXP (x, 0, 0); |
13345 | |
13346 | if (! result) |
13347 | return ix86_delegitimize_tls_address (orig_x); |
13348 | |
13349 | /* For (PLUS something CONST_INT) both find_base_{value,term} just |
13350 | recurse on the first operand. */ |
13351 | if (const_addend && !base_term_p) |
13352 | result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); |
13353 | if (reg_addend) |
13354 | result = gen_rtx_PLUS (Pmode, reg_addend, result); |
13355 | if (addend) |
13356 | { |
13357 | /* If the rest of original X doesn't involve the PIC register, add |
13358 | addend and subtract pic_offset_table_rtx. This can happen e.g. |
13359 | for code like: |
13360 | leal (%ebx, %ecx, 4), %ecx |
13361 | ... |
13362 | movl foo@GOTOFF(%ecx), %edx |
13363 | in which case we return (%ecx - %ebx) + foo |
13364 | or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg |
13365 | and reload has completed. Don't do the latter for debug, |
13366 | as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */ |
13367 | if (pic_offset_table_rtx |
13368 | && (!reload_completed || !ix86_use_pseudo_pic_reg ())) |
13369 | result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), |
13370 | pic_offset_table_rtx), |
13371 | result); |
13372 | else if (base_term_p |
13373 | && pic_offset_table_rtx |
13374 | && !TARGET_MACHO |
13375 | && !TARGET_VXWORKS_RTP) |
13376 | { |
13377 | rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); |
13378 | tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); |
13379 | result = gen_rtx_PLUS (Pmode, tmp, result); |
13380 | } |
13381 | else |
13382 | return orig_x; |
13383 | } |
13384 | if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) |
13385 | { |
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13387 | if (result == NULL_RTX) |
13388 | return orig_x; |
13389 | } |
13390 | return result; |
13391 | } |
13392 | |
13393 | /* The normal instantiation of the above template. */ |
13394 | |
13395 | static rtx |
13396 | ix86_delegitimize_address (rtx x) |
13397 | { |
  return ix86_delegitimize_address_1 (x, false);
13399 | } |
13400 | |
13401 | /* If X is a machine specific address (i.e. a symbol or label being |
13402 | referenced as a displacement from the GOT implemented using an |
13403 | UNSPEC), then return the base term. Otherwise return X. */ |
13404 | |
13405 | rtx |
13406 | ix86_find_base_term (rtx x) |
13407 | { |
13408 | rtx term; |
13409 | |
13410 | if (TARGET_64BIT) |
13411 | { |
13412 | if (GET_CODE (x) != CONST) |
13413 | return x; |
13414 | term = XEXP (x, 0); |
13415 | if (GET_CODE (term) == PLUS |
13416 | && CONST_INT_P (XEXP (term, 1))) |
13417 | term = XEXP (term, 0); |
13418 | if (GET_CODE (term) != UNSPEC |
13419 | || (XINT (term, 1) != UNSPEC_GOTPCREL |
13420 | && XINT (term, 1) != UNSPEC_PCREL)) |
13421 | return x; |
13422 | |
13423 | return XVECEXP (term, 0, 0); |
13424 | } |
13425 | |
  return ix86_delegitimize_address_1 (x, true);
13427 | } |
13428 | |
/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't easily emit the
   _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so
   instead of delegitimizing we assemble the access as @gotoff.
   Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
13435 | |
13436 | static bool |
13437 | ix86_const_not_ok_for_debug_p (rtx x) |
13438 | { |
13439 | if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) |
13440 | return true; |
13441 | |
13442 | if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) |
13443 | return true; |
13444 | |
13445 | return false; |
13446 | } |
13447 | |
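/* Print to FILE the condition-code suffix ("e", "ne", "ge", ...) for
   comparison CODE in flags mode MODE.  REVERSE prints the reversed
   condition; FP selects the spellings preferred for FP compares and
   fcmov (e.g. "nbe" instead of "a").  */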
13448 | static void |
13449 | put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, |
13450 | bool fp, FILE *file) |
13451 | { |
13452 | const char *suffix; |
13453 | |
13454 | if (mode == CCFPmode) |
13455 | { |
13456 | code = ix86_fp_compare_code_to_integer (code); |
13457 | mode = CCmode; |
13458 | } |
13459 | if (reverse) |
13460 | code = reverse_condition (code); |
13461 | |
13462 | switch (code) |
13463 | { |
13464 | case EQ: |
13465 | gcc_assert (mode != CCGZmode); |
13466 | switch (mode) |
13467 | { |
13468 | case E_CCAmode: |
13469 | suffix = "a"; |
13470 | break; |
13471 | case E_CCCmode: |
13472 | suffix = "c"; |
13473 | break; |
13474 | case E_CCOmode: |
13475 | suffix = "o"; |
13476 | break; |
13477 | case E_CCPmode: |
13478 | suffix = "p"; |
13479 | break; |
13480 | case E_CCSmode: |
13481 | suffix = "s"; |
13482 | break; |
13483 | default: |
13484 | suffix = "e"; |
13485 | break; |
13486 | } |
13487 | break; |
13488 | case NE: |
13489 | gcc_assert (mode != CCGZmode); |
13490 | switch (mode) |
13491 | { |
13492 | case E_CCAmode: |
13493 | suffix = "na"; |
13494 | break; |
13495 | case E_CCCmode: |
13496 | suffix = "nc"; |
13497 | break; |
13498 | case E_CCOmode: |
13499 | suffix = "no"; |
13500 | break; |
13501 | case E_CCPmode: |
13502 | suffix = "np"; |
13503 | break; |
13504 | case E_CCSmode: |
13505 | suffix = "ns"; |
13506 | break; |
13507 | default: |
13508 | suffix = "ne"; |
13509 | break; |
13510 | } |
13511 | break; |
13512 | case GT: |
13513 | gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); |
13514 | suffix = "g"; |
13515 | break; |
13516 | case GTU: |
13517 | /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. |
13518 | Those same assemblers have the same but opposite lossage on cmov. */ |
13519 | if (mode == CCmode) |
13520 | suffix = fp ? "nbe": "a"; |
13521 | else |
13522 | gcc_unreachable (); |
13523 | break; |
13524 | case LT: |
13525 | switch (mode) |
13526 | { |
13527 | case E_CCNOmode: |
13528 | case E_CCGOCmode: |
13529 | suffix = "s"; |
13530 | break; |
13531 | |
13532 | case E_CCmode: |
13533 | case E_CCGCmode: |
13534 | case E_CCGZmode: |
13535 | suffix = "l"; |
13536 | break; |
13537 | |
13538 | default: |
13539 | gcc_unreachable (); |
13540 | } |
13541 | break; |
13542 | case LTU: |
13543 | if (mode == CCmode || mode == CCGZmode) |
13544 | suffix = "b"; |
13545 | else if (mode == CCCmode) |
13546 | suffix = fp ? "b": "c"; |
13547 | else |
13548 | gcc_unreachable (); |
13549 | break; |
13550 | case GE: |
13551 | switch (mode) |
13552 | { |
13553 | case E_CCNOmode: |
13554 | case E_CCGOCmode: |
13555 | suffix = "ns"; |
13556 | break; |
13557 | |
13558 | case E_CCmode: |
13559 | case E_CCGCmode: |
13560 | case E_CCGZmode: |
13561 | suffix = "ge"; |
13562 | break; |
13563 | |
13564 | default: |
13565 | gcc_unreachable (); |
13566 | } |
13567 | break; |
13568 | case GEU: |
13569 | if (mode == CCmode || mode == CCGZmode) |
13570 | suffix = "nb"; |
13571 | else if (mode == CCCmode) |
13572 | suffix = fp ? "nb": "nc"; |
13573 | else |
13574 | gcc_unreachable (); |
13575 | break; |
13576 | case LE: |
13577 | gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); |
13578 | suffix = "le"; |
13579 | break; |
13580 | case LEU: |
13581 | if (mode == CCmode) |
13582 | suffix = "be"; |
13583 | else |
13584 | gcc_unreachable (); |
13585 | break; |
13586 | case UNORDERED: |
13587 | suffix = fp ? "u": "p"; |
13588 | break; |
13589 | case ORDERED: |
13590 | suffix = fp ? "nu": "np"; |
13591 | break; |
13592 | default: |
13593 | gcc_unreachable (); |
13594 | } |
  fputs (suffix, file);
13596 | } |
13597 | |
13598 | /* Print the name of register X to FILE based on its machine mode and number. |
13599 | If CODE is 'w', pretend the mode is HImode. |
13600 | If CODE is 'b', pretend the mode is QImode. |
13601 | If CODE is 'k', pretend the mode is SImode. |
13602 | If CODE is 'q', pretend the mode is DImode. |
13603 | If CODE is 'x', pretend the mode is V4SFmode. |
13604 | If CODE is 't', pretend the mode is V8SFmode. |
13605 | If CODE is 'g', pretend the mode is V16SFmode. |
13606 | If CODE is 'h', pretend the reg is the 'high' byte register. |
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13608 | If CODE is 'd', duplicate the operand for AVX instruction. |
13609 | If CODE is 'V', print naked full integer register name without %. |
13610 | */ |
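/* For example, print_reg on (reg:SI ax) with CODE 'b' prints "al"
   (preceded by '%' in AT&T syntax).  */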
13611 | |
13612 | void |
13613 | print_reg (rtx x, int code, FILE *file) |
13614 | { |
13615 | const char *reg; |
13616 | int msize; |
13617 | unsigned int regno; |
13618 | bool duplicated; |
13619 | |
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }
13635 | |
13636 | if (code == 'w') |
13637 | msize = 2; |
13638 | else if (code == 'b') |
13639 | msize = 1; |
13640 | else if (code == 'k') |
13641 | msize = 4; |
13642 | else if (code == 'q') |
13643 | msize = 8; |
13644 | else if (code == 'h') |
13645 | msize = 0; |
13646 | else if (code == 'x') |
13647 | msize = 16; |
13648 | else if (code == 't') |
13649 | msize = 32; |
13650 | else if (code == 'g') |
13651 | msize = 64; |
13652 | else |
13653 | msize = GET_MODE_SIZE (GET_MODE (x)); |
13654 | |
13655 | regno = REGNO (x); |
13656 | |
13657 | if (regno == ARG_POINTER_REGNUM |
13658 | || regno == FRAME_POINTER_REGNUM |
13659 | || regno == FPSR_REG) |
13660 | { |
13661 | output_operand_lossage |
13662 | ("invalid use of register '%s'", reg_names[regno]); |
13663 | return; |
13664 | } |
13665 | else if (regno == FLAGS_REG) |
13666 | { |
13667 | output_operand_lossage ("invalid use of asm flag output"); |
13668 | return; |
13669 | } |
13670 | |
13671 | if (code == 'V') |
13672 | { |
13673 | if (GENERAL_REGNO_P (regno)) |
13674 | msize = GET_MODE_SIZE (word_mode); |
13675 | else |
13676 | error ("%<V%> modifier on non-integer register"); |
13677 | } |
13678 | |
13679 | duplicated = code == 'd' && TARGET_AVX; |
13680 | |
13681 | switch (msize) |
13682 | { |
13683 | case 16: |
13684 | case 12: |
13685 | case 8: |
13686 | if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode)) |
13687 | warning (0, "unsupported size for integer register"); |
13688 | /* FALLTHRU */ |
13689 | case 4: |
13690 | if (LEGACY_INT_REGNO_P (regno)) |
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
13692 | /* FALLTHRU */ |
13693 | case 2: |
13694 | normal: |
13695 | reg = hi_reg_name[regno]; |
13696 | break; |
13697 | case 1: |
13698 | if (regno >= ARRAY_SIZE (qi_reg_name)) |
13699 | goto normal; |
13700 | if (!ANY_QI_REGNO_P (regno)) |
13701 | error ("unsupported size for integer register"); |
13702 | reg = qi_reg_name[regno]; |
13703 | break; |
13704 | case 0: |
13705 | if (regno >= ARRAY_SIZE (qi_high_reg_name)) |
13706 | goto normal; |
13707 | reg = qi_high_reg_name[regno]; |
13708 | break; |
13709 | case 32: |
13710 | case 64: |
13711 | if (SSE_REGNO_P (regno)) |
13712 | { |
13713 | gcc_assert (!duplicated); |
	  putc (msize == 32 ? 'y' : 'z', file);
13715 | reg = hi_reg_name[regno] + 1; |
13716 | break; |
13717 | } |
13718 | goto normal; |
13719 | default: |
13720 | gcc_unreachable (); |
13721 | } |
13722 | |
  fputs (reg, file);

  /* Irritatingly, AMD extended registers use a different
     naming convention: "r%d[bwd]".  */
13727 | if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno)) |
13728 | { |
13729 | gcc_assert (TARGET_64BIT); |
13730 | switch (msize) |
13731 | { |
13732 | case 0: |
13733 | error ("extended registers have no high halves"); |
13734 | break; |
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
13743 | break; |
13744 | case 8: |
13745 | /* no suffix */ |
13746 | break; |
13747 | default: |
13748 | error ("unsupported operand size for extended register"); |
13749 | break; |
13750 | } |
13751 | return; |
13752 | } |
13753 | |
13754 | if (duplicated) |
13755 | { |
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
13760 | } |
13761 | } |
13762 | |
13763 | /* Meaning of CODE: |
13764 | L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
13765 | C -- print opcode suffix for set/cmov insn. |
13766 | c -- like C, but print reversed condition |
13767 | F,f -- likewise, but for floating-point. |
13768 | O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
13769 | otherwise nothing |
13770 | R -- print embedded rounding and sae. |
13771 | r -- print only sae. |
13772 | z -- print the opcode suffix for the size of the current operand. |
13773 | Z -- likewise, with special suffixes for x87 instructions. |
13774 | * -- print a star (in certain assembler syntax) |
13775 | A -- print an absolute memory reference. |
13776 | E -- print address with DImode register names if TARGET_64BIT. |
13777 | w -- print the operand as if it's a "word" (HImode) even if it isn't. |
13778 | s -- print a shift double count, followed by the assemblers argument |
13779 | delimiter. |
13780 | b -- print the QImode name of the register for the indicated operand. |
13781 | %b0 would print %al if operands[0] is reg 0. |
13782 | w -- likewise, print the HImode name of the register. |
13783 | k -- likewise, print the SImode name of the register. |
13784 | q -- likewise, print the DImode name of the register. |
13785 | x -- likewise, print the V4SFmode name of the register. |
13786 | t -- likewise, print the V8SFmode name of the register. |
13787 | g -- likewise, print the V16SFmode name of the register. |
13788 | h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
13789 | y -- print "st(0)" instead of "st" as a register. |
13790 | d -- print duplicated register operand for AVX instruction. |
13791 | D -- print condition for SSE cmp instruction. |
13792 | P -- if PIC, print an @PLT suffix. For -fno-plt, load function |
13793 | address from GOT. |
13794 | p -- print raw symbol name. |
13795 | X -- don't print any sort of PIC '@' suffix for a symbol. |
13796 | & -- print some in-use local-dynamic symbol name. |
13797 | H -- print a memory address offset by 8; used for sse high-parts |
13798 | Y -- print condition for XOP pcom* instruction. |
13799 | V -- print naked full integer register name without %. |
13800 | v -- print segment override prefix |
13801 | + -- print a branch hint as 'cs' or 'ds' prefix |
13802 | ; -- print a semicolon (after prefixes due to bug in older gas). |
13803 | ~ -- print "i" if TARGET_AVX2, "f" otherwise. |
13804 | ^ -- print addr32 prefix if Pmode != word_mode |
13805 | M -- print addr32 prefix for TARGET_X32 with VSIB address. |
13806 | ! -- print NOTRACK prefix for jxx/call/ret instructions if required. |
13807 | N -- print maskz if it's constant 0 operand. |
13808 | G -- print embedded flag for ccmp/ctest. |
13809 | */ |
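/* For example, an output template might use "%z0" to pick the size
   suffix from operand 0, while "%k1" would print the SImode name of
   the register in operand 1.  */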
13810 | |
13811 | void |
13812 | ix86_print_operand (FILE *file, rtx x, int code) |
13813 | { |
13814 | if (code) |
13815 | { |
13816 | switch (code) |
13817 | { |
13818 | case 'A': |
13819 | switch (ASSEMBLER_DIALECT) |
13820 | { |
13821 | case ASM_ATT: |
13822 | putc (c: '*', stream: file); |
13823 | break; |
13824 | |
13825 | case ASM_INTEL: |
	    /* Intel syntax.  For absolute addresses, registers should not
	       be surrounded by brackets.  */
13828 | if (!REG_P (x)) |
13829 | { |
13830 | putc (c: '[', stream: file); |
13831 | ix86_print_operand (file, x, code: 0); |
13832 | putc (c: ']', stream: file); |
13833 | return; |
13834 | } |
13835 | break; |
13836 | |
13837 | default: |
13838 | gcc_unreachable (); |
13839 | } |
13840 | |
13841 | ix86_print_operand (file, x, code: 0); |
13842 | return; |
13843 | |
13844 | case 'E': |
13845 | /* Wrap address in an UNSPEC to declare special handling. */ |
13846 | if (TARGET_64BIT) |
13847 | x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); |
13848 | |
13849 | output_address (VOIDmode, x); |
13850 | return; |
13851 | |
13852 | case 'L': |
13853 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13854 | putc (c: 'l', stream: file); |
13855 | return; |
13856 | |
13857 | case 'W': |
13858 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13859 | putc (c: 'w', stream: file); |
13860 | return; |
13861 | |
13862 | case 'B': |
13863 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13864 | putc (c: 'b', stream: file); |
13865 | return; |
13866 | |
13867 | case 'Q': |
13868 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13869 | putc (c: 'l', stream: file); |
13870 | return; |
13871 | |
13872 | case 'S': |
13873 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13874 | putc (c: 's', stream: file); |
13875 | return; |
13876 | |
13877 | case 'T': |
13878 | if (ASSEMBLER_DIALECT == ASM_ATT) |
13879 | putc (c: 't', stream: file); |
13880 | return; |
13881 | |
13882 | case 'O': |
13883 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
13884 | if (ASSEMBLER_DIALECT != ASM_ATT) |
13885 | return; |
13886 | |
13887 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13888 | { |
13889 | case 2: |
13890 | putc ('w', file); |
13891 | break; |
13892 | |
13893 | case 4: |
13894 | putc ('l', file); |
13895 | break; |
13896 | |
13897 | case 8: |
13898 | putc ('q', file); |
13899 | break; |
13900 | |
13901 | default: |
13902 | output_operand_lossage ("invalid operand size for operand " |
13903 | "code 'O'"); |
13904 | return; |
13905 | } |
13906 | |
13907 | putc ('.', file); |
13908 | #endif |
13909 | return; |
13910 | |
13911 | case 'z': |
13912 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
13913 | { |
13914 | /* Opcodes don't get size suffixes if using Intel opcodes. */ |
13915 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
13916 | return; |
13917 | |
13918 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13919 | { |
13920 | case 1: |
13921 | putc (c: 'b', stream: file); |
13922 | return; |
13923 | |
13924 | case 2: |
13925 | putc (c: 'w', stream: file); |
13926 | return; |
13927 | |
13928 | case 4: |
13929 | putc (c: 'l', stream: file); |
13930 | return; |
13931 | |
13932 | case 8: |
13933 | putc (c: 'q', stream: file); |
13934 | return; |
13935 | |
13936 | default: |
13937 | output_operand_lossage ("invalid operand size for operand " |
13938 | "code 'z'"); |
13939 | return; |
13940 | } |
13941 | } |
13942 | |
13943 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
13944 | { |
13945 | if (this_is_asm_operands) |
13946 | warning_for_asm (this_is_asm_operands, |
13947 | "non-integer operand used with operand code %<z%>"); |
13948 | else |
13949 | warning (0, "non-integer operand used with operand code %<z%>"); |
13950 | } |
13951 | /* FALLTHRU */ |
13952 | |
13953 | case 'Z': |
13954 | /* 387 opcodes don't get size suffixes if using Intel opcodes. */ |
13955 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
13956 | return; |
13957 | |
13958 | if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
13959 | { |
13960 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13961 | { |
13962 | case 2: |
13963 | #ifdef HAVE_AS_IX86_FILDS |
13964 | putc (c: 's', stream: file); |
13965 | #endif |
13966 | return; |
13967 | |
13968 | case 4: |
13969 | putc (c: 'l', stream: file); |
13970 | return; |
13971 | |
13972 | case 8: |
13973 | #ifdef HAVE_AS_IX86_FILDQ |
13974 | putc (c: 'q', stream: file); |
13975 | #else |
13976 | fputs ("ll", file); |
13977 | #endif |
13978 | return; |
13979 | |
13980 | default: |
13981 | break; |
13982 | } |
13983 | } |
13984 | else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
13985 | { |
13986 | /* 387 opcodes don't get size suffixes |
13987 | if the operands are registers. */ |
13988 | if (STACK_REG_P (x)) |
13989 | return; |
13990 | |
13991 | switch (GET_MODE_SIZE (GET_MODE (x))) |
13992 | { |
13993 | case 4: |
13994 | putc (c: 's', stream: file); |
13995 | return; |
13996 | |
13997 | case 8: |
13998 | putc (c: 'l', stream: file); |
13999 | return; |
14000 | |
14001 | case 12: |
14002 | case 16: |
14003 | putc (c: 't', stream: file); |
14004 | return; |
14005 | |
14006 | default: |
14007 | break; |
14008 | } |
14009 | } |
14010 | else |
14011 | { |
14012 | output_operand_lossage ("invalid operand type used with " |
14013 | "operand code '%c'", code); |
14014 | return; |
14015 | } |
14016 | |
14017 | output_operand_lossage ("invalid operand size for operand code '%c'", |
14018 | code); |
14019 | return; |
14020 | |
14021 | case 'd': |
14022 | case 'b': |
14023 | case 'w': |
14024 | case 'k': |
14025 | case 'q': |
14026 | case 'h': |
14027 | case 't': |
14028 | case 'g': |
14029 | case 'y': |
14030 | case 'x': |
14031 | case 'X': |
14032 | case 'P': |
14033 | case 'p': |
14034 | case 'V': |
14035 | break; |
14036 | |
14037 | case 's': |
14038 | if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) |
14039 | { |
14040 | ix86_print_operand (file, x, code: 0); |
14041 | fputs (s: ", ", stream: file); |
14042 | } |
14043 | return; |
14044 | |
14045 | case 'Y': |
14046 | switch (GET_CODE (x)) |
14047 | { |
14048 | case NE: |
14049 | fputs (s: "neq", stream: file); |
14050 | break; |
14051 | case EQ: |
14052 | fputs (s: "eq", stream: file); |
14053 | break; |
14054 | case GE: |
14055 | case GEU: |
14056 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge": "unlt", stream: file); |
14057 | break; |
14058 | case GT: |
14059 | case GTU: |
14060 | fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt": "unle", stream: file); |
14061 | break; |
14062 | case LE: |
14063 | case LEU: |
14064 | fputs (s: "le", stream: file); |
14065 | break; |
14066 | case LT: |
14067 | case LTU: |
14068 | fputs (s: "lt", stream: file); |
14069 | break; |
14070 | case UNORDERED: |
14071 | fputs (s: "unord", stream: file); |
14072 | break; |
14073 | case ORDERED: |
14074 | fputs (s: "ord", stream: file); |
14075 | break; |
14076 | case UNEQ: |
14077 | fputs (s: "ueq", stream: file); |
14078 | break; |
14079 | case UNGE: |
14080 | fputs (s: "nlt", stream: file); |
14081 | break; |
14082 | case UNGT: |
14083 | fputs (s: "nle", stream: file); |
14084 | break; |
14085 | case UNLE: |
14086 | fputs (s: "ule", stream: file); |
14087 | break; |
14088 | case UNLT: |
14089 | fputs (s: "ult", stream: file); |
14090 | break; |
14091 | case LTGT: |
14092 | fputs (s: "une", stream: file); |
14093 | break; |
14094 | default: |
14095 | output_operand_lossage ("operand is not a condition code, " |
14096 | "invalid operand code 'Y'"); |
14097 | return; |
14098 | } |
14099 | return; |
14100 | |
14101 | case 'D': |
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
14105 | switch (GET_CODE (x)) |
14106 | { |
14107 | case UNEQ: |
14108 | if (TARGET_AVX) |
14109 | { |
14110 | fputs (s: "eq_us", stream: file); |
14111 | break; |
14112 | } |
14113 | /* FALLTHRU */ |
14114 | case EQ: |
14115 | fputs (s: "eq", stream: file); |
14116 | break; |
14117 | case UNLT: |
14118 | if (TARGET_AVX) |
14119 | { |
14120 | fputs (s: "nge", stream: file); |
14121 | break; |
14122 | } |
14123 | /* FALLTHRU */ |
14124 | case LT: |
14125 | fputs (s: "lt", stream: file); |
14126 | break; |
14127 | case UNLE: |
14128 | if (TARGET_AVX) |
14129 | { |
14130 | fputs (s: "ngt", stream: file); |
14131 | break; |
14132 | } |
14133 | /* FALLTHRU */ |
14134 | case LE: |
14135 | fputs (s: "le", stream: file); |
14136 | break; |
14137 | case UNORDERED: |
14138 | fputs (s: "unord", stream: file); |
14139 | break; |
14140 | case LTGT: |
14141 | if (TARGET_AVX) |
14142 | { |
14143 | fputs (s: "neq_oq", stream: file); |
14144 | break; |
14145 | } |
14146 | /* FALLTHRU */ |
14147 | case NE: |
14148 | fputs (s: "neq", stream: file); |
14149 | break; |
14150 | case GE: |
14151 | if (TARGET_AVX) |
14152 | { |
14153 | fputs (s: "ge", stream: file); |
14154 | break; |
14155 | } |
14156 | /* FALLTHRU */ |
14157 | case UNGE: |
14158 | fputs (s: "nlt", stream: file); |
14159 | break; |
14160 | case GT: |
14161 | if (TARGET_AVX) |
14162 | { |
14163 | fputs (s: "gt", stream: file); |
14164 | break; |
14165 | } |
14166 | /* FALLTHRU */ |
14167 | case UNGT: |
14168 | fputs (s: "nle", stream: file); |
14169 | break; |
14170 | case ORDERED: |
14171 | fputs (s: "ord", stream: file); |
14172 | break; |
14173 | default: |
14174 | output_operand_lossage ("operand is not a condition code, " |
14175 | "invalid operand code 'D'"); |
14176 | return; |
14177 | } |
14178 | return; |
14179 | |
14180 | case 'F': |
14181 | case 'f': |
14182 | #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
14183 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14184 | putc ('.', file); |
14185 | gcc_fallthrough (); |
14186 | #endif |
14187 | |
14188 | case 'C': |
14189 | case 'c': |
14190 | if (!COMPARISON_P (x)) |
14191 | { |
14192 | output_operand_lossage ("operand is not a condition code, " |
14193 | "invalid operand code '%c'", code); |
14194 | return; |
14195 | } |
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',
			      file);
14200 | return; |
14201 | |
14202 | case 'G': |
14203 | { |
14204 | int dfv = INTVAL (x); |
14205 | const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv]; |
14206 | fputs (s: dfv_suffix, stream: file); |
14207 | } |
14208 | return; |
14209 | |
14210 | case 'H': |
14211 | if (!offsettable_memref_p (x)) |
14212 | { |
14213 | output_operand_lossage ("operand is not an offsettable memory " |
14214 | "reference, invalid operand code 'H'"); |
14215 | return; |
14216 | } |
14217 | /* It doesn't actually matter what mode we use here, as we're |
14218 | only going to use this for printing. */ |
14219 | x = adjust_address_nv (x, DImode, 8); |
14220 | /* Output 'qword ptr' for intel assembler dialect. */ |
14221 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
14222 | code = 'q'; |
14223 | break; |
14224 | |
14225 | case 'K': |
14226 | if (!CONST_INT_P (x)) |
14227 | { |
14228 | output_operand_lossage ("operand is not an integer, invalid " |
14229 | "operand code 'K'"); |
14230 | return; |
14231 | } |
14232 | |
	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
14245 | /* We do not want to print value of the operand. */ |
14246 | return; |
14247 | |
14248 | case 'N': |
14249 | if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
14250 | fputs (s: "{z}", stream: file); |
14251 | return; |
14252 | |
14253 | case 'r': |
14254 | if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) |
14255 | { |
14256 | output_operand_lossage ("operand is not a specific integer, " |
14257 | "invalid operand code 'r'"); |
14258 | return; |
14259 | } |
14260 | |
14261 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
14262 | fputs (s: ", ", stream: file); |
14263 | |
14264 | fputs (s: "{sae}", stream: file); |
14265 | |
14266 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14267 | fputs (s: ", ", stream: file); |
14268 | |
14269 | return; |
14270 | |
14271 | case 'R': |
14272 | if (!CONST_INT_P (x)) |
14273 | { |
14274 | output_operand_lossage ("operand is not an integer, invalid " |
14275 | "operand code 'R'"); |
14276 | return; |
14277 | } |
14278 | |
14279 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
14280 | fputs (s: ", ", stream: file); |
14281 | |
14282 | switch (INTVAL (x)) |
14283 | { |
14284 | case ROUND_NEAREST_INT | ROUND_SAE: |
14285 | fputs (s: "{rn-sae}", stream: file); |
14286 | break; |
14287 | case ROUND_NEG_INF | ROUND_SAE: |
14288 | fputs (s: "{rd-sae}", stream: file); |
14289 | break; |
14290 | case ROUND_POS_INF | ROUND_SAE: |
14291 | fputs (s: "{ru-sae}", stream: file); |
14292 | break; |
14293 | case ROUND_ZERO | ROUND_SAE: |
14294 | fputs (s: "{rz-sae}", stream: file); |
14295 | break; |
14296 | default: |
14297 | output_operand_lossage ("operand is not a specific integer, " |
14298 | "invalid operand code 'R'"); |
14299 | } |
14300 | |
14301 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14302 | fputs (s: ", ", stream: file); |
14303 | |
14304 | return; |
14305 | |
14306 | case 'v': |
14307 | if (MEM_P (x)) |
14308 | { |
14309 | switch (MEM_ADDR_SPACE (x)) |
14310 | { |
14311 | case ADDR_SPACE_GENERIC: |
14312 | break; |
14313 | case ADDR_SPACE_SEG_FS: |
14314 | fputs (s: "fs ", stream: file); |
14315 | break; |
14316 | case ADDR_SPACE_SEG_GS: |
14317 | fputs (s: "gs ", stream: file); |
14318 | break; |
14319 | default: |
14320 | gcc_unreachable (); |
14321 | } |
14322 | } |
14323 | else |
14324 | output_operand_lossage ("operand is not a memory reference, " |
14325 | "invalid operand code 'v'"); |
14326 | return; |
14327 | |
14328 | case '*': |
14329 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14330 | putc (c: '*', stream: file); |
14331 | return; |
14332 | |
14333 | case '&': |
14334 | { |
14335 | const char *name = get_some_local_dynamic_name (); |
14336 | if (name == NULL) |
14337 | output_operand_lossage ("'%%&' used without any " |
14338 | "local dynamic TLS references"); |
14339 | else |
14340 | assemble_name (file, name); |
14341 | return; |
14342 | } |
14343 | |
14344 | case '+': |
14345 | { |
14346 | rtx x; |
14347 | |
14348 | if (!optimize |
14349 | || optimize_function_for_size_p (cfun) |
14350 | || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN |
14351 | && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN)) |
14352 | return; |
14353 | |
14354 | x = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
14355 | if (x) |
14356 | { |
14357 | int pred_val = profile_probability::from_reg_br_prob_note |
14358 | (XINT (x, 0)).to_reg_br_prob_base (); |
14359 | |
14360 | bool taken = pred_val > REG_BR_PROB_BASE / 2; |
14361 | /* We use 3e (DS) prefix for taken branches and |
14362 | 2e (CS) prefix for not taken branches. */ |
14363 | if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN) |
14364 | fputs (s: "ds ; ", stream: file); |
14365 | else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN) |
14366 | fputs (s: "cs ; ", stream: file); |
14367 | } |
14368 | return; |
14369 | } |
14370 | |
14371 | case ';': |
14372 | #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX |
14373 | putc (';', file); |
14374 | #endif |
14375 | return; |
14376 | |
14377 | case '~': |
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
14379 | return; |
14380 | |
14381 | case 'M': |
14382 | if (TARGET_X32) |
14383 | { |
	      /* NB: 32-bit indices in a VSIB address are sign-extended
		 to 64 bits.  In x32, if the 32-bit address 0xf7fa3010 is
		 sign-extended to 0xfffffffff7fa3010, it becomes an
		 invalid address.  Add the addr32 prefix if there is no
		 base register nor symbol.  */
14389 | bool ok; |
14390 | struct ix86_address parts; |
	    ok = ix86_decompose_address (x, &parts);
14392 | gcc_assert (ok && parts.index == NULL_RTX); |
14393 | if (parts.base == NULL_RTX |
14394 | && (parts.disp == NULL_RTX |
14395 | || !symbolic_operand (parts.disp, |
14396 | GET_MODE (parts.disp)))) |
14397 | fputs (s: "addr32 ", stream: file); |
14398 | } |
14399 | return; |
14400 | |
14401 | case '^': |
14402 | if (Pmode != word_mode) |
14403 | fputs (s: "addr32 ", stream: file); |
14404 | return; |
14405 | |
14406 | case '!': |
14407 | if (ix86_notrack_prefixed_insn_p (current_output_insn)) |
14408 | fputs (s: "notrack ", stream: file); |
14409 | return; |
14410 | |
14411 | default: |
14412 | output_operand_lossage ("invalid operand code '%c'", code); |
14413 | } |
14414 | } |
14415 | |
14416 | if (REG_P (x)) |
14417 | print_reg (x, code, file); |
14418 | |
14419 | else if (MEM_P (x)) |
14420 | { |
14421 | rtx addr = XEXP (x, 0); |
14422 | |
14423 | /* No `byte ptr' prefix for call instructions ... */ |
14424 | if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') |
14425 | { |
14426 | machine_mode mode = GET_MODE (x); |
14427 | const char *size; |
14428 | |
14429 | /* Check for explicit size override codes. */ |
14430 | if (code == 'b') |
14431 | size = "BYTE"; |
14432 | else if (code == 'w') |
14433 | size = "WORD"; |
14434 | else if (code == 'k') |
14435 | size = "DWORD"; |
14436 | else if (code == 'q') |
14437 | size = "QWORD"; |
14438 | else if (code == 'x') |
14439 | size = "XMMWORD"; |
14440 | else if (code == 't') |
14441 | size = "YMMWORD"; |
14442 | else if (code == 'g') |
14443 | size = "ZMMWORD"; |
14444 | else if (mode == BLKmode) |
14445 | /* ... or BLKmode operands, when not overridden. */ |
14446 | size = NULL; |
14447 | else |
14448 | switch (GET_MODE_SIZE (mode)) |
14449 | { |
14450 | case 1: size = "BYTE"; break; |
14451 | case 2: size = "WORD"; break; |
14452 | case 4: size = "DWORD"; break; |
14453 | case 8: size = "QWORD"; break; |
14454 | case 12: size = "TBYTE"; break; |
14455 | case 16: |
14456 | if (mode == XFmode) |
14457 | size = "TBYTE"; |
14458 | else |
14459 | size = "XMMWORD"; |
14460 | break; |
14461 | case 32: size = "YMMWORD"; break; |
14462 | case 64: size = "ZMMWORD"; break; |
14463 | default: |
14464 | gcc_unreachable (); |
14465 | } |
14466 | if (size) |
14467 | { |
14468 | fputs (s: size, stream: file); |
14469 | fputs (s: " PTR ", stream: file); |
14470 | } |
14471 | } |
14472 | |
14473 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
14474 | output_operand_lossage ("invalid constraints for operand"); |
14475 | else |
14476 | ix86_print_operand_address_as |
14477 | (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); |
14478 | } |
14479 | |
14480 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode) |
14481 | { |
14482 | long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), |
14483 | REAL_MODE_FORMAT (HFmode)); |
14484 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14485 | putc (c: '$', stream: file); |
14486 | fprintf (stream: file, format: "0x%04x", (unsigned int) l); |
14487 | } |
14488 | |
14489 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) |
14490 | { |
14491 | long l; |
14492 | |
14493 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
14494 | |
14495 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14496 | putc (c: '$', stream: file); |
      /* Sign extend the 32-bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
		 (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
14503 | } |
14504 | |
14505 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) |
14506 | { |
14507 | long l[2]; |
14508 | |
14509 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); |
14510 | |
14511 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14512 | putc (c: '$', stream: file); |
14513 | fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); |
14514 | } |
14515 | |
14516 | /* These float cases don't actually occur as immediate operands. */ |
14517 | else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) |
14518 | { |
14519 | char dstr[30]; |
14520 | |
14521 | real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
14522 | fputs (s: dstr, stream: file); |
14523 | } |
14524 | |
14525 | /* Print bcst_mem_operand. */ |
14526 | else if (GET_CODE (x) == VEC_DUPLICATE) |
14527 | { |
14528 | machine_mode vmode = GET_MODE (x); |
14529 | /* Must be bcst_memory_operand. */ |
14530 | gcc_assert (bcst_mem_operand (x, vmode)); |
14531 | |
      rtx mem = XEXP (x, 0);
      ix86_print_operand (file, mem, 0);
14534 | |
14535 | switch (vmode) |
14536 | { |
14537 | case E_V2DImode: |
14538 | case E_V2DFmode: |
14539 | fputs (s: "{1to2}", stream: file); |
14540 | break; |
14541 | case E_V4SImode: |
14542 | case E_V4SFmode: |
14543 | case E_V4DImode: |
14544 | case E_V4DFmode: |
14545 | fputs (s: "{1to4}", stream: file); |
14546 | break; |
14547 | case E_V8SImode: |
14548 | case E_V8SFmode: |
14549 | case E_V8DFmode: |
14550 | case E_V8DImode: |
14551 | case E_V8HFmode: |
14552 | fputs (s: "{1to8}", stream: file); |
14553 | break; |
14554 | case E_V16SFmode: |
14555 | case E_V16SImode: |
14556 | case E_V16HFmode: |
14557 | fputs (s: "{1to16}", stream: file); |
14558 | break; |
14559 | case E_V32HFmode: |
14560 | fputs (s: "{1to32}", stream: file); |
14561 | break; |
14562 | default: |
14563 | gcc_unreachable (); |
14564 | } |
14565 | } |
14566 | |
14567 | else |
14568 | { |
14569 | /* We have patterns that allow zero sets of memory, for instance. |
14570 | In 64-bit mode, we should probably support all 8-byte vectors, |
14571 | since we can in fact encode that into an immediate. */ |
14572 | if (GET_CODE (x) == CONST_VECTOR) |
14573 | { |
14574 | if (x != CONST0_RTX (GET_MODE (x))) |
14575 | output_operand_lossage ("invalid vector immediate"); |
14576 | x = const0_rtx; |
14577 | } |
14578 | |
14579 | if (code == 'P') |
14580 | { |
	  if (ix86_force_load_from_GOT_p (x, true))
14582 | { |
14583 | /* For inline assembly statement, load function address |
14584 | from GOT with 'P' operand modifier to avoid PLT. */ |
14585 | x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
14586 | (TARGET_64BIT |
14587 | ? UNSPEC_GOTPCREL |
14588 | : UNSPEC_GOT)); |
14589 | x = gen_rtx_CONST (Pmode, x); |
14590 | x = gen_const_mem (Pmode, x); |
	      ix86_print_operand (file, x, 'A');
14592 | return; |
14593 | } |
14594 | } |
14595 | else if (code != 'p') |
14596 | { |
14597 | if (CONST_INT_P (x)) |
14598 | { |
14599 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14600 | putc (c: '$', stream: file); |
14601 | } |
14602 | else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF |
14603 | || GET_CODE (x) == LABEL_REF) |
14604 | { |
14605 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14606 | putc (c: '$', stream: file); |
14607 | else |
14608 | fputs (s: "OFFSET FLAT:", stream: file); |
14609 | } |
14610 | } |
14611 | if (CONST_INT_P (x)) |
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14613 | else if (flag_pic || MACHOPIC_INDIRECT) |
14614 | output_pic_addr_const (file, x, code); |
14615 | else |
14616 | output_addr_const (file, x); |
14617 | } |
14618 | } |
14619 | |
14620 | static bool |
14621 | ix86_print_operand_punct_valid_p (unsigned char code) |
14622 | { |
14623 | return (code == '*' || code == '+' || code == '&' || code == ';' |
14624 | || code == '~' || code == '^' || code == '!'); |
14625 | } |
14626 | |
14627 | /* Print a memory operand whose address is ADDR. */ |
14628 | |
14629 | static void |
14630 | ix86_print_operand_address_as (FILE *file, rtx addr, |
14631 | addr_space_t as, bool raw) |
14632 | { |
14633 | struct ix86_address parts; |
14634 | rtx base, index, disp; |
14635 | int scale; |
14636 | int ok; |
14637 | bool vsib = false; |
14638 | int code = 0; |
14639 | |
14640 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) |
14641 | { |
ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14643 | gcc_assert (parts.index == NULL_RTX); |
14644 | parts.index = XVECEXP (addr, 0, 1); |
14645 | parts.scale = INTVAL (XVECEXP (addr, 0, 2)); |
14646 | addr = XVECEXP (addr, 0, 0); |
14647 | vsib = true; |
14648 | } |
14649 | else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) |
14650 | { |
14651 | gcc_assert (TARGET_64BIT); |
ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14653 | code = 'q'; |
14654 | } |
14655 | else |
ok = ix86_decompose_address (addr, &parts);
14657 | |
14658 | gcc_assert (ok); |
14659 | |
14660 | base = parts.base; |
14661 | index = parts.index; |
14662 | disp = parts.disp; |
14663 | scale = parts.scale; |
14664 | |
14665 | if (ADDR_SPACE_GENERIC_P (as)) |
14666 | as = parts.seg; |
14667 | else |
14668 | gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); |
14669 | |
14670 | if (!ADDR_SPACE_GENERIC_P (as) && !raw) |
14671 | { |
14672 | if (ASSEMBLER_DIALECT == ASM_ATT) |
putc ('%', file);
14674 | |
14675 | switch (as) |
14676 | { |
14677 | case ADDR_SPACE_SEG_FS: |
fputs ("fs:", file);
break;
case ADDR_SPACE_SEG_GS:
fputs ("gs:", file);
14682 | break; |
14683 | default: |
14684 | gcc_unreachable (); |
14685 | } |
14686 | } |
14687 | |
14688 | /* Use one byte shorter RIP relative addressing for 64bit mode. */ |
14689 | if (TARGET_64BIT && !base && !index && !raw) |
14690 | { |
14691 | rtx symbol = disp; |
14692 | |
14693 | if (GET_CODE (disp) == CONST |
14694 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14695 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14696 | symbol = XEXP (XEXP (disp, 0), 0); |
14697 | |
14698 | if (GET_CODE (symbol) == LABEL_REF |
14699 | || (GET_CODE (symbol) == SYMBOL_REF |
14700 | && SYMBOL_REF_TLS_MODEL (symbol) == 0)) |
14701 | base = pc_rtx; |
14702 | } |
14703 | |
14704 | if (!base && !index) |
14705 | { |
/* A displacement-only address requires special attention.  */
14707 | if (CONST_INT_P (disp)) |
14708 | { |
14709 | if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) |
fputs ("ds:", file);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14712 | } |
14713 | /* Load the external function address via the GOT slot to avoid PLT. */ |
14714 | else if (GET_CODE (disp) == CONST |
14715 | && GET_CODE (XEXP (disp, 0)) == UNSPEC |
14716 | && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL |
14717 | || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) |
14718 | && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) |
output_pic_addr_const (file, disp, 0);
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
14722 | else |
14723 | output_addr_const (file, disp); |
14724 | } |
14725 | else |
14726 | { |
14727 | /* Print SImode register names to force addr32 prefix. */ |
14728 | if (SImode_address_operand (addr, VOIDmode)) |
14729 | { |
14730 | if (flag_checking) |
14731 | { |
14732 | gcc_assert (TARGET_64BIT); |
14733 | switch (GET_CODE (addr)) |
14734 | { |
14735 | case SUBREG: |
14736 | gcc_assert (GET_MODE (addr) == SImode); |
14737 | gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); |
14738 | break; |
14739 | case ZERO_EXTEND: |
14740 | case AND: |
14741 | gcc_assert (GET_MODE (addr) == DImode); |
14742 | break; |
14743 | default: |
14744 | gcc_unreachable (); |
14745 | } |
14746 | } |
14747 | gcc_assert (!code); |
14748 | code = 'k'; |
14749 | } |
14750 | else if (code == 0 |
14751 | && TARGET_X32 |
14752 | && disp |
14753 | && CONST_INT_P (disp) |
14754 | && INTVAL (disp) < -16*1024*1024) |
14755 | { |
14756 | /* X32 runs in 64-bit mode, where displacement, DISP, in |
14757 | address DISP(%r64), is encoded as 32-bit immediate sign- |
14758 | extended from 32-bit to 64-bit. For -0x40000300(%r64), |
14759 | address is %r64 + 0xffffffffbffffd00. When %r64 < |
14760 | 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, |
14761 | which is invalid for x32. The correct address is %r64 |
14762 | - 0x40000300 == 0xf7ffdd64. To properly encode |
14763 | -0x40000300(%r64) for x32, we zero-extend negative |
14764 | displacement by forcing addr32 prefix which truncates |
14765 | 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should |
14766 | zero-extend all negative displacements, including -1(%rsp). |
14767 | However, for small negative displacements, sign-extension |
14768 | won't cause overflow. We only zero-extend negative |
14769 | displacements if they < -16*1024*1024, which is also used |
14770 | to check legitimate address displacements for PIC. */ |
14771 | code = 'k'; |
14772 | } |
14773 | |
14774 | /* Since the upper 32 bits of RSP are always zero for x32, |
14775 | we can encode %esp as %rsp to avoid 0x67 prefix if |
14776 | there is no index register. */ |
14777 | if (TARGET_X32 && Pmode == SImode |
14778 | && !index && base && REG_P (base) && REGNO (base) == SP_REG) |
14779 | code = 'q'; |
14780 | |
14781 | if (ASSEMBLER_DIALECT == ASM_ATT) |
14782 | { |
14783 | if (disp) |
14784 | { |
14785 | if (flag_pic) |
output_pic_addr_const (file, disp, 0);
14787 | else if (GET_CODE (disp) == LABEL_REF) |
14788 | output_asm_label (disp); |
14789 | else |
14790 | output_addr_const (file, disp); |
14791 | } |
14792 | |
putc ('(', file);
if (base)
print_reg (base, code, file);
if (index)
{
putc (',', file);
print_reg (index, vsib ? 0 : code, file);
if (scale != 1 || vsib)
fprintf (file, ",%d", scale);
}
putc (')', file);
14804 | } |
14805 | else |
14806 | { |
14807 | rtx offset = NULL_RTX; |
14808 | |
14809 | if (disp) |
14810 | { |
14811 | /* Pull out the offset of a symbol; print any symbol itself. */ |
14812 | if (GET_CODE (disp) == CONST |
14813 | && GET_CODE (XEXP (disp, 0)) == PLUS |
14814 | && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) |
14815 | { |
14816 | offset = XEXP (XEXP (disp, 0), 1); |
14817 | disp = gen_rtx_CONST (VOIDmode, |
14818 | XEXP (XEXP (disp, 0), 0)); |
14819 | } |
14820 | |
14821 | if (flag_pic) |
output_pic_addr_const (file, disp, 0);
14823 | else if (GET_CODE (disp) == LABEL_REF) |
14824 | output_asm_label (disp); |
14825 | else if (CONST_INT_P (disp)) |
14826 | offset = disp; |
14827 | else |
14828 | output_addr_const (file, disp); |
14829 | } |
14830 | |
putc ('[', file);
if (base)
{
print_reg (base, code, file);
if (offset)
{
if (INTVAL (offset) >= 0)
putc ('+', file);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
}
}
else if (offset)
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
else
putc ('0', file);

if (index)
{
putc ('+', file);
print_reg (index, vsib ? 0 : code, file);
if (scale != 1 || vsib)
fprintf (file, "*%d", scale);
}
putc (']', file);
14855 | } |
14856 | } |
14857 | } |
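
/* Informal illustration (added note, not from the original sources): for an
   address with base %rbx, index %rcx, scale 4 and displacement 16, the AT&T
   branch above prints "16(%rbx,%rcx,4)" while the Intel branch prints
   "[rbx+16+rcx*4]"; for a VSIB address the index keeps its vector register
   name, e.g. "(%rax,%ymm1,8)".  */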
14858 | |
14859 | static void |
14860 | ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) |
14861 | { |
14862 | if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) |
14863 | output_operand_lossage ("invalid constraints for operand"); |
14864 | else |
ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14866 | } |
14867 | |
14868 | /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
14869 | |
14870 | static bool |
14871 | i386_asm_output_addr_const_extra (FILE *file, rtx x) |
14872 | { |
14873 | rtx op; |
14874 | |
14875 | if (GET_CODE (x) != UNSPEC) |
14876 | return false; |
14877 | |
14878 | op = XVECEXP (x, 0, 0); |
14879 | switch (XINT (x, 1)) |
14880 | { |
14881 | case UNSPEC_GOTOFF: |
14882 | output_addr_const (file, op); |
fputs ("@gotoff", file);
14884 | break; |
14885 | case UNSPEC_GOTTPOFF: |
14886 | output_addr_const (file, op); |
14887 | /* FIXME: This might be @TPOFF in Sun ld. */ |
fputs ("@gottpoff", file);
14889 | break; |
14890 | case UNSPEC_TPOFF: |
14891 | output_addr_const (file, op); |
fputs ("@tpoff", file);
14893 | break; |
14894 | case UNSPEC_NTPOFF: |
14895 | output_addr_const (file, op); |
14896 | if (TARGET_64BIT) |
fputs ("@tpoff", file);
else
fputs ("@ntpoff", file);
14900 | break; |
14901 | case UNSPEC_DTPOFF: |
14902 | output_addr_const (file, op); |
fputs ("@dtpoff", file);
14904 | break; |
14905 | case UNSPEC_GOTNTPOFF: |
14906 | output_addr_const (file, op); |
14907 | if (TARGET_64BIT) |
fputs (ASSEMBLER_DIALECT == ASM_ATT ?
"@gottpoff(%rip)" : "@gottpoff[rip]", file);
else
fputs ("@gotntpoff", file);
14912 | break; |
14913 | case UNSPEC_INDNTPOFF: |
14914 | output_addr_const (file, op); |
fputs ("@indntpoff", file);
14916 | break; |
14917 | case UNSPEC_SECREL32: |
14918 | output_addr_const (file, op); |
fputs ("@secrel32", file);
14920 | break; |
14921 | #if TARGET_MACHO |
14922 | case UNSPEC_MACHOPIC_OFFSET: |
14923 | output_addr_const (file, op); |
14924 | putc ('-', file); |
14925 | machopic_output_function_base_name (file); |
14926 | break; |
14927 | #endif |
14928 | |
14929 | default: |
14930 | return false; |
14931 | } |
14932 | |
14933 | return true; |
14934 | } |
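
/* Example of the output produced above (added as an illustration): for a
   UNSPEC_NTPOFF reference to symbol "foo" this prints "foo@tpoff" on 64-bit
   targets and "foo@ntpoff" on 32-bit targets, matching the assembler's TLS
   relocation operators.  */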
14935 | |
14936 | |
14937 | /* Output code to perform a 387 binary operation in INSN, one of PLUS, |
14938 | MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] |
14939 | is the expression of the binary operation. The output may either be |
14940 | emitted here, or returned to the caller, like all output_* functions. |
14941 | |
14942 | There is no guarantee that the operands are the same mode, as they |
14943 | might be within FLOAT or FLOAT_EXTEND expressions. */ |
14944 | |
14945 | #ifndef SYSV386_COMPAT |
14946 | /* Set to 1 for compatibility with brain-damaged assemblers. No-one |
14947 | wants to fix the assemblers because that causes incompatibility |
14948 | with gcc. No-one wants to fix gcc because that causes |
14949 | incompatibility with assemblers... You can use the option of |
14950 | -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ |
14951 | #define SYSV386_COMPAT 1 |
14952 | #endif |
14953 | |
14954 | const char * |
14955 | output_387_binary_op (rtx_insn *insn, rtx *operands) |
14956 | { |
14957 | static char buf[40]; |
14958 | const char *p; |
14959 | bool is_sse |
14960 | = (SSE_REG_P (operands[0]) |
14961 | || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); |
14962 | |
14963 | if (is_sse) |
14964 | p = "%v"; |
14965 | else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
14966 | || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
14967 | p = "fi"; |
14968 | else |
14969 | p = "f"; |
14970 | |
strcpy (buf, p);
14972 | |
14973 | switch (GET_CODE (operands[3])) |
14974 | { |
14975 | case PLUS: |
14976 | p = "add"; break; |
14977 | case MINUS: |
14978 | p = "sub"; break; |
14979 | case MULT: |
14980 | p = "mul"; break; |
14981 | case DIV: |
14982 | p = "div"; break; |
14983 | default: |
14984 | gcc_unreachable (); |
14985 | } |
14986 | |
strcat (buf, p);
14988 | |
14989 | if (is_sse) |
14990 | { |
14991 | p = GET_MODE (operands[0]) == SFmode ? "ss": "sd"; |
strcat (buf, p);
14993 | |
14994 | if (TARGET_AVX) |
14995 | p = "\t{%2, %1, %0|%0, %1, %2}"; |
14996 | else |
14997 | p = "\t{%2, %0|%0, %2}"; |
14998 | |
strcat (buf, p);
15000 | return buf; |
15001 | } |
15002 | |
/* Even if we do not want to check the inputs, this documents the input
constraints, which helps in understanding the following code.  */
15005 | if (flag_checking) |
15006 | { |
15007 | if (STACK_REG_P (operands[0]) |
15008 | && ((REG_P (operands[1]) |
15009 | && REGNO (operands[0]) == REGNO (operands[1]) |
15010 | && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) |
15011 | || (REG_P (operands[2]) |
15012 | && REGNO (operands[0]) == REGNO (operands[2]) |
15013 | && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) |
15014 | && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) |
15015 | ; /* ok */ |
15016 | else |
15017 | gcc_unreachable (); |
15018 | } |
15019 | |
15020 | switch (GET_CODE (operands[3])) |
15021 | { |
15022 | case MULT: |
15023 | case PLUS: |
15024 | if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) |
std::swap (operands[1], operands[2]);
15026 | |
/* We know operands[0] == operands[1].  */
15028 | |
15029 | if (MEM_P (operands[2])) |
15030 | { |
15031 | p = "%Z2\t%2"; |
15032 | break; |
15033 | } |
15034 | |
15035 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
15036 | { |
15037 | if (STACK_TOP_P (operands[0])) |
15038 | /* How is it that we are storing to a dead operand[2]? |
15039 | Well, presumably operands[1] is dead too. We can't |
15040 | store the result to st(0) as st(0) gets popped on this |
15041 | instruction. Instead store to operands[2] (which I |
15042 | think has to be st(1)). st(1) will be popped later. |
15043 | gcc <= 2.8.1 didn't have this check and generated |
15044 | assembly code that the Unixware assembler rejected. */ |
15045 | p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ |
15046 | else |
15047 | p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ |
15048 | break; |
15049 | } |
15050 | |
15051 | if (STACK_TOP_P (operands[0])) |
15052 | p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ |
15053 | else |
15054 | p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ |
15055 | break; |
15056 | |
15057 | case MINUS: |
15058 | case DIV: |
15059 | if (MEM_P (operands[1])) |
15060 | { |
15061 | p = "r%Z1\t%1"; |
15062 | break; |
15063 | } |
15064 | |
15065 | if (MEM_P (operands[2])) |
15066 | { |
15067 | p = "%Z2\t%2"; |
15068 | break; |
15069 | } |
15070 | |
15071 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
15072 | { |
15073 | #if SYSV386_COMPAT |
15074 | /* The SystemV/386 SVR3.2 assembler, and probably all AT&T |
15075 | derived assemblers, confusingly reverse the direction of |
15076 | the operation for fsub{r} and fdiv{r} when the |
15077 | destination register is not st(0). The Intel assembler |
15078 | doesn't have this brain damage. Read !SYSV386_COMPAT to |
15079 | figure out what the hardware really does. */ |
15080 | if (STACK_TOP_P (operands[0])) |
15081 | p = "{p\t%0, %2|rp\t%2, %0}"; |
15082 | else |
15083 | p = "{rp\t%2, %0|p\t%0, %2}"; |
15084 | #else |
15085 | if (STACK_TOP_P (operands[0])) |
15086 | /* As above for fmul/fadd, we can't store to st(0). */ |
15087 | p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ |
15088 | else |
15089 | p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ |
15090 | #endif |
15091 | break; |
15092 | } |
15093 | |
15094 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
15095 | { |
15096 | #if SYSV386_COMPAT |
15097 | if (STACK_TOP_P (operands[0])) |
15098 | p = "{rp\t%0, %1|p\t%1, %0}"; |
15099 | else |
15100 | p = "{p\t%1, %0|rp\t%0, %1}"; |
15101 | #else |
15102 | if (STACK_TOP_P (operands[0])) |
15103 | p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ |
15104 | else |
15105 | p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ |
15106 | #endif |
15107 | break; |
15108 | } |
15109 | |
15110 | if (STACK_TOP_P (operands[0])) |
15111 | { |
15112 | if (STACK_TOP_P (operands[1])) |
15113 | p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ |
15114 | else |
15115 | p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ |
15116 | break; |
15117 | } |
15118 | else if (STACK_TOP_P (operands[1])) |
15119 | { |
15120 | #if SYSV386_COMPAT |
15121 | p = "{\t%1, %0|r\t%0, %1}"; |
15122 | #else |
15123 | p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ |
15124 | #endif |
15125 | } |
15126 | else |
15127 | { |
15128 | #if SYSV386_COMPAT |
15129 | p = "{r\t%2, %0|\t%0, %2}"; |
15130 | #else |
15131 | p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ |
15132 | #endif |
15133 | } |
15134 | break; |
15135 | |
15136 | default: |
15137 | gcc_unreachable (); |
15138 | } |
15139 | |
strcat (buf, p);
15141 | return buf; |
15142 | } |
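
/* Hedged examples of the templates built above (added for illustration):
   for an AVX SFmode add, the pieces "%v" + "add" + "ss" + the three-operand
   template yield "%vaddss\t{%2, %1, %0|%0, %1, %2}", and for an x87 multiply
   where operands[2] is a memory operand the result is "fmul%Z2\t%2".  */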
15143 | |
15144 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
15145 | |
15146 | static int |
15147 | ix86_dirflag_mode_needed (rtx_insn *insn) |
15148 | { |
15149 | if (CALL_P (insn)) |
15150 | { |
15151 | if (cfun->machine->func_type == TYPE_NORMAL) |
15152 | return X86_DIRFLAG_ANY; |
15153 | else |
15154 | /* No need to emit CLD in interrupt handler for TARGET_CLD. */ |
15155 | return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; |
15156 | } |
15157 | |
15158 | if (recog_memoized (insn) < 0) |
15159 | return X86_DIRFLAG_ANY; |
15160 | |
15161 | if (get_attr_type (insn) == TYPE_STR) |
15162 | { |
15163 | /* Emit cld instruction if stringops are used in the function. */ |
15164 | if (cfun->machine->func_type == TYPE_NORMAL) |
15165 | return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; |
15166 | else |
15167 | return X86_DIRFLAG_RESET; |
15168 | } |
15169 | |
15170 | return X86_DIRFLAG_ANY; |
15171 | } |
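
/* Informal summary (added note): the direction-flag entity only forces a cld
   where string instructions might run with an unknown DF.  With -mcld a cld
   is required before string insns in normal functions, while interrupt
   handlers always require DF to be reset, since the interrupted context is
   unknown.  */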
15172 | |
/* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */
15174 | |
15175 | static bool |
15176 | ix86_check_avx_upper_register (const_rtx exp) |
15177 | { |
15178 | /* construct_container may return a parallel with expr_list |
15179 | which contains the real reg and mode */ |
15180 | subrtx_iterator::array_type array; |
15181 | FOR_EACH_SUBRTX (iter, array, exp, NONCONST) |
15182 | { |
15183 | const_rtx x = *iter; |
15184 | if (SSE_REG_P (x) |
15185 | && !EXT_REX_SSE_REG_P (x) |
15186 | && GET_MODE_BITSIZE (GET_MODE (x)) > 128) |
15187 | return true; |
15188 | } |
15189 | |
15190 | return false; |
15191 | } |
15192 | |
15193 | /* Check if a 256bit or 512bit AVX register is referenced in stores. */ |
15194 | |
15195 | static void |
15196 | ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) |
15197 | { |
15198 | if (SSE_REG_P (dest) |
15199 | && !EXT_REX_SSE_REG_P (dest) |
15200 | && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) |
15201 | { |
15202 | bool *used = (bool *) data; |
15203 | *used = true; |
15204 | } |
15205 | } |
15206 | |
15207 | /* Return needed mode for entity in optimize_mode_switching pass. */ |
15208 | |
15209 | static int |
15210 | ix86_avx_u128_mode_needed (rtx_insn *insn) |
15211 | { |
15212 | if (DEBUG_INSN_P (insn)) |
15213 | return AVX_U128_ANY; |
15214 | |
15215 | if (CALL_P (insn)) |
15216 | { |
15217 | rtx link; |
15218 | |
15219 | /* Needed mode is set to AVX_U128_CLEAN if there are |
15220 | no 256bit or 512bit modes used in function arguments. */ |
15221 | for (link = CALL_INSN_FUNCTION_USAGE (insn); |
15222 | link; |
15223 | link = XEXP (link, 1)) |
15224 | { |
15225 | if (GET_CODE (XEXP (link, 0)) == USE) |
15226 | { |
15227 | rtx arg = XEXP (XEXP (link, 0), 0); |
15228 | |
if (ix86_check_avx_upper_register (arg))
15230 | return AVX_U128_DIRTY; |
15231 | } |
15232 | } |
15233 | |
/* Needed mode is set to AVX_U128_CLEAN if there are no 256bit or
512bit registers used for the function return value.  */
15236 | bool avx_upper_reg_found = false; |
15237 | note_stores (insn, ix86_check_avx_upper_stores, |
15238 | &avx_upper_reg_found); |
15239 | if (avx_upper_reg_found) |
15240 | return AVX_U128_DIRTY; |
15241 | |
15242 | /* If the function is known to preserve some SSE registers, |
15243 | RA and previous passes can legitimately rely on that for |
15244 | modes wider than 256 bits. It's only safe to issue a |
15245 | vzeroupper if all SSE registers are clobbered. */ |
15246 | const function_abi &abi = insn_callee_abi (insn); |
15247 | if (vzeroupper_pattern (PATTERN (insn), VOIDmode) |
/* It should be safe to issue a vzeroupper before a sibling call.
Also, there is no mode_exit for a sibling call, so a vzeroupper
could otherwise be missing for it.  */
|| !(SIBLING_CALL_P (insn)
|| hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
abi.mode_clobbers (V4DImode)))
15254 | return AVX_U128_ANY; |
15255 | |
15256 | return AVX_U128_CLEAN; |
15257 | } |
15258 | |
15259 | rtx set = single_set (insn); |
15260 | if (set) |
15261 | { |
15262 | rtx dest = SET_DEST (set); |
15263 | rtx src = SET_SRC (set); |
15264 | if (SSE_REG_P (dest) |
15265 | && !EXT_REX_SSE_REG_P (dest) |
15266 | && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) |
15267 | { |
/* This is a YMM/ZMM load.  Return AVX_U128_DIRTY if the
source isn't zero.  */
if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
15271 | return AVX_U128_DIRTY; |
15272 | else |
15273 | return AVX_U128_ANY; |
15274 | } |
15275 | else |
15276 | { |
if (ix86_check_avx_upper_register (src))
15278 | return AVX_U128_DIRTY; |
15279 | } |
15280 | |
15281 | /* This isn't YMM/ZMM load/store. */ |
15282 | return AVX_U128_ANY; |
15283 | } |
15284 | |
/* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
The hardware changes state only when a 256bit register is written to,
but we need to prevent the compiler from moving the optimal insertion
point above an eventual read from a 256bit or 512bit register.  */
if (ix86_check_avx_upper_register (PATTERN (insn)))
15290 | return AVX_U128_DIRTY; |
15291 | |
15292 | return AVX_U128_ANY; |
15293 | } |
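
/* Informal summary (added note, not authoritative): a use or definition of a
   256bit or 512bit register makes the needed mode DIRTY, while a call whose
   arguments and return value involve only scalar or 128-bit values typically
   requires CLEAN; the mode-switching pass then places a vzeroupper on each
   DIRTY -> CLEAN transition via ix86_emit_mode_set below.  */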
15294 | |
15295 | /* Return mode that i387 must be switched into |
15296 | prior to the execution of insn. */ |
15297 | |
15298 | static int |
15299 | ix86_i387_mode_needed (int entity, rtx_insn *insn) |
15300 | { |
15301 | enum attr_i387_cw mode; |
15302 | |
/* The mode UNINITIALIZED is used to store the control word after a
function call or ASM pattern.  The mode ANY specifies that there are
no requirements on the control word and that no changes are made to
the bits we are interested in.  */
15307 | |
15308 | if (CALL_P (insn) |
15309 | || (NONJUMP_INSN_P (insn) |
15310 | && (asm_noperands (PATTERN (insn)) >= 0 |
15311 | || GET_CODE (PATTERN (insn)) == ASM_INPUT))) |
15312 | return I387_CW_UNINITIALIZED; |
15313 | |
15314 | if (recog_memoized (insn) < 0) |
15315 | return I387_CW_ANY; |
15316 | |
15317 | mode = get_attr_i387_cw (insn); |
15318 | |
15319 | switch (entity) |
15320 | { |
15321 | case I387_ROUNDEVEN: |
15322 | if (mode == I387_CW_ROUNDEVEN) |
15323 | return mode; |
15324 | break; |
15325 | |
15326 | case I387_TRUNC: |
15327 | if (mode == I387_CW_TRUNC) |
15328 | return mode; |
15329 | break; |
15330 | |
15331 | case I387_FLOOR: |
15332 | if (mode == I387_CW_FLOOR) |
15333 | return mode; |
15334 | break; |
15335 | |
15336 | case I387_CEIL: |
15337 | if (mode == I387_CW_CEIL) |
15338 | return mode; |
15339 | break; |
15340 | |
15341 | default: |
15342 | gcc_unreachable (); |
15343 | } |
15344 | |
15345 | return I387_CW_ANY; |
15346 | } |
15347 | |
15348 | /* Return mode that entity must be switched into |
15349 | prior to the execution of insn. */ |
15350 | |
15351 | static int |
15352 | ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) |
15353 | { |
15354 | switch (entity) |
15355 | { |
15356 | case X86_DIRFLAG: |
15357 | return ix86_dirflag_mode_needed (insn); |
15358 | case AVX_U128: |
15359 | return ix86_avx_u128_mode_needed (insn); |
15360 | case I387_ROUNDEVEN: |
15361 | case I387_TRUNC: |
15362 | case I387_FLOOR: |
15363 | case I387_CEIL: |
15364 | return ix86_i387_mode_needed (entity, insn); |
15365 | default: |
15366 | gcc_unreachable (); |
15367 | } |
15368 | return 0; |
15369 | } |
15370 | |
15371 | /* Calculate mode of upper 128bit AVX registers after the insn. */ |
15372 | |
15373 | static int |
15374 | ix86_avx_u128_mode_after (int mode, rtx_insn *insn) |
15375 | { |
15376 | rtx pat = PATTERN (insn); |
15377 | |
15378 | if (vzeroupper_pattern (pat, VOIDmode) |
15379 | || vzeroall_pattern (pat, VOIDmode)) |
15380 | return AVX_U128_CLEAN; |
15381 | |
/* We know that the state is clean after a CALL insn if there are no
256bit or 512bit registers used for the function return value.  */
15384 | if (CALL_P (insn)) |
15385 | { |
15386 | bool avx_upper_reg_found = false; |
15387 | note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); |
15388 | |
15389 | if (avx_upper_reg_found) |
15390 | return AVX_U128_DIRTY; |
15391 | |
/* If the function doesn't clobber any SSE registers, or clobbers only
their 128-bit parts, then a vzeroupper isn't issued before the function
exit, and the status after the call is ANY rather than CLEAN.  */
15395 | const function_abi &abi = insn_callee_abi (insn); |
15396 | if (!(SIBLING_CALL_P (insn) |
15397 | || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], |
abi.mode_clobbers (V4DImode)))
15399 | return AVX_U128_ANY; |
15400 | |
15401 | return AVX_U128_CLEAN; |
15402 | } |
15403 | |
15404 | /* Otherwise, return current mode. Remember that if insn |
15405 | references AVX 256bit or 512bit registers, the mode was already |
15406 | changed to DIRTY from MODE_NEEDED. */ |
15407 | return mode; |
15408 | } |
15409 | |
15410 | /* Return the mode that an insn results in. */ |
15411 | |
15412 | static int |
15413 | ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) |
15414 | { |
15415 | switch (entity) |
15416 | { |
15417 | case X86_DIRFLAG: |
15418 | return mode; |
15419 | case AVX_U128: |
15420 | return ix86_avx_u128_mode_after (mode, insn); |
15421 | case I387_ROUNDEVEN: |
15422 | case I387_TRUNC: |
15423 | case I387_FLOOR: |
15424 | case I387_CEIL: |
15425 | return mode; |
15426 | default: |
15427 | gcc_unreachable (); |
15428 | } |
15429 | } |
15430 | |
15431 | static int |
15432 | ix86_dirflag_mode_entry (void) |
15433 | { |
15434 | /* For TARGET_CLD or in the interrupt handler we can't assume |
15435 | direction flag state at function entry. */ |
15436 | if (TARGET_CLD |
15437 | || cfun->machine->func_type != TYPE_NORMAL) |
15438 | return X86_DIRFLAG_ANY; |
15439 | |
15440 | return X86_DIRFLAG_RESET; |
15441 | } |
15442 | |
15443 | static int |
15444 | ix86_avx_u128_mode_entry (void) |
15445 | { |
15446 | tree arg; |
15447 | |
15448 | /* Entry mode is set to AVX_U128_DIRTY if there are |
15449 | 256bit or 512bit modes used in function arguments. */ |
15450 | for (arg = DECL_ARGUMENTS (current_function_decl); arg; |
15451 | arg = TREE_CHAIN (arg)) |
15452 | { |
15453 | rtx incoming = DECL_INCOMING_RTL (arg); |
15454 | |
if (incoming && ix86_check_avx_upper_register (incoming))
15456 | return AVX_U128_DIRTY; |
15457 | } |
15458 | |
15459 | return AVX_U128_CLEAN; |
15460 | } |
15461 | |
15462 | /* Return a mode that ENTITY is assumed to be |
15463 | switched to at function entry. */ |
15464 | |
15465 | static int |
15466 | ix86_mode_entry (int entity) |
15467 | { |
15468 | switch (entity) |
15469 | { |
15470 | case X86_DIRFLAG: |
15471 | return ix86_dirflag_mode_entry (); |
15472 | case AVX_U128: |
15473 | return ix86_avx_u128_mode_entry (); |
15474 | case I387_ROUNDEVEN: |
15475 | case I387_TRUNC: |
15476 | case I387_FLOOR: |
15477 | case I387_CEIL: |
15478 | return I387_CW_ANY; |
15479 | default: |
15480 | gcc_unreachable (); |
15481 | } |
15482 | } |
15483 | |
15484 | static int |
15485 | ix86_avx_u128_mode_exit (void) |
15486 | { |
15487 | rtx reg = crtl->return_rtx; |
15488 | |
15489 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit |
15490 | or 512 bit modes used in the function return register. */ |
if (reg && ix86_check_avx_upper_register (reg))
15492 | return AVX_U128_DIRTY; |
15493 | |
15494 | /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit |
15495 | modes used in function arguments, otherwise return AVX_U128_CLEAN. |
15496 | */ |
15497 | return ix86_avx_u128_mode_entry (); |
15498 | } |
15499 | |
15500 | /* Return a mode that ENTITY is assumed to be |
15501 | switched to at function exit. */ |
15502 | |
15503 | static int |
15504 | ix86_mode_exit (int entity) |
15505 | { |
15506 | switch (entity) |
15507 | { |
15508 | case X86_DIRFLAG: |
15509 | return X86_DIRFLAG_ANY; |
15510 | case AVX_U128: |
15511 | return ix86_avx_u128_mode_exit (); |
15512 | case I387_ROUNDEVEN: |
15513 | case I387_TRUNC: |
15514 | case I387_FLOOR: |
15515 | case I387_CEIL: |
15516 | return I387_CW_ANY; |
15517 | default: |
15518 | gcc_unreachable (); |
15519 | } |
15520 | } |
15521 | |
15522 | static int |
15523 | ix86_mode_priority (int, int n) |
15524 | { |
15525 | return n; |
15526 | } |
15527 | |
15528 | /* Output code to initialize control word copies used by trunc?f?i and |
15529 | rounding patterns. CURRENT_MODE is set to current control word, |
15530 | while NEW_MODE is set to new control word. */ |
15531 | |
15532 | static void |
15533 | emit_i387_cw_initialization (int mode) |
15534 | { |
15535 | rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); |
15536 | rtx new_mode; |
15537 | |
15538 | enum ix86_stack_slot slot; |
15539 | |
15540 | rtx reg = gen_reg_rtx (HImode); |
15541 | |
15542 | emit_insn (gen_x86_fnstcw_1 (stored_mode)); |
15543 | emit_move_insn (reg, copy_rtx (stored_mode)); |
15544 | |
15545 | switch (mode) |
15546 | { |
15547 | case I387_CW_ROUNDEVEN: |
15548 | /* round to nearest */ |
15549 | emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); |
15550 | slot = SLOT_CW_ROUNDEVEN; |
15551 | break; |
15552 | |
15553 | case I387_CW_TRUNC: |
15554 | /* round toward zero (truncate) */ |
15555 | emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); |
15556 | slot = SLOT_CW_TRUNC; |
15557 | break; |
15558 | |
15559 | case I387_CW_FLOOR: |
15560 | /* round down toward -oo */ |
15561 | emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); |
15562 | emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); |
15563 | slot = SLOT_CW_FLOOR; |
15564 | break; |
15565 | |
15566 | case I387_CW_CEIL: |
15567 | /* round up toward +oo */ |
15568 | emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); |
15569 | emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); |
15570 | slot = SLOT_CW_CEIL; |
15571 | break; |
15572 | |
15573 | default: |
15574 | gcc_unreachable (); |
15575 | } |
15576 | |
15577 | gcc_assert (slot < MAX_386_STACK_LOCALS); |
15578 | |
15579 | new_mode = assign_386_stack_local (HImode, slot); |
15580 | emit_move_insn (new_mode, reg); |
15581 | } |
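
/* For reference (added, hedged summary): the masks above manipulate the x87
   rounding-control field, bits 11:10 of the FPU control word: 00 = round to
   nearest even, 01 = round down, 10 = round up, 11 = round toward zero, which
   matches clearing with ~0x0c00 and or-ing in 0x0400, 0x0800 or 0x0c00
   respectively.  */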
15582 | |
15583 | /* Generate one or more insns to set ENTITY to MODE. */ |
15584 | |
15585 | static void |
15586 | ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, |
15587 | HARD_REG_SET regs_live ATTRIBUTE_UNUSED) |
15588 | { |
15589 | switch (entity) |
15590 | { |
15591 | case X86_DIRFLAG: |
15592 | if (mode == X86_DIRFLAG_RESET) |
15593 | emit_insn (gen_cld ()); |
15594 | break; |
15595 | case AVX_U128: |
15596 | if (mode == AVX_U128_CLEAN) |
15597 | ix86_expand_avx_vzeroupper (); |
15598 | break; |
15599 | case I387_ROUNDEVEN: |
15600 | case I387_TRUNC: |
15601 | case I387_FLOOR: |
15602 | case I387_CEIL: |
15603 | if (mode != I387_CW_ANY |
15604 | && mode != I387_CW_UNINITIALIZED) |
15605 | emit_i387_cw_initialization (mode); |
15606 | break; |
15607 | default: |
15608 | gcc_unreachable (); |
15609 | } |
15610 | } |
15611 | |
15612 | /* Output code for INSN to convert a float to a signed int. OPERANDS |
15613 | are the insn operands. The output may be [HSD]Imode and the input |
15614 | operand may be [SDX]Fmode. */ |
15615 | |
15616 | const char * |
15617 | output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) |
15618 | { |
15619 | bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); |
15620 | bool dimode_p = GET_MODE (operands[0]) == DImode; |
15621 | int round_mode = get_attr_i387_cw (insn); |
15622 | |
15623 | static char buf[40]; |
15624 | const char *p; |
15625 | |
15626 | /* Jump through a hoop or two for DImode, since the hardware has no |
15627 | non-popping instruction. We used to do this a different way, but |
15628 | that was somewhat fragile and broke with post-reload splitters. */ |
15629 | if ((dimode_p || fisttp) && !stack_top_dies) |
15630 | output_asm_insn ("fld\t%y1", operands); |
15631 | |
15632 | gcc_assert (STACK_TOP_P (operands[1])); |
15633 | gcc_assert (MEM_P (operands[0])); |
15634 | gcc_assert (GET_MODE (operands[1]) != TFmode); |
15635 | |
15636 | if (fisttp) |
15637 | return "fisttp%Z0\t%0"; |
15638 | |
strcpy (buf, "fist");
15640 | |
15641 | if (round_mode != I387_CW_ANY) |
15642 | output_asm_insn ("fldcw\t%3", operands); |
15643 | |
15644 | p = "p%Z0\t%0"; |
strcat (buf, p + !(stack_top_dies || dimode_p));
15646 | |
15647 | output_asm_insn (buf, operands); |
15648 | |
15649 | if (round_mode != I387_CW_ANY) |
15650 | output_asm_insn ("fldcw\t%2", operands); |
15651 | |
15652 | return ""; |
15653 | } |
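
/* Note added for clarity: the SSE3 fisttp form always truncates and needs no
   control-word changes, which is why the fisttp path returns early, while the
   plain fist/fistp path above brackets the store with "fldcw %3" / "fldcw %2"
   to switch to and then restore the rounding mode.  */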
15654 | |
15655 | /* Output code for x87 ffreep insn. The OPNO argument, which may only |
15656 | have the values zero or one, indicates the ffreep insn's operand |
15657 | from the OPERANDS array. */ |
15658 | |
15659 | static const char * |
15660 | output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) |
15661 | { |
15662 | if (TARGET_USE_FFREEP) |
15663 | #ifdef HAVE_AS_IX86_FFREEP |
15664 | return opno ? "ffreep\t%y1": "ffreep\t%y0"; |
15665 | #else |
15666 | { |
15667 | static char retval[32]; |
15668 | int regno = REGNO (operands[opno]); |
15669 | |
15670 | gcc_assert (STACK_REGNO_P (regno)); |
15671 | |
15672 | regno -= FIRST_STACK_REG; |
15673 | |
15674 | snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); |
15675 | return retval; |
15676 | } |
15677 | #endif |
15678 | |
15679 | return opno ? "fstp\t%y1": "fstp\t%y0"; |
15680 | } |
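
/* Illustrative note (added, not from the original sources): when the
   assembler lacks the ffreep mnemonic, the ASM_SHORT fallback above emits the
   raw encoding; e.g. for %st(2) it prints the 16-bit value 0xc2df, whose
   little-endian bytes DF C2 are exactly "ffreep %st(2)" (opcode DF C0+i).  */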
15681 | |
15682 | |
15683 | /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi |
15684 | should be used. UNORDERED_P is true when fucom should be used. */ |
15685 | |
15686 | const char * |
15687 | output_fp_compare (rtx_insn *insn, rtx *operands, |
15688 | bool eflags_p, bool unordered_p) |
15689 | { |
15690 | rtx *xops = eflags_p ? &operands[0] : &operands[1]; |
15691 | bool stack_top_dies; |
15692 | |
15693 | static char buf[40]; |
15694 | const char *p; |
15695 | |
15696 | gcc_assert (STACK_TOP_P (xops[0])); |
15697 | |
15698 | stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); |
15699 | |
15700 | if (eflags_p) |
15701 | { |
15702 | p = unordered_p ? "fucomi": "fcomi"; |
strcpy (buf, p);
15704 | |
15705 | p = "p\t{%y1, %0|%0, %y1}"; |
strcat (buf, p + !stack_top_dies);
15707 | |
15708 | return buf; |
15709 | } |
15710 | |
15711 | if (STACK_REG_P (xops[1]) |
15712 | && stack_top_dies |
15713 | && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1)) |
15714 | { |
15715 | gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1); |
15716 | |
15717 | /* If both the top of the 387 stack die, and the other operand |
15718 | is also a stack register that dies, then this must be a |
15719 | `fcompp' float compare. */ |
15720 | p = unordered_p ? "fucompp": "fcompp"; |
strcpy (buf, p);
15722 | } |
15723 | else if (const0_operand (xops[1], VOIDmode)) |
15724 | { |
15725 | gcc_assert (!unordered_p); |
strcpy (buf, "ftst");
15727 | } |
15728 | else |
15729 | { |
15730 | if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT) |
15731 | { |
15732 | gcc_assert (!unordered_p); |
15733 | p = "ficom"; |
15734 | } |
15735 | else |
15736 | p = unordered_p ? "fucom": "fcom"; |
15737 | |
strcpy (buf, p);
15739 | |
15740 | p = "p%Z2\t%y2"; |
strcat (buf, p + !stack_top_dies);
15742 | } |
15743 | |
15744 | output_asm_insn (buf, operands); |
15745 | return "fnstsw\t%0"; |
15746 | } |
15747 | |
15748 | void |
15749 | ix86_output_addr_vec_elt (FILE *file, int value) |
15750 | { |
15751 | const char *directive = ASM_LONG; |
15752 | |
15753 | #ifdef ASM_QUAD |
15754 | if (TARGET_LP64) |
15755 | directive = ASM_QUAD; |
15756 | #else |
15757 | gcc_assert (!TARGET_64BIT); |
15758 | #endif |
15759 | |
fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15761 | } |
15762 | |
15763 | void |
15764 | ix86_output_addr_diff_elt (FILE *file, int value, int rel) |
15765 | { |
15766 | const char *directive = ASM_LONG; |
15767 | |
15768 | #ifdef ASM_QUAD |
15769 | if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) |
15770 | directive = ASM_QUAD; |
15771 | #else |
15772 | gcc_assert (!TARGET_64BIT); |
15773 | #endif |
15774 | /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ |
15775 | if (TARGET_64BIT || TARGET_VXWORKS_RTP) |
fprintf (file, "%s%s%d-%s%d\n",
15777 | directive, LPREFIX, value, LPREFIX, rel); |
15778 | #if TARGET_MACHO |
15779 | else if (TARGET_MACHO) |
15780 | { |
15781 | fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); |
15782 | machopic_output_function_base_name (file); |
15783 | putc ('\n', file); |
15784 | } |
15785 | #endif |
15786 | else if (HAVE_AS_GOTOFF_IN_DATA) |
fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15788 | else |
15789 | asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", |
15790 | GOT_SYMBOL_NAME, LPREFIX, value); |
15791 | } |
15792 | |
15793 | #define LEA_MAX_STALL (3) |
15794 | #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) |
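
/* Clarifying note (added): the distances below are accumulated in half-cycles
   (see increase_distance) and converted back with ">> 1" by the callers, so a
   LEA_SEARCH_THRESHOLD of LEA_MAX_STALL << 1 corresponds to LEA_MAX_STALL
   full cycles.  */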
15795 | |
/* Increase the given DISTANCE in half-cycles according to
dependencies between the PREV and NEXT instructions.
Add 1 half-cycle if there is no dependency and
go to the next cycle if there is some dependency.  */
15800 | |
15801 | static unsigned int |
15802 | increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) |
15803 | { |
15804 | df_ref def, use; |
15805 | |
15806 | if (!prev || !next) |
15807 | return distance + (distance & 1) + 2; |
15808 | |
15809 | if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) |
15810 | return distance + 1; |
15811 | |
15812 | FOR_EACH_INSN_USE (use, next) |
15813 | FOR_EACH_INSN_DEF (def, prev) |
15814 | if (!DF_REF_IS_ARTIFICIAL (def) |
15815 | && DF_REF_REGNO (use) == DF_REF_REGNO (def)) |
15816 | return distance + (distance & 1) + 2; |
15817 | |
15818 | return distance + 1; |
15819 | } |
15820 | |
15821 | /* Function checks if instruction INSN defines register number |
15822 | REGNO1 or REGNO2. */ |
15823 | |
15824 | bool |
15825 | insn_defines_reg (unsigned int regno1, unsigned int regno2, |
15826 | rtx_insn *insn) |
15827 | { |
15828 | df_ref def; |
15829 | |
15830 | FOR_EACH_INSN_DEF (def, insn) |
15831 | if (DF_REF_REG_DEF_P (def) |
15832 | && !DF_REF_IS_ARTIFICIAL (def) |
15833 | && (regno1 == DF_REF_REGNO (def) |
15834 | || regno2 == DF_REF_REGNO (def))) |
15835 | return true; |
15836 | |
15837 | return false; |
15838 | } |
15839 | |
15840 | /* Function checks if instruction INSN uses register number |
15841 | REGNO as a part of address expression. */ |
15842 | |
15843 | static bool |
15844 | insn_uses_reg_mem (unsigned int regno, rtx insn) |
15845 | { |
15846 | df_ref use; |
15847 | |
15848 | FOR_EACH_INSN_USE (use, insn) |
15849 | if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) |
15850 | return true; |
15851 | |
15852 | return false; |
15853 | } |
15854 | |
15855 | /* Search backward for non-agu definition of register number REGNO1 |
15856 | or register number REGNO2 in basic block starting from instruction |
15857 | START up to head of basic block or instruction INSN. |
15858 | |
15859 | Function puts true value into *FOUND var if definition was found |
15860 | and false otherwise. |
15861 | |
15862 | Distance in half-cycles between START and found instruction or head |
15863 | of BB is added to DISTANCE and returned. */ |
15864 | |
15865 | static int |
15866 | distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, |
15867 | rtx_insn *insn, int distance, |
15868 | rtx_insn *start, bool *found) |
15869 | { |
basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15871 | rtx_insn *prev = start; |
15872 | rtx_insn *next = NULL; |
15873 | |
15874 | *found = false; |
15875 | |
15876 | while (prev |
15877 | && prev != insn |
15878 | && distance < LEA_SEARCH_THRESHOLD) |
15879 | { |
15880 | if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) |
15881 | { |
15882 | distance = increase_distance (prev, next, distance); |
if (insn_defines_reg (regno1, regno2, prev))
{
if (recog_memoized (prev) < 0
15886 | || get_attr_type (prev) != TYPE_LEA) |
15887 | { |
15888 | *found = true; |
15889 | return distance; |
15890 | } |
15891 | } |
15892 | |
15893 | next = prev; |
15894 | } |
15895 | if (prev == BB_HEAD (bb)) |
15896 | break; |
15897 | |
prev = PREV_INSN (prev);
15899 | } |
15900 | |
15901 | return distance; |
15902 | } |
15903 | |
15904 | /* Search backward for non-agu definition of register number REGNO1 |
15905 | or register number REGNO2 in INSN's basic block until |
15906 | 1. Pass LEA_SEARCH_THRESHOLD instructions, or |
15907 | 2. Reach neighbor BBs boundary, or |
15908 | 3. Reach agu definition. |
15909 | Returns the distance between the non-agu definition point and INSN. |
15910 | If no definition point, returns -1. */ |
15911 | |
15912 | static int |
15913 | distance_non_agu_define (unsigned int regno1, unsigned int regno2, |
15914 | rtx_insn *insn) |
15915 | { |
15916 | basic_block bb = BLOCK_FOR_INSN (insn); |
15917 | int distance = 0; |
15918 | bool found = false; |
15919 | |
15920 | if (insn != BB_HEAD (bb)) |
15921 | distance = distance_non_agu_define_in_bb (regno1, regno2, insn, |
distance, PREV_INSN (insn),
&found);
15924 | |
15925 | if (!found && distance < LEA_SEARCH_THRESHOLD) |
15926 | { |
15927 | edge e; |
15928 | edge_iterator ei; |
15929 | bool simple_loop = false; |
15930 | |
15931 | FOR_EACH_EDGE (e, ei, bb->preds) |
15932 | if (e->src == bb) |
15933 | { |
15934 | simple_loop = true; |
15935 | break; |
15936 | } |
15937 | |
15938 | if (simple_loop) |
15939 | distance = distance_non_agu_define_in_bb (regno1, regno2, |
15940 | insn, distance, |
BB_END (bb), &found);
15942 | else |
15943 | { |
15944 | int shortest_dist = -1; |
15945 | bool found_in_bb = false; |
15946 | |
15947 | FOR_EACH_EDGE (e, ei, bb->preds) |
15948 | { |
15949 | int bb_dist |
15950 | = distance_non_agu_define_in_bb (regno1, regno2, |
15951 | insn, distance, |
15952 | BB_END (e->src), |
&found_in_bb);
15954 | if (found_in_bb) |
15955 | { |
15956 | if (shortest_dist < 0) |
15957 | shortest_dist = bb_dist; |
15958 | else if (bb_dist > 0) |
15959 | shortest_dist = MIN (bb_dist, shortest_dist); |
15960 | |
15961 | found = true; |
15962 | } |
15963 | } |
15964 | |
15965 | distance = shortest_dist; |
15966 | } |
15967 | } |
15968 | |
15969 | if (!found) |
15970 | return -1; |
15971 | |
15972 | return distance >> 1; |
15973 | } |
15974 | |
/* Return the distance in half-cycles, added to DISTANCE, between INSN
and the next insn that uses register number REGNO in a memory
address.  Return -1 if REGNO is set.

Put a true value into *FOUND if a register use was found and
false otherwise.
Put a true value into *REDEFINED if a register redefinition was
found and false otherwise.  */
15983 | |
15984 | static int |
15985 | distance_agu_use_in_bb (unsigned int regno, |
15986 | rtx_insn *insn, int distance, rtx_insn *start, |
15987 | bool *found, bool *redefined) |
15988 | { |
15989 | basic_block bb = NULL; |
15990 | rtx_insn *next = start; |
15991 | rtx_insn *prev = NULL; |
15992 | |
15993 | *found = false; |
15994 | *redefined = false; |
15995 | |
15996 | if (start != NULL_RTX) |
15997 | { |
bb = BLOCK_FOR_INSN (start);
15999 | if (start != BB_HEAD (bb)) |
16000 | /* If insn and start belong to the same bb, set prev to insn, |
16001 | so the call to increase_distance will increase the distance |
16002 | between insns by 1. */ |
16003 | prev = insn; |
16004 | } |
16005 | |
16006 | while (next |
16007 | && next != insn |
16008 | && distance < LEA_SEARCH_THRESHOLD) |
16009 | { |
16010 | if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) |
16011 | { |
distance = increase_distance (prev, next, distance);
if (insn_uses_reg_mem (regno, next))
16014 | { |
16015 | /* Return DISTANCE if OP0 is used in memory |
16016 | address in NEXT. */ |
16017 | *found = true; |
16018 | return distance; |
16019 | } |
16020 | |
if (insn_defines_reg (regno, INVALID_REGNUM, next))
16022 | { |
16023 | /* Return -1 if OP0 is set in NEXT. */ |
16024 | *redefined = true; |
16025 | return -1; |
16026 | } |
16027 | |
16028 | prev = next; |
16029 | } |
16030 | |
16031 | if (next == BB_END (bb)) |
16032 | break; |
16033 | |
next = NEXT_INSN (next);
16035 | } |
16036 | |
16037 | return distance; |
16038 | } |
16039 | |
/* Return the distance between INSN and the next insn that uses
register number REGNO0 in a memory address.  Return -1 if no such
use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
16043 | |
16044 | static int |
16045 | distance_agu_use (unsigned int regno0, rtx_insn *insn) |
16046 | { |
16047 | basic_block bb = BLOCK_FOR_INSN (insn); |
16048 | int distance = 0; |
16049 | bool found = false; |
16050 | bool redefined = false; |
16051 | |
16052 | if (insn != BB_END (bb)) |
distance = distance_agu_use_in_bb (regno0, insn, distance,
NEXT_INSN (insn),
&found, &redefined);
16056 | |
16057 | if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) |
16058 | { |
16059 | edge e; |
16060 | edge_iterator ei; |
16061 | bool simple_loop = false; |
16062 | |
16063 | FOR_EACH_EDGE (e, ei, bb->succs) |
16064 | if (e->dest == bb) |
16065 | { |
16066 | simple_loop = true; |
16067 | break; |
16068 | } |
16069 | |
16070 | if (simple_loop) |
distance = distance_agu_use_in_bb (regno0, insn,
distance, BB_HEAD (bb),
&found, &redefined);
16074 | else |
16075 | { |
16076 | int shortest_dist = -1; |
16077 | bool found_in_bb = false; |
16078 | bool redefined_in_bb = false; |
16079 | |
16080 | FOR_EACH_EDGE (e, ei, bb->succs) |
16081 | { |
16082 | int bb_dist |
= distance_agu_use_in_bb (regno0, insn,
distance, BB_HEAD (e->dest),
&found_in_bb, &redefined_in_bb);
16086 | if (found_in_bb) |
16087 | { |
16088 | if (shortest_dist < 0) |
16089 | shortest_dist = bb_dist; |
16090 | else if (bb_dist > 0) |
16091 | shortest_dist = MIN (bb_dist, shortest_dist); |
16092 | |
16093 | found = true; |
16094 | } |
16095 | } |
16096 | |
16097 | distance = shortest_dist; |
16098 | } |
16099 | } |
16100 | |
16101 | if (!found || redefined) |
16102 | return -1; |
16103 | |
16104 | return distance >> 1; |
16105 | } |
16106 | |
/* Define this macro to tune LEA priority vs ADD; it takes effect when
there is a choice between LEA and ADD:
Negative value: ADD is preferred over LEA
Zero: neutral
Positive value: LEA is preferred over ADD.  */
16112 | #define IX86_LEA_PRIORITY 0 |
16113 | |
/* Return true if using the lea INSN has a performance advantage
over a sequence of instructions.  The instruction sequence has
SPLIT_COST cycles higher latency than the lea.  */
16117 | |
16118 | static bool |
16119 | ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, |
16120 | unsigned int regno2, int split_cost, bool has_scale) |
16121 | { |
16122 | int dist_define, dist_use; |
16123 | |
/* For Atom processors newer than Bonnell, the use of LEA is justified
when a 2-source or 3-source LEA is used for a non-destructive
destination, or when the ability to use a scale is wanted.  */
16127 | if (!TARGET_CPU_P (BONNELL)) |
16128 | { |
16129 | if (has_scale) |
16130 | return true; |
16131 | if (split_cost < 1) |
16132 | return false; |
16133 | if (regno0 == regno1 || regno0 == regno2) |
16134 | return false; |
16135 | return true; |
16136 | } |
16137 | |
16138 | /* Remember recog_data content. */ |
16139 | struct recog_data_d recog_data_save = recog_data; |
16140 | |
16141 | dist_define = distance_non_agu_define (regno1, regno2, insn); |
16142 | dist_use = distance_agu_use (regno0, insn); |
16143 | |
16144 | /* distance_non_agu_define can call get_attr_type which can call |
16145 | recog_memoized, restore recog_data back to previous content. */ |
16146 | recog_data = recog_data_save; |
16147 | |
16148 | if (dist_define < 0 || dist_define >= LEA_MAX_STALL) |
16149 | { |
/* If there is no non-AGU operand definition, no AGU operand usage,
and the split cost is 0, then both the lea and non-lea variants
have the same priority.  Currently we prefer lea for 64-bit code
and non-lea for 32-bit code.  */
16155 | if (dist_use < 0 && split_cost == 0) |
16156 | return TARGET_64BIT || IX86_LEA_PRIORITY; |
16157 | else |
16158 | return true; |
16159 | } |
16160 | |
/* With a longer definition distance, lea is more preferable.
Here we adjust it to take the splitting cost and
lea priority into account.  */
16164 | dist_define += split_cost + IX86_LEA_PRIORITY; |
16165 | |
/* If there is no use in a memory address then we just check
that the split cost exceeds the AGU stall.  */
16168 | if (dist_use < 0) |
16169 | return dist_define > LEA_MAX_STALL; |
16170 | |
/* If this insn has both a backward non-AGU dependence and a forward
AGU dependence, the one with the shorter distance takes effect.  */
16173 | return dist_define >= dist_use; |
16174 | } |
16175 | |
16176 | /* Return true if we need to split op0 = op1 + op2 into a sequence of |
16177 | move and add to avoid AGU stalls. */ |
16178 | |
16179 | bool |
16180 | ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) |
16181 | { |
16182 | unsigned int regno0, regno1, regno2; |
16183 | |
16184 | /* Check if we need to optimize. */ |
16185 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16186 | return false; |
16187 | |
16188 | regno0 = true_regnum (operands[0]); |
16189 | regno1 = true_regnum (operands[1]); |
16190 | regno2 = true_regnum (operands[2]); |
16191 | |
/* We only need to split adds with a non-destructive
destination operand.  */
16194 | if (regno0 == regno1 || regno0 == regno2) |
16195 | return false; |
16196 | else |
return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16198 | } |
16199 | |
16200 | /* Return true if we should emit lea instruction instead of mov |
16201 | instruction. */ |
16202 | |
16203 | bool |
16204 | ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) |
16205 | { |
16206 | unsigned int regno0, regno1; |
16207 | |
16208 | /* Check if we need to optimize. */ |
16209 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16210 | return false; |
16211 | |
16212 | /* Use lea for reg to reg moves only. */ |
16213 | if (!REG_P (operands[0]) || !REG_P (operands[1])) |
16214 | return false; |
16215 | |
16216 | regno0 = true_regnum (operands[0]); |
16217 | regno1 = true_regnum (operands[1]); |
16218 | |
return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16220 | } |
16221 | |
16222 | /* Return true if we need to split lea into a sequence of |
16223 | instructions to avoid AGU stalls during peephole2. */ |
16224 | |
16225 | bool |
16226 | ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) |
16227 | { |
16228 | unsigned int regno0, regno1, regno2; |
16229 | int split_cost; |
16230 | struct ix86_address parts; |
16231 | int ok; |
16232 | |
16233 | /* The "at least two components" test below might not catch simple |
16234 | move or zero extension insns if parts.base is non-NULL and parts.disp |
16235 | is const0_rtx as the only components in the address, e.g. if the |
16236 | register is %rbp or %r13. As this test is much cheaper and moves or |
16237 | zero extensions are the common case, do this check first. */ |
16238 | if (REG_P (operands[1]) |
16239 | || (SImode_address_operand (operands[1], VOIDmode) |
16240 | && REG_P (XEXP (operands[1], 0)))) |
16241 | return false; |
16242 | |
ok = ix86_decompose_address (operands[1], &parts);
16244 | gcc_assert (ok); |
16245 | |
16246 | /* There should be at least two components in the address. */ |
16247 | if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) |
16248 | + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) |
16249 | return false; |
16250 | |
/* We should not split into an add if a non-legitimate PIC
operand is used as the displacement.  */
16253 | if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) |
16254 | return false; |
16255 | |
16256 | regno0 = true_regnum (operands[0]) ; |
16257 | regno1 = INVALID_REGNUM; |
16258 | regno2 = INVALID_REGNUM; |
16259 | |
16260 | if (parts.base) |
16261 | regno1 = true_regnum (parts.base); |
16262 | if (parts.index) |
16263 | regno2 = true_regnum (parts.index); |
16264 | |
16265 | /* Use add for a = a + b and a = b + a since it is faster and shorter |
16266 | than lea for most processors. For the processors like BONNELL, if |
16267 | the destination register of LEA holds an actual address which will |
16268 | be used soon, LEA is better and otherwise ADD is better. */ |
16269 | if (!TARGET_CPU_P (BONNELL) |
16270 | && parts.scale == 1 |
16271 | && (!parts.disp || parts.disp == const0_rtx) |
16272 | && (regno0 == regno1 || regno0 == regno2)) |
16273 | return true; |
16274 | |
16275 | /* Split with -Oz if the encoding requires fewer bytes. */ |
16276 | if (optimize_size > 1 |
16277 | && parts.scale > 1 |
16278 | && !parts.base |
16279 | && (!parts.disp || parts.disp == const0_rtx)) |
16280 | return true; |
16281 | |
16282 | /* Check we need to optimize. */ |
16283 | if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) |
16284 | return false; |
16285 | |
16286 | split_cost = 0; |
16287 | |
/* Compute how many cycles we will add to the execution time
if we split the lea into a sequence of instructions.  */
16290 | if (parts.base || parts.index) |
16291 | { |
16292 | /* Have to use mov instruction if non desctructive |
16293 | destination form is used. */ |
16294 | if (regno1 != regno0 && regno2 != regno0) |
16295 | split_cost += 1; |
16296 | |
16297 | /* Have to add index to base if both exist. */ |
16298 | if (parts.base && parts.index) |
16299 | split_cost += 1; |
16300 | |
16301 | /* Have to use shift and adds if scale is 2 or greater. */ |
16302 | if (parts.scale > 1) |
16303 | { |
16304 | if (regno0 != regno1) |
16305 | split_cost += 1; |
16306 | else if (regno2 == regno0) |
16307 | split_cost += 4; |
16308 | else |
16309 | split_cost += parts.scale; |
16310 | } |
16311 | |
16312 | /* Have to use add instruction with immediate if |
16313 | disp is non zero. */ |
16314 | if (parts.disp && parts.disp != const0_rtx) |
16315 | split_cost += 1; |
16316 | |
16317 | /* Subtract the price of lea. */ |
16318 | split_cost -= 1; |
16319 | } |
16320 | |
16321 |   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, |
16322 |                                 parts.scale > 1); |
16323 | } |
16324 | |
16325 | /* Return true if it is ok to optimize an ADD operation to LEA |
16326 |    operation to avoid flag register consumption.  For most processors, |
16327 | ADD is faster than LEA. For the processors like BONNELL, if the |
16328 | destination register of LEA holds an actual address which will be |
16329 | used soon, LEA is better and otherwise ADD is better. */ |
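 | /* For instance, rewriting "add %rsi, %rdi" as "lea (%rdi,%rsi), %rdi" |
 |    leaves the flags register untouched for a nearby flags consumer, at the |
 |    cost of possibly routing through the AGU.  */ |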
16330 | |
16331 | bool |
16332 | ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) |
16333 | { |
16334 | unsigned int regno0 = true_regnum (operands[0]); |
16335 | unsigned int regno1 = true_regnum (operands[1]); |
16336 | unsigned int regno2 = true_regnum (operands[2]); |
16337 | |
16338 | /* If a = b + c, (a!=b && a!=c), must use lea form. */ |
16339 | if (regno0 != regno1 && regno0 != regno2) |
16340 | return true; |
16341 | |
16342 | if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
16343 | return false; |
16344 | |
16345 |   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); |
16346 | } |
16347 | |
16348 | /* Return true if destination reg of SET_BODY is shift count of |
16349 | USE_BODY. */ |
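 | /* For example, SET_BODY "(set (reg:QI cx) ...)" together with USE_BODY |
 |    "(set (reg:SI ax) (ashift:SI (reg:SI ax) (reg:QI cx)))" would match, |
 |    since the shift count of the use is the destination of the set.  */ |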
16350 | |
16351 | static bool |
16352 | ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) |
16353 | { |
16354 | rtx set_dest; |
16355 | rtx shift_rtx; |
16356 | int i; |
16357 | |
16358 | /* Retrieve destination of SET_BODY. */ |
16359 | switch (GET_CODE (set_body)) |
16360 | { |
16361 | case SET: |
16362 | set_dest = SET_DEST (set_body); |
16363 | if (!set_dest || !REG_P (set_dest)) |
16364 | return false; |
16365 | break; |
16366 | case PARALLEL: |
16367 | for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) |
16368 | if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), |
16369 | use_body)) |
16370 | return true; |
16371 | /* FALLTHROUGH */ |
16372 | default: |
16373 | return false; |
16374 | } |
16375 | |
16376 | /* Retrieve shift count of USE_BODY. */ |
16377 | switch (GET_CODE (use_body)) |
16378 | { |
16379 | case SET: |
16380 | shift_rtx = XEXP (use_body, 1); |
16381 | break; |
16382 | case PARALLEL: |
16383 | for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) |
16384 | if (ix86_dep_by_shift_count_body (set_body, |
16385 | XVECEXP (use_body, 0, i))) |
16386 | return true; |
16387 | /* FALLTHROUGH */ |
16388 | default: |
16389 | return false; |
16390 | } |
16391 | |
16392 | if (shift_rtx |
16393 | && (GET_CODE (shift_rtx) == ASHIFT |
16394 | || GET_CODE (shift_rtx) == LSHIFTRT |
16395 | || GET_CODE (shift_rtx) == ASHIFTRT |
16396 | || GET_CODE (shift_rtx) == ROTATE |
16397 | || GET_CODE (shift_rtx) == ROTATERT)) |
16398 | { |
16399 | rtx shift_count = XEXP (shift_rtx, 1); |
16400 | |
16401 | /* Return true if shift count is dest of SET_BODY. */ |
16402 | if (REG_P (shift_count)) |
16403 | { |
16404 | /* Add check since it can be invoked before register |
16405 | allocation in pre-reload schedule. */ |
16406 | if (reload_completed |
16407 | && true_regnum (set_dest) == true_regnum (shift_count)) |
16408 | return true; |
16409 |           else if (REGNO (set_dest) == REGNO (shift_count)) |
16410 | return true; |
16411 | } |
16412 | } |
16413 | |
16414 | return false; |
16415 | } |
16416 | |
16417 | /* Return true if destination reg of SET_INSN is shift count of |
16418 | USE_INSN. */ |
16419 | |
16420 | bool |
16421 | ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) |
16422 | { |
16423 |   return ix86_dep_by_shift_count_body (PATTERN (set_insn), |
16424 |                                        PATTERN (use_insn)); |
16425 | } |
16426 | |
16427 | /* Return TRUE if the operands to a vec_interleave_{high,low}v2df |
16428 | are ok, keeping in mind the possible movddup alternative. */ |
16429 | |
16430 | bool |
16431 | ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) |
16432 | { |
16433 | if (MEM_P (operands[0])) |
16434 | return rtx_equal_p (operands[0], operands[1 + high]); |
16435 | if (MEM_P (operands[1]) && MEM_P (operands[2])) |
16436 | return false; |
16437 | return true; |
16438 | } |
16439 | |
16440 | /* A subroutine of ix86_build_signbit_mask. If VECT is true, |
16441 | then replicate the value for all elements of the vector |
16442 | register. */ |
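 | /* E.g. for V4SImode, VALUE 42 yields { 42, 42, 42, 42 } when VECT is true |
 |    and { 42, 0, 0, 0 } when VECT is false.  */ |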
16443 | |
16444 | rtx |
16445 | ix86_build_const_vector (machine_mode mode, bool vect, rtx value) |
16446 | { |
16447 | int i, n_elt; |
16448 | rtvec v; |
16449 | machine_mode scalar_mode; |
16450 | |
16451 | switch (mode) |
16452 | { |
16453 | case E_V64QImode: |
16454 | case E_V32QImode: |
16455 | case E_V16QImode: |
16456 | case E_V32HImode: |
16457 | case E_V16HImode: |
16458 | case E_V8HImode: |
16459 | case E_V16SImode: |
16460 | case E_V8SImode: |
16461 | case E_V4SImode: |
16462 | case E_V2SImode: |
16463 | case E_V8DImode: |
16464 | case E_V4DImode: |
16465 | case E_V2DImode: |
16466 | gcc_assert (vect); |
16467 | /* FALLTHRU */ |
16468 | case E_V2HFmode: |
16469 | case E_V4HFmode: |
16470 | case E_V8HFmode: |
16471 | case E_V16HFmode: |
16472 | case E_V32HFmode: |
16473 | case E_V16SFmode: |
16474 | case E_V8SFmode: |
16475 | case E_V4SFmode: |
16476 | case E_V2SFmode: |
16477 | case E_V8DFmode: |
16478 | case E_V4DFmode: |
16479 | case E_V2DFmode: |
16480 | case E_V32BFmode: |
16481 | case E_V16BFmode: |
16482 | case E_V8BFmode: |
16483 | case E_V4BFmode: |
16484 | case E_V2BFmode: |
16485 | n_elt = GET_MODE_NUNITS (mode); |
16486 | v = rtvec_alloc (n_elt); |
16487 | scalar_mode = GET_MODE_INNER (mode); |
16488 | |
16489 | RTVEC_ELT (v, 0) = value; |
16490 | |
16491 | for (i = 1; i < n_elt; ++i) |
16492 | RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); |
16493 | |
16494 | return gen_rtx_CONST_VECTOR (mode, v); |
16495 | |
16496 | default: |
16497 | gcc_unreachable (); |
16498 | } |
16499 | } |
16500 | |
16501 | /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders |
16502 | and ix86_expand_int_vcond. Create a mask for the sign bit in MODE |
16503 | for an SSE register. If VECT is true, then replicate the mask for |
16504 | all elements of the vector register. If INVERT is true, then create |
16505 | a mask excluding the sign bit. */ |
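 | /* For V4SFmode, for example, this yields the vector constant |
 |    { 0x80000000, ... } or, with INVERT, { 0x7fffffff, ... }, forced into |
 |    an SSE register.  */ |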
16506 | |
16507 | rtx |
16508 | ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) |
16509 | { |
16510 | machine_mode vec_mode, imode; |
16511 | wide_int w; |
16512 | rtx mask, v; |
16513 | |
16514 | switch (mode) |
16515 | { |
16516 | case E_V2HFmode: |
16517 | case E_V4HFmode: |
16518 | case E_V8HFmode: |
16519 | case E_V16HFmode: |
16520 | case E_V32HFmode: |
16521 | case E_V32BFmode: |
16522 | case E_V16BFmode: |
16523 | case E_V8BFmode: |
16524 | case E_V4BFmode: |
16525 | case E_V2BFmode: |
16526 | vec_mode = mode; |
16527 | imode = HImode; |
16528 | break; |
16529 | |
16530 | case E_V16SImode: |
16531 | case E_V16SFmode: |
16532 | case E_V8SImode: |
16533 | case E_V4SImode: |
16534 | case E_V8SFmode: |
16535 | case E_V4SFmode: |
16536 | case E_V2SFmode: |
16537 | case E_V2SImode: |
16538 | vec_mode = mode; |
16539 | imode = SImode; |
16540 | break; |
16541 | |
16542 | case E_V8DImode: |
16543 | case E_V4DImode: |
16544 | case E_V2DImode: |
16545 | case E_V8DFmode: |
16546 | case E_V4DFmode: |
16547 | case E_V2DFmode: |
16548 | vec_mode = mode; |
16549 | imode = DImode; |
16550 | break; |
16551 | |
16552 | case E_TImode: |
16553 | case E_TFmode: |
16554 | vec_mode = VOIDmode; |
16555 | imode = TImode; |
16556 | break; |
16557 | |
16558 | default: |
16559 | gcc_unreachable (); |
16560 | } |
16561 | |
16562 | machine_mode inner_mode = GET_MODE_INNER (mode); |
16563 | w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, |
16564 | GET_MODE_BITSIZE (inner_mode)); |
16565 | if (invert) |
16566 |     w = wi::bit_not (w); |
16567 | |
16568 | /* Force this value into the low part of a fp vector constant. */ |
16569 | mask = immed_wide_int_const (w, imode); |
16570 | mask = gen_lowpart (inner_mode, mask); |
16571 | |
16572 | if (vec_mode == VOIDmode) |
16573 | return force_reg (inner_mode, mask); |
16574 | |
16575 |   v = ix86_build_const_vector (vec_mode, vect, mask); |
16576 | return force_reg (vec_mode, v); |
16577 | } |
16578 | |
16579 | /* Return HOST_WIDE_INT for const vector OP in MODE. */ |
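 | /* E.g. the V4QImode constant { 1, 2, 3, 4 } is packed element 0 first into |
 |    the low bits, giving 0x04030201.  */ |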
16580 | |
16581 | HOST_WIDE_INT |
16582 | ix86_convert_const_vector_to_integer (rtx op, machine_mode mode) |
16583 | { |
16584 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
16585 | gcc_unreachable (); |
16586 | |
16587 | int nunits = GET_MODE_NUNITS (mode); |
16588 | wide_int val = wi::zero (GET_MODE_BITSIZE (mode)); |
16589 | machine_mode innermode = GET_MODE_INNER (mode); |
16590 | unsigned int innermode_bits = GET_MODE_BITSIZE (innermode); |
16591 | |
16592 | switch (mode) |
16593 | { |
16594 | case E_V2QImode: |
16595 | case E_V4QImode: |
16596 | case E_V2HImode: |
16597 | case E_V8QImode: |
16598 | case E_V4HImode: |
16599 | case E_V2SImode: |
16600 | for (int i = 0; i < nunits; ++i) |
16601 | { |
16602 | int v = INTVAL (XVECEXP (op, 0, i)); |
16603 |           wide_int wv = wi::shwi (v, innermode_bits); |
16604 |           val = wi::insert (val, wv, innermode_bits * i, innermode_bits); |
16605 | } |
16606 | break; |
16607 | case E_V2HFmode: |
16608 | case E_V2BFmode: |
16609 | case E_V4HFmode: |
16610 | case E_V4BFmode: |
16611 | case E_V2SFmode: |
16612 | for (int i = 0; i < nunits; ++i) |
16613 | { |
16614 | rtx x = XVECEXP (op, 0, i); |
16615 | int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), |
16616 | REAL_MODE_FORMAT (innermode)); |
16617 |           wide_int wv = wi::shwi (v, innermode_bits); |
16618 |           val = wi::insert (val, wv, innermode_bits * i, innermode_bits); |
16619 | } |
16620 | break; |
16621 | default: |
16622 | gcc_unreachable (); |
16623 | } |
16624 | |
16625 | return val.to_shwi (); |
16626 | } |
16627 | |
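 | /* Map the RTL comparison code CODE to the x86 condition-code encoding |
 |    (X86_CC*), or return -1 if there is no direct mapping.  */ |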
16628 | int ix86_get_flags_cc (rtx_code code) |
16629 | { |
16630 | switch (code) |
16631 | { |
16632 | case NE: return X86_CCNE; |
16633 | case EQ: return X86_CCE; |
16634 | case GE: return X86_CCNL; |
16635 | case GT: return X86_CCNLE; |
16636 | case LE: return X86_CCLE; |
16637 | case LT: return X86_CCL; |
16638 | case GEU: return X86_CCNB; |
16639 | case GTU: return X86_CCNBE; |
16640 | case LEU: return X86_CCBE; |
16641 | case LTU: return X86_CCB; |
16642 | default: return -1; |
16643 | } |
16644 | } |
16645 | |
16646 | /* Return TRUE or FALSE depending on whether the first SET in INSN |
16647 |    has source and destination with matching CC modes, and whether the |
16648 | CC mode is at least as constrained as REQ_MODE. */ |
16649 | |
16650 | bool |
16651 | ix86_match_ccmode (rtx insn, machine_mode req_mode) |
16652 | { |
16653 | rtx set; |
16654 | machine_mode set_mode; |
16655 | |
16656 | set = PATTERN (insn); |
16657 | if (GET_CODE (set) == PARALLEL) |
16658 | set = XVECEXP (set, 0, 0); |
16659 | gcc_assert (GET_CODE (set) == SET); |
16660 | gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); |
16661 | |
16662 | set_mode = GET_MODE (SET_DEST (set)); |
16663 | switch (set_mode) |
16664 | { |
16665 | case E_CCNOmode: |
16666 | if (req_mode != CCNOmode |
16667 | && (req_mode != CCmode |
16668 | || XEXP (SET_SRC (set), 1) != const0_rtx)) |
16669 | return false; |
16670 | break; |
16671 | case E_CCmode: |
16672 | if (req_mode == CCGCmode) |
16673 | return false; |
16674 | /* FALLTHRU */ |
16675 | case E_CCGCmode: |
16676 | if (req_mode == CCGOCmode || req_mode == CCNOmode) |
16677 | return false; |
16678 | /* FALLTHRU */ |
16679 | case E_CCGOCmode: |
16680 | if (req_mode == CCZmode) |
16681 | return false; |
16682 | /* FALLTHRU */ |
16683 | case E_CCZmode: |
16684 | break; |
16685 | |
16686 | case E_CCGZmode: |
16687 | |
16688 | case E_CCAmode: |
16689 | case E_CCCmode: |
16690 | case E_CCOmode: |
16691 | case E_CCPmode: |
16692 | case E_CCSmode: |
16693 | if (set_mode != req_mode) |
16694 | return false; |
16695 | break; |
16696 | |
16697 | default: |
16698 | gcc_unreachable (); |
16699 | } |
16700 | |
16701 | return GET_MODE (SET_SRC (set)) == set_mode; |
16702 | } |
16703 | |
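 | /* Return the CC mode that should be used when comparing OP0 and OP1 with |
 |    comparison code CODE, picking a mode that exposes only the flags the |
 |    comparison actually needs.  */ |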
16704 | machine_mode |
16705 | ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) |
16706 | { |
16707 | machine_mode mode = GET_MODE (op0); |
16708 | |
16709 | if (SCALAR_FLOAT_MODE_P (mode)) |
16710 | { |
16711 | gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); |
16712 | return CCFPmode; |
16713 | } |
16714 | |
16715 | switch (code) |
16716 | { |
16717 | /* Only zero flag is needed. */ |
16718 | case EQ: /* ZF=0 */ |
16719 | case NE: /* ZF!=0 */ |
16720 | return CCZmode; |
16721 | /* Codes needing carry flag. */ |
16722 | case GEU: /* CF=0 */ |
16723 | case LTU: /* CF=1 */ |
16724 | rtx geu; |
16725 | /* Detect overflow checks. They need just the carry flag. */ |
16726 | if (GET_CODE (op0) == PLUS |
16727 | && (rtx_equal_p (op1, XEXP (op0, 0)) |
16728 | || rtx_equal_p (op1, XEXP (op0, 1)))) |
16729 | return CCCmode; |
16730 | /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns. |
16731 | Match LTU of op0 |
16732 | (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) |
16733 | and op1 |
16734 | (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)) |
16735 | where CC_CCC is either CC or CCC. */ |
16736 | else if (code == LTU |
16737 | && GET_CODE (op0) == NEG |
16738 | && GET_CODE (geu = XEXP (op0, 0)) == GEU |
16739 | && REG_P (XEXP (geu, 0)) |
16740 | && (GET_MODE (XEXP (geu, 0)) == CCCmode |
16741 | || GET_MODE (XEXP (geu, 0)) == CCmode) |
16742 | && REGNO (XEXP (geu, 0)) == FLAGS_REG |
16743 | && XEXP (geu, 1) == const0_rtx |
16744 | && GET_CODE (op1) == LTU |
16745 | && REG_P (XEXP (op1, 0)) |
16746 | && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) |
16747 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
16748 | && XEXP (op1, 1) == const0_rtx) |
16749 | return CCCmode; |
16750 | /* Similarly for *x86_cmc pattern. |
16751 | Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) |
16752 | and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)). |
16753 | It is sufficient to test that the operand modes are CCCmode. */ |
16754 | else if (code == LTU |
16755 | && GET_CODE (op0) == NEG |
16756 | && GET_CODE (XEXP (op0, 0)) == LTU |
16757 | && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode |
16758 | && GET_CODE (op1) == GEU |
16759 | && GET_MODE (XEXP (op1, 0)) == CCCmode) |
16760 | return CCCmode; |
16761 | /* Similarly for the comparison of addcarry/subborrow pattern. */ |
16762 | else if (code == LTU |
16763 | && GET_CODE (op0) == ZERO_EXTEND |
16764 | && GET_CODE (op1) == PLUS |
16765 | && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode) |
16766 | && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND) |
16767 | return CCCmode; |
16768 | else |
16769 | return CCmode; |
16770 | case GTU: /* CF=0 & ZF=0 */ |
16771 | case LEU: /* CF=1 | ZF=1 */ |
16772 | return CCmode; |
16773 | /* Codes possibly doable only with sign flag when |
16774 | comparing against zero. */ |
16775 | case GE: /* SF=OF or SF=0 */ |
16776 | case LT: /* SF<>OF or SF=1 */ |
16777 | if (op1 == const0_rtx) |
16778 | return CCGOCmode; |
16779 | else |
16780 | /* For other cases Carry flag is not required. */ |
16781 | return CCGCmode; |
16782 |     /* Codes doable only with the sign flag when comparing |
16783 |        against zero; however, we lack a jump instruction for that |
16784 |        alone, so we must use relational tests involving the overflow |
16785 |        flag, which consequently needs to be zero.  */ |
16786 | case GT: /* ZF=0 & SF=OF */ |
16787 | case LE: /* ZF=1 | SF<>OF */ |
16788 | if (op1 == const0_rtx) |
16789 | return CCNOmode; |
16790 | else |
16791 | return CCGCmode; |
16792 | default: |
16793 | /* CCmode should be used in all other cases. */ |
16794 | return CCmode; |
16795 | } |
16796 | } |
16797 | |
16798 | /* Return TRUE or FALSE depending on whether the ptest instruction |
16799 | INSN has source and destination with suitable matching CC modes. */ |
16800 | |
16801 | bool |
16802 | ix86_match_ptest_ccmode (rtx insn) |
16803 | { |
16804 | rtx set, src; |
16805 | machine_mode set_mode; |
16806 | |
16807 | set = PATTERN (insn); |
16808 | gcc_assert (GET_CODE (set) == SET); |
16809 | src = SET_SRC (set); |
16810 | gcc_assert (GET_CODE (src) == UNSPEC |
16811 | && XINT (src, 1) == UNSPEC_PTEST); |
16812 | |
16813 | set_mode = GET_MODE (src); |
16814 | if (set_mode != CCZmode |
16815 | && set_mode != CCCmode |
16816 | && set_mode != CCmode) |
16817 | return false; |
16818 | return GET_MODE (SET_DEST (set)) == set_mode; |
16819 | } |
16820 | |
16821 | /* Return the fixed registers used for condition codes. */ |
16822 | |
16823 | static bool |
16824 | ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) |
16825 | { |
16826 | *p1 = FLAGS_REG; |
16827 | *p2 = INVALID_REGNUM; |
16828 | return true; |
16829 | } |
16830 | |
16831 | /* If two condition code modes are compatible, return a condition code |
16832 | mode which is compatible with both. Otherwise, return |
16833 | VOIDmode. */ |
16834 | |
16835 | static machine_mode |
16836 | ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) |
16837 | { |
16838 | if (m1 == m2) |
16839 | return m1; |
16840 | |
16841 | if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) |
16842 | return VOIDmode; |
16843 | |
16844 | if ((m1 == CCGCmode && m2 == CCGOCmode) |
16845 | || (m1 == CCGOCmode && m2 == CCGCmode)) |
16846 | return CCGCmode; |
16847 | |
16848 | if ((m1 == CCNOmode && m2 == CCGOCmode) |
16849 | || (m1 == CCGOCmode && m2 == CCNOmode)) |
16850 | return CCNOmode; |
16851 | |
16852 | if (m1 == CCZmode |
16853 | && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode)) |
16854 | return m2; |
16855 | else if (m2 == CCZmode |
16856 | && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode)) |
16857 | return m1; |
16858 | |
16859 | switch (m1) |
16860 | { |
16861 | default: |
16862 | gcc_unreachable (); |
16863 | |
16864 | case E_CCmode: |
16865 | case E_CCGCmode: |
16866 | case E_CCGOCmode: |
16867 | case E_CCNOmode: |
16868 | case E_CCAmode: |
16869 | case E_CCCmode: |
16870 | case E_CCOmode: |
16871 | case E_CCPmode: |
16872 | case E_CCSmode: |
16873 | case E_CCZmode: |
16874 | switch (m2) |
16875 | { |
16876 | default: |
16877 | return VOIDmode; |
16878 | |
16879 | case E_CCmode: |
16880 | case E_CCGCmode: |
16881 | case E_CCGOCmode: |
16882 | case E_CCNOmode: |
16883 | case E_CCAmode: |
16884 | case E_CCCmode: |
16885 | case E_CCOmode: |
16886 | case E_CCPmode: |
16887 | case E_CCSmode: |
16888 | case E_CCZmode: |
16889 | return CCmode; |
16890 | } |
16891 | |
16892 | case E_CCFPmode: |
16893 | /* These are only compatible with themselves, which we already |
16894 | checked above. */ |
16895 | return VOIDmode; |
16896 | } |
16897 | } |
16898 | |
16899 | /* Return strategy to use for floating-point. We assume that fcomi is always |
16900 |    preferable where available, since that is also true when looking at size |
16901 | (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ |
16902 | |
16903 | enum ix86_fpcmp_strategy |
16904 | ix86_fp_comparison_strategy (enum rtx_code) |
16905 | { |
16906 | /* Do fcomi/sahf based test when profitable. */ |
16907 | |
16908 | if (TARGET_CMOVE) |
16909 | return IX86_FPCMP_COMI; |
16910 | |
16911 | if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) |
16912 | return IX86_FPCMP_SAHF; |
16913 | |
16914 | return IX86_FPCMP_ARITH; |
16915 | } |
16916 | |
16917 | /* Convert comparison codes we use to represent FP comparison to integer |
16918 | code that will result in proper branch. Return UNKNOWN if no such code |
16919 | is available. */ |
16920 | |
16921 | enum rtx_code |
16922 | ix86_fp_compare_code_to_integer (enum rtx_code code) |
16923 | { |
16924 | switch (code) |
16925 | { |
16926 | case GT: |
16927 | return GTU; |
16928 | case GE: |
16929 | return GEU; |
16930 | case ORDERED: |
16931 | case UNORDERED: |
16932 | return code; |
16933 | case UNEQ: |
16934 | return EQ; |
16935 | case UNLT: |
16936 | return LTU; |
16937 | case UNLE: |
16938 | return LEU; |
16939 | case LTGT: |
16940 | return NE; |
16941 | case EQ: |
16942 | case NE: |
16943 | if (TARGET_AVX10_2) |
16944 | return code; |
16945 | /* FALLTHRU. */ |
16946 | default: |
16947 | return UNKNOWN; |
16948 | } |
16949 | } |
16950 | |
16951 | /* Zero extend possibly SImode EXP to Pmode register. */ |
16952 | rtx |
16953 | ix86_zero_extend_to_Pmode (rtx exp) |
16954 | { |
16955 | return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); |
16956 | } |
16957 | |
16958 | /* Return true if the function is called via PLT. */ |
16959 | |
16960 | bool |
16961 | ix86_call_use_plt_p (rtx call_op) |
16962 | { |
16963 | if (SYMBOL_REF_LOCAL_P (call_op)) |
16964 | { |
16965 | if (SYMBOL_REF_DECL (call_op) |
16966 | && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL) |
16967 | { |
16968 | /* NB: All ifunc functions must be called via PLT. */ |
16969 | cgraph_node *node |
16970 | = cgraph_node::get (SYMBOL_REF_DECL (call_op)); |
16971 | if (node && node->ifunc_resolver) |
16972 | return true; |
16973 | } |
16974 | return false; |
16975 | } |
16976 | return true; |
16977 | } |
16978 | |
16979 | /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true, |
16980 | the PLT entry will be used as the function address for local IFUNC |
16981 | functions. When the PIC register is needed for PLT call, indirect |
16982 | call via the PLT entry will fail since the PIC register may not be |
16983 | set up properly for indirect call. In this case, we should return |
16984 | false. */ |
16985 | |
16986 | static bool |
16987 | ix86_ifunc_ref_local_ok (void) |
16988 | { |
16989 | return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC); |
16990 | } |
16991 | |
16992 | /* Return true if the function being called was marked with attribute |
16993 | "noplt" or using -fno-plt and we are compiling for non-PIC. We need |
16994 | to handle the non-PIC case in the backend because there is no easy |
16995 | interface for the front-end to force non-PLT calls to use the GOT. |
16996 | This is currently used only with 64-bit or 32-bit GOT32X ELF targets |
16997 | to call the function marked "noplt" indirectly. */ |
16998 | |
16999 | bool |
17000 | ix86_nopic_noplt_attribute_p (rtx call_op) |
17001 | { |
17002 | if (flag_pic || ix86_cmodel == CM_LARGE |
17003 | || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) |
17004 | || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF |
17005 | || SYMBOL_REF_LOCAL_P (call_op)) |
17006 | return false; |
17007 | |
17008 | tree symbol_decl = SYMBOL_REF_DECL (call_op); |
17009 | |
17010 | if (!flag_plt |
17011 | || (symbol_decl != NULL_TREE |
17012 |           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) |
17013 | return true; |
17014 | |
17015 | return false; |
17016 | } |
17017 | |
17018 | /* Helper to output the jmp/call. */ |
17019 | static void |
17020 | ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) |
17021 | { |
17022 | if (thunk_name != NULL) |
17023 | { |
17024 | if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno)) |
17025 | && ix86_indirect_branch_cs_prefix) |
17026 |         fprintf (asm_out_file, "\tcs\n"); |
17027 |       fprintf (asm_out_file, "\tjmp\t"); |
17028 |       assemble_name (asm_out_file, thunk_name); |
17029 |       putc ('\n', asm_out_file); |
17030 |       if ((ix86_harden_sls & harden_sls_indirect_jmp)) |
17031 |         fputs ("\tint3\n", asm_out_file); |
17032 | } |
17033 | else |
17034 | output_indirect_thunk (regno); |
17035 | } |
17036 | |
17037 | /* Output indirect branch via a call and return thunk. CALL_OP is a |
17038 | register which contains the branch target. XASM is the assembly |
17039 | template for CALL_OP. Branch is a tail call if SIBCALL_P is true. |
17040 | A normal call is converted to: |
17041 | |
17042 | call __x86_indirect_thunk_reg |
17043 | |
17044 | and a tail call is converted to: |
17045 | |
17046 | jmp __x86_indirect_thunk_reg |
17047 | */ |
17048 | |
17049 | static void |
17050 | ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) |
17051 | { |
17052 | char thunk_name_buf[32]; |
17053 | char *thunk_name; |
17054 | enum indirect_thunk_prefix need_prefix |
17055 |     = indirect_thunk_need_prefix (current_output_insn); |
17056 | int regno = REGNO (call_op); |
17057 | |
17058 | if (cfun->machine->indirect_branch_type |
17059 | != indirect_branch_thunk_inline) |
17060 | { |
17061 | if (cfun->machine->indirect_branch_type == indirect_branch_thunk) |
17062 |         SET_HARD_REG_BIT (indirect_thunks_used, regno); |
17063 | |
17064 |       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); |
17065 | thunk_name = thunk_name_buf; |
17066 | } |
17067 | else |
17068 | thunk_name = NULL; |
17069 | |
17070 | if (sibcall_p) |
17071 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
17072 | else |
17073 | { |
17074 | if (thunk_name != NULL) |
17075 | { |
17076 |           if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno)) |
17077 |               && ix86_indirect_branch_cs_prefix) |
17078 |             fprintf (asm_out_file, "\tcs\n"); |
17079 |           fprintf (asm_out_file, "\tcall\t"); |
17080 |           assemble_name (asm_out_file, thunk_name); |
17081 |           putc ('\n', asm_out_file); |
17082 | return; |
17083 | } |
17084 | |
17085 | char indirectlabel1[32]; |
17086 | char indirectlabel2[32]; |
17087 | |
17088 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, |
17089 | INDIRECT_LABEL, |
17090 | indirectlabelno++); |
17091 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, |
17092 | INDIRECT_LABEL, |
17093 | indirectlabelno++); |
17094 | |
17095 | /* Jump. */ |
17096 |       fputs ("\tjmp\t", asm_out_file); |
17097 |       assemble_name_raw (asm_out_file, indirectlabel2); |
17098 |       fputc ('\n', asm_out_file); |
17099 | |
17100 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
17101 | |
17102 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
17103 | |
17104 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
17105 | |
17106 | /* Call. */ |
17107 |       fputs ("\tcall\t", asm_out_file); |
17108 |       assemble_name_raw (asm_out_file, indirectlabel1); |
17109 |       fputc ('\n', asm_out_file); |
17110 | } |
17111 | } |
17112 | |
17113 | /* Output indirect branch via a call and return thunk. CALL_OP is |
17114 | the branch target. XASM is the assembly template for CALL_OP. |
17115 | Branch is a tail call if SIBCALL_P is true. A normal call is |
17116 | converted to: |
17117 | |
17118 | jmp L2 |
17119 | L1: |
17120 | push CALL_OP |
17121 | jmp __x86_indirect_thunk |
17122 | L2: |
17123 | call L1 |
17124 | |
17125 | and a tail call is converted to: |
17126 | |
17127 | push CALL_OP |
17128 | jmp __x86_indirect_thunk |
17129 | */ |
17130 | |
17131 | static void |
17132 | ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, |
17133 | bool sibcall_p) |
17134 | { |
17135 | char thunk_name_buf[32]; |
17136 | char *thunk_name; |
17137 | char push_buf[64]; |
17138 | enum indirect_thunk_prefix need_prefix |
17139 |     = indirect_thunk_need_prefix (current_output_insn); |
17140 | int regno = -1; |
17141 | |
17142 | if (cfun->machine->indirect_branch_type |
17143 | != indirect_branch_thunk_inline) |
17144 | { |
17145 | if (cfun->machine->indirect_branch_type == indirect_branch_thunk) |
17146 | indirect_thunk_needed = true; |
17147 |       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); |
17148 | thunk_name = thunk_name_buf; |
17149 | } |
17150 | else |
17151 | thunk_name = NULL; |
17152 | |
17153 |   snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", |
17154 | TARGET_64BIT ? 'q' : 'l', xasm); |
17155 | |
17156 | if (sibcall_p) |
17157 | { |
17158 | output_asm_insn (push_buf, &call_op); |
17159 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
17160 | } |
17161 | else |
17162 | { |
17163 | char indirectlabel1[32]; |
17164 | char indirectlabel2[32]; |
17165 | |
17166 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, |
17167 | INDIRECT_LABEL, |
17168 | indirectlabelno++); |
17169 | ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, |
17170 | INDIRECT_LABEL, |
17171 | indirectlabelno++); |
17172 | |
17173 | /* Jump. */ |
17174 |       fputs ("\tjmp\t", asm_out_file); |
17175 |       assemble_name_raw (asm_out_file, indirectlabel2); |
17176 |       fputc ('\n', asm_out_file); |
17177 | |
17178 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); |
17179 | |
17180 | /* An external function may be called via GOT, instead of PLT. */ |
17181 | if (MEM_P (call_op)) |
17182 | { |
17183 | struct ix86_address parts; |
17184 | rtx addr = XEXP (call_op, 0); |
17185 |           if (ix86_decompose_address (addr, &parts) |
17186 | && parts.base == stack_pointer_rtx) |
17187 | { |
17188 | /* Since call will adjust stack by -UNITS_PER_WORD, |
17189 | we must convert "disp(stack, index, scale)" to |
17190 | "disp+UNITS_PER_WORD(stack, index, scale)". */ |
17191 | if (parts.index) |
17192 | { |
17193 | addr = gen_rtx_MULT (Pmode, parts.index, |
17194 | GEN_INT (parts.scale)); |
17195 | addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
17196 | addr); |
17197 | } |
17198 | else |
17199 | addr = stack_pointer_rtx; |
17200 | |
17201 | rtx disp; |
17202 | if (parts.disp != NULL_RTX) |
17203 | disp = plus_constant (Pmode, parts.disp, |
17204 | UNITS_PER_WORD); |
17205 | else |
17206 | disp = GEN_INT (UNITS_PER_WORD); |
17207 | |
17208 | addr = gen_rtx_PLUS (Pmode, addr, disp); |
17209 | call_op = gen_rtx_MEM (GET_MODE (call_op), addr); |
17210 | } |
17211 | } |
17212 | |
17213 | output_asm_insn (push_buf, &call_op); |
17214 | |
17215 | ix86_output_jmp_thunk_or_indirect (thunk_name, regno); |
17216 | |
17217 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); |
17218 | |
17219 | /* Call. */ |
17220 |       fputs ("\tcall\t", asm_out_file); |
17221 |       assemble_name_raw (asm_out_file, indirectlabel1); |
17222 |       fputc ('\n', asm_out_file); |
17223 | } |
17224 | } |
17225 | |
17226 | /* Output indirect branch via a call and return thunk. CALL_OP is |
17227 | the branch target. XASM is the assembly template for CALL_OP. |
17228 | Branch is a tail call if SIBCALL_P is true. */ |
17229 | |
17230 | static void |
17231 | ix86_output_indirect_branch (rtx call_op, const char *xasm, |
17232 | bool sibcall_p) |
17233 | { |
17234 | if (REG_P (call_op)) |
17235 | ix86_output_indirect_branch_via_reg (call_op, sibcall_p); |
17236 | else |
17237 | ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); |
17238 | } |
17239 | |
17240 | /* Output indirect jump. CALL_OP is the jump target. */ |
17241 | |
17242 | const char * |
17243 | ix86_output_indirect_jmp (rtx call_op) |
17244 | { |
17245 | if (cfun->machine->indirect_branch_type != indirect_branch_keep) |
17246 | { |
17247 | /* We can't have red-zone since "call" in the indirect thunk |
17248 | pushes the return address onto stack, destroying red-zone. */ |
17249 | if (ix86_red_zone_used) |
17250 | gcc_unreachable (); |
17251 | |
17252 |       ix86_output_indirect_branch (call_op, "%0", true); |
17253 | } |
17254 | else |
17255 | output_asm_insn ("%!jmp\t%A0", &call_op); |
17256 | return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3": ""; |
17257 | } |
17258 | |
17259 | /* Output return instrumentation for current function if needed. */ |
17260 | |
17261 | static void |
17262 | output_return_instrumentation (void) |
17263 | { |
17264 | if (ix86_instrument_return != instrument_return_none |
17265 | && flag_fentry |
17266 | && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) |
17267 | { |
17268 | if (ix86_flag_record_return) |
17269 |         fprintf (asm_out_file, "1:\n"); |
17270 | switch (ix86_instrument_return) |
17271 | { |
17272 | case instrument_return_call: |
17273 |           fprintf (asm_out_file, "\tcall\t__return__\n"); |
17274 | break; |
17275 | case instrument_return_nop5: |
17276 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
17277 |           fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); |
17278 | break; |
17279 | case instrument_return_none: |
17280 | break; |
17281 | } |
17282 | |
17283 | if (ix86_flag_record_return) |
17284 | { |
17285 |           fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); |
17286 |           fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); |
17287 |           fprintf (asm_out_file, "\t.previous\n"); |
17288 | } |
17289 | } |
17290 | } |
17291 | |
17292 | /* Output function return.  Add a REP prefix to RET if LONG_P is true |
17293 |    and function return is kept.  */ |
17294 | |
17295 | const char * |
17296 | ix86_output_function_return (bool long_p) |
17297 | { |
17298 | output_return_instrumentation (); |
17299 | |
17300 | if (cfun->machine->function_return_type != indirect_branch_keep) |
17301 | { |
17302 | char thunk_name[32]; |
17303 | enum indirect_thunk_prefix need_prefix |
17304 |         = indirect_thunk_need_prefix (current_output_insn); |
17305 | |
17306 | if (cfun->machine->function_return_type |
17307 | != indirect_branch_thunk_inline) |
17308 | { |
17309 | bool need_thunk = (cfun->machine->function_return_type |
17310 | == indirect_branch_thunk); |
17311 |           indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix, |
17312 |                                true); |
17313 |           indirect_return_needed |= need_thunk; |
17314 |           fprintf (asm_out_file, "\tjmp\t"); |
17315 |           assemble_name (asm_out_file, thunk_name); |
17316 |           putc ('\n', asm_out_file); |
17317 | } |
17318 | else |
17319 | output_indirect_thunk (INVALID_REGNUM); |
17320 | |
17321 | return ""; |
17322 | } |
17323 | |
17324 | output_asm_insn (long_p ? "rep%; ret": "ret", nullptr); |
17325 | return (ix86_harden_sls & harden_sls_return) ? "int3": ""; |
17326 | } |
17327 | |
17328 | /* Output indirect function return. RET_OP is the function return |
17329 | target. */ |
17330 | |
17331 | const char * |
17332 | ix86_output_indirect_function_return (rtx ret_op) |
17333 | { |
17334 | if (cfun->machine->function_return_type != indirect_branch_keep) |
17335 | { |
17336 | char thunk_name[32]; |
17337 | enum indirect_thunk_prefix need_prefix |
17338 |         = indirect_thunk_need_prefix (current_output_insn); |
17339 | unsigned int regno = REGNO (ret_op); |
17340 | gcc_assert (regno == CX_REG); |
17341 | |
17342 | if (cfun->machine->function_return_type |
17343 | != indirect_branch_thunk_inline) |
17344 | { |
17345 | bool need_thunk = (cfun->machine->function_return_type |
17346 | == indirect_branch_thunk); |
17347 |           indirect_thunk_name (thunk_name, regno, need_prefix, true); |
17348 | |
17349 | if (need_thunk) |
17350 | { |
17351 | indirect_return_via_cx = true; |
17352 |               SET_HARD_REG_BIT (indirect_thunks_used, CX_REG); |
17353 | } |
17354 |           fprintf (asm_out_file, "\tjmp\t"); |
17355 |           assemble_name (asm_out_file, thunk_name); |
17356 |           putc ('\n', asm_out_file); |
17357 | } |
17358 | else |
17359 | output_indirect_thunk (regno); |
17360 | } |
17361 | else |
17362 | { |
17363 | output_asm_insn ("%!jmp\t%A0", &ret_op); |
17364 | if (ix86_harden_sls & harden_sls_indirect_jmp) |
17365 |         fputs ("\tint3\n", asm_out_file); |
17366 | } |
17367 | return ""; |
17368 | } |
17369 | |
17370 | /* Output the assembly for a call instruction. */ |
17371 | |
17372 | const char * |
17373 | ix86_output_call_insn (rtx_insn *insn, rtx call_op) |
17374 | { |
17375 | bool direct_p = constant_call_address_operand (call_op, VOIDmode); |
17376 | bool output_indirect_p |
17377 | = (!TARGET_SEH |
17378 | && cfun->machine->indirect_branch_type != indirect_branch_keep); |
17379 | bool seh_nop_p = false; |
17380 | const char *xasm; |
17381 | |
17382 | if (SIBLING_CALL_P (insn)) |
17383 | { |
17384 | output_return_instrumentation (); |
17385 | if (direct_p) |
17386 | { |
17387 | if (ix86_nopic_noplt_attribute_p (call_op)) |
17388 | { |
17389 | direct_p = false; |
17390 | if (TARGET_64BIT) |
17391 | { |
17392 | if (output_indirect_p) |
17393 | xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; |
17394 | else |
17395 | xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; |
17396 | } |
17397 | else |
17398 | { |
17399 | if (output_indirect_p) |
17400 | xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; |
17401 | else |
17402 | xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; |
17403 | } |
17404 | } |
17405 | else |
17406 | xasm = "%!jmp\t%P0"; |
17407 | } |
17408 | /* SEH epilogue detection requires the indirect branch case |
17409 | to include REX.W. */ |
17410 | else if (TARGET_SEH) |
17411 | xasm = "%!rex.W jmp\t%A0"; |
17412 | else |
17413 | { |
17414 | if (output_indirect_p) |
17415 | xasm = "%0"; |
17416 | else |
17417 | xasm = "%!jmp\t%A0"; |
17418 | } |
17419 | |
17420 | if (output_indirect_p && !direct_p) |
17421 |         ix86_output_indirect_branch (call_op, xasm, true); |
17422 | else |
17423 | { |
17424 | output_asm_insn (xasm, &call_op); |
17425 | if (!direct_p |
17426 | && (ix86_harden_sls & harden_sls_indirect_jmp)) |
17427 | return "int3"; |
17428 | } |
17429 | return ""; |
17430 | } |
17431 | |
17432 | /* SEH unwinding can require an extra nop to be emitted in several |
17433 | circumstances. Determine if we have one of those. */ |
17434 | if (TARGET_SEH) |
17435 | { |
17436 | rtx_insn *i; |
17437 | |
17438 |       for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i)) |
17439 | { |
17440 | /* Prevent a catch region from being adjacent to a jump that would |
17441 | be interpreted as an epilogue sequence by the unwinder. */ |
17442 |           if (JUMP_P (i) && CROSSING_JUMP_P (i)) |
17443 | { |
17444 | seh_nop_p = true; |
17445 | break; |
17446 | } |
17447 | |
17448 | /* If we get to another real insn, we don't need the nop. */ |
17449 | if (INSN_P (i)) |
17450 | break; |
17451 | |
17452 | /* If we get to the epilogue note, prevent a catch region from |
17453 | being adjacent to the standard epilogue sequence. Note that, |
17454 | if non-call exceptions are enabled, we already did it during |
17455 | epilogue expansion, or else, if the insn can throw internally, |
17456 | we already did it during the reorg pass. */ |
17457 | if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG |
17458 | && !flag_non_call_exceptions |
17459 | && !can_throw_internal (insn)) |
17460 | { |
17461 | seh_nop_p = true; |
17462 | break; |
17463 | } |
17464 | } |
17465 | |
17466 | /* If we didn't find a real insn following the call, prevent the |
17467 | unwinder from looking into the next function. */ |
17468 | if (i == NULL) |
17469 | seh_nop_p = true; |
17470 | } |
17471 | |
17472 | if (direct_p) |
17473 | { |
17474 | if (ix86_nopic_noplt_attribute_p (call_op)) |
17475 | { |
17476 | direct_p = false; |
17477 | if (TARGET_64BIT) |
17478 | { |
17479 | if (output_indirect_p) |
17480 | xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; |
17481 | else |
17482 | xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; |
17483 | } |
17484 | else |
17485 | { |
17486 | if (output_indirect_p) |
17487 | xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; |
17488 | else |
17489 | xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; |
17490 | } |
17491 | } |
17492 | else |
17493 | xasm = "%!call\t%P0"; |
17494 | } |
17495 | else |
17496 | { |
17497 | if (output_indirect_p) |
17498 | xasm = "%0"; |
17499 | else |
17500 | xasm = "%!call\t%A0"; |
17501 | } |
17502 | |
17503 | if (output_indirect_p && !direct_p) |
17504 |     ix86_output_indirect_branch (call_op, xasm, false); |
17505 | else |
17506 | output_asm_insn (xasm, &call_op); |
17507 | |
17508 | if (seh_nop_p) |
17509 | return "nop"; |
17510 | |
17511 | return ""; |
17512 | } |
17513 | |
17514 | /* Return a MEM corresponding to a stack slot with mode MODE. |
17515 | Allocate a new slot if necessary. |
17516 | |
17517 | The RTL for a function can have several slots available: N is |
17518 | which slot to use. */ |
17519 | |
17520 | rtx |
17521 | assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) |
17522 | { |
17523 | struct stack_local_entry *s; |
17524 | |
17525 | gcc_assert (n < MAX_386_STACK_LOCALS); |
17526 | |
17527 | for (s = ix86_stack_locals; s; s = s->next) |
17528 | if (s->mode == mode && s->n == n) |
17529 | return validize_mem (copy_rtx (s->rtl)); |
17530 | |
17531 | int align = 0; |
17532 | /* For DImode with SLOT_FLOATxFDI_387 use 32-bit |
17533 | alignment with -m32 -mpreferred-stack-boundary=2. */ |
17534 | if (mode == DImode |
17535 | && !TARGET_64BIT |
17536 | && n == SLOT_FLOATxFDI_387 |
17537 | && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode)) |
17538 | align = 32; |
17539 | s = ggc_alloc<stack_local_entry> (); |
17540 | s->n = n; |
17541 | s->mode = mode; |
17542 | s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align); |
17543 | |
17544 | s->next = ix86_stack_locals; |
17545 | ix86_stack_locals = s; |
17546 | return validize_mem (copy_rtx (s->rtl)); |
17547 | } |
17548 | |
17549 | static void |
17550 | ix86_instantiate_decls (void) |
17551 | { |
17552 | struct stack_local_entry *s; |
17553 | |
17554 | for (s = ix86_stack_locals; s; s = s->next) |
17555 | if (s->rtl != NULL_RTX) |
17556 |       instantiate_decl_rtl (s->rtl); |
17557 | } |
17558 | |
17559 | /* Check whether x86 address PARTS is a pc-relative address. */ |
17560 | |
17561 | bool |
17562 | ix86_rip_relative_addr_p (struct ix86_address *parts) |
17563 | { |
17564 | rtx base, index, disp; |
17565 | |
17566 | base = parts->base; |
17567 | index = parts->index; |
17568 | disp = parts->disp; |
17569 | |
17570 | if (disp && !base && !index) |
17571 | { |
17572 | if (TARGET_64BIT) |
17573 | { |
17574 | rtx symbol = disp; |
17575 | |
17576 | if (GET_CODE (disp) == CONST) |
17577 | symbol = XEXP (disp, 0); |
17578 | if (GET_CODE (symbol) == PLUS |
17579 | && CONST_INT_P (XEXP (symbol, 1))) |
17580 | symbol = XEXP (symbol, 0); |
17581 | |
17582 | if (GET_CODE (symbol) == LABEL_REF |
17583 | || (GET_CODE (symbol) == SYMBOL_REF |
17584 | && SYMBOL_REF_TLS_MODEL (symbol) == 0) |
17585 | || (GET_CODE (symbol) == UNSPEC |
17586 | && (XINT (symbol, 1) == UNSPEC_GOTPCREL |
17587 | || XINT (symbol, 1) == UNSPEC_PCREL |
17588 | || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) |
17589 | return true; |
17590 | } |
17591 | } |
17592 | return false; |
17593 | } |
17594 | |
17595 | /* Calculate the length of the memory address in the instruction encoding. |
17596 | Includes addr32 prefix, does not include the one-byte modrm, opcode, |
17597 | or other prefixes. We never generate addr32 prefix for LEA insn. */ |
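 | /* For example, "(%rsp)" needs an extra SIB byte, "4(%rbp)" needs a one-byte |
 |    displacement, and an absolute disp32 without base or index costs five |
 |    bytes (disp32 plus SIB) unless it can use %rip-relative addressing.  */ |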
17598 | |
17599 | int |
17600 | memory_address_length (rtx addr, bool lea) |
17601 | { |
17602 | struct ix86_address parts; |
17603 | rtx base, index, disp; |
17604 | int len; |
17605 | int ok; |
17606 | |
17607 | if (GET_CODE (addr) == PRE_DEC |
17608 | || GET_CODE (addr) == POST_INC |
17609 | || GET_CODE (addr) == PRE_MODIFY |
17610 | || GET_CODE (addr) == POST_MODIFY) |
17611 | return 0; |
17612 | |
17613 |   ok = ix86_decompose_address (addr, &parts); |
17614 | gcc_assert (ok); |
17615 | |
17616 | len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; |
17617 | |
17618 | /* If this is not LEA instruction, add the length of addr32 prefix. */ |
17619 | if (TARGET_64BIT && !lea |
17620 | && (SImode_address_operand (addr, VOIDmode) |
17621 | || (parts.base && GET_MODE (parts.base) == SImode) |
17622 | || (parts.index && GET_MODE (parts.index) == SImode))) |
17623 | len++; |
17624 | |
17625 | base = parts.base; |
17626 | index = parts.index; |
17627 | disp = parts.disp; |
17628 | |
17629 | if (base && SUBREG_P (base)) |
17630 | base = SUBREG_REG (base); |
17631 | if (index && SUBREG_P (index)) |
17632 | index = SUBREG_REG (index); |
17633 | |
17634 | gcc_assert (base == NULL_RTX || REG_P (base)); |
17635 | gcc_assert (index == NULL_RTX || REG_P (index)); |
17636 | |
17637 | /* Rule of thumb: |
17638 | - esp as the base always wants an index, |
17639 | - ebp as the base always wants a displacement, |
17640 | - r12 as the base always wants an index, |
17641 | - r13 as the base always wants a displacement. */ |
17642 | |
17643 | /* Register Indirect. */ |
17644 | if (base && !index && !disp) |
17645 | { |
17646 | /* esp (for its index) and ebp (for its displacement) need |
17647 | the two-byte modrm form. Similarly for r12 and r13 in 64-bit |
17648 | code. */ |
17649 | if (base == arg_pointer_rtx |
17650 | || base == frame_pointer_rtx |
17651 | || REGNO (base) == SP_REG |
17652 | || REGNO (base) == BP_REG |
17653 | || REGNO (base) == R12_REG |
17654 | || REGNO (base) == R13_REG) |
17655 | len++; |
17656 | } |
17657 | |
17658 | /* Direct Addressing. In 64-bit mode mod 00 r/m 5 |
17659 | is not disp32, but disp32(%rip), so for disp32 |
17660 | SIB byte is needed, unless print_operand_address |
17661 | optimizes it into disp32(%rip) or (%rip) is implied |
17662 | by UNSPEC. */ |
17663 | else if (disp && !base && !index) |
17664 | { |
17665 | len += 4; |
17666 |       if (!ix86_rip_relative_addr_p (&parts)) |
17667 | len++; |
17668 | } |
17669 | else |
17670 | { |
17671 | /* Find the length of the displacement constant. */ |
17672 | if (disp) |
17673 | { |
17674 |           if (base && satisfies_constraint_K (disp)) |
17675 | len += 1; |
17676 | else |
17677 | len += 4; |
17678 | } |
17679 | /* ebp always wants a displacement. Similarly r13. */ |
17680 | else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) |
17681 | len++; |
17682 | |
17683 | /* An index requires the two-byte modrm form.... */ |
17684 | if (index |
17685 | /* ...like esp (or r12), which always wants an index. */ |
17686 | || base == arg_pointer_rtx |
17687 | || base == frame_pointer_rtx |
17688 | || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) |
17689 | len++; |
17690 | } |
17691 | |
17692 | return len; |
17693 | } |
17694 | |
17695 | /* Compute default value for "length_immediate" attribute.  When SHORTFORM |
17696 |    is set, expect that the insn has an 8-bit immediate alternative.  */ |
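 | /* For instance, "add $3, %eax" can use the sign-extended 8-bit immediate |
 |    form (length 1), whereas "add $100000, %eax" needs a full 4-byte |
 |    immediate.  */ |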
17697 | int |
17698 | ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) |
17699 | { |
17700 | int len = 0; |
17701 | int i; |
17702 | extract_insn_cached (insn); |
17703 | for (i = recog_data.n_operands - 1; i >= 0; --i) |
17704 | if (CONSTANT_P (recog_data.operand[i])) |
17705 | { |
17706 | enum attr_mode mode = get_attr_mode (insn); |
17707 | |
17708 | gcc_assert (!len); |
17709 | if (shortform && CONST_INT_P (recog_data.operand[i])) |
17710 | { |
17711 | HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); |
17712 | switch (mode) |
17713 | { |
17714 | case MODE_QI: |
17715 | len = 1; |
17716 | continue; |
17717 | case MODE_HI: |
17718 | ival = trunc_int_for_mode (ival, HImode); |
17719 | break; |
17720 | case MODE_SI: |
17721 | ival = trunc_int_for_mode (ival, SImode); |
17722 | break; |
17723 | default: |
17724 | break; |
17725 | } |
17726 | if (IN_RANGE (ival, -128, 127)) |
17727 | { |
17728 | len = 1; |
17729 | continue; |
17730 | } |
17731 | } |
17732 | switch (mode) |
17733 | { |
17734 | case MODE_QI: |
17735 | len = 1; |
17736 | break; |
17737 | case MODE_HI: |
17738 | len = 2; |
17739 | break; |
17740 | case MODE_SI: |
17741 | len = 4; |
17742 | break; |
17743 | /* Immediates for DImode instructions are encoded |
17744 | as 32bit sign extended values. */ |
17745 | case MODE_DI: |
17746 | len = 4; |
17747 | break; |
17748 | default: |
17749 | fatal_insn ("unknown insn mode", insn); |
17750 | } |
17751 | } |
17752 | return len; |
17753 | } |
17754 | |
17755 | /* Compute default value for "length_address" attribute. */ |
17756 | int |
17757 | ix86_attr_length_address_default (rtx_insn *insn) |
17758 | { |
17759 | int i; |
17760 | |
17761 | if (get_attr_type (insn) == TYPE_LEA) |
17762 | { |
17763 | rtx set = PATTERN (insn), addr; |
17764 | |
17765 | if (GET_CODE (set) == PARALLEL) |
17766 | set = XVECEXP (set, 0, 0); |
17767 | |
17768 | gcc_assert (GET_CODE (set) == SET); |
17769 | |
17770 | addr = SET_SRC (set); |
17771 | |
17772 |       return memory_address_length (addr, true); |
17773 | } |
17774 | |
17775 | extract_insn_cached (insn); |
17776 | for (i = recog_data.n_operands - 1; i >= 0; --i) |
17777 | { |
17778 | rtx op = recog_data.operand[i]; |
17779 | if (MEM_P (op)) |
17780 | { |
17781 | constrain_operands_cached (insn, reload_completed); |
17782 | if (which_alternative != -1) |
17783 | { |
17784 | const char *constraints = recog_data.constraints[i]; |
17785 | int alt = which_alternative; |
17786 | |
17787 | while (*constraints == '=' || *constraints == '+') |
17788 | constraints++; |
17789 | while (alt-- > 0) |
17790 | while (*constraints++ != ',') |
17791 | ; |
17792 | /* Skip ignored operands. */ |
17793 | if (*constraints == 'X') |
17794 | continue; |
17795 | } |
17796 | |
17797 |         int len = memory_address_length (XEXP (op, 0), false); |
17798 | |
17799 | /* Account for segment prefix for non-default addr spaces. */ |
17800 | if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) |
17801 | len++; |
17802 | |
17803 | return len; |
17804 | } |
17805 | } |
17806 | return 0; |
17807 | } |
17808 | |
17809 | /* Compute default value for "length_vex" attribute. It includes |
17810 | 2 or 3 byte VEX prefix and 1 opcode byte. */ |
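 | /* For example, a reg-reg "vaddps %xmm1, %xmm2, %xmm3" can use the 2-byte |
 |    VEX prefix (2 + 1 with the opcode byte), while a 64-bit general register |
 |    operand or an extended register forces the 3-byte form (3 + 1).  */ |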
17811 | |
17812 | int |
17813 | ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, |
17814 | bool has_vex_w) |
17815 | { |
17816 | int i, reg_only = 2 + 1; |
17817 | bool has_mem = false; |
17818 | |
17819 | /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 |
17820 | byte VEX prefix. */ |
17821 | if (!has_0f_opcode || has_vex_w) |
17822 | return 3 + 1; |
17823 | |
17824 | /* We can always use 2 byte VEX prefix in 32bit. */ |
17825 | if (!TARGET_64BIT) |
17826 | return 2 + 1; |
17827 | |
17828 | extract_insn_cached (insn); |
17829 | |
17830 | for (i = recog_data.n_operands - 1; i >= 0; --i) |
17831 | if (REG_P (recog_data.operand[i])) |
17832 | { |
17833 |         /* REX.W bit uses 3 byte VEX prefix.  REX2 registers used with VEX |
17834 |            require the extended EVEX prefix, which is 4 bytes long.  */ |
17835 | if (GET_MODE (recog_data.operand[i]) == DImode |
17836 | && GENERAL_REG_P (recog_data.operand[i])) |
17837 | return 3 + 1; |
17838 | |
17839 |         /* REX.B bit requires 3-byte VEX.  Right here we don't know which |
17840 |            operand will be encoded using VEX.B, so be conservative. |
17841 |            REX2 registers with VEX need the 4-byte extended EVEX prefix.  */ |
17842 | if (REX_INT_REGNO_P (recog_data.operand[i]) |
17843 | || REX2_INT_REGNO_P (recog_data.operand[i]) |
17844 | || REX_SSE_REGNO_P (recog_data.operand[i])) |
17845 | reg_only = 3 + 1; |
17846 | } |
17847 | else if (MEM_P (recog_data.operand[i])) |
17848 | { |
17849 | /* REX2.X or REX2.B bits use 3 byte VEX prefix. */ |
17850 | if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i])) |
17851 | return 4; |
17852 | |
17853 | /* REX.X or REX.B bits use 3 byte VEX prefix. */ |
17854 | if (x86_extended_reg_mentioned_p (recog_data.operand[i])) |
17855 | return 3 + 1; |
17856 | |
17857 | has_mem = true; |
17858 | } |
17859 | |
17860 | return has_mem ? 2 + 1 : reg_only; |
17861 | } |
17862 | |
17863 | |
17864 | static bool |
17865 | ix86_class_likely_spilled_p (reg_class_t); |
17866 | |
17867 | /* Return true if the lhs of INSN is a HW function argument register, and |
17868 |    set *IS_SPILLED to true if it is a likely spilled HW register.  */ |
17869 | static bool |
17870 | insn_is_function_arg (rtx insn, bool* is_spilled) |
17871 | { |
17872 | rtx dst; |
17873 | |
17874 | if (!NONDEBUG_INSN_P (insn)) |
17875 | return false; |
17876 |   /* Call instructions are not movable, ignore them.  */ |
17877 | if (CALL_P (insn)) |
17878 | return false; |
17879 | insn = PATTERN (insn); |
17880 | if (GET_CODE (insn) == PARALLEL) |
17881 | insn = XVECEXP (insn, 0, 0); |
17882 | if (GET_CODE (insn) != SET) |
17883 | return false; |
17884 | dst = SET_DEST (insn); |
17885 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
17886 | && ix86_function_arg_regno_p (REGNO (dst))) |
17887 | { |
17888 | /* Is it likely spilled HW register? */ |
17889 | if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) |
17890 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) |
17891 | *is_spilled = true; |
17892 | return true; |
17893 | } |
17894 | return false; |
17895 | } |
17896 | |
17897 | /* Add output dependencies for a chain of adjacent function-argument moves, |
17898 |    but only if there is a move to a likely spilled HW register.  Return the |
17899 |    first argument if at least one dependence was added, or NULL otherwise.  */ |
17900 | static rtx_insn * |
17901 | add_parameter_dependencies (rtx_insn *call, rtx_insn *head) |
17902 | { |
17903 | rtx_insn *insn; |
17904 | rtx_insn *last = call; |
17905 | rtx_insn *first_arg = NULL; |
17906 | bool is_spilled = false; |
17907 | |
17908 |   head = PREV_INSN (head); |
17909 | |
17910 |   /* Find the argument-passing instruction nearest to the call.  */ |
17911 | while (true) |
17912 | { |
17913 |       last = PREV_INSN (last); |
17914 | if (last == head) |
17915 | return NULL; |
17916 | if (!NONDEBUG_INSN_P (last)) |
17917 | continue; |
17918 |       if (insn_is_function_arg (last, &is_spilled)) |
17919 | break; |
17920 | return NULL; |
17921 | } |
17922 | |
17923 | first_arg = last; |
17924 | while (true) |
17925 | { |
17926 |       insn = PREV_INSN (last); |
17927 | if (!INSN_P (insn)) |
17928 | break; |
17929 | if (insn == head) |
17930 | break; |
17931 | if (!NONDEBUG_INSN_P (insn)) |
17932 | { |
17933 | last = insn; |
17934 | continue; |
17935 | } |
17936 |       if (insn_is_function_arg (insn, &is_spilled)) |
17937 |         { |
17938 |           /* Add an output dependence between two function arguments if the |
17939 |              chain of output arguments contains likely spilled HW registers.  */ |
17940 | if (is_spilled) |
17941 | add_dependence (first_arg, insn, REG_DEP_OUTPUT); |
17942 | first_arg = last = insn; |
17943 | } |
17944 | else |
17945 | break; |
17946 | } |
17947 | if (!is_spilled) |
17948 | return NULL; |
17949 | return first_arg; |
17950 | } |
17951 | |
17952 | /* Add output or anti dependency from insn to first_arg to restrict its code |
17953 | motion. */ |
17954 | static void |
17955 | avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) |
17956 | { |
17957 | rtx set; |
17958 | rtx tmp; |
17959 | |
17960 | set = single_set (insn); |
17961 | if (!set) |
17962 | return; |
17963 | tmp = SET_DEST (set); |
17964 | if (REG_P (tmp)) |
17965 | { |
17966 | /* Add output dependency to the first function argument. */ |
17967 | add_dependence (first_arg, insn, REG_DEP_OUTPUT); |
17968 | return; |
17969 | } |
17970 | /* Add anti dependency. */ |
17971 | add_dependence (first_arg, insn, REG_DEP_ANTI); |
17972 | } |
17973 | |
17974 | /* Avoid cross block motion of a function argument by adding a dependency |
17975 |    from the first non-jump instruction in BB.  */ |
17976 | static void |
17977 | add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) |
17978 | { |
17979 | rtx_insn *insn = BB_END (bb); |
17980 | |
17981 | while (insn) |
17982 | { |
17983 | if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) |
17984 | { |
17985 | rtx set = single_set (insn); |
17986 | if (set) |
17987 | { |
17988 |               avoid_func_arg_motion (arg, insn); |
17989 | return; |
17990 | } |
17991 | } |
17992 | if (insn == BB_HEAD (bb)) |
17993 | return; |
17994 | insn = PREV_INSN (insn); |
17995 | } |
17996 | } |
17997 | |
17998 | /* Hook for pre-reload schedule - avoid motion of function arguments |
17999 | passed in likely spilled HW registers. */ |
18000 | static void |
18001 | ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) |
18002 | { |
18003 | rtx_insn *insn; |
18004 | rtx_insn *first_arg = NULL; |
18005 | if (reload_completed) |
18006 | return; |
18007 | while (head != tail && DEBUG_INSN_P (head)) |
18008 |     head = NEXT_INSN (head); |
18009 | for (insn = tail; insn != head; insn = PREV_INSN (insn)) |
18010 | if (INSN_P (insn) && CALL_P (insn)) |
18011 | { |
18012 |         first_arg = add_parameter_dependencies (insn, head); |
18013 | if (first_arg) |
18014 | { |
18015 | /* Add dependee for first argument to predecessors if only |
18016 | region contains more than one block. */ |
18017 | basic_block bb = BLOCK_FOR_INSN (insn); |
18018 | int rgn = CONTAINING_RGN (bb->index); |
18019 | int nr_blks = RGN_NR_BLOCKS (rgn); |
18020 | /* Skip trivial regions and region head blocks that can have |
18021 | predecessors outside of region. */ |
18022 | if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) |
18023 | { |
18024 | edge e; |
18025 | edge_iterator ei; |
18026 | |
18027 | /* Regions are SCCs with the exception of selective |
18028 | scheduling with pipelining of outer blocks enabled. |
18029 | So also check that immediate predecessors of a non-head |
18030 | block are in the same region. */ |
18031 | FOR_EACH_EDGE (e, ei, bb->preds) |
18032 | { |
/* Avoid creating loop-carried dependencies by using the
topological ordering in the region.  */
18035 | if (rgn == CONTAINING_RGN (e->src->index) |
18036 | && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) |
add_dependee_for_func_arg (first_arg, e->src);
18038 | } |
18039 | } |
18040 | insn = first_arg; |
18041 | if (insn == head) |
18042 | break; |
18043 | } |
18044 | } |
18045 | else if (first_arg) |
18046 | avoid_func_arg_motion (first_arg, insn); |
18047 | } |
18048 | |
18049 | /* Hook for pre-reload schedule - set priority of moves from likely spilled |
HW registers to maximum, to schedule them as soon as possible.  These are
18051 | moves from function argument registers at the top of the function entry |
18052 | and moves from function return value registers after call. */ |
18053 | static int |
18054 | ix86_adjust_priority (rtx_insn *insn, int priority) |
18055 | { |
18056 | rtx set; |
18057 | |
18058 | if (reload_completed) |
18059 | return priority; |
18060 | |
18061 | if (!NONDEBUG_INSN_P (insn)) |
18062 | return priority; |
18063 | |
18064 | set = single_set (insn); |
18065 | if (set) |
18066 | { |
18067 | rtx tmp = SET_SRC (set); |
18068 | if (REG_P (tmp) |
18069 | && HARD_REGISTER_P (tmp) |
18070 | && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) |
18071 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) |
18072 | return current_sched_info->sched_max_insns_priority; |
18073 | } |
18074 | |
18075 | return priority; |
18076 | } |
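/* E.g. at the top of a function a copy such as "movl %edi, %ebx" of an
incoming argument register has a likely-spilled hard register as its
SET_SRC, so ix86_adjust_priority above bumps it to
sched_max_insns_priority and the pre-reload scheduler frees that
register as early as possible.  */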
18077 | |
18078 | /* Prepare for scheduling pass. */ |
18079 | static void |
18080 | ix86_sched_init_global (FILE *, int, int) |
18081 | { |
18082 | /* Install scheduling hooks for current CPU. Some of these hooks are used |
18083 | in time-critical parts of the scheduler, so we only set them up when |
18084 | they are actually used. */ |
18085 | switch (ix86_tune) |
18086 | { |
18087 | case PROCESSOR_CORE2: |
18088 | case PROCESSOR_NEHALEM: |
18089 | case PROCESSOR_SANDYBRIDGE: |
18090 | case PROCESSOR_HASWELL: |
18091 | case PROCESSOR_TREMONT: |
18092 | case PROCESSOR_ALDERLAKE: |
18093 | case PROCESSOR_GENERIC: |
18094 | /* Do not perform multipass scheduling for pre-reload schedule |
18095 | to save compile time. */ |
18096 | if (reload_completed) |
18097 | { |
18098 | ix86_core2i7_init_hooks (); |
18099 | break; |
18100 | } |
18101 | /* Fall through. */ |
18102 | default: |
18103 | targetm.sched.dfa_post_advance_cycle = NULL; |
18104 | targetm.sched.first_cycle_multipass_init = NULL; |
18105 | targetm.sched.first_cycle_multipass_begin = NULL; |
18106 | targetm.sched.first_cycle_multipass_issue = NULL; |
18107 | targetm.sched.first_cycle_multipass_backtrack = NULL; |
18108 | targetm.sched.first_cycle_multipass_end = NULL; |
18109 | targetm.sched.first_cycle_multipass_fini = NULL; |
18110 | break; |
18111 | } |
18112 | } |
18113 | |
18114 | |
18115 | /* Implement TARGET_STATIC_RTX_ALIGNMENT. */ |
18116 | |
18117 | static HOST_WIDE_INT |
18118 | ix86_static_rtx_alignment (machine_mode mode) |
18119 | { |
18120 | if (mode == DFmode) |
18121 | return 64; |
18122 | if (ALIGN_MODE_128 (mode)) |
18123 | return MAX (128, GET_MODE_ALIGNMENT (mode)); |
18124 | return GET_MODE_ALIGNMENT (mode); |
18125 | } |
18126 | |
18127 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ |
18128 | |
18129 | static HOST_WIDE_INT |
18130 | ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) |
18131 | { |
18132 | if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST |
18133 | || TREE_CODE (exp) == INTEGER_CST) |
18134 | { |
18135 | machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); |
18136 | HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); |
18137 | return MAX (mode_align, align); |
18138 | } |
18139 | else if (!optimize_size && TREE_CODE (exp) == STRING_CST |
18140 | && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) |
18141 | return BITS_PER_WORD; |
18142 | |
18143 | return align; |
18144 | } |
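/* For example, ix86_constant_alignment above raises a DFmode constant to
64-bit alignment, raises 128-bit modes to at least 128 bits, and, unless
optimizing for size, gives string constants of length 31 or more at
least BITS_PER_WORD alignment.  */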
18145 | |
18146 | /* Implement TARGET_EMPTY_RECORD_P. */ |
18147 | |
18148 | static bool |
18149 | ix86_is_empty_record (const_tree type) |
18150 | { |
18151 | if (!TARGET_64BIT) |
18152 | return false; |
18153 | return default_is_empty_record (type); |
18154 | } |
18155 | |
18156 | /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ |
18157 | |
18158 | static void |
18159 | ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) |
18160 | { |
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
18162 | |
18163 | if (!cum->warn_empty) |
18164 | return; |
18165 | |
18166 | if (!TYPE_EMPTY_P (type)) |
18167 | return; |
18168 | |
18169 | /* Don't warn if the function isn't visible outside of the TU. */ |
18170 | if (cum->decl && !TREE_PUBLIC (cum->decl)) |
18171 | return; |
18172 | |
18173 | tree decl = cum->decl; |
18174 | if (!decl) |
18175 | /* If we don't know the target, look at the current TU. */ |
18176 | decl = current_function_decl; |
18177 | |
18178 | const_tree ctx = get_ultimate_context (decl); |
18179 | if (ctx == NULL_TREE |
18180 | || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) |
18181 | return; |
18182 | |
18183 | /* If the actual size of the type is zero, then there is no change |
18184 | in how objects of this size are passed. */ |
18185 | if (int_size_in_bytes (type) == 0) |
18186 | return; |
18187 | |
18188 | warning (OPT_Wabi, "empty class %qT parameter passing ABI " |
18189 | "changes in %<-fabi-version=12%> (GCC 8)", type); |
18190 | |
18191 | /* Only warn once. */ |
18192 | cum->warn_empty = false; |
18193 | } |
18194 | |
/* This hook returns the name of the multilib ABI.  */
18196 | |
18197 | static const char * |
18198 | ix86_get_multilib_abi_name (void) |
18199 | { |
18200 | if (!(TARGET_64BIT_P (ix86_isa_flags))) |
18201 | return "i386"; |
18202 | else if (TARGET_X32_P (ix86_isa_flags)) |
18203 | return "x32"; |
18204 | else |
18205 | return "x86_64"; |
18206 | } |
18207 | |
18208 | /* Compute the alignment for a variable for Intel MCU psABI. TYPE is |
18209 | the data type, and ALIGN is the alignment that the object would |
18210 | ordinarily have. */ |
18211 | |
18212 | static int |
18213 | iamcu_alignment (tree type, int align) |
18214 | { |
18215 | machine_mode mode; |
18216 | |
18217 | if (align < 32 || TYPE_USER_ALIGN (type)) |
18218 | return align; |
18219 | |
/* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
aligned to only 4 bytes.  */
18222 | type = strip_array_types (type); |
18223 | if (TYPE_ATOMIC (type)) |
18224 | return align; |
18225 | |
18226 | mode = TYPE_MODE (type); |
18227 | switch (GET_MODE_CLASS (mode)) |
18228 | { |
18229 | case MODE_INT: |
18230 | case MODE_COMPLEX_INT: |
18231 | case MODE_COMPLEX_FLOAT: |
18232 | case MODE_FLOAT: |
18233 | case MODE_DECIMAL_FLOAT: |
18234 | return 32; |
18235 | default: |
18236 | return align; |
18237 | } |
18238 | } |
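/* For example, with -miamcu a double or long long, which would ordinarily
be 8-byte aligned, is capped at 4-byte (32-bit) alignment by
iamcu_alignment above, while _Atomic and user-aligned objects keep their
original alignment.  */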
18239 | |
18240 | /* Compute the alignment for a static variable. |
18241 | TYPE is the data type, and ALIGN is the alignment that |
18242 | the object would ordinarily have. The value of this function is used |
18243 | instead of that alignment to align the object. */ |
18244 | |
18245 | int |
18246 | ix86_data_alignment (tree type, unsigned int align, bool opt) |
18247 | { |
18248 | /* GCC 4.8 and earlier used to incorrectly assume this alignment even |
18249 | for symbols from other compilation units or symbols that don't need |
18250 | to bind locally. In order to preserve some ABI compatibility with |
18251 | those compilers, ensure we don't decrease alignment from what we |
18252 | used to assume. */ |
18253 | |
18254 | unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); |
18255 | |
/* A data structure whose size is equal to or greater than a cache line
(64 bytes on the Pentium 4 and other recent Intel processors, including
processors based on the Intel Core microarchitecture) should be aligned
so that its base address is a multiple of the cache line size.  */
18260 | |
18261 | unsigned int max_align |
18262 | = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); |
18263 | |
18264 | if (max_align < BITS_PER_WORD) |
18265 | max_align = BITS_PER_WORD; |
18266 | |
18267 | switch (ix86_align_data_type) |
18268 | { |
18269 | case ix86_align_data_type_abi: opt = false; break; |
18270 | case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; |
18271 | case ix86_align_data_type_cacheline: break; |
18272 | } |
18273 | |
18274 | if (TARGET_IAMCU) |
18275 | align = iamcu_alignment (type, align); |
18276 | |
18277 | if (opt |
18278 | && AGGREGATE_TYPE_P (type) |
18279 | && TYPE_SIZE (type) |
18280 | && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) |
18281 | { |
if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
&& align < max_align_compat)
align = max_align_compat;
if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
&& align < max_align)
align = max_align;
18288 | } |
18289 | |
/* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
to a 16-byte boundary.  */
18292 | if (TARGET_64BIT) |
18293 | { |
18294 | if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) |
18295 | && TYPE_SIZE (type) |
18296 | && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
&& wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18298 | && align < 128) |
18299 | return 128; |
18300 | } |
18301 | |
18302 | if (!opt) |
18303 | return align; |
18304 | |
18305 | if (TREE_CODE (type) == ARRAY_TYPE) |
18306 | { |
18307 | if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) |
18308 | return 64; |
18309 | if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) |
18310 | return 128; |
18311 | } |
18312 | else if (TREE_CODE (type) == COMPLEX_TYPE) |
18313 | { |
18314 | |
18315 | if (TYPE_MODE (type) == DCmode && align < 64) |
18316 | return 64; |
18317 | if ((TYPE_MODE (type) == XCmode |
18318 | || TYPE_MODE (type) == TCmode) && align < 128) |
18319 | return 128; |
18320 | } |
18321 | else if (RECORD_OR_UNION_TYPE_P (type) |
18322 | && TYPE_FIELDS (type)) |
18323 | { |
18324 | if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) |
18325 | return 64; |
18326 | if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) |
18327 | return 128; |
18328 | } |
18329 | else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type) |
18330 | || TREE_CODE (type) == INTEGER_TYPE) |
18331 | { |
18332 | if (TYPE_MODE (type) == DFmode && align < 64) |
18333 | return 64; |
18334 | if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) |
18335 | return 128; |
18336 | } |
18337 | |
18338 | return align; |
18339 | } |
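/* Example of the rules above: on x86-64 a static "char buf[24]" is raised
from 8-bit to 128-bit alignment by the ABI array rule, and when OPT is
true a sufficiently large aggregate may further be raised up to the
cache-line based max_align (or the 256-bit compatibility bound) computed
above.  */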
18340 | |
/* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */
18342 | static void |
18343 | ix86_lower_local_decl_alignment (tree decl) |
18344 | { |
18345 | unsigned int new_align = ix86_local_alignment (decl, VOIDmode, |
18346 | DECL_ALIGN (decl), true); |
18347 | if (new_align < DECL_ALIGN (decl)) |
18348 | SET_DECL_ALIGN (decl, new_align); |
18349 | } |
18350 | |
18351 | /* Compute the alignment for a local variable or a stack slot. EXP is |
18352 | the data type or decl itself, MODE is the widest mode available and |
18353 | ALIGN is the alignment that the object would ordinarily have. The |
18354 | value of this macro is used instead of that alignment to align the |
18355 | object. */ |
18356 | |
18357 | unsigned int |
18358 | ix86_local_alignment (tree exp, machine_mode mode, |
18359 | unsigned int align, bool may_lower) |
18360 | { |
18361 | tree type, decl; |
18362 | |
18363 | if (exp && DECL_P (exp)) |
18364 | { |
18365 | type = TREE_TYPE (exp); |
18366 | decl = exp; |
18367 | } |
18368 | else |
18369 | { |
18370 | type = exp; |
18371 | decl = NULL; |
18372 | } |
18373 | |
18374 | /* Don't do dynamic stack realignment for long long objects with |
18375 | -mpreferred-stack-boundary=2. */ |
18376 | if (may_lower |
18377 | && !TARGET_64BIT |
18378 | && align == 64 |
18379 | && ix86_preferred_stack_boundary < 64 |
18380 | && (mode == DImode || (type && TYPE_MODE (type) == DImode)) |
18381 | && (!type || (!TYPE_USER_ALIGN (type) |
18382 | && !TYPE_ATOMIC (strip_array_types (type)))) |
18383 | && (!decl || !DECL_USER_ALIGN (decl))) |
18384 | align = 32; |
18385 | |
18386 | /* If TYPE is NULL, we are allocating a stack slot for caller-save |
18387 | register in MODE. We will return the largest alignment of XF |
18388 | and DF. */ |
18389 | if (!type) |
18390 | { |
18391 | if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) |
18392 | align = GET_MODE_ALIGNMENT (DFmode); |
18393 | return align; |
18394 | } |
18395 | |
18396 | /* Don't increase alignment for Intel MCU psABI. */ |
18397 | if (TARGET_IAMCU) |
18398 | return align; |
18399 | |
/* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
to a 16-byte boundary.  The exact wording is:

An array uses the same alignment as its elements, except that a local or
global array variable of length at least 16 bytes or
a C99 variable-length array variable always has alignment of at least 16 bytes.

This was added to allow use of aligned SSE instructions on arrays.  The
rule is meant for static storage (where the compiler cannot do the analysis
by itself).  We follow it for automatic variables only when convenient.
We fully control everything in the function being compiled, and functions
from other units cannot rely on the alignment.

Exclude the va_list type.  It is the common case of a local array where
we cannot benefit from the alignment.

TODO: Probably one should optimize for size only when the variable does
not escape.  */
18417 | if (TARGET_64BIT && optimize_function_for_speed_p (cfun) |
18418 | && TARGET_SSE) |
18419 | { |
18420 | if (AGGREGATE_TYPE_P (type) |
18421 | && (va_list_type_node == NULL_TREE |
18422 | || (TYPE_MAIN_VARIANT (type) |
18423 | != TYPE_MAIN_VARIANT (va_list_type_node))) |
18424 | && TYPE_SIZE (type) |
18425 | && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
&& wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18427 | && align < 128) |
18428 | return 128; |
18429 | } |
18430 | if (TREE_CODE (type) == ARRAY_TYPE) |
18431 | { |
18432 | if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) |
18433 | return 64; |
18434 | if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) |
18435 | return 128; |
18436 | } |
18437 | else if (TREE_CODE (type) == COMPLEX_TYPE) |
18438 | { |
18439 | if (TYPE_MODE (type) == DCmode && align < 64) |
18440 | return 64; |
18441 | if ((TYPE_MODE (type) == XCmode |
18442 | || TYPE_MODE (type) == TCmode) && align < 128) |
18443 | return 128; |
18444 | } |
18445 | else if (RECORD_OR_UNION_TYPE_P (type) |
18446 | && TYPE_FIELDS (type)) |
18447 | { |
18448 | if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) |
18449 | return 64; |
18450 | if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) |
18451 | return 128; |
18452 | } |
18453 | else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type) |
18454 | || TREE_CODE (type) == INTEGER_TYPE) |
18455 | { |
18456 | |
18457 | if (TYPE_MODE (type) == DFmode && align < 64) |
18458 | return 64; |
18459 | if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) |
18460 | return 128; |
18461 | } |
18462 | return align; |
18463 | } |
18464 | |
18465 | /* Compute the minimum required alignment for dynamic stack realignment |
18466 | purposes for a local variable, parameter or a stack slot. EXP is |
18467 | the data type or decl itself, MODE is its mode and ALIGN is the |
18468 | alignment that the object would ordinarily have. */ |
18469 | |
18470 | unsigned int |
18471 | ix86_minimum_alignment (tree exp, machine_mode mode, |
18472 | unsigned int align) |
18473 | { |
18474 | tree type, decl; |
18475 | |
18476 | if (exp && DECL_P (exp)) |
18477 | { |
18478 | type = TREE_TYPE (exp); |
18479 | decl = exp; |
18480 | } |
18481 | else |
18482 | { |
18483 | type = exp; |
18484 | decl = NULL; |
18485 | } |
18486 | |
18487 | if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) |
18488 | return align; |
18489 | |
18490 | /* Don't do dynamic stack realignment for long long objects with |
18491 | -mpreferred-stack-boundary=2. */ |
18492 | if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) |
18493 | && (!type || (!TYPE_USER_ALIGN (type) |
18494 | && !TYPE_ATOMIC (strip_array_types (type)))) |
18495 | && (!decl || !DECL_USER_ALIGN (decl))) |
18496 | { |
18497 | gcc_checking_assert (!TARGET_STV); |
18498 | return 32; |
18499 | } |
18500 | |
18501 | return align; |
18502 | } |
18503 | |
18504 | /* Find a location for the static chain incoming to a nested function. |
18505 | This is a register, unless all free registers are used by arguments. */ |
18506 | |
18507 | static rtx |
18508 | ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) |
18509 | { |
18510 | unsigned regno; |
18511 | |
18512 | if (TARGET_64BIT) |
18513 | { |
18514 | /* We always use R10 in 64-bit mode. */ |
18515 | regno = R10_REG; |
18516 | } |
18517 | else |
18518 | { |
18519 | const_tree fntype, fndecl; |
18520 | unsigned int ccvt; |
18521 | |
18522 | /* By default in 32-bit mode we use ECX to pass the static chain. */ |
18523 | regno = CX_REG; |
18524 | |
18525 | if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) |
18526 | { |
18527 | fntype = TREE_TYPE (fndecl_or_type); |
18528 | fndecl = fndecl_or_type; |
18529 | } |
18530 | else |
18531 | { |
18532 | fntype = fndecl_or_type; |
18533 | fndecl = NULL; |
18534 | } |
18535 | |
ccvt = ix86_get_callcvt (fntype);
18537 | if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
18538 | { |
18539 | /* Fastcall functions use ecx/edx for arguments, which leaves |
18540 | us with EAX for the static chain. |
18541 | Thiscall functions use ecx for arguments, which also |
18542 | leaves us with EAX for the static chain. */ |
18543 | regno = AX_REG; |
18544 | } |
18545 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
18546 | { |
/* Thiscall functions use ecx for arguments, which leaves
us with EAX and EDX for the static chain.
For ABI compatibility we use EAX.  */
18550 | regno = AX_REG; |
18551 | } |
else if (ix86_function_regparm (fntype, fndecl) == 3)
18553 | { |
18554 | /* For regparm 3, we have no free call-clobbered registers in |
18555 | which to store the static chain. In order to implement this, |
18556 | we have the trampoline push the static chain to the stack. |
18557 | However, we can't push a value below the return address when |
18558 | we call the nested function directly, so we have to use an |
18559 | alternate entry point. For this we use ESI, and have the |
18560 | alternate entry point push ESI, so that things appear the |
18561 | same once we're executing the nested function. */ |
18562 | if (incoming_p) |
18563 | { |
18564 | if (fndecl == current_function_decl |
18565 | && !ix86_static_chain_on_stack) |
18566 | { |
18567 | gcc_assert (!reload_completed); |
18568 | ix86_static_chain_on_stack = true; |
18569 | } |
18570 | return gen_frame_mem (SImode, |
18571 | plus_constant (Pmode, |
18572 | arg_pointer_rtx, -8)); |
18573 | } |
18574 | regno = SI_REG; |
18575 | } |
18576 | } |
18577 | |
18578 | return gen_rtx_REG (Pmode, regno); |
18579 | } |
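/* To summarize the choices above: the static chain lives in R10 in 64-bit
mode, in ECX for the default 32-bit convention, in EAX for fastcall and
thiscall functions, and for regparm(3) functions it is pushed on the
stack by the trampoline (with ESI used at the alternate entry point when
the nested function is called directly).  */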
18580 | |
18581 | /* Emit RTL insns to initialize the variable parts of a trampoline. |
18582 | FNDECL is the decl of the target address; M_TRAMP is a MEM for |
18583 | the trampoline, and CHAIN_VALUE is an RTX for the static chain |
18584 | to be passed to the target function. */ |
18585 | |
18586 | static void |
18587 | ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) |
18588 | { |
18589 | rtx mem, fnaddr; |
18590 | int opcode; |
18591 | int offset = 0; |
18592 | bool need_endbr = (flag_cf_protection & CF_BRANCH); |
18593 | |
18594 | fnaddr = XEXP (DECL_RTL (fndecl), 0); |
18595 | |
18596 | if (TARGET_64BIT) |
18597 | { |
18598 | int size; |
18599 | |
18600 | if (need_endbr) |
18601 | { |
18602 | /* Insert ENDBR64. */ |
18603 | mem = adjust_address (m_tramp, SImode, offset); |
18604 | emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode)); |
18605 | offset += 4; |
18606 | } |
18607 | |
18608 | /* Load the function address to r11. Try to load address using |
18609 | the shorter movl instead of movabs. We may want to support |
18610 | movq for kernel mode, but kernel does not use trampolines at |
18611 | the moment. FNADDR is a 32bit address and may not be in |
18612 | DImode when ptr_mode == SImode. Always use movl in this |
18613 | case. */ |
18614 | if (ptr_mode == SImode |
18615 | || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) |
18616 | { |
18617 | fnaddr = copy_addr_to_reg (fnaddr); |
18618 | |
18619 | mem = adjust_address (m_tramp, HImode, offset); |
18620 | emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); |
18621 | |
18622 | mem = adjust_address (m_tramp, SImode, offset + 2); |
18623 | emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); |
18624 | offset += 6; |
18625 | } |
18626 | else |
18627 | { |
18628 | mem = adjust_address (m_tramp, HImode, offset); |
18629 | emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); |
18630 | |
18631 | mem = adjust_address (m_tramp, DImode, offset + 2); |
18632 | emit_move_insn (mem, fnaddr); |
18633 | offset += 10; |
18634 | } |
18635 | |
18636 | /* Load static chain using movabs to r10. Use the shorter movl |
18637 | instead of movabs when ptr_mode == SImode. */ |
18638 | if (ptr_mode == SImode) |
18639 | { |
18640 | opcode = 0xba41; |
18641 | size = 6; |
18642 | } |
18643 | else |
18644 | { |
18645 | opcode = 0xba49; |
18646 | size = 10; |
18647 | } |
18648 | |
18649 | mem = adjust_address (m_tramp, HImode, offset); |
18650 | emit_move_insn (mem, gen_int_mode (opcode, HImode)); |
18651 | |
18652 | mem = adjust_address (m_tramp, ptr_mode, offset + 2); |
18653 | emit_move_insn (mem, chain_value); |
18654 | offset += size; |
18655 | |
18656 | /* Jump to r11; the last (unused) byte is a nop, only there to |
18657 | pad the write out to a single 32-bit store. */ |
18658 | mem = adjust_address (m_tramp, SImode, offset); |
18659 | emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); |
18660 | offset += 4; |
18661 | } |
18662 | else |
18663 | { |
18664 | rtx disp, chain; |
18665 | |
18666 | /* Depending on the static chain location, either load a register |
18667 | with a constant, or push the constant to the stack. All of the |
18668 | instructions are the same size. */ |
chain = ix86_static_chain (fndecl, true);
18670 | if (REG_P (chain)) |
18671 | { |
18672 | switch (REGNO (chain)) |
18673 | { |
18674 | case AX_REG: |
18675 | opcode = 0xb8; break; |
18676 | case CX_REG: |
18677 | opcode = 0xb9; break; |
18678 | default: |
18679 | gcc_unreachable (); |
18680 | } |
18681 | } |
18682 | else |
18683 | opcode = 0x68; |
18684 | |
18685 | if (need_endbr) |
18686 | { |
18687 | /* Insert ENDBR32. */ |
18688 | mem = adjust_address (m_tramp, SImode, offset); |
18689 | emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode)); |
18690 | offset += 4; |
18691 | } |
18692 | |
18693 | mem = adjust_address (m_tramp, QImode, offset); |
18694 | emit_move_insn (mem, gen_int_mode (opcode, QImode)); |
18695 | |
18696 | mem = adjust_address (m_tramp, SImode, offset + 1); |
18697 | emit_move_insn (mem, chain_value); |
18698 | offset += 5; |
18699 | |
18700 | mem = adjust_address (m_tramp, QImode, offset); |
18701 | emit_move_insn (mem, gen_int_mode (0xe9, QImode)); |
18702 | |
18703 | mem = adjust_address (m_tramp, SImode, offset + 1); |
18704 | |
18705 | /* Compute offset from the end of the jmp to the target function. |
18706 | In the case in which the trampoline stores the static chain on |
18707 | the stack, we need to skip the first insn which pushes the |
18708 | (call-saved) register static chain; this push is 1 byte. */ |
18709 | offset += 5; |
18710 | int skip = MEM_P (chain) ? 1 : 0; |
18711 | /* Skip ENDBR32 at the entry of the target function. */ |
18712 | if (need_endbr |
&& !cgraph_node::get (fndecl)->only_called_directly_p ())
18714 | skip += 4; |
18715 | disp = expand_binop (SImode, sub_optab, fnaddr, |
18716 | plus_constant (Pmode, XEXP (m_tramp, 0), |
18717 | offset - skip), |
18718 | NULL_RTX, 1, OPTAB_DIRECT); |
18719 | emit_move_insn (mem, disp); |
18720 | } |
18721 | |
18722 | gcc_assert (offset <= TRAMPOLINE_SIZE); |
18723 | |
18724 | #ifdef HAVE_ENABLE_EXECUTE_STACK |
18725 | #ifdef CHECK_EXECUTE_STACK_ENABLED |
18726 | if (CHECK_EXECUTE_STACK_ENABLED) |
18727 | #endif |
18728 | emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), |
18729 | LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); |
18730 | #endif |
18731 | } |
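/* For reference, the bytes stored above decode as follows.  The 64-bit
trampoline (the endbr64 prefix only when -fcf-protection enables branch
protection) is:
f3 0f 1e fa           endbr64
49 bb <imm64>         movabs $fnaddr, %r11   (41 bb <imm32>, movl, when
                                              the address fits in 32 bits)
49 ba <imm64>         movabs $chain, %r10    (41 ba <imm32> for x32)
49 ff e3 90           jmp *%r11; nop
The 32-bit variant is a mov/push of the static chain (opcode b8, b9 or 68)
followed by e9 <rel32>, a relative jump to the target function.  */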
18732 | |
18733 | static bool |
18734 | ix86_allocate_stack_slots_for_args (void) |
18735 | { |
18736 | /* Naked functions should not allocate stack slots for arguments. */ |
return !ix86_function_naked (current_function_decl);
18738 | } |
18739 | |
18740 | static bool |
18741 | ix86_warn_func_return (tree decl) |
18742 | { |
18743 | /* Naked functions are implemented entirely in assembly, including the |
18744 | return sequence, so suppress warnings about this. */ |
return !ix86_function_naked (decl);
18746 | } |
18747 | |
/* Return the shift count of a vector-by-scalar shift builtin given its
second argument ARG1.  */
18750 | static tree |
18751 | ix86_vector_shift_count (tree arg1) |
18752 | { |
18753 | if (tree_fits_uhwi_p (arg1)) |
18754 | return arg1; |
18755 | else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) |
18756 | { |
/* The count argument is peculiar: it is passed as various 128-bit
(or 64-bit) vectors, and only its low 64 bits are the count.  */
18759 | unsigned char buf[16]; |
18760 | int len = native_encode_expr (arg1, buf, 16); |
18761 | if (len == 0) |
18762 | return NULL_TREE; |
18763 | tree t = native_interpret_expr (uint64_type_node, buf, len); |
18764 | if (t && tree_fits_uhwi_p (t)) |
18765 | return t; |
18766 | } |
18767 | return NULL_TREE; |
18768 | } |
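/* For example, the count operand of a vector-by-scalar shift intrinsic
such as _mm_sll_epi32 is a full __m128i; only its low 64 bits act as the
shift count, which is what ix86_vector_shift_count above extracts.  */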
18769 | |
/* Return true if ARG_MASK is all ones; ELEMS is the number of elements
of the corresponding vector.  */
18772 | static bool |
18773 | ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask) |
18774 | { |
18775 | if (TREE_CODE (arg_mask) != INTEGER_CST) |
18776 | return false; |
18777 | |
18778 | unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask); |
18779 | if (elems == HOST_BITS_PER_WIDE_INT) |
18780 | return mask == HOST_WIDE_INT_M1U; |
18781 | if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) |
18782 | return false; |
18783 | |
18784 | return true; |
18785 | } |
18786 | |
18787 | static tree |
18788 | ix86_fold_builtin (tree fndecl, int n_args, |
18789 | tree *args, bool ignore ATTRIBUTE_UNUSED) |
18790 | { |
18791 | if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) |
18792 | { |
18793 | enum ix86_builtins fn_code |
= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18795 | enum rtx_code rcode; |
18796 | bool is_vshift; |
18797 | enum tree_code tcode; |
18798 | bool is_scalar; |
18799 | unsigned HOST_WIDE_INT mask; |
18800 | |
18801 | switch (fn_code) |
18802 | { |
18803 | case IX86_BUILTIN_CPU_IS: |
18804 | case IX86_BUILTIN_CPU_SUPPORTS: |
18805 | gcc_assert (n_args == 1); |
18806 | return fold_builtin_cpu (fndecl, args); |
18807 | |
18808 | case IX86_BUILTIN_NANQ: |
18809 | case IX86_BUILTIN_NANSQ: |
18810 | { |
18811 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
18812 | const char *str = c_getstr (*args); |
18813 | int quiet = fn_code == IX86_BUILTIN_NANQ; |
18814 | REAL_VALUE_TYPE real; |
18815 | |
18816 | if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) |
18817 | return build_real (type, real); |
18818 | return NULL_TREE; |
18819 | } |
18820 | |
18821 | case IX86_BUILTIN_INFQ: |
18822 | case IX86_BUILTIN_HUGE_VALQ: |
18823 | { |
18824 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
18825 | REAL_VALUE_TYPE inf; |
18826 | real_inf (&inf); |
18827 | return build_real (type, inf); |
18828 | } |
18829 | |
18830 | case IX86_BUILTIN_TZCNT16: |
18831 | case IX86_BUILTIN_CTZS: |
18832 | case IX86_BUILTIN_TZCNT32: |
18833 | case IX86_BUILTIN_TZCNT64: |
18834 | gcc_assert (n_args == 1); |
18835 | if (TREE_CODE (args[0]) == INTEGER_CST) |
18836 | { |
18837 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
18838 | tree arg = args[0]; |
18839 | if (fn_code == IX86_BUILTIN_TZCNT16 |
18840 | || fn_code == IX86_BUILTIN_CTZS) |
18841 | arg = fold_convert (short_unsigned_type_node, arg); |
18842 | if (integer_zerop (arg)) |
18843 | return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); |
18844 | else |
18845 | return fold_const_call (CFN_CTZ, type, arg); |
18846 | } |
18847 | break; |
18848 | |
18849 | case IX86_BUILTIN_LZCNT16: |
18850 | case IX86_BUILTIN_CLZS: |
18851 | case IX86_BUILTIN_LZCNT32: |
18852 | case IX86_BUILTIN_LZCNT64: |
18853 | gcc_assert (n_args == 1); |
18854 | if (TREE_CODE (args[0]) == INTEGER_CST) |
18855 | { |
18856 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); |
18857 | tree arg = args[0]; |
18858 | if (fn_code == IX86_BUILTIN_LZCNT16 |
18859 | || fn_code == IX86_BUILTIN_CLZS) |
18860 | arg = fold_convert (short_unsigned_type_node, arg); |
18861 | if (integer_zerop (arg)) |
18862 | return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); |
18863 | else |
18864 | return fold_const_call (CFN_CLZ, type, arg); |
18865 | } |
18866 | break; |
18867 | |
18868 | case IX86_BUILTIN_BEXTR32: |
18869 | case IX86_BUILTIN_BEXTR64: |
18870 | case IX86_BUILTIN_BEXTRI32: |
18871 | case IX86_BUILTIN_BEXTRI64: |
18872 | gcc_assert (n_args == 2); |
18873 | if (tree_fits_uhwi_p (args[1])) |
18874 | { |
18875 | unsigned HOST_WIDE_INT res = 0; |
18876 | unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); |
18877 | unsigned int start = tree_to_uhwi (args[1]); |
18878 | unsigned int len = (start & 0xff00) >> 8; |
18879 | tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl)); |
18880 | start &= 0xff; |
18881 | if (start >= prec || len == 0) |
18882 | return omit_one_operand (lhs_type, build_zero_cst (lhs_type), |
18883 | args[0]); |
18884 | else if (!tree_fits_uhwi_p (args[0])) |
18885 | break; |
18886 | else |
18887 | res = tree_to_uhwi (args[0]) >> start; |
18888 | if (len > prec) |
18889 | len = prec; |
18890 | if (len < HOST_BITS_PER_WIDE_INT) |
18891 | res &= (HOST_WIDE_INT_1U << len) - 1; |
return build_int_cstu (lhs_type, res);
18893 | } |
18894 | break; |
18895 | |
18896 | case IX86_BUILTIN_BZHI32: |
18897 | case IX86_BUILTIN_BZHI64: |
18898 | gcc_assert (n_args == 2); |
18899 | if (tree_fits_uhwi_p (args[1])) |
18900 | { |
18901 | unsigned int idx = tree_to_uhwi (args[1]) & 0xff; |
18902 | tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl)); |
18903 | if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) |
18904 | return args[0]; |
18905 | if (idx == 0) |
18906 | return omit_one_operand (lhs_type, build_zero_cst (lhs_type), |
18907 | args[0]); |
18908 | if (!tree_fits_uhwi_p (args[0])) |
18909 | break; |
18910 | unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]); |
18911 | res &= ~(HOST_WIDE_INT_M1U << idx); |
return build_int_cstu (lhs_type, res);
18913 | } |
18914 | break; |
18915 | |
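/* The PDEP/PEXT cases below fold the bit deposit/extract operations on
constant operands: PDEP scatters the low bits of the source into the
set-bit positions of the mask, and PEXT gathers the bits at those
positions back into the low bits.  E.g. with mask 0b11010, PDEP of
0b00101 yields 0b10010, and PEXT of 0b10010 yields 0b00101.  */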
18916 | case IX86_BUILTIN_PDEP32: |
18917 | case IX86_BUILTIN_PDEP64: |
18918 | gcc_assert (n_args == 2); |
18919 | if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) |
18920 | { |
18921 | unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); |
18922 | unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); |
18923 | unsigned HOST_WIDE_INT res = 0; |
18924 | unsigned HOST_WIDE_INT m, k = 1; |
18925 | for (m = 1; m; m <<= 1) |
18926 | if ((mask & m) != 0) |
18927 | { |
18928 | if ((src & k) != 0) |
18929 | res |= m; |
18930 | k <<= 1; |
18931 | } |
18932 | return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); |
18933 | } |
18934 | break; |
18935 | |
18936 | case IX86_BUILTIN_PEXT32: |
18937 | case IX86_BUILTIN_PEXT64: |
18938 | gcc_assert (n_args == 2); |
18939 | if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) |
18940 | { |
18941 | unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); |
18942 | unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); |
18943 | unsigned HOST_WIDE_INT res = 0; |
18944 | unsigned HOST_WIDE_INT m, k = 1; |
18945 | for (m = 1; m; m <<= 1) |
18946 | if ((mask & m) != 0) |
18947 | { |
18948 | if ((src & m) != 0) |
18949 | res |= k; |
18950 | k <<= 1; |
18951 | } |
18952 | return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); |
18953 | } |
18954 | break; |
18955 | |
18956 | case IX86_BUILTIN_MOVMSKPS: |
18957 | case IX86_BUILTIN_PMOVMSKB: |
18958 | case IX86_BUILTIN_MOVMSKPD: |
18959 | case IX86_BUILTIN_PMOVMSKB128: |
18960 | case IX86_BUILTIN_MOVMSKPD256: |
18961 | case IX86_BUILTIN_MOVMSKPS256: |
18962 | case IX86_BUILTIN_PMOVMSKB256: |
18963 | gcc_assert (n_args == 1); |
18964 | if (TREE_CODE (args[0]) == VECTOR_CST) |
18965 | { |
18966 | HOST_WIDE_INT res = 0; |
18967 | for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) |
18968 | { |
18969 | tree e = VECTOR_CST_ELT (args[0], i); |
18970 | if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) |
18971 | { |
if (wi::neg_p (wi::to_wide (e)))
18973 | res |= HOST_WIDE_INT_1 << i; |
18974 | } |
18975 | else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) |
18976 | { |
18977 | if (TREE_REAL_CST (e).sign) |
18978 | res |= HOST_WIDE_INT_1 << i; |
18979 | } |
18980 | else |
18981 | return NULL_TREE; |
18982 | } |
18983 | return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); |
18984 | } |
18985 | break; |
18986 | |
18987 | case IX86_BUILTIN_PSLLD: |
18988 | case IX86_BUILTIN_PSLLD128: |
18989 | case IX86_BUILTIN_PSLLD128_MASK: |
18990 | case IX86_BUILTIN_PSLLD256: |
18991 | case IX86_BUILTIN_PSLLD256_MASK: |
18992 | case IX86_BUILTIN_PSLLD512: |
18993 | case IX86_BUILTIN_PSLLDI: |
18994 | case IX86_BUILTIN_PSLLDI128: |
18995 | case IX86_BUILTIN_PSLLDI128_MASK: |
18996 | case IX86_BUILTIN_PSLLDI256: |
18997 | case IX86_BUILTIN_PSLLDI256_MASK: |
18998 | case IX86_BUILTIN_PSLLDI512: |
18999 | case IX86_BUILTIN_PSLLQ: |
19000 | case IX86_BUILTIN_PSLLQ128: |
19001 | case IX86_BUILTIN_PSLLQ128_MASK: |
19002 | case IX86_BUILTIN_PSLLQ256: |
19003 | case IX86_BUILTIN_PSLLQ256_MASK: |
19004 | case IX86_BUILTIN_PSLLQ512: |
19005 | case IX86_BUILTIN_PSLLQI: |
19006 | case IX86_BUILTIN_PSLLQI128: |
19007 | case IX86_BUILTIN_PSLLQI128_MASK: |
19008 | case IX86_BUILTIN_PSLLQI256: |
19009 | case IX86_BUILTIN_PSLLQI256_MASK: |
19010 | case IX86_BUILTIN_PSLLQI512: |
19011 | case IX86_BUILTIN_PSLLW: |
19012 | case IX86_BUILTIN_PSLLW128: |
19013 | case IX86_BUILTIN_PSLLW128_MASK: |
19014 | case IX86_BUILTIN_PSLLW256: |
19015 | case IX86_BUILTIN_PSLLW256_MASK: |
19016 | case IX86_BUILTIN_PSLLW512_MASK: |
19017 | case IX86_BUILTIN_PSLLWI: |
19018 | case IX86_BUILTIN_PSLLWI128: |
19019 | case IX86_BUILTIN_PSLLWI128_MASK: |
19020 | case IX86_BUILTIN_PSLLWI256: |
19021 | case IX86_BUILTIN_PSLLWI256_MASK: |
19022 | case IX86_BUILTIN_PSLLWI512_MASK: |
19023 | rcode = ASHIFT; |
19024 | is_vshift = false; |
19025 | goto do_shift; |
19026 | case IX86_BUILTIN_PSRAD: |
19027 | case IX86_BUILTIN_PSRAD128: |
19028 | case IX86_BUILTIN_PSRAD128_MASK: |
19029 | case IX86_BUILTIN_PSRAD256: |
19030 | case IX86_BUILTIN_PSRAD256_MASK: |
19031 | case IX86_BUILTIN_PSRAD512: |
19032 | case IX86_BUILTIN_PSRADI: |
19033 | case IX86_BUILTIN_PSRADI128: |
19034 | case IX86_BUILTIN_PSRADI128_MASK: |
19035 | case IX86_BUILTIN_PSRADI256: |
19036 | case IX86_BUILTIN_PSRADI256_MASK: |
19037 | case IX86_BUILTIN_PSRADI512: |
19038 | case IX86_BUILTIN_PSRAQ128_MASK: |
19039 | case IX86_BUILTIN_PSRAQ256_MASK: |
19040 | case IX86_BUILTIN_PSRAQ512: |
19041 | case IX86_BUILTIN_PSRAQI128_MASK: |
19042 | case IX86_BUILTIN_PSRAQI256_MASK: |
19043 | case IX86_BUILTIN_PSRAQI512: |
19044 | case IX86_BUILTIN_PSRAW: |
19045 | case IX86_BUILTIN_PSRAW128: |
19046 | case IX86_BUILTIN_PSRAW128_MASK: |
19047 | case IX86_BUILTIN_PSRAW256: |
19048 | case IX86_BUILTIN_PSRAW256_MASK: |
19049 | case IX86_BUILTIN_PSRAW512: |
19050 | case IX86_BUILTIN_PSRAWI: |
19051 | case IX86_BUILTIN_PSRAWI128: |
19052 | case IX86_BUILTIN_PSRAWI128_MASK: |
19053 | case IX86_BUILTIN_PSRAWI256: |
19054 | case IX86_BUILTIN_PSRAWI256_MASK: |
19055 | case IX86_BUILTIN_PSRAWI512: |
19056 | rcode = ASHIFTRT; |
19057 | is_vshift = false; |
19058 | goto do_shift; |
19059 | case IX86_BUILTIN_PSRLD: |
19060 | case IX86_BUILTIN_PSRLD128: |
19061 | case IX86_BUILTIN_PSRLD128_MASK: |
19062 | case IX86_BUILTIN_PSRLD256: |
19063 | case IX86_BUILTIN_PSRLD256_MASK: |
19064 | case IX86_BUILTIN_PSRLD512: |
19065 | case IX86_BUILTIN_PSRLDI: |
19066 | case IX86_BUILTIN_PSRLDI128: |
19067 | case IX86_BUILTIN_PSRLDI128_MASK: |
19068 | case IX86_BUILTIN_PSRLDI256: |
19069 | case IX86_BUILTIN_PSRLDI256_MASK: |
19070 | case IX86_BUILTIN_PSRLDI512: |
19071 | case IX86_BUILTIN_PSRLQ: |
19072 | case IX86_BUILTIN_PSRLQ128: |
19073 | case IX86_BUILTIN_PSRLQ128_MASK: |
19074 | case IX86_BUILTIN_PSRLQ256: |
19075 | case IX86_BUILTIN_PSRLQ256_MASK: |
19076 | case IX86_BUILTIN_PSRLQ512: |
19077 | case IX86_BUILTIN_PSRLQI: |
19078 | case IX86_BUILTIN_PSRLQI128: |
19079 | case IX86_BUILTIN_PSRLQI128_MASK: |
19080 | case IX86_BUILTIN_PSRLQI256: |
19081 | case IX86_BUILTIN_PSRLQI256_MASK: |
19082 | case IX86_BUILTIN_PSRLQI512: |
19083 | case IX86_BUILTIN_PSRLW: |
19084 | case IX86_BUILTIN_PSRLW128: |
19085 | case IX86_BUILTIN_PSRLW128_MASK: |
19086 | case IX86_BUILTIN_PSRLW256: |
19087 | case IX86_BUILTIN_PSRLW256_MASK: |
19088 | case IX86_BUILTIN_PSRLW512: |
19089 | case IX86_BUILTIN_PSRLWI: |
19090 | case IX86_BUILTIN_PSRLWI128: |
19091 | case IX86_BUILTIN_PSRLWI128_MASK: |
19092 | case IX86_BUILTIN_PSRLWI256: |
19093 | case IX86_BUILTIN_PSRLWI256_MASK: |
19094 | case IX86_BUILTIN_PSRLWI512: |
19095 | rcode = LSHIFTRT; |
19096 | is_vshift = false; |
19097 | goto do_shift; |
19098 | case IX86_BUILTIN_PSLLVV16HI: |
19099 | case IX86_BUILTIN_PSLLVV16SI: |
19100 | case IX86_BUILTIN_PSLLVV2DI: |
19101 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
19102 | case IX86_BUILTIN_PSLLVV32HI: |
19103 | case IX86_BUILTIN_PSLLVV4DI: |
19104 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
19105 | case IX86_BUILTIN_PSLLVV4SI: |
19106 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
19107 | case IX86_BUILTIN_PSLLVV8DI: |
19108 | case IX86_BUILTIN_PSLLVV8HI: |
19109 | case IX86_BUILTIN_PSLLVV8SI: |
19110 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
19111 | rcode = ASHIFT; |
19112 | is_vshift = true; |
19113 | goto do_shift; |
19114 | case IX86_BUILTIN_PSRAVQ128: |
19115 | case IX86_BUILTIN_PSRAVQ256: |
19116 | case IX86_BUILTIN_PSRAVV16HI: |
19117 | case IX86_BUILTIN_PSRAVV16SI: |
19118 | case IX86_BUILTIN_PSRAVV32HI: |
19119 | case IX86_BUILTIN_PSRAVV4SI: |
19120 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
19121 | case IX86_BUILTIN_PSRAVV8DI: |
19122 | case IX86_BUILTIN_PSRAVV8HI: |
19123 | case IX86_BUILTIN_PSRAVV8SI: |
19124 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
19125 | rcode = ASHIFTRT; |
19126 | is_vshift = true; |
19127 | goto do_shift; |
19128 | case IX86_BUILTIN_PSRLVV16HI: |
19129 | case IX86_BUILTIN_PSRLVV16SI: |
19130 | case IX86_BUILTIN_PSRLVV2DI: |
19131 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
19132 | case IX86_BUILTIN_PSRLVV32HI: |
19133 | case IX86_BUILTIN_PSRLVV4DI: |
19134 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
19135 | case IX86_BUILTIN_PSRLVV4SI: |
19136 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
19137 | case IX86_BUILTIN_PSRLVV8DI: |
19138 | case IX86_BUILTIN_PSRLVV8HI: |
19139 | case IX86_BUILTIN_PSRLVV8SI: |
19140 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
19141 | rcode = LSHIFTRT; |
19142 | is_vshift = true; |
19143 | goto do_shift; |
19144 | |
19145 | do_shift: |
19146 | gcc_assert (n_args >= 2); |
19147 | if (TREE_CODE (args[0]) != VECTOR_CST) |
19148 | break; |
19149 | mask = HOST_WIDE_INT_M1U; |
19150 | if (n_args > 2) |
19151 | { |
/* This is a masked shift.  */
19153 | if (!tree_fits_uhwi_p (args[n_args - 1]) |
19154 | || TREE_SIDE_EFFECTS (args[n_args - 2])) |
19155 | break; |
19156 | mask = tree_to_uhwi (args[n_args - 1]); |
19157 | unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
19158 | mask |= HOST_WIDE_INT_M1U << elems; |
19159 | if (mask != HOST_WIDE_INT_M1U |
19160 | && TREE_CODE (args[n_args - 2]) != VECTOR_CST) |
19161 | break; |
19162 | if (mask == (HOST_WIDE_INT_M1U << elems)) |
19163 | return args[n_args - 2]; |
19164 | } |
19165 | if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) |
19166 | break; |
19167 | if (tree tem = (is_vshift ? integer_one_node |
: ix86_vector_shift_count (args[1])))
19169 | { |
19170 | unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); |
19171 | unsigned HOST_WIDE_INT prec |
19172 | = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); |
19173 | if (count == 0 && mask == HOST_WIDE_INT_M1U) |
19174 | return args[0]; |
19175 | if (count >= prec) |
19176 | { |
19177 | if (rcode == ASHIFTRT) |
19178 | count = prec - 1; |
19179 | else if (mask == HOST_WIDE_INT_M1U) |
19180 | return build_zero_cst (TREE_TYPE (args[0])); |
19181 | } |
19182 | tree countt = NULL_TREE; |
19183 | if (!is_vshift) |
19184 | { |
19185 | if (count >= prec) |
19186 | countt = integer_zero_node; |
19187 | else |
19188 | countt = build_int_cst (integer_type_node, count); |
19189 | } |
19190 | tree_vector_builder builder; |
19191 | if (mask != HOST_WIDE_INT_M1U || is_vshift) |
19192 | builder.new_vector (TREE_TYPE (args[0]), |
TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
1);
else
builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
false);
19198 | unsigned int cnt = builder.encoded_nelts (); |
19199 | for (unsigned int i = 0; i < cnt; ++i) |
19200 | { |
19201 | tree elt = VECTOR_CST_ELT (args[0], i); |
19202 | if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) |
19203 | return NULL_TREE; |
19204 | tree type = TREE_TYPE (elt); |
19205 | if (rcode == LSHIFTRT) |
19206 | elt = fold_convert (unsigned_type_for (type), elt); |
19207 | if (is_vshift) |
19208 | { |
19209 | countt = VECTOR_CST_ELT (args[1], i); |
19210 | if (TREE_CODE (countt) != INTEGER_CST |
19211 | || TREE_OVERFLOW (countt)) |
19212 | return NULL_TREE; |
if (wi::neg_p (wi::to_wide (countt))
|| wi::to_widest (countt) >= prec)
19215 | { |
19216 | if (rcode == ASHIFTRT) |
19217 | countt = build_int_cst (TREE_TYPE (countt), |
19218 | prec - 1); |
19219 | else |
19220 | { |
19221 | elt = build_zero_cst (TREE_TYPE (elt)); |
19222 | countt = build_zero_cst (TREE_TYPE (countt)); |
19223 | } |
19224 | } |
19225 | } |
19226 | else if (count >= prec) |
19227 | elt = build_zero_cst (TREE_TYPE (elt)); |
19228 | elt = const_binop (rcode == ASHIFT |
19229 | ? LSHIFT_EXPR : RSHIFT_EXPR, |
19230 | TREE_TYPE (elt), elt, countt); |
19231 | if (!elt || TREE_CODE (elt) != INTEGER_CST) |
19232 | return NULL_TREE; |
19233 | if (rcode == LSHIFTRT) |
19234 | elt = fold_convert (type, elt); |
19235 | if ((mask & (HOST_WIDE_INT_1U << i)) == 0) |
19236 | { |
19237 | elt = VECTOR_CST_ELT (args[n_args - 2], i); |
19238 | if (TREE_CODE (elt) != INTEGER_CST |
19239 | || TREE_OVERFLOW (elt)) |
19240 | return NULL_TREE; |
19241 | } |
builder.quick_push (elt);
19243 | } |
19244 | return builder.build (); |
19245 | } |
19246 | break; |
19247 | |
19248 | case IX86_BUILTIN_MINSS: |
19249 | case IX86_BUILTIN_MINSH_MASK: |
19250 | tcode = LT_EXPR; |
19251 | is_scalar = true; |
19252 | goto do_minmax; |
19253 | |
19254 | case IX86_BUILTIN_MAXSS: |
19255 | case IX86_BUILTIN_MAXSH_MASK: |
19256 | tcode = GT_EXPR; |
19257 | is_scalar = true; |
19258 | goto do_minmax; |
19259 | |
19260 | case IX86_BUILTIN_MINPS: |
19261 | case IX86_BUILTIN_MINPD: |
19262 | case IX86_BUILTIN_MINPS256: |
19263 | case IX86_BUILTIN_MINPD256: |
19264 | case IX86_BUILTIN_MINPS512: |
19265 | case IX86_BUILTIN_MINPD512: |
19266 | case IX86_BUILTIN_MINPS128_MASK: |
19267 | case IX86_BUILTIN_MINPD128_MASK: |
19268 | case IX86_BUILTIN_MINPS256_MASK: |
19269 | case IX86_BUILTIN_MINPD256_MASK: |
19270 | case IX86_BUILTIN_MINPH128_MASK: |
19271 | case IX86_BUILTIN_MINPH256_MASK: |
19272 | case IX86_BUILTIN_MINPH512_MASK: |
19273 | tcode = LT_EXPR; |
19274 | is_scalar = false; |
19275 | goto do_minmax; |
19276 | |
19277 | case IX86_BUILTIN_MAXPS: |
19278 | case IX86_BUILTIN_MAXPD: |
19279 | case IX86_BUILTIN_MAXPS256: |
19280 | case IX86_BUILTIN_MAXPD256: |
19281 | case IX86_BUILTIN_MAXPS512: |
19282 | case IX86_BUILTIN_MAXPD512: |
19283 | case IX86_BUILTIN_MAXPS128_MASK: |
19284 | case IX86_BUILTIN_MAXPD128_MASK: |
19285 | case IX86_BUILTIN_MAXPS256_MASK: |
19286 | case IX86_BUILTIN_MAXPD256_MASK: |
19287 | case IX86_BUILTIN_MAXPH128_MASK: |
19288 | case IX86_BUILTIN_MAXPH256_MASK: |
19289 | case IX86_BUILTIN_MAXPH512_MASK: |
19290 | tcode = GT_EXPR; |
19291 | is_scalar = false; |
19292 | do_minmax: |
19293 | gcc_assert (n_args >= 2); |
19294 | if (TREE_CODE (args[0]) != VECTOR_CST |
19295 | || TREE_CODE (args[1]) != VECTOR_CST) |
19296 | break; |
19297 | mask = HOST_WIDE_INT_M1U; |
19298 | if (n_args > 2) |
19299 | { |
19300 | gcc_assert (n_args >= 4); |
/* This is a masked minmax.  */
19302 | if (TREE_CODE (args[3]) != INTEGER_CST |
19303 | || TREE_SIDE_EFFECTS (args[2])) |
19304 | break; |
19305 | mask = TREE_INT_CST_LOW (args[3]); |
19306 | unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
19307 | mask |= HOST_WIDE_INT_M1U << elems; |
19308 | if (mask != HOST_WIDE_INT_M1U |
19309 | && TREE_CODE (args[2]) != VECTOR_CST) |
19310 | break; |
19311 | if (n_args >= 5) |
19312 | { |
19313 | if (!tree_fits_uhwi_p (args[4])) |
19314 | break; |
19315 | if (tree_to_uhwi (args[4]) != 4 |
19316 | && tree_to_uhwi (args[4]) != 8) |
19317 | break; |
19318 | } |
19319 | if (mask == (HOST_WIDE_INT_M1U << elems)) |
19320 | return args[2]; |
19321 | } |
19322 | /* Punt on NaNs, unless exceptions are disabled. */ |
19323 | if (HONOR_NANS (args[0]) |
19324 | && (n_args < 5 || tree_to_uhwi (args[4]) != 8)) |
19325 | for (int i = 0; i < 2; ++i) |
19326 | { |
unsigned count = vector_cst_encoded_nelts (args[i]);
19328 | for (unsigned j = 0; j < count; ++j) |
19329 | if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j))) |
19330 | return NULL_TREE; |
19331 | } |
19332 | { |
19333 | tree res = const_binop (tcode, |
19334 | truth_type_for (TREE_TYPE (args[0])), |
19335 | args[0], args[1]); |
19336 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
19337 | break; |
19338 | res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res, |
19339 | args[0], args[1]); |
19340 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
19341 | break; |
19342 | if (mask != HOST_WIDE_INT_M1U) |
19343 | { |
19344 | unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
19345 | vec_perm_builder sel (nelts, nelts, 1); |
19346 | for (unsigned int i = 0; i < nelts; i++) |
19347 | if (mask & (HOST_WIDE_INT_1U << i)) |
sel.quick_push (i);
else
sel.quick_push (nelts + i);
19351 | vec_perm_indices indices (sel, 2, nelts); |
19352 | res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2], |
19353 | indices); |
19354 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
19355 | break; |
19356 | } |
19357 | if (is_scalar) |
19358 | { |
19359 | unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); |
19360 | vec_perm_builder sel (nelts, nelts, 1); |
sel.quick_push (0);
for (unsigned int i = 1; i < nelts; i++)
sel.quick_push (nelts + i);
19364 | vec_perm_indices indices (sel, 2, nelts); |
19365 | res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0], |
19366 | indices); |
19367 | if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST) |
19368 | break; |
19369 | } |
19370 | return res; |
19371 | } |
19372 | |
19373 | default: |
19374 | break; |
19375 | } |
19376 | } |
19377 | |
19378 | #ifdef SUBTARGET_FOLD_BUILTIN |
19379 | return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); |
19380 | #endif |
19381 | |
19382 | return NULL_TREE; |
19383 | } |
19384 | |
19385 | /* Fold a MD builtin (use ix86_fold_builtin for folding into |
19386 | constant) in GIMPLE. */ |
19387 | |
19388 | bool |
19389 | ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) |
19390 | { |
gimple *stmt = gsi_stmt (*gsi), *g;
gimple_seq stmts = NULL;
tree fndecl = gimple_call_fndecl (stmt);
gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
int n_args = gimple_call_num_args (stmt);
enum ix86_builtins fn_code
= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19398 | tree decl = NULL_TREE; |
19399 | tree arg0, arg1, arg2; |
19400 | enum rtx_code rcode; |
19401 | enum tree_code tcode; |
19402 | unsigned HOST_WIDE_INT count; |
19403 | bool is_vshift; |
19404 | unsigned HOST_WIDE_INT elems; |
19405 | location_t loc; |
19406 | |
/* Don't fold when there is an ISA mismatch.  */
19408 | if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL)) |
19409 | return false; |
19410 | |
19411 | switch (fn_code) |
19412 | { |
19413 | case IX86_BUILTIN_TZCNT32: |
decl = builtin_decl_implicit (BUILT_IN_CTZ);
goto fold_tzcnt_lzcnt;

case IX86_BUILTIN_TZCNT64:
decl = builtin_decl_implicit (BUILT_IN_CTZLL);
goto fold_tzcnt_lzcnt;

case IX86_BUILTIN_LZCNT32:
decl = builtin_decl_implicit (BUILT_IN_CLZ);
goto fold_tzcnt_lzcnt;

case IX86_BUILTIN_LZCNT64:
decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19427 | goto fold_tzcnt_lzcnt; |
19428 | |
19429 | fold_tzcnt_lzcnt: |
19430 | gcc_assert (n_args == 1); |
arg0 = gimple_call_arg (stmt, 0);
if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
{
int prec = TYPE_PRECISION (TREE_TYPE (arg0));
/* If arg0 is provably non-zero, optimize into the generic
__builtin_c[tl]z{,ll} functions, which the middle end handles
better.  */
if (!expr_not_equal_to (arg0, wi::zero (prec)))
return false;

loc = gimple_location (stmt);
g = gimple_build_call (decl, 1, arg0);
gimple_set_location (g, loc);
tree lhs = make_ssa_name (integer_type_node);
gimple_call_set_lhs (g, lhs);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
gimple_set_location (g, loc);
gsi_replace (gsi, g, false);
19450 | return true; |
19451 | } |
19452 | break; |
19453 | |
19454 | case IX86_BUILTIN_BZHI32: |
19455 | case IX86_BUILTIN_BZHI64: |
19456 | gcc_assert (n_args == 2); |
arg1 = gimple_call_arg (stmt, 1);
if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
{
unsigned int idx = tree_to_uhwi (arg1) & 0xff;
arg0 = gimple_call_arg (stmt, 0);
if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
break;
loc = gimple_location (stmt);
g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
gimple_set_location (g, loc);
19467 | gsi_replace (gsi, g, false); |
19468 | return true; |
19469 | } |
19470 | break; |
19471 | |
19472 | case IX86_BUILTIN_PDEP32: |
19473 | case IX86_BUILTIN_PDEP64: |
19474 | case IX86_BUILTIN_PEXT32: |
19475 | case IX86_BUILTIN_PEXT64: |
19476 | gcc_assert (n_args == 2); |
arg1 = gimple_call_arg (stmt, 1);
if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
{
loc = gimple_location (stmt);
arg0 = gimple_call_arg (stmt, 0);
g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
gimple_set_location (g, loc);
19484 | gsi_replace (gsi, g, false); |
19485 | return true; |
19486 | } |
19487 | break; |
19488 | |
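/* The blendv cases below are lowered to a VEC_COND_EXPR: the selector is
the sign bit of each mask element, so the result is effectively
"(signed) mask < 0 ? arg1 : arg0" element-wise, after view-converting a
floating-point mask to an integer vector of the same width.  */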
19489 | case IX86_BUILTIN_PBLENDVB256: |
19490 | case IX86_BUILTIN_BLENDVPS256: |
19491 | case IX86_BUILTIN_BLENDVPD256: |
/* pcmpeqb/d/q requires AVX2; without AVX2 it is veclowered
to scalar operations and not combined back.  */
19494 | if (!TARGET_AVX2) |
19495 | break; |
19496 | |
19497 | /* FALLTHRU. */ |
19498 | case IX86_BUILTIN_BLENDVPD: |
/* blendvpd is under SSE4.1 but pcmpgtq is under SSE4.2;
without SSE4.2 it is veclowered to scalar operations and
not combined back.  */
19502 | if (!TARGET_SSE4_2) |
19503 | break; |
19504 | /* FALLTHRU. */ |
19505 | case IX86_BUILTIN_PBLENDVB128: |
19506 | case IX86_BUILTIN_BLENDVPS: |
19507 | gcc_assert (n_args == 3); |
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
arg2 = gimple_call_arg (stmt, 2);
if (gimple_call_lhs (stmt))
{
loc = gimple_location (stmt);
tree type = TREE_TYPE (arg2);
if (VECTOR_FLOAT_TYPE_P (type))
{
tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
? intSI_type_node : intDI_type_node;
type = get_same_sized_vectype (itype, type);
}
else
type = signed_type_for (type);
arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
tree zero_vec = build_zero_cst (type);
tree cmp_type = truth_type_for (type);
tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
g = gimple_build_assign (gimple_call_lhs (stmt),
VEC_COND_EXPR, cmp,
arg1, arg0);
gimple_set_location (g, loc);
19532 | gsi_replace (gsi, g, false); |
19533 | } |
19534 | else |
19535 | gsi_replace (gsi, gimple_build_nop (), false); |
19536 | return true; |
19537 | |
19538 | |
19539 | case IX86_BUILTIN_PCMPEQB128: |
19540 | case IX86_BUILTIN_PCMPEQW128: |
19541 | case IX86_BUILTIN_PCMPEQD128: |
19542 | case IX86_BUILTIN_PCMPEQQ: |
19543 | case IX86_BUILTIN_PCMPEQB256: |
19544 | case IX86_BUILTIN_PCMPEQW256: |
19545 | case IX86_BUILTIN_PCMPEQD256: |
19546 | case IX86_BUILTIN_PCMPEQQ256: |
19547 | tcode = EQ_EXPR; |
19548 | goto do_cmp; |
19549 | |
19550 | case IX86_BUILTIN_PCMPGTB128: |
19551 | case IX86_BUILTIN_PCMPGTW128: |
19552 | case IX86_BUILTIN_PCMPGTD128: |
19553 | case IX86_BUILTIN_PCMPGTQ: |
19554 | case IX86_BUILTIN_PCMPGTB256: |
19555 | case IX86_BUILTIN_PCMPGTW256: |
19556 | case IX86_BUILTIN_PCMPGTD256: |
19557 | case IX86_BUILTIN_PCMPGTQ256: |
19558 | tcode = GT_EXPR; |
19559 | |
19560 | do_cmp: |
19561 | gcc_assert (n_args == 2); |
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
if (gimple_call_lhs (stmt))
{
loc = gimple_location (stmt);
tree type = TREE_TYPE (arg0);
tree zero_vec = build_zero_cst (type);
tree minus_one_vec = build_minus_one_cst (type);
tree cmp_type = truth_type_for (type);
tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
g = gimple_build_assign (gimple_call_lhs (stmt),
VEC_COND_EXPR, cmp,
minus_one_vec, zero_vec);
gimple_set_location (g, loc);
19577 | gsi_replace (gsi, g, false); |
19578 | } |
19579 | else |
19580 | gsi_replace (gsi, gimple_build_nop (), false); |
19581 | return true; |
19582 | |
19583 | case IX86_BUILTIN_PSLLD: |
19584 | case IX86_BUILTIN_PSLLD128: |
19585 | case IX86_BUILTIN_PSLLD128_MASK: |
19586 | case IX86_BUILTIN_PSLLD256: |
19587 | case IX86_BUILTIN_PSLLD256_MASK: |
19588 | case IX86_BUILTIN_PSLLD512: |
19589 | case IX86_BUILTIN_PSLLDI: |
19590 | case IX86_BUILTIN_PSLLDI128: |
19591 | case IX86_BUILTIN_PSLLDI128_MASK: |
19592 | case IX86_BUILTIN_PSLLDI256: |
19593 | case IX86_BUILTIN_PSLLDI256_MASK: |
19594 | case IX86_BUILTIN_PSLLDI512: |
19595 | case IX86_BUILTIN_PSLLQ: |
19596 | case IX86_BUILTIN_PSLLQ128: |
19597 | case IX86_BUILTIN_PSLLQ128_MASK: |
19598 | case IX86_BUILTIN_PSLLQ256: |
19599 | case IX86_BUILTIN_PSLLQ256_MASK: |
19600 | case IX86_BUILTIN_PSLLQ512: |
19601 | case IX86_BUILTIN_PSLLQI: |
19602 | case IX86_BUILTIN_PSLLQI128: |
19603 | case IX86_BUILTIN_PSLLQI128_MASK: |
19604 | case IX86_BUILTIN_PSLLQI256: |
19605 | case IX86_BUILTIN_PSLLQI256_MASK: |
19606 | case IX86_BUILTIN_PSLLQI512: |
19607 | case IX86_BUILTIN_PSLLW: |
19608 | case IX86_BUILTIN_PSLLW128: |
19609 | case IX86_BUILTIN_PSLLW128_MASK: |
19610 | case IX86_BUILTIN_PSLLW256: |
19611 | case IX86_BUILTIN_PSLLW256_MASK: |
19612 | case IX86_BUILTIN_PSLLW512_MASK: |
19613 | case IX86_BUILTIN_PSLLWI: |
19614 | case IX86_BUILTIN_PSLLWI128: |
19615 | case IX86_BUILTIN_PSLLWI128_MASK: |
19616 | case IX86_BUILTIN_PSLLWI256: |
19617 | case IX86_BUILTIN_PSLLWI256_MASK: |
19618 | case IX86_BUILTIN_PSLLWI512_MASK: |
19619 | rcode = ASHIFT; |
19620 | is_vshift = false; |
19621 | goto do_shift; |
19622 | case IX86_BUILTIN_PSRAD: |
19623 | case IX86_BUILTIN_PSRAD128: |
19624 | case IX86_BUILTIN_PSRAD128_MASK: |
19625 | case IX86_BUILTIN_PSRAD256: |
19626 | case IX86_BUILTIN_PSRAD256_MASK: |
19627 | case IX86_BUILTIN_PSRAD512: |
19628 | case IX86_BUILTIN_PSRADI: |
19629 | case IX86_BUILTIN_PSRADI128: |
19630 | case IX86_BUILTIN_PSRADI128_MASK: |
19631 | case IX86_BUILTIN_PSRADI256: |
19632 | case IX86_BUILTIN_PSRADI256_MASK: |
19633 | case IX86_BUILTIN_PSRADI512: |
19634 | case IX86_BUILTIN_PSRAQ128_MASK: |
19635 | case IX86_BUILTIN_PSRAQ256_MASK: |
19636 | case IX86_BUILTIN_PSRAQ512: |
19637 | case IX86_BUILTIN_PSRAQI128_MASK: |
19638 | case IX86_BUILTIN_PSRAQI256_MASK: |
19639 | case IX86_BUILTIN_PSRAQI512: |
19640 | case IX86_BUILTIN_PSRAW: |
19641 | case IX86_BUILTIN_PSRAW128: |
19642 | case IX86_BUILTIN_PSRAW128_MASK: |
19643 | case IX86_BUILTIN_PSRAW256: |
19644 | case IX86_BUILTIN_PSRAW256_MASK: |
19645 | case IX86_BUILTIN_PSRAW512: |
19646 | case IX86_BUILTIN_PSRAWI: |
19647 | case IX86_BUILTIN_PSRAWI128: |
19648 | case IX86_BUILTIN_PSRAWI128_MASK: |
19649 | case IX86_BUILTIN_PSRAWI256: |
19650 | case IX86_BUILTIN_PSRAWI256_MASK: |
19651 | case IX86_BUILTIN_PSRAWI512: |
19652 | rcode = ASHIFTRT; |
19653 | is_vshift = false; |
19654 | goto do_shift; |
19655 | case IX86_BUILTIN_PSRLD: |
19656 | case IX86_BUILTIN_PSRLD128: |
19657 | case IX86_BUILTIN_PSRLD128_MASK: |
19658 | case IX86_BUILTIN_PSRLD256: |
19659 | case IX86_BUILTIN_PSRLD256_MASK: |
19660 | case IX86_BUILTIN_PSRLD512: |
19661 | case IX86_BUILTIN_PSRLDI: |
19662 | case IX86_BUILTIN_PSRLDI128: |
19663 | case IX86_BUILTIN_PSRLDI128_MASK: |
19664 | case IX86_BUILTIN_PSRLDI256: |
19665 | case IX86_BUILTIN_PSRLDI256_MASK: |
19666 | case IX86_BUILTIN_PSRLDI512: |
19667 | case IX86_BUILTIN_PSRLQ: |
19668 | case IX86_BUILTIN_PSRLQ128: |
19669 | case IX86_BUILTIN_PSRLQ128_MASK: |
19670 | case IX86_BUILTIN_PSRLQ256: |
19671 | case IX86_BUILTIN_PSRLQ256_MASK: |
19672 | case IX86_BUILTIN_PSRLQ512: |
19673 | case IX86_BUILTIN_PSRLQI: |
19674 | case IX86_BUILTIN_PSRLQI128: |
19675 | case IX86_BUILTIN_PSRLQI128_MASK: |
19676 | case IX86_BUILTIN_PSRLQI256: |
19677 | case IX86_BUILTIN_PSRLQI256_MASK: |
19678 | case IX86_BUILTIN_PSRLQI512: |
19679 | case IX86_BUILTIN_PSRLW: |
19680 | case IX86_BUILTIN_PSRLW128: |
19681 | case IX86_BUILTIN_PSRLW128_MASK: |
19682 | case IX86_BUILTIN_PSRLW256: |
19683 | case IX86_BUILTIN_PSRLW256_MASK: |
19684 | case IX86_BUILTIN_PSRLW512: |
19685 | case IX86_BUILTIN_PSRLWI: |
19686 | case IX86_BUILTIN_PSRLWI128: |
19687 | case IX86_BUILTIN_PSRLWI128_MASK: |
19688 | case IX86_BUILTIN_PSRLWI256: |
19689 | case IX86_BUILTIN_PSRLWI256_MASK: |
19690 | case IX86_BUILTIN_PSRLWI512: |
19691 | rcode = LSHIFTRT; |
19692 | is_vshift = false; |
19693 | goto do_shift; |
19694 | case IX86_BUILTIN_PSLLVV16HI: |
19695 | case IX86_BUILTIN_PSLLVV16SI: |
19696 | case IX86_BUILTIN_PSLLVV2DI: |
19697 | case IX86_BUILTIN_PSLLVV2DI_MASK: |
19698 | case IX86_BUILTIN_PSLLVV32HI: |
19699 | case IX86_BUILTIN_PSLLVV4DI: |
19700 | case IX86_BUILTIN_PSLLVV4DI_MASK: |
19701 | case IX86_BUILTIN_PSLLVV4SI: |
19702 | case IX86_BUILTIN_PSLLVV4SI_MASK: |
19703 | case IX86_BUILTIN_PSLLVV8DI: |
19704 | case IX86_BUILTIN_PSLLVV8HI: |
19705 | case IX86_BUILTIN_PSLLVV8SI: |
19706 | case IX86_BUILTIN_PSLLVV8SI_MASK: |
19707 | rcode = ASHIFT; |
19708 | is_vshift = true; |
19709 | goto do_shift; |
19710 | case IX86_BUILTIN_PSRAVQ128: |
19711 | case IX86_BUILTIN_PSRAVQ256: |
19712 | case IX86_BUILTIN_PSRAVV16HI: |
19713 | case IX86_BUILTIN_PSRAVV16SI: |
19714 | case IX86_BUILTIN_PSRAVV32HI: |
19715 | case IX86_BUILTIN_PSRAVV4SI: |
19716 | case IX86_BUILTIN_PSRAVV4SI_MASK: |
19717 | case IX86_BUILTIN_PSRAVV8DI: |
19718 | case IX86_BUILTIN_PSRAVV8HI: |
19719 | case IX86_BUILTIN_PSRAVV8SI: |
19720 | case IX86_BUILTIN_PSRAVV8SI_MASK: |
19721 | rcode = ASHIFTRT; |
19722 | is_vshift = true; |
19723 | goto do_shift; |
19724 | case IX86_BUILTIN_PSRLVV16HI: |
19725 | case IX86_BUILTIN_PSRLVV16SI: |
19726 | case IX86_BUILTIN_PSRLVV2DI: |
19727 | case IX86_BUILTIN_PSRLVV2DI_MASK: |
19728 | case IX86_BUILTIN_PSRLVV32HI: |
19729 | case IX86_BUILTIN_PSRLVV4DI: |
19730 | case IX86_BUILTIN_PSRLVV4DI_MASK: |
19731 | case IX86_BUILTIN_PSRLVV4SI: |
19732 | case IX86_BUILTIN_PSRLVV4SI_MASK: |
19733 | case IX86_BUILTIN_PSRLVV8DI: |
19734 | case IX86_BUILTIN_PSRLVV8HI: |
19735 | case IX86_BUILTIN_PSRLVV8SI: |
19736 | case IX86_BUILTIN_PSRLVV8SI_MASK: |
19737 | rcode = LSHIFTRT; |
19738 | is_vshift = true; |
19739 | goto do_shift; |
19740 | |
19741 | do_shift: |
19742 | gcc_assert (n_args >= 2); |
19743 | if (!gimple_call_lhs (gs: stmt)) |
19744 | { |
19745 | gsi_replace (gsi, gimple_build_nop (), false); |
19746 | return true; |
19747 | } |
19748 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19749 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19750 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19751 | /* For masked shift, only optimize if the mask is all ones. */ |
19752 | if (n_args > 2 |
19753 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19754 | break; |
19755 | if (is_vshift) |
19756 | { |
19757 | if (TREE_CODE (arg1) != VECTOR_CST) |
19758 | break; |
19759 | count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); |
19760 | if (integer_zerop (arg1)) |
19761 | count = 0; |
19762 | else if (rcode == ASHIFTRT) |
19763 | break; |
19764 | else |
19765 | for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) |
19766 | { |
19767 | tree elt = VECTOR_CST_ELT (arg1, i); |
19768 | if (!wi::neg_p (x: wi::to_wide (t: elt)) |
19769 | && wi::to_widest (t: elt) < count) |
19770 | return false; |
19771 | } |
19772 | } |
19773 | else |
19774 | { |
19775 | arg1 = ix86_vector_shift_count (arg1); |
19776 | if (!arg1) |
19777 | break; |
19778 | count = tree_to_uhwi (arg1); |
19779 | } |
19780 | if (count == 0) |
19781 | { |
19782 | /* Just return the first argument for shift by 0. */ |
19783 | loc = gimple_location (g: stmt); |
19784 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), arg0); |
19785 | gimple_set_location (g, location: loc); |
19786 | gsi_replace (gsi, g, false); |
19787 | return true; |
19788 | } |
19789 | if (rcode != ASHIFTRT |
19790 | && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) |
19791 | { |
	  /* For shift counts equal to or greater than the precision, the
	     result is zero, except for arithmetic right shift.  */
19794 | loc = gimple_location (g: stmt); |
19795 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19796 | build_zero_cst (TREE_TYPE (arg0))); |
19797 | gimple_set_location (g, location: loc); |
19798 | gsi_replace (gsi, g, false); |
19799 | return true; |
19800 | } |
19801 | break; |
19802 | |
19803 | case IX86_BUILTIN_SHUFPD512: |
19804 | case IX86_BUILTIN_SHUFPS512: |
19805 | case IX86_BUILTIN_SHUFPD: |
19806 | case IX86_BUILTIN_SHUFPD256: |
19807 | case IX86_BUILTIN_SHUFPS: |
19808 | case IX86_BUILTIN_SHUFPS256: |
19809 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19810 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
      /* This is a masked shuffle.  Only optimize if the mask is all ones.  */
19812 | if (n_args > 3 |
19813 | && !ix86_masked_all_ones (elems, |
19814 | arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19815 | break; |
19816 | arg2 = gimple_call_arg (gs: stmt, index: 2); |
19817 | if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (gs: stmt)) |
19818 | { |
19819 | unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2); |
19820 | /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */ |
19821 | if (shuffle_mask > 255) |
19822 | return false; |
19823 | |
19824 | machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))); |
19825 | loc = gimple_location (g: stmt); |
19826 | tree itype = (imode == E_DFmode |
19827 | ? long_long_integer_type_node : integer_type_node); |
19828 | tree vtype = build_vector_type (itype, elems); |
19829 | tree_vector_builder elts (vtype, elems, 1); |
19830 | |
19831 | |
	  /* Transform the integer shuffle_mask into the vector perm_mask
	     used by vec_perm_expr; refer to shufp[sd]256/512 in sse.md.  */
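	  /* E.g. for V2DF and shuffle_mask 1 the loop below builds the
	     permutation { 1, 2 }, i.e. { arg0[1], arg1[0] }, matching
	     _mm_shuffle_pd (arg0, arg1, 1).  */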
19834 | for (unsigned i = 0; i != elems; i++) |
19835 | { |
19836 | unsigned sel_idx; |
	      /* Imm[1:0] (and, when VL > 128, Imm[3:2], Imm[5:4], Imm[7:6])
		 provide the 2 select controls for each 128-bit lane of the
		 destination.  */
19840 | if (imode == E_DFmode) |
19841 | sel_idx = (i & 1) * elems + (i & ~1) |
19842 | + ((shuffle_mask >> i) & 1); |
19843 | else |
19844 | { |
		  /* Imm[7:0] (reused for every 128-bit lane when VL > 128)
		     provides 4 two-bit select controls, one for each element
		     of a 128-bit lane of the destination.  */
19847 | unsigned j = i % 4; |
19848 | sel_idx = ((i >> 1) & 1) * elems + (i & ~3) |
19849 | + ((shuffle_mask >> 2 * j) & 3); |
19850 | } |
19851 | elts.quick_push (obj: build_int_cst (itype, sel_idx)); |
19852 | } |
19853 | |
19854 | tree perm_mask = elts.build (); |
19855 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19856 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19857 | VEC_PERM_EXPR, |
19858 | arg0, arg1, perm_mask); |
19859 | gimple_set_location (g, location: loc); |
19860 | gsi_replace (gsi, g, false); |
19861 | return true; |
19862 | } |
      /* Do not error yet; the constant could be propagated later.  */
19864 | break; |
19865 | |
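    /* Absolute-value builtins.  These are folded below into ABSU_EXPR on the
       corresponding unsigned vector type.  */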
19866 | case IX86_BUILTIN_PABSB: |
19867 | case IX86_BUILTIN_PABSW: |
19868 | case IX86_BUILTIN_PABSD: |
19869 | /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */ |
19870 | if (!TARGET_MMX_WITH_SSE) |
19871 | break; |
19872 | /* FALLTHRU. */ |
19873 | case IX86_BUILTIN_PABSB128: |
19874 | case IX86_BUILTIN_PABSB256: |
19875 | case IX86_BUILTIN_PABSB512: |
19876 | case IX86_BUILTIN_PABSW128: |
19877 | case IX86_BUILTIN_PABSW256: |
19878 | case IX86_BUILTIN_PABSW512: |
19879 | case IX86_BUILTIN_PABSD128: |
19880 | case IX86_BUILTIN_PABSD256: |
19881 | case IX86_BUILTIN_PABSD512: |
19882 | case IX86_BUILTIN_PABSQ128: |
19883 | case IX86_BUILTIN_PABSQ256: |
19884 | case IX86_BUILTIN_PABSQ512: |
19885 | case IX86_BUILTIN_PABSB128_MASK: |
19886 | case IX86_BUILTIN_PABSB256_MASK: |
19887 | case IX86_BUILTIN_PABSW128_MASK: |
19888 | case IX86_BUILTIN_PABSW256_MASK: |
19889 | case IX86_BUILTIN_PABSD128_MASK: |
19890 | case IX86_BUILTIN_PABSD256_MASK: |
19891 | gcc_assert (n_args >= 1); |
19892 | if (!gimple_call_lhs (gs: stmt)) |
19893 | { |
19894 | gsi_replace (gsi, gimple_build_nop (), false); |
19895 | return true; |
19896 | } |
19897 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19898 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19899 | /* For masked ABS, only optimize if the mask is all ones. */ |
19900 | if (n_args > 1 |
19901 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: n_args - 1))) |
19902 | break; |
19903 | { |
19904 | tree utype, ures, vce; |
19905 | utype = unsigned_type_for (TREE_TYPE (arg0)); |
	/* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
	   instead of ABS_EXPR to handle the overflow case (TYPE_MIN).  */
19908 | ures = gimple_build (seq: &stmts, code: ABSU_EXPR, type: utype, ops: arg0); |
19909 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19910 | loc = gimple_location (g: stmt); |
19911 | vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures); |
19912 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19913 | VIEW_CONVERT_EXPR, vce); |
19914 | gsi_replace (gsi, g, false); |
19915 | } |
19916 | return true; |
19917 | |
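    /* Floating-point min/max builtins.  These are folded below into
       cmp ? arg0 : arg1, which, like the hardware instructions, returns the
       second operand when the comparison is false or unordered.  */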
19918 | case IX86_BUILTIN_MINPS: |
19919 | case IX86_BUILTIN_MINPD: |
19920 | case IX86_BUILTIN_MINPS256: |
19921 | case IX86_BUILTIN_MINPD256: |
19922 | case IX86_BUILTIN_MINPS512: |
19923 | case IX86_BUILTIN_MINPD512: |
19924 | case IX86_BUILTIN_MINPS128_MASK: |
19925 | case IX86_BUILTIN_MINPD128_MASK: |
19926 | case IX86_BUILTIN_MINPS256_MASK: |
19927 | case IX86_BUILTIN_MINPD256_MASK: |
19928 | case IX86_BUILTIN_MINPH128_MASK: |
19929 | case IX86_BUILTIN_MINPH256_MASK: |
19930 | case IX86_BUILTIN_MINPH512_MASK: |
19931 | tcode = LT_EXPR; |
19932 | goto do_minmax; |
19933 | |
19934 | case IX86_BUILTIN_MAXPS: |
19935 | case IX86_BUILTIN_MAXPD: |
19936 | case IX86_BUILTIN_MAXPS256: |
19937 | case IX86_BUILTIN_MAXPD256: |
19938 | case IX86_BUILTIN_MAXPS512: |
19939 | case IX86_BUILTIN_MAXPD512: |
19940 | case IX86_BUILTIN_MAXPS128_MASK: |
19941 | case IX86_BUILTIN_MAXPD128_MASK: |
19942 | case IX86_BUILTIN_MAXPS256_MASK: |
19943 | case IX86_BUILTIN_MAXPD256_MASK: |
19944 | case IX86_BUILTIN_MAXPH128_MASK: |
19945 | case IX86_BUILTIN_MAXPH256_MASK: |
19946 | case IX86_BUILTIN_MAXPH512_MASK: |
19947 | tcode = GT_EXPR; |
19948 | do_minmax: |
19949 | gcc_assert (n_args >= 2); |
19950 | /* Without SSE4.1 we often aren't able to pattern match it back to the |
19951 | desired instruction. */ |
19952 | if (!gimple_call_lhs (gs: stmt) || !optimize || !TARGET_SSE4_1) |
19953 | break; |
19954 | arg0 = gimple_call_arg (gs: stmt, index: 0); |
19955 | arg1 = gimple_call_arg (gs: stmt, index: 1); |
19956 | elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); |
19957 | /* For masked minmax, only optimize if the mask is all ones. */ |
19958 | if (n_args > 2 |
19959 | && !ix86_masked_all_ones (elems, arg_mask: gimple_call_arg (gs: stmt, index: 3))) |
19960 | break; |
19961 | if (n_args >= 5) |
19962 | { |
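	  /* Only the rounding/SAE arguments _MM_FROUND_CUR_DIRECTION (4) and
	     _MM_FROUND_NO_EXC (8) can allow the folding below, the latter
	     only when NaNs need not be honored.  */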
19963 | tree arg4 = gimple_call_arg (gs: stmt, index: 4); |
19964 | if (!tree_fits_uhwi_p (arg4)) |
19965 | break; |
19966 | if (tree_to_uhwi (arg4) == 4) |
19967 | /* Ok. */; |
19968 | else if (tree_to_uhwi (arg4) != 8) |
19969 | /* Invalid round argument. */ |
19970 | break; |
19971 | else if (HONOR_NANS (arg0)) |
19972 | /* Lowering to comparison would raise exceptions which |
19973 | shouldn't be raised. */ |
19974 | break; |
19975 | } |
19976 | { |
19977 | tree type = truth_type_for (TREE_TYPE (arg0)); |
19978 | tree cmpres = gimple_build (seq: &stmts, code: tcode, type, ops: arg0, ops: arg1); |
19979 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
19980 | g = gimple_build_assign (gimple_call_lhs (gs: stmt), |
19981 | VEC_COND_EXPR, cmpres, arg0, arg1); |
19982 | gsi_replace (gsi, g, false); |
19983 | } |
19984 | return true; |
19985 | |
19986 | default: |
19987 | break; |
19988 | } |
19989 | |
19990 | return false; |
19991 | } |
19992 | |
19993 | /* Handler for an SVML-style interface to |
19994 | a library with vectorized intrinsics. */ |
19995 | |
19996 | tree |
19997 | ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) |
19998 | { |
19999 | char name[20]; |
20000 | tree fntype, new_fndecl, args; |
20001 | unsigned arity; |
20002 | const char *bname; |
20003 | machine_mode el_mode, in_mode; |
20004 | int n, in_n; |
20005 | |
20006 | /* The SVML is suitable for unsafe math only. */ |
20007 | if (!flag_unsafe_math_optimizations) |
20008 | return NULL_TREE; |
20009 | |
20010 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
20011 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
20012 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
20013 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
20014 | if (el_mode != in_mode |
20015 | || n != in_n) |
20016 | return NULL_TREE; |
20017 | |
20018 | switch (fn) |
20019 | { |
20020 | CASE_CFN_EXP: |
20021 | CASE_CFN_LOG: |
20022 | CASE_CFN_LOG10: |
20023 | CASE_CFN_POW: |
20024 | CASE_CFN_TANH: |
20025 | CASE_CFN_TAN: |
20026 | CASE_CFN_ATAN: |
20027 | CASE_CFN_ATAN2: |
20028 | CASE_CFN_ATANH: |
20029 | CASE_CFN_CBRT: |
20030 | CASE_CFN_SINH: |
20031 | CASE_CFN_SIN: |
20032 | CASE_CFN_ASINH: |
20033 | CASE_CFN_ASIN: |
20034 | CASE_CFN_COSH: |
20035 | CASE_CFN_COS: |
20036 | CASE_CFN_ACOSH: |
20037 | CASE_CFN_ACOS: |
20038 | if ((el_mode != DFmode || n != 2) |
20039 | && (el_mode != SFmode || n != 4)) |
20040 | return NULL_TREE; |
20041 | break; |
20042 | |
20043 | default: |
20044 | return NULL_TREE; |
20045 | } |
20046 | |
20047 | tree fndecl = mathfn_built_in (el_mode == DFmode |
20048 | ? double_type_node : float_type_node, fn); |
20049 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
20050 | |
20051 | if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOGF) |
20052 | strcpy (dest: name, src: "vmlsLn4"); |
20053 | else if (DECL_FUNCTION_CODE (decl: fndecl) == BUILT_IN_LOG) |
20054 | strcpy (dest: name, src: "vmldLn2"); |
20055 | else if (n == 4) |
20056 | { |
20057 | sprintf (s: name, format: "vmls%s", bname+10); |
20058 | name[strlen (s: name)-1] = '4'; |
20059 | } |
20060 | else |
20061 | sprintf (s: name, format: "vmld%s2", bname+10); |
20062 | |
20063 | /* Convert to uppercase. */ |
20064 | name[4] &= ~0x20; |
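  /* The resulting names look like "vmlsSin4" for the V4SF sinf case or
     "vmldPow2" for the V2DF pow case.  */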
20065 | |
20066 | arity = 0; |
20067 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
20068 | arity++; |
20069 | |
20070 | if (arity == 1) |
20071 | fntype = build_function_type_list (type_out, type_in, NULL); |
20072 | else |
20073 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
20074 | |
20075 | /* Build a function declaration for the vectorized function. */ |
20076 | new_fndecl = build_decl (BUILTINS_LOCATION, |
20077 | FUNCTION_DECL, get_identifier (name), fntype); |
20078 | TREE_PUBLIC (new_fndecl) = 1; |
20079 | DECL_EXTERNAL (new_fndecl) = 1; |
20080 | DECL_IS_NOVOPS (new_fndecl) = 1; |
20081 | TREE_READONLY (new_fndecl) = 1; |
20082 | |
20083 | return new_fndecl; |
20084 | } |
20085 | |
20086 | /* Handler for an ACML-style interface to |
20087 | a library with vectorized intrinsics. */ |
20088 | |
20089 | tree |
20090 | ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) |
20091 | { |
20092 | char name[20] = "__vr.._"; |
20093 | tree fntype, new_fndecl, args; |
20094 | unsigned arity; |
20095 | const char *bname; |
20096 | machine_mode el_mode, in_mode; |
20097 | int n, in_n; |
20098 | |
  /* The ACML is 64-bit only and suitable for unsafe math only, as
     it does not correctly support parts of IEEE with the required
     precision, such as denormals.  */
20102 | if (!TARGET_64BIT |
20103 | || !flag_unsafe_math_optimizations) |
20104 | return NULL_TREE; |
20105 | |
20106 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
20107 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
20108 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
20109 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
20110 | if (el_mode != in_mode |
20111 | || n != in_n) |
20112 | return NULL_TREE; |
20113 | |
20114 | switch (fn) |
20115 | { |
20116 | CASE_CFN_SIN: |
20117 | CASE_CFN_COS: |
20118 | CASE_CFN_EXP: |
20119 | CASE_CFN_LOG: |
20120 | CASE_CFN_LOG2: |
20121 | CASE_CFN_LOG10: |
20122 | if (el_mode == DFmode && n == 2) |
20123 | { |
20124 | name[4] = 'd'; |
20125 | name[5] = '2'; |
20126 | } |
20127 | else if (el_mode == SFmode && n == 4) |
20128 | { |
20129 | name[4] = 's'; |
20130 | name[5] = '4'; |
20131 | } |
20132 | else |
20133 | return NULL_TREE; |
20134 | break; |
20135 | |
20136 | default: |
20137 | return NULL_TREE; |
20138 | } |
20139 | |
20140 | tree fndecl = mathfn_built_in (el_mode == DFmode |
20141 | ? double_type_node : float_type_node, fn); |
20142 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
20143 | sprintf (s: name + 7, format: "%s", bname+10); |
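  /* The resulting names look like "__vrd2_sin" or "__vrs4_expf".  */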
20144 | |
20145 | arity = 0; |
20146 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
20147 | arity++; |
20148 | |
20149 | if (arity == 1) |
20150 | fntype = build_function_type_list (type_out, type_in, NULL); |
20151 | else |
20152 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
20153 | |
20154 | /* Build a function declaration for the vectorized function. */ |
20155 | new_fndecl = build_decl (BUILTINS_LOCATION, |
20156 | FUNCTION_DECL, get_identifier (name), fntype); |
20157 | TREE_PUBLIC (new_fndecl) = 1; |
20158 | DECL_EXTERNAL (new_fndecl) = 1; |
20159 | DECL_IS_NOVOPS (new_fndecl) = 1; |
20160 | TREE_READONLY (new_fndecl) = 1; |
20161 | |
20162 | return new_fndecl; |
20163 | } |
20164 | |
20165 | /* Handler for an AOCL-LibM-style interface to |
20166 | a library with vectorized intrinsics. */ |
20167 | |
20168 | tree |
20169 | ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in) |
20170 | { |
20171 | char name[20] = "amd_vr"; |
20172 | int name_len = 6; |
20173 | tree fntype, new_fndecl, args; |
20174 | unsigned arity; |
20175 | const char *bname; |
20176 | machine_mode el_mode, in_mode; |
20177 | int n, in_n; |
20178 | |
  /* AOCL-LibM is 64-bit only.  It is also suitable for unsafe math only,
     as it trades off some accuracy for increased performance.  */
20181 | if (!TARGET_64BIT |
20182 | || !flag_unsafe_math_optimizations) |
20183 | return NULL_TREE; |
20184 | |
20185 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); |
20186 | n = TYPE_VECTOR_SUBPARTS (node: type_out); |
20187 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
20188 | in_n = TYPE_VECTOR_SUBPARTS (node: type_in); |
20189 | if (el_mode != in_mode |
20190 | || n != in_n) |
20191 | return NULL_TREE; |
20192 | |
20193 | gcc_checking_assert (n > 0); |
20194 | |
20195 | /* Decide whether there exists a function for the combination of FN, the mode |
20196 | and the vector width. Return early if it doesn't. */ |
20197 | |
20198 | if (el_mode != DFmode && el_mode != SFmode) |
20199 | return NULL_TREE; |
20200 | |
20201 | /* Supported vector widths for given FN and single/double precision. Zeros |
20202 | are used to fill out unused positions in the arrays. */ |
20203 | static const int supported_n[][2][3] = { |
20204 | /* Single prec. , Double prec. */ |
20205 | { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */ |
20206 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */ |
20207 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */ |
20208 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */ |
20209 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */ |
20210 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */ |
20211 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */ |
20212 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */ |
20213 | { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */ |
20214 | { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */ |
20215 | { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */ |
20216 | { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */ |
20217 | { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */ |
20218 | { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. */ |
20219 | { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */ |
20220 | { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */ |
20221 | { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */ |
20222 | { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */ |
20223 | }; |
20224 | |
20225 | /* We cannot simply index the supported_n array with FN since multiple FNs |
20226 | may correspond to a single operation (see the definitions of these |
20227 | CASE_CFN_* macros). */ |
20228 | int i; |
20229 | switch (fn) |
20230 | { |
20231 | CASE_CFN_TAN : i = 0; break; |
20232 | CASE_CFN_EXP : i = 1; break; |
20233 | CASE_CFN_EXP2 : i = 2; break; |
20234 | CASE_CFN_LOG : i = 3; break; |
20235 | CASE_CFN_LOG2 : i = 4; break; |
20236 | CASE_CFN_COS : i = 5; break; |
20237 | CASE_CFN_SIN : i = 6; break; |
20238 | CASE_CFN_POW : i = 7; break; |
20239 | CASE_CFN_ERF : i = 8; break; |
20240 | CASE_CFN_ATAN : i = 9; break; |
20241 | CASE_CFN_LOG10 : i = 10; break; |
20242 | CASE_CFN_EXP10 : i = 11; break; |
20243 | CASE_CFN_LOG1P : i = 12; break; |
20244 | CASE_CFN_ASIN : i = 13; break; |
20245 | CASE_CFN_ACOS : i = 14; break; |
20246 | CASE_CFN_TANH : i = 15; break; |
20247 | CASE_CFN_EXPM1 : i = 16; break; |
20248 | CASE_CFN_COSH : i = 17; break; |
20249 | default: return NULL_TREE; |
20250 | } |
20251 | |
20252 | int j = el_mode == DFmode; |
20253 | bool n_is_supported = false; |
20254 | for (unsigned k = 0; k < 3; k++) |
20255 | if (supported_n[i][j][k] == n) |
20256 | { |
20257 | n_is_supported = true; |
20258 | break; |
20259 | } |
20260 | if (!n_is_supported) |
20261 | return NULL_TREE; |
20262 | |
20263 | /* Append the precision and the vector width to the function name we are |
20264 | constructing. */ |
20265 | name[name_len++] = el_mode == DFmode ? 'd' : 's'; |
20266 | switch (n) |
20267 | { |
20268 | case 2: |
20269 | case 4: |
20270 | case 8: |
20271 | name[name_len++] = '0' + n; |
20272 | break; |
20273 | case 16: |
20274 | name[name_len++] = '1'; |
20275 | name[name_len++] = '6'; |
20276 | break; |
20277 | default: |
20278 | gcc_unreachable (); |
20279 | } |
20280 | name[name_len++] = '_'; |
20281 | |
20282 | /* Append the operation name (steal it from the name of a builtin). */ |
20283 | tree fndecl = mathfn_built_in (el_mode == DFmode |
20284 | ? double_type_node : float_type_node, fn); |
20285 | bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); |
20286 | sprintf (s: name + name_len, format: "%s", bname + 10); |
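  /* The resulting names look like "amd_vrd2_sin" or "amd_vrs16_tanf",
     presumably matching the AOCL-LibM vector entry points.  */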
20287 | |
20288 | arity = 0; |
20289 | for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) |
20290 | arity++; |
20291 | |
20292 | if (arity == 1) |
20293 | fntype = build_function_type_list (type_out, type_in, NULL); |
20294 | else |
20295 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
20296 | |
20297 | /* Build a function declaration for the vectorized function. */ |
20298 | new_fndecl = build_decl (BUILTINS_LOCATION, |
20299 | FUNCTION_DECL, get_identifier (name), fntype); |
20300 | TREE_PUBLIC (new_fndecl) = 1; |
20301 | DECL_EXTERNAL (new_fndecl) = 1; |
20302 | TREE_READONLY (new_fndecl) = 1; |
20303 | |
20304 | return new_fndecl; |
20305 | } |
20306 | |
/* Return a decl of a function that implements a scatter store with
   vector type VECTYPE, index type INDEX_TYPE and scale SCALE.
   Return NULL_TREE if it is not available.  */
20310 | |
20311 | static tree |
20312 | ix86_vectorize_builtin_scatter (const_tree vectype, |
20313 | const_tree index_type, int scale) |
20314 | { |
20315 | bool si; |
20316 | enum ix86_builtins code; |
20317 | |
20318 | if (!TARGET_AVX512F) |
20319 | return NULL_TREE; |
20320 | |
20321 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u) |
20322 | ? !TARGET_USE_SCATTER_2PARTS |
20323 | : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) |
20324 | ? !TARGET_USE_SCATTER_4PARTS |
20325 | : !TARGET_USE_SCATTER_8PARTS)) |
20326 | return NULL_TREE; |
20327 | |
20328 | if ((TREE_CODE (index_type) != INTEGER_TYPE |
20329 | && !POINTER_TYPE_P (index_type)) |
20330 | || (TYPE_MODE (index_type) != SImode |
20331 | && TYPE_MODE (index_type) != DImode)) |
20332 | return NULL_TREE; |
20333 | |
20334 | if (TYPE_PRECISION (index_type) > POINTER_SIZE) |
20335 | return NULL_TREE; |
20336 | |
20337 | /* v*scatter* insn sign extends index to pointer mode. */ |
20338 | if (TYPE_PRECISION (index_type) < POINTER_SIZE |
20339 | && TYPE_UNSIGNED (index_type)) |
20340 | return NULL_TREE; |
20341 | |
20342 | /* Scale can be 1, 2, 4 or 8. */ |
20343 | if (scale <= 0 |
20344 | || scale > 8 |
20345 | || (scale & (scale - 1)) != 0) |
20346 | return NULL_TREE; |
20347 | |
20348 | si = TYPE_MODE (index_type) == SImode; |
20349 | switch (TYPE_MODE (vectype)) |
20350 | { |
20351 | case E_V8DFmode: |
20352 | code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; |
20353 | break; |
20354 | case E_V8DImode: |
20355 | code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; |
20356 | break; |
20357 | case E_V16SFmode: |
20358 | code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; |
20359 | break; |
20360 | case E_V16SImode: |
20361 | code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; |
20362 | break; |
20363 | case E_V4DFmode: |
20364 | if (TARGET_AVX512VL) |
20365 | code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; |
20366 | else |
20367 | return NULL_TREE; |
20368 | break; |
20369 | case E_V4DImode: |
20370 | if (TARGET_AVX512VL) |
20371 | code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; |
20372 | else |
20373 | return NULL_TREE; |
20374 | break; |
20375 | case E_V8SFmode: |
20376 | if (TARGET_AVX512VL) |
20377 | code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; |
20378 | else |
20379 | return NULL_TREE; |
20380 | break; |
20381 | case E_V8SImode: |
20382 | if (TARGET_AVX512VL) |
20383 | code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; |
20384 | else |
20385 | return NULL_TREE; |
20386 | break; |
20387 | case E_V2DFmode: |
20388 | if (TARGET_AVX512VL) |
20389 | code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; |
20390 | else |
20391 | return NULL_TREE; |
20392 | break; |
20393 | case E_V2DImode: |
20394 | if (TARGET_AVX512VL) |
20395 | code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; |
20396 | else |
20397 | return NULL_TREE; |
20398 | break; |
20399 | case E_V4SFmode: |
20400 | if (TARGET_AVX512VL) |
20401 | code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; |
20402 | else |
20403 | return NULL_TREE; |
20404 | break; |
20405 | case E_V4SImode: |
20406 | if (TARGET_AVX512VL) |
20407 | code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; |
20408 | else |
20409 | return NULL_TREE; |
20410 | break; |
20411 | default: |
20412 | return NULL_TREE; |
20413 | } |
20414 | |
20415 | return get_ix86_builtin (c: code); |
20416 | } |
20417 | |
20418 | /* Return true if it is safe to use the rsqrt optabs to optimize |
20419 | 1.0/sqrt. */ |
20420 | |
20421 | static bool |
20422 | use_rsqrt_p (machine_mode mode) |
20423 | { |
20424 | return ((mode == HFmode |
20425 | || (TARGET_SSE && TARGET_SSE_MATH)) |
20426 | && flag_finite_math_only |
20427 | && !flag_trapping_math |
20428 | && flag_unsafe_math_optimizations); |
20429 | } |
20430 | |
20431 | /* Helper for avx_vpermilps256_operand et al. This is also used by |
20432 | the expansion functions to turn the parallel back into a mask. |
20433 | The return value is 0 for no match and the imm8+1 for a match. */ |
20434 | |
20435 | int |
20436 | avx_vpermilp_parallel (rtx par, machine_mode mode) |
20437 | { |
20438 | unsigned i, nelt = GET_MODE_NUNITS (mode); |
20439 | unsigned mask = 0; |
20440 | unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ |
20441 | |
20442 | if (XVECLEN (par, 0) != (int) nelt) |
20443 | return 0; |
20444 | |
20445 | /* Validate that all of the elements are constants, and not totally |
20446 | out of range. Copy the data into an integral array to make the |
20447 | subsequent checks easier. */ |
20448 | for (i = 0; i < nelt; ++i) |
20449 | { |
20450 | rtx er = XVECEXP (par, 0, i); |
20451 | unsigned HOST_WIDE_INT ei; |
20452 | |
20453 | if (!CONST_INT_P (er)) |
20454 | return 0; |
20455 | ei = INTVAL (er); |
20456 | if (ei >= nelt) |
20457 | return 0; |
20458 | ipar[i] = ei; |
20459 | } |
20460 | |
20461 | switch (mode) |
20462 | { |
20463 | case E_V8DFmode: |
20464 | /* In the 512-bit DFmode case, we can only move elements within |
20465 | a 128-bit lane. First fill the second part of the mask, |
20466 | then fallthru. */ |
20467 | for (i = 4; i < 6; ++i) |
20468 | { |
20469 | if (ipar[i] < 4 || ipar[i] >= 6) |
20470 | return 0; |
20471 | mask |= (ipar[i] - 4) << i; |
20472 | } |
20473 | for (i = 6; i < 8; ++i) |
20474 | { |
20475 | if (ipar[i] < 6) |
20476 | return 0; |
20477 | mask |= (ipar[i] - 6) << i; |
20478 | } |
20479 | /* FALLTHRU */ |
20480 | |
20481 | case E_V4DFmode: |
20482 | /* In the 256-bit DFmode case, we can only move elements within |
20483 | a 128-bit lane. */ |
20484 | for (i = 0; i < 2; ++i) |
20485 | { |
20486 | if (ipar[i] >= 2) |
20487 | return 0; |
20488 | mask |= ipar[i] << i; |
20489 | } |
20490 | for (i = 2; i < 4; ++i) |
20491 | { |
20492 | if (ipar[i] < 2) |
20493 | return 0; |
20494 | mask |= (ipar[i] - 2) << i; |
20495 | } |
20496 | break; |
20497 | |
20498 | case E_V16SFmode: |
      /* In the 512-bit SFmode case, the permutation in the upper 256 bits
	 must mirror the permutation in the lower 256 bits.  */
20501 | for (i = 0; i < 8; ++i) |
20502 | if (ipar[i] + 8 != ipar[i + 8]) |
20503 | return 0; |
20504 | /* FALLTHRU */ |
20505 | |
20506 | case E_V8SFmode: |
      /* In the 256-bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
20510 | for (i = 0; i < 4; ++i) |
20511 | if (ipar[i] + 4 != ipar[i + 4]) |
20512 | return 0; |
20513 | nelt = 4; |
20514 | /* FALLTHRU */ |
20515 | |
20516 | case E_V2DFmode: |
20517 | case E_V4SFmode: |
20518 | /* In the 128-bit case, we've full freedom in the placement of |
20519 | the elements from the source operand. */ |
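      /* Each selector occupies nelt/2 bits of the immediate: one bit per
	 V2DF element and two bits per SFmode element; the 256/512-bit
	 SFmode cases fall through here with nelt reduced to 4 above.  */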
20520 | for (i = 0; i < nelt; ++i) |
20521 | mask |= ipar[i] << (i * (nelt / 2)); |
20522 | break; |
20523 | |
20524 | default: |
20525 | gcc_unreachable (); |
20526 | } |
20527 | |
20528 | /* Make sure success has a non-zero value by adding one. */ |
20529 | return mask + 1; |
20530 | } |
20531 | |
20532 | /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by |
20533 | the expansion functions to turn the parallel back into a mask. |
20534 | The return value is 0 for no match and the imm8+1 for a match. */ |
20535 | |
20536 | int |
20537 | avx_vperm2f128_parallel (rtx par, machine_mode mode) |
20538 | { |
20539 | unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; |
20540 | unsigned mask = 0; |
20541 | unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ |
20542 | |
20543 | if (XVECLEN (par, 0) != (int) nelt) |
20544 | return 0; |
20545 | |
20546 | /* Validate that all of the elements are constants, and not totally |
20547 | out of range. Copy the data into an integral array to make the |
20548 | subsequent checks easier. */ |
20549 | for (i = 0; i < nelt; ++i) |
20550 | { |
20551 | rtx er = XVECEXP (par, 0, i); |
20552 | unsigned HOST_WIDE_INT ei; |
20553 | |
20554 | if (!CONST_INT_P (er)) |
20555 | return 0; |
20556 | ei = INTVAL (er); |
20557 | if (ei >= 2 * nelt) |
20558 | return 0; |
20559 | ipar[i] = ei; |
20560 | } |
20561 | |
  /* Validate that each half of the permute consists of consecutive
     elements, i.e. selects a contiguous run from the sources.  */
20563 | for (i = 0; i < nelt2 - 1; ++i) |
20564 | if (ipar[i] + 1 != ipar[i + 1]) |
20565 | return 0; |
20566 | for (i = nelt2; i < nelt - 1; ++i) |
20567 | if (ipar[i] + 1 != ipar[i + 1]) |
20568 | return 0; |
20569 | |
20570 | /* Reconstruct the mask. */ |
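  /* Each half of the destination picks one 128-bit source chunk; the
     selector for the low half lands in imm8 bits [1:0] and the one for
     the high half in bits [5:4], as vperm2f128 expects.  */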
20571 | for (i = 0; i < 2; ++i) |
20572 | { |
20573 | unsigned e = ipar[i * nelt2]; |
20574 | if (e % nelt2) |
20575 | return 0; |
20576 | e /= nelt2; |
20577 | mask |= e << (i * 4); |
20578 | } |
20579 | |
20580 | /* Make sure success has a non-zero value by adding one. */ |
20581 | return mask + 1; |
20582 | } |
20583 | |
20584 | /* Return a mask of VPTERNLOG operands that do not affect output. */ |
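/* For instance, the ternary function 0xcc (in the usual vpternlog
   truth-table encoding) just returns the second operand, so for that
   immediate the mask below is 1 | 4: operands 1 and 3 are redundant.  */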
20585 | |
20586 | int |
20587 | vpternlog_redundant_operand_mask (rtx pternlog_imm) |
20588 | { |
20589 | int mask = 0; |
20590 | int imm8 = INTVAL (pternlog_imm); |
20591 | |
20592 | if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F)) |
20593 | mask |= 1; |
20594 | if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) |
20595 | mask |= 2; |
20596 | if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) |
20597 | mask |= 4; |
20598 | |
20599 | return mask; |
20600 | } |
20601 | |
20602 | /* Eliminate false dependencies on operands that do not affect output |
20603 | by substituting other operands of a VPTERNLOG. */ |
20604 | |
20605 | void |
20606 | substitute_vpternlog_operands (rtx *operands) |
20607 | { |
20608 | int mask = vpternlog_redundant_operand_mask (pternlog_imm: operands[4]); |
20609 | |
20610 | if (mask & 1) /* The first operand is redundant. */ |
20611 | operands[1] = operands[2]; |
20612 | |
20613 | if (mask & 2) /* The second operand is redundant. */ |
20614 | operands[2] = operands[1]; |
20615 | |
20616 | if (mask & 4) /* The third operand is redundant. */ |
20617 | operands[3] = operands[1]; |
20618 | else if (REG_P (operands[3])) |
20619 | { |
20620 | if (mask & 1) |
20621 | operands[1] = operands[3]; |
20622 | if (mask & 2) |
20623 | operands[2] = operands[3]; |
20624 | } |
20625 | } |
20626 | |
20627 | /* Return a register priority for hard reg REGNO. */ |
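/* Higher values make the allocator prefer the register; the range used
   here is 0 (strongly discouraged) to 4 (preferred).  */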
20628 | static int |
20629 | ix86_register_priority (int hard_regno) |
20630 | { |
  /* ebp and r13 as the base always want a displacement, and r12 as the
     base always wants an index.  So discourage their use in an
     address.  */
20634 | if (hard_regno == R12_REG || hard_regno == R13_REG) |
20635 | return 0; |
20636 | if (hard_regno == BP_REG) |
20637 | return 1; |
20638 | /* New x86-64 int registers result in bigger code size. Discourage them. */ |
20639 | if (REX_INT_REGNO_P (hard_regno)) |
20640 | return 2; |
20641 | if (REX2_INT_REGNO_P (hard_regno)) |
20642 | return 2; |
20643 | /* New x86-64 SSE registers result in bigger code size. Discourage them. */ |
20644 | if (REX_SSE_REGNO_P (hard_regno)) |
20645 | return 2; |
20646 | if (EXT_REX_SSE_REGNO_P (hard_regno)) |
20647 | return 1; |
20648 | /* Usage of AX register results in smaller code. Prefer it. */ |
20649 | if (hard_regno == AX_REG) |
20650 | return 4; |
20651 | return 3; |
20652 | } |
20653 | |
20654 | /* Implement TARGET_PREFERRED_RELOAD_CLASS. |
20655 | |
20656 | Put float CONST_DOUBLE in the constant pool instead of fp regs. |
20657 | QImode must go into class Q_REGS. |
20658 | Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and |
20659 | movdf to do mem-to-mem moves through integer regs. */ |
20660 | |
20661 | static reg_class_t |
20662 | ix86_preferred_reload_class (rtx x, reg_class_t regclass) |
20663 | { |
20664 | machine_mode mode = GET_MODE (x); |
20665 | |
20666 | /* We're only allowed to return a subclass of CLASS. Many of the |
20667 | following checks fail for NO_REGS, so eliminate that early. */ |
20668 | if (regclass == NO_REGS) |
20669 | return NO_REGS; |
20670 | |
20671 | /* All classes can load zeros. */ |
20672 | if (x == CONST0_RTX (mode)) |
20673 | return regclass; |
20674 | |
20675 | /* Force constants into memory if we are loading a (nonzero) constant into |
20676 | an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK |
20677 | instructions to load from a constant. */ |
20678 | if (CONSTANT_P (x) |
20679 | && (MAYBE_MMX_CLASS_P (regclass) |
20680 | || MAYBE_SSE_CLASS_P (regclass) |
20681 | || MAYBE_MASK_CLASS_P (regclass))) |
20682 | return NO_REGS; |
20683 | |
20684 | /* Floating-point constants need more complex checks. */ |
20685 | if (CONST_DOUBLE_P (x)) |
20686 | { |
20687 | /* General regs can load everything. */ |
20688 | if (INTEGER_CLASS_P (regclass)) |
20689 | return regclass; |
20690 | |
20691 | /* Floats can load 0 and 1 plus some others. Note that we eliminated |
20692 | zero above. We only want to wind up preferring 80387 registers if |
20693 | we plan on doing computation with them. */ |
20694 | if (IS_STACK_MODE (mode) |
20695 | && standard_80387_constant_p (x) > 0) |
20696 | { |
20697 | /* Limit class to FP regs. */ |
20698 | if (FLOAT_CLASS_P (regclass)) |
20699 | return FLOAT_REGS; |
20700 | } |
20701 | |
20702 | return NO_REGS; |
20703 | } |
20704 | |
20705 | /* Prefer SSE if we can use them for math. Also allow integer regs |
20706 | when moves between register units are cheap. */ |
20707 | if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) |
20708 | { |
20709 | if (TARGET_INTER_UNIT_MOVES_FROM_VEC |
20710 | && TARGET_INTER_UNIT_MOVES_TO_VEC |
20711 | && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode)) |
20712 | return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS; |
20713 | else |
20714 | return SSE_CLASS_P (regclass) ? regclass : NO_REGS; |
20715 | } |
20716 | |
20717 | /* Generally when we see PLUS here, it's the function invariant |
20718 | (plus soft-fp const_int). Which can only be computed into general |
20719 | regs. */ |
20720 | if (GET_CODE (x) == PLUS) |
20721 | return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; |
20722 | |
20723 | /* QImode constants are easy to load, but non-constant QImode data |
20724 | must go into Q_REGS or ALL_MASK_REGS. */ |
20725 | if (GET_MODE (x) == QImode && !CONSTANT_P (x)) |
20726 | { |
20727 | if (Q_CLASS_P (regclass)) |
20728 | return regclass; |
20729 | else if (reg_class_subset_p (Q_REGS, regclass)) |
20730 | return Q_REGS; |
20731 | else if (MASK_CLASS_P (regclass)) |
20732 | return regclass; |
20733 | else |
20734 | return NO_REGS; |
20735 | } |
20736 | |
20737 | return regclass; |
20738 | } |
20739 | |
20740 | /* Discourage putting floating-point values in SSE registers unless |
20741 | SSE math is being used, and likewise for the 387 registers. */ |
20742 | static reg_class_t |
20743 | ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) |
20744 | { |
20745 | /* Restrict the output reload class to the register bank that we are doing |
20746 | math on. If we would like not to return a subset of CLASS, reject this |
20747 | alternative: if reload cannot do this, it will still use its choice. */ |
20748 | machine_mode mode = GET_MODE (x); |
20749 | if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) |
20750 | return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; |
20751 | |
20752 | if (IS_STACK_MODE (mode)) |
20753 | return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; |
20754 | |
20755 | return regclass; |
20756 | } |
20757 | |
20758 | static reg_class_t |
20759 | ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, |
20760 | machine_mode mode, secondary_reload_info *sri) |
20761 | { |
20762 | /* Double-word spills from general registers to non-offsettable memory |
20763 | references (zero-extended addresses) require special handling. */ |
20764 | if (TARGET_64BIT |
20765 | && MEM_P (x) |
20766 | && GET_MODE_SIZE (mode) > UNITS_PER_WORD |
20767 | && INTEGER_CLASS_P (rclass) |
20768 | && !offsettable_memref_p (x)) |
20769 | { |
20770 | sri->icode = (in_p |
20771 | ? CODE_FOR_reload_noff_load |
20772 | : CODE_FOR_reload_noff_store); |
20773 | /* Add the cost of moving address to a temporary. */ |
20774 | sri->extra_cost = 1; |
20775 | |
20776 | return NO_REGS; |
20777 | } |
20778 | |
  /* QImode spills from non-QI registers require an
     intermediate register on 32-bit targets.  */
20781 | if (mode == QImode |
20782 | && ((!TARGET_64BIT && !in_p |
20783 | && INTEGER_CLASS_P (rclass) |
20784 | && MAYBE_NON_Q_CLASS_P (rclass)) |
20785 | || (!TARGET_AVX512DQ |
20786 | && MAYBE_MASK_CLASS_P (rclass)))) |
20787 | { |
20788 | int regno = true_regnum (x); |
20789 | |
20790 | /* Return Q_REGS if the operand is in memory. */ |
20791 | if (regno == -1) |
20792 | return Q_REGS; |
20793 | |
20794 | return NO_REGS; |
20795 | } |
20796 | |
20797 | /* Require movement to gpr, and then store to memory. */ |
20798 | if ((mode == HFmode || mode == HImode || mode == V2QImode |
20799 | || mode == BFmode) |
20800 | && !TARGET_SSE4_1 |
20801 | && SSE_CLASS_P (rclass) |
20802 | && !in_p && MEM_P (x)) |
20803 | { |
20804 | sri->extra_cost = 1; |
20805 | return GENERAL_REGS; |
20806 | } |
20807 | |
20808 | /* This condition handles corner case where an expression involving |
20809 | pointers gets vectorized. We're trying to use the address of a |
20810 | stack slot as a vector initializer. |
20811 | |
20812 | (set (reg:V2DI 74 [ vect_cst_.2 ]) |
20813 | (vec_duplicate:V2DI (reg/f:DI 20 frame))) |
20814 | |
20815 | Eventually frame gets turned into sp+offset like this: |
20816 | |
20817 | (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) |
20818 | (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) |
20819 | (const_int 392 [0x188])))) |
20820 | |
20821 | That later gets turned into: |
20822 | |
20823 | (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) |
20824 | (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) |
20825 | (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) |
20826 | |
20827 | We'll have the following reload recorded: |
20828 | |
20829 | Reload 0: reload_in (DI) = |
20830 | (plus:DI (reg/f:DI 7 sp) |
20831 | (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) |
20832 | reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) |
20833 | SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine |
20834 | reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) |
20835 | reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) |
20836 | reload_reg_rtx: (reg:V2DI 22 xmm1) |
20837 | |
20838 | Which isn't going to work since SSE instructions can't handle scalar |
20839 | additions. Returning GENERAL_REGS forces the addition into integer |
20840 | register and reload can handle subsequent reloads without problems. */ |
20841 | |
20842 | if (in_p && GET_CODE (x) == PLUS |
20843 | && SSE_CLASS_P (rclass) |
20844 | && SCALAR_INT_MODE_P (mode)) |
20845 | return GENERAL_REGS; |
20846 | |
20847 | return NO_REGS; |
20848 | } |
20849 | |
20850 | /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ |
20851 | |
20852 | static bool |
20853 | ix86_class_likely_spilled_p (reg_class_t rclass) |
20854 | { |
20855 | switch (rclass) |
20856 | { |
20857 | case AREG: |
20858 | case DREG: |
20859 | case CREG: |
20860 | case BREG: |
20861 | case AD_REGS: |
20862 | case SIREG: |
20863 | case DIREG: |
20864 | case SSE_FIRST_REG: |
20865 | case FP_TOP_REG: |
20866 | case FP_SECOND_REG: |
20867 | return true; |
20868 | |
20869 | default: |
20870 | break; |
20871 | } |
20872 | |
20873 | return false; |
20874 | } |
20875 | |
20876 | /* Implement TARGET_CALLEE_SAVE_COST. */ |
20877 | |
20878 | static int |
20879 | ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode, |
20880 | unsigned int, int mem_cost, const HARD_REG_SET &, bool) |
20881 | { |
20882 | /* Account for the fact that push and pop are shorter and do their |
20883 | own allocation and deallocation. */ |
20884 | if (GENERAL_REGNO_P (hard_regno)) |
20885 | { |
20886 | /* push is 1 byte while typical spill is 4-5 bytes. |
20887 | ??? We probably should adjust size costs accordingly. |
20888 | Costs are relative to reg-reg move that has 2 bytes for 32bit |
20889 | and 3 bytes otherwise. Be sure that no cost table sets cost |
20890 | to 2, so we end up with 0. */ |
20891 | if (mem_cost <= 2 || optimize_function_for_size_p (cfun)) |
20892 | return 1; |
20893 | return mem_cost - 2; |
20894 | } |
20895 | return mem_cost; |
20896 | } |
20897 | |
20898 | /* Return true if a set of DST by the expression SRC should be allowed. |
20899 | This prevents complex sets of likely_spilled hard regs before split1. */ |
20900 | |
20901 | bool |
20902 | ix86_hardreg_mov_ok (rtx dst, rtx src) |
20903 | { |
20904 | /* Avoid complex sets of likely_spilled hard registers before reload. */ |
20905 | if (REG_P (dst) && HARD_REGISTER_P (dst) |
20906 | && !REG_P (src) && !MEM_P (src) |
20907 | && !(VECTOR_MODE_P (GET_MODE (dst)) |
20908 | ? standard_sse_constant_p (x: src, GET_MODE (dst)) |
20909 | : x86_64_immediate_operand (src, GET_MODE (dst))) |
20910 | && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))) |
20911 | && ix86_pre_reload_split ()) |
20912 | return false; |
20913 | return true; |
20914 | } |
20915 | |
20916 | /* If we are copying between registers from different register sets |
20917 | (e.g. FP and integer), we may need a memory location. |
20918 | |
20919 | The function can't work reliably when one of the CLASSES is a class |
20920 | containing registers from multiple sets. We avoid this by never combining |
20921 | different sets in a single alternative in the machine description. |
20922 | Ensure that this constraint holds to avoid unexpected surprises. |
20923 | |
20924 | When STRICT is false, we are being called from REGISTER_MOVE_COST, |
20925 | so do not enforce these sanity checks. |
20926 | |
20927 | To optimize register_move_cost performance, define inline variant. */ |
20928 | |
20929 | static inline bool |
20930 | inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, |
20931 | reg_class_t class2, int strict) |
20932 | { |
20933 | if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) |
20934 | return false; |
20935 | |
20936 | if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) |
20937 | || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) |
20938 | || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) |
20939 | || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) |
20940 | || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) |
20941 | || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) |
20942 | || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) |
20943 | || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) |
20944 | { |
20945 | gcc_assert (!strict || lra_in_progress); |
20946 | return true; |
20947 | } |
20948 | |
20949 | if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) |
20950 | return true; |
20951 | |
  /* ??? This is a lie.  We do have moves between mmx/general and between
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
20955 | if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) |
20956 | return true; |
20957 | |
20958 | /* Between mask and general, we have moves no larger than word size. */ |
20959 | if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) |
20960 | { |
20961 | if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)) |
20962 | || GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
20963 | return true; |
20964 | } |
20965 | |
20966 | if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
20967 | { |
20968 | /* SSE1 doesn't have any direct moves from other classes. */ |
20969 | if (!TARGET_SSE2) |
20970 | return true; |
20971 | |
20972 | if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))) |
20973 | return true; |
20974 | |
20975 | /* If the target says that inter-unit moves are more expensive |
20976 | than moving through memory, then don't generate them. */ |
20977 | if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) |
20978 | || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) |
20979 | return true; |
20980 | |
20981 | /* With SSE4.1, *mov{ti,di}_internal supports moves between |
20982 | SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */ |
20983 | if (TARGET_SSE4_1 |
20984 | && (TARGET_64BIT ? mode == TImode : mode == DImode)) |
20985 | return false; |
20986 | |
20987 | int msize = GET_MODE_SIZE (mode); |
20988 | |
20989 | /* Between SSE and general, we have moves no larger than word size. */ |
20990 | if (msize > UNITS_PER_WORD) |
20991 | return true; |
20992 | |
      /* In addition to SImode moves, HImode moves are supported for SSE2 and
	 above.  Use vmovw with AVX512FP16, or pinsrw/pextrw without
	 AVX512FP16.  */
20995 | int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode); |
20996 | |
20997 | if (msize < minsize) |
20998 | return true; |
20999 | } |
21000 | |
21001 | return false; |
21002 | } |
21003 | |
21004 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ |
21005 | |
21006 | static bool |
21007 | ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, |
21008 | reg_class_t class2) |
21009 | { |
21010 | return inline_secondary_memory_needed (mode, class1, class2, strict: true); |
21011 | } |
21012 | |
21013 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. |
21014 | |
21015 | get_secondary_mem widens integral modes to BITS_PER_WORD. |
   There is no need to emit a full 64-bit move on 64-bit targets
   for integral modes that can be moved using a 32-bit move.  */
21018 | |
21019 | static machine_mode |
21020 | ix86_secondary_memory_needed_mode (machine_mode mode) |
21021 | { |
21022 | if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) |
21023 | return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); |
21024 | return mode; |
21025 | } |
21026 | |
21027 | /* Implement the TARGET_CLASS_MAX_NREGS hook. |
21028 | |
21029 | On the 80386, this is the size of MODE in words, |
21030 | except in the FP regs, where a single reg is always enough. */ |
21031 | |
21032 | static unsigned char |
21033 | ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) |
21034 | { |
21035 | if (MAYBE_INTEGER_CLASS_P (rclass)) |
21036 | { |
21037 | if (mode == XFmode) |
21038 | return (TARGET_64BIT ? 2 : 3); |
21039 | else if (mode == XCmode) |
21040 | return (TARGET_64BIT ? 4 : 6); |
21041 | else |
21042 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
21043 | } |
21044 | else |
21045 | { |
21046 | if (COMPLEX_MODE_P (mode)) |
21047 | return 2; |
21048 | else |
21049 | return 1; |
21050 | } |
21051 | } |
21052 | |
21053 | /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
21054 | |
21055 | static bool |
21056 | ix86_can_change_mode_class (machine_mode from, machine_mode to, |
21057 | reg_class_t regclass) |
21058 | { |
21059 | if (from == to) |
21060 | return true; |
21061 | |
21062 | /* x87 registers can't do subreg at all, as all values are reformatted |
21063 | to extended precision. |
21064 | |
     ??? The middle end queries mode changes for ALL_REGS, and this makes
     vec_series_lowpart_p always return false.  We probably should
     restrict this to modes supported by i387 and check if it is enabled.  */
21068 | if (MAYBE_FLOAT_CLASS_P (regclass)) |
21069 | return false; |
21070 | |
21071 | if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) |
21072 | { |
21073 | /* Vector registers do not support QI or HImode loads. If we don't |
21074 | disallow a change to these modes, reload will assume it's ok to |
21075 | drop the subreg from (subreg:SI (reg:HI 100) 0). This affects |
21076 | the vec_dupv4hi pattern. |
21077 | NB: SSE2 can load 16bit data to sse register via pinsrw. */ |
21078 | int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4; |
21079 | if (GET_MODE_SIZE (from) < mov_size |
21080 | || GET_MODE_SIZE (to) < mov_size) |
21081 | return false; |
21082 | } |
21083 | |
21084 | return true; |
21085 | } |
21086 | |
21087 | /* Return index of MODE in the sse load/store tables. */ |
21088 | |
21089 | static inline int |
21090 | sse_store_index (machine_mode mode) |
21091 | { |
  /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
     costs to processor_costs, which requires changes to all entries in
     the processor cost table.  */
21095 | if (mode == E_HFmode) |
21096 | mode = E_SFmode; |
21097 | |
21098 | switch (GET_MODE_SIZE (mode)) |
21099 | { |
21100 | case 4: |
21101 | return 0; |
21102 | case 8: |
21103 | return 1; |
21104 | case 16: |
21105 | return 2; |
21106 | case 32: |
21107 | return 3; |
21108 | case 64: |
21109 | return 4; |
21110 | default: |
21111 | return -1; |
21112 | } |
21113 | } |
21114 | |
21115 | /* Return the cost of moving data of mode M between a |
21116 | register and memory. A value of 2 is the default; this cost is |
21117 | relative to those in `REGISTER_MOVE_COST'. |
21118 | |
21119 | This function is used extensively by register_move_cost that is used to |
21120 | build tables at startup. Make it inline in this case. |
21121 | When IN is 2, return maximum of in and out move cost. |
21122 | |
21123 | If moving between registers and memory is more expensive than |
21124 | between two registers, you should define this macro to express the |
21125 | relative cost. |
21126 | |
   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.
21129 | */ |
21130 | static inline int |
21131 | inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) |
21132 | { |
21133 | int cost; |
21134 | |
21135 | if (FLOAT_CLASS_P (regclass)) |
21136 | { |
21137 | int index; |
21138 | switch (mode) |
21139 | { |
21140 | case E_SFmode: |
21141 | index = 0; |
21142 | break; |
21143 | case E_DFmode: |
21144 | index = 1; |
21145 | break; |
21146 | case E_XFmode: |
21147 | index = 2; |
21148 | break; |
21149 | default: |
21150 | return 100; |
21151 | } |
21152 | if (in == 2) |
21153 | return MAX (ix86_cost->hard_register.fp_load [index], |
21154 | ix86_cost->hard_register.fp_store [index]); |
21155 | return in ? ix86_cost->hard_register.fp_load [index] |
21156 | : ix86_cost->hard_register.fp_store [index]; |
21157 | } |
21158 | if (SSE_CLASS_P (regclass)) |
21159 | { |
21160 | int index = sse_store_index (mode); |
21161 | if (index == -1) |
21162 | return 100; |
21163 | if (in == 2) |
21164 | return MAX (ix86_cost->hard_register.sse_load [index], |
21165 | ix86_cost->hard_register.sse_store [index]); |
21166 | return in ? ix86_cost->hard_register.sse_load [index] |
21167 | : ix86_cost->hard_register.sse_store [index]; |
21168 | } |
21169 | if (MASK_CLASS_P (regclass)) |
21170 | { |
21171 | int index; |
21172 | switch (GET_MODE_SIZE (mode)) |
21173 | { |
21174 | case 1: |
21175 | index = 0; |
21176 | break; |
21177 | case 2: |
21178 | index = 1; |
21179 | break; |
21180 | 	/* DImode loads and stores are assumed to cost the same as SImode.  */
21181 | case 4: |
21182 | case 8: |
21183 | index = 2; |
21184 | break; |
21185 | default: |
21186 | return 100; |
21187 | } |
21188 | |
21189 | if (in == 2) |
21190 | return MAX (ix86_cost->hard_register.mask_load[index], |
21191 | ix86_cost->hard_register.mask_store[index]); |
21192 | return in ? ix86_cost->hard_register.mask_load[2] |
21193 | : ix86_cost->hard_register.mask_store[2]; |
21194 | } |
21195 | if (MMX_CLASS_P (regclass)) |
21196 | { |
21197 | int index; |
21198 | switch (GET_MODE_SIZE (mode)) |
21199 | { |
21200 | case 4: |
21201 | index = 0; |
21202 | break; |
21203 | case 8: |
21204 | index = 1; |
21205 | break; |
21206 | default: |
21207 | return 100; |
21208 | } |
21209 | if (in == 2) |
21210 | return MAX (ix86_cost->hard_register.mmx_load [index], |
21211 | ix86_cost->hard_register.mmx_store [index]); |
21212 | return in ? ix86_cost->hard_register.mmx_load [index] |
21213 | : ix86_cost->hard_register.mmx_store [index]; |
21214 | } |
21215 | switch (GET_MODE_SIZE (mode)) |
21216 | { |
21217 | case 1: |
21218 | if (Q_CLASS_P (regclass) || TARGET_64BIT) |
21219 | { |
21220 | if (!in) |
21221 | return ix86_cost->hard_register.int_store[0]; |
21222 | if (TARGET_PARTIAL_REG_DEPENDENCY |
21223 | && optimize_function_for_speed_p (cfun)) |
21224 | cost = ix86_cost->hard_register.movzbl_load; |
21225 | else |
21226 | cost = ix86_cost->hard_register.int_load[0]; |
21227 | if (in == 2) |
21228 | return MAX (cost, ix86_cost->hard_register.int_store[0]); |
21229 | return cost; |
21230 | } |
21231 | else |
21232 | { |
21233 | if (in == 2) |
21234 | return MAX (ix86_cost->hard_register.movzbl_load, |
21235 | ix86_cost->hard_register.int_store[0] + 4); |
21236 | if (in) |
21237 | return ix86_cost->hard_register.movzbl_load; |
21238 | else |
21239 | return ix86_cost->hard_register.int_store[0] + 4; |
21240 | } |
21241 | break; |
21242 | case 2: |
21243 | { |
21244 | int cost; |
21245 | if (in == 2) |
21246 | cost = MAX (ix86_cost->hard_register.int_load[1], |
21247 | ix86_cost->hard_register.int_store[1]); |
21248 | else |
21249 | cost = in ? ix86_cost->hard_register.int_load[1] |
21250 | : ix86_cost->hard_register.int_store[1]; |
21251 | |
21252 | if (mode == E_HFmode) |
21253 | { |
21254 | /* Prefer SSE over GPR for HFmode. */ |
21255 | int sse_cost; |
21256 | int index = sse_store_index (mode); |
21257 | if (in == 2) |
21258 | sse_cost = MAX (ix86_cost->hard_register.sse_load[index], |
21259 | ix86_cost->hard_register.sse_store[index]); |
21260 | else |
21261 | sse_cost = (in |
21262 | ? ix86_cost->hard_register.sse_load [index] |
21263 | : ix86_cost->hard_register.sse_store [index]); |
21264 | if (sse_cost >= cost) |
21265 | cost = sse_cost + 1; |
21266 | } |
21267 | return cost; |
21268 | } |
21269 | default: |
21270 | if (in == 2) |
21271 | cost = MAX (ix86_cost->hard_register.int_load[2], |
21272 | ix86_cost->hard_register.int_store[2]); |
21273 | else if (in) |
21274 | cost = ix86_cost->hard_register.int_load[2]; |
21275 | else |
21276 | cost = ix86_cost->hard_register.int_store[2]; |
21277 |       /* Multiply by the number of GPR moves needed.  */
21278 | return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); |
21279 | } |
21280 | } |
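
/* Worked example, added for illustration only: for SSE_REGS and V4SFmode,
   sse_store_index above gives 2, so IN == 1 returns sse_load[2], IN == 0
   returns sse_store[2], and IN == 2 returns the maximum of the two.  For
   GENERAL_REGS and DImode on a 32-bit target the final switch falls through
   to the default case and the int_load[2]/int_store[2] cost is multiplied
   by CEIL (8, UNITS_PER_WORD) == 2.  */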
21281 | |
21282 | static int |
21283 | ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) |
21284 | { |
21285 |   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
21286 | } |
21287 | |
21288 | |
21289 | /* Return the cost of moving data from a register in class CLASS1 to |
21290 | one in class CLASS2. |
21291 | |
21292 | It is not required that the cost always equal 2 when FROM is the same as TO; |
21293 | on some machines it is expensive to move between registers if they are not |
21294 | general registers. */ |
21295 | |
21296 | static int |
21297 | ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, |
21298 | reg_class_t class2_i) |
21299 | { |
21300 | enum reg_class class1 = (enum reg_class) class1_i; |
21301 | enum reg_class class2 = (enum reg_class) class2_i; |
21302 | |
21303 | /* In case we require secondary memory, compute cost of the store followed |
21304 | by load. In order to avoid bad register allocation choices, we need |
21305 | for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ |
21306 | |
21307 |   if (inline_secondary_memory_needed (mode, class1, class2, false))
21308 | { |
21309 | int cost = 1; |
21310 | |
21311 |       cost += inline_memory_move_cost (mode, class1, 2);
21312 |       cost += inline_memory_move_cost (mode, class2, 2);
21313 | |
21314 |       /* When copying from a general purpose register we may emit multiple
21315 | 	 stores followed by a single load, causing a memory-size-mismatch
21316 | 	 stall.  Count this as an arbitrarily high cost of 20.  */
21317 | if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD |
21318 | && TARGET_MEMORY_MISMATCH_STALL |
21319 | && targetm.class_max_nregs (class1, mode) |
21320 | > targetm.class_max_nregs (class2, mode)) |
21321 | cost += 20; |
21322 | |
21323 | /* In the case of FP/MMX moves, the registers actually overlap, and we |
21324 | have to switch modes in order to treat them differently. */ |
21325 | if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) |
21326 | || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) |
21327 | cost += 20; |
21328 | |
21329 | return cost; |
21330 | } |
21331 | |
21332 | /* Moves between MMX and non-MMX units require secondary memory. */ |
21333 | if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) |
21334 | gcc_unreachable (); |
21335 | |
21336 | if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
21337 | return (SSE_CLASS_P (class1) |
21338 | ? ix86_cost->hard_register.sse_to_integer |
21339 | : ix86_cost->hard_register.integer_to_sse); |
21340 | |
21341 | /* Moves between mask register and GPR. */ |
21342 | if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) |
21343 | { |
21344 | return (MASK_CLASS_P (class1) |
21345 | ? ix86_cost->hard_register.mask_to_integer |
21346 | : ix86_cost->hard_register.integer_to_mask); |
21347 | } |
21348 | /* Moving between mask registers. */ |
21349 | if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) |
21350 | return ix86_cost->hard_register.mask_move; |
21351 | |
21352 | if (MAYBE_FLOAT_CLASS_P (class1)) |
21353 | return ix86_cost->hard_register.fp_move; |
21354 | if (MAYBE_SSE_CLASS_P (class1)) |
21355 | { |
21356 | if (GET_MODE_BITSIZE (mode) <= 128) |
21357 | return ix86_cost->hard_register.xmm_move; |
21358 | if (GET_MODE_BITSIZE (mode) <= 256) |
21359 | return ix86_cost->hard_register.ymm_move; |
21360 | return ix86_cost->hard_register.zmm_move; |
21361 | } |
21362 | if (MAYBE_MMX_CLASS_P (class1)) |
21363 | return ix86_cost->hard_register.mmx_move; |
21364 | return 2; |
21365 | } |
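
/* Worked example, added for illustration only: when no secondary memory is
   needed, an SImode move between SSE_REGS and GENERAL_REGS is priced with
   sse_to_integer or integer_to_sse, and a move within the mask registers
   costs mask_move.  When inline_secondary_memory_needed does hold (e.g.
   x87 <-> SSE), the move is priced as a store plus a load: 1 plus the two
   symmetric memory move costs, plus the 20-unit penalties when they
   apply.  */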
21366 | |
21367 | /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in |
21368 | words of a value of mode MODE but can be less for certain modes in |
21369 | special long registers. |
21370 | |
21371 |    Actually there are no two-word move instructions for consecutive
21372 |    registers.  And only registers 0-3 may have mov byte instructions
21373 |    applied to them.  */
21374 | |
21375 | static unsigned int |
21376 | ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) |
21377 | { |
21378 | if (GENERAL_REGNO_P (regno)) |
21379 | { |
21380 | if (mode == XFmode) |
21381 | return TARGET_64BIT ? 2 : 3; |
21382 | if (mode == XCmode) |
21383 | return TARGET_64BIT ? 4 : 6; |
21384 | return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
21385 | } |
21386 | if (COMPLEX_MODE_P (mode)) |
21387 | return 2; |
21388 | /* Register pair for mask registers. */ |
21389 | if (mode == P2QImode || mode == P2HImode) |
21390 | return 2; |
21391 | if (mode == V64SFmode || mode == V64SImode) |
21392 | return 4; |
21393 | return 1; |
21394 | } |
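
/* Illustration only: XFmode held in GPRs takes 3 registers on a 32-bit
   target and 2 on a 64-bit one, DImode takes CEIL (8, 4) == 2 GPRs when
   !TARGET_64BIT, while a V4SFmode value in an SSE register counts as a
   single register.  */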
21395 | |
21396 | /* Implement REGMODE_NATURAL_SIZE(MODE). */ |
21397 | unsigned int |
21398 | ix86_regmode_natural_size (machine_mode mode) |
21399 | { |
21400 | if (mode == P2HImode || mode == P2QImode) |
21401 | return GET_MODE_SIZE (mode) / 2; |
21402 | return UNITS_PER_WORD; |
21403 | } |
21404 | |
21405 | /* Implement TARGET_HARD_REGNO_MODE_OK. */ |
21406 | |
21407 | static bool |
21408 | ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) |
21409 | { |
21410 |   /* The flags register, and only the flags register, can hold CCmode values.  */
21411 | if (CC_REGNO_P (regno)) |
21412 | return GET_MODE_CLASS (mode) == MODE_CC; |
21413 | if (GET_MODE_CLASS (mode) == MODE_CC |
21414 | || GET_MODE_CLASS (mode) == MODE_RANDOM) |
21415 | return false; |
21416 | if (STACK_REGNO_P (regno)) |
21417 | return VALID_FP_MODE_P (mode); |
21418 | if (MASK_REGNO_P (regno)) |
21419 | { |
21420 |       /* A register pair can only start at an even register number.  */
21421 | if ((mode == P2QImode || mode == P2HImode)) |
21422 | return MASK_PAIR_REGNO_P(regno); |
21423 | |
21424 | return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode)) |
21425 | || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode))); |
21426 | } |
21427 | |
21428 | if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) |
21429 | return false; |
21430 | |
21431 | if (SSE_REGNO_P (regno)) |
21432 | { |
21433 | /* We implement the move patterns for all vector modes into and |
21434 | out of SSE registers, even when no operation instructions |
21435 | are available. */ |
21436 | |
21437 | /* For AVX-512 we allow, regardless of regno: |
21438 | - XI mode |
21439 | 	 - any 512-bit wide vector mode
21440 | - any scalar mode. */ |
21441 | if (TARGET_AVX512F |
21442 | && ((VALID_AVX512F_REG_OR_XI_MODE (mode)) |
21443 | || VALID_AVX512F_SCALAR_MODE (mode))) |
21444 | return true; |
21445 | |
21446 | /* TODO check for QI/HI scalars. */ |
21447 |       /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
21448 | if (TARGET_AVX512VL |
21449 | && (VALID_AVX256_REG_OR_OI_MODE (mode) |
21450 | || VALID_AVX512VL_128_REG_MODE (mode))) |
21451 | return true; |
21452 | |
21453 | /* xmm16-xmm31 are only available for AVX-512. */ |
21454 | if (EXT_REX_SSE_REGNO_P (regno)) |
21455 | return false; |
21456 | |
21457 | /* OImode and AVX modes are available only when AVX is enabled. */ |
21458 | return ((TARGET_AVX |
21459 | && VALID_AVX256_REG_OR_OI_MODE (mode)) |
21460 | || VALID_SSE_REG_MODE (mode) |
21461 | || VALID_SSE2_REG_MODE (mode) |
21462 | || VALID_MMX_REG_MODE (mode) |
21463 | || VALID_MMX_REG_MODE_3DNOW (mode)); |
21464 | } |
21465 | if (MMX_REGNO_P (regno)) |
21466 | { |
21467 | /* We implement the move patterns for 3DNOW modes even in MMX mode, |
21468 | so if the register is available at all, then we can move data of |
21469 | the given mode into or out of it. */ |
21470 | return (VALID_MMX_REG_MODE (mode) |
21471 | || VALID_MMX_REG_MODE_3DNOW (mode)); |
21472 | } |
21473 | |
21474 | if (mode == QImode) |
21475 | { |
21476 | /* Take care for QImode values - they can be in non-QI regs, |
21477 | but then they do cause partial register stalls. */ |
21478 | if (ANY_QI_REGNO_P (regno)) |
21479 | return true; |
21480 | if (!TARGET_PARTIAL_REG_STALL) |
21481 | return true; |
21482 | /* LRA checks if the hard register is OK for the given mode. |
21483 | QImode values can live in non-QI regs, so we allow all |
21484 | registers here. */ |
21485 | if (lra_in_progress) |
21486 | return true; |
21487 | return !can_create_pseudo_p (); |
21488 | } |
21489 | /* We handle both integer and floats in the general purpose registers. */ |
21490 | else if (VALID_INT_MODE_P (mode) |
21491 | || VALID_FP_MODE_P (mode)) |
21492 | return true; |
21493 | /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go |
21494 | on to use that value in smaller contexts, this can easily force a |
21495 | pseudo to be allocated to GENERAL_REGS. Since this is no worse than |
21496 | supporting DImode, allow it. */ |
21497 | else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) |
21498 | return true; |
21499 | |
21500 | return false; |
21501 | } |
21502 | |
21503 | /* Implement TARGET_INSN_CALLEE_ABI. */ |
21504 | |
21505 | const predefined_function_abi & |
21506 | ix86_insn_callee_abi (const rtx_insn *insn) |
21507 | { |
21508 | unsigned int abi_id = 0; |
21509 | rtx pat = PATTERN (insn); |
21510 | if (vzeroupper_pattern (pat, VOIDmode)) |
21511 | abi_id = ABI_VZEROUPPER; |
21512 | |
21513 | return function_abis[abi_id]; |
21514 | } |
21515 | |
21516 | /* Initialize function_abis with the corresponding ABI_ID;
21517 |    currently only vzeroupper is handled.  */
21518 | void |
21519 | ix86_initialize_callee_abi (unsigned int abi_id) |
21520 | { |
21521 | gcc_assert (abi_id == ABI_VZEROUPPER); |
21522 | predefined_function_abi &vzeroupper_abi = function_abis[abi_id]; |
21523 | if (!vzeroupper_abi.initialized_p ()) |
21524 | { |
21525 | HARD_REG_SET full_reg_clobbers; |
21526 |       CLEAR_HARD_REG_SET (full_reg_clobbers);
21527 | vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers); |
21528 | } |
21529 | } |
21530 | |
21531 | void |
21532 | ix86_expand_avx_vzeroupper (void) |
21533 | { |
21534 | /* Initialize vzeroupper_abi here. */ |
21535 | ix86_initialize_callee_abi (ABI_VZEROUPPER); |
21536 | rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ()); |
21537 | /* Return false for non-local goto in can_nonlocal_goto. */ |
21538 |   make_reg_eh_region_note (insn, 0, INT_MIN);
21539 | /* Flag used for call_insn indicates it's a fake call. */ |
21540 | RTX_FLAG (insn, used) = 1; |
21541 | } |
21542 | |
21543 | |
21544 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that |
21545 | saves SSE registers across calls is Win64 (thus no need to check the |
21546 | current ABI here), and with AVX enabled Win64 only guarantees that |
21547 | the low 16 bytes are saved. */ |
21548 | |
21549 | static bool |
21550 | ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno, |
21551 | machine_mode mode) |
21552 | { |
21553 |   /* Special ABI for vzeroupper, which clobbers only the upper part of the SSE regs.  */
21554 | if (abi_id == ABI_VZEROUPPER) |
21555 | return (GET_MODE_SIZE (mode) > 16 |
21556 | && ((TARGET_64BIT && REX_SSE_REGNO_P (regno)) |
21557 | || LEGACY_SSE_REGNO_P (regno))); |
21558 | |
21559 | return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; |
21560 | } |
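
/* Illustration only: under ABI_VZEROUPPER a 32-byte (YMM-sized) value in
   xmm0-xmm15 is partially clobbered, since vzeroupper zeroes its upper
   half, while the same value in xmm16+ is left untouched; for ordinary
   calls any SSE register holding more than 16 bytes is considered
   partially clobbered.  */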
21561 | |
21562 | /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a |
21563 | tieable integer mode. */ |
21564 | |
21565 | static bool |
21566 | ix86_tieable_integer_mode_p (machine_mode mode) |
21567 | { |
21568 | switch (mode) |
21569 | { |
21570 | case E_HImode: |
21571 | case E_SImode: |
21572 | return true; |
21573 | |
21574 | case E_QImode: |
21575 | return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; |
21576 | |
21577 | case E_DImode: |
21578 | return TARGET_64BIT; |
21579 | |
21580 | default: |
21581 | return false; |
21582 | } |
21583 | } |
21584 | |
21585 | /* Implement TARGET_MODES_TIEABLE_P. |
21586 | |
21587 | Return true if MODE1 is accessible in a register that can hold MODE2 |
21588 | without copying. That is, all register classes that can hold MODE2 |
21589 | can also hold MODE1. */ |
21590 | |
21591 | static bool |
21592 | ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) |
21593 | { |
21594 | if (mode1 == mode2) |
21595 | return true; |
21596 | |
21597 |   if (ix86_tieable_integer_mode_p (mode1)
21598 |       && ix86_tieable_integer_mode_p (mode2))
21599 | return true; |
21600 | |
21601 | /* MODE2 being XFmode implies fp stack or general regs, which means we |
21602 | can tie any smaller floating point modes to it. Note that we do not |
21603 | tie this with TFmode. */ |
21604 | if (mode2 == XFmode) |
21605 | return mode1 == SFmode || mode1 == DFmode; |
21606 | |
21607 | /* MODE2 being DFmode implies fp stack, general or sse regs, which means |
21608 | that we can tie it with SFmode. */ |
21609 | if (mode2 == DFmode) |
21610 | return mode1 == SFmode; |
21611 | |
21612 | /* If MODE2 is only appropriate for an SSE register, then tie with |
21613 | any vector modes or scalar floating point modes acceptable to SSE |
21614 | registers, excluding scalar integer modes with SUBREG: |
21615 | (subreg:QI (reg:TI 99) 0)) |
21616 | (subreg:HI (reg:TI 99) 0)) |
21617 | (subreg:SI (reg:TI 99) 0)) |
21618 | (subreg:DI (reg:TI 99) 0)) |
21619 | to avoid unnecessary move from SSE register to integer register. |
21620 | */ |
21621 | if (GET_MODE_SIZE (mode2) >= 16 |
21622 | && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2) |
21623 | || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1)) |
21624 | && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2))) |
21625 |       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21626 |     return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
21627 | |
21628 | /* If MODE2 is appropriate for an MMX register, then tie |
21629 | with any other mode acceptable to MMX registers. */ |
21630 | if (GET_MODE_SIZE (mode2) == 8 |
21631 |       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21632 | return (GET_MODE_SIZE (mode1) == 8 |
21633 |             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21634 | |
21635 | /* SCmode and DImode can be tied. */ |
21636 | if ((mode1 == E_SCmode && mode2 == E_DImode) |
21637 | || (mode1 == E_DImode && mode2 == E_SCmode)) |
21638 | return TARGET_64BIT; |
21639 | |
21640 | /* [SD]Cmode and V2[SD]Fmode modes can be tied. */ |
21641 | if ((mode1 == E_SCmode && mode2 == E_V2SFmode) |
21642 | || (mode1 == E_V2SFmode && mode2 == E_SCmode) |
21643 | || (mode1 == E_DCmode && mode2 == E_V2DFmode) |
21644 | || (mode1 == E_V2DFmode && mode2 == E_DCmode)) |
21645 | return true; |
21646 | |
21647 | return false; |
21648 | } |
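
/* Illustration only: SFmode ties with DFmode and XFmode (a smaller FP mode
   in a register class that can hold the larger one), HImode and SImode
   always tie, and TImode ties with V4SFmode in SSE registers when SSE is
   enabled.  SImode does not tie with TImode here: the size-16 branch above
   deliberately excludes scalar integer modes so that
   (subreg:SI (reg:TI ...) 0) is not treated as free.  */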
21649 | |
21650 | /* Return the cost of moving between two registers of mode MODE. */ |
21651 | |
21652 | static int |
21653 | ix86_set_reg_reg_cost (machine_mode mode) |
21654 | { |
21655 | unsigned int units = UNITS_PER_WORD; |
21656 | |
21657 | switch (GET_MODE_CLASS (mode)) |
21658 | { |
21659 | default: |
21660 | break; |
21661 | |
21662 | case MODE_CC: |
21663 | units = GET_MODE_SIZE (CCmode); |
21664 | break; |
21665 | |
21666 | case MODE_FLOAT: |
21667 | if ((TARGET_SSE && mode == TFmode) |
21668 | || (TARGET_80387 && mode == XFmode) |
21669 | || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) |
21670 | || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) |
21671 | units = GET_MODE_SIZE (mode); |
21672 | break; |
21673 | |
21674 | case MODE_COMPLEX_FLOAT: |
21675 | if ((TARGET_SSE && mode == TCmode) |
21676 | || (TARGET_80387 && mode == XCmode) |
21677 | || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) |
21678 | || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) |
21679 | units = GET_MODE_SIZE (mode); |
21680 | break; |
21681 | |
21682 | case MODE_VECTOR_INT: |
21683 | case MODE_VECTOR_FLOAT: |
21684 | if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) |
21685 | || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
21686 | || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
21687 | || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
21688 | || ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
21689 | && VALID_MMX_REG_MODE (mode))) |
21690 | units = GET_MODE_SIZE (mode); |
21691 | } |
21692 | |
21693 | /* Return the cost of moving between two registers of mode MODE, |
21694 | assuming that the move will be in pieces of at most UNITS bytes. */ |
21695 | return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); |
21696 | } |
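
/* Worked example, added for illustration only: a DImode register copy on a
   32-bit target moves in UNITS_PER_WORD pieces and is priced
   COSTS_N_INSNS (CEIL (8, 4)) == COSTS_N_INSNS (2), while a V4SFmode copy
   with SSE enabled moves as one 16-byte piece and costs COSTS_N_INSNS (1).  */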
21697 | |
21698 | /* Return cost of vector operation in MODE given that scalar version has |
21699 | COST. */ |
21700 | |
21701 | static int |
21702 | ix86_vec_cost (machine_mode mode, int cost) |
21703 | { |
21704 | if (!VECTOR_MODE_P (mode)) |
21705 | return cost; |
21706 | |
21707 | if (GET_MODE_BITSIZE (mode) == 128 |
21708 | && TARGET_SSE_SPLIT_REGS) |
21709 | return cost * GET_MODE_BITSIZE (mode) / 64; |
21710 | else if (GET_MODE_BITSIZE (mode) > 128 |
21711 | && TARGET_AVX256_SPLIT_REGS) |
21712 | return cost * GET_MODE_BITSIZE (mode) / 128; |
21713 | else if (GET_MODE_BITSIZE (mode) > 256 |
21714 | && TARGET_AVX512_SPLIT_REGS) |
21715 | return cost * GET_MODE_BITSIZE (mode) / 256; |
21716 | return cost; |
21717 | } |
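
/* Illustration only: with TARGET_AVX256_SPLIT_REGS a 256-bit operation is
   charged COST * 256 / 128, i.e. twice the scalar COST, and a 512-bit one
   four times; without any of the split-regs tunings the scalar COST is
   returned unchanged.  */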
21718 | |
21719 | /* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
21720 |    vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  */
21721 | static int |
21722 | ix86_widen_mult_cost (const struct processor_costs *cost, |
21723 | enum machine_mode mode, bool uns_p) |
21724 | { |
21725 | gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); |
21726 | int extra_cost = 0; |
21727 | int basic_cost = 0; |
21728 | switch (mode) |
21729 | { |
21730 | case V8HImode: |
21731 | case V16HImode: |
21732 | if (!uns_p || mode == V16HImode) |
21733 | extra_cost = cost->sse_op * 2; |
21734 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21735 | break; |
21736 | case V4SImode: |
21737 | case V8SImode: |
21738 | /* pmulhw/pmullw can be used. */ |
21739 | basic_cost = cost->mulss * 2 + cost->sse_op * 2; |
21740 | break; |
21741 | case V2DImode: |
21742 | /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, |
21743 | require extra 4 mul, 4 add, 4 cmp and 2 shift. */ |
21744 | if (!TARGET_SSE4_1 && !uns_p) |
21745 | extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4 |
21746 | + cost->sse_op * 2; |
21747 | /* Fallthru. */ |
21748 | case V4DImode: |
21749 | basic_cost = cost->mulss * 2 + cost->sse_op * 4; |
21750 | break; |
21751 | default: |
21752 | /* Not implemented. */ |
21753 | return 100; |
21754 | } |
21755 |   return ix86_vec_cost (mode, basic_cost + extra_cost);
21756 | } |
21757 | |
21758 | /* Return cost of multiplication in MODE. */ |
21759 | |
21760 | static int |
21761 | ix86_multiplication_cost (const struct processor_costs *cost, |
21762 | enum machine_mode mode) |
21763 | { |
21764 | machine_mode inner_mode = mode; |
21765 | if (VECTOR_MODE_P (mode)) |
21766 | inner_mode = GET_MODE_INNER (mode); |
21767 | |
21768 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
21769 | return inner_mode == DFmode ? cost->mulsd : cost->mulss; |
21770 | else if (X87_FLOAT_MODE_P (mode)) |
21771 | return cost->fmul; |
21772 | else if (FLOAT_MODE_P (mode)) |
21773 | return ix86_vec_cost (mode, |
21774 |                           inner_mode == DFmode ? cost->mulsd : cost->mulss);
21775 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21776 | { |
21777 | int nmults, nops; |
21778 | /* Cost of reading the memory. */ |
21779 | int extra; |
21780 | |
21781 | switch (mode) |
21782 | { |
21783 | case V4QImode: |
21784 | case V8QImode: |
21785 | /* Partial V*QImode is emulated with 4-6 insns. */ |
21786 | nmults = 1; |
21787 | nops = 3; |
21788 | extra = 0; |
21789 | |
21790 | if (TARGET_AVX512BW && TARGET_AVX512VL) |
21791 | ; |
21792 | else if (TARGET_AVX2) |
21793 | nops += 2; |
21794 | else if (TARGET_XOP) |
21795 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21796 | else |
21797 | { |
21798 | nops += 1; |
21799 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21800 | } |
21801 | goto do_qimode; |
21802 | |
21803 | case V16QImode: |
21804 | /* V*QImode is emulated with 4-11 insns. */ |
21805 | nmults = 1; |
21806 | nops = 3; |
21807 | extra = 0; |
21808 | |
21809 | if (TARGET_AVX2 && !TARGET_PREFER_AVX128) |
21810 | { |
21811 | if (!(TARGET_AVX512BW && TARGET_AVX512VL)) |
21812 | nops += 3; |
21813 | } |
21814 | else if (TARGET_XOP) |
21815 | { |
21816 | nmults += 1; |
21817 | nops += 2; |
21818 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21819 | } |
21820 | else |
21821 | { |
21822 | nmults += 1; |
21823 | nops += 4; |
21824 | extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21825 | } |
21826 | goto do_qimode; |
21827 | |
21828 | case V32QImode: |
21829 | nmults = 1; |
21830 | nops = 3; |
21831 | extra = 0; |
21832 | |
21833 | if (!TARGET_AVX512BW || TARGET_PREFER_AVX256) |
21834 | { |
21835 | nmults += 1; |
21836 | nops += 4; |
21837 | /* 2 loads, so no division by 2. */ |
21838 | extra += COSTS_N_INSNS (cost->sse_load[3]); |
21839 | } |
21840 | goto do_qimode; |
21841 | |
21842 | case V64QImode: |
21843 | nmults = 2; |
21844 | nops = 9; |
21845 | /* 2 loads of each size, so no division by 2. */ |
21846 | extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]); |
21847 | |
21848 | do_qimode: |
21849 |           return ix86_vec_cost (mode, cost->mulss * nmults
21850 | + cost->sse_op * nops) + extra; |
21851 | |
21852 | case V4SImode: |
21853 | /* pmulld is used in this case. No emulation is needed. */ |
21854 | if (TARGET_SSE4_1) |
21855 | goto do_native; |
21856 | /* V4SImode is emulated with 7 insns. */ |
21857 | else |
21858 |             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21859 | |
21860 | case V2DImode: |
21861 | case V4DImode: |
21862 | /* vpmullq is used in this case. No emulation is needed. */ |
21863 | if (TARGET_AVX512DQ && TARGET_AVX512VL) |
21864 | goto do_native; |
21865 | /* V*DImode is emulated with 6-8 insns. */ |
21866 | else if (TARGET_XOP && mode == V2DImode) |
21867 |             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21868 | /* FALLTHRU */ |
21869 | case V8DImode: |
21870 | /* vpmullq is used in this case. No emulation is needed. */ |
21871 | if (TARGET_AVX512DQ && mode == V8DImode) |
21872 | goto do_native; |
21873 | else |
21874 |             return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21875 | |
21876 | default: |
21877 | do_native: |
21878 |           return ix86_vec_cost (mode, cost->mulss);
21879 | } |
21880 | } |
21881 | else |
21882 | return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); |
21883 | } |
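
/* Worked example, added for illustration only: a V4SImode multiply is
   priced ix86_vec_cost (mode, cost->mulss) when pmulld is available
   (TARGET_SSE4_1) and as the 7-insn emulation
   ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5) otherwise;
   a scalar integer multiply falls through to
   cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7.  */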
21884 | |
21885 | /* Return cost of division in MODE.  */
21886 | |
21887 | static int |
21888 | ix86_division_cost (const struct processor_costs *cost, |
21889 | enum machine_mode mode) |
21890 | { |
21891 | machine_mode inner_mode = mode; |
21892 | if (VECTOR_MODE_P (mode)) |
21893 | inner_mode = GET_MODE_INNER (mode); |
21894 | |
21895 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
21896 | return inner_mode == DFmode ? cost->divsd : cost->divss; |
21897 | else if (X87_FLOAT_MODE_P (mode)) |
21898 | return cost->fdiv; |
21899 | else if (FLOAT_MODE_P (mode)) |
21900 | return ix86_vec_cost (mode, |
21901 |                           inner_mode == DFmode ? cost->divsd : cost->divss);
21902 | else |
21903 | return cost->divide[MODE_INDEX (mode)]; |
21904 | } |
21905 | |
21906 | /* Return cost of shift in MODE.
21907 |    If CONSTANT_OP1 is true, the value of op1 is known and set in OP1_VAL.
21908 |    AND_IN_OP1 specifies whether op1 is the result of an AND, and
21909 |    SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
21910 | 
21911 |    SKIP_OP0/1 is set to true if the cost of OP0/1 should be ignored.  */
21912 | |
21913 | static int |
21914 | ix86_shift_rotate_cost (const struct processor_costs *cost, |
21915 | enum rtx_code code, |
21916 | enum machine_mode mode, bool constant_op1, |
21917 | HOST_WIDE_INT op1_val, |
21918 | bool and_in_op1, |
21919 | bool shift_and_truncate, |
21920 | bool *skip_op0, bool *skip_op1) |
21921 | { |
21922 | if (skip_op0) |
21923 | *skip_op0 = *skip_op1 = false; |
21924 | |
21925 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
21926 | { |
21927 | int count; |
21928 | /* Cost of reading the memory. */ |
21929 | int extra; |
21930 | |
21931 | switch (mode) |
21932 | { |
21933 | case V4QImode: |
21934 | case V8QImode: |
21935 | if (TARGET_AVX2) |
21936 | /* Use vpbroadcast. */ |
21937 | extra = cost->sse_op; |
21938 | else |
21939 | extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21940 | |
21941 | if (constant_op1) |
21942 | { |
21943 | if (code == ASHIFTRT) |
21944 | { |
21945 | count = 4; |
21946 | extra *= 2; |
21947 | } |
21948 | else |
21949 | count = 2; |
21950 | } |
21951 | else if (TARGET_AVX512BW && TARGET_AVX512VL) |
21952 |             return ix86_vec_cost (mode, cost->sse_op * 4);
21953 | else if (TARGET_SSE4_1) |
21954 | count = 5; |
21955 | else if (code == ASHIFTRT) |
21956 | count = 6; |
21957 | else |
21958 | count = 5; |
21959 |           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21960 | |
21961 | case V16QImode: |
21962 | if (TARGET_XOP) |
21963 | { |
21964 | /* For XOP we use vpshab, which requires a broadcast of the |
21965 | value to the variable shift insn. For constants this |
21966 | means a V16Q const in mem; even when we can perform the |
21967 | shift with one insn set the cost to prefer paddb. */ |
21968 | if (constant_op1) |
21969 | { |
21970 | extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; |
21971 |               return ix86_vec_cost (mode, cost->sse_op) + extra;
21972 | } |
21973 | else |
21974 | { |
21975 | count = (code == ASHIFT) ? 3 : 4; |
21976 |               return ix86_vec_cost (mode, cost->sse_op * count);
21977 | } |
21978 | } |
21979 | /* FALLTHRU */ |
21980 | case V32QImode: |
21981 | if (TARGET_AVX2) |
21982 | /* Use vpbroadcast. */ |
21983 | extra = cost->sse_op; |
21984 | else |
21985 | extra = COSTS_N_INSNS (mode == V16QImode |
21986 | ? cost->sse_load[2] |
21987 | : cost->sse_load[3]) / 2; |
21988 | |
21989 | if (constant_op1) |
21990 | { |
21991 | if (code == ASHIFTRT) |
21992 | { |
21993 | count = 4; |
21994 | extra *= 2; |
21995 | } |
21996 | else |
21997 | count = 2; |
21998 | } |
21999 | else if (TARGET_AVX512BW |
22000 | && ((mode == V32QImode && !TARGET_PREFER_AVX256) |
22001 | || (mode == V16QImode && TARGET_AVX512VL |
22002 | && !TARGET_PREFER_AVX128))) |
22003 |             return ix86_vec_cost (mode, cost->sse_op * 4);
22004 | else if (TARGET_AVX2 |
22005 | && mode == V16QImode && !TARGET_PREFER_AVX128) |
22006 | count = 6; |
22007 | else if (TARGET_SSE4_1) |
22008 | count = 9; |
22009 | else if (code == ASHIFTRT) |
22010 | count = 10; |
22011 | else |
22012 | count = 9; |
22013 |           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
22014 | |
22015 | case V2DImode: |
22016 | case V4DImode: |
22017 | /* V*DImode arithmetic right shift is emulated. */ |
22018 | if (code == ASHIFTRT && !TARGET_AVX512VL) |
22019 | { |
22020 | if (constant_op1) |
22021 | { |
22022 | if (op1_val == 63) |
22023 | count = TARGET_SSE4_2 ? 1 : 2; |
22024 | else if (TARGET_XOP) |
22025 | count = 2; |
22026 | else if (TARGET_SSE4_1) |
22027 | count = 3; |
22028 | else |
22029 | count = 4; |
22030 | } |
22031 | else if (TARGET_XOP) |
22032 | count = 3; |
22033 | else if (TARGET_SSE4_2) |
22034 | count = 4; |
22035 | else |
22036 | count = 5; |
22037 | |
22038 |               return ix86_vec_cost (mode, cost->sse_op * count);
22039 | } |
22040 | /* FALLTHRU */ |
22041 | default: |
22042 |           return ix86_vec_cost (mode, cost->sse_op);
22043 | } |
22044 | } |
22045 | |
22046 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22047 | { |
22048 | if (constant_op1) |
22049 | { |
22050 | if (op1_val > 32) |
22051 | return cost->shift_const + COSTS_N_INSNS (2); |
22052 | else |
22053 | return cost->shift_const * 2; |
22054 | } |
22055 | else |
22056 | { |
22057 | if (and_in_op1) |
22058 | return cost->shift_var * 2; |
22059 | else |
22060 | return cost->shift_var * 6 + COSTS_N_INSNS (2); |
22061 | } |
22062 | } |
22063 | else |
22064 | { |
22065 | if (constant_op1) |
22066 | return cost->shift_const; |
22067 | else if (shift_and_truncate) |
22068 | { |
22069 | if (skip_op0) |
22070 | *skip_op0 = *skip_op1 = true; |
22071 | /* Return the cost after shift-and truncation. */ |
22072 | return cost->shift_var; |
22073 | } |
22074 | else |
22075 | return cost->shift_var; |
22076 | } |
22077 | } |
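
/* Worked example, added for illustration only: a double-word shift (e.g.
   DImode on a 32-bit target) by a constant of at most 32 costs
   shift_const * 2, by a larger constant shift_const + COSTS_N_INSNS (2),
   and by a variable count without a masking AND
   shift_var * 6 + COSTS_N_INSNS (2); a single-word shift is simply
   shift_const or shift_var.  */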
22078 | |
22079 | static int |
22080 | ix86_insn_cost (rtx_insn *insn, bool speed) |
22081 | { |
22082 | int insn_cost = 0; |
22083 |   /* Add extra cost to prevent post-reload late_combine from reverting
22084 |      the optimization done in pass_rpad.  */
22085 | if (reload_completed |
22086 | && ix86_rpad_gate () |
22087 | && recog_memoized (insn) >= 0 |
22088 | && get_attr_avx_partial_xmm_update (insn) |
22089 | == AVX_PARTIAL_XMM_UPDATE_TRUE) |
22090 | insn_cost += COSTS_N_INSNS (3); |
22091 | |
22092 | return insn_cost + pattern_cost (PATTERN (insn), speed); |
22093 | } |
22094 | |
22095 | /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ |
22096 | |
22097 | static int |
22098 | vec_fp_conversion_cost (const struct processor_costs *cost, int size) |
22099 | { |
22100 | if (size < 128) |
22101 | return cost->cvtss2sd; |
22102 | else if (size < 256) |
22103 | { |
22104 | if (TARGET_SSE_SPLIT_REGS) |
22105 | return cost->cvtss2sd * size / 64; |
22106 | return cost->cvtss2sd; |
22107 | } |
22108 | if (size < 512) |
22109 | return cost->vcvtps2pd256; |
22110 | else |
22111 | return cost->vcvtps2pd512; |
22112 | } |
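
/* Illustration only: a scalar conversion (SIZE < 128) costs cvtss2sd, a
   128-bit one the same unless TARGET_SSE_SPLIT_REGS doubles it, a 256-bit
   one vcvtps2pd256 and a 512-bit one vcvtps2pd512.  */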
22113 | |
22114 | /* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
22115 | |
22116 | static bool |
22117 | unspec_pcmp_p (rtx x) |
22118 | { |
22119 | return GET_CODE (x) == UNSPEC |
22120 | && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); |
22121 | } |
22122 | |
22123 | /* Compute a (partial) cost for rtx X. Return true if the complete |
22124 | cost has been computed, and false if subexpressions should be |
22125 | scanned. In either case, *TOTAL contains the cost result. */ |
22126 | |
22127 | static bool |
22128 | ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
22129 | int *total, bool speed) |
22130 | { |
22131 | rtx mask; |
22132 | enum rtx_code code = GET_CODE (x); |
22133 | enum rtx_code outer_code = (enum rtx_code) outer_code_i; |
22134 | const struct processor_costs *cost |
22135 | = speed ? ix86_tune_cost : &ix86_size_cost; |
22136 | int src_cost; |
22137 | |
22138 | /* Handling different vternlog variants. */ |
22139 | if ((GET_MODE_SIZE (mode) == 64 |
22140 | ? TARGET_AVX512F |
22141 | : (TARGET_AVX512VL |
22142 | || (TARGET_AVX512F && !TARGET_PREFER_AVX256))) |
22143 | && GET_MODE_SIZE (mode) >= 16 |
22144 | && outer_code_i == SET |
22145 | && ternlog_operand (x, mode)) |
22146 | { |
22147 | rtx args[3]; |
22148 | |
22149 | args[0] = NULL_RTX; |
22150 | args[1] = NULL_RTX; |
22151 | args[2] = NULL_RTX; |
22152 |       int idx = ix86_ternlog_idx (x, args);
22153 | gcc_assert (idx >= 0); |
22154 | |
22155 | *total = cost->sse_op; |
22156 | for (int i = 0; i != 3; i++) |
22157 | if (args[i]) |
22158 | *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed); |
22159 | return true; |
22160 | } |
22161 | |
22162 | |
22163 | switch (code) |
22164 | { |
22165 | case SET: |
22166 | if (register_operand (SET_DEST (x), VOIDmode) |
22167 | && register_operand (SET_SRC (x), VOIDmode)) |
22168 | { |
22169 | *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); |
22170 | return true; |
22171 | } |
22172 | |
22173 | if (register_operand (SET_SRC (x), VOIDmode)) |
22174 | /* Avoid potentially incorrect high cost from rtx_costs |
22175 | for non-tieable SUBREGs. */ |
22176 | src_cost = 0; |
22177 | else |
22178 | { |
22179 | src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); |
22180 | |
22181 | if (CONSTANT_P (SET_SRC (x))) |
22182 | /* Constant costs assume a base value of COSTS_N_INSNS (1) and add |
22183 | a small value, possibly zero for cheap constants. */ |
22184 | src_cost += COSTS_N_INSNS (1); |
22185 | } |
22186 | |
22187 | *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); |
22188 | return true; |
22189 | |
22190 | case CONST_INT: |
22191 | case CONST: |
22192 | case LABEL_REF: |
22193 | case SYMBOL_REF: |
22194 | if (x86_64_immediate_operand (x, VOIDmode)) |
22195 | *total = 0; |
22196 | else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode)) |
22197 | /* Consider the zext constants slightly more expensive, as they |
22198 | can't appear in most instructions. */ |
22199 | *total = 1; |
22200 | else |
22201 | /* movabsq is slightly more expensive than a simple instruction. */ |
22202 | *total = COSTS_N_INSNS (1) + 1; |
22203 | return true; |
22204 | |
22205 | case CONST_DOUBLE: |
22206 | if (IS_STACK_MODE (mode)) |
22207 | switch (standard_80387_constant_p (x)) |
22208 | { |
22209 | case -1: |
22210 | case 0: |
22211 | break; |
22212 | case 1: /* 0.0 */ |
22213 | *total = 1; |
22214 | return true; |
22215 | default: /* Other constants */ |
22216 | *total = 2; |
22217 | return true; |
22218 | } |
22219 | /* FALLTHRU */ |
22220 | |
22221 | case CONST_VECTOR: |
22222 |       switch (standard_sse_constant_p (x, mode))
22223 | { |
22224 | case 0: |
22225 | break; |
22226 | case 1: /* 0: xor eliminates false dependency */ |
22227 | *total = 0; |
22228 | return true; |
22229 | default: /* -1: cmp contains false dependency */ |
22230 | *total = 1; |
22231 | return true; |
22232 | } |
22233 | /* FALLTHRU */ |
22234 | |
22235 | case CONST_WIDE_INT: |
22236 | /* Fall back to (MEM (SYMBOL_REF)), since that's where |
22237 | it'll probably end up. Add a penalty for size. */ |
22238 | *total = (COSTS_N_INSNS (1) |
22239 | + (!TARGET_64BIT && flag_pic) |
22240 | + (GET_MODE_SIZE (mode) <= 4 |
22241 | ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); |
22242 | return true; |
22243 | |
22244 | case ZERO_EXTEND: |
22245 |       /* The zero extension is often completely free on x86_64, so make
22246 |          it as cheap as possible.  */
22247 | if (TARGET_64BIT && mode == DImode |
22248 | && GET_MODE (XEXP (x, 0)) == SImode) |
22249 | *total = 1; |
22250 | else if (TARGET_ZERO_EXTEND_WITH_AND) |
22251 | *total = cost->add; |
22252 | else |
22253 | *total = cost->movzx; |
22254 | return false; |
22255 | |
22256 | case SIGN_EXTEND: |
22257 | *total = cost->movsx; |
22258 | return false; |
22259 | |
22260 | case ASHIFT: |
22261 | if (SCALAR_INT_MODE_P (mode) |
22262 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD |
22263 | && CONST_INT_P (XEXP (x, 1))) |
22264 | { |
22265 | HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
22266 | if (value == 1) |
22267 | { |
22268 | *total = cost->add; |
22269 | return false; |
22270 | } |
22271 | if ((value == 2 || value == 3) |
22272 | && cost->lea <= cost->shift_const) |
22273 | { |
22274 | *total = cost->lea; |
22275 | return false; |
22276 | } |
22277 | } |
22278 | /* FALLTHRU */ |
22279 | |
22280 | case ROTATE: |
22281 | case ASHIFTRT: |
22282 | case LSHIFTRT: |
22283 | case ROTATERT: |
22284 | bool skip_op0, skip_op1; |
22285 | *total = ix86_shift_rotate_cost (cost, code, mode, |
22286 | CONSTANT_P (XEXP (x, 1)), |
22287 | CONST_INT_P (XEXP (x, 1)) |
22288 | ? INTVAL (XEXP (x, 1)) : -1, |
22289 | GET_CODE (XEXP (x, 1)) == AND, |
22290 | SUBREG_P (XEXP (x, 1)) |
22291 | && GET_CODE (XEXP (XEXP (x, 1), |
22292 | 0)) == AND, |
22293 |                                        &skip_op0, &skip_op1);
22294 | if (skip_op0 || skip_op1) |
22295 | { |
22296 | if (!skip_op0) |
22297 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
22298 | if (!skip_op1) |
22299 | *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); |
22300 | return true; |
22301 | } |
22302 | return false; |
22303 | |
22304 | case FMA: |
22305 | { |
22306 | rtx sub; |
22307 | |
22308 | gcc_assert (FLOAT_MODE_P (mode)); |
22309 | gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); |
22310 | |
22311 | *total = ix86_vec_cost (mode, |
22312 | GET_MODE_INNER (mode) == SFmode |
22313 | ? cost->fmass : cost->fmasd); |
22314 | *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); |
22315 | |
22316 | /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ |
22317 | sub = XEXP (x, 0); |
22318 | if (GET_CODE (sub) == NEG) |
22319 | sub = XEXP (sub, 0); |
22320 | *total += rtx_cost (sub, mode, FMA, 0, speed); |
22321 | |
22322 | sub = XEXP (x, 2); |
22323 | if (GET_CODE (sub) == NEG) |
22324 | sub = XEXP (sub, 0); |
22325 | *total += rtx_cost (sub, mode, FMA, 2, speed); |
22326 | return true; |
22327 | } |
22328 | |
22329 | case MULT: |
22330 | if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) |
22331 | { |
22332 | rtx op0 = XEXP (x, 0); |
22333 | rtx op1 = XEXP (x, 1); |
22334 | int nbits; |
22335 | if (CONST_INT_P (XEXP (x, 1))) |
22336 | { |
22337 | unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
22338 | for (nbits = 0; value != 0; value &= value - 1) |
22339 | nbits++; |
22340 | } |
22341 | else |
22342 | /* This is arbitrary. */ |
22343 | nbits = 7; |
22344 | |
22345 | /* Compute costs correctly for widening multiplication. */ |
22346 | if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) |
22347 | && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 |
22348 | == GET_MODE_SIZE (mode)) |
22349 | { |
22350 | int is_mulwiden = 0; |
22351 | machine_mode inner_mode = GET_MODE (op0); |
22352 | |
22353 | if (GET_CODE (op0) == GET_CODE (op1)) |
22354 | is_mulwiden = 1, op1 = XEXP (op1, 0); |
22355 | else if (CONST_INT_P (op1)) |
22356 | { |
22357 | if (GET_CODE (op0) == SIGN_EXTEND) |
22358 | is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) |
22359 | == INTVAL (op1); |
22360 | else |
22361 | is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); |
22362 | } |
22363 | |
22364 | if (is_mulwiden) |
22365 | op0 = XEXP (op0, 0), mode = GET_MODE (op0); |
22366 | } |
22367 | |
22368 | int mult_init; |
22369 | // Double word multiplication requires 3 mults and 2 adds. |
22370 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22371 | { |
22372 | mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)] |
22373 | + 2 * cost->add; |
22374 | nbits *= 3; |
22375 | } |
22376 | else mult_init = cost->mult_init[MODE_INDEX (mode)]; |
22377 | |
22378 | *total = (mult_init |
22379 | + nbits * cost->mult_bit |
22380 | + rtx_cost (op0, mode, outer_code, opno, speed) |
22381 | + rtx_cost (op1, mode, outer_code, opno, speed)); |
22382 | |
22383 | return true; |
22384 | } |
22385 | *total = ix86_multiplication_cost (cost, mode); |
22386 | return false; |
22387 | |
22388 | case DIV: |
22389 | case UDIV: |
22390 | case MOD: |
22391 | case UMOD: |
22392 | *total = ix86_division_cost (cost, mode); |
22393 | return false; |
22394 | |
22395 | case PLUS: |
22396 | if (GET_MODE_CLASS (mode) == MODE_INT |
22397 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
22398 | { |
22399 | if (GET_CODE (XEXP (x, 0)) == PLUS |
22400 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
22401 | && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
22402 | && CONSTANT_P (XEXP (x, 1))) |
22403 | { |
22404 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); |
22405 | if (val == 2 || val == 4 || val == 8) |
22406 | { |
22407 | *total = cost->lea; |
22408 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
22409 | outer_code, opno, speed); |
22410 | *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, |
22411 | outer_code, opno, speed); |
22412 | *total += rtx_cost (XEXP (x, 1), mode, |
22413 | outer_code, opno, speed); |
22414 | return true; |
22415 | } |
22416 | } |
22417 | else if (GET_CODE (XEXP (x, 0)) == MULT |
22418 | && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
22419 | { |
22420 | HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); |
22421 | if (val == 2 || val == 4 || val == 8) |
22422 | { |
22423 | *total = cost->lea; |
22424 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22425 | outer_code, opno, speed); |
22426 | *total += rtx_cost (XEXP (x, 1), mode, |
22427 | outer_code, opno, speed); |
22428 | return true; |
22429 | } |
22430 | } |
22431 | else if (GET_CODE (XEXP (x, 0)) == PLUS) |
22432 | { |
22433 | rtx op = XEXP (XEXP (x, 0), 0); |
22434 | |
22435 | /* Add with carry, ignore the cost of adding a carry flag. */ |
22436 | if (ix86_carry_flag_operator (op, mode) |
22437 | || ix86_carry_flag_unset_operator (op, mode)) |
22438 | *total = cost->add; |
22439 | else |
22440 | { |
22441 | *total = cost->lea; |
22442 | *total += rtx_cost (op, mode, |
22443 | outer_code, opno, speed); |
22444 | } |
22445 | |
22446 | *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
22447 | outer_code, opno, speed); |
22448 | *total += rtx_cost (XEXP (x, 1), mode, |
22449 | outer_code, opno, speed); |
22450 | return true; |
22451 | } |
22452 | } |
22453 | /* FALLTHRU */ |
22454 | |
22455 | case MINUS: |
22456 | /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ |
22457 | if (GET_MODE_CLASS (mode) == MODE_INT |
22458 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD |
22459 | && GET_CODE (XEXP (x, 0)) == MINUS |
22460 | && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode) |
22461 | || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode))) |
22462 | { |
22463 | *total = cost->add; |
22464 | *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22465 | outer_code, opno, speed); |
22466 | *total += rtx_cost (XEXP (x, 1), mode, |
22467 | outer_code, opno, speed); |
22468 | return true; |
22469 | } |
22470 | |
22471 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22472 | *total = cost->addss; |
22473 | else if (X87_FLOAT_MODE_P (mode)) |
22474 | *total = cost->fadd; |
22475 | else if (FLOAT_MODE_P (mode)) |
22476 |         *total = ix86_vec_cost (mode, cost->addss);
22477 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
22478 |         *total = ix86_vec_cost (mode, cost->sse_op);
22479 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22480 | *total = cost->add * 2; |
22481 | else |
22482 | *total = cost->add; |
22483 | return false; |
22484 | |
22485 | case IOR: |
22486 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
22487 | || SSE_FLOAT_MODE_P (mode)) |
22488 | { |
22489 | /* (ior (not ...) ...) can be a single insn in AVX512. */ |
22490 | if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F |
22491 | && (GET_MODE_SIZE (mode) == 64 |
22492 | || (TARGET_AVX512VL |
22493 | && (GET_MODE_SIZE (mode) == 32 |
22494 | || GET_MODE_SIZE (mode) == 16)))) |
22495 | { |
22496 | rtx right = GET_CODE (XEXP (x, 1)) != NOT |
22497 | ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0); |
22498 | |
22499 |               *total = ix86_vec_cost (mode, cost->sse_op)
22500 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22501 | outer_code, opno, speed) |
22502 | + rtx_cost (right, mode, outer_code, opno, speed); |
22503 | return true; |
22504 | } |
22505 |           *total = ix86_vec_cost (mode, cost->sse_op);
22506 | } |
22507 | else if (TARGET_64BIT |
22508 | && mode == TImode |
22509 | && GET_CODE (XEXP (x, 0)) == ASHIFT |
22510 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND |
22511 | && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode |
22512 | && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
22513 | && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 |
22514 | && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND |
22515 | && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode) |
22516 | { |
22517 | /* *concatditi3 is cheap. */ |
22518 | rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0); |
22519 | rtx op1 = XEXP (XEXP (x, 1), 0); |
22520 | *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode) |
22521 | ? COSTS_N_INSNS (1) /* movq. */ |
22522 |                    : set_src_cost (op0, DImode, speed);
22523 | *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode) |
22524 | ? COSTS_N_INSNS (1) /* movq. */ |
22525 |                     : set_src_cost (op1, DImode, speed);
22526 | return true; |
22527 | } |
22528 | else if (TARGET_64BIT |
22529 | && mode == TImode |
22530 | && GET_CODE (XEXP (x, 0)) == AND |
22531 | && REG_P (XEXP (XEXP (x, 0), 0)) |
22532 | && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1)) |
22533 | && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2 |
22534 | && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1 |
22535 | && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0 |
22536 | && GET_CODE (XEXP (x, 1)) == ASHIFT |
22537 | && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND |
22538 | && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode |
22539 | && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
22540 | && INTVAL (XEXP (XEXP (x, 1), 1)) == 64) |
22541 | { |
22542 | /* *insvti_highpart is cheap. */ |
22543 | rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0); |
22544 | *total = COSTS_N_INSNS (1) + 1; |
22545 | *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode) |
22546 | ? COSTS_N_INSNS (1) /* movq. */ |
22547 |                     : set_src_cost (op, DImode, speed);
22548 | return true; |
22549 | } |
22550 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22551 | *total = cost->add * 2; |
22552 | else |
22553 | *total = cost->add; |
22554 | return false; |
22555 | |
22556 | case XOR: |
22557 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
22558 | || SSE_FLOAT_MODE_P (mode)) |
22559 |         *total = ix86_vec_cost (mode, cost->sse_op);
22560 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22561 | *total = cost->add * 2; |
22562 | else |
22563 | *total = cost->add; |
22564 | return false; |
22565 | |
22566 | case AND: |
22567 | if (address_no_seg_operand (x, mode)) |
22568 | { |
22569 | *total = cost->lea; |
22570 | return true; |
22571 | } |
22572 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
22573 | || SSE_FLOAT_MODE_P (mode)) |
22574 | { |
22575 | /* pandn is a single instruction. */ |
22576 | if (GET_CODE (XEXP (x, 0)) == NOT) |
22577 | { |
22578 | rtx right = XEXP (x, 1); |
22579 | |
22580 | /* (and (not ...) (not ...)) can be a single insn in AVX512. */ |
22581 | if (GET_CODE (right) == NOT && TARGET_AVX512F |
22582 | && (GET_MODE_SIZE (mode) == 64 |
22583 | || (TARGET_AVX512VL |
22584 | && (GET_MODE_SIZE (mode) == 32 |
22585 | || GET_MODE_SIZE (mode) == 16)))) |
22586 | right = XEXP (right, 0); |
22587 | |
22588 |               *total = ix86_vec_cost (mode, cost->sse_op)
22589 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22590 | outer_code, opno, speed) |
22591 | + rtx_cost (right, mode, outer_code, opno, speed); |
22592 | return true; |
22593 | } |
22594 | else if (GET_CODE (XEXP (x, 1)) == NOT) |
22595 | { |
22596 |               *total = ix86_vec_cost (mode, cost->sse_op)
22597 | + rtx_cost (XEXP (x, 0), mode, |
22598 | outer_code, opno, speed) |
22599 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
22600 | outer_code, opno, speed); |
22601 | return true; |
22602 | } |
22603 |           *total = ix86_vec_cost (mode, cost->sse_op);
22604 | } |
22605 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22606 | { |
22607 | if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
22608 | { |
22609 | *total = cost->add * 2 |
22610 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22611 | outer_code, opno, speed) |
22612 | + rtx_cost (XEXP (x, 1), mode, |
22613 | outer_code, opno, speed); |
22614 | return true; |
22615 | } |
22616 | else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT) |
22617 | { |
22618 | *total = cost->add * 2 |
22619 | + rtx_cost (XEXP (x, 0), mode, |
22620 | outer_code, opno, speed) |
22621 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
22622 | outer_code, opno, speed); |
22623 | return true; |
22624 | } |
22625 | *total = cost->add * 2; |
22626 | } |
22627 | else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT) |
22628 | { |
22629 | *total = cost->add |
22630 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22631 | outer_code, opno, speed) |
22632 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
22633 | return true; |
22634 | } |
22635 | else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT) |
22636 | { |
22637 | *total = cost->add |
22638 | + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
22639 | + rtx_cost (XEXP (XEXP (x, 1), 0), mode, |
22640 | outer_code, opno, speed); |
22641 | return true; |
22642 | } |
22643 | else |
22644 | *total = cost->add; |
22645 | return false; |
22646 | |
22647 | case NOT: |
22648 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
22649 | { |
22650 | /* (not (xor ...)) can be a single insn in AVX512. */ |
22651 | if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F |
22652 | && (GET_MODE_SIZE (mode) == 64 |
22653 | || (TARGET_AVX512VL |
22654 | && (GET_MODE_SIZE (mode) == 32 |
22655 | || GET_MODE_SIZE (mode) == 16)))) |
22656 | { |
22657 |               *total = ix86_vec_cost (mode, cost->sse_op)
22658 | + rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22659 | outer_code, opno, speed) |
22660 | + rtx_cost (XEXP (XEXP (x, 0), 1), mode, |
22661 | outer_code, opno, speed); |
22662 | return true; |
22663 | } |
22664 | |
22665 | // vnot is pxor -1. |
22666 |           *total = ix86_vec_cost (mode, cost->sse_op) + 1;
22667 | } |
22668 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22669 | *total = cost->add * 2; |
22670 | else |
22671 | *total = cost->add; |
22672 | return false; |
22673 | |
22674 | case NEG: |
22675 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22676 | *total = cost->sse_op; |
22677 | else if (X87_FLOAT_MODE_P (mode)) |
22678 | *total = cost->fchs; |
22679 | else if (FLOAT_MODE_P (mode)) |
22680 |         *total = ix86_vec_cost (mode, cost->sse_op);
22681 |       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22682 |         *total = ix86_vec_cost (mode, cost->sse_op);
22683 | else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
22684 | *total = cost->add * 3; |
22685 | else |
22686 | *total = cost->add; |
22687 | return false; |
22688 | |
22689 | case COMPARE: |
22690 | rtx op0, op1; |
22691 | op0 = XEXP (x, 0); |
22692 | op1 = XEXP (x, 1); |
22693 | if (GET_CODE (op0) == ZERO_EXTRACT |
22694 | && XEXP (op0, 1) == const1_rtx |
22695 | && CONST_INT_P (XEXP (op0, 2)) |
22696 | && op1 == const0_rtx) |
22697 | { |
22698 | /* This kind of construct is implemented using test[bwl]. |
22699 | Treat it as if we had an AND. */ |
22700 | mode = GET_MODE (XEXP (op0, 0)); |
22701 | *total = (cost->add |
22702 | + rtx_cost (XEXP (op0, 0), mode, outer_code, |
22703 | opno, speed) |
22704 | + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); |
22705 | return true; |
22706 | } |
22707 | |
22708 | if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1)) |
22709 | { |
22710 | /* This is an overflow detection, count it as a normal compare. */ |
22711 | *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed); |
22712 | return true; |
22713 | } |
22714 | |
22715 | rtx geu; |
22716 | /* Match x |
22717 | (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) |
22718 | (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */ |
22719 | if (mode == CCCmode |
22720 | && GET_CODE (op0) == NEG |
22721 | && GET_CODE (geu = XEXP (op0, 0)) == GEU |
22722 | && REG_P (XEXP (geu, 0)) |
22723 | && (GET_MODE (XEXP (geu, 0)) == CCCmode |
22724 | || GET_MODE (XEXP (geu, 0)) == CCmode) |
22725 | && REGNO (XEXP (geu, 0)) == FLAGS_REG |
22726 | && XEXP (geu, 1) == const0_rtx |
22727 | && GET_CODE (op1) == LTU |
22728 | && REG_P (XEXP (op1, 0)) |
22729 | && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) |
22730 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
22731 | && XEXP (op1, 1) == const0_rtx) |
22732 | { |
22733 | /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */ |
22734 | *total = 0; |
22735 | return true; |
22736 | } |
22737 | /* Match x |
22738 | (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) |
22739 | (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */ |
22740 | if (mode == CCCmode |
22741 | && GET_CODE (op0) == NEG |
22742 | && GET_CODE (XEXP (op0, 0)) == LTU |
22743 | && REG_P (XEXP (XEXP (op0, 0), 0)) |
22744 | && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode |
22745 | && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG |
22746 | && XEXP (XEXP (op0, 0), 1) == const0_rtx |
22747 | && GET_CODE (op1) == GEU |
22748 | && REG_P (XEXP (op1, 0)) |
22749 | && GET_MODE (XEXP (op1, 0)) == CCCmode |
22750 | && REGNO (XEXP (op1, 0)) == FLAGS_REG |
22751 | && XEXP (op1, 1) == const0_rtx) |
22752 | { |
22753 | /* This is *x86_cmc. */ |
22754 | if (!speed) |
22755 | *total = COSTS_N_BYTES (1); |
22756 | else if (TARGET_SLOW_STC) |
22757 | *total = COSTS_N_INSNS (2); |
22758 | else |
22759 | *total = COSTS_N_INSNS (1); |
22760 | return true; |
22761 | } |
22762 | |
22763 | if (SCALAR_INT_MODE_P (GET_MODE (op0)) |
22764 | && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
22765 | { |
22766 | if (op1 == const0_rtx) |
22767 | *total = cost->add |
22768 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed); |
22769 | else |
22770 | *total = 3*cost->add |
22771 | + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed) |
22772 | + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed); |
22773 | return true; |
22774 | } |
22775 | |
22776 | /* The embedded comparison operand is completely free. */ |
22777 | if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx) |
22778 | *total = 0; |
22779 | |
22780 | return false; |
22781 | |
22782 | case FLOAT_EXTEND: |
22783 | /* x87 represents all values extended to 80bit. */ |
22784 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22785 | *total = 0; |
22786 | else |
22787 | *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); |
22788 | return false; |
22789 | |
22790 | case FLOAT_TRUNCATE: |
22791 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22792 | *total = cost->fadd; |
22793 | else |
22794 | *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); |
22795 | return false; |
22796 | case FLOAT: |
22797 | case UNSIGNED_FLOAT: |
22798 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22799 | /* TODO: We do not have cost tables for x87. */ |
22800 | *total = cost->fadd; |
22801 | else if (VECTOR_MODE_P (mode)) |
22802 | *total = ix86_vec_cost (mode, cost: cost->cvtpi2ps); |
22803 | else |
22804 | *total = cost->cvtsi2ss; |
22805 | return false; |
22806 | |
22807 | case FIX: |
22808 | case UNSIGNED_FIX: |
22809 | if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22810 | /* TODO: We do not have cost tables for x87. */ |
22811 | *total = cost->fadd; |
22812 | else if (VECTOR_MODE_P (mode)) |
22813 | *total = ix86_vec_cost (mode, cost: cost->cvtps2pi); |
22814 | else |
22815 | *total = cost->cvtss2si; |
22816 | return false; |
22817 | |
22818 | case ABS: |
22819 | /* SSE requires memory load for the constant operand. It may make |
22820 | sense to account for this. Of course the constant operand may or |
22821 | may not be reused. */ |
22822 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22823 | *total = cost->sse_op; |
22824 | else if (X87_FLOAT_MODE_P (mode)) |
22825 | *total = cost->fabs; |
22826 | else if (FLOAT_MODE_P (mode)) |
22827 | *total = ix86_vec_cost (mode, cost: cost->sse_op); |
22828 | else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
22829 | *total = cost->sse_op; |
22830 | return false; |
22831 | |
22832 | case SQRT: |
22833 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
22834 | *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; |
22835 | else if (X87_FLOAT_MODE_P (mode)) |
22836 | *total = cost->fsqrt; |
22837 | else if (FLOAT_MODE_P (mode)) |
22838 | *total = ix86_vec_cost (mode, |
22839 | cost: mode == SFmode ? cost->sqrtss : cost->sqrtsd); |
22840 | return false; |
22841 | |
22842 | case UNSPEC: |
22843 | if (XINT (x, 1) == UNSPEC_TP) |
22844 | *total = 0; |
22845 | else if (XINT (x, 1) == UNSPEC_VTERNLOG) |
22846 | { |
22847 | *total = cost->sse_op; |
22848 | *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed); |
22849 | *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed); |
22850 | *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed); |
22851 | return true; |
22852 | } |
22853 | else if (XINT (x, 1) == UNSPEC_PTEST) |
22854 | { |
22855 | *total = cost->sse_op; |
22856 | rtx test_op0 = XVECEXP (x, 0, 0); |
22857 | if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) |
22858 | return false; |
22859 | if (GET_CODE (test_op0) == AND) |
22860 | { |
22861 | rtx and_op0 = XEXP (test_op0, 0); |
22862 | if (GET_CODE (and_op0) == NOT) |
22863 | and_op0 = XEXP (and_op0, 0); |
22864 | *total += rtx_cost (and_op0, GET_MODE (and_op0), |
22865 | AND, 0, speed) |
22866 | + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), |
22867 | AND, 1, speed); |
22868 | } |
22869 | else |
22870 | *total = rtx_cost (test_op0, GET_MODE (test_op0), |
22871 | UNSPEC, 0, speed); |
22872 | return true; |
22873 | } |
22874 | return false; |
22875 | |
22876 | case VEC_CONCAT: |
22877 | /* ??? Assume all of these vector manipulation patterns are |
22878 | recognizable, in which case they all have pretty much the |
22879 | same cost. |
22880 | ??? We should still recurse when computing the cost. */ |
22881 | *total = cost->sse_op; |
22882 | return true; |
22883 | |
22884 | case VEC_SELECT: |
22885 | /* Special case of extracting the lower part from the vector. |
22886 | This by itself needs no code, and most SSE/AVX instructions have |
22887 | packed and single forms where the single form may be represented |
22888 | by such VEC_SELECT. |
22889 | |
22890 | Use cost 1 (despite the fact that functionally equivalent SUBREG has |
22891 | cost 0). Making VEC_SELECT completely free, for example, instructs CSE |
22892 | to forward propagate VEC_SELECT into |
22893 | |
22894 | (set (reg eax) (reg src)) |
22895 | |
22896 | which then prevents fwprop and combining. See e.g. |
22897 | gcc.target/i386/pr91103-1.c. |
22898 | |
22899 | ??? rtvec_series_p test should be, for valid patterns, equivalent to |
22900 | vec_series_lowpart_p but is not, since the latter calls |
22901 | can_cange_mode_class on ALL_REGS and this return false since x87 does |
22902 | not support subregs at all. */ |
22903 | if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0)) |
22904 | *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), |
22905 | outer_code, opno, speed) + 1; |
22906 | else |
22907 | /* ??? We should still recurse when computing the cost. */ |
22908 | *total = cost->sse_op; |
22909 | return true; |
22910 | |
22911 | case VEC_DUPLICATE: |
22912 | *total = rtx_cost (XEXP (x, 0), |
22913 | GET_MODE (XEXP (x, 0)), |
22914 | VEC_DUPLICATE, 0, speed); |
22915 | /* It's a broadcast instruction, not an embedded broadcast. */ |
22916 | if (outer_code == SET) |
22917 | *total += cost->sse_op; |
22918 | |
22919 | return true; |
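/* For illustration: (set (reg:V8SF) (vec_duplicate:V8SF (reg:SF))) is a
   real broadcast instruction such as vbroadcastss, so sse_op is added;
   when the vec_duplicate only feeds another AVX-512 operation it can
   usually be encoded as an embedded-broadcast operand ({1to8} etc.) and
   costs nothing extra.  */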
22920 | |
22921 | case VEC_MERGE: |
22922 | mask = XEXP (x, 2); |
22923 | /* Scalar versions of SSE instructions may be represented as: |
22924 | |
22925 | (vec_merge (vec_duplicate (operation ....)) |
22926 | (register or memory) |
22927 | (const_int 1)) |
22928 | |
22929 | In this case the vec_merge and vec_duplicate are free. |
22930 | Just recurse into operation and second operand. */ |
22931 | if (mask == const1_rtx |
22932 | && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) |
22933 | { |
22934 | *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
22935 | outer_code, opno, speed) |
22936 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
22937 | return true; |
22938 | } |
22939 | /* This is a masked instruction; assume the same cost |
22940 | as the non-masked variant. */ |
22941 | else if (TARGET_AVX512F |
22942 | && (register_operand (mask, GET_MODE (mask)) |
22943 | /* Redundant clean-up of the high bits for a kmask with VL=2/4, |
22944 | e.g. (vec_merge op0, op1, (and op3 15)). */ |
22945 | || (GET_CODE (mask) == AND |
22946 | && register_operand (XEXP (mask, 0), GET_MODE (mask)) |
22947 | && CONST_INT_P (XEXP (mask, 1)) |
22948 | && ((INTVAL (XEXP (mask, 1)) == 3 |
22949 | && GET_MODE_NUNITS (mode) == 2) |
22950 | || (INTVAL (XEXP (mask, 1)) == 15 |
22951 | && GET_MODE_NUNITS (mode) == 4))))) |
22952 | { |
22953 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
22954 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
22955 | return true; |
22956 | } |
22957 | /* Combination of the two above: |
22958 | |
22959 | (vec_merge (vec_merge (vec_duplicate (operation ...)) |
22960 | (register or memory) |
22961 | (reg:QI mask)) |
22962 | (register or memory) |
22963 | (const_int 1)) |
22964 | |
22965 | i.e. avx512fp16_vcvtss2sh_mask. */ |
22966 | else if (TARGET_AVX512F |
22967 | && mask == const1_rtx |
22968 | && GET_CODE (XEXP (x, 0)) == VEC_MERGE |
22969 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE |
22970 | && register_operand (XEXP (XEXP (x, 0), 2), |
22971 | GET_MODE (XEXP (XEXP (x, 0), 2)))) |
22972 | { |
22973 | *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), |
22974 | mode, outer_code, opno, speed) |
22975 | + rtx_cost (XEXP (XEXP (x, 0), 1), |
22976 | mode, outer_code, opno, speed) |
22977 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
22978 | return true; |
22979 | } |
22980 | /* vcmp. */ |
22981 | else if (unspec_pcmp_p (x: mask) |
22982 | || (GET_CODE (mask) == NOT |
22983 | && unspec_pcmp_p (XEXP (mask, 0)))) |
22984 | { |
22985 | rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; |
22986 | rtx unsop0 = XVECEXP (uns, 0, 0); |
22987 | /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) |
22988 | cost the same as register. |
22989 | This is used by avx_cmp<mode>3_ltint_not. */ |
22990 | if (GET_CODE (unsop0) == SUBREG) |
22991 | unsop0 = XEXP (unsop0, 0); |
22992 | if (GET_CODE (unsop0) == NOT) |
22993 | unsop0 = XEXP (unsop0, 0); |
22994 | *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
22995 | + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) |
22996 | + rtx_cost (unsop0, mode, UNSPEC, opno, speed) |
22997 | + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) |
22998 | + cost->sse_op; |
22999 | return true; |
23000 | } |
23001 | else |
23002 | *total = cost->sse_op; |
23003 | return false; |
23004 | |
23005 | case MEM: |
23006 | /* A CONST_VECTOR_DUPLICATE_P in the constant pool is just a broadcast, |
23007 | or one of the variants in ix86_vector_duplicate_simode_const. */ |
23008 | |
23009 | if (GET_MODE_SIZE (mode) >= 16 |
23010 | && VECTOR_MODE_P (mode) |
23011 | && SYMBOL_REF_P (XEXP (x, 0)) |
23012 | && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) |
23013 | && ix86_broadcast_from_constant (mode, x)) |
23014 | { |
23015 | *total = COSTS_N_INSNS (2) + speed; |
23016 | return true; |
23017 | } |
23018 | |
23019 | /* An insn that accesses memory is slightly more expensive |
23020 | than one that does not. */ |
23021 | if (speed) |
23022 | { |
23023 | *total += 1; |
23024 | rtx addr = XEXP (x, 0); |
23025 | /* For MEM, rtx_cost iterates over each subrtx and adds up the costs, |
23026 | so for MEM (reg) and MEM (reg + 4) the former costs 5 and |
23027 | the latter costs 9, which is not accurate for x86. Ideally |
23028 | address_cost should be used, but it reduces the cost too much. |
23029 | So the current solution is to make a constant disp as cheap as possible. */ |
23030 | if (GET_CODE (addr) == PLUS |
23031 | && x86_64_immediate_operand (XEXP (addr, 1), Pmode) |
23032 | /* Only handle (reg + disp) since other forms of address are mostly LEA, |
23033 | there's no additional cost for the plus of disp. */ |
23034 | && register_operand (XEXP (addr, 0), Pmode)) |
23035 | { |
23036 | *total += 1; |
23037 | *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed); |
23038 | return true; |
23039 | } |
23040 | } |
23041 | |
23042 | return false; |
23043 | |
23044 | case ZERO_EXTRACT: |
23045 | if (XEXP (x, 1) == const1_rtx |
23046 | && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND |
23047 | && GET_MODE (XEXP (x, 2)) == SImode |
23048 | && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode) |
23049 | { |
23050 | /* Ignore cost of zero extension and masking of last argument. */ |
23051 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
23052 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
23053 | *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed); |
23054 | return true; |
23055 | } |
23056 | return false; |
23057 | |
23058 | case IF_THEN_ELSE: |
23059 | if (TARGET_XOP |
23060 | && VECTOR_MODE_P (mode) |
23061 | && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32)) |
23062 | { |
23063 | /* vpcmov. */ |
23064 | *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6); |
23065 | if (!REG_P (XEXP (x, 0))) |
23066 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
23067 | if (!REG_P (XEXP (x, 1))) |
23068 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
23069 | if (!REG_P (XEXP (x, 2))) |
23070 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
23071 | return true; |
23072 | } |
23073 | else if (TARGET_CMOVE |
23074 | && SCALAR_INT_MODE_P (mode) |
23075 | && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
23076 | { |
23077 | /* cmov. */ |
23078 | *total = COSTS_N_INSNS (1); |
23079 | if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0))) |
23080 | *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); |
23081 | if (!REG_P (XEXP (x, 1))) |
23082 | *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); |
23083 | if (!REG_P (XEXP (x, 2))) |
23084 | *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed); |
23085 | return true; |
23086 | } |
23087 | return false; |
23088 | |
23089 | default: |
23090 | return false; |
23091 | } |
23092 | } |
23093 | |
23094 | #if TARGET_MACHO |
23095 | |
23096 | static int current_machopic_label_num; |
23097 | |
23098 | /* Given a symbol name and its associated stub, write out the |
23099 | definition of the stub. */ |
23100 | |
23101 | void |
23102 | machopic_output_stub (FILE *file, const char *symb, const char *stub) |
23103 | { |
23104 | unsigned int length; |
23105 | char *binder_name, *symbol_name, lazy_ptr_name[32]; |
23106 | int label = ++current_machopic_label_num; |
23107 | |
23108 | /* For 64-bit we shouldn't get here. */ |
23109 | gcc_assert (!TARGET_64BIT); |
23110 | |
23111 | /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ |
23112 | symb = targetm.strip_name_encoding (symb); |
23113 | |
23114 | length = strlen (stub); |
23115 | binder_name = XALLOCAVEC (char, length + 32); |
23116 | GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); |
23117 | |
23118 | length = strlen (symb); |
23119 | symbol_name = XALLOCAVEC (char, length + 32); |
23120 | GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); |
23121 | |
23122 | sprintf (lazy_ptr_name, "L%d$lz", label); |
23123 | |
23124 | if (MACHOPIC_ATT_STUB) |
23125 | switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); |
23126 | else if (MACHOPIC_PURE) |
23127 | switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); |
23128 | else |
23129 | switch_to_section (darwin_sections[machopic_symbol_stub_section]); |
23130 | |
23131 | fprintf (file, "%s:\n", stub); |
23132 | fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
23133 | |
23134 | if (MACHOPIC_ATT_STUB) |
23135 | { |
23136 | fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); |
23137 | } |
23138 | else if (MACHOPIC_PURE) |
23139 | { |
23140 | /* PIC stub. */ |
23141 | /* 25-byte PIC stub using "CALL get_pc_thunk". */ |
23142 | rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); |
23143 | output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */ |
23144 | fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", |
23145 | label, lazy_ptr_name, label); |
23146 | fprintf (file, "\tjmp\t*%%ecx\n"); |
23147 | } |
23148 | else |
23149 | fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); |
23150 | |
23151 | /* The AT&T-style ("self-modifying") stub is not lazily bound, thus |
23152 | it needs no stub-binding-helper. */ |
23153 | if (MACHOPIC_ATT_STUB) |
23154 | return; |
23155 | |
23156 | fprintf (file, "%s:\n", binder_name); |
23157 | |
23158 | if (MACHOPIC_PURE) |
23159 | { |
23160 | fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); |
23161 | fprintf (file, "\tpushl\t%%ecx\n"); |
23162 | } |
23163 | else |
23164 | fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); |
23165 | |
23166 | fputs ("\tjmp\tdyld_stub_binding_helper\n", file); |
23167 | |
23168 | /* N.B. Keep the correspondence of these |
23169 | 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the |
23170 | old-pic/new-pic/non-pic stubs; altering this will break |
23171 | compatibility with existing dylibs. */ |
23172 | if (MACHOPIC_PURE) |
23173 | { |
23174 | /* 25-byte PIC stub using "CALL get_pc_thunk". */ |
23175 | switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); |
23176 | } |
23177 | else |
23178 | /* 16-byte -mdynamic-no-pic stub. */ |
23179 | switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); |
23180 | |
23181 | fprintf (file, "%s:\n", lazy_ptr_name); |
23182 | fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
23183 | fprintf (file, ASM_LONG "%s\n", binder_name); |
23184 | } |
23185 | #endif /* TARGET_MACHO */ |
23186 | |
23187 | /* Order the registers for register allocator. */ |
23188 | |
23189 | void |
23190 | x86_order_regs_for_local_alloc (void) |
23191 | { |
23192 | int pos = 0; |
23193 | int i; |
23194 | |
23195 | /* First allocate the local general purpose registers. */ |
23196 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
23197 | if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (regno: i)) |
23198 | reg_alloc_order [pos++] = i; |
23199 | |
23200 | /* Global general purpose registers. */ |
23201 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
23202 | if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (regno: i)) |
23203 | reg_alloc_order [pos++] = i; |
23204 | |
23205 | /* x87 registers come first in case we are doing FP math |
23206 | using them. */ |
23207 | if (!TARGET_SSE_MATH) |
23208 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
23209 | reg_alloc_order [pos++] = i; |
23210 | |
23211 | /* SSE registers. */ |
23212 | for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) |
23213 | reg_alloc_order [pos++] = i; |
23214 | for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
23215 | reg_alloc_order [pos++] = i; |
23216 | |
23217 | /* Extended REX SSE registers. */ |
23218 | for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
23219 | reg_alloc_order [pos++] = i; |
23220 | |
23221 | /* Mask register. */ |
23222 | for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) |
23223 | reg_alloc_order [pos++] = i; |
23224 | |
23225 | /* x87 registers. */ |
23226 | if (TARGET_SSE_MATH) |
23227 | for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
23228 | reg_alloc_order [pos++] = i; |
23229 | |
23230 | for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) |
23231 | reg_alloc_order [pos++] = i; |
23232 | |
23233 | /* Initialize the rest of the array, as we do not allocate some registers |
23234 | at all. */ |
23235 | while (pos < FIRST_PSEUDO_REGISTER) |
23236 | reg_alloc_order [pos++] = 0; |
23237 | } |
23238 | |
23239 | static bool |
23240 | ix86_ms_bitfield_layout_p (const_tree record_type) |
23241 | { |
23242 | return ((TARGET_MS_BITFIELD_LAYOUT |
23243 | && !lookup_attribute (attr_name: "gcc_struct", TYPE_ATTRIBUTES (record_type))) |
23244 | || lookup_attribute (attr_name: "ms_struct", TYPE_ATTRIBUTES (record_type))); |
23245 | } |
23246 | |
23247 | /* Returns an expression indicating where the this parameter is |
23248 | located on entry to the FUNCTION. */ |
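/* For example, with the 64-bit SysV ABI the `this' pointer normally
   arrives in %rdi; if the function returns an aggregate through a hidden
   pointer, that pointer occupies %rdi and `this' moves to %rsi, which is
   what parm_regs[aggr] below selects.  */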
23249 | |
23250 | static rtx |
23251 | x86_this_parameter (tree function) |
23252 | { |
23253 | tree type = TREE_TYPE (function); |
23254 | bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; |
23255 | int nregs; |
23256 | |
23257 | if (TARGET_64BIT) |
23258 | { |
23259 | const int *parm_regs; |
23260 | |
23261 | if (ix86_function_type_abi (fntype: type) == MS_ABI) |
23262 | parm_regs = x86_64_ms_abi_int_parameter_registers; |
23263 | else |
23264 | parm_regs = x86_64_int_parameter_registers; |
23265 | return gen_rtx_REG (Pmode, parm_regs[aggr]); |
23266 | } |
23267 | |
23268 | nregs = ix86_function_regparm (type, decl: function); |
23269 | |
23270 | if (nregs > 0 && !stdarg_p (type)) |
23271 | { |
23272 | int regno; |
23273 | unsigned int ccvt = ix86_get_callcvt (type); |
23274 | |
23275 | if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
23276 | regno = aggr ? DX_REG : CX_REG; |
23277 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
23278 | { |
23279 | regno = CX_REG; |
23280 | if (aggr) |
23281 | return gen_rtx_MEM (SImode, |
23282 | plus_constant (Pmode, stack_pointer_rtx, 4)); |
23283 | } |
23284 | else |
23285 | { |
23286 | regno = AX_REG; |
23287 | if (aggr) |
23288 | { |
23289 | regno = DX_REG; |
23290 | if (nregs == 1) |
23291 | return gen_rtx_MEM (SImode, |
23292 | plus_constant (Pmode, |
23293 | stack_pointer_rtx, 4)); |
23294 | } |
23295 | } |
23296 | return gen_rtx_REG (SImode, regno); |
23297 | } |
23298 | |
23299 | return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, |
23300 | aggr ? 8 : 4)); |
23301 | } |
23302 | |
23303 | /* Determine whether x86_output_mi_thunk can succeed. */ |
23304 | |
23305 | static bool |
23306 | x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, |
23307 | const_tree function) |
23308 | { |
23309 | /* 64-bit can handle anything. */ |
23310 | if (TARGET_64BIT) |
23311 | return true; |
23312 | |
23313 | /* For 32-bit, everything's fine if we have one free register. */ |
23314 | if (ix86_function_regparm (TREE_TYPE (function), decl: function) < 3) |
23315 | return true; |
23316 | |
23317 | /* Need a free register for vcall_offset. */ |
23318 | if (vcall_offset) |
23319 | return false; |
23320 | |
23321 | /* Need a free register for GOT references. */ |
23322 | if (flag_pic && !targetm.binds_local_p (function)) |
23323 | return false; |
23324 | |
23325 | /* Otherwise ok. */ |
23326 | return true; |
23327 | } |
23328 | |
23329 | /* Output the assembler code for a thunk function. THUNK_DECL is the |
23330 | declaration for the thunk function itself, FUNCTION is the decl for |
23331 | the target function. DELTA is an immediate constant offset to be |
23332 | added to THIS. If VCALL_OFFSET is nonzero, the word at |
23333 | *(*this + vcall_offset) should be added to THIS. */ |
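/* As a rough sketch (the exact output depends on the target and flags),
   a 64-bit thunk with DELTA == -8 and no VCALL_OFFSET comes out as
   something like

	addq	$-8, %rdi
	jmp	<target function>

   i.e. adjust `this' in its argument register and tail-call the real
   function.  */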
23334 | |
23335 | static void |
23336 | x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, |
23337 | HOST_WIDE_INT vcall_offset, tree function) |
23338 | { |
23339 | const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); |
23340 | rtx this_param = x86_this_parameter (function); |
23341 | rtx this_reg, tmp, fnaddr; |
23342 | unsigned int tmp_regno; |
23343 | rtx_insn *insn; |
23344 | int saved_flag_force_indirect_call = flag_force_indirect_call; |
23345 | |
23346 | if (TARGET_64BIT) |
23347 | tmp_regno = R10_REG; |
23348 | else |
23349 | { |
23350 | unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); |
23351 | if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
23352 | tmp_regno = AX_REG; |
23353 | else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
23354 | tmp_regno = DX_REG; |
23355 | else |
23356 | tmp_regno = CX_REG; |
23357 | |
23358 | if (flag_pic) |
23359 | flag_force_indirect_call = 0; |
23360 | } |
23361 | |
23362 | emit_note (NOTE_INSN_PROLOGUE_END); |
23363 | |
23364 | /* If CET is enabled, insert an ENDBR instruction. */ |
23365 | if ((flag_cf_protection & CF_BRANCH)) |
23366 | emit_insn (gen_nop_endbr ()); |
23367 | |
23368 | /* If VCALL_OFFSET, we'll need THIS in a register. Might as well |
23369 | pull it in now and let DELTA benefit. */ |
23370 | if (REG_P (this_param)) |
23371 | this_reg = this_param; |
23372 | else if (vcall_offset) |
23373 | { |
23374 | /* Put the this parameter into %eax. */ |
23375 | this_reg = gen_rtx_REG (Pmode, AX_REG); |
23376 | emit_move_insn (this_reg, this_param); |
23377 | } |
23378 | else |
23379 | this_reg = NULL_RTX; |
23380 | |
23381 | /* Adjust the this parameter by a fixed constant. */ |
23382 | if (delta) |
23383 | { |
23384 | rtx delta_rtx = GEN_INT (delta); |
23385 | rtx delta_dst = this_reg ? this_reg : this_param; |
23386 | |
23387 | if (TARGET_64BIT) |
23388 | { |
23389 | if (!x86_64_general_operand (delta_rtx, Pmode)) |
23390 | { |
23391 | tmp = gen_rtx_REG (Pmode, tmp_regno); |
23392 | emit_move_insn (tmp, delta_rtx); |
23393 | delta_rtx = tmp; |
23394 | } |
23395 | } |
23396 | |
23397 | ix86_emit_binop (code: PLUS, Pmode, dst: delta_dst, src: delta_rtx); |
23398 | } |
23399 | |
23400 | /* Adjust the this parameter by a value stored in the vtable. */ |
23401 | if (vcall_offset) |
23402 | { |
23403 | rtx vcall_addr, vcall_mem, this_mem; |
23404 | |
23405 | tmp = gen_rtx_REG (Pmode, tmp_regno); |
23406 | |
23407 | this_mem = gen_rtx_MEM (ptr_mode, this_reg); |
23408 | if (Pmode != ptr_mode) |
23409 | this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); |
23410 | emit_move_insn (tmp, this_mem); |
23411 | |
23412 | /* Adjust the this parameter. */ |
23413 | vcall_addr = plus_constant (Pmode, tmp, vcall_offset); |
23414 | if (TARGET_64BIT |
23415 | && !ix86_legitimate_address_p (ptr_mode, addr: vcall_addr, strict: true)) |
23416 | { |
23417 | rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); |
23418 | emit_move_insn (tmp2, GEN_INT (vcall_offset)); |
23419 | vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); |
23420 | } |
23421 | |
23422 | vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); |
23423 | if (Pmode != ptr_mode) |
23424 | emit_insn (gen_addsi_1_zext (this_reg, |
23425 | gen_rtx_REG (ptr_mode, |
23426 | REGNO (this_reg)), |
23427 | vcall_mem)); |
23428 | else |
23429 | ix86_emit_binop (code: PLUS, Pmode, dst: this_reg, src: vcall_mem); |
23430 | } |
23431 | |
23432 | /* If necessary, drop THIS back to its stack slot. */ |
23433 | if (this_reg && this_reg != this_param) |
23434 | emit_move_insn (this_param, this_reg); |
23435 | |
23436 | fnaddr = XEXP (DECL_RTL (function), 0); |
23437 | if (TARGET_64BIT) |
23438 | { |
23439 | if (!flag_pic || targetm.binds_local_p (function) |
23440 | || TARGET_PECOFF) |
23441 | ; |
23442 | else |
23443 | { |
23444 | tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); |
23445 | tmp = gen_rtx_CONST (Pmode, tmp); |
23446 | fnaddr = gen_const_mem (Pmode, tmp); |
23447 | } |
23448 | } |
23449 | else |
23450 | { |
23451 | if (!flag_pic || targetm.binds_local_p (function)) |
23452 | ; |
23453 | #if TARGET_MACHO |
23454 | else if (TARGET_MACHO) |
23455 | { |
23456 | fnaddr = machopic_indirect_call_target (DECL_RTL (function)); |
23457 | fnaddr = XEXP (fnaddr, 0); |
23458 | } |
23459 | #endif /* TARGET_MACHO */ |
23460 | else |
23461 | { |
23462 | tmp = gen_rtx_REG (Pmode, CX_REG); |
23463 | output_set_got (dest: tmp, NULL_RTX); |
23464 | |
23465 | fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); |
23466 | fnaddr = gen_rtx_CONST (Pmode, fnaddr); |
23467 | fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); |
23468 | fnaddr = gen_const_mem (Pmode, fnaddr); |
23469 | } |
23470 | } |
23471 | |
23472 | /* Our sibling call patterns do not allow memories, because we have no |
23473 | predicate that can distinguish between frame and non-frame memory. |
23474 | For our purposes here, we can get away with (ab)using a jump pattern, |
23475 | because we're going to do no optimization. */ |
23476 | if (MEM_P (fnaddr)) |
23477 | { |
23478 | if (sibcall_insn_operand (fnaddr, word_mode)) |
23479 | { |
23480 | fnaddr = XEXP (DECL_RTL (function), 0); |
23481 | tmp = gen_rtx_MEM (QImode, fnaddr); |
23482 | tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); |
23483 | tmp = emit_call_insn (tmp); |
23484 | SIBLING_CALL_P (tmp) = 1; |
23485 | } |
23486 | else |
23487 | emit_jump_insn (gen_indirect_jump (fnaddr)); |
23488 | } |
23489 | else |
23490 | { |
23491 | if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) |
23492 | { |
23493 | // CM_LARGE_PIC always uses pseudo PIC register which is |
23494 | // uninitialized. Since FUNCTION is local and calling it |
23495 | // doesn't go through PLT, we use scratch register %r11 as |
23496 | // PIC register and initialize it here. |
23497 | pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); |
23498 | ix86_init_large_pic_reg (tmp_regno); |
23499 | fnaddr = legitimize_pic_address (orig: fnaddr, |
23500 | reg: gen_rtx_REG (Pmode, tmp_regno)); |
23501 | } |
23502 | |
23503 | if (!sibcall_insn_operand (fnaddr, word_mode)) |
23504 | { |
23505 | tmp = gen_rtx_REG (word_mode, tmp_regno); |
23506 | if (GET_MODE (fnaddr) != word_mode) |
23507 | fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); |
23508 | emit_move_insn (tmp, fnaddr); |
23509 | fnaddr = tmp; |
23510 | } |
23511 | |
23512 | tmp = gen_rtx_MEM (QImode, fnaddr); |
23513 | tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); |
23514 | tmp = emit_call_insn (tmp); |
23515 | SIBLING_CALL_P (tmp) = 1; |
23516 | } |
23517 | emit_barrier (); |
23518 | |
23519 | /* Emit just enough of rest_of_compilation to get the insns emitted. */ |
23520 | insn = get_insns (); |
23521 | shorten_branches (insn); |
23522 | assemble_start_function (thunk_fndecl, fnname); |
23523 | final_start_function (insn, file, 1); |
23524 | final (insn, file, 1); |
23525 | final_end_function (); |
23526 | assemble_end_function (thunk_fndecl, fnname); |
23527 | |
23528 | flag_force_indirect_call = saved_flag_force_indirect_call; |
23529 | } |
23530 | |
23531 | static void |
23532 | x86_file_start (void) |
23533 | { |
23534 | default_file_start (); |
23535 | if (TARGET_16BIT) |
23536 | fputs (s: "\t.code16gcc\n", stream: asm_out_file); |
23537 | #if TARGET_MACHO |
23538 | darwin_file_start (); |
23539 | #endif |
23540 | if (X86_FILE_START_VERSION_DIRECTIVE) |
23541 | fputs (s: "\t.version\t\"01.01\"\n", stream: asm_out_file); |
23542 | if (X86_FILE_START_FLTUSED) |
23543 | fputs (s: "\t.global\t__fltused\n", stream: asm_out_file); |
23544 | if (ix86_asm_dialect == ASM_INTEL) |
23545 | fputs (s: "\t.intel_syntax noprefix\n", stream: asm_out_file); |
23546 | } |
23547 | |
23548 | int |
23549 | x86_field_alignment (tree type, int computed) |
23550 | { |
23551 | machine_mode mode; |
23552 | |
23553 | if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) |
23554 | return computed; |
23555 | if (TARGET_IAMCU) |
23556 | return iamcu_alignment (type, align: computed); |
23557 | type = strip_array_types (type); |
23558 | mode = TYPE_MODE (type); |
23559 | if (mode == DFmode || mode == DCmode |
23560 | || GET_MODE_CLASS (mode) == MODE_INT |
23561 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) |
23562 | { |
23563 | if (TYPE_ATOMIC (type) && computed > 32) |
23564 | { |
23565 | static bool warned; |
23566 | |
23567 | if (!warned && warn_psabi) |
23568 | { |
23569 | const char *url |
23570 | = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic"; |
23571 | |
23572 | warned = true; |
23573 | inform (input_location, "the alignment of %<_Atomic %T%> " |
23574 | "fields changed in %{GCC 11.1%}", |
23575 | TYPE_MAIN_VARIANT (type), url); |
23576 | } |
23577 | } |
23578 | else |
23579 | return MIN (32, computed); |
23580 | } |
23581 | return computed; |
23582 | } |
23583 | |
23584 | /* Print call to TARGET to FILE. */ |
23585 | |
23586 | static void |
23587 | x86_print_call_or_nop (FILE *file, const char *target) |
23588 | { |
23589 | if (flag_nop_mcount || !strcmp (s1: target, s2: "nop")) |
23590 | /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ |
23591 | fprintf (stream: file, format: "1:"ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); |
23592 | else if (!TARGET_PECOFF && flag_pic) |
23593 | { |
23594 | gcc_assert (flag_plt); |
23595 | |
23596 | fprintf (stream: file, format: "1:\tcall\t%s@PLT\n", target); |
23597 | } |
23598 | else |
23599 | fprintf (stream: file, format: "1:\tcall\t%s\n", target); |
23600 | } |
23601 | |
23602 | static bool |
23603 | current_fentry_name (const char **name) |
23604 | { |
23605 | tree attr = lookup_attribute (attr_name: "fentry_name", |
23606 | DECL_ATTRIBUTES (current_function_decl)); |
23607 | if (!attr) |
23608 | return false; |
23609 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
23610 | return true; |
23611 | } |
23612 | |
23613 | static bool |
23614 | current_fentry_section (const char **name) |
23615 | { |
23616 | tree attr = lookup_attribute (attr_name: "fentry_section", |
23617 | DECL_ATTRIBUTES (current_function_decl)); |
23618 | if (!attr) |
23619 | return false; |
23620 | *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); |
23621 | return true; |
23622 | } |
23623 | |
23624 | /* Return a caller-saved register which isn't live or a callee-saved |
23625 | register which has been saved on stack in the prologue at entry for |
23626 | profile. */ |
23627 | |
23628 | static int |
23629 | x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED) |
23630 | { |
23631 | /* Use %r10 if the profiler is emitted before the prologue, or if %r10 isn't |
23632 | used by DRAP. */ |
23633 | if (ix86_profile_before_prologue () |
23634 | || !crtl->drap_reg |
23635 | || REGNO (crtl->drap_reg) != R10_REG) |
23636 | return R10_REG; |
23637 | |
23638 | /* The profiler is emitted after the prologue. If there is a |
23639 | caller-saved register which isn't live or a callee-saved |
23640 | register saved on stack in the prologue, use it. */ |
23641 | |
23642 | bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
23643 | |
23644 | int i; |
23645 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
23646 | if (GENERAL_REGNO_P (i) |
23647 | && i != R10_REG |
23648 | #ifdef NO_PROFILE_COUNTERS |
23649 | && (r11_ok || i != R11_REG) |
23650 | #else |
23651 | && i != R11_REG |
23652 | #endif |
23653 | && TEST_HARD_REG_BIT (accessible_reg_set, bit: i) |
23654 | && (ix86_save_reg (regno: i, maybe_eh_return: true, ignore_outlined: true) |
23655 | || (call_used_regs[i] |
23656 | && !fixed_regs[i] |
23657 | && !REGNO_REG_SET_P (reg_live, i)))) |
23658 | return i; |
23659 | |
23660 | sorry ("no register available for profiling %<-mcmodel=large%s%>", |
23661 | ix86_cmodel == CM_LARGE_PIC ? " -fPIC": ""); |
23662 | |
23663 | return R10_REG; |
23664 | } |
23665 | |
23666 | /* Output assembler code to FILE to increment profiler label # LABELNO |
23667 | for profiling a function entry. */ |
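/* For example, with the default small code model this emits
   `1:	call	mcount' (or `call mcount@PLT' when compiling PIC with PLT
   enabled), and with -mfentry the call targets __fentry__ and is placed
   before the prologue instead.  */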
23668 | void |
23669 | x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
23670 | { |
23671 | if (cfun->machine->insn_queued_at_entrance) |
23672 | { |
23673 | if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR) |
23674 | fprintf (stream: file, format: "\t%s\n", TARGET_64BIT ? "endbr64": "endbr32"); |
23675 | unsigned int patch_area_size |
23676 | = crtl->patch_area_size - crtl->patch_area_entry; |
23677 | if (patch_area_size) |
23678 | ix86_output_patchable_area (patch_area_size, |
23679 | crtl->patch_area_entry == 0); |
23680 | } |
23681 | |
23682 | const char *mcount_name = MCOUNT_NAME; |
23683 | |
23684 | if (current_fentry_name (name: &mcount_name)) |
23685 | ; |
23686 | else if (fentry_name) |
23687 | mcount_name = fentry_name; |
23688 | else if (flag_fentry) |
23689 | mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; |
23690 | |
23691 | if (TARGET_64BIT) |
23692 | { |
23693 | #ifndef NO_PROFILE_COUNTERS |
23694 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23695 | fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno); |
23696 | else |
23697 | fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno); |
23698 | #endif |
23699 | |
23700 | int scratch; |
23701 | const char *reg; |
23702 | char legacy_reg[4] = { 0 }; |
23703 | |
23704 | if (!TARGET_PECOFF) |
23705 | { |
23706 | switch (ix86_cmodel) |
23707 | { |
23708 | case CM_LARGE: |
23709 | scratch = x86_64_select_profile_regnum (r11_ok: true); |
23710 | reg = hi_reg_name[scratch]; |
23711 | if (LEGACY_INT_REGNO_P (scratch)) |
23712 | { |
23713 | legacy_reg[0] = 'r'; |
23714 | legacy_reg[1] = reg[0]; |
23715 | legacy_reg[2] = reg[1]; |
23716 | reg = legacy_reg; |
23717 | } |
23718 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23719 | fprintf (stream: file, format: "1:\tmovabs\t%s, OFFSET FLAT:%s\n" |
23720 | "\tcall\t%s\n", reg, mcount_name, reg); |
23721 | else |
23722 | fprintf (stream: file, format: "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n", |
23723 | mcount_name, reg, reg); |
23724 | break; |
23725 | case CM_LARGE_PIC: |
23726 | #ifdef NO_PROFILE_COUNTERS |
23727 | scratch = x86_64_select_profile_regnum (r11_ok: false); |
23728 | reg = hi_reg_name[scratch]; |
23729 | if (LEGACY_INT_REGNO_P (scratch)) |
23730 | { |
23731 | legacy_reg[0] = 'r'; |
23732 | legacy_reg[1] = reg[0]; |
23733 | legacy_reg[2] = reg[1]; |
23734 | reg = legacy_reg; |
23735 | } |
23736 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23737 | { |
23738 | fprintf (stream: file, format: "1:movabs\tr11, " |
23739 | "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n"); |
23740 | fprintf (stream: file, format: "\tlea\t%s, 1b[rip]\n", reg); |
23741 | fprintf (stream: file, format: "\tadd\t%s, r11\n", reg); |
23742 | fprintf (stream: file, format: "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n", |
23743 | mcount_name); |
23744 | fprintf (stream: file, format: "\tadd\t%s, r11\n", reg); |
23745 | fprintf (stream: file, format: "\tcall\t%s\n", reg); |
23746 | break; |
23747 | } |
23748 | fprintf (stream: file, |
23749 | format: "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n"); |
23750 | fprintf (stream: file, format: "\tleaq\t1b(%%rip), %%%s\n", reg); |
23751 | fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n", reg); |
23752 | fprintf (stream: file, format: "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name); |
23753 | fprintf (stream: file, format: "\taddq\t%%r11, %%%s\n", reg); |
23754 | fprintf (stream: file, format: "\tcall\t*%%%s\n", reg); |
23755 | #else |
23756 | sorry ("profiling %<-mcmodel=large%> with PIC is not supported"); |
23757 | #endif |
23758 | break; |
23759 | case CM_SMALL_PIC: |
23760 | case CM_MEDIUM_PIC: |
23761 | if (!flag_plt) |
23762 | { |
23763 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23764 | fprintf (stream: file, format: "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", |
23765 | mcount_name); |
23766 | else |
23767 | fprintf (stream: file, format: "1:\tcall\t*%s@GOTPCREL(%%rip)\n", |
23768 | mcount_name); |
23769 | break; |
23770 | } |
23771 | /* fall through */ |
23772 | default: |
23773 | x86_print_call_or_nop (file, target: mcount_name); |
23774 | break; |
23775 | } |
23776 | } |
23777 | else |
23778 | x86_print_call_or_nop (file, target: mcount_name); |
23779 | } |
23780 | else if (flag_pic) |
23781 | { |
23782 | #ifndef NO_PROFILE_COUNTERS |
23783 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23784 | fprintf (file, |
23785 | "\tlea\t"PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n", |
23786 | LPREFIX, labelno); |
23787 | else |
23788 | fprintf (file, |
23789 | "\tleal\t%sP%d@GOTOFF(%%ebx), %%"PROFILE_COUNT_REGISTER "\n", |
23790 | LPREFIX, labelno); |
23791 | #endif |
23792 | if (flag_plt) |
23793 | x86_print_call_or_nop (file, target: mcount_name); |
23794 | else if (ASSEMBLER_DIALECT == ASM_INTEL) |
23795 | fprintf (stream: file, format: "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name); |
23796 | else |
23797 | fprintf (stream: file, format: "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); |
23798 | } |
23799 | else |
23800 | { |
23801 | #ifndef NO_PROFILE_COUNTERS |
23802 | if (ASSEMBLER_DIALECT == ASM_INTEL) |
23803 | fprintf (file, |
23804 | "\tmov\t"PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n", |
23805 | LPREFIX, labelno); |
23806 | else |
23807 | fprintf (file, "\tmovl\t$%sP%d, %%"PROFILE_COUNT_REGISTER "\n", |
23808 | LPREFIX, labelno); |
23809 | #endif |
23810 | x86_print_call_or_nop (file, target: mcount_name); |
23811 | } |
23812 | |
23813 | if (flag_record_mcount |
23814 | || lookup_attribute (attr_name: "fentry_section", |
23815 | DECL_ATTRIBUTES (current_function_decl))) |
23816 | { |
23817 | const char *sname = "__mcount_loc"; |
23818 | |
23819 | if (current_fentry_section (name: &sname)) |
23820 | ; |
23821 | else if (fentry_section) |
23822 | sname = fentry_section; |
23823 | |
23824 | fprintf (stream: file, format: "\t.section %s, \"a\",@progbits\n", sname); |
23825 | fprintf (stream: file, format: "\t.%s 1b\n", TARGET_64BIT ? "quad": "long"); |
23826 | fprintf (stream: file, format: "\t.previous\n"); |
23827 | } |
23828 | } |
23829 | |
23830 | /* We don't have exact information about the insn sizes, but we may assume |
23831 | quite safely that we are informed about all 1 byte insns and memory |
23832 | address sizes. This is enough to eliminate unnecessary padding in |
23833 | 99% of cases. */ |
23834 | |
23835 | int |
23836 | ix86_min_insn_size (rtx_insn *insn) |
23837 | { |
23838 | int l = 0, len; |
23839 | |
23840 | if (!INSN_P (insn) || !active_insn_p (insn)) |
23841 | return 0; |
23842 | |
23843 | /* Discard alignments we've emitted, and jump instructions. */ |
23844 | if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
23845 | && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
23846 | return 0; |
23847 | |
23848 | /* Important case - calls are always 5 bytes. |
23849 | It is common to have many calls in a row. */ |
23850 | if (CALL_P (insn) |
23851 | && symbolic_reference_mentioned_p (op: PATTERN (insn)) |
23852 | && !SIBLING_CALL_P (insn)) |
23853 | return 5; |
23854 | len = get_attr_length (insn); |
23855 | if (len <= 1) |
23856 | return 1; |
23857 | |
23858 | /* For normal instructions we rely on get_attr_length being exact, |
23859 | with a few exceptions. */ |
23860 | if (!JUMP_P (insn)) |
23861 | { |
23862 | enum attr_type type = get_attr_type (insn); |
23863 | |
23864 | switch (type) |
23865 | { |
23866 | case TYPE_MULTI: |
23867 | if (GET_CODE (PATTERN (insn)) == ASM_INPUT |
23868 | || asm_noperands (PATTERN (insn)) >= 0) |
23869 | return 0; |
23870 | break; |
23871 | case TYPE_OTHER: |
23872 | case TYPE_FCMP: |
23873 | break; |
23874 | default: |
23875 | /* Otherwise trust get_attr_length. */ |
23876 | return len; |
23877 | } |
23878 | |
23879 | l = get_attr_length_address (insn); |
23880 | if (l < 4 && symbolic_reference_mentioned_p (op: PATTERN (insn))) |
23881 | l = 4; |
23882 | } |
23883 | if (l) |
23884 | return 1+l; |
23885 | else |
23886 | return 2; |
23887 | } |
23888 | |
23889 | #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN |
23890 | |
23891 | /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte |
23892 | window. */ |
23893 | |
23894 | static void |
23895 | ix86_avoid_jump_mispredicts (void) |
23896 | { |
23897 | rtx_insn *insn, *start = get_insns (); |
23898 | int nbytes = 0, njumps = 0; |
23899 | bool isjump = false; |
23900 | |
23901 | /* Look for all minimal intervals of instructions containing 4 jumps. |
23902 | The intervals are bounded by START and INSN. NBYTES is the total |
23903 | size of instructions in the interval including INSN and not including |
23904 | START. When the NBYTES is smaller than 16 bytes, it is possible |
23905 | that the end of START and INSN ends up in the same 16byte page. |
23906 | |
23907 | The smallest offset in the page INSN can start is the case where START |
23908 | ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). |
23909 | We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). |
23910 | |
23911 | Don't consider an asm goto as a jump; while it can contain a jump, it doesn't |
23912 | have to, since control transfer to its label(s) can be performed through other |
23913 | means, and we also estimate the minimum length of all asm stmts as 0. */ |
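/* Worked example: if the interval already contains three jumps and the
   fourth jump brings NBYTES to 12 with the jump itself being 2 bytes, we
   emit a p2align to a 16-byte boundary with maxskip 15 - 12 + 2 = 5,
   which pushes the fourth jump into the next 16-byte window.  */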
23914 | for (insn = start; insn; insn = NEXT_INSN (insn)) |
23915 | { |
23916 | int min_size; |
23917 | |
23918 | if (LABEL_P (insn)) |
23919 | { |
23920 | align_flags alignment = label_to_alignment (insn); |
23921 | int align = alignment.levels[0].log; |
23922 | int max_skip = alignment.levels[0].maxskip; |
23923 | |
23924 | if (max_skip > 15) |
23925 | max_skip = 15; |
23926 | /* If align > 3, only up to 16 - max_skip - 1 bytes can be |
23927 | already in the current 16 byte page, because otherwise |
23928 | ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer |
23929 | bytes to reach 16 byte boundary. */ |
23930 | if (align <= 0 |
23931 | || (align <= 3 && max_skip != (1 << align) - 1)) |
23932 | max_skip = 0; |
23933 | if (dump_file) |
23934 | fprintf (stream: dump_file, format: "Label %i with max_skip %i\n", |
23935 | INSN_UID (insn), max_skip); |
23936 | if (max_skip) |
23937 | { |
23938 | while (nbytes + max_skip >= 16) |
23939 | { |
23940 | start = NEXT_INSN (insn: start); |
23941 | if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0) |
23942 | || CALL_P (start)) |
23943 | njumps--, isjump = true; |
23944 | else |
23945 | isjump = false; |
23946 | nbytes -= ix86_min_insn_size (insn: start); |
23947 | } |
23948 | } |
23949 | continue; |
23950 | } |
23951 | |
23952 | min_size = ix86_min_insn_size (insn); |
23953 | nbytes += min_size; |
23954 | if (dump_file) |
23955 | fprintf (stream: dump_file, format: "Insn %i estimated to %i bytes\n", |
23956 | INSN_UID (insn), min_size); |
23957 | if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) |
23958 | || CALL_P (insn)) |
23959 | njumps++; |
23960 | else |
23961 | continue; |
23962 | |
23963 | while (njumps > 3) |
23964 | { |
23965 | start = NEXT_INSN (insn: start); |
23966 | if ((JUMP_P (start) && asm_noperands (PATTERN (insn: start)) < 0) |
23967 | || CALL_P (start)) |
23968 | njumps--, isjump = true; |
23969 | else |
23970 | isjump = false; |
23971 | nbytes -= ix86_min_insn_size (insn: start); |
23972 | } |
23973 | gcc_assert (njumps >= 0); |
23974 | if (dump_file) |
23975 | fprintf (stream: dump_file, format: "Interval %i to %i has %i bytes\n", |
23976 | INSN_UID (insn: start), INSN_UID (insn), nbytes); |
23977 | |
23978 | if (njumps == 3 && isjump && nbytes < 16) |
23979 | { |
23980 | int padsize = 15 - nbytes + ix86_min_insn_size (insn); |
23981 | |
23982 | if (dump_file) |
23983 | fprintf (stream: dump_file, format: "Padding insn %i by %i bytes!\n", |
23984 | INSN_UID (insn), padsize); |
23985 | emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn); |
23986 | } |
23987 | } |
23988 | } |
23989 | #endif |
23990 | |
23991 | /* AMD Athlon works faster |
23992 | when RET is not the destination of a conditional jump or directly preceded |
23993 | by another jump instruction. We avoid the penalty by inserting a NOP just |
23994 | before the RET instruction in such cases. */ |
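/* The replacement emitted below, gen_simple_return_internal_long, is the
   two-byte `rep ret' form of the return, which avoids having a branch
   land directly on a single-byte `ret'.  */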
23995 | static void |
23996 | ix86_pad_returns (void) |
23997 | { |
23998 | edge e; |
23999 | edge_iterator ei; |
24000 | |
24001 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
24002 | { |
24003 | basic_block bb = e->src; |
24004 | rtx_insn *ret = BB_END (bb); |
24005 | rtx_insn *prev; |
24006 | bool replace = false; |
24007 | |
24008 | if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) |
24009 | || optimize_bb_for_size_p (bb)) |
24010 | continue; |
24011 | for (prev = PREV_INSN (insn: ret); prev; prev = PREV_INSN (insn: prev)) |
24012 | if (active_insn_p (prev) || LABEL_P (prev)) |
24013 | break; |
24014 | if (prev && LABEL_P (prev)) |
24015 | { |
24016 | edge e; |
24017 | edge_iterator ei; |
24018 | |
24019 | FOR_EACH_EDGE (e, ei, bb->preds) |
24020 | if (EDGE_FREQUENCY (e) && e->src->index >= 0 |
24021 | && !(e->flags & EDGE_FALLTHRU)) |
24022 | { |
24023 | replace = true; |
24024 | break; |
24025 | } |
24026 | } |
24027 | if (!replace) |
24028 | { |
24029 | prev = prev_active_insn (ret); |
24030 | if (prev |
24031 | && ((JUMP_P (prev) && any_condjump_p (prev)) |
24032 | || CALL_P (prev))) |
24033 | replace = true; |
24034 | /* Empty functions get a branch mispredict even when |
24035 | the jump destination is not visible to us. */ |
24036 | if (!prev && !optimize_function_for_size_p (cfun)) |
24037 | replace = true; |
24038 | } |
24039 | if (replace) |
24040 | { |
24041 | emit_jump_insn_before (gen_simple_return_internal_long (), ret); |
24042 | delete_insn (ret); |
24043 | } |
24044 | } |
24045 | } |
24046 | |
24047 | /* Count the minimum number of instructions in BB. Return 4 if the |
24048 | number of instructions >= 4. */ |
24049 | |
24050 | static int |
24051 | ix86_count_insn_bb (basic_block bb) |
24052 | { |
24053 | rtx_insn *insn; |
24054 | int insn_count = 0; |
24055 | |
24056 | /* Count number of instructions in this block. Return 4 if the number |
24057 | of instructions >= 4. */ |
24058 | FOR_BB_INSNS (bb, insn) |
24059 | { |
24060 | /* This only happens in exit blocks. */ |
24061 | if (JUMP_P (insn) |
24062 | && ANY_RETURN_P (PATTERN (insn))) |
24063 | break; |
24064 | |
24065 | if (NONDEBUG_INSN_P (insn) |
24066 | && GET_CODE (PATTERN (insn)) != USE |
24067 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
24068 | { |
24069 | insn_count++; |
24070 | if (insn_count >= 4) |
24071 | return insn_count; |
24072 | } |
24073 | } |
24074 | |
24075 | return insn_count; |
24076 | } |
24077 | |
24078 | |
24079 | /* Count the minimum number of instructions in code path in BB. |
24080 | Return 4 if the number of instructions >= 4. */ |
24081 | |
24082 | static int |
24083 | ix86_count_insn (basic_block bb) |
24084 | { |
24085 | edge e; |
24086 | edge_iterator ei; |
24087 | int min_prev_count; |
24088 | |
24089 | /* Only bother counting instructions along paths with no |
24090 | more than 2 basic blocks between entry and exit. Given |
24091 | that BB has an edge to exit, determine if a predecessor |
24092 | of BB has an edge from entry. If so, compute the number |
24093 | of instructions in the predecessor block. If there |
24094 | happen to be multiple such blocks, compute the minimum. */ |
24095 | min_prev_count = 4; |
24096 | FOR_EACH_EDGE (e, ei, bb->preds) |
24097 | { |
24098 | edge prev_e; |
24099 | edge_iterator prev_ei; |
24100 | |
24101 | if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
24102 | { |
24103 | min_prev_count = 0; |
24104 | break; |
24105 | } |
24106 | FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) |
24107 | { |
24108 | if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
24109 | { |
24110 | int count = ix86_count_insn_bb (bb: e->src); |
24111 | if (count < min_prev_count) |
24112 | min_prev_count = count; |
24113 | break; |
24114 | } |
24115 | } |
24116 | } |
24117 | |
24118 | if (min_prev_count < 4) |
24119 | min_prev_count += ix86_count_insn_bb (bb); |
24120 | |
24121 | return min_prev_count; |
24122 | } |
24123 | |
24124 | /* Pad short function to 4 instructions. */ |
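/* For example, a function whose shortest entry-to-exit path contains only
   two real instructions gets 2 * (4 - 2) = 4 NOPs emitted before its
   epilogue, since the code below counts two NOPs as one instruction.  */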
24125 | |
24126 | static void |
24127 | ix86_pad_short_function (void) |
24128 | { |
24129 | edge e; |
24130 | edge_iterator ei; |
24131 | |
24132 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
24133 | { |
24134 | rtx_insn *ret = BB_END (e->src); |
24135 | if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) |
24136 | { |
24137 | int insn_count = ix86_count_insn (bb: e->src); |
24138 | |
24139 | /* Pad short function. */ |
24140 | if (insn_count < 4) |
24141 | { |
24142 | rtx_insn *insn = ret; |
24143 | |
24144 | /* Find epilogue. */ |
24145 | while (insn |
24146 | && (!NOTE_P (insn) |
24147 | || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) |
24148 | insn = PREV_INSN (insn); |
24149 | |
24150 | if (!insn) |
24151 | insn = ret; |
24152 | |
24153 | /* Two NOPs count as one instruction. */ |
24154 | insn_count = 2 * (4 - insn_count); |
24155 | emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); |
24156 | } |
24157 | } |
24158 | } |
24159 | } |
24160 | |
24161 | /* Fix up a Windows system unwinder issue. If an EH region falls through into |
24162 | the epilogue, the Windows system unwinder will apply epilogue logic and |
24163 | produce incorrect offsets. This can be avoided by adding a nop between |
24164 | the last insn that can throw and the first insn of the epilogue. */ |
24165 | |
24166 | static void |
24167 | ix86_seh_fixup_eh_fallthru (void) |
24168 | { |
24169 | edge e; |
24170 | edge_iterator ei; |
24171 | |
24172 | FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
24173 | { |
24174 | rtx_insn *insn, *next; |
24175 | |
24176 | /* Find the beginning of the epilogue. */ |
24177 | for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) |
24178 | if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) |
24179 | break; |
24180 | if (insn == NULL) |
24181 | continue; |
24182 | |
24183 | /* We only care about preceding insns that can throw. */ |
24184 | insn = prev_active_insn (insn); |
24185 | if (insn == NULL || !can_throw_internal (insn)) |
24186 | continue; |
24187 | |
24188 | /* Do not separate calls from their debug information. */ |
24189 | for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (insn: next)) |
24190 | if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION) |
24191 | insn = next; |
24192 | else |
24193 | break; |
24194 | |
24195 | emit_insn_after (gen_nops (const1_rtx), insn); |
24196 | } |
24197 | } |
24198 | /* Split a vector load from a PARM_DECL into elemental loads to avoid STLF |
24199 | stalls. */ |
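/* As a rough sketch of the transformation (register and offsets are just
   for illustration): a 16-byte load from a parameter slot such as

	movapd	16(%rsp), %xmm0

   is split into two 8-byte loads

	movsd	16(%rsp), %xmm0
	movhpd	24(%rsp), %xmm0

   so that each half can forward from an individual 8-byte store of the
   argument.  */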
24200 | static void |
24201 | ix86_split_stlf_stall_load () |
24202 | { |
24203 | rtx_insn* insn, *start = get_insns (); |
24204 | unsigned window = 0; |
24205 | |
24206 | for (insn = start; insn; insn = NEXT_INSN (insn)) |
24207 | { |
24208 | if (!NONDEBUG_INSN_P (insn)) |
24209 | continue; |
24210 | window++; |
24211 | /* If 64 vaddps %xmm18, %xmm19, %xmm20 insns (with no dependence between |
24212 | each other, just to emulate pipeline occupancy) are inserted before the |
24213 | stalled load, the STLF-stall case is as fast as the no-stall case on CLX. |
24214 | Since the CFG is freed before machine_reorg, just do a rough |
24215 | calculation of the window according to the layout. */ |
24216 | if (window > (unsigned) x86_stlf_window_ninsns) |
24217 | return; |
24218 | |
24219 | if (any_uncondjump_p (insn) |
24220 | || ANY_RETURN_P (PATTERN (insn)) |
24221 | || CALL_P (insn)) |
24222 | return; |
24223 | |
24224 | rtx set = single_set (insn); |
24225 | if (!set) |
24226 | continue; |
24227 | rtx src = SET_SRC (set); |
24228 | if (!MEM_P (src) |
24229 | /* Only handle V2DFmode load since it doesn't need any scratch |
24230 | register. */ |
24231 | || GET_MODE (src) != E_V2DFmode |
24232 | || !MEM_EXPR (src) |
24233 | || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL) |
24234 | continue; |
24235 | |
24236 | rtx zero = CONST0_RTX (V2DFmode); |
24237 | rtx dest = SET_DEST (set); |
24238 | rtx m = adjust_address (src, DFmode, 0); |
24239 | rtx loadlpd = gen_sse2_loadlpd (dest, zero, m); |
24240 | emit_insn_before (loadlpd, insn); |
24241 | m = adjust_address (src, DFmode, 8); |
24242 | rtx loadhpd = gen_sse2_loadhpd (dest, dest, m); |
24243 | if (dump_file && (dump_flags & TDF_DETAILS)) |
24244 | { |
24245 | fputs (s: "Due to potential STLF stall, split instruction:\n", |
24246 | stream: dump_file); |
24247 | print_rtl_single (dump_file, insn); |
24248 | fputs (s: "To:\n", stream: dump_file); |
24249 | print_rtl_single (dump_file, loadlpd); |
24250 | print_rtl_single (dump_file, loadhpd); |
24251 | } |
24252 | PATTERN (insn) = loadhpd; |
24253 | INSN_CODE (insn) = -1; |
24254 | gcc_assert (recog_memoized (insn) != -1); |
24255 | } |
24256 | } |
24257 | |
24258 | /* Implement machine specific optimizations. We implement padding of returns |
24259 | for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ |
24260 | static void |
24261 | ix86_reorg (void) |
24262 | { |
24263 | /* We are freeing block_for_insn in the toplev to keep compatibility |
24264 | with old MDEP_REORGS that are not CFG based. Recompute it now. */ |
24265 | compute_bb_for_insn (); |
24266 | |
24267 | if (TARGET_SEH && current_function_has_exception_handlers ()) |
24268 | ix86_seh_fixup_eh_fallthru (); |
24269 | |
24270 | if (optimize && optimize_function_for_speed_p (cfun)) |
24271 | { |
24272 | if (TARGET_SSE2) |
24273 | ix86_split_stlf_stall_load (); |
24274 | if (TARGET_PAD_SHORT_FUNCTION) |
24275 | ix86_pad_short_function (); |
24276 | else if (TARGET_PAD_RETURNS) |
24277 | ix86_pad_returns (); |
24278 | #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN |
24279 | if (TARGET_FOUR_JUMP_LIMIT) |
24280 | ix86_avoid_jump_mispredicts (); |
24281 | #endif |
24282 | } |
24283 | } |
24284 | |
24285 | /* Return nonzero when QImode register that must be represented via REX prefix |
24286 | is used. */ |
24287 | bool |
24288 | x86_extended_QIreg_mentioned_p (rtx_insn *insn) |
24289 | { |
24290 | int i; |
24291 | extract_insn_cached (insn); |
24292 | for (i = 0; i < recog_data.n_operands; i++) |
24293 | if (GENERAL_REG_P (recog_data.operand[i]) |
24294 | && !QI_REGNO_P (REGNO (recog_data.operand[i]))) |
24295 | return true; |
24296 | return false; |
24297 | } |
24298 | |
24299 | /* Return true when INSN mentions register that must be encoded using REX |
24300 | prefix. */ |
24301 | bool |
24302 | x86_extended_reg_mentioned_p (rtx insn) |
24303 | { |
24304 | subrtx_iterator::array_type array; |
24305 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
24306 | { |
24307 | const_rtx x = *iter; |
24308 | if (REG_P (x) |
24309 | && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)) |
24310 | || REX2_INT_REGNO_P (REGNO (x)))) |
24311 | return true; |
24312 | } |
24313 | return false; |
24314 | } |
24315 | |
24316 | /* Return true when INSN mentions register that must be encoded using REX2 |
24317 | prefix. */ |
24318 | bool |
24319 | x86_extended_rex2reg_mentioned_p (rtx insn) |
24320 | { |
24321 | subrtx_iterator::array_type array; |
24322 | FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) |
24323 | { |
24324 | const_rtx x = *iter; |
24325 | if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x))) |
24326 | return true; |
24327 | } |
24328 | return false; |
24329 | } |
24330 | |
24331 | /* Return true when rtx operands mentions register that must be encoded using |
24332 | evex prefix. */ |
24333 | bool |
24334 | x86_evex_reg_mentioned_p (rtx operands[], int nops) |
24335 | { |
24336 | int i; |
24337 | for (i = 0; i < nops; i++) |
24338 | if (EXT_REX_SSE_REG_P (operands[i]) |
24339 | || x86_extended_rex2reg_mentioned_p (insn: operands[i])) |
24340 | return true; |
24341 | return false; |
24342 | } |
24343 | |
24344 | /* If profitable, negate (without causing overflow) integer constant |
24345 | of mode MODE at location LOC. Return true in this case. */ |
24346 | bool |
24347 | x86_maybe_negate_const_int (rtx *loc, machine_mode mode) |
24348 | { |
24349 | HOST_WIDE_INT val; |
24350 | |
24351 | if (!CONST_INT_P (*loc)) |
24352 | return false; |
24353 | |
24354 | switch (mode) |
24355 | { |
24356 | case E_DImode: |
24357 | /* DImode x86_64 constants must fit in 32 bits. */ |
24358 | gcc_assert (x86_64_immediate_operand (*loc, mode)); |
24359 | |
24360 | mode = SImode; |
24361 | break; |
24362 | |
24363 | case E_SImode: |
24364 | case E_HImode: |
24365 | case E_QImode: |
24366 | break; |
24367 | |
24368 | default: |
24369 | gcc_unreachable (); |
24370 | } |
24371 | |
24372 | /* Avoid overflows. */ |
24373 | if (mode_signbit_p (mode, *loc)) |
24374 | return false; |
24375 | |
24376 | val = INTVAL (*loc); |
24377 | |
24378 | /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. |
24379 | Exceptions: -128 encodes smaller than 128, so swap sign and op. */ |
24380 | if ((val < 0 && val != -128) |
24381 | || val == 128) |
24382 | { |
24383 | *loc = GEN_INT (-val); |
24384 | return true; |
24385 | } |
24386 | |
24387 | return false; |
24388 | } |
24389 | |
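/* Illustrative example of the transformation above (not part of the
   sources): after x86_maybe_negate_const_int negates the constant, the
   caller can emit

     subl $4, %eax     instead of    addl $-4, %eax
     subl $-128, %eax  instead of    addl $128, %eax

   The second form matters because -128 fits in a sign-extended imm8
   while +128 does not, so swapping the sign and the operation saves
   encoding bytes.  */
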
24390 | /* Generate an unsigned DImode/SImode to FP conversion. This is the same code |
24391 | optabs would emit if we didn't have TFmode patterns. */ |
24392 | |
24393 | void |
24394 | x86_emit_floatuns (rtx operands[2]) |
24395 | { |
24396 | rtx_code_label *neglab, *donelab; |
24397 | rtx i0, i1, f0, in, out; |
24398 | machine_mode mode, inmode; |
24399 | |
24400 | inmode = GET_MODE (operands[1]); |
24401 | gcc_assert (inmode == SImode || inmode == DImode); |
24402 | |
24403 | out = operands[0]; |
24404 | in = force_reg (inmode, operands[1]); |
24405 | mode = GET_MODE (out); |
24406 | neglab = gen_label_rtx (); |
24407 | donelab = gen_label_rtx (); |
24408 | f0 = gen_reg_rtx (mode); |
24409 | |
24410 | emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); |
24411 | |
24412 | expand_float (out, in, 0); |
24413 | |
24414 | emit_jump_insn (gen_jump (donelab)); |
24415 | emit_barrier (); |
24416 | |
24417 | emit_label (neglab); |
24418 | |
24419 | i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, |
24420 | 1, OPTAB_DIRECT); |
24421 | i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, |
24422 | 1, OPTAB_DIRECT); |
24423 | i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); |
24424 | |
24425 | expand_float (f0, i0, 0); |
24426 | |
24427 | emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); |
24428 | |
24429 | emit_label (donelab); |
24430 | } |
24431 | |
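/* A minimal C sketch of the equivalence used above (illustrative only,
   not used by the compiler): when the input has its sign bit set, the
   value is halved with the low bit folded back in as a sticky bit,
   converted as a signed number, and then doubled.

     double utod (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       return (double) (long long) half * 2.0;
     }

   Keeping the low bit via the OR preserves correct rounding of odd
   values that would otherwise be lost by the shift.  */
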
24432 | /* Return the diagnostic message string if conversion from FROMTYPE to |
24433 | TOTYPE is not allowed, NULL otherwise. */ |
24434 | |
24435 | static const char * |
24436 | ix86_invalid_conversion (const_tree fromtype, const_tree totype) |
24437 | { |
24438 | machine_mode from_mode = element_mode (fromtype); |
24439 | machine_mode to_mode = element_mode (totype); |
24440 | |
24441 | if (!TARGET_SSE2 && from_mode != to_mode) |
24442 | { |
24443 | /* Do not allow conversions to/from BFmode/HFmode scalar types |
24444 | when TARGET_SSE2 is not available. */ |
24445 | if (from_mode == BFmode) |
24446 | return N_("invalid conversion from type %<__bf16%> " |
24447 | "without option %<-msse2%>"); |
24448 | if (from_mode == HFmode) |
24449 | return N_("invalid conversion from type %<_Float16%> " |
24450 | "without option %<-msse2%>"); |
24451 | if (to_mode == BFmode) |
24452 | return N_("invalid conversion to type %<__bf16%> " |
24453 | "without option %<-msse2%>"); |
24454 | if (to_mode == HFmode) |
24455 | return N_("invalid conversion to type %<_Float16%> " |
24456 | "without option %<-msse2%>"); |
24457 | } |
24458 | |
24459 | /* Warn for silent implicit conversion between __bf16 and short, |
24460 | since __bfloat16 has been redefined from a typedef of short to |
24461 | the real __bf16 type since GCC 13. */ |
24462 | if (element_mode (fromtype) != element_mode (totype) |
24463 | && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT)) |
24464 | { |
24465 | /* Warn for silent implicit conversion where user may expect |
24466 | a bitcast. */ |
24467 | if ((TYPE_MODE (fromtype) == BFmode |
24468 | && TYPE_MODE (totype) == HImode) |
24469 | || (TYPE_MODE (totype) == BFmode |
24470 | && TYPE_MODE (fromtype) == HImode)) |
24471 | warning (0, "%<__bfloat16%> is redefined from typedef %<short%> " |
24472 | "to real %<__bf16%> since GCC 13.1, be careful of " |
24473 | "implicit conversion between %<__bf16%> and %<short%>; " |
24474 | "an explicit bitcast may be needed here"); |
24475 | } |
24476 | |
24477 | /* Conversion allowed. */ |
24478 | return NULL; |
24479 | } |
24480 | |
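/* Illustrative example of the warning above (not part of the sources):
   code written against the pre-GCC-13 typedef, where __bfloat16 was
   short, now performs a value conversion instead of a re-interpretation:

     __bf16 get (short *p) { return *p; }   // warns: short -> __bf16

   If the old bit-pattern behaviour is wanted, an explicit copy of the
   representation is needed, e.g.

     __bf16 r;
     __builtin_memcpy (&r, p, sizeof (r));
*/
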
24481 | /* Return the diagnostic message string if the unary operation OP is |
24482 | not permitted on TYPE, NULL otherwise. */ |
24483 | |
24484 | static const char * |
24485 | ix86_invalid_unary_op (int op, const_tree type) |
24486 | { |
24487 | machine_mode mmode = element_mode (type); |
24488 | /* Reject all single-operand operations on BFmode/HFmode except for & |
24489 | when TARGET_SSE2 is not available. */ |
24490 | if (!TARGET_SSE2 && op != ADDR_EXPR) |
24491 | { |
24492 | if (mmode == BFmode) |
24493 | return N_("operation not permitted on type %<__bf16%> " |
24494 | "without option %<-msse2%>"); |
24495 | if (mmode == HFmode) |
24496 | return N_("operation not permitted on type %<_Float16%> " |
24497 | "without option %<-msse2%>"); |
24498 | } |
24499 | |
24500 | /* Operation allowed. */ |
24501 | return NULL; |
24502 | } |
24503 | |
24504 | /* Return the diagnostic message string if the binary operation OP is |
24505 | not permitted on TYPE1 and TYPE2, NULL otherwise. */ |
24506 | |
24507 | static const char * |
24508 | ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, |
24509 | const_tree type2) |
24510 | { |
24511 | machine_mode type1_mode = element_mode (type1); |
24512 | machine_mode type2_mode = element_mode (type2); |
24513 | /* Reject all 2-operand operations on BFmode or HFmode |
24514 | when TARGET_SSE2 is not available. */ |
24515 | if (!TARGET_SSE2) |
24516 | { |
24517 | if (type1_mode == BFmode || type2_mode == BFmode) |
24518 | return N_("operation not permitted on type %<__bf16%> " |
24519 | "without option %<-msse2%>"); |
24520 | |
24521 | if (type1_mode == HFmode || type2_mode == HFmode) |
24522 | return N_("operation not permitted on type %<_Float16%> " |
24523 | "without option %<-msse2%>"); |
24524 | } |
24525 | |
24526 | /* Operation allowed. */ |
24527 | return NULL; |
24528 | } |
24529 | |
24530 | |
24531 | /* Target hook for scalar_mode_supported_p. */ |
24532 | static bool |
24533 | ix86_scalar_mode_supported_p (scalar_mode mode) |
24534 | { |
24535 | if (DECIMAL_FLOAT_MODE_P (mode)) |
24536 | return default_decimal_float_supported_p (); |
24537 | else if (mode == TFmode) |
24538 | return true; |
24539 | else if (mode == HFmode || mode == BFmode) |
24540 | return true; |
24541 | else |
24542 | return default_scalar_mode_supported_p (mode); |
24543 | } |
24544 | |
24545 | /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE |
24546 | if MODE is HFmode or BFmode, and punt to the generic implementation otherwise. */ |
24547 | |
24548 | static bool |
24549 | ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode) |
24550 | { |
24551 | /* NB: Always return TRUE for HFmode so that the _Float16 type will |
24552 | be defined by the C front-end for AVX512FP16 intrinsics. We will |
24553 | issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't |
24554 | enabled. */ |
24555 | return ((mode == HFmode || mode == BFmode) |
24556 | ? true |
24557 | : default_libgcc_floating_mode_supported_p (mode)); |
24558 | } |
24559 | |
24560 | /* Implements target hook vector_mode_supported_p. */ |
24561 | static bool |
24562 | ix86_vector_mode_supported_p (machine_mode mode) |
24563 | { |
24564 | /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be |
24565 | either. */ |
24566 | if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode) |
24567 | return false; |
24568 | if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
24569 | return true; |
24570 | if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
24571 | return true; |
24572 | if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) |
24573 | return true; |
24574 | if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) |
24575 | return true; |
24576 | if ((TARGET_MMX || TARGET_MMX_WITH_SSE) |
24577 | && VALID_MMX_REG_MODE (mode)) |
24578 | return true; |
24579 | if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE) |
24580 | && VALID_MMX_REG_MODE_3DNOW (mode)) |
24581 | return true; |
24582 | if (mode == V2QImode) |
24583 | return true; |
24584 | return false; |
24585 | } |
24586 | |
24587 | /* Target hook for c_mode_for_suffix. */ |
24588 | static machine_mode |
24589 | ix86_c_mode_for_suffix (char suffix) |
24590 | { |
24591 | if (suffix == 'q') |
24592 | return TFmode; |
24593 | if (suffix == 'w') |
24594 | return XFmode; |
24595 | |
24596 | return VOIDmode; |
24597 | } |
24598 | |
24599 | /* Helper function to map common constraints to non-EGPR ones. |
24600 | All related constraints have a j prefix: j plus an upper-case letter |
24601 | means the constraint is strictly EGPR enabled, while j plus a |
24602 | lower-case letter indicates the constraint is strictly gpr16 only. |
24603 |  |
24604 | The "g" constraint is special: it is split into "jrjmi", since the |
24605 | backend defines no corresponding general constraint. |
24606 |  |
24607 | Here is the full list of mappings from constraints that may involve |
24608 | a gpr to their j-prefixed forms. |
24609 | |
24610 | "g" -> "jrjmi" |
24611 | "r" -> "jr" |
24612 | "m" -> "jm" |
24613 | "<" -> "j<" |
24614 | ">" -> "j>" |
24615 | "o" -> "jo" |
24616 | "V" -> "jV" |
24617 | "p" -> "jp" |
24618 | "Bm" -> "ja" |
24619 | */ |
24620 | |
24621 | static void map_egpr_constraints (vec<const char *> &constraints) |
24622 | { |
24623 | for (size_t i = 0; i < constraints.length(); i++) |
24624 | { |
24625 | const char *cur = constraints[i]; |
24626 | |
24627 | if (startswith (cur, "=@cc")) |
24628 | continue; |
24629 | |
24630 | int len = strlen (cur); |
24631 | auto_vec<char> buf; |
24632 | |
24633 | for (int j = 0; j < len; j++) |
24634 | { |
24635 | switch (cur[j]) |
24636 | { |
24637 | case 'g': |
24638 | buf.safe_push ('j'); |
24639 | buf.safe_push ('r'); |
24640 | buf.safe_push ('j'); |
24641 | buf.safe_push ('m'); |
24642 | buf.safe_push ('i'); |
24643 | break; |
24644 | case 'r': |
24645 | case 'm': |
24646 | case '<': |
24647 | case '>': |
24648 | case 'o': |
24649 | case 'V': |
24650 | case 'p': |
24651 | buf.safe_push ('j'); |
24652 | buf.safe_push (cur[j]); |
24653 | break; |
24654 | case 'B': |
24655 | if (cur[j + 1] == 'm') |
24656 | { |
24657 | buf.safe_push ('j'); |
24658 | buf.safe_push ('a'); |
24659 | j++; |
24660 | } |
24661 | else |
24662 | { |
24663 | buf.safe_push (cur[j]); |
24664 | buf.safe_push (cur[j + 1]); |
24665 | j++; |
24666 | } |
24667 | break; |
24668 | case 'T': |
24669 | case 'Y': |
24670 | case 'W': |
24671 | case 'j': |
24672 | buf.safe_push (cur[j]); |
24673 | buf.safe_push (cur[j + 1]); |
24674 | j++; |
24675 | break; |
24676 | default: |
24677 | buf.safe_push (cur[j]); |
24678 | break; |
24679 | } |
24680 | } |
24681 | buf.safe_push ('\0'); |
24682 | constraints[i] = xstrdup (buf.address ()); |
24683 | } |
24684 | } |
24685 | |
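/* For example (illustrative only), when TARGET_APX_EGPR is set but
   ix86_apx_inline_asm_use_gpr32 is not, the rewrite above turns

     "=rm"  into  "=jrjm"
     "g"    into  "jrjmi"

   so that the register allocator only offers the 16 legacy GPRs for
   those asm operands.  */
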
24686 | /* Worker function for TARGET_MD_ASM_ADJUST. |
24687 | |
24688 | We implement asm flag outputs, and maintain source compatibility |
24689 | with the old cc0-based compiler. */ |
24690 | |
24691 | static rtx_insn * |
24692 | ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, |
24693 | vec<machine_mode> & /*input_modes*/, |
24694 | vec<const char *> &constraints, vec<rtx> &/*uses*/, |
24695 | vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs, |
24696 | location_t loc) |
24697 | { |
24698 | bool saw_asm_flag = false; |
24699 | |
24700 | start_sequence (); |
24701 | |
24702 | if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32) |
24703 | map_egpr_constraints (constraints); |
24704 | |
24705 | for (unsigned i = 0, n = outputs.length (); i < n; ++i) |
24706 | { |
24707 | const char *con = constraints[i]; |
24708 | if (!startswith (con, "=@cc")) |
24709 | continue; |
24710 | con += 4; |
24711 | if (strchr (con, ',') != NULL) |
24712 | { |
24713 | error_at (loc, "alternatives not allowed in %<asm%> flag output"); |
24714 | continue; |
24715 | } |
24716 | |
24717 | bool invert = false; |
24718 | if (con[0] == 'n') |
24719 | invert = true, con++; |
24720 | |
24721 | machine_mode mode = CCmode; |
24722 | rtx_code code = UNKNOWN; |
24723 | |
24724 | switch (con[0]) |
24725 | { |
24726 | case 'a': |
24727 | if (con[1] == 0) |
24728 | mode = CCAmode, code = EQ; |
24729 | else if (con[1] == 'e' && con[2] == 0) |
24730 | mode = CCCmode, code = NE; |
24731 | break; |
24732 | case 'b': |
24733 | if (con[1] == 0) |
24734 | mode = CCCmode, code = EQ; |
24735 | else if (con[1] == 'e' && con[2] == 0) |
24736 | mode = CCAmode, code = NE; |
24737 | break; |
24738 | case 'c': |
24739 | if (con[1] == 0) |
24740 | mode = CCCmode, code = EQ; |
24741 | break; |
24742 | case 'e': |
24743 | if (con[1] == 0) |
24744 | mode = CCZmode, code = EQ; |
24745 | break; |
24746 | case 'g': |
24747 | if (con[1] == 0) |
24748 | mode = CCGCmode, code = GT; |
24749 | else if (con[1] == 'e' && con[2] == 0) |
24750 | mode = CCGCmode, code = GE; |
24751 | break; |
24752 | case 'l': |
24753 | if (con[1] == 0) |
24754 | mode = CCGCmode, code = LT; |
24755 | else if (con[1] == 'e' && con[2] == 0) |
24756 | mode = CCGCmode, code = LE; |
24757 | break; |
24758 | case 'o': |
24759 | if (con[1] == 0) |
24760 | mode = CCOmode, code = EQ; |
24761 | break; |
24762 | case 'p': |
24763 | if (con[1] == 0) |
24764 | mode = CCPmode, code = EQ; |
24765 | break; |
24766 | case 's': |
24767 | if (con[1] == 0) |
24768 | mode = CCSmode, code = EQ; |
24769 | break; |
24770 | case 'z': |
24771 | if (con[1] == 0) |
24772 | mode = CCZmode, code = EQ; |
24773 | break; |
24774 | } |
24775 | if (code == UNKNOWN) |
24776 | { |
24777 | error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]); |
24778 | continue; |
24779 | } |
24780 | if (invert) |
24781 | code = reverse_condition (code); |
24782 | |
24783 | rtx dest = outputs[i]; |
24784 | if (!saw_asm_flag) |
24785 | { |
24786 | /* This is the first asm flag output. Here we put the flags |
24787 | register in as the real output and adjust the condition to |
24788 | allow it. */ |
24789 | constraints[i] = "=Bf"; |
24790 | outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); |
24791 | saw_asm_flag = true; |
24792 | } |
24793 | else |
24794 | { |
24795 | /* We don't need the flags register as output twice. */ |
24796 | constraints[i] = "=X"; |
24797 | outputs[i] = gen_rtx_SCRATCH (SImode); |
24798 | } |
24799 | |
24800 | rtx x = gen_rtx_REG (mode, FLAGS_REG); |
24801 | x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); |
24802 | |
24803 | machine_mode dest_mode = GET_MODE (dest); |
24804 | if (!SCALAR_INT_MODE_P (dest_mode)) |
24805 | { |
24806 | error_at (loc, "invalid type for %<asm%> flag output"); |
24807 | continue; |
24808 | } |
24809 | |
24810 | if (dest_mode == QImode) |
24811 | emit_insn (gen_rtx_SET (dest, x)); |
24812 | else |
24813 | { |
24814 | rtx reg = gen_reg_rtx (QImode); |
24815 | emit_insn (gen_rtx_SET (reg, x)); |
24816 | |
24817 | reg = convert_to_mode (dest_mode, reg, 1); |
24818 | emit_move_insn (dest, reg); |
24819 | } |
24820 | } |
24821 | |
24822 | rtx_insn *seq = end_sequence (); |
24823 | |
24824 | if (saw_asm_flag) |
24825 | return seq; |
24826 | else |
24827 | { |
24828 | /* If we had no asm flag outputs, clobber the flags. */ |
24829 | clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); |
24830 | SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); |
24831 | return NULL; |
24832 | } |
24833 | } |
24834 | |
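/* Usage sketch for the flag-output constraints handled above
   (illustrative only; the function name is made up): "=@ccc" binds an
   output directly to the carry flag left by the asm template, so no
   explicit setcc is needed in user code.

     unsigned add_with_carry (unsigned a, unsigned b, int *carry_out)
     {
       unsigned sum = a;
       int carry;
       asm ("addl %2, %1" : "=@ccc" (carry), "+r" (sum) : "r" (b));
       *carry_out = carry;
       return sum;
     }

   ix86_md_asm_adjust rewrites the "=@ccc" operand into the real flags
   register and emits the setcc/extension sequence after the asm.  */
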
24835 | /* Implements target vector targetm.asm.encode_section_info. */ |
24836 | |
24837 | static void ATTRIBUTE_UNUSED |
24838 | ix86_encode_section_info (tree decl, rtx rtl, int first) |
24839 | { |
24840 | default_encode_section_info (decl, rtl, first); |
24841 | |
24842 | if (ix86_in_large_data_p (decl)) |
24843 | SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; |
24844 | } |
24845 | |
24846 | /* Worker function for REVERSE_CONDITION. */ |
24847 | |
24848 | enum rtx_code |
24849 | ix86_reverse_condition (enum rtx_code code, machine_mode mode) |
24850 | { |
24851 | return (mode == CCFPmode |
24852 | ? reverse_condition_maybe_unordered (code) |
24853 | : reverse_condition (code)); |
24854 | } |
24855 | |
24856 | /* Output code to perform an x87 FP register move, from OPERANDS[1] |
24857 | to OPERANDS[0]. */ |
24858 | |
24859 | const char * |
24860 | output_387_reg_move (rtx_insn *insn, rtx *operands) |
24861 | { |
24862 | if (REG_P (operands[0])) |
24863 | { |
24864 | if (REG_P (operands[1]) |
24865 | && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
24866 | { |
24867 | if (REGNO (operands[0]) == FIRST_STACK_REG) |
24868 | return output_387_ffreep (operands, 0); |
24869 | return "fstp\t%y0"; |
24870 | } |
24871 | if (STACK_TOP_P (operands[0])) |
24872 | return "fld%Z1\t%y1"; |
24873 | return "fst\t%y0"; |
24874 | } |
24875 | else if (MEM_P (operands[0])) |
24876 | { |
24877 | gcc_assert (REG_P (operands[1])); |
24878 | if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
24879 | return "fstp%Z0\t%y0"; |
24880 | else |
24881 | { |
24882 | /* There is no non-popping store to memory for XFmode. |
24883 | So if we need one, follow the store with a load. */ |
24884 | if (GET_MODE (operands[0]) == XFmode) |
24885 | return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; |
24886 | else |
24887 | return "fst%Z0\t%y0"; |
24888 | } |
24889 | } |
24890 | else |
24891 | gcc_unreachable(); |
24892 | } |
24893 | #ifdef TARGET_SOLARIS |
24894 | /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ |
24895 | |
24896 | static void |
24897 | i386_solaris_elf_named_section (const char *name, unsigned int flags, |
24898 | tree decl) |
24899 | { |
24900 | /* With Binutils 2.15, the "@unwind" marker must be specified on |
24901 | every occurrence of the ".eh_frame" section, not just the first |
24902 | one. */ |
24903 | if (TARGET_64BIT |
24904 | && strcmp (name, ".eh_frame") == 0) |
24905 | { |
24906 | fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, |
24907 | flags & SECTION_WRITE ? "aw": "a"); |
24908 | return; |
24909 | } |
24910 | |
24911 | #ifndef USE_GAS |
24912 | if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) |
24913 | { |
24914 | solaris_elf_asm_comdat_section (name, flags, decl); |
24915 | return; |
24916 | } |
24917 | |
24918 | /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the |
24919 | SPARC assembler. One cannot mix single-letter flags and #exclude, so |
24920 | only emit the latter here. */ |
24921 | if (flags & SECTION_EXCLUDE) |
24922 | { |
24923 | fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name); |
24924 | return; |
24925 | } |
24926 | #endif |
24927 | |
24928 | default_elf_asm_named_section (name, flags, decl); |
24929 | } |
24930 | #endif /* TARGET_SOLARIS */ |
24931 | |
24932 | /* Return the mangling of TYPE if it is an extended fundamental type. */ |
24933 | |
24934 | static const char * |
24935 | ix86_mangle_type (const_tree type) |
24936 | { |
24937 | type = TYPE_MAIN_VARIANT (type); |
24938 | |
24939 | if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE |
24940 | && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) |
24941 | return NULL; |
24942 | |
24943 | if (type == float128_type_node || type == float64x_type_node) |
24944 | return NULL; |
24945 | |
24946 | switch (TYPE_MODE (type)) |
24947 | { |
24948 | case E_BFmode: |
24949 | return "DF16b"; |
24950 | case E_HFmode: |
24951 | /* _Float16 is "DF16_". |
24952 | Align with clang's decision in https://reviews.llvm.org/D33719. */ |
24953 | return "DF16_"; |
24954 | case E_TFmode: |
24955 | /* __float128 is "g". */ |
24956 | return "g"; |
24957 | case E_XFmode: |
24958 | /* "long double" or __float80 is "e". */ |
24959 | return "e"; |
24960 | default: |
24961 | return NULL; |
24962 | } |
24963 | } |
24964 | |
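/* Illustrative mangling examples for the cases above (not part of the
   sources): with the Itanium C++ ABI,

     void f (__bf16);      mangles to  _Z1fDF16b
     void f (_Float16);    mangles to  _Z1fDF16_
     void f (__float128);  mangles to  _Z1fg
     void f (__float80);   mangles to  _Z1fe
*/
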
24965 | /* Create C++ tinfo symbols for only conditionally available fundamental |
24966 | types. */ |
24967 | |
24968 | static void |
24969 | ix86_emit_support_tinfos (emit_support_tinfos_callback callback) |
24970 | { |
24971 | extern tree ix86_float16_type_node; |
24972 | extern tree ix86_bf16_type_node; |
24973 | |
24974 | if (!TARGET_SSE2) |
24975 | { |
24976 | if (!float16_type_node) |
24977 | float16_type_node = ix86_float16_type_node; |
24978 | if (!bfloat16_type_node) |
24979 | bfloat16_type_node = ix86_bf16_type_node; |
24980 | callback (float16_type_node); |
24981 | callback (bfloat16_type_node); |
24982 | float16_type_node = NULL_TREE; |
24983 | bfloat16_type_node = NULL_TREE; |
24984 | } |
24985 | } |
24986 | |
24987 | static GTY(()) tree ix86_tls_stack_chk_guard_decl; |
24988 | |
24989 | static tree |
24990 | ix86_stack_protect_guard (void) |
24991 | { |
24992 | if (TARGET_SSP_TLS_GUARD) |
24993 | { |
24994 | tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1); |
24995 | int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg); |
24996 | tree type = build_qualified_type (type_node, qual); |
24997 | tree t; |
24998 | |
24999 | if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str)) |
25000 | { |
25001 | t = ix86_tls_stack_chk_guard_decl; |
25002 | |
25003 | if (t == NULL) |
25004 | { |
25005 | rtx x; |
25006 | |
25007 | t = build_decl |
25008 | (UNKNOWN_LOCATION, VAR_DECL, |
25009 | get_identifier (ix86_stack_protector_guard_symbol_str), |
25010 | type); |
25011 | TREE_STATIC (t) = 1; |
25012 | TREE_PUBLIC (t) = 1; |
25013 | DECL_EXTERNAL (t) = 1; |
25014 | TREE_USED (t) = 1; |
25015 | TREE_THIS_VOLATILE (t) = 1; |
25016 | DECL_ARTIFICIAL (t) = 1; |
25017 | DECL_IGNORED_P (t) = 1; |
25018 | |
25019 | /* Do not share RTL as the declaration is visible outside of |
25020 | current function. */ |
25021 | x = DECL_RTL (t); |
25022 | RTX_FLAG (x, used) = 1; |
25023 | |
25024 | ix86_tls_stack_chk_guard_decl = t; |
25025 | } |
25026 | } |
25027 | else |
25028 | { |
25029 | tree asptrtype = build_pointer_type (type); |
25030 | |
25031 | t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset); |
25032 | t = build2 (MEM_REF, asptrtype, t, |
25033 | build_int_cst (asptrtype, 0)); |
25034 | TREE_THIS_VOLATILE (t) = 1; |
25035 | } |
25036 | |
25037 | return t; |
25038 | } |
25039 | |
25040 | return default_stack_protect_guard (); |
25041 | } |
25042 | |
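/* Illustrative command-line example for the TLS guard path above (the
   concrete values match the usual 64-bit glibc layout, not something
   this function enforces):

     -mstack-protector-guard=tls
     -mstack-protector-guard-reg=fs
     -mstack-protector-guard-offset=40

   makes the prologue/epilogue canary accesses use %fs:40 directly,
   while -mstack-protector-guard-symbol=<sym> routes them through the
   named TLS symbol cached in ix86_tls_stack_chk_guard_decl.  */
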
25043 | static bool |
25044 | ix86_stack_protect_runtime_enabled_p (void) |
25045 | { |
25046 | /* Naked functions should not enable stack protector. */ |
25047 | return !ix86_function_naked (current_function_decl); |
25048 | } |
25049 | |
25050 | /* For 32-bit code we can save PIC register setup by using |
25051 | __stack_chk_fail_local hidden function instead of calling |
25052 | __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC |
25053 | register, so it is better to call __stack_chk_fail directly. */ |
25054 | |
25055 | static tree ATTRIBUTE_UNUSED |
25056 | ix86_stack_protect_fail (void) |
25057 | { |
25058 | return TARGET_64BIT |
25059 | ? default_external_stack_protect_fail () |
25060 | : default_hidden_stack_protect_fail (); |
25061 | } |
25062 | |
25063 | /* Select a format to encode pointers in exception handling data. CODE |
25064 | is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is |
25065 | true if the symbol may be affected by dynamic relocations. |
25066 | |
25067 | ??? All x86 object file formats are capable of representing this. |
25068 | After all, the relocation needed is the same as for the call insn. |
25069 | Whether or not a particular assembler allows us to enter such, I |
25070 | guess we'll have to see. */ |
25071 | |
25072 | int |
25073 | asm_preferred_eh_data_format (int code, int global) |
25074 | { |
25075 | /* PE-COFF is effectively always -fPIC because of the .reloc section. */ |
25076 | if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access) |
25077 | { |
25078 | int type = DW_EH_PE_sdata8; |
25079 | if (ptr_mode == SImode |
25080 | || ix86_cmodel == CM_SMALL_PIC |
25081 | || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) |
25082 | type = DW_EH_PE_sdata4; |
25083 | return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; |
25084 | } |
25085 | |
25086 | if (ix86_cmodel == CM_SMALL |
25087 | || (ix86_cmodel == CM_MEDIUM && code)) |
25088 | return DW_EH_PE_udata4; |
25089 | |
25090 | return DW_EH_PE_absptr; |
25091 | } |
25092 | |
25093 | /* Implement targetm.vectorize.builtin_vectorization_cost. */ |
25094 | static int |
25095 | ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, |
25096 | tree vectype, int) |
25097 | { |
25098 | bool fp = false; |
25099 | machine_mode mode = TImode; |
25100 | int index; |
25101 | if (vectype != NULL) |
25102 | { |
25103 | fp = FLOAT_TYPE_P (vectype); |
25104 | mode = TYPE_MODE (vectype); |
25105 | } |
25106 | |
25107 | switch (type_of_cost) |
25108 | { |
25109 | case scalar_stmt: |
25110 | return fp ? ix86_cost->addss : COSTS_N_INSNS (1); |
25111 | |
25112 | case scalar_load: |
25113 | /* load/store costs are relative to register move which is 2. Recompute |
25114 | it to COSTS_N_INSNS so everything has the same base. */ |
25115 | return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] |
25116 | : ix86_cost->int_load [2]) / 2; |
25117 | |
25118 | case scalar_store: |
25119 | return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] |
25120 | : ix86_cost->int_store [2]) / 2; |
25121 | |
25122 | case vector_stmt: |
25123 | return ix86_vec_cost (mode, |
25124 | fp ? ix86_cost->addss : ix86_cost->sse_op); |
25125 | |
25126 | case vector_load: |
25127 | index = sse_store_index (mode); |
25128 | /* See PR82713 - we may end up being called on non-vector type. */ |
25129 | if (index < 0) |
25130 | index = 2; |
25131 | return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; |
25132 | |
25133 | case vector_store: |
25134 | index = sse_store_index (mode); |
25135 | /* See PR82713 - we may end up being called on non-vector type. */ |
25136 | if (index < 0) |
25137 | index = 2; |
25138 | return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; |
25139 | |
25140 | case vec_to_scalar: |
25141 | case scalar_to_vec: |
25142 | return ix86_vec_cost (mode, ix86_cost->sse_op); |
25143 | |
25144 | /* We should have separate costs for unaligned loads and gather/scatter. |
25145 | Do that incrementally. */ |
25146 | case unaligned_load: |
25147 | index = sse_store_index (mode); |
25148 | /* See PR82713 - we may end up being called on non-vector type. */ |
25149 | if (index < 0) |
25150 | index = 2; |
25151 | return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; |
25152 | |
25153 | case unaligned_store: |
25154 | index = sse_store_index (mode); |
25155 | /* See PR82713 - we may end up being called on non-vector type. */ |
25156 | if (index < 0) |
25157 | index = 2; |
25158 | return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; |
25159 | |
25160 | case vector_gather_load: |
25161 | return ix86_vec_cost (mode, |
25162 | COSTS_N_INSNS |
25163 | (ix86_cost->gather_static |
25164 | + ix86_cost->gather_per_elt |
25165 | * TYPE_VECTOR_SUBPARTS (vectype)) / 2); |
25166 | |
25167 | case vector_scatter_store: |
25168 | return ix86_vec_cost (mode, |
25169 | COSTS_N_INSNS |
25170 | (ix86_cost->scatter_static |
25171 | + ix86_cost->scatter_per_elt |
25172 | * TYPE_VECTOR_SUBPARTS (vectype)) / 2); |
25173 | |
25174 | case cond_branch_taken: |
25175 | return ix86_cost->cond_taken_branch_cost; |
25176 | |
25177 | case cond_branch_not_taken: |
25178 | return ix86_cost->cond_not_taken_branch_cost; |
25179 | |
25180 | case vec_perm: |
25181 | return ix86_vec_cost (mode, ix86_cost->sse_op); |
25182 | |
25183 | case vec_promote_demote: |
25184 | if (fp) |
25185 | return vec_fp_conversion_cost (ix86_tune_cost, mode); |
25186 | return ix86_vec_cost (mode, ix86_cost->sse_op); |
25187 | |
25188 | case vec_construct: |
25189 | { |
25190 | int n = TYPE_VECTOR_SUBPARTS (vectype); |
25191 | /* N - 1 element inserts into an SSE vector, the possible |
25192 | GPR -> XMM move is accounted for in add_stmt_cost. */ |
25193 | if (GET_MODE_BITSIZE (mode) <= 128) |
25194 | return (n - 1) * ix86_cost->sse_op; |
25195 | /* One vinserti128 for combining two SSE vectors for AVX256. */ |
25196 | else if (GET_MODE_BITSIZE (mode) == 256) |
25197 | return ((n - 2) * ix86_cost->sse_op |
25198 | + ix86_vec_cost (mode, ix86_cost->sse_op)); |
25199 | /* One vinserti64x4 and two vinserti128 for combining SSE |
25200 | and AVX256 vectors to AVX512. */ |
25201 | else if (GET_MODE_BITSIZE (mode) == 512) |
25202 | { |
25203 | machine_mode half_mode |
25204 | = mode_for_vector (GET_MODE_INNER (mode), |
25205 | GET_MODE_NUNITS (mode) / 2).require (); |
25206 | return ((n - 4) * ix86_cost->sse_op |
25207 | + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op) |
25208 | + ix86_vec_cost (mode, ix86_cost->sse_op)); |
25209 | } |
25210 | gcc_unreachable (); |
25211 | } |
25212 | |
25213 | default: |
25214 | gcc_unreachable (); |
25215 | } |
25216 | } |
25217 | |
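/* Worked example of the scaling above (illustrative numbers): the
   sse_load/int_load tables are expressed relative to a register move
   costing 2, so a hypothetical int_load[2] of 4 yields
   COSTS_N_INSNS (4) / 2 = 8 for scalar_load, i.e. the equivalent of
   two single-instruction statements on the COSTS_N_INSNS scale.  */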
25218 | |
25219 | /* This function returns the calling abi specific va_list type node. |
25220 | It returns the FNDECL specific va_list type. */ |
25221 | |
25222 | static tree |
25223 | ix86_fn_abi_va_list (tree fndecl) |
25224 | { |
25225 | if (!TARGET_64BIT) |
25226 | return va_list_type_node; |
25227 | gcc_assert (fndecl != NULL_TREE); |
25228 | |
25229 | if (ix86_function_abi ((const_tree) fndecl) == MS_ABI) |
25230 | return ms_va_list_type_node; |
25231 | else |
25232 | return sysv_va_list_type_node; |
25233 | } |
25234 | |
25235 | /* Returns the canonical va_list type specified by TYPE. If there |
25236 | is no valid TYPE provided, it returns NULL_TREE. */ |
25237 | |
25238 | static tree |
25239 | ix86_canonical_va_list_type (tree type) |
25240 | { |
25241 | if (TARGET_64BIT) |
25242 | { |
25243 | if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type))) |
25244 | return ms_va_list_type_node; |
25245 | |
25246 | if ((TREE_CODE (type) == ARRAY_TYPE |
25247 | && integer_zerop (array_type_nelts_minus_one (type))) |
25248 | || POINTER_TYPE_P (type)) |
25249 | { |
25250 | tree elem_type = TREE_TYPE (type); |
25251 | if (TREE_CODE (elem_type) == RECORD_TYPE |
25252 | && lookup_attribute ("sysv_abi va_list", |
25253 | TYPE_ATTRIBUTES (elem_type))) |
25254 | return sysv_va_list_type_node; |
25255 | } |
25256 | |
25257 | return NULL_TREE; |
25258 | } |
25259 | |
25260 | return std_canonical_va_list_type (type); |
25261 | } |
25262 | |
25263 | /* Iterate through the target-specific builtin types for va_list. |
25264 | IDX denotes the iterator, *PTREE is set to the result type of |
25265 | the va_list builtin, and *PNAME to its internal type. |
25266 | Returns zero if there is no element for this index, otherwise |
25267 | IDX should be increased upon the next call. |
25268 | Note, do not iterate a base builtin's name like __builtin_va_list. |
25269 | Used from c_common_nodes_and_builtins. */ |
25270 | |
25271 | static int |
25272 | ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
25273 | { |
25274 | if (TARGET_64BIT) |
25275 | { |
25276 | switch (idx) |
25277 | { |
25278 | default: |
25279 | break; |
25280 | |
25281 | case 0: |
25282 | *ptree = ms_va_list_type_node; |
25283 | *pname = "__builtin_ms_va_list"; |
25284 | return 1; |
25285 | |
25286 | case 1: |
25287 | *ptree = sysv_va_list_type_node; |
25288 | *pname = "__builtin_sysv_va_list"; |
25289 | return 1; |
25290 | } |
25291 | } |
25292 | |
25293 | return 0; |
25294 | } |
25295 | |
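/* Usage sketch for the two 64-bit va_list flavours enumerated above
   (illustrative only; the function name is made up):

     __attribute__((ms_abi)) int
     ms_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   A sysv_abi function uses the ordinary __builtin_va_list machinery
   instead; ix86_canonical_va_list_type tells the two apart by the
   "ms_abi va_list" / "sysv_abi va_list" type attributes.  */
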
25296 | #undef TARGET_SCHED_DISPATCH |
25297 | #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch |
25298 | #undef TARGET_SCHED_DISPATCH_DO |
25299 | #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch |
25300 | #undef TARGET_SCHED_REASSOCIATION_WIDTH |
25301 | #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width |
25302 | #undef TARGET_SCHED_REORDER |
25303 | #define TARGET_SCHED_REORDER ix86_atom_sched_reorder |
25304 | #undef TARGET_SCHED_ADJUST_PRIORITY |
25305 | #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority |
25306 | #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK |
25307 | #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ |
25308 | ix86_dependencies_evaluation_hook |
25309 | |
25310 | |
25311 | /* Implementation of reassociation_width target hook used by |
25312 | reassoc phase to identify parallelism level in reassociated |
25313 | tree. Statements tree_code is passed in OPC. Arguments type |
25314 | is passed in MODE. */ |
25315 | |
25316 | static int |
25317 | ix86_reassociation_width (unsigned int op, machine_mode mode) |
25318 | { |
25319 | int width = 1; |
25320 | /* Vector part. */ |
25321 | if (VECTOR_MODE_P (mode)) |
25322 | { |
25323 | int div = 1; |
25324 | if (INTEGRAL_MODE_P (mode)) |
25325 | width = ix86_cost->reassoc_vec_int; |
25326 | else if (FLOAT_MODE_P (mode)) |
25327 | width = ix86_cost->reassoc_vec_fp; |
25328 | |
25329 | if (width == 1) |
25330 | return 1; |
25331 | |
25332 | /* Znver1-4 Integer vector instructions execute in FP unit |
25333 | and can execute 3 additions and one multiplication per cycle. */ |
25334 | if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 |
25335 | || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4) |
25336 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
25337 | return 1; |
25338 | /* Znver5 can do 2 integer multiplications per cycle with latency |
25339 | of 3. */ |
25340 | if (ix86_tune == PROCESSOR_ZNVER5 |
25341 | && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) |
25342 | width = 6; |
25343 | |
25344 | /* Account for targets that split wide vectors into multiple parts. */ |
25345 | if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) |
25346 | div = GET_MODE_BITSIZE (mode) / 256; |
25347 | else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) |
25348 | div = GET_MODE_BITSIZE (mode) / 128; |
25349 | else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) |
25350 | div = GET_MODE_BITSIZE (mode) / 64; |
25351 | width = (width + div - 1) / div; |
25352 | } |
25353 | /* Scalar part. */ |
25354 | else if (INTEGRAL_MODE_P (mode)) |
25355 | width = ix86_cost->reassoc_int; |
25356 | else if (FLOAT_MODE_P (mode)) |
25357 | width = ix86_cost->reassoc_fp; |
25358 | |
25359 | /* Avoid using too many registers in 32bit mode. */ |
25360 | if (!TARGET_64BIT && width > 2) |
25361 | width = 2; |
25362 | return width; |
25363 | } |
25364 | |
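/* Worked example of the width computation above (illustrative cost
   values): assume reassoc_vec_int is 4 and the target sets
   TARGET_AVX256_SPLIT_REGS but not TARGET_AVX512_SPLIT_REGS.  For a
   512-bit integer vector mode, div = 512 / 128 = 4 and the reported
   width is (4 + 4 - 1) / 4 = 1; for a 256-bit mode, div = 2 and the
   width is (4 + 2 - 1) / 2 = 2.  */
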
25365 | /* ??? No autovectorization into MMX or 3DNOW until we can reliably |
25366 | place emms and femms instructions. */ |
25367 | |
25368 | static machine_mode |
25369 | ix86_preferred_simd_mode (scalar_mode mode) |
25370 | { |
25371 | if (!TARGET_SSE) |
25372 | return word_mode; |
25373 | |
25374 | switch (mode) |
25375 | { |
25376 | case E_QImode: |
25377 | if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) |
25378 | return V64QImode; |
25379 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25380 | return V32QImode; |
25381 | else |
25382 | return V16QImode; |
25383 | |
25384 | case E_HImode: |
25385 | if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) |
25386 | return V32HImode; |
25387 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25388 | return V16HImode; |
25389 | else |
25390 | return V8HImode; |
25391 | |
25392 | case E_SImode: |
25393 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25394 | return V16SImode; |
25395 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25396 | return V8SImode; |
25397 | else |
25398 | return V4SImode; |
25399 | |
25400 | case E_DImode: |
25401 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25402 | return V8DImode; |
25403 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25404 | return V4DImode; |
25405 | else |
25406 | return V2DImode; |
25407 | |
25408 | case E_HFmode: |
25409 | if (TARGET_AVX512FP16) |
25410 | { |
25411 | if (TARGET_AVX512VL) |
25412 | { |
25413 | if (TARGET_PREFER_AVX128) |
25414 | return V8HFmode; |
25415 | else if (TARGET_PREFER_AVX256) |
25416 | return V16HFmode; |
25417 | } |
25418 | return V32HFmode; |
25419 | } |
25420 | return word_mode; |
25421 | |
25422 | case E_BFmode: |
25423 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25424 | return V32BFmode; |
25425 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25426 | return V16BFmode; |
25427 | else |
25428 | return V8BFmode; |
25429 | |
25430 | case E_SFmode: |
25431 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25432 | return V16SFmode; |
25433 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25434 | return V8SFmode; |
25435 | else |
25436 | return V4SFmode; |
25437 | |
25438 | case E_DFmode: |
25439 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25440 | return V8DFmode; |
25441 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25442 | return V4DFmode; |
25443 | else if (TARGET_SSE2) |
25444 | return V2DFmode; |
25445 | /* FALLTHRU */ |
25446 | |
25447 | default: |
25448 | return word_mode; |
25449 | } |
25450 | } |
25451 | |
25452 | /* If AVX is enabled then try vectorizing with both 256bit and 128bit |
25453 | vectors. If AVX512F is enabled then try vectorizing with 512bit, |
25454 | 256bit and 128bit vectors. */ |
25455 | |
25456 | static unsigned int |
25457 | ix86_autovectorize_vector_modes (vector_modes *modes, bool all) |
25458 | { |
25459 | if (TARGET_AVX512F && !TARGET_PREFER_AVX256) |
25460 | { |
25461 | modes->safe_push (V64QImode); |
25462 | modes->safe_push (V32QImode); |
25463 | modes->safe_push (V16QImode); |
25464 | } |
25465 | else if (TARGET_AVX512F && all) |
25466 | { |
25467 | modes->safe_push (V32QImode); |
25468 | modes->safe_push (V16QImode); |
25469 | modes->safe_push (V64QImode); |
25470 | } |
25471 | else if (TARGET_AVX && !TARGET_PREFER_AVX128) |
25472 | { |
25473 | modes->safe_push (V32QImode); |
25474 | modes->safe_push (V16QImode); |
25475 | } |
25476 | else if (TARGET_AVX && all) |
25477 | { |
25478 | modes->safe_push (V16QImode); |
25479 | modes->safe_push (V32QImode); |
25480 | } |
25481 | else if (TARGET_SSE2) |
25482 | modes->safe_push (V16QImode); |
25483 | |
25484 | if (TARGET_MMX_WITH_SSE) |
25485 | modes->safe_push (V8QImode); |
25486 | |
25487 | if (TARGET_SSE2) |
25488 | modes->safe_push (V4QImode); |
25489 | |
25490 | return 0; |
25491 | } |
25492 | |
25493 | /* Implementation of targetm.vectorize.get_mask_mode. */ |
25494 | |
25495 | static opt_machine_mode |
25496 | ix86_get_mask_mode (machine_mode data_mode) |
25497 | { |
25498 | unsigned vector_size = GET_MODE_SIZE (data_mode); |
25499 | unsigned nunits = GET_MODE_NUNITS (data_mode); |
25500 | unsigned elem_size = vector_size / nunits; |
25501 | |
25502 | /* Scalar mask case. */ |
25503 | if ((TARGET_AVX512F && vector_size == 64) |
25504 | || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)) |
25505 | /* AVX512FP16 only supports vector comparison |
25506 | to kmask for _Float16. */ |
25507 | || (TARGET_AVX512VL && TARGET_AVX512FP16 |
25508 | && GET_MODE_INNER (data_mode) == E_HFmode) |
25509 | || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode)) |
25510 | { |
25511 | if (elem_size == 4 |
25512 | || elem_size == 8 |
25513 | || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2))) |
25514 | return smallest_int_mode_for_size (nunits).require (); |
25515 | } |
25516 | |
25517 | scalar_int_mode elem_mode |
25518 | = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require (); |
25519 | |
25520 | gcc_assert (elem_size * nunits == vector_size); |
25521 | |
25522 | return mode_for_vector (elem_mode, nunits); |
25523 | } |
25524 | |
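/* Illustrative examples for the mask-mode selection above: with
   AVX512F (but no AVX512VL) a 64-byte V16SFmode comparison uses a
   scalar k-mask, HImode (16 bits, one per lane), while a 16-byte
   V4SFmode comparison falls through to the vector-mask path and
   yields V4SImode, a vector of 4-byte all-ones/all-zeros lanes.  */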
25525 | |
25526 | |
25527 | /* Return class of registers which could be used for pseudo of MODE |
25528 | and of class RCLASS for spilling instead of memory. Return NO_REGS |
25529 | if it is not possible or non-profitable. */ |
25530 | |
25531 | /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */ |
25532 | |
25533 | static reg_class_t |
25534 | ix86_spill_class (reg_class_t rclass, machine_mode mode) |
25535 | { |
25536 | if (0 && TARGET_GENERAL_REGS_SSE_SPILL |
25537 | && TARGET_SSE2 |
25538 | && TARGET_INTER_UNIT_MOVES_TO_VEC |
25539 | && TARGET_INTER_UNIT_MOVES_FROM_VEC |
25540 | && (mode == SImode || (TARGET_64BIT && mode == DImode)) |
25541 | && INTEGER_CLASS_P (rclass)) |
25542 | return ALL_SSE_REGS; |
25543 | return NO_REGS; |
25544 | } |
25545 | |
25546 | /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation, |
25547 | but returns a lower bound. */ |
25548 | |
25549 | static unsigned int |
25550 | ix86_max_noce_ifcvt_seq_cost (edge e) |
25551 | { |
25552 | bool predictable_p = predictable_edge_p (e); |
25553 | if (predictable_p) |
25554 | { |
25555 | if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost)) |
25556 | return param_max_rtl_if_conversion_predictable_cost; |
25557 | } |
25558 | else |
25559 | { |
25560 | if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost)) |
25561 | return param_max_rtl_if_conversion_unpredictable_cost; |
25562 | } |
25563 | |
25564 | /* For modern machines with deeper pipelines, the penalty for a branch |
25565 | misprediction can be higher than before, since more pipeline slots |
25566 | must be reset. Add the parameter br_mispredict_scale as a factor |
25567 | describing the impact of resetting the pipeline. */ |
25568 | |
25569 | return BRANCH_COST (true, predictable_p) |
25570 | * ix86_tune_cost->br_mispredict_scale; |
25571 | } |
25572 | |
25573 | /* Return true if SEQ is a good candidate as a replacement for the |
25574 | if-convertible sequence described in IF_INFO. */ |
25575 | |
25576 | static bool |
25577 | ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) |
25578 | { |
25579 | if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p) |
25580 | { |
25581 | int cmov_cnt = 0; |
25582 | /* Punt if SEQ contains more than one CMOV or FCMOV instruction. |
25583 | Maybe we should allow even more conditional moves as long as they |
25584 | are used far enough not to stall the CPU, or also consider |
25585 | IF_INFO->TEST_BB succ edge probabilities. */ |
25586 | for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) |
25587 | { |
25588 | rtx set = single_set (insn); |
25589 | if (!set) |
25590 | continue; |
25591 | if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) |
25592 | continue; |
25593 | rtx src = SET_SRC (set); |
25594 | machine_mode mode = GET_MODE (src); |
25595 | if (GET_MODE_CLASS (mode) != MODE_INT |
25596 | && GET_MODE_CLASS (mode) != MODE_FLOAT) |
25597 | continue; |
25598 | if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) |
25599 | || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2)))) |
25600 | continue; |
25601 | /* insn is CMOV or FCMOV. */ |
25602 | if (++cmov_cnt > 1) |
25603 | return false; |
25604 | } |
25605 | } |
25606 | |
25607 | /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por) |
25608 | for movdfcc/movsfcc, and could possibly fail the cost comparison. |
25609 | Increasing the branch cost would hurt performance for other modes, |
25610 | so instead add some extra preference for floating point ifcvt. */ |
25611 | if (!TARGET_SSE4_1 && if_info->x |
25612 | && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT |
25613 | && if_info->speed_p) |
25614 | { |
25615 | unsigned cost = seq_cost (seq, true); |
25616 | |
25617 | if (cost <= if_info->original_cost) |
25618 | return true; |
25619 | |
25620 | return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2)); |
25621 | } |
25622 | |
25623 | return default_noce_conversion_profitable_p (seq, if_info); |
25624 | } |
25625 | |
25626 | /* x86-specific vector costs. */ |
25627 | class ix86_vector_costs : public vector_costs |
25628 | { |
25629 | public: |
25630 | ix86_vector_costs (vec_info *, bool); |
25631 | |
25632 | unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, |
25633 | stmt_vec_info stmt_info, slp_tree node, |
25634 | tree vectype, int misalign, |
25635 | vect_cost_model_location where) override; |
25636 | void finish_cost (const vector_costs *) override; |
25637 | |
25638 | private: |
25639 | |
25640 | /* Estimate register pressure of the vectorized code. */ |
25641 | void ix86_vect_estimate_reg_pressure (); |
25642 | /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for |
25643 | estimation of register pressure. |
25644 | ??? Currently it's only used by vec_construct/scalar_to_vec |
25645 | where we know it's not loaded from memory. */ |
25646 | unsigned m_num_gpr_needed[3]; |
25647 | unsigned m_num_sse_needed[3]; |
25648 | /* Number of 256-bit vector permutation. */ |
25649 | unsigned m_num_avx256_vec_perm[3]; |
25650 | }; |
25651 | |
25652 | ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar) |
25653 | : vector_costs (vinfo, costing_for_scalar), |
25654 | m_num_gpr_needed (), |
25655 | m_num_sse_needed (), |
25656 | m_num_avx256_vec_perm () |
25657 | { |
25658 | } |
25659 | |
25660 | /* Implement targetm.vectorize.create_costs. */ |
25661 | |
25662 | static vector_costs * |
25663 | ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) |
25664 | { |
25665 | return new ix86_vector_costs (vinfo, costing_for_scalar); |
25666 | } |
25667 | |
25668 | unsigned |
25669 | ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, |
25670 | stmt_vec_info stmt_info, slp_tree node, |
25671 | tree vectype, int misalign, |
25672 | vect_cost_model_location where) |
25673 | { |
25674 | unsigned retval = 0; |
25675 | bool scalar_p |
25676 | = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store); |
25677 | int stmt_cost = - 1; |
25678 | |
25679 | bool fp = false; |
25680 | machine_mode mode = scalar_p ? SImode : TImode; |
25681 | |
25682 | if (vectype != NULL) |
25683 | { |
25684 | fp = FLOAT_TYPE_P (vectype); |
25685 | mode = TYPE_MODE (vectype); |
25686 | if (scalar_p) |
25687 | mode = TYPE_MODE (TREE_TYPE (vectype)); |
25688 | } |
25689 | |
25690 | if ((kind == vector_stmt || kind == scalar_stmt) |
25691 | && stmt_info |
25692 | && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) |
25693 | { |
25694 | tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt); |
25695 | /*machine_mode inner_mode = mode; |
25696 | if (VECTOR_MODE_P (mode)) |
25697 | inner_mode = GET_MODE_INNER (mode);*/ |
25698 | |
25699 | switch (subcode) |
25700 | { |
25701 | case PLUS_EXPR: |
25702 | case POINTER_PLUS_EXPR: |
25703 | case MINUS_EXPR: |
25704 | if (kind == scalar_stmt) |
25705 | { |
25706 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25707 | stmt_cost = ix86_cost->addss; |
25708 | else if (X87_FLOAT_MODE_P (mode)) |
25709 | stmt_cost = ix86_cost->fadd; |
25710 | else |
25711 | stmt_cost = ix86_cost->add; |
25712 | } |
25713 | else |
25714 | stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss |
25715 | : ix86_cost->sse_op); |
25716 | break; |
25717 | |
25718 | case MULT_EXPR: |
25719 | /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw, |
25720 | take it as MULT_EXPR. */ |
25721 | case MULT_HIGHPART_EXPR: |
25722 | stmt_cost = ix86_multiplication_cost (ix86_cost, mode); |
25723 | break; |
25724 | /* There's no direct instruction for WIDEN_MULT_EXPR, |
25725 | take emulation into account. */ |
25726 | case WIDEN_MULT_EXPR: |
25727 | stmt_cost = ix86_widen_mult_cost (ix86_cost, mode, |
25728 | TYPE_UNSIGNED (vectype)); |
25729 | break; |
25730 | |
25731 | case NEGATE_EXPR: |
25732 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25733 | stmt_cost = ix86_cost->sse_op; |
25734 | else if (X87_FLOAT_MODE_P (mode)) |
25735 | stmt_cost = ix86_cost->fchs; |
25736 | else if (VECTOR_MODE_P (mode)) |
25737 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25738 | else |
25739 | stmt_cost = ix86_cost->add; |
25740 | break; |
25741 | case TRUNC_DIV_EXPR: |
25742 | case CEIL_DIV_EXPR: |
25743 | case FLOOR_DIV_EXPR: |
25744 | case ROUND_DIV_EXPR: |
25745 | case TRUNC_MOD_EXPR: |
25746 | case CEIL_MOD_EXPR: |
25747 | case FLOOR_MOD_EXPR: |
25748 | case RDIV_EXPR: |
25749 | case ROUND_MOD_EXPR: |
25750 | case EXACT_DIV_EXPR: |
25751 | stmt_cost = ix86_division_cost (ix86_cost, mode); |
25752 | break; |
25753 | |
25754 | case RSHIFT_EXPR: |
25755 | case LSHIFT_EXPR: |
25756 | case LROTATE_EXPR: |
25757 | case RROTATE_EXPR: |
25758 | { |
25759 | tree op1 = gimple_assign_rhs1 (stmt_info->stmt); |
25760 | tree op2 = gimple_assign_rhs2 (stmt_info->stmt); |
25761 | stmt_cost = ix86_shift_rotate_cost |
25762 | (ix86_cost, |
25763 | (subcode == RSHIFT_EXPR |
25764 | && !TYPE_UNSIGNED (TREE_TYPE (op1))) |
25765 | ? ASHIFTRT : LSHIFTRT, mode, |
25766 | TREE_CODE (op2) == INTEGER_CST, |
25767 | cst_and_fits_in_hwi (op2) |
25768 | ? int_cst_value (op2) : -1, |
25769 | false, false, NULL, NULL); |
25770 | } |
25771 | break; |
25772 | case NOP_EXPR: |
25773 | /* Only sign-conversions are free. */ |
25774 | if (tree_nop_conversion_p |
25775 | (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), |
25776 | TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) |
25777 | stmt_cost = 0; |
25778 | else if (fp) |
25779 | stmt_cost = vec_fp_conversion_cost |
25780 | (ix86_tune_cost, GET_MODE_BITSIZE (mode)); |
25781 | break; |
25782 | |
25783 | case FLOAT_EXPR: |
25784 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25785 | stmt_cost = ix86_cost->cvtsi2ss; |
25786 | else if (X87_FLOAT_MODE_P (mode)) |
25787 | /* TODO: We do not have cost tables for x87. */ |
25788 | stmt_cost = ix86_cost->fadd; |
25789 | else |
25790 | stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); |
25791 | break; |
25792 | |
25793 | case FIX_TRUNC_EXPR: |
25794 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25795 | stmt_cost = ix86_cost->cvtss2si; |
25796 | else if (X87_FLOAT_MODE_P (mode)) |
25797 | /* TODO: We do not have cost tables for x87. */ |
25798 | stmt_cost = ix86_cost->fadd; |
25799 | else |
25800 | stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); |
25801 | break; |
25802 | |
25803 | case COND_EXPR: |
25804 | { |
25805 | /* SSE2 conditional move sequence is: |
25806 | pcmpgtd %xmm5, %xmm0 (accounted separately) |
25807 | pand %xmm0, %xmm2 |
25808 | pandn %xmm1, %xmm0 |
25809 | por %xmm2, %xmm0 |
25810 | while SSE4 uses cmp + blend |
25811 | and AVX512 masked moves. |
25812 | |
25813 | The condition is accounted separately since we usually have |
25814 | p = a < b |
25815 | c = p ? x : y |
25816 | and we will account first statement as setcc. Exception is when |
25817 | p is loaded from memory as bool and then we will not account |
25818 | the compare, but there is no way to check for this. */ |
25819 | |
25820 | int ninsns = TARGET_SSE4_1 ? 1 : 3; |
25821 | |
25822 | /* If one of parameters is 0 or -1 the sequence will be simplified: |
25823 | (if_true & mask) | (if_false & ~mask) -> if_true & mask */ |
25824 | if (ninsns > 1 |
25825 | && (zerop (gimple_assign_rhs2 (stmt_info->stmt)) |
25826 | || zerop (gimple_assign_rhs3 (stmt_info->stmt)) |
25827 | || integer_minus_onep |
25828 | (gimple_assign_rhs2 (stmt_info->stmt)) |
25829 | || integer_minus_onep |
25830 | (gimple_assign_rhs3 (stmt_info->stmt)))) |
25831 | ninsns = 1; |
25832 | |
25833 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25834 | stmt_cost = ninsns * ix86_cost->sse_op; |
25835 | else if (X87_FLOAT_MODE_P (mode)) |
25836 | /* x87 requires conditional branch. We don't have cost for |
25837 | that. */ |
25838 | ; |
25839 | else if (VECTOR_MODE_P (mode)) |
25840 | stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); |
25841 | else |
25842 | /* compare (accounted separately) + cmov. */ |
25843 | stmt_cost = ix86_cost->add; |
25844 | } |
25845 | break; |
25846 | |
25847 | case MIN_EXPR: |
25848 | case MAX_EXPR: |
25849 | if (fp) |
25850 | { |
25851 | if (X87_FLOAT_MODE_P (mode) |
25852 | && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25853 | /* x87 requires conditional branch. We don't have cost for |
25854 | that. */ |
25855 | ; |
25856 | else |
25857 | /* minss */ |
25858 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25859 | } |
25860 | else |
25861 | { |
25862 | if (VECTOR_MODE_P (mode)) |
25863 | { |
25864 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25865 | /* vpmin was introduced in SSE3. |
25866 | SSE2 needs pcmpgtd + pand + pandn + pxor. |
25867 | If one of parameters is 0 or -1 the sequence is simplified |
25868 | to pcmpgtd + pand. */ |
25869 | if (!TARGET_SSSE3) |
25870 | { |
25871 | if (zerop (gimple_assign_rhs2 (stmt_info->stmt)) |
25872 | || integer_minus_onep |
25873 | (gimple_assign_rhs2 (stmt_info->stmt))) |
25874 | stmt_cost *= 2; |
25875 | else |
25876 | stmt_cost *= 4; |
25877 | } |
25878 | } |
25879 | else |
25880 | /* cmp + cmov. */ |
25881 | stmt_cost = ix86_cost->add * 2; |
25882 | } |
25883 | break; |
25884 | |
25885 | case ABS_EXPR: |
25886 | case ABSU_EXPR: |
25887 | if (fp) |
25888 | { |
25889 | if (X87_FLOAT_MODE_P (mode) |
25890 | && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25891 | /* fabs. */ |
25892 | stmt_cost = ix86_cost->fabs; |
25893 | else |
25894 | /* andss of sign bit. */ |
25895 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25896 | } |
25897 | else |
25898 | { |
25899 | if (VECTOR_MODE_P (mode)) |
25900 | { |
25901 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25902 | /* pabs was introduced in SSSE3; without it abs is |
25903 | emulated as psra + pxor + psub. */ |
25904 | if (!TARGET_SSSE3) |
25905 | stmt_cost *= 3; |
25906 | } |
25907 | else |
25908 | /* neg + cmov. */ |
25909 | stmt_cost = ix86_cost->add * 2; |
25910 | } |
25911 | break; |
25912 | |
25913 | case BIT_IOR_EXPR: |
25914 | case BIT_XOR_EXPR: |
25915 | case BIT_AND_EXPR: |
25916 | case BIT_NOT_EXPR: |
25917 | gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode) |
25918 | && !X87_FLOAT_MODE_P (mode)); |
25919 | if (VECTOR_MODE_P (mode)) |
25920 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25921 | else |
25922 | stmt_cost = ix86_cost->add; |
25923 | break; |
25924 | |
25925 | default: |
25926 | if (truth_value_p (subcode)) |
25927 | { |
25928 | if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) |
25929 | /* CMPccS? instructions are cheap, so use sse_op. While they |
25930 | produce a mask which may need to be turned to 0/1 by and, |
25931 | expect that this will be optimized away in a common case. */ |
25932 | stmt_cost = ix86_cost->sse_op; |
25933 | else if (X87_FLOAT_MODE_P (mode)) |
25934 | /* fcmp + setcc. */ |
25935 | stmt_cost = ix86_cost->fadd + ix86_cost->add; |
25936 | else if (VECTOR_MODE_P (mode)) |
25937 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25938 | else |
25939 | /* setcc. */ |
25940 | stmt_cost = ix86_cost->add; |
25941 | break; |
25942 | } |
25943 | break; |
25944 | } |
25945 | } |
25946 | |
25947 | combined_fn cfn; |
25948 | if ((kind == vector_stmt || kind == scalar_stmt) |
25949 | && stmt_info |
25950 | && stmt_info->stmt |
25951 | && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) |
25952 | switch (cfn) |
25953 | { |
25954 | case CFN_FMA: |
25955 | stmt_cost = ix86_vec_cost (mode, |
25956 | mode == SFmode ? ix86_cost->fmass |
25957 | : ix86_cost->fmasd); |
25958 | break; |
25959 | case CFN_MULH: |
25960 | stmt_cost = ix86_multiplication_cost (ix86_cost, mode); |
25961 | break; |
25962 | default: |
25963 | break; |
25964 | } |
25965 | |
25966 | if (kind == vec_promote_demote) |
25967 | { |
25968 | int outer_size |
25969 | = tree_to_uhwi |
25970 | (TYPE_SIZE |
25971 | (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); |
25972 | int inner_size |
25973 | = tree_to_uhwi |
25974 | (TYPE_SIZE |
25975 | (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); |
25976 | bool inner_fp = FLOAT_TYPE_P |
25977 | (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))); |
25978 | |
25979 | if (fp && inner_fp) |
25980 | stmt_cost = vec_fp_conversion_cost |
25981 | (ix86_tune_cost, GET_MODE_BITSIZE (mode)); |
25982 | else if (fp && !inner_fp) |
25983 | stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); |
25984 | else if (!fp && inner_fp) |
25985 | stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); |
25986 | else |
25987 | stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); |
25988 | /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is |
25989 | greater than inner size we will end up doing two conversions and |
25990 | packing them. We always pack pairs; if the size difference is greater |
25991 | it is split into multiple demote operations. */ |
25992 | if (inner_size > outer_size) |
25993 | stmt_cost = stmt_cost * 2 |
25994 | + ix86_vec_cost (mode, ix86_cost->sse_op); |
25995 | } |
25996 | |
25997 | /* If we do elementwise loads into a vector then we are bound by |
25998 | latency and execution resources for the many scalar loads |
25999 | (AGU and load ports). Try to account for this by scaling the |
26000 | construction cost by the number of elements involved. */ |
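| /* For example, the scaling below charges an elementwise V8SF |
| construction roughly (8 + 1) times the base vec_construct cost. */ |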
26001 | if ((kind == vec_construct || kind == vec_to_scalar) |
26002 | && ((stmt_info |
26003 | && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type |
26004 | || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) |
26005 | && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE |
26006 | && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) |
26007 | != INTEGER_CST)) |
26008 | || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) |
26009 | == VMAT_GATHER_SCATTER))) |
26010 | || (node |
26011 | && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE |
26012 | || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP |
26013 | && SLP_TREE_LANES (node) == 1)) |
26014 | && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF |
26015 | (SLP_TREE_REPRESENTATIVE (node)))) |
26016 | != INTEGER_CST)) |
26017 | || (SLP_TREE_MEMORY_ACCESS_TYPE (node) |
26018 | == VMAT_GATHER_SCATTER))))) |
26019 | { |
26020 | stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); |
26021 | stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); |
26022 | } |
26023 | else if ((kind == vec_construct || kind == scalar_to_vec) |
26024 | && node |
26025 | && SLP_TREE_DEF_TYPE (node) == vect_external_def) |
26026 | { |
26027 | stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); |
26028 | unsigned i; |
26029 | tree op; |
26030 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
26031 | if (TREE_CODE (op) == SSA_NAME) |
26032 | TREE_VISITED (op) = 0; |
26033 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
26034 | { |
26035 | if (TREE_CODE (op) != SSA_NAME |
26036 | || TREE_VISITED (op)) |
26037 | continue; |
26038 | TREE_VISITED (op) = 1; |
26039 | gimple *def = SSA_NAME_DEF_STMT (op); |
26040 | tree tem; |
26041 | if (is_gimple_assign (def) |
26042 | && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)) |
26043 | && ((tem = gimple_assign_rhs1 (def)), true) |
26044 | && TREE_CODE (tem) == SSA_NAME |
26045 | /* A sign-change expands to nothing. */ |
26046 | && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)), |
26047 | TREE_TYPE (tem))) |
26048 | def = SSA_NAME_DEF_STMT (tem); |
26049 | /* When the component is loaded from memory we can directly |
26050 | move it to a vector register, otherwise we have to go |
26051 | via a GPR or via vpinsr which involves similar cost. |
26052 | Likewise with a BIT_FIELD_REF extracting from a vector |
26053 | register we can hope to avoid using a GPR. */ |
26054 | if (!is_gimple_assign (def) |
26055 | || ((!gimple_assign_load_p (def) |
26056 | || (!TARGET_SSE4_1 |
26057 | && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1)) |
26058 | && (gimple_assign_rhs_code (def) != BIT_FIELD_REF |
26059 | || !VECTOR_TYPE_P (TREE_TYPE |
26060 | (TREE_OPERAND (gimple_assign_rhs1 (def), 0)))))) |
26061 | { |
26062 | if (fp) |
26063 | m_num_sse_needed[where]++; |
26064 | else |
26065 | { |
26066 | m_num_gpr_needed[where]++; |
26067 | |
26068 | int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; |
26069 | |
26070 | /* For integer construction, the number of actual GPR -> XMM |
26071 | moves will be somewhere between 0 and n. |
26072 | We do not have a very good idea of the actual number, since |
26073 | the source may be a constant, memory or a chain of |
26074 | instructions that will later be converted by the |
26075 | scalar-to-vector pass. */ |
26076 | if (kind == vec_construct |
26077 | && GET_MODE_BITSIZE (mode) == 256) |
26078 | cost *= 2; |
26079 | else if (kind == vec_construct |
26080 | && GET_MODE_BITSIZE (mode) == 512) |
26081 | cost *= 3; |
26082 | stmt_cost += cost; |
26083 | } |
26084 | } |
26085 | } |
26086 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
26087 | if (TREE_CODE (op) == SSA_NAME) |
26088 | TREE_VISITED (op) = 0; |
26089 | } |
26090 | if (stmt_cost == -1) |
26091 | stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); |
26092 | |
26093 | if (kind == vec_perm && vectype |
26094 | && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32) |
26095 | m_num_avx256_vec_perm[where]++; |
26096 | |
26097 | /* Penalize DFmode vector operations for Bonnell. */ |
26098 | if (TARGET_CPU_P (BONNELL) && kind == vector_stmt |
26099 | && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode) |
26100 | stmt_cost *= 5; /* FIXME: The value here is arbitrary. */ |
26101 | |
26102 | /* Statements in an inner loop relative to the loop being |
26103 | vectorized are weighted more heavily. The value here is |
26104 | arbitrary and could potentially be improved with analysis. */ |
26105 | retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); |
26106 | |
26107 | /* We need to multiply all vector stmt cost by 1.7 (estimated cost) |
26108 | for Silvermont as it has an out-of-order integer pipeline and can execute |
26109 | 2 scalar instructions per tick, but has an in-order SIMD pipeline. */ |
26110 | if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT) |
26111 | || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL)) |
26112 | && stmt_info && stmt_info->stmt) |
26113 | { |
26114 | tree lhs_op = gimple_get_lhs (stmt_info->stmt); |
26115 | if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE) |
26116 | retval = (retval * 17) / 10; |
26117 | } |
26118 | |
26119 | m_costs[where] += retval; |
26120 | |
26121 | return retval; |
26122 | } |
26123 | |
26124 | void |
26125 | ix86_vector_costs::ix86_vect_estimate_reg_pressure () |
26126 | { |
26127 | unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2; |
26128 | unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2; |
26129 | |
26130 | /* There is no convenient way to query the number of FP registers available on the target; for now use the SSE register count. */ |
26131 | unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8; |
26132 | for (unsigned i = 0; i != 3; i++) |
26133 | { |
26134 | if (m_num_gpr_needed[i] > target_avail_regs) |
26135 | m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs); |
26136 | /* Only measure SSE register pressure. */ |
26137 | if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse)) |
26138 | m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse); |
26139 | } |
26140 | } |
26141 | |
26142 | void |
26143 | ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) |
26144 | { |
26145 | loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); |
26146 | if (loop_vinfo && !m_costing_for_scalar) |
26147 | { |
26148 | /* We are currently not asking the vectorizer to compare costs |
26149 | between different vector mode sizes. When using predication |
26150 | that will end up always choosing the preferred mode size even |
26151 | if there's a smaller mode covering all lanes. Test for this |
26152 | situation and artificially reject the larger mode attempt. |
26153 | ??? We currently lack masked ops for sub-SSE sized modes, |
26154 | so we could restrict this rejection to AVX and AVX512 modes |
26155 | but err on the safe side for now. */ |
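| /* For instance, with 3 known iterations a vectorization factor of 8 is |
| rejected, since a 4-lane (or smaller) mode already covers all lanes. */ |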
26156 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
26157 | && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
26158 | && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
26159 | && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()) |
26160 | > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo)))) |
26161 | m_costs[vect_body] = INT_MAX; |
26162 | } |
26163 | |
26164 | ix86_vect_estimate_reg_pressure (); |
26165 | |
26166 | for (int i = 0; i != 3; i++) |
26167 | if (m_num_avx256_vec_perm[i] |
26168 | && TARGET_AVX256_AVOID_VEC_PERM) |
26169 | m_costs[i] = INT_MAX; |
26170 | |
26171 | /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both |
26172 | an AVX2 and an SSE epilogue for AVX512 vectorized loops. */ |
26173 | if (loop_vinfo |
26174 | && LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
26175 | && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32 |
26176 | && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES]) |
26177 | m_suggested_epilogue_mode = V16QImode; |
26178 | /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger |
26179 | enable a 64bit SSE epilogue. */ |
26180 | if (loop_vinfo |
26181 | && LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
26182 | && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16 |
26183 | && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16) |
26184 | m_suggested_epilogue_mode = V8QImode; |
26185 | |
26186 | vector_costs::finish_cost (scalar_costs); |
26187 | } |
26188 | |
26189 | /* Validate target specific memory model bits in VAL. */ |
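| /* The HLE bits are ORed into the C11/C++11 memory model by the user, |
| along the lines of (assuming -mhle): |
| __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); |
| __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); */ |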
26190 | |
26191 | static unsigned HOST_WIDE_INT |
26192 | ix86_memmodel_check (unsigned HOST_WIDE_INT val) |
26193 | { |
26194 | enum memmodel model = memmodel_from_int (val); |
26195 | bool strong; |
26196 | |
26197 | if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE |
26198 | |MEMMODEL_MASK) |
26199 | || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) |
26200 | { |
26201 | warning (OPT_Winvalid_memory_model, |
26202 | "unknown architecture specific memory model"); |
26203 | return MEMMODEL_SEQ_CST; |
26204 | } |
26205 | strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); |
26206 | if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) |
26207 | { |
26208 | warning (OPT_Winvalid_memory_model, |
26209 | "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger " |
26210 | "memory model"); |
26211 | return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; |
26212 | } |
26213 | if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) |
26214 | { |
26215 | warning (OPT_Winvalid_memory_model, |
26216 | "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger " |
26217 | "memory model"); |
26218 | return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; |
26219 | } |
26220 | return val; |
26221 | } |
26222 | |
26223 | /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, |
26224 | CLONEI->vecsize_float and if CLONEI->simdlen is 0, also |
26225 | CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, |
26226 | or number of vecsize_mangle variants that should be emitted. */ |
26227 | |
26228 | static int |
26229 | ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, |
26230 | struct cgraph_simd_clone *clonei, |
26231 | tree base_type, int num, |
26232 | bool explicit_p) |
26233 | { |
26234 | int ret = 1; |
26235 | |
26236 | if (clonei->simdlen |
26237 | && (clonei->simdlen < 2 |
26238 | || clonei->simdlen > 1024 |
26239 | || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) |
26240 | { |
26241 | if (explicit_p) |
26242 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
26243 | "unsupported simdlen %wd", clonei->simdlen.to_constant ()); |
26244 | return 0; |
26245 | } |
26246 | |
26247 | tree ret_type = TREE_TYPE (TREE_TYPE (node->decl)); |
26248 | if (TREE_CODE (ret_type) != VOID_TYPE) |
26249 | switch (TYPE_MODE (ret_type)) |
26250 | { |
26251 | case E_QImode: |
26252 | case E_HImode: |
26253 | case E_SImode: |
26254 | case E_DImode: |
26255 | case E_SFmode: |
26256 | case E_DFmode: |
26257 | /* case E_SCmode: */ |
26258 | /* case E_DCmode: */ |
26259 | if (!AGGREGATE_TYPE_P (ret_type)) |
26260 | break; |
26261 | /* FALLTHRU */ |
26262 | default: |
26263 | if (explicit_p) |
26264 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
26265 | "unsupported return type %qT for simd", ret_type); |
26266 | return 0; |
26267 | } |
26268 | |
26269 | tree t; |
26270 | int i; |
26271 | tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl)); |
26272 | bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE); |
26273 | |
26274 | for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0; |
26275 | t && t != void_list_node; t = TREE_CHAIN (t), i++) |
26276 | { |
26277 | tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); |
26278 | switch (TYPE_MODE (arg_type)) |
26279 | { |
26280 | case E_QImode: |
26281 | case E_HImode: |
26282 | case E_SImode: |
26283 | case E_DImode: |
26284 | case E_SFmode: |
26285 | case E_DFmode: |
26286 | /* case E_SCmode: */ |
26287 | /* case E_DCmode: */ |
26288 | if (!AGGREGATE_TYPE_P (arg_type)) |
26289 | break; |
26290 | /* FALLTHRU */ |
26291 | default: |
26292 | if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM) |
26293 | break; |
26294 | if (explicit_p) |
26295 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
26296 | "unsupported argument type %qT for simd", arg_type); |
26297 | return 0; |
26298 | } |
26299 | } |
26300 | |
26301 | if (!TREE_PUBLIC (node->decl) || !explicit_p) |
26302 | { |
26303 | /* If the function isn't exported, we can pick up just one ISA |
26304 | for the clones. */ |
26305 | if (TARGET_AVX512F) |
26306 | clonei->vecsize_mangle = 'e'; |
26307 | else if (TARGET_AVX2) |
26308 | clonei->vecsize_mangle = 'd'; |
26309 | else if (TARGET_AVX) |
26310 | clonei->vecsize_mangle = 'c'; |
26311 | else |
26312 | clonei->vecsize_mangle = 'b'; |
26313 | ret = 1; |
26314 | } |
26315 | else |
26316 | { |
26317 | clonei->vecsize_mangle = "bcde"[num]; |
26318 | ret = 4; |
26319 | } |
26320 | clonei->mask_mode = VOIDmode; |
26321 | switch (clonei->vecsize_mangle) |
26322 | { |
26323 | case 'b': |
26324 | clonei->vecsize_int = 128; |
26325 | clonei->vecsize_float = 128; |
26326 | break; |
26327 | case 'c': |
26328 | clonei->vecsize_int = 128; |
26329 | clonei->vecsize_float = 256; |
26330 | break; |
26331 | case 'd': |
26332 | clonei->vecsize_int = 256; |
26333 | clonei->vecsize_float = 256; |
26334 | break; |
26335 | case 'e': |
26336 | clonei->vecsize_int = 512; |
26337 | clonei->vecsize_float = 512; |
26338 | if (TYPE_MODE (base_type) == QImode) |
26339 | clonei->mask_mode = DImode; |
26340 | else |
26341 | clonei->mask_mode = SImode; |
26342 | break; |
26343 | } |
26344 | if (clonei->simdlen == 0) |
26345 | { |
26346 | if (SCALAR_INT_MODE_P (TYPE_MODE (base_type))) |
26347 | clonei->simdlen = clonei->vecsize_int; |
26348 | else |
26349 | clonei->simdlen = clonei->vecsize_float; |
26350 | clonei->simdlen = clonei->simdlen |
26351 | / GET_MODE_BITSIZE (TYPE_MODE (base_type)); |
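| /* For example, with a 'float' characteristic type (32 bits) and the |
| 'c' (AVX) variant, vecsize_float is 256, giving simdlen = 256 / 32 = 8. */ |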
26352 | } |
26353 | else if (clonei->simdlen > 16) |
26354 | { |
26355 | /* For compatibility with ICC, use the same upper bounds |
26356 | for simdlen. In particular, for CTYPE below, use the return type, |
26357 | unless the function returns void, in that case use the characteristic |
26358 | type. If it is possible for given SIMDLEN to pass CTYPE value |
26359 | in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs |
26360 | for 64-bit code), accept that SIMDLEN, otherwise warn and don't |
26361 | emit corresponding clone. */ |
26362 | tree ctype = ret_type; |
26363 | if (VOID_TYPE_P (ret_type)) |
26364 | ctype = base_type; |
26365 | int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen; |
26366 | if (SCALAR_INT_MODE_P (TYPE_MODE (ctype))) |
26367 | cnt /= clonei->vecsize_int; |
26368 | else |
26369 | cnt /= clonei->vecsize_float; |
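| /* For instance, a 'double' CTYPE (64 bits) with simdlen 32 needs |
| 64 * 32 = 2048 bits, i.e. 16 registers for the 128-bit 'b' variant, |
| which is only acceptable for 64-bit code. */ |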
26370 | if (cnt > (TARGET_64BIT ? 16 : 8)) |
26371 | { |
26372 | if (explicit_p) |
26373 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
26374 | "unsupported simdlen %wd", |
26375 | clonei->simdlen.to_constant ()); |
26376 | return 0; |
26377 | } |
26378 | } |
26379 | return ret; |
26380 | } |
26381 | |
26382 | /* If SIMD clone NODE can't be used in a vectorized loop |
26383 | in current function, return -1, otherwise return a badness of using it |
26384 | (0 if it is most desirable from vecsize_mangle point of view, 1 |
26385 | slightly less desirable, etc.). */ |
26386 | |
26387 | static int |
26388 | ix86_simd_clone_usable (struct cgraph_node *node, machine_mode) |
26389 | { |
26390 | switch (node->simdclone->vecsize_mangle) |
26391 | { |
26392 | case 'b': |
26393 | if (!TARGET_SSE2) |
26394 | return -1; |
26395 | if (!TARGET_AVX) |
26396 | return 0; |
26397 | return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; |
26398 | case 'c': |
26399 | if (!TARGET_AVX) |
26400 | return -1; |
26401 | return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; |
26402 | case 'd': |
26403 | if (!TARGET_AVX2) |
26404 | return -1; |
26405 | return TARGET_AVX512F ? 1 : 0; |
26406 | case 'e': |
26407 | if (!TARGET_AVX512F) |
26408 | return -1; |
26409 | return 0; |
26410 | default: |
26411 | gcc_unreachable (); |
26412 | } |
26413 | } |
26414 | |
26415 | /* This function adjusts the unroll factor based on |
26416 | the hardware capabilities. For example, bdver3 has |
26417 | a loop buffer which makes unrolling of smaller |
26418 | loops less important. This function decides the |
26419 | unroll factor using number of memory references |
26420 | (value 32 is used) as a heuristic. */ |
26421 | |
26422 | static unsigned |
26423 | ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) |
26424 | { |
26425 | basic_block *bbs; |
26426 | rtx_insn *insn; |
26427 | unsigned i; |
26428 | unsigned mem_count = 0; |
26429 | |
26430 | /* Unroll small loops when the unroll factor is not explicitly |
26431 | specified. */ |
26432 | if (ix86_unroll_only_small_loops && !loop->unroll) |
26433 | { |
26434 | if (loop->ninsns <= ix86_cost->small_unroll_ninsns) |
26435 | return MIN (nunroll, ix86_cost->small_unroll_factor); |
26436 | else |
26437 | return 1; |
26438 | } |
26439 | |
26440 | if (!TARGET_ADJUST_UNROLL) |
26441 | return nunroll; |
26442 | |
26443 | /* Count the number of memory references within the loop body. |
26444 | This value determines the unrolling factor for bdver3 and bdver4 |
26445 | architectures. */ |
26446 | subrtx_iterator::array_type array; |
26447 | bbs = get_loop_body (loop); |
26448 | for (i = 0; i < loop->num_nodes; i++) |
26449 | FOR_BB_INSNS (bbs[i], insn) |
26450 | if (NONDEBUG_INSN_P (insn)) |
26451 | FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
26452 | if (const_rtx x = *iter) |
26453 | if (MEM_P (x)) |
26454 | { |
26455 | machine_mode mode = GET_MODE (x); |
26456 | unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
26457 | if (n_words > 4) |
26458 | mem_count += 2; |
26459 | else |
26460 | mem_count += 1; |
26461 | } |
26462 | free (bbs); |
26463 | |
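| /* For example, a loop body with 8 counted memory references is capped |
| at an unroll factor of 32 / 8 = 4. */ |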
26464 | if (mem_count && mem_count <= 32) |
26465 | return MIN (nunroll, 32 / mem_count); |
26466 | |
26467 | return nunroll; |
26468 | } |
26469 | |
26470 | |
26471 | /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ |
26472 | |
26473 | static bool |
26474 | ix86_float_exceptions_rounding_supported_p (void) |
26475 | { |
26476 | /* For x87 floating point with standard excess precision handling, |
26477 | there is no adddf3 pattern (since x87 floating point only has |
26478 | XFmode operations) so the default hook implementation gets this |
26479 | wrong. */ |
26480 | return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH); |
26481 | } |
26482 | |
26483 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ |
26484 | |
26485 | static void |
26486 | ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) |
26487 | { |
26488 | if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH)) |
26489 | return; |
26490 | tree exceptions_var = create_tmp_var_raw (integer_type_node); |
26491 | if (TARGET_80387) |
26492 | { |
26493 | tree fenv_index_type = build_index_type (size_int (6)); |
26494 | tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type); |
26495 | tree fenv_var = create_tmp_var_raw (fenv_type); |
26496 | TREE_ADDRESSABLE (fenv_var) = 1; |
26497 | tree fenv_ptr = build_pointer_type (fenv_type); |
26498 | tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); |
26499 | fenv_addr = fold_convert (ptr_type_node, fenv_addr); |
26500 | tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV); |
26501 | tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV); |
26502 | tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW); |
26503 | tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX); |
26504 | tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); |
26505 | tree hold_fnclex = build_call_expr (fnclex, 0); |
26506 | fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, |
26507 | NULL_TREE, NULL_TREE); |
26508 | *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, |
26509 | hold_fnclex); |
26510 | *clear = build_call_expr (fnclex, 0); |
26511 | tree sw_var = create_tmp_var_raw (short_unsigned_type_node); |
26512 | tree fnstsw_call = build_call_expr (fnstsw, 0); |
26513 | tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var, |
26514 | fnstsw_call, NULL_TREE, NULL_TREE); |
26515 | tree exceptions_x87 = fold_convert (integer_type_node, sw_var); |
26516 | tree update_mod = build4 (TARGET_EXPR, integer_type_node, |
26517 | exceptions_var, exceptions_x87, |
26518 | NULL_TREE, NULL_TREE); |
26519 | *update = build2 (COMPOUND_EXPR, integer_type_node, |
26520 | sw_mod, update_mod); |
26521 | tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr); |
26522 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); |
26523 | } |
26524 | if (TARGET_SSE && TARGET_SSE_MATH) |
26525 | { |
26526 | tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); |
26527 | tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); |
26528 | tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR); |
26529 | tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR); |
26530 | tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); |
26531 | tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node, |
26532 | mxcsr_orig_var, stmxcsr_hold_call, |
26533 | NULL_TREE, NULL_TREE); |
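| /* 0x1f80 sets the six exception mask bits (MXCSR bits 7-12), and |
| masking with 0xffffffc0 clears the exception flag bits (MXCSR bits |
| 0-5), so the modified MXCSR has all exceptions masked and no flags |
| pending. */ |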
26534 | tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node, |
26535 | mxcsr_orig_var, |
26536 | build_int_cst (unsigned_type_node, 0x1f80)); |
26537 | hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val, |
26538 | build_int_cst (unsigned_type_node, 0xffffffc0)); |
26539 | tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node, |
26540 | mxcsr_mod_var, hold_mod_val, |
26541 | NULL_TREE, NULL_TREE); |
26542 | tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
26543 | tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node, |
26544 | hold_assign_orig, hold_assign_mod); |
26545 | hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all, |
26546 | ldmxcsr_hold_call); |
26547 | if (*hold) |
26548 | *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all); |
26549 | else |
26550 | *hold = hold_all; |
26551 | tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); |
26552 | if (*clear) |
26553 | *clear = build2 (COMPOUND_EXPR, void_type_node, *clear, |
26554 | ldmxcsr_clear_call); |
26555 | else |
26556 | *clear = ldmxcsr_clear_call; |
26557 | tree stxmcsr_update_call = build_call_expr (stmxcsr, 0); |
26558 | tree exceptions_sse = fold_convert (integer_type_node, |
26559 | stxmcsr_update_call); |
26560 | if (*update) |
26561 | { |
26562 | tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node, |
26563 | exceptions_var, exceptions_sse); |
26564 | tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node, |
26565 | exceptions_var, exceptions_mod); |
26566 | *update = build2 (COMPOUND_EXPR, integer_type_node, *update, |
26567 | exceptions_assign); |
26568 | } |
26569 | else |
26570 | *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var, |
26571 | exceptions_sse, NULL_TREE, NULL_TREE); |
26572 | tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var); |
26573 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
26574 | ldmxcsr_update_call); |
26575 | } |
26576 | tree atomic_feraiseexcept |
26577 | = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); |
26578 | tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, |
26579 | 1, exceptions_var); |
26580 | *update = build2 (COMPOUND_EXPR, void_type_node, *update, |
26581 | atomic_feraiseexcept_call); |
26582 | } |
26583 | |
26584 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
26585 | /* For i386, a common symbol is local only for non-PIE binaries. For |
26586 | x86-64, a common symbol is local only for non-PIE binaries or if the |
26587 | linker supports copy relocations in PIE binaries. */ |
26588 | |
26589 | static bool |
26590 | ix86_binds_local_p (const_tree exp) |
26591 | { |
26592 | bool direct_extern_access |
26593 | = (ix86_direct_extern_access |
26594 | && !(VAR_OR_FUNCTION_DECL_P (exp) |
26595 | && lookup_attribute ("nodirect_extern_access", |
26596 | DECL_ATTRIBUTES (exp)))); |
26597 | if (!direct_extern_access) |
26598 | ix86_has_no_direct_extern_access = true; |
26599 | return default_binds_local_p_3 (exp, flag_shlib != 0, true, |
26600 | direct_extern_access, |
26601 | (direct_extern_access |
26602 | && (!flag_pic |
26603 | || (TARGET_64BIT |
26604 | && HAVE_LD_PIE_COPYRELOC != 0)))); |
26605 | } |
26606 | |
26607 | /* If flag_pic or ix86_direct_extern_access is false, then neither |
26608 | local nor global relocs should be placed in readonly memory. */ |
26609 | |
26610 | static int |
26611 | ix86_reloc_rw_mask (void) |
26612 | { |
26613 | return (flag_pic || !ix86_direct_extern_access) ? 3 : 0; |
26614 | } |
26615 | #endif |
26616 | |
26617 | /* Return true iff ADDR can be used as a symbolic base address. */ |
26618 | |
26619 | static bool |
26620 | symbolic_base_address_p (rtx addr) |
26621 | { |
26622 | if (GET_CODE (addr) == SYMBOL_REF) |
26623 | return true; |
26624 | |
26625 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF) |
26626 | return true; |
26627 | |
26628 | return false; |
26629 | } |
26630 | |
26631 | /* Return true iff ADDR can be used as a base address. */ |
26632 | |
26633 | static bool |
26634 | base_address_p (rtx addr) |
26635 | { |
26636 | if (REG_P (addr)) |
26637 | return true; |
26638 | |
26639 | if (symbolic_base_address_p (addr)) |
26640 | return true; |
26641 | |
26642 | return false; |
26643 | } |
26644 | |
26645 | /* If MEM is in the form of [(base+symbase)+offset], extract the three |
26646 | parts of address and set to BASE, SYMBASE and OFFSET, otherwise |
26647 | return false. */ |
26648 | |
26649 | static bool |
26650 | extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset) |
26651 | { |
26652 | rtx addr; |
26653 | |
26654 | gcc_assert (MEM_P (mem)); |
26655 | |
26656 | addr = XEXP (mem, 0); |
26657 | |
26658 | if (GET_CODE (addr) == CONST) |
26659 | addr = XEXP (addr, 0); |
26660 | |
26661 | if (base_address_p (addr)) |
26662 | { |
26663 | *base = addr; |
26664 | *symbase = const0_rtx; |
26665 | *offset = const0_rtx; |
26666 | return true; |
26667 | } |
26668 | |
26669 | if (GET_CODE (addr) == PLUS |
26670 | && base_address_p (XEXP (addr, 0))) |
26671 | { |
26672 | rtx addend = XEXP (addr, 1); |
26673 | |
26674 | if (GET_CODE (addend) == CONST) |
26675 | addend = XEXP (addend, 0); |
26676 | |
26677 | if (CONST_INT_P (addend)) |
26678 | { |
26679 | *base = XEXP (addr, 0); |
26680 | *symbase = const0_rtx; |
26681 | *offset = addend; |
26682 | return true; |
26683 | } |
26684 | |
26685 | /* Also accept REG + symbolic ref, with or without a CONST_INT |
26686 | offset. */ |
26687 | if (REG_P (XEXP (addr, 0))) |
26688 | { |
26689 | if (symbolic_base_address_p (addend)) |
26690 | { |
26691 | *base = XEXP (addr, 0); |
26692 | *symbase = addend; |
26693 | *offset = const0_rtx; |
26694 | return true; |
26695 | } |
26696 | |
26697 | if (GET_CODE (addend) == PLUS |
26698 | && symbolic_base_address_p (XEXP (addend, 0)) |
26699 | && CONST_INT_P (XEXP (addend, 1))) |
26700 | { |
26701 | *base = XEXP (addr, 0); |
26702 | *symbase = XEXP (addend, 0); |
26703 | *offset = XEXP (addend, 1); |
26704 | return true; |
26705 | } |
26706 | } |
26707 | } |
26708 | |
26709 | return false; |
26710 | } |
26711 | |
26712 | /* Given OPERANDS of consecutive load/store, check if we can merge |
26713 | them into move multiple. LOAD is true if they are load instructions. |
26714 | MODE is the mode of memory operands. */ |
26715 | |
26716 | bool |
26717 | ix86_operands_ok_for_move_multiple (rtx *operands, bool load, |
26718 | machine_mode mode) |
26719 | { |
26720 | HOST_WIDE_INT offval_1, offval_2, msize; |
26721 | rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, |
26722 | symbase_1, symbase_2, offset_1, offset_2; |
26723 | |
26724 | if (load) |
26725 | { |
26726 | mem_1 = operands[1]; |
26727 | mem_2 = operands[3]; |
26728 | reg_1 = operands[0]; |
26729 | reg_2 = operands[2]; |
26730 | } |
26731 | else |
26732 | { |
26733 | mem_1 = operands[0]; |
26734 | mem_2 = operands[2]; |
26735 | reg_1 = operands[1]; |
26736 | reg_2 = operands[3]; |
26737 | } |
26738 | |
26739 | gcc_assert (REG_P (reg_1) && REG_P (reg_2)); |
26740 | |
26741 | if (REGNO (reg_1) != REGNO (reg_2)) |
26742 | return false; |
26743 | |
26744 | /* Check if the addresses are in the form of [base+offset]. */ |
26745 | if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1)) |
26746 | return false; |
26747 | if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2)) |
26748 | return false; |
26749 | |
26750 | /* Check if the bases are the same. */ |
26751 | if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2)) |
26752 | return false; |
26753 | |
26754 | offval_1 = INTVAL (offset_1); |
26755 | offval_2 = INTVAL (offset_2); |
26756 | msize = GET_MODE_SIZE (mode); |
26757 | /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ |
26758 | if (offval_1 + msize != offval_2) |
26759 | return false; |
26760 | |
26761 | return true; |
26762 | } |
26763 | |
26764 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ |
26765 | |
26766 | static bool |
26767 | ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, |
26768 | optimization_type opt_type) |
26769 | { |
26770 | switch (op) |
26771 | { |
26772 | case asin_optab: |
26773 | case acos_optab: |
26774 | case log1p_optab: |
26775 | case exp_optab: |
26776 | case exp10_optab: |
26777 | case exp2_optab: |
26778 | case expm1_optab: |
26779 | case ldexp_optab: |
26780 | case scalb_optab: |
26781 | case round_optab: |
26782 | case lround_optab: |
26783 | return opt_type == OPTIMIZE_FOR_SPEED; |
26784 | |
26785 | case rint_optab: |
26786 | if (SSE_FLOAT_MODE_P (mode1) |
26787 | && TARGET_SSE_MATH |
26788 | && !flag_trapping_math |
26789 | && !TARGET_SSE4_1 |
26790 | && mode1 != HFmode) |
26791 | return opt_type == OPTIMIZE_FOR_SPEED; |
26792 | return true; |
26793 | |
26794 | case floor_optab: |
26795 | case ceil_optab: |
26796 | case btrunc_optab: |
26797 | if (((SSE_FLOAT_MODE_P (mode1) |
26798 | && TARGET_SSE_MATH |
26799 | && TARGET_SSE4_1) |
26800 | || mode1 == HFmode) |
26801 | && !flag_trapping_math) |
26802 | return true; |
26803 | return opt_type == OPTIMIZE_FOR_SPEED; |
26804 | |
26805 | case rsqrt_optab: |
26806 | return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1); |
26807 | |
26808 | default: |
26809 | return true; |
26810 | } |
26811 | } |
26812 | |
26813 | /* Address space support. |
26814 | |
26815 | This is not "far pointers" in the 16-bit sense, but an easy way |
26816 | to use %fs and %gs segment prefixes. Therefore: |
26817 | |
26818 | (a) All address spaces have the same modes, |
26819 | (b) All address spaces have the same address forms, |
26820 | (c) While %fs and %gs are technically subsets of the generic |
26821 | address space, they are probably not subsets of each other. |
26822 | (d) Since we have no access to the segment base register values |
26823 | without resorting to a system call, we cannot convert a |
26824 | non-default address space to a default address space. |
26825 | Therefore we do not claim %fs or %gs are subsets of generic. |
26826 | |
26827 | Therefore we can (mostly) use the default hooks. */ |
26828 | |
26829 | /* All use of segmentation is assumed to make address 0 valid. */ |
26830 | |
26831 | static bool |
26832 | ix86_addr_space_zero_address_valid (addr_space_t as) |
26833 | { |
26834 | return as != ADDR_SPACE_GENERIC; |
26835 | } |
26836 | |
26837 | static void |
26838 | ix86_init_libfuncs (void) |
26839 | { |
26840 | if (TARGET_64BIT) |
26841 | { |
26842 | set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4"); |
26843 | set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4"); |
26844 | } |
26845 | else |
26846 | { |
26847 | set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4"); |
26848 | set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4"); |
26849 | } |
26850 | |
26851 | #if TARGET_MACHO |
26852 | darwin_rename_builtins (); |
26853 | #endif |
26854 | } |
26855 | |
26856 | /* Set the value of FLT_EVAL_METHOD in float.h. When using only the |
26857 | FPU, assume that the fpcw is set to extended precision; when using |
26858 | only SSE, rounding is correct; when using both SSE and the FPU, |
26859 | the rounding precision is indeterminate, since either may be chosen |
26860 | apparently at random. */ |
26861 | |
26862 | static enum flt_eval_method |
26863 | ix86_get_excess_precision (enum excess_precision_type type) |
26864 | { |
26865 | switch (type) |
26866 | { |
26867 | case EXCESS_PRECISION_TYPE_FAST: |
26868 | /* The fastest type to promote to will always be the native type, |
26869 | whether that occurs with implicit excess precision or |
26870 | otherwise. */ |
26871 | return TARGET_AVX512FP16 |
26872 | ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 |
26873 | : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; |
26874 | case EXCESS_PRECISION_TYPE_STANDARD: |
26875 | case EXCESS_PRECISION_TYPE_IMPLICIT: |
26876 | /* Otherwise, the excess precision we want when we are |
26877 | in a standards compliant mode, and the implicit precision we |
26878 | provide would be identical were it not for the unpredictable |
26879 | cases. */ |
26880 | if (TARGET_AVX512FP16 && TARGET_SSE_MATH) |
26881 | return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16; |
26882 | else if (!TARGET_80387) |
26883 | return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; |
26884 | else if (!TARGET_MIX_SSE_I387) |
26885 | { |
26886 | if (!(TARGET_SSE && TARGET_SSE_MATH)) |
26887 | return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE; |
26888 | else if (TARGET_SSE2) |
26889 | return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; |
26890 | } |
26891 | |
26892 | /* If we are in standards compliant mode, but we know we will |
26893 | calculate in unpredictable precision, return |
26894 | FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit |
26895 | excess precision if the target can't guarantee it will honor |
26896 | it. */ |
26897 | return (type == EXCESS_PRECISION_TYPE_STANDARD |
26898 | ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT |
26899 | : FLT_EVAL_METHOD_UNPREDICTABLE); |
26900 | case EXCESS_PRECISION_TYPE_FLOAT16: |
26901 | if (TARGET_80387 |
26902 | && !(TARGET_SSE_MATH && TARGET_SSE)) |
26903 | error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>"); |
26904 | return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16; |
26905 | default: |
26906 | gcc_unreachable (); |
26907 | } |
26908 | |
26909 | return FLT_EVAL_METHOD_UNPREDICTABLE; |
26910 | } |
26911 | |
26912 | /* Return true if _BitInt(N) is supported and fill its details into *INFO. */ |
26913 | bool |
26914 | ix86_bitint_type_info (int n, struct bitint_info *info) |
26915 | { |
26916 | if (n <= 8) |
26917 | info->limb_mode = QImode; |
26918 | else if (n <= 16) |
26919 | info->limb_mode = HImode; |
26920 | else if (n <= 32 || (!TARGET_64BIT && n > 64)) |
26921 | info->limb_mode = SImode; |
26922 | else |
26923 | info->limb_mode = DImode; |
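| /* For example, _BitInt(256) ends up with four DImode limbs for 64-bit |
| code, while 32-bit code uses SImode limbs for anything wider than |
| 64 bits. */ |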
26924 | info->abi_limb_mode = info->limb_mode; |
26925 | info->big_endian = false; |
26926 | info->extended = false; |
26927 | return true; |
26928 | } |
26929 | |
26930 | /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode |
26931 | or XFmode for TI_LONG_DOUBLE_TYPE (the long double type), depending |
26932 | on the long double bit size; go with the default for the others. */ |
26933 | |
26934 | static machine_mode |
26935 | ix86_c_mode_for_floating_type (enum tree_index ti) |
26936 | { |
26937 | if (ti == TI_LONG_DOUBLE_TYPE) |
26938 | return (TARGET_LONG_DOUBLE_64 ? DFmode |
26939 | : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode)); |
26940 | return default_mode_for_floating_type (ti); |
26941 | } |
26942 | |
26943 | /* Returns modified FUNCTION_TYPE for cdtor callabi. */ |
26944 | tree |
26945 | ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype) |
26946 | { |
26947 | if (TARGET_64BIT |
26948 | || TARGET_RTD |
26949 | || ix86_function_type_abi (fntype) != MS_ABI) |
26950 | return fntype; |
26951 | /* For 32-bit MS ABI add thiscall attribute. */ |
26952 | tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE, |
26953 | TYPE_ATTRIBUTES (fntype)); |
26954 | return build_type_attribute_variant (fntype, attribs); |
26955 | } |
26956 | |
26957 | /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that |
26958 | decrements by exactly 2 no matter what the position was; there is no pushb. |
26959 | |
26960 | But as CIE data alignment factor on this arch is -4 for 32bit targets |
26961 | and -8 for 64bit targets, we need to make sure all stack pointer adjustments |
26962 | are a multiple of 4 for 32bit targets and 8 for 64bit targets. */ |
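| /* For example, pushing a 2-byte operand reserves 4 bytes of stack on |
| 32-bit targets and 8 bytes on 64-bit targets. */ |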
26963 | |
26964 | poly_int64 |
26965 | ix86_push_rounding (poly_int64 bytes) |
26966 | { |
26967 | return ROUND_UP (bytes, UNITS_PER_WORD); |
26968 | } |
26969 | |
26970 | /* Use 8 bits of metadata starting from bit 48 for LAM_U48, |
26971 | and 6 bits of metadata starting from bit 57 for LAM_U57. */ |
26972 | #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \ |
26973 | ? 48 \ |
26974 | : (ix86_lam_type == lam_u57 ? 57 : 0)) |
26975 | #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \ |
26976 | ? 8 \ |
26977 | : (ix86_lam_type == lam_u57 ? 6 : 0)) |
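| /* With these definitions the tag occupies bits 62:57 of a pointer for |
| LAM_U57 and bits 55:48 for LAM_U48; bit 63 is left alone. */ |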
26978 | |
26979 | /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */ |
26980 | bool |
26981 | ix86_memtag_can_tag_addresses () |
26982 | { |
26983 | return ix86_lam_type != lam_none && TARGET_LP64; |
26984 | } |
26985 | |
26986 | /* Implement TARGET_MEMTAG_TAG_SIZE. */ |
26987 | unsigned char |
26988 | ix86_memtag_tag_size () |
26989 | { |
26990 | return IX86_HWASAN_TAG_SIZE; |
26991 | } |
26992 | |
26993 | /* Implement TARGET_MEMTAG_SET_TAG. */ |
26994 | rtx |
26995 | ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target) |
26996 | { |
26997 | /* default_memtag_insert_random_tag may |
26998 | generate a tag whose value needs more than 6 bits. */ |
26999 | if (ix86_lam_type == lam_u57) |
27000 | { |
27001 | unsigned HOST_WIDE_INT and_imm |
27002 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
27003 | |
27004 | emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm))); |
27005 | } |
27006 | tag = expand_simple_binop (Pmode, ASHIFT, tag, |
27007 | GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX, |
27008 | /* unsignedp = */1, OPTAB_WIDEN); |
27009 | rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target, |
27010 | /* unsignedp = */1, OPTAB_DIRECT); |
27011 | return ret; |
27012 | } |
27013 | |
27014 | /* Implement TARGET_MEMTAG_EXTRACT_TAG. */ |
27015 | rtx |
27016 | ix86_memtag_extract_tag (rtx tagged_pointer, rtx target) |
27017 | { |
27018 | rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer, |
27019 | GEN_INT (IX86_HWASAN_SHIFT), target, |
27020 | /* unsignedp = */0, |
27021 | OPTAB_DIRECT); |
27022 | rtx ret = gen_reg_rtx (QImode); |
27023 | /* Mask off bit63 when LAM_U57. */ |
27024 | if (ix86_lam_type == lam_u57) |
27025 | { |
27026 | unsigned HOST_WIDE_INT and_imm |
27027 | = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1; |
27028 | emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag), |
27029 | gen_int_mode (and_imm, QImode))); |
27030 | } |
27031 | else |
27032 | emit_move_insn (ret, gen_lowpart (QImode, tag)); |
27033 | return ret; |
27034 | } |
27035 | |
27036 | /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */ |
27037 | rtx |
27038 | ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target) |
27039 | { |
27040 | /* Leave bit63 alone. */ |
27041 | rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) |
27042 | + (HOST_WIDE_INT_1U << 63) - 1), |
27043 | Pmode); |
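| /* For LAM_U57 this evaluates to 0x81ffffffffffffff, i.e. everything |
| except the six tag bits 62:57 is preserved. */ |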
27044 | rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer, |
27045 | tag_mask, target, true, |
27046 | OPTAB_DIRECT); |
27047 | gcc_assert (untagged_base); |
27048 | return untagged_base; |
27049 | } |
27050 | |
27051 | /* Implement TARGET_MEMTAG_ADD_TAG. */ |
27052 | rtx |
27053 | ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset) |
27054 | { |
27055 | rtx base_tag = gen_reg_rtx (QImode); |
27056 | rtx base_addr = gen_reg_rtx (Pmode); |
27057 | rtx tagged_addr = gen_reg_rtx (Pmode); |
27058 | rtx new_tag = gen_reg_rtx (QImode); |
27059 | unsigned HOST_WIDE_INT and_imm |
27060 | = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1; |
27061 | |
27062 | /* When the tag addition "overflows", |
27063 | the most significant bit needs to be masked off. */ |
27064 | emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX)); |
27065 | emit_move_insn (base_addr, |
27066 | ix86_memtag_untagged_pointer (base, NULL_RTX)); |
27067 | emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode))); |
27068 | emit_move_insn (new_tag, base_tag); |
27069 | emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode))); |
27070 | emit_move_insn (tagged_addr, |
27071 | ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX)); |
27072 | return plus_constant (Pmode, tagged_addr, offset); |
27073 | } |
27074 | |
27075 | /* Implement TARGET_HAVE_CCMP. */ |
27076 | static bool |
27077 | ix86_have_ccmp () |
27078 | { |
27079 | return (bool) TARGET_APX_CCMP; |
27080 | } |
27081 | |
27082 | /* Implement TARGET_MODE_CAN_TRANSFER_BITS. */ |
27083 | static bool |
27084 | ix86_mode_can_transfer_bits (machine_mode mode) |
27085 | { |
27086 | if (GET_MODE_CLASS (mode) == MODE_FLOAT |
27087 | || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
27088 | switch (GET_MODE_INNER (mode)) |
27089 | { |
27090 | case E_SFmode: |
27091 | case E_DFmode: |
27092 | /* These suffer from normalization upon load when not using SSE. */ |
27093 | return !(ix86_fpmath & FPMATH_387); |
27094 | default: |
27095 | return true; |
27096 | } |
27097 | |
27098 | return true; |
27099 | } |
27100 | |
27101 | /* Implement TARGET_REDZONE_CLOBBER. */ |
27102 | static rtx |
27103 | ix86_redzone_clobber () |
27104 | { |
27105 | cfun->machine->asm_redzone_clobber_seen = true; |
27106 | if (ix86_using_red_zone ()) |
27107 | { |
27108 | rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE); |
27109 | rtx mem = gen_rtx_MEM (BLKmode, base); |
27110 | set_mem_size (mem, RED_ZONE_SIZE); |
27111 | return mem; |
27112 | } |
27113 | return NULL_RTX; |
27114 | } |
27115 | |
27116 | /* Target-specific selftests. */ |
27117 | |
27118 | #if CHECKING_P |
27119 | |
27120 | namespace selftest { |
27121 | |
27122 | /* Verify that hard regs are dumped as expected (in compact mode). */ |
27123 | |
27124 | static void |
27125 | ix86_test_dumping_hard_regs () |
27126 | { |
27127 | ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0)); |
27128 | ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1)); |
27129 | } |
27130 | |
27131 | /* Test dumping an insn with repeated references to the same SCRATCH, |
27132 | to verify the rtx_reuse code. */ |
27133 | |
27134 | static void |
27135 | ix86_test_dumping_memory_blockage () |
27136 | { |
27137 | set_new_first_and_last_insn (NULL, NULL); |
27138 | |
27139 | rtx pat = gen_memory_blockage (); |
27140 | rtx_reuse_manager r; |
27141 | r.preprocess (pat); |
27142 | |
27143 | /* Verify that the repeated references to the SCRATCH show the use |
27144 | of reuse IDs. The first should be prefixed with a reuse ID, |
27145 | and the second should be dumped as a "reuse_rtx" of that ID. |
27146 | The expected string assumes Pmode == DImode. */ |
27147 | if (Pmode == DImode) |
27148 | ASSERT_RTL_DUMP_EQ_WITH_REUSE |
27149 | ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n" |
27150 | " (unspec:BLK [\n" |
27151 | " (mem/v:BLK (reuse_rtx 0) [0 A8])\n" |
27152 | " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r); |
27153 | } |
27154 | |
27155 | /* Verify loading an RTL dump; specifically a dump of copying |
27156 | a param on x86_64 from a hard reg into the frame. |
27157 | This test is target-specific since the dump contains target-specific |
27158 | hard reg names. */ |
27159 | |
27160 | static void |
27161 | ix86_test_loading_dump_fragment_1 () |
27162 | { |
27163 | rtl_dump_test t (SELFTEST_LOCATION, |
27164 | locate_file ("x86_64/copy-hard-reg-into-frame.rtl")); |
27165 | |
27166 | rtx_insn *insn = get_insn_by_uid (1); |
27167 | |
27168 | /* The block structure and indentation here is purely for |
27169 | readability; it mirrors the structure of the rtx. */ |
27170 | tree mem_expr; |
27171 | { |
27172 | rtx pat = PATTERN (insn); |
27173 | ASSERT_EQ (SET, GET_CODE (pat)); |
27174 | { |
27175 | rtx dest = SET_DEST (pat); |
27176 | ASSERT_EQ (MEM, GET_CODE (dest)); |
27177 | /* Verify the "/c" was parsed. */ |
27178 | ASSERT_TRUE (RTX_FLAG (dest, call)); |
27179 | ASSERT_EQ (SImode, GET_MODE (dest)); |
27180 | { |
27181 | rtx addr = XEXP (dest, 0); |
27182 | ASSERT_EQ (PLUS, GET_CODE (addr)); |
27183 | ASSERT_EQ (DImode, GET_MODE (addr)); |
27184 | { |
27185 | rtx lhs = XEXP (addr, 0); |
27186 | /* Verify that the "frame" REG was consolidated. */ |
27187 | ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs); |
27188 | } |
27189 | { |
27190 | rtx rhs = XEXP (addr, 1); |
27191 | ASSERT_EQ (CONST_INT, GET_CODE (rhs)); |
27192 | ASSERT_EQ (-4, INTVAL (rhs)); |
27193 | } |
27194 | } |
27195 | /* Verify the "[1 i+0 S4 A32]" was parsed. */ |
27196 | ASSERT_EQ (1, MEM_ALIAS_SET (dest)); |
27197 | /* "i" should have been handled by synthesizing a global int |
27198 | variable named "i". */ |
27199 | mem_expr = MEM_EXPR (dest); |
27200 | ASSERT_NE (mem_expr, NULL); |
27201 | ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr)); |
27202 | ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr)); |
27203 | ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr))); |
27204 | ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr))); |
27205 | /* "+0". */ |
27206 | ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest)); |
27207 | ASSERT_EQ (0, MEM_OFFSET (dest)); |
27208 | /* "S4". */ |
27209 | ASSERT_EQ (4, MEM_SIZE (dest)); |
27210 | /* "A32. */ |
27211 | ASSERT_EQ (32, MEM_ALIGN (dest)); |
27212 | } |
27213 | { |
27214 | rtx src = SET_SRC (pat); |
27215 | ASSERT_EQ (REG, GET_CODE (src)); |
27216 | ASSERT_EQ (SImode, GET_MODE (src)); |
27217 | ASSERT_EQ (5, REGNO (src)); |
27218 | tree reg_expr = REG_EXPR (src); |
27219 | /* "i" here should point to the same var as for the MEM_EXPR. */ |
27220 | ASSERT_EQ (reg_expr, mem_expr); |
27221 | } |
27222 | } |
27223 | } |
27224 | |
27225 | /* Verify that the RTL loader copes with a call_insn dump. |
27226 | This test is target-specific since the dump contains a target-specific |
27227 | hard reg name. */ |
27228 | |
27229 | static void |
27230 | ix86_test_loading_call_insn () |
27231 | { |
27232 | /* The test dump includes register "xmm0", which requires TARGET_SSE |
27233 | to exist. */ |
27234 | if (!TARGET_SSE) |
27235 | return; |
27236 | |
27237 | rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl")); |
27238 | |
27239 | rtx_insn *insn = get_insns (); |
27240 | ASSERT_EQ (CALL_INSN, GET_CODE (insn)); |
27241 | |
27242 | /* "/j". */ |
27243 | ASSERT_TRUE (RTX_FLAG (insn, jump)); |
27244 | |
27245 | rtx pat = PATTERN (insn); |
27246 | ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat))); |
27247 | |
27248 | /* Verify REG_NOTES. */ |
27249 | { |
27250 | /* "(expr_list:REG_CALL_DECL". */ |
27251 | ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn))); |
27252 | rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn)); |
27253 | ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0)); |
27254 | |
27255 | /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */ |
27256 | rtx_expr_list *note1 = note0->next (); |
27257 | ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1)); |
27258 | |
27259 | ASSERT_EQ (NULL, note1->next ()); |
27260 | } |
27261 | |
27262 | /* Verify CALL_INSN_FUNCTION_USAGE. */ |
27263 | { |
27264 | /* "(expr_list:DF (use (reg:DF 21 xmm0))". */ |
27265 | rtx_expr_list *usage |
27266 | = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn)); |
27267 | ASSERT_EQ (EXPR_LIST, GET_CODE (usage)); |
27268 | ASSERT_EQ (DFmode, GET_MODE (usage)); |
27269 | ASSERT_EQ (USE, GET_CODE (usage->element ())); |
27270 | ASSERT_EQ (NULL, usage->next ()); |
27271 | } |
27272 | } |
27273 | |
27274 | /* Verify that the RTL loader copes with a dump from print_rtx_function. |
27275 | This test is target-specific since the dump contains target-specific |
27276 | hard reg names. */ |
27277 | |
27278 | static void |
27279 | ix86_test_loading_full_dump () |
27280 | { |
27281 | rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl")); |
27282 | |
27283 | ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
27284 | |
27285 | rtx_insn *insn_1 = get_insn_by_uid (1); |
27286 | ASSERT_EQ (NOTE, GET_CODE (insn_1)); |
27287 | |
27288 | rtx_insn *insn_7 = get_insn_by_uid (7); |
27289 | ASSERT_EQ (INSN, GET_CODE (insn_7)); |
27290 | ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7))); |
27291 | |
27292 | rtx_insn *insn_15 = get_insn_by_uid (15); |
27293 | ASSERT_EQ (INSN, GET_CODE (insn_15)); |
27294 | ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15))); |
27295 | |
27296 | /* Verify crtl->return_rtx. */ |
27297 | ASSERT_EQ (REG, GET_CODE (crtl->return_rtx)); |
27298 | ASSERT_EQ (0, REGNO (crtl->return_rtx)); |
27299 | ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx)); |
27300 | } |
27301 | |
27302 | /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns. |
27303 | In particular, verify that it correctly loads the 2nd operand. |
27304 | This test is target-specific since these are machine-specific |
27305 | operands (and enums). */ |
27306 | |
27307 | static void |
27308 | ix86_test_loading_unspec () |
27309 | { |
27310 | rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl")); |
27311 | |
27312 | ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); |
27313 | |
27314 | ASSERT_TRUE (cfun); |
27315 | |
27316 | /* Test of an UNSPEC. */ |
27317 | rtx_insn *insn = get_insns (); |
27318 | ASSERT_EQ (INSN, GET_CODE (insn)); |
27319 | rtx set = single_set (insn); |
27320 | ASSERT_NE (NULL, set); |
27321 | rtx dst = SET_DEST (set); |
27322 | ASSERT_EQ (MEM, GET_CODE (dst)); |
27323 | rtx src = SET_SRC (set); |
27324 | ASSERT_EQ (UNSPEC, GET_CODE (src)); |
27325 | ASSERT_EQ (BLKmode, GET_MODE (src)); |
27326 | ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1)); |
27327 | |
27328 | rtx v0 = XVECEXP (src, 0, 0); |
27329 | |
27330 | /* Verify that the two uses of the first SCRATCH have pointer |
27331 | equality. */ |
27332 | rtx scratch_a = XEXP (dst, 0); |
27333 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_a)); |
27334 | |
27335 | rtx scratch_b = XEXP (v0, 0); |
27336 | ASSERT_EQ (SCRATCH, GET_CODE (scratch_b)); |
27337 | |
27338 | ASSERT_EQ (scratch_a, scratch_b); |
27339 | |
27340 | /* Verify that the two mems are thus treated as equal. */ |
27341 | ASSERT_TRUE (rtx_equal_p (dst, v0)); |
27342 | |
27343 | /* Verify that the insn is recognized. */ |
27344 | ASSERT_NE (-1, recog_memoized (insn)); |
27345 | |
27346 | /* Test of an UNSPEC_VOLATILE, which has its own enum values. */ |
27347 | insn = NEXT_INSN (insn); |
27348 | ASSERT_EQ (INSN, GET_CODE (insn)); |
27349 | |
27350 | set = single_set (insn); |
27351 | ASSERT_NE (NULL, set); |
27352 | |
27353 | src = SET_SRC (set); |
27354 | ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src)); |
27355 | ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1)); |
27356 | } |
27357 | |
27358 | /* Run all target-specific selftests. */ |
27359 | |
27360 | static void |
27361 | ix86_run_selftests (void) |
27362 | { |
27363 | ix86_test_dumping_hard_regs (); |
27364 | ix86_test_dumping_memory_blockage (); |
27365 | |
27366 | /* Various tests of loading RTL dumps, here because they contain |
27367 | ix86-isms (e.g. names of hard regs). */ |
27368 | ix86_test_loading_dump_fragment_1 (); |
27369 | ix86_test_loading_call_insn (); |
27370 | ix86_test_loading_full_dump (); |
27371 | ix86_test_loading_unspec (); |
27372 | } |
27373 | |
27374 | } // namespace selftest |
27375 | |
27376 | #endif /* CHECKING_P */ |
27377 | |
27378 | static const scoped_attribute_specs *const ix86_attribute_table[] = |
27379 | { |
27380 | &ix86_gnu_attribute_table |
27381 | }; |
27382 | |
27383 | /* Initialize the GCC target structure. */ |
27384 | #undef TARGET_RETURN_IN_MEMORY |
27385 | #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
27386 | |
27387 | #undef TARGET_LEGITIMIZE_ADDRESS |
27388 | #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address |
27389 | |
27390 | #undef TARGET_ATTRIBUTE_TABLE |
27391 | #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
27392 | #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P |
27393 | #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true |
27394 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
27395 | # undef TARGET_MERGE_DECL_ATTRIBUTES |
27396 | # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
27397 | #endif |
27398 | |
27399 | #undef TARGET_INVALID_CONVERSION |
27400 | #define TARGET_INVALID_CONVERSION ix86_invalid_conversion |
27401 | |
27402 | #undef TARGET_INVALID_UNARY_OP |
27403 | #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op |
27404 | |
27405 | #undef TARGET_INVALID_BINARY_OP |
27406 | #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op |
27407 | |
27408 | #undef TARGET_COMP_TYPE_ATTRIBUTES |
27409 | #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes |
27410 | |
27411 | #undef TARGET_INIT_BUILTINS |
27412 | #define TARGET_INIT_BUILTINS ix86_init_builtins |
27413 | #undef TARGET_BUILTIN_DECL |
27414 | #define TARGET_BUILTIN_DECL ix86_builtin_decl |
27415 | #undef TARGET_EXPAND_BUILTIN |
27416 | #define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
27417 | |
27418 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION |
27419 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ |
27420 | ix86_builtin_vectorized_function |
27421 | |
27422 | #undef TARGET_VECTORIZE_BUILTIN_GATHER |
27423 | #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather |
27424 | |
27425 | #undef TARGET_VECTORIZE_BUILTIN_SCATTER |
27426 | #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter |
27427 | |
27428 | #undef TARGET_BUILTIN_RECIPROCAL |
27429 | #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal |
27430 | |
27431 | #undef TARGET_ASM_FUNCTION_EPILOGUE |
27432 | #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue |
27433 | |
27434 | #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY |
27435 | #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \ |
27436 | ix86_print_patchable_function_entry |
27437 | |
27438 | #undef TARGET_ENCODE_SECTION_INFO |
27439 | #ifndef SUBTARGET_ENCODE_SECTION_INFO |
27440 | #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info |
27441 | #else |
27442 | #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO |
27443 | #endif |
27444 | |
27445 | #undef TARGET_ASM_OPEN_PAREN |
27446 | #define TARGET_ASM_OPEN_PAREN "" |
27447 | #undef TARGET_ASM_CLOSE_PAREN |
27448 | #define TARGET_ASM_CLOSE_PAREN "" |
27449 | |
27450 | #undef TARGET_ASM_BYTE_OP |
27451 | #define TARGET_ASM_BYTE_OP ASM_BYTE |
27452 | |
27453 | #undef TARGET_ASM_ALIGNED_HI_OP |
27454 | #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
27455 | #undef TARGET_ASM_ALIGNED_SI_OP |
27456 | #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG |
27457 | #ifdef ASM_QUAD |
27458 | #undef TARGET_ASM_ALIGNED_DI_OP |
27459 | #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD |
27460 | #endif |
27461 | |
27462 | #undef TARGET_PROFILE_BEFORE_PROLOGUE |
27463 | #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue |
27464 | |
27465 | #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME |
27466 | #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name |
27467 | |
27468 | #undef TARGET_ASM_UNALIGNED_HI_OP |
27469 | #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP |
27470 | #undef TARGET_ASM_UNALIGNED_SI_OP |
27471 | #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP |
27472 | #undef TARGET_ASM_UNALIGNED_DI_OP |
27473 | #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP |
27474 | |
27475 | #undef TARGET_PRINT_OPERAND |
27476 | #define TARGET_PRINT_OPERAND ix86_print_operand |
27477 | #undef TARGET_PRINT_OPERAND_ADDRESS |
27478 | #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address |
27479 | #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
27480 | #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p |
27481 | #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA |
27482 | #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra |
27483 | |
27484 | #undef TARGET_SCHED_INIT_GLOBAL |
27485 | #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global |
27486 | #undef TARGET_SCHED_ADJUST_COST |
27487 | #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost |
27488 | #undef TARGET_SCHED_ISSUE_RATE |
27489 | #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate |
27490 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
27491 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
27492 | ia32_multipass_dfa_lookahead |
27493 | #undef TARGET_SCHED_MACRO_FUSION_P |
27494 | #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p |
27495 | #undef TARGET_SCHED_MACRO_FUSION_PAIR_P |
27496 | #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p |
27497 | |
27498 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
27499 | #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall |
27500 | |
27501 | #undef TARGET_MEMMODEL_CHECK |
27502 | #define TARGET_MEMMODEL_CHECK ix86_memmodel_check |
27503 | |
27504 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
27505 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv |
27506 | |
27507 | #ifdef HAVE_AS_TLS |
27508 | #undef TARGET_HAVE_TLS |
27509 | #define TARGET_HAVE_TLS true |
27510 | #endif |
27511 | #undef TARGET_CANNOT_FORCE_CONST_MEM |
27512 | #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem |
27513 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P |
27514 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true |
27515 | |
27516 | #undef TARGET_DELEGITIMIZE_ADDRESS |
27517 | #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address |
27518 | |
27519 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P |
27520 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p |
27521 | |
27522 | #undef TARGET_MS_BITFIELD_LAYOUT_P |
27523 | #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p |
27524 | |
27525 | #if TARGET_MACHO |
27526 | #undef TARGET_BINDS_LOCAL_P |
27527 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p |
27528 | #else |
27529 | #undef TARGET_BINDS_LOCAL_P |
27530 | #define TARGET_BINDS_LOCAL_P ix86_binds_local_p |
27531 | #endif |
27532 | #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
27533 | #undef TARGET_BINDS_LOCAL_P |
27534 | #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p |
27535 | #endif |
27536 | |
27537 | #undef TARGET_ASM_OUTPUT_MI_THUNK |
27538 | #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk |
27539 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
27540 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk |
27541 | |
27542 | #undef TARGET_ASM_FILE_START |
27543 | #define TARGET_ASM_FILE_START x86_file_start |
27544 | |
27545 | #undef TARGET_OPTION_OVERRIDE |
27546 | #define TARGET_OPTION_OVERRIDE ix86_option_override |
27547 | |
27548 | #undef TARGET_REGISTER_MOVE_COST |
27549 | #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost |
27550 | #undef TARGET_MEMORY_MOVE_COST |
27551 | #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost |
27552 | #undef TARGET_RTX_COSTS |
27553 | #define TARGET_RTX_COSTS ix86_rtx_costs |
27554 | #undef TARGET_INSN_COST |
27555 | #define TARGET_INSN_COST ix86_insn_cost |
27556 | #undef TARGET_ADDRESS_COST |
27557 | #define TARGET_ADDRESS_COST ix86_address_cost |
27558 | |
27559 | #undef TARGET_OVERLAP_OP_BY_PIECES_P |
27560 | #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true |
27561 | |
27562 | #undef TARGET_FLAGS_REGNUM |
27563 | #define TARGET_FLAGS_REGNUM FLAGS_REG |
27564 | #undef TARGET_FIXED_CONDITION_CODE_REGS |
27565 | #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs |
27566 | #undef TARGET_CC_MODES_COMPATIBLE |
27567 | #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible |
27568 | |
27569 | #undef TARGET_MACHINE_DEPENDENT_REORG |
27570 | #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg |
27571 | |
27572 | #undef TARGET_BUILD_BUILTIN_VA_LIST |
27573 | #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list |
27574 | |
27575 | #undef TARGET_FOLD_BUILTIN |
27576 | #define TARGET_FOLD_BUILTIN ix86_fold_builtin |
27577 | |
27578 | #undef TARGET_GIMPLE_FOLD_BUILTIN |
27579 | #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin |
27580 | |
27581 | #undef TARGET_COMPARE_VERSION_PRIORITY |
27582 | #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority |
27583 | |
27584 | #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY |
27585 | #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ |
27586 | ix86_generate_version_dispatcher_body |
27587 | |
27588 | #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER |
27589 | #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ |
27590 | ix86_get_function_versions_dispatcher |
27591 | |
27592 | #undef TARGET_ENUM_VA_LIST_P |
27593 | #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list |
27594 | |
27595 | #undef TARGET_FN_ABI_VA_LIST |
27596 | #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list |
27597 | |
27598 | #undef TARGET_CANONICAL_VA_LIST_TYPE |
27599 | #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type |
27600 | |
27601 | #undef TARGET_EXPAND_BUILTIN_VA_START |
27602 | #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start |
27603 | |
27604 | #undef TARGET_MD_ASM_ADJUST |
27605 | #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust |
27606 | |
27607 | #undef TARGET_C_EXCESS_PRECISION |
27608 | #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision |
27609 | #undef TARGET_C_BITINT_TYPE_INFO |
27610 | #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info |
27611 | #undef TARGET_C_MODE_FOR_FLOATING_TYPE |
27612 | #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type |
27613 | #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE |
27614 | #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype |
27615 | #undef TARGET_PROMOTE_PROTOTYPES |
27616 | #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true |
27617 | #undef TARGET_PUSH_ARGUMENT |
27618 | #define TARGET_PUSH_ARGUMENT ix86_push_argument |
27619 | #undef TARGET_SETUP_INCOMING_VARARGS |
27620 | #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs |
27621 | #undef TARGET_MUST_PASS_IN_STACK |
27622 | #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack |
27623 | #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
27624 | #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args |
27625 | #undef TARGET_FUNCTION_ARG_ADVANCE |
27626 | #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance |
27627 | #undef TARGET_FUNCTION_ARG |
27628 | #define TARGET_FUNCTION_ARG ix86_function_arg |
27629 | #undef TARGET_INIT_PIC_REG |
27630 | #define TARGET_INIT_PIC_REG ix86_init_pic_reg |
27631 | #undef TARGET_USE_PSEUDO_PIC_REG |
27632 | #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg |
27633 | #undef TARGET_FUNCTION_ARG_BOUNDARY |
27634 | #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary |
27635 | #undef TARGET_PASS_BY_REFERENCE |
27636 | #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference |
27637 | #undef TARGET_INTERNAL_ARG_POINTER |
27638 | #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer |
27639 | #undef TARGET_UPDATE_STACK_BOUNDARY |
27640 | #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary |
27641 | #undef TARGET_GET_DRAP_RTX |
27642 | #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx |
27643 | #undef TARGET_STRICT_ARGUMENT_NAMING |
27644 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true |
27645 | #undef TARGET_STATIC_CHAIN |
27646 | #define TARGET_STATIC_CHAIN ix86_static_chain |
27647 | #undef TARGET_TRAMPOLINE_INIT |
27648 | #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init |
27649 | #undef TARGET_RETURN_POPS_ARGS |
27650 | #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args |
27651 | |
27652 | #undef TARGET_WARN_FUNC_RETURN |
27653 | #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return |
27654 | |
27655 | #undef TARGET_LEGITIMATE_COMBINED_INSN |
27656 | #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn |
27657 | |
27658 | #undef TARGET_ASAN_SHADOW_OFFSET |
27659 | #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset |
27660 | |
27661 | #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
27662 | #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg |
27663 | |
27664 | #undef TARGET_SCALAR_MODE_SUPPORTED_P |
27665 | #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p |
27666 | |
27667 | #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P |
27668 | #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ |
27669 | ix86_libgcc_floating_mode_supported_p |
27670 | |
27671 | #undef TARGET_VECTOR_MODE_SUPPORTED_P |
27672 | #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p |
27673 | |
27674 | #undef TARGET_C_MODE_FOR_SUFFIX |
27675 | #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix |
27676 | |
27677 | #ifdef HAVE_AS_TLS |
27678 | #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
27679 | #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel |
27680 | #endif |
27681 | |
27682 | #ifdef SUBTARGET_INSERT_ATTRIBUTES |
27683 | #undef TARGET_INSERT_ATTRIBUTES |
27684 | #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES |
27685 | #endif |
27686 | |
27687 | #undef TARGET_MANGLE_TYPE |
27688 | #define TARGET_MANGLE_TYPE ix86_mangle_type |
27689 | |
27690 | #undef TARGET_EMIT_SUPPORT_TINFOS |
27691 | #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos |
27692 | |
27693 | #undef TARGET_STACK_PROTECT_GUARD |
27694 | #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard |
27695 | |
27696 | #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P |
27697 | #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \ |
27698 | ix86_stack_protect_runtime_enabled_p |
27699 | |
27700 | #if !TARGET_MACHO |
27701 | #undef TARGET_STACK_PROTECT_FAIL |
27702 | #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail |
27703 | #endif |
27704 | |
27705 | #undef TARGET_FUNCTION_VALUE |
27706 | #define TARGET_FUNCTION_VALUE ix86_function_value |
27707 | |
27708 | #undef TARGET_FUNCTION_VALUE_REGNO_P |
27709 | #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p |
27710 | |
27711 | #undef TARGET_ZERO_CALL_USED_REGS |
27712 | #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs |
27713 | |
27714 | #undef TARGET_PROMOTE_FUNCTION_MODE |
27715 | #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode |
27716 | |
27717 | #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE |
27718 | #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change |
27719 | |
27720 | #undef TARGET_MEMBER_TYPE_FORCES_BLK |
27721 | #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk |
27722 | |
27723 | #undef TARGET_INSTANTIATE_DECLS |
27724 | #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls |
27725 | |
27726 | #undef TARGET_SECONDARY_RELOAD |
27727 | #define TARGET_SECONDARY_RELOAD ix86_secondary_reload |
27728 | #undef TARGET_SECONDARY_MEMORY_NEEDED |
27729 | #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed |
27730 | #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE |
27731 | #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode |
27732 | |
27733 | #undef TARGET_CLASS_MAX_NREGS |
27734 | #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs |
27735 | |
27736 | #undef TARGET_PREFERRED_RELOAD_CLASS |
27737 | #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class |
27738 | #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS |
27739 | #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class |
27740 | /* When this hook returns true for MODE, the compiler allows |
27741 | registers explicitly used in the rtl to be used as spill registers |
27742 | but prevents the compiler from extending the lifetime of these |
27743 | registers. */ |
27744 | #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P |
27745 | #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true |
27746 | #undef TARGET_CLASS_LIKELY_SPILLED_P |
27747 | #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p |
27748 | #undef TARGET_CALLEE_SAVE_COST |
27749 | #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost |
27750 | |
27751 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST |
27752 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ |
27753 | ix86_builtin_vectorization_cost |
27754 | #undef TARGET_VECTORIZE_VEC_PERM_CONST |
27755 | #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const |
27756 | #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE |
27757 | #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ |
27758 | ix86_preferred_simd_mode |
27759 | #undef TARGET_VECTORIZE_SPLIT_REDUCTION |
27760 | #define TARGET_VECTORIZE_SPLIT_REDUCTION \ |
27761 | ix86_split_reduction |
27762 | #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES |
27763 | #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ |
27764 | ix86_autovectorize_vector_modes |
27765 | #undef TARGET_VECTORIZE_GET_MASK_MODE |
27766 | #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode |
27767 | #undef TARGET_VECTORIZE_CREATE_COSTS |
27768 | #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs |
27769 | |
27770 | #undef TARGET_SET_CURRENT_FUNCTION |
27771 | #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function |
27772 | |
27773 | #undef TARGET_OPTION_VALID_ATTRIBUTE_P |
27774 | #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p |
27775 | |
27776 | #undef TARGET_OPTION_SAVE |
27777 | #define TARGET_OPTION_SAVE ix86_function_specific_save |
27778 | |
27779 | #undef TARGET_OPTION_RESTORE |
27780 | #define TARGET_OPTION_RESTORE ix86_function_specific_restore |
27781 | |
27782 | #undef TARGET_OPTION_POST_STREAM_IN |
27783 | #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in |
27784 | |
27785 | #undef TARGET_OPTION_PRINT |
27786 | #define TARGET_OPTION_PRINT ix86_function_specific_print |
27787 | |
27788 | #undef TARGET_OPTION_FUNCTION_VERSIONS |
27789 | #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions |
27790 | |
27791 | #undef TARGET_CAN_INLINE_P |
27792 | #define TARGET_CAN_INLINE_P ix86_can_inline_p |
27793 | |
27794 | #undef TARGET_LEGITIMATE_ADDRESS_P |
27795 | #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p |
27796 | |
27797 | #undef TARGET_REGISTER_PRIORITY |
27798 | #define TARGET_REGISTER_PRIORITY ix86_register_priority |
27799 | |
27800 | #undef TARGET_REGISTER_USAGE_LEVELING_P |
27801 | #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true |
27802 | |
27803 | #undef TARGET_LEGITIMATE_CONSTANT_P |
27804 | #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p |
27805 | |
27806 | #undef TARGET_COMPUTE_FRAME_LAYOUT |
27807 | #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout |
27808 | |
27809 | #undef TARGET_FRAME_POINTER_REQUIRED |
27810 | #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required |
27811 | |
27812 | #undef TARGET_CAN_ELIMINATE |
27813 | #define TARGET_CAN_ELIMINATE ix86_can_eliminate |
27814 | |
27815 | #undef TARGET_EXTRA_LIVE_ON_ENTRY |
27816 | #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry |
27817 | |
27818 | #undef TARGET_ASM_CODE_END |
27819 | #define TARGET_ASM_CODE_END ix86_code_end |
27820 | |
27821 | #undef TARGET_CONDITIONAL_REGISTER_USAGE |
27822 | #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage |
27823 | |
27824 | #undef TARGET_CANONICALIZE_COMPARISON |
27825 | #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison |
27826 | |
27827 | #undef TARGET_LOOP_UNROLL_ADJUST |
27828 | #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust |
27829 | |
27830 | /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */ |
27831 | #undef TARGET_SPILL_CLASS |
27832 | #define TARGET_SPILL_CLASS ix86_spill_class |
27833 | |
27834 | #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN |
27835 | #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ |
27836 | ix86_simd_clone_compute_vecsize_and_simdlen |
27837 | |
27838 | #undef TARGET_SIMD_CLONE_ADJUST |
27839 | #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust |
27840 | |
27841 | #undef TARGET_SIMD_CLONE_USABLE |
27842 | #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable |
27843 | |
27844 | #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA |
27845 | #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa |
27846 | |
27847 | #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P |
27848 | #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ |
27849 | ix86_float_exceptions_rounding_supported_p |
27850 | |
27851 | #undef TARGET_MODE_EMIT |
27852 | #define TARGET_MODE_EMIT ix86_emit_mode_set |
27853 | |
27854 | #undef TARGET_MODE_NEEDED |
27855 | #define TARGET_MODE_NEEDED ix86_mode_needed |
27856 | |
27857 | #undef TARGET_MODE_AFTER |
27858 | #define TARGET_MODE_AFTER ix86_mode_after |
27859 | |
27860 | #undef TARGET_MODE_ENTRY |
27861 | #define TARGET_MODE_ENTRY ix86_mode_entry |
27862 | |
27863 | #undef TARGET_MODE_EXIT |
27864 | #define TARGET_MODE_EXIT ix86_mode_exit |
27865 | |
27866 | #undef TARGET_MODE_PRIORITY |
27867 | #define TARGET_MODE_PRIORITY ix86_mode_priority |
27868 | |
27869 | #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS |
27870 | #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true |
27871 | |
27872 | #undef TARGET_OFFLOAD_OPTIONS |
27873 | #define TARGET_OFFLOAD_OPTIONS \ |
27874 | ix86_offload_options |
27875 | |
27876 | #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT |
27877 | #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512 |
27878 | |
27879 | #undef TARGET_OPTAB_SUPPORTED_P |
27880 | #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p |
27881 | |
27882 | #undef TARGET_HARD_REGNO_SCRATCH_OK |
27883 | #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok |
27884 | |
27885 | #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS |
27886 | #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST |
27887 | |
27888 | #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID |
27889 | #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid |
27890 | |
27891 | #undef TARGET_INIT_LIBFUNCS |
27892 | #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs |
27893 | |
27894 | #undef TARGET_EXPAND_DIVMOD_LIBFUNC |
27895 | #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc |
27896 | |
27897 | #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST |
27898 | #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost |
27899 | |
27900 | #undef TARGET_NOCE_CONVERSION_PROFITABLE_P |
27901 | #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p |
27902 | |
27903 | #undef TARGET_HARD_REGNO_NREGS |
27904 | #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs |
27905 | #undef TARGET_HARD_REGNO_MODE_OK |
27906 | #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok |
27907 | |
27908 | #undef TARGET_MODES_TIEABLE_P |
27909 | #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p |
27910 | |
27911 | #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED |
27912 | #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ |
27913 | ix86_hard_regno_call_part_clobbered |
27914 | |
27915 | #undef TARGET_INSN_CALLEE_ABI |
27916 | #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi |
27917 | |
27918 | #undef TARGET_CAN_CHANGE_MODE_CLASS |
27919 | #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class |
27920 | |
27921 | #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT |
27922 | #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment |
27923 | |
27924 | #undef TARGET_STATIC_RTX_ALIGNMENT |
27925 | #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment |
27926 | #undef TARGET_CONSTANT_ALIGNMENT |
27927 | #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment |
27928 | |
27929 | #undef TARGET_EMPTY_RECORD_P |
27930 | #define TARGET_EMPTY_RECORD_P ix86_is_empty_record |
27931 | |
27932 | #undef TARGET_WARN_PARAMETER_PASSING_ABI |
27933 | #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi |
27934 | |
27935 | #undef TARGET_GET_MULTILIB_ABI_NAME |
27936 | #define TARGET_GET_MULTILIB_ABI_NAME \ |
27937 | ix86_get_multilib_abi_name |
27938 | |
27939 | #undef TARGET_IFUNC_REF_LOCAL_OK |
27940 | #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok |
27941 | |
27942 | #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES |
27943 | # undef TARGET_ASM_RELOC_RW_MASK |
27944 | # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask |
27945 | #endif |
27946 | |
27947 | #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES |
27948 | #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses |
27949 | |
27950 | #undef TARGET_MEMTAG_ADD_TAG |
27951 | #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag |
27952 | |
27953 | #undef TARGET_MEMTAG_SET_TAG |
27954 | #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag |
27955 | |
27956 | #undef TARGET_MEMTAG_EXTRACT_TAG |
27957 | #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag |
27958 | |
27959 | #undef TARGET_MEMTAG_UNTAGGED_POINTER |
27960 | #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer |
27961 | |
27962 | #undef TARGET_MEMTAG_TAG_SIZE |
27963 | #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size |
27964 | |
27965 | #undef TARGET_GEN_CCMP_FIRST |
27966 | #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first |
27967 | |
27968 | #undef TARGET_GEN_CCMP_NEXT |
27969 | #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next |
27970 | |
27971 | #undef TARGET_HAVE_CCMP |
27972 | #define TARGET_HAVE_CCMP ix86_have_ccmp |
27973 | |
27974 | #undef TARGET_MODE_CAN_TRANSFER_BITS |
27975 | #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits |
27976 | |
27977 | #undef TARGET_REDZONE_CLOBBER |
27978 | #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber |
27979 | |
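/* Implement TARGET_LIBC_HAS_FAST_FUNCTION.  Only glibc is known to
   provide a fast mempcpy here; every other libc gets the default
   answer of false.  */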
27980 | static bool
27981 | ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
27982 | {
27983 | #ifdef OPTION_GLIBC
27984 |   if (OPTION_GLIBC)
27985 |     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
27986 |   else
27987 |     return false;
27988 | #else
27989 |   return false;
27990 | #endif
27991 | }
27992 | |
27993 | #undef TARGET_LIBC_HAS_FAST_FUNCTION |
27994 | #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function |
27995 | |
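/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  For glibc, take the generic
   glibc/Linux bound and additionally account for the vectorized sin/cos
   variants that libmvec supplies under fast-math style flags.  */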
27996 | static unsigned
27997 | ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
27998 |                               bool boundary_p)
27999 | {
28000 | #ifdef OPTION_GLIBC
28001 |   bool glibc_p = OPTION_GLIBC;
28002 | #else
28003 |   bool glibc_p = false;
28004 | #endif
28005 |   if (glibc_p)
28006 |     {
28007 |       /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
28008 |       unsigned int libmvec_ret = 0;
28009 |       if (!flag_trapping_math
28010 |           && flag_unsafe_math_optimizations
28011 |           && flag_finite_math_only
28012 |           && !flag_signed_zeros
28013 |           && !flag_errno_math)
28014 |         switch (cfn)
28015 |           {
28016 |           CASE_CFN_COS:
28017 |           CASE_CFN_COS_FN:
28018 |           CASE_CFN_SIN:
28019 |           CASE_CFN_SIN_FN:
28020 |             if (!boundary_p)
28021 |               {
28022 |                 /* With non-default rounding modes, libmvec can return
28023 |                    complete garbage.  E.g.
28024 |                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
28025 |                    returns 0.00333309174f rather than 1.40129846e-45f.  */
28026 |                 if (flag_rounding_math)
28027 |                   return ~0U;
28028 |                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
28029 |                    claims the libmvec maximum error is 4 ulps.
28030 |                    My own random testing indicates 2 ulps for SFmode and
28031 |                    0.5 ulps for DFmode, but let's go with the documented 4 ulps.  */
28032 |                 libmvec_ret = 4;
28033 |               }
28034 |             break;
28035 |           default:
28036 |             break;
28037 |           }
28038 |       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
28039 |                                                               boundary_p);
28040 |       return MAX (ret, libmvec_ret);
28041 |     }
28042 |   return default_libm_function_max_error (cfn, mode, boundary_p);
28043 | }
28044 | |
28045 | #undef TARGET_LIBM_FUNCTION_MAX_ERROR |
28046 | #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error |
28047 | |
28048 | #if TARGET_MACHO |
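/* Implement TARGET_CANNOT_COPY_INSN_P.  In 32-bit Mach-O PIC code the
   UNSPEC_SET_GOT insn materializes the PIC base with a sequence tied to
   its own location, so it must not be duplicated.  */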
28049 | static bool
28050 | ix86_cannot_copy_insn_p (rtx_insn *insn)
28051 | {
28052 |   if (TARGET_64BIT)
28053 |     return false;
28054 |
28055 |   rtx set = single_set (insn);
28056 |   if (set)
28057 |     {
28058 |       rtx src = SET_SRC (set);
28059 |       if (GET_CODE (src) == UNSPEC
28060 |           && XINT (src, 1) == UNSPEC_SET_GOT)
28061 |         return true;
28062 |     }
28063 |   return false;
28064 | }
28065 | |
28066 | #undef TARGET_CANNOT_COPY_INSN_P |
28067 | #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p |
28068 | |
28069 | #endif |
28070 | |
28071 | #if CHECKING_P |
28072 | #undef TARGET_RUN_TARGET_SELFTESTS |
28073 | #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests |
28074 | #endif /* #if CHECKING_P */ |
28075 | |
28076 | #undef TARGET_DOCUMENTATION_NAME |
28077 | #define TARGET_DOCUMENTATION_NAME "x86" |
28078 | |
28079 | struct gcc_target targetm = TARGET_INITIALIZER; |
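/* Build the x86 target vector from the TARGET_* overrides above.  */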
28080 | |
28081 | #include "gt-i386.h" |
28082 |