/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define INCLUDE_STRING
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-options.h"
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"

/* This file should be included last.  */
#include "target-def.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx);


#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
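/* Purely illustrative example (not part of the original source):
   MODE_INDEX (SImode) is 2, so a lookup such as
     ix86_cost->mult_init[MODE_INDEX (SImode)]
   (assuming the usual cost-table layout) selects the SImode entry;
   anything other than QI/HI/SI/DImode falls into the catch-all
   index 4.  */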


/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
180
181/* The "default" register map used in 32bit mode. */
182
183int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
184{
185 /* general regs */
186 0, 2, 1, 3, 6, 7, 4, 5,
187 /* fp regs */
188 12, 13, 14, 15, 16, 17, 18, 19,
189 /* arg, flags, fpsr, frame */
190 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
191 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
192 /* SSE */
193 21, 22, 23, 24, 25, 26, 27, 28,
194 /* MMX */
195 29, 30, 31, 32, 33, 34, 35, 36,
196 /* extended integer registers */
197 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 /* extended sse registers */
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 /* AVX-512 registers 16-23 */
203 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
204 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
205 /* AVX-512 registers 24-31 */
206 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
207 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
208 /* Mask registers */
209 93, 94, 95, 96, 97, 98, 99, 100
210};
211
212/* The "default" register map used in 64bit mode. */
213
214int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
215{
216 /* general regs */
217 0, 1, 2, 3, 4, 5, 6, 7,
218 /* fp regs */
219 33, 34, 35, 36, 37, 38, 39, 40,
220 /* arg, flags, fpsr, frame */
221 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
222 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
223 /* SSE */
224 17, 18, 19, 20, 21, 22, 23, 24,
225 /* MMX */
226 41, 42, 43, 44, 45, 46, 47, 48,
227 /* extended integer registers */
228 8, 9, 10, 11, 12, 13, 14, 15,
229 /* extended SSE registers */
230 25, 26, 27, 28, 29, 30, 31, 32,
231 /* AVX-512 registers 16-23 */
232 67, 68, 69, 70, 71, 72, 73, 74,
233 /* AVX-512 registers 24-31 */
234 75, 76, 77, 78, 79, 80, 81, 82,
235 /* Mask registers */
236 118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
238 130, 131, 132, 133, 134, 135, 136, 137,
239 138, 139, 140, 141, 142, 143, 144, 145
240};
241
242/* Define the register numbers to be used in Dwarf debugging information.
243 The SVR4 reference port C compiler uses the following register numbers
244 in its Dwarf output code:
245 0 for %eax (gcc regno = 0)
246 1 for %ecx (gcc regno = 2)
247 2 for %edx (gcc regno = 1)
248 3 for %ebx (gcc regno = 3)
249 4 for %esp (gcc regno = 7)
250 5 for %ebp (gcc regno = 6)
251 6 for %esi (gcc regno = 4)
252 7 for %edi (gcc regno = 5)
253 The following three DWARF register numbers are never generated by
254 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
255 believed these numbers have these meanings.
256 8 for %eip (no gcc equivalent)
257 9 for %eflags (gcc regno = 17)
258 10 for %trapno (no gcc equivalent)
259 It is not at all clear how we should number the FP stack registers
260 for the x86 architecture. If the version of SDB on x86/svr4 were
261 a bit less brain dead with respect to floating-point then we would
262 have a precedent to follow with respect to DWARF register numbers
263 for x86 FP registers, but the SDB on x86/svr4 was so completely
264 broken with respect to FP registers that it is hardly worth thinking
265 of it as something to strive for compatibility with.
266 The version of x86/svr4 SDB I had does (partially)
267 seem to believe that DWARF register number 11 is associated with
268 the x86 register %st(0), but that's about all. Higher DWARF
269 register numbers don't seem to be associated with anything in
270 particular, and even for DWARF regno 11, SDB only seemed to under-
271 stand that it should say that a variable lives in %st(0) (when
272 asked via an `=' command) if we said it was in DWARF regno 11,
273 but SDB still printed garbage when asked for the value of the
274 variable in question (via a `/' command).
275 (Also note that the labels SDB printed for various FP stack regs
276 when doing an `x' command were all wrong.)
277 Note that these problems generally don't affect the native SVR4
278 C compiler because it doesn't allow the use of -O with -g and
279 because when it is *not* optimizing, it allocates a memory
280 location for each floating-point variable, and the memory
281 location is what gets described in the DWARF AT_location
282 attribute for the variable in question.
283 Regardless of the severe mental illness of the x86/svr4 SDB, we
284 do something sensible here and we use the following DWARF
285 register numbers. Note that these are all stack-top-relative
286 numbers.
287 11 for %st(0) (gcc regno = 8)
288 12 for %st(1) (gcc regno = 9)
289 13 for %st(2) (gcc regno = 10)
290 14 for %st(3) (gcc regno = 11)
291 15 for %st(4) (gcc regno = 12)
292 16 for %st(5) (gcc regno = 13)
293 17 for %st(6) (gcc regno = 14)
294 18 for %st(7) (gcc regno = 15)
295*/
296int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
297{
298 /* general regs */
299 0, 2, 1, 3, 6, 7, 5, 4,
300 /* fp regs */
301 11, 12, 13, 14, 15, 16, 17, 18,
302 /* arg, flags, fpsr, frame */
303 IGNORED_DWARF_REGNUM, 9,
304 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
305 /* SSE registers */
306 21, 22, 23, 24, 25, 26, 27, 28,
307 /* MMX registers */
308 29, 30, 31, 32, 33, 34, 35, 36,
309 /* extended integer registers */
310 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
311 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 /* extended sse registers */
313 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
314 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
315 /* AVX-512 registers 16-23 */
316 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
317 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
318 /* AVX-512 registers 24-31 */
319 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
320 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
321 /* Mask registers */
322 93, 94, 95, 96, 97, 98, 99, 100
323};
324
325/* Define parameter passing and return registers. */
326
327static int const x86_64_int_parameter_registers[6] =
328{
329 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
330};
331
332static int const x86_64_ms_abi_int_parameter_registers[4] =
333{
334 CX_REG, DX_REG, R8_REG, R9_REG
335};
336
337static int const x86_64_int_return_registers[4] =
338{
339 AX_REG, DX_REG, DI_REG, SI_REG
340};
341
342/* Define the structure for the machine field in struct function. */
343
344struct GTY(()) stack_local_entry {
345 unsigned short mode;
346 unsigned short n;
347 rtx rtl;
348 struct stack_local_entry *next;
349};
350
351/* Which cpu are we scheduling for. */
352enum attr_cpu ix86_schedule;
353
354/* Which cpu are we optimizing for. */
355enum processor_type ix86_tune;
356
357/* Which instruction set architecture to use. */
358enum processor_type ix86_arch;
359
360/* True if processor has SSE prefetch instruction. */
361unsigned char ix86_prefetch_sse;
362
363/* Preferred alignment for stack boundary in bits. */
364unsigned int ix86_preferred_stack_boundary;
365
366/* Alignment for incoming stack boundary in bits specified at
367 command line. */
368unsigned int ix86_user_incoming_stack_boundary;
369
370/* Default alignment for incoming stack boundary in bits. */
371unsigned int ix86_default_incoming_stack_boundary;
372
373/* Alignment for incoming stack boundary in bits. */
374unsigned int ix86_incoming_stack_boundary;
375
376/* True if there is no direct access to extern symbols. */
377bool ix86_has_no_direct_extern_access;
378
379/* Calling abi specific va_list type nodes. */
380tree sysv_va_list_type_node;
381tree ms_va_list_type_node;
382
383/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
384char internal_label_prefix[16];
385int internal_label_prefix_len;
386
387/* Fence to use after loop using movnt. */
388tree x86_mfence;
389
/* Register class used for passing the given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   GCC just uses SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
397enum x86_64_reg_class
398 {
399 X86_64_NO_CLASS,
400 X86_64_INTEGER_CLASS,
401 X86_64_INTEGERSI_CLASS,
402 X86_64_SSE_CLASS,
403 X86_64_SSEHF_CLASS,
404 X86_64_SSESF_CLASS,
405 X86_64_SSEDF_CLASS,
406 X86_64_SSEUP_CLASS,
407 X86_64_X87_CLASS,
408 X86_64_X87UP_CLASS,
409 X86_64_COMPLEX_X87_CLASS,
410 X86_64_MEMORY_CLASS
411 };
412
413#define MAX_CLASSES 8
414
415/* Table of constants used by fldpi, fldln2, etc.... */
416static REAL_VALUE_TYPE ext_80387_constants_table [5];
417static bool ext_80387_constants_init;
418
419
420static rtx ix86_function_value (const_tree, const_tree, bool);
421static bool ix86_function_value_regno_p (const unsigned int);
422static unsigned int ix86_function_arg_boundary (machine_mode,
423 const_tree);
424static rtx ix86_static_chain (const_tree, bool);
425static int ix86_function_regparm (const_tree, const_tree);
426static void ix86_compute_frame_layout (void);
427static tree ix86_canonical_va_list_type (tree);
428static unsigned int split_stack_prologue_scratch_regno (void);
429static bool i386_asm_output_addr_const_extra (FILE *, rtx);
430
431static bool ix86_can_inline_p (tree, tree);
432static unsigned int ix86_minimum_incoming_stack_boundary (bool);
433
434
/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;

/* Return true if a red-zone is in use.  We can't use a red-zone when
   there are local indirect jumps, like "indirect_jump" or "tablejump",
   which jump to another place in the function, since the "call" in the
   indirect thunk pushes the return address onto the stack, destroying
   the red-zone.

   TODO: If we can reserve the first 2 WORDs of the red-zone, one for
   the PUSH and another for the CALL, we can allow local indirect jumps
   with an indirect thunk.  */

bool
ix86_using_red_zone (void)
{
  return (TARGET_RED_ZONE
	  && !TARGET_64BIT_MS_ABI
	  && (!cfun->machine->has_local_indirect_jump
	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
}
457
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.
   Note: for x86 with the "hotfix" (ms_hook_prologue) scheme this is,
   regrettably, problematic.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
466
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers
     except fixed_regs and the registers used for the function return
     value, since aggregate_value_p checks call_used_regs[regno] on the
     return value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }
}
541
/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	}
    }
}


/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of these flags can be tolerated for always_inline functions.
     Let's hope the user knows what they are doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  /* If the callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* The callee's isa options should be a subset of the caller's, i.e. an
     SSE4 function can inline an SSE2 function but an SSE2 function can't
     inline an SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions, differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not being available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether the arch or tune setting
     comes from a target attribute or not.  So the most conservative way
     is to allow a callee that uses the default arch and tune string to
     be inlined.  */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
	   && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As the previous ISA flags check
     already ensures that the callee's ISA is a subset of the caller's, do
     not block the always_inline attribute for the callee even if it has a
     different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
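/* Purely illustrative example (not part of GCC itself): under the rules
   above a caller compiled for a superset ISA may inline a callee compiled
   for a subset, but not vice versa, e.g.

     __attribute__ ((target ("sse2"), always_inline)) static inline
     int f (int x) { return x + 1; }
     __attribute__ ((target ("sse4.2"))) int g (int x) { return f (x); }

   is inlinable, while swapping the two target attributes would make f
   uninlinable into g.  */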

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
      && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
    return false;

  if (exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* Automatic variables are never large data.  */
  if (VAR_P (exp) && !is_global_var (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  Also,
	 int_size_in_bytes returns -1 if the size can vary or is larger
	 than an integer, in which case it is also safer to assume that
	 it goes in large data.  */
      if (size <= 0 || size > ix86_section_threshold)
	return true;
    }

  return false;
}
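/* Purely illustrative example: with -mcmodel=medium and the default
   -mlarge-data-threshold=65536, a definition such as
     static char big_buf[1 << 20];
   exceeds ix86_section_threshold and is therefore treated as large data
   (ending up in .lbss below), while small scalars stay in the ordinary
   .data/.bss sections.  */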
704
705/* i386-specific section flag to mark large sections. */
706#define SECTION_LARGE SECTION_MACH_DEP
707
708/* Switch to the appropriate section for output of DECL.
709 DECL is either a `VAR_DECL' node or a constant of some sort.
710 RELOC indicates whether forming the initial value of DECL requires
711 link-time relocations. */
712
713ATTRIBUTE_UNUSED static section *
714x86_64_elf_select_section (tree decl, int reloc,
715 unsigned HOST_WIDE_INT align)
716{
  if (ix86_in_large_data_p (decl))
718 {
719 const char *sname = NULL;
720 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
721 switch (categorize_decl_for_section (decl, reloc))
722 {
723 case SECCAT_DATA:
724 sname = ".ldata";
725 break;
726 case SECCAT_DATA_REL:
727 sname = ".ldata.rel";
728 break;
729 case SECCAT_DATA_REL_LOCAL:
730 sname = ".ldata.rel.local";
731 break;
732 case SECCAT_DATA_REL_RO:
733 sname = ".ldata.rel.ro";
734 break;
735 case SECCAT_DATA_REL_RO_LOCAL:
736 sname = ".ldata.rel.ro.local";
737 break;
738 case SECCAT_BSS:
739 sname = ".lbss";
740 flags |= SECTION_BSS;
741 break;
742 case SECCAT_RODATA:
743 case SECCAT_RODATA_MERGE_STR:
744 case SECCAT_RODATA_MERGE_STR_INIT:
745 case SECCAT_RODATA_MERGE_CONST:
746 sname = ".lrodata";
747 flags &= ~SECTION_WRITE;
748 break;
749 case SECCAT_SRODATA:
750 case SECCAT_SDATA:
751 case SECCAT_SBSS:
752 gcc_unreachable ();
753 case SECCAT_TEXT:
754 case SECCAT_TDATA:
755 case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
758 break;
759 }
760 if (sname)
761 {
762 /* We might get called with string constants, but get_named_section
763 doesn't like them as they are not DECLs. Also, we need to set
764 flags in that case. */
765 if (!DECL_P (decl))
766 return get_section (sname, flags, NULL);
767 return get_named_section (decl, sname, reloc);
768 }
769 }
770 return default_elf_select_section (decl, reloc, align);
771}
772
773/* Select a set of attributes for section NAME based on the properties
774 of DECL and whether or not RELOC indicates that DECL's initializer
775 might contain runtime relocations. */
776
777static unsigned int ATTRIBUTE_UNUSED
778x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
779{
780 unsigned int flags = default_section_type_flags (decl, name, reloc);
781
  if (ix86_in_large_data_p (decl))
783 flags |= SECTION_LARGE;
784
  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || startswith (name, ".lbss.")
      || startswith (name, ".gnu.linkonce.lb."))
793 flags |= SECTION_BSS;
794
795 return flags;
796}
797
798/* Build up a unique section name, expressed as a
799 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
800 RELOC indicates whether the initial value of EXP requires
801 link-time relocations. */
802
803static void ATTRIBUTE_UNUSED
804x86_64_elf_unique_section (tree decl, int reloc)
805{
  if (ix86_in_large_data_p (decl))
807 {
808 const char *prefix = NULL;
809 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
810 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
811
812 switch (categorize_decl_for_section (decl, reloc))
813 {
814 case SECCAT_DATA:
815 case SECCAT_DATA_REL:
816 case SECCAT_DATA_REL_LOCAL:
817 case SECCAT_DATA_REL_RO:
818 case SECCAT_DATA_REL_RO_LOCAL:
819 prefix = one_only ? ".ld" : ".ldata";
820 break;
821 case SECCAT_BSS:
822 prefix = one_only ? ".lb" : ".lbss";
823 break;
824 case SECCAT_RODATA:
825 case SECCAT_RODATA_MERGE_STR:
826 case SECCAT_RODATA_MERGE_STR_INIT:
827 case SECCAT_RODATA_MERGE_CONST:
828 prefix = one_only ? ".lr" : ".lrodata";
829 break;
830 case SECCAT_SRODATA:
831 case SECCAT_SDATA:
832 case SECCAT_SBSS:
833 gcc_unreachable ();
834 case SECCAT_TEXT:
835 case SECCAT_TDATA:
836 case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
839 break;
840 }
841 if (prefix)
842 {
843 const char *name, *linkonce;
844 char *string;
845
846 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
847 name = targetm.strip_name_encoding (name);
848
849 /* If we're using one_only, then there needs to be a .gnu.linkonce
850 prefix to the section name. */
851 linkonce = one_only ? ".gnu.linkonce" : "";
852
853 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
854
855 set_decl_section_name (decl, string);
856 return;
857 }
858 }
859 default_unique_section (decl, reloc);
860}
861
862#ifdef COMMON_ASM_OP
863
864#ifndef LARGECOMM_SECTION_ASM_OP
865#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
866#endif
867
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For the medium model on x86-64 we need to use the
   LARGECOMM_SECTION_ASM_OP directive for large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
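/* Purely illustrative example of the output produced above: a 256 KiB
   common object FOO under -mcmodel=medium (with the default
   -mlarge-data-threshold) would be emitted roughly as
       .largecomm FOO,262144,32
   while smaller objects keep using the ordinary ".comm" directive.  */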
891#endif
892
893/* Utility function for targets to use in implementing
894 ASM_OUTPUT_ALIGNED_BSS. */
895
896void
897x86_output_aligned_bss (FILE *file, tree decl, const char *name,
898 unsigned HOST_WIDE_INT size, unsigned align)
899{
900 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
901 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
902 && size > (unsigned int)ix86_section_threshold)
903 switch_to_section (get_named_section (decl, ".lbss", 0));
904 else
905 switch_to_section (bss_section);
906 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
907#ifdef ASM_DECLARE_OBJECT_NAME
908 last_assemble_variable_decl = decl;
909 ASM_DECLARE_OBJECT_NAME (file, name, decl);
910#else
911 /* Standard thing is just output label for the object. */
912 ASM_OUTPUT_LABEL (file, name);
913#endif /* ASM_DECLARE_OBJECT_NAME */
914 ASM_OUTPUT_SKIP (file, size ? size : 1);
915}
916
917/* Decide whether we must probe the stack before any space allocation
918 on this target. It's essentially TARGET_STACK_PROBE except when
919 -fstack-check causes the stack to be already probed differently. */
920
921bool
922ix86_target_stack_probe (void)
923{
924 /* Do not probe the stack twice if static stack checking is enabled. */
925 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
926 return false;
927
928 return TARGET_STACK_PROBE;
929}
930
931/* Decide whether we can make a sibling call to a function. DECL is the
932 declaration of the function being targeted by the call and EXP is the
933 CALL_EXPR representing the call. */
934
935static bool
936ix86_function_ok_for_sibcall (tree decl, tree exp)
937{
938 tree type, decl_or_type;
939 rtx a, b;
940 bool bind_global = decl && !targetm.binds_local_p (decl);
941
  if (ix86_function_naked (current_function_decl))
943 return false;
944
945 /* Sibling call isn't OK if there are no caller-saved registers
946 since all registers must be preserved before return. */
947 if (cfun->machine->no_caller_saved_registers)
948 return false;
949
950 /* If we are generating position-independent code, we cannot sibcall
951 optimize direct calls to global functions, as the PLT requires
952 %ebx be live. (Darwin does not have a PLT.) */
953 if (!TARGET_MACHO
954 && !TARGET_64BIT
955 && flag_pic
956 && flag_plt
957 && bind_global)
958 return false;
959
960 /* If we need to align the outgoing stack, then sibcalling would
961 unalign the stack, which may break the called function. */
962 if (ix86_minimum_incoming_stack_boundary (true)
963 < PREFERRED_STACK_BOUNDARY)
964 return false;
965
966 if (decl)
967 {
968 decl_or_type = decl;
969 type = TREE_TYPE (decl);
970 }
971 else
972 {
973 /* We're looking at the CALL_EXPR, we need the type of the function. */
974 type = CALL_EXPR_FN (exp); /* pointer expression */
975 type = TREE_TYPE (type); /* pointer type */
976 type = TREE_TYPE (type); /* function type */
977 decl_or_type = type;
978 }
979
980 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
981 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
982 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
983 || (REG_PARM_STACK_SPACE (decl_or_type)
984 != REG_PARM_STACK_SPACE (current_function_decl)))
985 {
986 maybe_complain_about_tail_call (exp,
987 "inconsistent size of stack space"
988 " allocated for arguments which are"
989 " passed in registers");
990 return false;
991 }
992
993 /* Check that the return value locations are the same. Like
994 if we are returning floats on the 80387 register stack, we cannot
995 make a sibcall from a function that doesn't return a float to a
996 function that does or, conversely, from a function that does return
997 a float to a function that doesn't; the necessary stack adjustment
998 would not be executed. This is also the place we notice
999 differences in the return value ABI. Note that it is ok for one
1000 of the functions to have void return type as long as the return
1001 value of the other is passed in a register. */
1002 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1003 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1004 cfun->decl, false);
1005 if (STACK_REG_P (a) || STACK_REG_P (b))
1006 {
1007 if (!rtx_equal_p (a, b))
1008 return false;
1009 }
1010 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1011 ;
1012 else if (!rtx_equal_p (a, b))
1013 return false;
1014
1015 if (TARGET_64BIT)
1016 {
1017 /* The SYSV ABI has more call-clobbered registers;
1018 disallow sibcalls from MS to SYSV. */
1019 if (cfun->machine->call_abi == MS_ABI
1020 && ix86_function_type_abi (type) == SYSV_ABI)
1021 return false;
1022 }
1023 else
1024 {
1025 /* If this call is indirect, we'll need to be able to use a
1026 call-clobbered register for the address of the target function.
1027 Make sure that all such registers are not used for passing
1028 parameters. Note that DLLIMPORT functions and call to global
1029 function via GOT slot are indirect. */
1030 if (!decl
1031 || (bind_global && flag_pic && !flag_plt)
1032 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1033 || flag_force_indirect_call)
1034 {
1035 /* Check if regparm >= 3 since arg_reg_available is set to
1036 false if regparm == 0. If regparm is 1 or 2, there is
1037 always a call-clobbered register available.
1038
1039 ??? The symbol indirect call doesn't need a call-clobbered
1040 register. But we don't know if this is a symbol indirect
1041 call or not here. */
1042 if (ix86_function_regparm (type, decl) >= 3
1043 && !cfun->machine->arg_reg_available)
1044 return false;
1045 }
1046 }
1047
1048 if (decl && ix86_use_pseudo_pic_reg ())
1049 {
1050 /* When PIC register is used, it must be restored after ifunc
1051 function returns. */
1052 cgraph_node *node = cgraph_node::get (decl);
1053 if (node && node->ifunc_resolver)
1054 return false;
1055 }
1056
1057 /* Disable sibcall if callee has indirect_return attribute and
1058 caller doesn't since callee will return to the caller's caller
1059 via an indirect jump. */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute ("indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1065 return false;
1066
1067 /* Otherwise okay. That also includes certain types of indirect calls. */
1068 return true;
1069}
1070
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* regparm and sseregparm aren't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
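/* Purely illustrative examples of the mapping above for 32-bit code:
     void __attribute__ ((stdcall)) f (int);   -> IX86_CALLCVT_STDCALL
     void __attribute__ ((fastcall)) g (int);  -> IX86_CALLCVT_FASTCALL
     void h (int, ...);                        -> IX86_CALLCVT_CDECL
   On 64-bit targets everything yields IX86_CALLCVT_CDECL.  */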
1120
1121/* Return 0 if the attributes for two types are incompatible, 1 if they
1122 are compatible, and 2 if they are nearly compatible (which causes a
1123 warning to be generated). */
1124
1125static int
1126ix86_comp_type_attributes (const_tree type1, const_tree type2)
1127{
1128 unsigned int ccvt1, ccvt2;
1129
1130 if (TREE_CODE (type1) != FUNCTION_TYPE
1131 && TREE_CODE (type1) != METHOD_TYPE)
1132 return 1;
1133
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
1136 if (ccvt1 != ccvt2)
1137 return 0;
1138 if (ix86_function_regparm (type1, NULL)
1139 != ix86_function_regparm (type2, NULL))
1140 return 0;
1141
1142 return 1;
1143}
1144
1145/* Return the regparm value for a function with the indicated TYPE and DECL.
1146 DECL may be NULL when calling function indirectly
1147 or considering a libcall. */
1148
1149static int
1150ix86_function_regparm (const_tree type, const_tree decl)
1151{
1152 tree attr;
1153 int regparm;
1154 unsigned int ccvt;
1155
1156 if (TARGET_64BIT)
1157 return (ix86_function_type_abi (type) == SYSV_ABI
1158 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1159 ccvt = ix86_get_callcvt (type);
1160 regparm = ix86_regparm;
1161
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
1171 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1172 return 2;
1173 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1174 return 1;
1175
1176 /* Use register calling convention for local functions when possible. */
1177 if (decl
1178 && TREE_CODE (decl) == FUNCTION_DECL)
1179 {
1180 cgraph_node *target = cgraph_node::get (decl);
1181 if (target)
1182 target = target->function_symbol ();
1183
      /* Caller and callee must agree on the calling convention, so
	 checking just `optimize' here would mean that with
	 __attribute__((optimize (...))) the caller could use the regparm
	 convention and the callee not, or vice versa.  Instead look at
	 whether the callee is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
1191 {
1192 if (target->local && target->can_change_signature)
1193 {
1194 int local_regparm, globals = 0, regno;
1195
1196 /* Make sure no regparm register is taken by a
1197 fixed register variable. */
1198 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1199 local_regparm++)
1200 if (fixed_regs[local_regparm])
1201 break;
1202
1203 /* We don't want to use regparm(3) for nested functions as
1204 these use a static chain pointer in the third argument. */
1205 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1206 local_regparm = 2;
1207
1208 /* Save a register for the split stack. */
1209 if (flag_split_stack)
1210 {
1211 if (local_regparm == 3)
1212 local_regparm = 2;
1213 else if (local_regparm == 2
1214 && DECL_STATIC_CHAIN (target->decl))
1215 local_regparm = 1;
1216 }
1217
	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
1222 for (regno = AX_REG; regno <= DI_REG; regno++)
1223 if (fixed_regs[regno])
1224 globals++;
1225
1226 local_regparm
1227 = globals < local_regparm ? local_regparm - globals : 0;
1228
1229 if (local_regparm > regparm)
1230 regparm = local_regparm;
1231 }
1232 }
1233 }
1234
1235 return regparm;
1236}
1237
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling a function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by an error; this is used in situations where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1248{
1249 gcc_assert (!TARGET_64BIT);
1250
1251 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1252 by the sseregparm attribute. */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1255 {
1256 if (!TARGET_SSE)
1257 {
1258 if (warn)
1259 {
1260 if (decl)
1261 error ("calling %qD with attribute sseregparm without "
1262 "SSE/SSE2 enabled", decl);
1263 else
1264 error ("calling %qT with attribute sseregparm without "
1265 "SSE/SSE2 enabled", type);
1266 }
1267 return 0;
1268 }
1269
1270 return 2;
1271 }
1272
1273 if (!decl)
1274 return 0;
1275
1276 cgraph_node *target = cgraph_node::get (decl);
1277 if (target)
1278 target = target->function_symbol ();
1279
1280 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1281 (and DFmode for SSE2) arguments in SSE registers. */
1282 if (target
1283 /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1285 && opt_for_fn (target->decl, optimize)
1286 && !(profile_flag && !flag_fentry))
1287 {
1288 if (target->local && target->can_change_signature)
1289 {
1290 /* Refuse to produce wrong code when local function with SSE enabled
1291 is called from SSE disabled function.
1292 FIXME: We need a way to detect these cases cross-ltrans partition
1293 and avoid using SSE calling conventions on local functions called
1294 from function with SSE disabled. For now at least delay the
1295 warning until we know we are going to produce wrong code.
1296 See PR66047 */
1297 if (!TARGET_SSE && warn)
1298 return -1;
1299 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1300 ->x_ix86_isa_flags) ? 2 : 1;
1301 }
1302 }
1303
1304 return 0;
1305}
1306
1307/* Return true if EAX is live at the start of the function. Used by
1308 ix86_expand_prologue to determine if we need special help before
1309 calling allocate_stack_worker. */
1310
1311static bool
1312ix86_eax_live_at_start_p (void)
1313{
1314 /* Cheat. Don't bother working forward from ix86_function_regparm
1315 to the function type to whether an actual argument is located in
1316 eax. Instead just look at cfg info, which is still close enough
1317 to correct at this point. This gives false positives for broken
1318 functions that might use uninitialized data that happens to be
1319 allocated in eax, but who cares? */
1320 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1321}
1322
1323static bool
1324ix86_keep_aggregate_return_pointer (tree fntype)
1325{
1326 tree attr;
1327
1328 if (!TARGET_64BIT)
1329 {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
1332 if (attr)
1333 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1334
1335 /* For 32-bit MS-ABI the default is to keep aggregate
1336 return pointer. */
1337 if (ix86_function_type_abi (fntype) == MS_ABI)
1338 return true;
1339 }
1340 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1341}
1342
1343/* Value is the number of bytes of arguments automatically
1344 popped when returning from a subroutine call.
1345 FUNDECL is the declaration node of the function (as a tree),
1346 FUNTYPE is the data type of the function (as a tree),
1347 or for a library call it is an identifier node for the subroutine name.
1348 SIZE is the number of bytes of arguments passed on the stack.
1349
1350 On the 80386, the RTD insn may be used to pop them if the number
1351 of args is fixed, but if the number is variable then the caller
1352 must pop them all. RTD can't be used for library calls now
1353 because the library is compiled with the Unix compiler.
1354 Use of RTD is a selectable option, since it is incompatible with
1355 standard Unix calling sequences. If the option is not selected,
1356 the caller must always pop the args.
1357
1358 The attribute stdcall is equivalent to RTD on a per module basis. */
1359
1360static poly_int64
1361ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1362{
1363 unsigned int ccvt;
1364
1365 /* None of the 64-bit ABIs pop arguments. */
1366 if (TARGET_64BIT)
1367 return 0;
1368
  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
1381 if (nregs == 0)
1382 return GET_MODE_SIZE (Pmode);
1383 }
1384
1385 return 0;
1386}
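/* Purely illustrative example: for a prototyped 32-bit function declared
     void __attribute__ ((stdcall)) f (int, int, int);
   the callee pops its own 12 bytes of arguments (ret $12), so this hook
   returns 12; for the default cdecl convention it returns 0 and the
   caller pops the arguments.  */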
1387
1388/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1389
1390static bool
1391ix86_legitimate_combined_insn (rtx_insn *insn)
1392{
1393 int i;
1394
1395 /* Check operand constraints in case hard registers were propagated
1396 into insn pattern. This check prevents combine pass from
1397 generating insn patterns with invalid hard register operands.
1398 These invalid insns can eventually confuse reload to error out
1399 with a spill failure. See also PRs 46829 and 46843. */
1400
1401 gcc_assert (INSN_CODE (insn) >= 0);
1402
1403 extract_insn (insn);
1404 preprocess_constraints (insn);
1405
1406 int n_operands = recog_data.n_operands;
1407 int n_alternatives = recog_data.n_alternatives;
1408 for (i = 0; i < n_operands; i++)
1409 {
1410 rtx op = recog_data.operand[i];
1411 machine_mode mode = GET_MODE (op);
1412 const operand_alternative *op_alt;
1413 int offset = 0;
1414 bool win;
1415 int j;
1416
1417 /* A unary operator may be accepted by the predicate, but it
1418 is irrelevant for matching constraints. */
1419 if (UNARY_P (op))
1420 op = XEXP (op, 0);
1421
1422 if (SUBREG_P (op))
1423 {
1424 if (REG_P (SUBREG_REG (op))
1425 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1426 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1427 GET_MODE (SUBREG_REG (op)),
1428 SUBREG_BYTE (op),
1429 GET_MODE (op));
1430 op = SUBREG_REG (op);
1431 }
1432
1433 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1434 continue;
1435
1436 op_alt = recog_op_alt;
1437
1438 /* Operand has no constraints, anything is OK. */
1439 win = !n_alternatives;
1440
1441 alternative_mask preferred = get_preferred_alternatives (insn);
1442 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1443 {
1444 if (!TEST_BIT (preferred, j))
1445 continue;
1446 if (op_alt[i].anything_ok
1447 || (op_alt[i].matches != -1
1448 && operands_match_p
1449 (recog_data.operand[i],
1450 recog_data.operand[op_alt[i].matches]))
1451 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1452 {
1453 win = true;
1454 break;
1455 }
1456 }
1457
1458 if (!win)
1459 return false;
1460 }
1461
1462 return true;
1463}
1464
1465/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1466
1467static unsigned HOST_WIDE_INT
1468ix86_asan_shadow_offset (void)
1469{
1470 return SUBTARGET_SHADOW_OFFSET;
1471}
1472
1473/* Argument support functions. */
1474
1475/* Return true when register may be used to pass function parameters. */
1476bool
1477ix86_function_arg_regno_p (int regno)
1478{
1479 int i;
1480 enum calling_abi call_abi;
1481 const int *parm_regs;
1482
1483 if (TARGET_SSE && SSE_REGNO_P (regno)
1484 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1485 return true;
1486
1487 if (!TARGET_64BIT)
1488 return (regno < REGPARM_MAX
1489 || (TARGET_MMX && MMX_REGNO_P (regno)
1490 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1491
1492 /* TODO: The function should depend on current function ABI but
1493 builtins.cc would need updating then. Therefore we use the
1494 default ABI. */
1495 call_abi = ix86_cfun_abi ();
1496
1497 /* RAX is used as hidden argument to va_arg functions. */
1498 if (call_abi == SYSV_ABI && regno == AX_REG)
1499 return true;
1500
1501 if (call_abi == MS_ABI)
1502 parm_regs = x86_64_ms_abi_int_parameter_registers;
1503 else
1504 parm_regs = x86_64_int_parameter_registers;
1505
1506 for (i = 0; i < (call_abi == MS_ABI
1507 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1508 if (regno == parm_regs[i])
1509 return true;
1510 return false;
1511}
1512
1513/* Return if we do not know how to pass ARG solely in registers. */
1514
1515static bool
1516ix86_must_pass_in_stack (const function_arg_info &arg)
1517{
1518 if (must_pass_in_stack_var_size_or_pad (arg))
1519 return true;
1520
1521 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1522 The layout_type routine is crafty and tries to trick us into passing
1523 currently unsupported vector types on the stack by using TImode. */
1524 return (!TARGET_64BIT && arg.mode == TImode
1525 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1526}
1527
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
1533{
1534 enum calling_abi call_abi = SYSV_ABI;
1535 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1536 call_abi = ix86_function_abi (fndecl);
1537 else
1538 call_abi = ix86_function_type_abi (fndecl);
1539 if (TARGET_64BIT && call_abi == MS_ABI)
1540 return 32;
1541 return 0;
1542}
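/* Purely illustrative example: for an x86-64 ms_abi function the value
   above is the 32-byte "shadow space" the caller must reserve above the
   return address for the four register arguments; for the SysV ABI no
   such area is reserved and the hook returns 0.  */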
1543
1544/* We add this as a workaround in order to use libc_has_function
1545 hook in i386.md. */
1546bool
1547ix86_libc_has_function (enum function_class fn_class)
1548{
1549 return targetm.libc_has_function (fn_class, NULL_TREE);
1550}
1551
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
   calling ABI used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}
1580
1581enum calling_abi
1582ix86_function_abi (const_tree fndecl)
1583{
1584 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1585}
1586
1587/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1588 specifying the call abi used. */
1589enum calling_abi
1590ix86_cfun_abi (void)
1591{
1592 return cfun ? cfun->machine->call_abi : ix86_abi;
1593}
1594
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "%<ms_hook_prologue%> attribute is not compatible "
		  "with nested functions");
      else
	return true;
    }
  return false;
}

bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}
1618
1619/* Write the extra assembler code needed to declare a function properly. */
1620
void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  ASM_OUTPUT_LABEL (out_file, fname);

  /* Output the magic byte marker, if the hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
	}
    }
}
1663
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are set up.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
1672
/* Return true if a pseudo register should be created and used to hold
   the GOT address for PIC code.  */
1675bool
1676ix86_use_pseudo_pic_reg (void)
1677{
1678 if ((TARGET_64BIT
1679 && (ix86_cmodel == CM_SMALL_PIC
1680 || TARGET_PECOFF))
1681 || !flag_pic)
1682 return false;
1683 return true;
1684}
1685
1686/* Initialize large model PIC register. */
1687
1688static void
1689ix86_init_large_pic_reg (unsigned int tmp_regno)
1690{
1691 rtx_code_label *label;
1692 rtx tmp_reg;
1693
1694 gcc_assert (Pmode == DImode);
1695 label = gen_label_rtx ();
1696 emit_label (label);
1697 LABEL_PRESERVE_P (label) = 1;
1698 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1699 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1700 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1701 label));
1702 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1703 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1704 const char *name = LABEL_NAME (label);
1705 PUT_CODE (label, NOTE);
1706 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1707 NOTE_DELETED_LABEL_NAME (label) = name;
1708}
1709
1710/* Create and initialize PIC register if required. */
1711static void
1712ix86_init_pic_reg (void)
1713{
1714 edge entry_edge;
1715 rtx_insn *seq;
1716
1717 if (!ix86_use_pseudo_pic_reg ())
1718 return;
1719
1720 start_sequence ();
1721
1722 if (TARGET_64BIT)
1723 {
1724 if (ix86_cmodel == CM_LARGE_PIC)
1725 ix86_init_large_pic_reg (R11_REG);
1726 else
1727 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1728 }
1729 else
1730 {
      /* If there will be an mcount call in the function, it is more
         profitable to emit SET_GOT into the ABI-defined
         REAL_PIC_OFFSET_TABLE_REGNUM.  */
1733 rtx reg = crtl->profile
1734 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1735 : pic_offset_table_rtx;
1736 rtx_insn *insn = emit_insn (gen_set_got (reg));
1737 RTX_FRAME_RELATED_P (insn) = 1;
1738 if (crtl->profile)
1739 emit_move_insn (pic_offset_table_rtx, reg);
1740 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1741 }
1742
1743 seq = get_insns ();
1744 end_sequence ();
1745
1746 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1747 insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
1749}
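
/* As an illustration (a sketch, not a guaranteed instruction selection):
   in 32-bit PIC code the gen_set_got sequence above normally ends up as

        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   once the pseudo PIC register has been assigned a hard register such as
   %ebx; the insns are inserted on the entry edge so the GOT pointer is
   available before any use in the function body.  */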
1750
1751/* Initialize a variable CUM of type CUMULATIVE_ARGS
1752 for a call to a function whose data type is FNTYPE.
1753 For a library call, FNTYPE is 0. */
1754
1755void
1756init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1757 tree fntype, /* tree ptr for function decl */
1758 rtx libname, /* SYMBOL_REF of library name or 0 */
1759 tree fndecl,
1760 int caller)
1761{
1762 struct cgraph_node *local_info_node = NULL;
1763 struct cgraph_node *target = NULL;
1764
1765 /* Set silent_p to false to raise an error for invalid calls when
1766 expanding function body. */
1767 cfun->machine->silent_p = false;
1768
  memset (cum, 0, sizeof (*cum));
1770
1771 if (fndecl)
1772 {
      target = cgraph_node::get (fndecl);
1774 if (target)
1775 {
1776 target = target->function_symbol ();
          local_info_node = cgraph_node::local_info_node (target->decl);
          cum->call_abi = ix86_function_abi (target->decl);
1779 }
1780 else
1781 cum->call_abi = ix86_function_abi (fndecl);
1782 }
1783 else
1784 cum->call_abi = ix86_function_type_abi (fntype);
1785
1786 cum->caller = caller;
1787
1788 /* Set up the number of registers to use for passing arguments. */
1789 cum->nregs = ix86_regparm;
1790 if (TARGET_64BIT)
1791 {
1792 cum->nregs = (cum->call_abi == SYSV_ABI
1793 ? X86_64_REGPARM_MAX
1794 : X86_64_MS_REGPARM_MAX);
1795 }
1796 if (TARGET_SSE)
1797 {
1798 cum->sse_nregs = SSE_REGPARM_MAX;
1799 if (TARGET_64BIT)
1800 {
1801 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1802 ? X86_64_SSE_REGPARM_MAX
1803 : X86_64_MS_SSE_REGPARM_MAX);
1804 }
1805 }
1806 if (TARGET_MMX)
1807 cum->mmx_nregs = MMX_REGPARM_MAX;
1808 cum->warn_avx512f = true;
1809 cum->warn_avx = true;
1810 cum->warn_sse = true;
1811 cum->warn_mmx = true;
1812
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
1819 if (local_info_node && local_info_node->local
1820 && local_info_node->can_change_signature)
1821 fntype = TREE_TYPE (target->decl);
1822 cum->stdarg = stdarg_p (fntype);
1823 cum->maybe_vaarg = (fntype
1824 ? (!prototype_p (fntype) || stdarg_p (fntype))
1825 : !libname);
1826
1827 cum->decl = fndecl;
1828
1829 cum->warn_empty = !warn_abi || cum->stdarg;
1830 if (!cum->warn_empty && fntype)
1831 {
1832 function_args_iterator iter;
1833 tree argtype;
1834 bool seen_empty_type = false;
1835 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1836 {
1837 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1838 break;
1839 if (TYPE_EMPTY_P (argtype))
1840 seen_empty_type = true;
1841 else if (seen_empty_type)
1842 {
1843 cum->warn_empty = true;
1844 break;
1845 }
1846 }
1847 }
1848
1849 if (!TARGET_64BIT)
1850 {
1851 /* If there are variable arguments, then we won't pass anything
1852 in registers in 32-bit mode. */
1853 if (stdarg_p (fntype))
1854 {
1855 cum->nregs = 0;
          /* Since in 32-bit mode, variable arguments are always passed on
             the stack, there is a scratch register available for an
             indirect sibcall.  */
1859 cfun->machine->arg_reg_available = true;
1860 cum->sse_nregs = 0;
1861 cum->mmx_nregs = 0;
1862 cum->warn_avx512f = false;
1863 cum->warn_avx = false;
1864 cum->warn_sse = false;
1865 cum->warn_mmx = false;
1866 return;
1867 }
1868
1869 /* Use ecx and edx registers if function has fastcall attribute,
1870 else look for regparm information. */
1871 if (fntype)
1872 {
          unsigned int ccvt = ix86_get_callcvt (fntype);
1874 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1875 {
1876 cum->nregs = 1;
1877 cum->fastcall = 1; /* Same first register as in fastcall. */
1878 }
1879 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1880 {
1881 cum->nregs = 2;
1882 cum->fastcall = 1;
1883 }
1884 else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
1886 }
1887
1888 /* Set up the number of SSE registers used for passing SFmode
1889 and DFmode arguments. Warn for mismatching ABI. */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1891 }
1892
1893 cfun->machine->arg_reg_available = (cum->nregs > 0);
1894}
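
/* For example (illustrative only), with the 32-bit calling-convention
   attributes handled above:

     int __attribute__ ((fastcall)) f (int a, int b, int c);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   f receives A in %ecx and B in %edx with C on the stack (cum->nregs == 2,
   cum->fastcall set), while g uses %eax, %edx and %ecx for all three
   arguments.  */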
1895
1896/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1897 But in the case of vector types, it is some vector mode.
1898
1899 When we have only some of our vector isa extensions enabled, then there
1900 are some modes for which vector_mode_supported_p is false. For these
1901 modes, the generic vector support in gcc will choose some non-vector mode
1902 in order to implement the type. By computing the natural mode, we'll
1903 select the proper ABI location for the operand and not depend on whatever
1904 the middle-end decides to do with these vector types.
1905
   The middle-end can't deal with vector types larger than 16 bytes.  In
   that case we return the original mode and warn about the ABI change if
   CUM isn't NULL.
1909
   If IN_RETURN is true, warn about the ABI change if the vector mode
   isn't available for the function return value.  */
1912
1913static machine_mode
1914type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1915 bool in_return)
1916{
1917 machine_mode mode = TYPE_MODE (type);
1918
1919 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1920 {
1921 HOST_WIDE_INT size = int_size_in_bytes (type);
1922 if ((size == 8 || size == 16 || size == 32 || size == 64)
1923 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
1925 {
1926 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1927
1928 /* There are no XFmode vector modes ... */
1929 if (innermode == XFmode)
1930 return mode;
1931
1932 /* ... and no decimal float vector modes. */
1933 if (DECIMAL_FLOAT_MODE_P (innermode))
1934 return mode;
1935
1936 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1937 mode = MIN_MODE_VECTOR_FLOAT;
1938 else
1939 mode = MIN_MODE_VECTOR_INT;
1940
1941 /* Get the mode which has this inner mode and number of units. */
1942 FOR_EACH_MODE_FROM (mode, mode)
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1944 && GET_MODE_INNER (mode) == innermode)
1945 {
1946 if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
1947 && !TARGET_IAMCU)
1948 {
1949 static bool warnedavx512f;
1950 static bool warnedavx512f_ret;
1951
1952 if (cum && cum->warn_avx512f && !warnedavx512f)
1953 {
1954 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1955 "without AVX512F enabled changes the ABI"))
1956 warnedavx512f = true;
1957 }
1958 else if (in_return && !warnedavx512f_ret)
1959 {
1960 if (warning (OPT_Wpsabi, "AVX512F vector return "
1961 "without AVX512F enabled changes the ABI"))
1962 warnedavx512f_ret = true;
1963 }
1964
1965 return TYPE_MODE (type);
1966 }
1967 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1968 {
1969 static bool warnedavx;
1970 static bool warnedavx_ret;
1971
1972 if (cum && cum->warn_avx && !warnedavx)
1973 {
1974 if (warning (OPT_Wpsabi, "AVX vector argument "
1975 "without AVX enabled changes the ABI"))
1976 warnedavx = true;
1977 }
1978 else if (in_return && !warnedavx_ret)
1979 {
1980 if (warning (OPT_Wpsabi, "AVX vector return "
1981 "without AVX enabled changes the ABI"))
1982 warnedavx_ret = true;
1983 }
1984
1985 return TYPE_MODE (type);
1986 }
1987 else if (((size == 8 && TARGET_64BIT) || size == 16)
1988 && !TARGET_SSE
1989 && !TARGET_IAMCU)
1990 {
1991 static bool warnedsse;
1992 static bool warnedsse_ret;
1993
1994 if (cum && cum->warn_sse && !warnedsse)
1995 {
1996 if (warning (OPT_Wpsabi, "SSE vector argument "
1997 "without SSE enabled changes the ABI"))
1998 warnedsse = true;
1999 }
2000 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2001 {
2002 if (warning (OPT_Wpsabi, "SSE vector return "
2003 "without SSE enabled changes the ABI"))
2004 warnedsse_ret = true;
2005 }
2006 }
2007 else if ((size == 8 && !TARGET_64BIT)
2008 && (!cfun
2009 || cfun->machine->func_type == TYPE_NORMAL)
2010 && !TARGET_MMX
2011 && !TARGET_IAMCU)
2012 {
2013 static bool warnedmmx;
2014 static bool warnedmmx_ret;
2015
2016 if (cum && cum->warn_mmx && !warnedmmx)
2017 {
2018 if (warning (OPT_Wpsabi, "MMX vector argument "
2019 "without MMX enabled changes the ABI"))
2020 warnedmmx = true;
2021 }
2022 else if (in_return && !warnedmmx_ret)
2023 {
2024 if (warning (OPT_Wpsabi, "MMX vector return "
2025 "without MMX enabled changes the ABI"))
2026 warnedmmx_ret = true;
2027 }
2028 }
2029 return mode;
2030 }
2031
2032 gcc_unreachable ();
2033 }
2034 }
2035
2036 return mode;
2037}
2038
2039/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2040 this may not agree with the mode that the type system has chosen for the
2041 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2042 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2043
2044static rtx
2045gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2046 unsigned int regno)
2047{
2048 rtx tmp;
2049
2050 if (orig_mode != BLKmode)
2051 tmp = gen_rtx_REG (orig_mode, regno);
2052 else
2053 {
2054 tmp = gen_rtx_REG (mode, regno);
2055 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2056 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2057 }
2058
2059 return tmp;
2060}
2061
/* x86-64 register passing implementation.  See the x86-64 psABI for
   details.  The goal of this code is to classify each eightbyte of the
   incoming argument by register class and assign registers accordingly.  */
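
/* A worked example (a sketch; the psABI is the normative reference):

     struct s { double d; long l; };

   spans two eightbytes; the first classifies as SSEDF and the second as
   INTEGER, so the struct is passed in one SSE register plus one
   general-purpose register rather than in memory.  */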
2065
2066/* Return the union class of CLASS1 and CLASS2.
2067 See the x86-64 PS ABI for details. */
2068
2069static enum x86_64_reg_class
2070merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2071{
2072 /* Rule #1: If both classes are equal, this is the resulting class. */
2073 if (class1 == class2)
2074 return class1;
2075
2076 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2077 the other class. */
2078 if (class1 == X86_64_NO_CLASS)
2079 return class2;
2080 if (class2 == X86_64_NO_CLASS)
2081 return class1;
2082
2083 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2084 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2085 return X86_64_MEMORY_CLASS;
2086
2087 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2088 if ((class1 == X86_64_INTEGERSI_CLASS
2089 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2090 || (class2 == X86_64_INTEGERSI_CLASS
2091 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2092 return X86_64_INTEGERSI_CLASS;
2093 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2094 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2095 return X86_64_INTEGER_CLASS;
2096
2097 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2098 MEMORY is used. */
2099 if (class1 == X86_64_X87_CLASS
2100 || class1 == X86_64_X87UP_CLASS
2101 || class1 == X86_64_COMPLEX_X87_CLASS
2102 || class2 == X86_64_X87_CLASS
2103 || class2 == X86_64_X87UP_CLASS
2104 || class2 == X86_64_COMPLEX_X87_CLASS)
2105 return X86_64_MEMORY_CLASS;
2106
2107 /* Rule #6: Otherwise class SSE is used. */
2108 return X86_64_SSE_CLASS;
2109}
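
/* For instance (illustrative), for

     union u { int i; float f; };

   the single eightbyte is classified X86_64_INTEGERSI_CLASS for the int
   member and X86_64_SSESF_CLASS for the float member; rule #4 above
   merges these to X86_64_INTEGERSI_CLASS, so the union travels in a
   general-purpose register.  */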
2110
2111/* Classify the argument of type TYPE and mode MODE.
2112 CLASSES will be filled by the register class used to pass each word
2113 of the operand. The number of words is returned. In case the parameter
2114 should be passed in memory, 0 is returned. As a special case for zero
2115 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2116
   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 512, to avoid overflow cases.
2119
2120 See the x86-64 PS ABI for details.
2121*/
2122
2123static int
2124classify_argument (machine_mode mode, const_tree type,
2125 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2126 int &zero_width_bitfields)
2127{
2128 HOST_WIDE_INT bytes
2129 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2130 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2131
2132 /* Variable sized entities are always passed/returned in memory. */
2133 if (bytes < 0)
2134 return 0;
2135
2136 if (mode != VOIDmode)
2137 {
2138 /* The value of "named" doesn't matter. */
2139 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2140 if (targetm.calls.must_pass_in_stack (arg))
2141 return 0;
2142 }
2143
2144 if (type && (AGGREGATE_TYPE_P (type)
2145 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2146 {
2147 int i;
2148 tree field;
2149 enum x86_64_reg_class subclasses[MAX_CLASSES];
2150
2151 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2152 if (bytes > 64)
2153 return 0;
2154
2155 for (i = 0; i < words; i++)
2156 classes[i] = X86_64_NO_CLASS;
2157
      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle this as a special case.  */
2160 if (!words)
2161 {
2162 classes[0] = X86_64_NO_CLASS;
2163 return 1;
2164 }
2165
2166 /* Classify each field of record and merge classes. */
2167 switch (TREE_CODE (type))
2168 {
2169 case RECORD_TYPE:
2170 /* And now merge the fields of structure. */
2171 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2172 {
2173 if (TREE_CODE (field) == FIELD_DECL)
2174 {
2175 int num;
2176
2177 if (TREE_TYPE (field) == error_mark_node)
2178 continue;
2179
2180 /* Bitfields are always classified as integer. Handle them
2181 early, since later code would consider them to be
2182 misaligned integers. */
2183 if (DECL_BIT_FIELD (field))
2184 {
2185 if (integer_zerop (DECL_SIZE (field)))
2186 {
2187 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2188 continue;
2189 if (zero_width_bitfields != 2)
2190 {
2191 zero_width_bitfields = 1;
2192 continue;
2193 }
2194 }
2195 for (i = (int_bit_position (field)
2196 + (bit_offset % 64)) / 8 / 8;
2197 i < ((int_bit_position (field) + (bit_offset % 64))
2198 + tree_to_shwi (DECL_SIZE (field))
2199 + 63) / 8 / 8; i++)
                    classes[i]
                      = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2202 }
2203 else
2204 {
2205 int pos;
2206
2207 type = TREE_TYPE (field);
2208
2209 /* Flexible array member is ignored. */
2210 if (TYPE_MODE (type) == BLKmode
2211 && TREE_CODE (type) == ARRAY_TYPE
2212 && TYPE_SIZE (type) == NULL_TREE
2213 && TYPE_DOMAIN (type) != NULL_TREE
2214 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2215 == NULL_TREE))
2216 {
2217 static bool warned;
2218
2219 if (!warned && warn_psabi)
2220 {
2221 warned = true;
2222 inform (input_location,
2223 "the ABI of passing struct with"
2224 " a flexible array member has"
2225 " changed in GCC 4.4");
2226 }
2227 continue;
2228 }
                  num = classify_argument (TYPE_MODE (type), type,
                                           subclasses,
                                           (int_bit_position (field)
                                            + bit_offset) % 512,
                                           zero_width_bitfields);
2234 if (!num)
2235 return 0;
2236 pos = (int_bit_position (field)
2237 + (bit_offset % 64)) / 8 / 8;
2238 for (i = 0; i < num && (i + pos) < words; i++)
                classes[i + pos]
                  = merge_classes (subclasses[i], classes[i + pos]);
2241 }
2242 }
2243 }
2244 break;
2245
2246 case ARRAY_TYPE:
2247 /* Arrays are handled as small records. */
2248 {
2249 int num;
          num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                   TREE_TYPE (type), subclasses, bit_offset,
                                   zero_width_bitfields);
2253 if (!num)
2254 return 0;
2255
2256 /* The partial classes are now full classes. */
2257 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2258 subclasses[0] = X86_64_SSE_CLASS;
2259 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2260 subclasses[0] = X86_64_SSE_CLASS;
2261 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2262 && !((bit_offset % 64) == 0 && bytes == 4))
2263 subclasses[0] = X86_64_INTEGER_CLASS;
2264
2265 for (i = 0; i < words; i++)
2266 classes[i] = subclasses[i % num];
2267
2268 break;
2269 }
2270 case UNION_TYPE:
2271 case QUAL_UNION_TYPE:
        /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
2274 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2275 {
2276 if (TREE_CODE (field) == FIELD_DECL)
2277 {
2278 int num;
2279
2280 if (TREE_TYPE (field) == error_mark_node)
2281 continue;
2282
              num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                       TREE_TYPE (field), subclasses,
                                       bit_offset, zero_width_bitfields);
2286 if (!num)
2287 return 0;
2288 for (i = 0; i < num && i < words; i++)
                classes[i] = merge_classes (subclasses[i], classes[i]);
2290 }
2291 }
2292 break;
2293
2294 case BITINT_TYPE:
        /* _BitInt(N) for N > 64 is passed as a structure containing
           (N + 63) / 64 64-bit elements.  */
2297 if (words > 2)
2298 return 0;
2299 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2300 return 2;
2301
2302 default:
2303 gcc_unreachable ();
2304 }
2305
2306 if (words > 2)
2307 {
          /* When the size is larger than 16 bytes, if the first eightbyte
             isn't X86_64_SSE_CLASS or any of the others isn't
             X86_64_SSEUP_CLASS, everything should be passed in memory.  */
2312 if (classes[0] != X86_64_SSE_CLASS)
2313 return 0;
2314
2315 for (i = 1; i < words; i++)
2316 if (classes[i] != X86_64_SSEUP_CLASS)
2317 return 0;
2318 }
2319
2320 /* Final merger cleanup. */
2321 for (i = 0; i < words; i++)
2322 {
2323 /* If one class is MEMORY, everything should be passed in
2324 memory. */
2325 if (classes[i] == X86_64_MEMORY_CLASS)
2326 return 0;
2327
2328 /* The X86_64_SSEUP_CLASS should be always preceded by
2329 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2330 if (classes[i] == X86_64_SSEUP_CLASS
2331 && classes[i - 1] != X86_64_SSE_CLASS
2332 && classes[i - 1] != X86_64_SSEUP_CLASS)
2333 {
2334 /* The first one should never be X86_64_SSEUP_CLASS. */
2335 gcc_assert (i != 0);
2336 classes[i] = X86_64_SSE_CLASS;
2337 }
2338
2339 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2340 everything should be passed in memory. */
2341 if (classes[i] == X86_64_X87UP_CLASS
2342 && (classes[i - 1] != X86_64_X87_CLASS))
2343 {
2344 static bool warned;
2345
2346 /* The first one should never be X86_64_X87UP_CLASS. */
2347 gcc_assert (i != 0);
2348 if (!warned && warn_psabi)
2349 {
2350 warned = true;
2351 inform (input_location,
2352 "the ABI of passing union with %<long double%>"
2353 " has changed in GCC 4.4");
2354 }
2355 return 0;
2356 }
2357 }
2358 return words;
2359 }
2360
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
2363 if (mode != VOIDmode && mode != BLKmode)
2364 {
2365 int mode_alignment = GET_MODE_BITSIZE (mode);
2366
2367 if (mode == XFmode)
2368 mode_alignment = 128;
2369 else if (mode == XCmode)
2370 mode_alignment = 256;
2371 if (COMPLEX_MODE_P (mode))
2372 mode_alignment /= 2;
2373 /* Misaligned fields are always returned in memory. */
2374 if (bit_offset % mode_alignment)
2375 return 0;
2376 }
2377
  /* For V1xx modes, just use the base mode.  */
2379 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2380 && GET_MODE_UNIT_SIZE (mode) == bytes)
2381 mode = GET_MODE_INNER (mode);
2382
2383 /* Classification of atomic types. */
2384 switch (mode)
2385 {
2386 case E_SDmode:
2387 case E_DDmode:
2388 classes[0] = X86_64_SSE_CLASS;
2389 return 1;
2390 case E_TDmode:
2391 classes[0] = X86_64_SSE_CLASS;
2392 classes[1] = X86_64_SSEUP_CLASS;
2393 return 2;
2394 case E_DImode:
2395 case E_SImode:
2396 case E_HImode:
2397 case E_QImode:
2398 case E_CSImode:
2399 case E_CHImode:
2400 case E_CQImode:
2401 {
2402 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2403
2404 /* Analyze last 128 bits only. */
2405 size = (size - 1) & 0x7f;
2406
2407 if (size < 32)
2408 {
2409 classes[0] = X86_64_INTEGERSI_CLASS;
2410 return 1;
2411 }
2412 else if (size < 64)
2413 {
2414 classes[0] = X86_64_INTEGER_CLASS;
2415 return 1;
2416 }
2417 else if (size < 64+32)
2418 {
2419 classes[0] = X86_64_INTEGER_CLASS;
2420 classes[1] = X86_64_INTEGERSI_CLASS;
2421 return 2;
2422 }
2423 else if (size < 64+64)
2424 {
2425 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2426 return 2;
2427 }
2428 else
2429 gcc_unreachable ();
2430 }
2431 case E_CDImode:
2432 case E_TImode:
2433 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2434 return 2;
2435 case E_COImode:
2436 case E_OImode:
2437 /* OImode shouldn't be used directly. */
2438 gcc_unreachable ();
2439 case E_CTImode:
2440 return 0;
2441 case E_HFmode:
2442 case E_BFmode:
2443 if (!(bit_offset % 64))
2444 classes[0] = X86_64_SSEHF_CLASS;
2445 else
2446 classes[0] = X86_64_SSE_CLASS;
2447 return 1;
2448 case E_SFmode:
2449 if (!(bit_offset % 64))
2450 classes[0] = X86_64_SSESF_CLASS;
2451 else
2452 classes[0] = X86_64_SSE_CLASS;
2453 return 1;
2454 case E_DFmode:
2455 classes[0] = X86_64_SSEDF_CLASS;
2456 return 1;
2457 case E_XFmode:
2458 classes[0] = X86_64_X87_CLASS;
2459 classes[1] = X86_64_X87UP_CLASS;
2460 return 2;
2461 case E_TFmode:
2462 classes[0] = X86_64_SSE_CLASS;
2463 classes[1] = X86_64_SSEUP_CLASS;
2464 return 2;
2465 case E_HCmode:
2466 case E_BCmode:
2467 classes[0] = X86_64_SSE_CLASS;
2468 if (!(bit_offset % 64))
2469 return 1;
2470 else
2471 {
2472 classes[1] = X86_64_SSEHF_CLASS;
2473 return 2;
2474 }
2475 case E_SCmode:
2476 classes[0] = X86_64_SSE_CLASS;
2477 if (!(bit_offset % 64))
2478 return 1;
2479 else
2480 {
2481 static bool warned;
2482
2483 if (!warned && warn_psabi)
2484 {
2485 warned = true;
2486 inform (input_location,
2487 "the ABI of passing structure with %<complex float%>"
2488 " member has changed in GCC 4.4");
2489 }
2490 classes[1] = X86_64_SSESF_CLASS;
2491 return 2;
2492 }
2493 case E_DCmode:
2494 classes[0] = X86_64_SSEDF_CLASS;
2495 classes[1] = X86_64_SSEDF_CLASS;
2496 return 2;
2497 case E_XCmode:
2498 classes[0] = X86_64_COMPLEX_X87_CLASS;
2499 return 1;
2500 case E_TCmode:
      /* This mode is larger than 16 bytes.  */
2502 return 0;
2503 case E_V8SFmode:
2504 case E_V8SImode:
2505 case E_V32QImode:
2506 case E_V16HFmode:
2507 case E_V16BFmode:
2508 case E_V16HImode:
2509 case E_V4DFmode:
2510 case E_V4DImode:
2511 classes[0] = X86_64_SSE_CLASS;
2512 classes[1] = X86_64_SSEUP_CLASS;
2513 classes[2] = X86_64_SSEUP_CLASS;
2514 classes[3] = X86_64_SSEUP_CLASS;
2515 return 4;
2516 case E_V8DFmode:
2517 case E_V16SFmode:
2518 case E_V32HFmode:
2519 case E_V32BFmode:
2520 case E_V8DImode:
2521 case E_V16SImode:
2522 case E_V32HImode:
2523 case E_V64QImode:
2524 classes[0] = X86_64_SSE_CLASS;
2525 classes[1] = X86_64_SSEUP_CLASS;
2526 classes[2] = X86_64_SSEUP_CLASS;
2527 classes[3] = X86_64_SSEUP_CLASS;
2528 classes[4] = X86_64_SSEUP_CLASS;
2529 classes[5] = X86_64_SSEUP_CLASS;
2530 classes[6] = X86_64_SSEUP_CLASS;
2531 classes[7] = X86_64_SSEUP_CLASS;
2532 return 8;
2533 case E_V4SFmode:
2534 case E_V4SImode:
2535 case E_V16QImode:
2536 case E_V8HImode:
2537 case E_V8HFmode:
2538 case E_V8BFmode:
2539 case E_V2DFmode:
2540 case E_V2DImode:
2541 classes[0] = X86_64_SSE_CLASS;
2542 classes[1] = X86_64_SSEUP_CLASS;
2543 return 2;
2544 case E_V1TImode:
2545 case E_V1DImode:
2546 case E_V2SFmode:
2547 case E_V2SImode:
2548 case E_V4HImode:
2549 case E_V4HFmode:
2550 case E_V4BFmode:
2551 case E_V2HFmode:
2552 case E_V2BFmode:
2553 case E_V8QImode:
2554 classes[0] = X86_64_SSE_CLASS;
2555 return 1;
2556 case E_BLKmode:
2557 case E_VOIDmode:
2558 return 0;
2559 default:
2560 gcc_assert (VECTOR_MODE_P (mode));
2561
2562 if (bytes > 16)
2563 return 0;
2564
2565 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2566
2567 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2568 classes[0] = X86_64_INTEGERSI_CLASS;
2569 else
2570 classes[0] = X86_64_INTEGER_CLASS;
2571 classes[1] = X86_64_INTEGER_CLASS;
2572 return 1 + (bytes > 8);
2573 }
2574}
2575
2576/* Wrapper around classify_argument with the extra zero_width_bitfields
2577 argument, to diagnose GCC 12.1 ABI differences for C. */
2578
2579static int
2580classify_argument (machine_mode mode, const_tree type,
2581 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2582{
2583 int zero_width_bitfields = 0;
2584 static bool warned = false;
2585 int n = classify_argument (mode, type, classes, bit_offset,
2586 zero_width_bitfields);
2587 if (!zero_width_bitfields || warned || !warn_psabi)
2588 return n;
2589 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2590 zero_width_bitfields = 2;
  if (classify_argument (mode, type, alt_classes, bit_offset,
2592 zero_width_bitfields) != n)
2593 zero_width_bitfields = 3;
2594 else
2595 for (int i = 0; i < n; i++)
2596 if (classes[i] != alt_classes[i])
2597 {
2598 zero_width_bitfields = 3;
2599 break;
2600 }
2601 if (zero_width_bitfields == 3)
2602 {
2603 warned = true;
2604 const char *url
2605 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2606
2607 inform (input_location,
2608 "the ABI of passing C structures with zero-width bit-fields"
2609 " has changed in GCC %{12.1%}", url);
2610 }
2611 return n;
2612}
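
/* A sketch of the difference being diagnosed (illustrative): in C,

     struct s { float x; int : 0; float y; };

   used to have its only eightbyte classified as INTEGER because the
   zero-width bit-field participated in classification; since GCC 12.1
   the bit-field is ignored and the eightbyte is SSE, so the struct moved
   from a general-purpose register to an SSE register.  */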
2613
/* Examine the argument and return the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
2616
2617static bool
2618examine_argument (machine_mode mode, const_tree type, int in_return,
2619 int *int_nregs, int *sse_nregs)
2620{
2621 enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);
2623
2624 *int_nregs = 0;
2625 *sse_nregs = 0;
2626
2627 if (!n)
2628 return true;
2629 for (n--; n >= 0; n--)
2630 switch (regclass[n])
2631 {
2632 case X86_64_INTEGER_CLASS:
2633 case X86_64_INTEGERSI_CLASS:
2634 (*int_nregs)++;
2635 break;
2636 case X86_64_SSE_CLASS:
2637 case X86_64_SSEHF_CLASS:
2638 case X86_64_SSESF_CLASS:
2639 case X86_64_SSEDF_CLASS:
2640 (*sse_nregs)++;
2641 break;
2642 case X86_64_NO_CLASS:
2643 case X86_64_SSEUP_CLASS:
2644 break;
2645 case X86_64_X87_CLASS:
2646 case X86_64_X87UP_CLASS:
2647 case X86_64_COMPLEX_X87_CLASS:
2648 if (!in_return)
2649 return true;
2650 break;
2651 case X86_64_MEMORY_CLASS:
2652 gcc_unreachable ();
2653 }
2654
2655 return false;
2656}
2657
2658/* Construct container for the argument used by GCC interface. See
2659 FUNCTION_ARG for the detailed description. */
2660
2661static rtx
2662construct_container (machine_mode mode, machine_mode orig_mode,
2663 const_tree type, int in_return, int nintregs, int nsseregs,
2664 const int *intreg, int sse_regno)
2665{
2666 /* The following variables hold the static issued_error state. */
2667 static bool issued_sse_arg_error;
2668 static bool issued_sse_ret_error;
2669 static bool issued_x87_ret_error;
2670
2671 machine_mode tmpmode;
2672 int bytes
2673 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2674 enum x86_64_reg_class regclass[MAX_CLASSES];
2675 int n;
2676 int i;
2677 int nexps = 0;
2678 int needed_sseregs, needed_intregs;
2679 rtx exp[MAX_CLASSES];
2680 rtx ret;
2681
  n = classify_argument (mode, type, regclass, 0);
2683 if (!n)
2684 return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
                        &needed_sseregs))
2687 return NULL;
2688 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2689 return NULL;
2690
2691 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2692 some less clueful developer tries to use floating-point anyway. */
2693 if (needed_sseregs
2694 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2695 {
2696 /* Return early if we shouldn't raise an error for invalid
2697 calls. */
2698 if (cfun != NULL && cfun->machine->silent_p)
2699 return NULL;
2700 if (in_return)
2701 {
2702 if (!issued_sse_ret_error)
2703 {
2704 if (VALID_SSE2_TYPE_MODE (mode))
2705 error ("SSE register return with SSE2 disabled");
2706 else
2707 error ("SSE register return with SSE disabled");
2708 issued_sse_ret_error = true;
2709 }
2710 }
2711 else if (!issued_sse_arg_error)
2712 {
2713 if (VALID_SSE2_TYPE_MODE (mode))
2714 error ("SSE register argument with SSE2 disabled");
2715 else
2716 error ("SSE register argument with SSE disabled");
2717 issued_sse_arg_error = true;
2718 }
2719 return NULL;
2720 }
2721
2722 /* Likewise, error if the ABI requires us to return values in the
2723 x87 registers and the user specified -mno-80387. */
2724 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2725 for (i = 0; i < n; i++)
2726 if (regclass[i] == X86_64_X87_CLASS
2727 || regclass[i] == X86_64_X87UP_CLASS
2728 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2729 {
2730 /* Return early if we shouldn't raise an error for invalid
2731 calls. */
2732 if (cfun != NULL && cfun->machine->silent_p)
2733 return NULL;
2734 if (!issued_x87_ret_error)
2735 {
2736 error ("x87 register return with x87 disabled");
2737 issued_x87_ret_error = true;
2738 }
2739 return NULL;
2740 }
2741
  /* First construct the simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
2744 if (n == 1 && mode != SCmode && mode != HCmode)
2745 switch (regclass[0])
2746 {
2747 case X86_64_INTEGER_CLASS:
2748 case X86_64_INTEGERSI_CLASS:
2749 return gen_rtx_REG (mode, intreg[0]);
2750 case X86_64_SSE_CLASS:
2751 case X86_64_SSEHF_CLASS:
2752 case X86_64_SSESF_CLASS:
2753 case X86_64_SSEDF_CLASS:
2754 if (mode != BLKmode)
2755 return gen_reg_or_parallel (mode, orig_mode,
2756 GET_SSE_REGNO (sse_regno));
2757 break;
2758 case X86_64_X87_CLASS:
2759 case X86_64_COMPLEX_X87_CLASS:
2760 return gen_rtx_REG (mode, FIRST_STACK_REG);
2761 case X86_64_NO_CLASS:
2762 /* Zero sized array, struct or class. */
2763 return NULL;
2764 default:
2765 gcc_unreachable ();
2766 }
2767 if (n == 2
2768 && regclass[0] == X86_64_SSE_CLASS
2769 && regclass[1] == X86_64_SSEUP_CLASS
2770 && mode != BLKmode)
2771 return gen_reg_or_parallel (mode, orig_mode,
2772 GET_SSE_REGNO (sse_regno));
2773 if (n == 4
2774 && regclass[0] == X86_64_SSE_CLASS
2775 && regclass[1] == X86_64_SSEUP_CLASS
2776 && regclass[2] == X86_64_SSEUP_CLASS
2777 && regclass[3] == X86_64_SSEUP_CLASS
2778 && mode != BLKmode)
2779 return gen_reg_or_parallel (mode, orig_mode,
2780 GET_SSE_REGNO (sse_regno));
2781 if (n == 8
2782 && regclass[0] == X86_64_SSE_CLASS
2783 && regclass[1] == X86_64_SSEUP_CLASS
2784 && regclass[2] == X86_64_SSEUP_CLASS
2785 && regclass[3] == X86_64_SSEUP_CLASS
2786 && regclass[4] == X86_64_SSEUP_CLASS
2787 && regclass[5] == X86_64_SSEUP_CLASS
2788 && regclass[6] == X86_64_SSEUP_CLASS
2789 && regclass[7] == X86_64_SSEUP_CLASS
2790 && mode != BLKmode)
2791 return gen_reg_or_parallel (mode, orig_mode,
2792 GET_SSE_REGNO (sse_regno));
2793 if (n == 2
2794 && regclass[0] == X86_64_X87_CLASS
2795 && regclass[1] == X86_64_X87UP_CLASS)
2796 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2797
2798 if (n == 2
2799 && regclass[0] == X86_64_INTEGER_CLASS
2800 && regclass[1] == X86_64_INTEGER_CLASS
2801 && (mode == CDImode || mode == TImode || mode == BLKmode)
2802 && intreg[0] + 1 == intreg[1])
2803 {
2804 if (mode == BLKmode)
2805 {
2806 /* Use TImode for BLKmode values in 2 integer registers. */
2807 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2808 gen_rtx_REG (TImode, intreg[0]),
2809 GEN_INT (0));
2810 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2811 XVECEXP (ret, 0, 0) = exp[0];
2812 return ret;
2813 }
2814 else
2815 return gen_rtx_REG (mode, intreg[0]);
2816 }
2817
2818 /* Otherwise figure out the entries of the PARALLEL. */
2819 for (i = 0; i < n; i++)
2820 {
2821 int pos;
2822
2823 switch (regclass[i])
2824 {
2825 case X86_64_NO_CLASS:
2826 break;
2827 case X86_64_INTEGER_CLASS:
2828 case X86_64_INTEGERSI_CLASS:
2829 /* Merge TImodes on aligned occasions here too. */
2830 if (i * 8 + 8 > bytes)
2831 {
2832 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
            if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
              /* We've requested 24 bytes we don't have a mode for.
                 Use DImode.  */
2836 tmpmode = DImode;
2837 }
2838 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2839 tmpmode = SImode;
2840 else
2841 tmpmode = DImode;
2842 exp [nexps++]
2843 = gen_rtx_EXPR_LIST (VOIDmode,
2844 gen_rtx_REG (tmpmode, *intreg),
2845 GEN_INT (i*8));
2846 intreg++;
2847 break;
2848 case X86_64_SSEHF_CLASS:
2849 tmpmode = (mode == BFmode ? BFmode : HFmode);
2850 exp [nexps++]
2851 = gen_rtx_EXPR_LIST (VOIDmode,
2852 gen_rtx_REG (tmpmode,
2853 GET_SSE_REGNO (sse_regno)),
2854 GEN_INT (i*8));
2855 sse_regno++;
2856 break;
2857 case X86_64_SSESF_CLASS:
2858 exp [nexps++]
2859 = gen_rtx_EXPR_LIST (VOIDmode,
2860 gen_rtx_REG (SFmode,
2861 GET_SSE_REGNO (sse_regno)),
2862 GEN_INT (i*8));
2863 sse_regno++;
2864 break;
2865 case X86_64_SSEDF_CLASS:
2866 exp [nexps++]
2867 = gen_rtx_EXPR_LIST (VOIDmode,
2868 gen_rtx_REG (DFmode,
2869 GET_SSE_REGNO (sse_regno)),
2870 GEN_INT (i*8));
2871 sse_regno++;
2872 break;
2873 case X86_64_SSE_CLASS:
2874 pos = i;
2875 switch (n)
2876 {
2877 case 1:
2878 tmpmode = DImode;
2879 break;
2880 case 2:
2881 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2882 {
2883 tmpmode = TImode;
2884 i++;
2885 }
2886 else
2887 tmpmode = DImode;
2888 break;
2889 case 4:
2890 gcc_assert (i == 0
2891 && regclass[1] == X86_64_SSEUP_CLASS
2892 && regclass[2] == X86_64_SSEUP_CLASS
2893 && regclass[3] == X86_64_SSEUP_CLASS);
2894 tmpmode = OImode;
2895 i += 3;
2896 break;
2897 case 8:
2898 gcc_assert (i == 0
2899 && regclass[1] == X86_64_SSEUP_CLASS
2900 && regclass[2] == X86_64_SSEUP_CLASS
2901 && regclass[3] == X86_64_SSEUP_CLASS
2902 && regclass[4] == X86_64_SSEUP_CLASS
2903 && regclass[5] == X86_64_SSEUP_CLASS
2904 && regclass[6] == X86_64_SSEUP_CLASS
2905 && regclass[7] == X86_64_SSEUP_CLASS);
2906 tmpmode = XImode;
2907 i += 7;
2908 break;
2909 default:
2910 gcc_unreachable ();
2911 }
2912 exp [nexps++]
2913 = gen_rtx_EXPR_LIST (VOIDmode,
2914 gen_rtx_REG (tmpmode,
2915 GET_SSE_REGNO (sse_regno)),
2916 GEN_INT (pos*8));
2917 sse_regno++;
2918 break;
2919 default:
2920 gcc_unreachable ();
2921 }
2922 }
2923
2924 /* Empty aligned struct, union or class. */
2925 if (nexps == 0)
2926 return NULL;
2927
2928 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2929 for (i = 0; i < nexps; i++)
2930 XVECEXP (ret, 0, i) = exp [i];
2931 return ret;
2932}
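
/* For example (an illustrative sketch of the result): for

     struct s { long l; double d; };

   passed in registers, the container built above is a PARALLEL holding a
   DImode general-purpose register at byte offset 0 and a DFmode SSE
   register at byte offset 8.  */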
2933
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return the number of integer registers advanced over.  */
2939
2940static int
2941function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2942 const_tree type, HOST_WIDE_INT bytes,
2943 HOST_WIDE_INT words)
2944{
2945 int res = 0;
2946 bool error_p = false;
2947
2948 if (TARGET_IAMCU)
2949 {
2950 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2951 bytes in registers. */
2952 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2953 goto pass_in_reg;
2954 return res;
2955 }
2956
2957 switch (mode)
2958 {
2959 default:
2960 break;
2961
2962 case E_BLKmode:
2963 if (bytes < 0)
2964 break;
2965 /* FALLTHRU */
2966
2967 case E_DImode:
2968 case E_SImode:
2969 case E_HImode:
2970 case E_QImode:
2971pass_in_reg:
2972 cum->words += words;
2973 cum->nregs -= words;
2974 cum->regno += words;
2975 if (cum->nregs >= 0)
2976 res = words;
2977 if (cum->nregs <= 0)
2978 {
2979 cum->nregs = 0;
2980 cfun->machine->arg_reg_available = false;
2981 cum->regno = 0;
2982 }
2983 break;
2984
2985 case E_OImode:
2986 /* OImode shouldn't be used directly. */
2987 gcc_unreachable ();
2988
2989 case E_DFmode:
2990 if (cum->float_in_sse == -1)
2991 error_p = true;
2992 if (cum->float_in_sse < 2)
2993 break;
2994 /* FALLTHRU */
2995 case E_SFmode:
2996 if (cum->float_in_sse == -1)
2997 error_p = true;
2998 if (cum->float_in_sse < 1)
2999 break;
3000 /* FALLTHRU */
3001
3002 case E_V16HFmode:
3003 case E_V16BFmode:
3004 case E_V8SFmode:
3005 case E_V8SImode:
3006 case E_V64QImode:
3007 case E_V32HImode:
3008 case E_V16SImode:
3009 case E_V8DImode:
3010 case E_V32HFmode:
3011 case E_V32BFmode:
3012 case E_V16SFmode:
3013 case E_V8DFmode:
3014 case E_V32QImode:
3015 case E_V16HImode:
3016 case E_V4DFmode:
3017 case E_V4DImode:
3018 case E_TImode:
3019 case E_V16QImode:
3020 case E_V8HImode:
3021 case E_V4SImode:
3022 case E_V2DImode:
3023 case E_V8HFmode:
3024 case E_V8BFmode:
3025 case E_V4SFmode:
3026 case E_V2DFmode:
3027 if (!type || !AGGREGATE_TYPE_P (type))
3028 {
3029 cum->sse_words += words;
3030 cum->sse_nregs -= 1;
3031 cum->sse_regno += 1;
3032 if (cum->sse_nregs <= 0)
3033 {
3034 cum->sse_nregs = 0;
3035 cum->sse_regno = 0;
3036 }
3037 }
3038 break;
3039
3040 case E_V8QImode:
3041 case E_V4HImode:
3042 case E_V4HFmode:
3043 case E_V4BFmode:
3044 case E_V2SImode:
3045 case E_V2SFmode:
3046 case E_V1TImode:
3047 case E_V1DImode:
3048 if (!type || !AGGREGATE_TYPE_P (type))
3049 {
3050 cum->mmx_words += words;
3051 cum->mmx_nregs -= 1;
3052 cum->mmx_regno += 1;
3053 if (cum->mmx_nregs <= 0)
3054 {
3055 cum->mmx_nregs = 0;
3056 cum->mmx_regno = 0;
3057 }
3058 }
3059 break;
3060 }
3061 if (error_p)
3062 {
3063 cum->float_in_sse = 0;
3064 error ("calling %qD with SSE calling convention without "
3065 "SSE/SSE2 enabled", cum->decl);
3066 sorry ("this is a GCC bug that can be worked around by adding "
3067 "attribute used to function called");
3068 }
3069
3070 return res;
3071}
3072
3073static int
3074function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3075 const_tree type, HOST_WIDE_INT words, bool named)
3076{
3077 int int_nregs, sse_nregs;
3078
  /* Unnamed 512- and 256-bit vector mode parameters are passed on the
     stack.  */
3080 if (!named && (VALID_AVX512F_REG_MODE (mode)
3081 || VALID_AVX256_REG_MODE (mode)))
3082 return 0;
3083
  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3085 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3086 {
3087 cum->nregs -= int_nregs;
3088 cum->sse_nregs -= sse_nregs;
3089 cum->regno += int_nregs;
3090 cum->sse_regno += sse_nregs;
3091 return int_nregs;
3092 }
3093 else
3094 {
3095 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3096 cum->words = ROUND_UP (cum->words, align);
3097 cum->words += words;
3098 return 0;
3099 }
3100}
3101
3102static int
3103function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3104 HOST_WIDE_INT words)
3105{
  /* Otherwise, this should be passed indirectly.  */
3107 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3108
3109 cum->words += words;
3110 if (cum->nregs > 0)
3111 {
3112 cum->nregs -= 1;
3113 cum->regno += 1;
3114 return 1;
3115 }
3116 return 0;
3117}
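
/* Note (illustrative): unlike the SYSV path above, the MS ABI advances by
   one register slot per argument regardless of its class, so for

     void f (int, double, int);

   the arguments occupy slots 0, 1 and 2 and end up in %ecx, %xmm1 and
   %r8d respectively (see function_arg_ms_64 below).  */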
3118
3119/* Update the data in CUM to advance over argument ARG. */
3120
3121static void
3122ix86_function_arg_advance (cumulative_args_t cum_v,
3123 const function_arg_info &arg)
3124{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3126 machine_mode mode = arg.mode;
3127 HOST_WIDE_INT bytes, words;
3128 int nregs;
3129
  /* The argument of an interrupt handler is a special case and is
     handled in ix86_function_arg.  */
3132 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3133 return;
3134
3135 bytes = arg.promoted_size_in_bytes ();
3136 words = CEIL (bytes, UNITS_PER_WORD);
3137
3138 if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);
3140
3141 if (TARGET_64BIT)
3142 {
3143 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3144
3145 if (call_abi == MS_ABI)
3146 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3147 else
        nregs = function_arg_advance_64 (cum, mode, arg.type, words,
                                         arg.named);
3150 }
3151 else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3153
3154 if (!nregs)
3155 {
3156 /* Track if there are outgoing arguments on stack. */
3157 if (cum->caller)
3158 cfun->machine->outgoing_args_on_stack = true;
3159 }
3160}
3161
3162/* Define where to put the arguments to a function.
3163 Value is zero to push the argument on the stack,
3164 or a hard register in which to store the argument.
3165
3166 MODE is the argument's machine mode.
3167 TYPE is the data type of the argument (as a tree).
3168 This is null for libcalls where that information may
3169 not be available.
3170 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3171 the preceding args and about the function being called.
3172 NAMED is nonzero if this argument is a named parameter
3173 (otherwise it is an extra parameter matching an ellipsis). */
3174
3175static rtx
3176function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3177 machine_mode orig_mode, const_tree type,
3178 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3179{
3180 bool error_p = false;
3181
3182 /* Avoid the AL settings for the Unix64 ABI. */
3183 if (mode == VOIDmode)
3184 return constm1_rtx;
3185
3186 if (TARGET_IAMCU)
3187 {
3188 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3189 bytes in registers. */
3190 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3191 goto pass_in_reg;
3192 return NULL_RTX;
3193 }
3194
3195 switch (mode)
3196 {
3197 default:
3198 break;
3199
3200 case E_BLKmode:
3201 if (bytes < 0)
3202 break;
3203 /* FALLTHRU */
3204 case E_DImode:
3205 case E_SImode:
3206 case E_HImode:
3207 case E_QImode:
3208pass_in_reg:
3209 if (words <= cum->nregs)
3210 {
3211 int regno = cum->regno;
3212
          /* Fastcall allocates the first two DWORD (SImode) or
             smaller arguments to ECX and EDX if it isn't an
             aggregate type.  */
3216 if (cum->fastcall)
3217 {
3218 if (mode == BLKmode
3219 || mode == DImode
3220 || (type && AGGREGATE_TYPE_P (type)))
3221 break;
3222
3223 /* ECX not EAX is the first allocated register. */
3224 if (regno == AX_REG)
3225 regno = CX_REG;
3226 }
3227 return gen_rtx_REG (mode, regno);
3228 }
3229 break;
3230
3231 case E_DFmode:
3232 if (cum->float_in_sse == -1)
3233 error_p = true;
3234 if (cum->float_in_sse < 2)
3235 break;
3236 /* FALLTHRU */
3237 case E_SFmode:
3238 if (cum->float_in_sse == -1)
3239 error_p = true;
3240 if (cum->float_in_sse < 1)
3241 break;
3242 /* FALLTHRU */
3243 case E_TImode:
3244 /* In 32bit, we pass TImode in xmm registers. */
3245 case E_V16QImode:
3246 case E_V8HImode:
3247 case E_V4SImode:
3248 case E_V2DImode:
3249 case E_V8HFmode:
3250 case E_V8BFmode:
3251 case E_V4SFmode:
3252 case E_V2DFmode:
3253 if (!type || !AGGREGATE_TYPE_P (type))
3254 {
3255 if (cum->sse_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->sse_regno + FIRST_SSE_REG);
3258 }
3259 break;
3260
3261 case E_OImode:
3262 case E_XImode:
3263 /* OImode and XImode shouldn't be used directly. */
3264 gcc_unreachable ();
3265
3266 case E_V64QImode:
3267 case E_V32HImode:
3268 case E_V16SImode:
3269 case E_V8DImode:
3270 case E_V32HFmode:
3271 case E_V32BFmode:
3272 case E_V16SFmode:
3273 case E_V8DFmode:
3274 case E_V16HFmode:
3275 case E_V16BFmode:
3276 case E_V8SFmode:
3277 case E_V8SImode:
3278 case E_V32QImode:
3279 case E_V16HImode:
3280 case E_V4DFmode:
3281 case E_V4DImode:
3282 if (!type || !AGGREGATE_TYPE_P (type))
3283 {
3284 if (cum->sse_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->sse_regno + FIRST_SSE_REG);
3287 }
3288 break;
3289
3290 case E_V8QImode:
3291 case E_V4HImode:
3292 case E_V4HFmode:
3293 case E_V4BFmode:
3294 case E_V2SImode:
3295 case E_V2SFmode:
3296 case E_V1TImode:
3297 case E_V1DImode:
3298 if (!type || !AGGREGATE_TYPE_P (type))
3299 {
3300 if (cum->mmx_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->mmx_regno + FIRST_MMX_REG);
3303 }
3304 break;
3305 }
3306 if (error_p)
3307 {
3308 cum->float_in_sse = 0;
3309 error ("calling %qD with SSE calling convention without "
3310 "SSE/SSE2 enabled", cum->decl);
3311 sorry ("this is a GCC bug that can be worked around by adding "
3312 "attribute used to function called");
3313 }
3314
3315 return NULL_RTX;
3316}
3317
3318static rtx
3319function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3320 machine_mode orig_mode, const_tree type, bool named)
3321{
  /* Handle a hidden AL argument containing the number of SSE registers
     used for varargs x86-64 functions.  */
3324 if (mode == VOIDmode)
3325 return GEN_INT (cum->maybe_vaarg
3326 ? (cum->sse_nregs < 0
3327 ? X86_64_SSE_REGPARM_MAX
3328 : cum->sse_regno)
3329 : -1);
3330
3331 switch (mode)
3332 {
3333 default:
3334 break;
3335
3336 case E_V16HFmode:
3337 case E_V16BFmode:
3338 case E_V8SFmode:
3339 case E_V8SImode:
3340 case E_V32QImode:
3341 case E_V16HImode:
3342 case E_V4DFmode:
3343 case E_V4DImode:
3344 case E_V32HFmode:
3345 case E_V32BFmode:
3346 case E_V16SFmode:
3347 case E_V16SImode:
3348 case E_V64QImode:
3349 case E_V32HImode:
3350 case E_V8DFmode:
3351 case E_V8DImode:
      /* Unnamed 256- and 512-bit vector mode parameters are passed on the
         stack.  */
3353 if (!named)
3354 return NULL;
3355 break;
3356 }
3357
  return construct_container (mode, orig_mode, type, 0, cum->nregs,
                              cum->sse_nregs,
                              &x86_64_int_parameter_registers [cum->regno],
                              cum->sse_regno);
3362}
3363
3364static rtx
3365function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3366 machine_mode orig_mode, bool named, const_tree type,
3367 HOST_WIDE_INT bytes)
3368{
3369 unsigned int regno;
3370
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use a value of -2 to specify that the current function call is
     MS_ABI.  */
3373 if (mode == VOIDmode)
3374 return GEN_INT (-2);
3375
3376 /* If we've run out of registers, it goes on the stack. */
3377 if (cum->nregs == 0)
3378 return NULL_RTX;
3379
3380 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3381
3382 /* Only floating point modes are passed in anything but integer regs. */
3383 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3384 {
3385 if (named)
3386 {
3387 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3388 regno = cum->regno + FIRST_SSE_REG;
3389 }
3390 else
3391 {
3392 rtx t1, t2;
3393
3394 /* Unnamed floating parameters are passed in both the
3395 SSE and integer registers. */
3396 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3397 t2 = gen_rtx_REG (mode, regno);
3398 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3399 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3400 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3401 }
3402 }
  /* Handle aggregate types passed in a register.  */
3404 if (orig_mode == BLKmode)
3405 {
3406 if (bytes > 0 && bytes <= 8)
3407 mode = (bytes > 4 ? DImode : SImode);
3408 if (mode == BLKmode)
3409 mode = DImode;
3410 }
3411
3412 return gen_reg_or_parallel (mode, orig_mode, regno);
3413}
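
/* As a usage note (illustrative): the PARALLEL built above for unnamed
   floating-point arguments means that a call such as

     printf ("%f", 3.14);

   passes the double both in the SSE slot and in the matching integer
   register, so a varargs callee can fetch it from either register set
   without knowing the argument's type.  */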
3414
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
3417
3418 ARG describes the argument while CUM gives information about the
3419 preceding args and about the function being called. */
3420
3421static rtx
3422ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3423{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3425 machine_mode mode = arg.mode;
3426 HOST_WIDE_INT bytes, words;
3427 rtx reg;
3428
3429 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3430 {
3431 gcc_assert (arg.type != NULL_TREE);
3432 if (POINTER_TYPE_P (arg.type))
3433 {
3434 /* This is the pointer argument. */
3435 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3436 /* It is at -WORD(AP) in the current frame in interrupt and
3437 exception handlers. */
3438 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3439 }
3440 else
3441 {
3442 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3443 && TREE_CODE (arg.type) == INTEGER_TYPE
3444 && TYPE_MODE (arg.type) == word_mode);
3445 /* The error code is the word-mode integer argument at
3446 -2 * WORD(AP) in the current frame of the exception
3447 handler. */
3448 reg = gen_rtx_MEM (word_mode,
3449 plus_constant (Pmode,
3450 arg_pointer_rtx,
3451 -2 * UNITS_PER_WORD));
3452 }
3453 return reg;
3454 }
3455
3456 bytes = arg.promoted_size_in_bytes ();
3457 words = CEIL (bytes, UNITS_PER_WORD);
3458
3459 /* To simplify the code below, represent vector types with a vector mode
3460 even if MMX/SSE are not active. */
3461 if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);
3463
3464 if (TARGET_64BIT)
3465 {
3466 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3467
3468 if (call_abi == MS_ABI)
        reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
                                  arg.type, bytes);
3471 else
        reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3473 }
3474 else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3476
3477 /* Track if there are outgoing arguments on stack. */
3478 if (reg == NULL_RTX && cum->caller)
3479 cfun->machine->outgoing_args_on_stack = true;
3480
3481 return reg;
3482}
3483
3484/* A C expression that indicates when an argument must be passed by
3485 reference. If nonzero for an argument, a copy of that argument is
3486 made in memory and a pointer to the argument is passed instead of
3487 the argument itself. The pointer is passed in whatever way is
3488 appropriate for passing a pointer to that type. */
3489
3490static bool
3491ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3492{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3494
3495 if (TARGET_64BIT)
3496 {
3497 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3498
3499 /* See Windows x64 Software Convention. */
3500 if (call_abi == MS_ABI)
3501 {
3502 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3503
3504 if (tree type = arg.type)
3505 {
3506 /* Arrays are passed by reference. */
3507 if (TREE_CODE (type) == ARRAY_TYPE)
3508 return true;
3509
3510 if (RECORD_OR_UNION_TYPE_P (type))
3511 {
3512 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3513 are passed by reference. */
3514 msize = int_size_in_bytes (type);
3515 }
3516 }
3517
3518 /* __m128 is passed by reference. */
3519 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3520 }
3521 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3522 return true;
3523 }
3524
3525 return false;
3526}
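
/* Illustrative examples of the rules above: under the MS ABI a 3-byte
   struct or a 16-byte __m128 argument is passed by reference (size not
   1, 2, 4 or 8), while an 8-byte struct is passed by value; under the
   64-bit SYSV ABI only variably-sized types take the by-reference path,
   since oversized aggregates are simply passed by value in memory.  */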
3527
3528/* Return true when TYPE should be 128bit aligned for 32bit argument
3529 passing ABI. XXX: This function is obsolete and is only used for
3530 checking psABI compatibility with previous versions of GCC. */
3531
3532static bool
3533ix86_compat_aligned_value_p (const_tree type)
3534{
3535 machine_mode mode = TYPE_MODE (type);
3536 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3537 || mode == TDmode
3538 || mode == TFmode
3539 || mode == TCmode)
3540 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3541 return true;
3542 if (TYPE_ALIGN (type) < 128)
3543 return false;
3544
3545 if (AGGREGATE_TYPE_P (type))
3546 {
3547 /* Walk the aggregates recursively. */
3548 switch (TREE_CODE (type))
3549 {
3550 case RECORD_TYPE:
3551 case UNION_TYPE:
3552 case QUAL_UNION_TYPE:
3553 {
3554 tree field;
3555
3556 /* Walk all the structure fields. */
3557 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3558 {
3559 if (TREE_CODE (field) == FIELD_DECL
3560 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3561 return true;
3562 }
3563 break;
3564 }
3565
3566 case ARRAY_TYPE:
          /* Just in case some language passes arrays by value.  */
3568 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3569 return true;
3570 break;
3571
3572 default:
3573 gcc_unreachable ();
3574 }
3575 }
3576 return false;
3577}
3578
3579/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3580 XXX: This function is obsolete and is only used for checking psABI
3581 compatibility with previous versions of GCC. */
3582
3583static unsigned int
3584ix86_compat_function_arg_boundary (machine_mode mode,
3585 const_tree type, unsigned int align)
3586{
3587 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3588 natural boundaries. */
3589 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3590 {
3591 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3592 make an exception for SSE modes since these require 128bit
3593 alignment.
3594
3595 The handling here differs from field_alignment. ICC aligns MMX
3596 arguments to 4 byte boundaries, while structure fields are aligned
3597 to 8 byte boundaries. */
3598 if (!type)
3599 {
3600 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3601 align = PARM_BOUNDARY;
3602 }
3603 else
3604 {
3605 if (!ix86_compat_aligned_value_p (type))
3606 align = PARM_BOUNDARY;
3607 }
3608 }
3609 if (align > BIGGEST_ALIGNMENT)
3610 align = BIGGEST_ALIGNMENT;
3611 return align;
3612}
3613
3614/* Return true when TYPE should be 128bit aligned for 32bit argument
3615 passing ABI. */
3616
3617static bool
3618ix86_contains_aligned_value_p (const_tree type)
3619{
3620 machine_mode mode = TYPE_MODE (type);
3621
3622 if (mode == XFmode || mode == XCmode)
3623 return false;
3624
3625 if (TYPE_ALIGN (type) < 128)
3626 return false;
3627
3628 if (AGGREGATE_TYPE_P (type))
3629 {
3630 /* Walk the aggregates recursively. */
3631 switch (TREE_CODE (type))
3632 {
3633 case RECORD_TYPE:
3634 case UNION_TYPE:
3635 case QUAL_UNION_TYPE:
3636 {
3637 tree field;
3638
3639 /* Walk all the structure fields. */
3640 for (field = TYPE_FIELDS (type);
3641 field;
3642 field = DECL_CHAIN (field))
3643 {
3644 if (TREE_CODE (field) == FIELD_DECL
3645 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3646 return true;
3647 }
3648 break;
3649 }
3650
3651 case ARRAY_TYPE:
          /* Just in case some language passes arrays by value.  */
3653 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3654 return true;
3655 break;
3656
3657 default:
3658 gcc_unreachable ();
3659 }
3660 }
3661 else
3662 return TYPE_ALIGN (type) >= 128;
3663
3664 return false;
3665}
3666
3667/* Gives the alignment boundary, in bits, of an argument with the
3668 specified mode and type. */
3669
3670static unsigned int
3671ix86_function_arg_boundary (machine_mode mode, const_tree type)
3672{
3673 unsigned int align;
3674 if (type)
3675 {
      /* Since the main variant type is used for the call, convert the
         type to its main variant.  */
3678 type = TYPE_MAIN_VARIANT (type);
3679 align = TYPE_ALIGN (type);
3680 if (TYPE_EMPTY_P (type))
3681 return PARM_BOUNDARY;
3682 }
3683 else
3684 align = GET_MODE_ALIGNMENT (mode);
3685 if (align < PARM_BOUNDARY)
3686 align = PARM_BOUNDARY;
3687 else
3688 {
3689 static bool warned;
3690 unsigned int saved_align = align;
3691
3692 if (!TARGET_64BIT)
3693 {
3694 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3695 if (!type)
3696 {
3697 if (mode == XFmode || mode == XCmode)
3698 align = PARM_BOUNDARY;
3699 }
3700 else if (!ix86_contains_aligned_value_p (type))
3701 align = PARM_BOUNDARY;
3702
3703 if (align < 128)
3704 align = PARM_BOUNDARY;
3705 }
3706
3707 if (warn_psabi
3708 && !warned
          && align != ix86_compat_function_arg_boundary (mode, type,
                                                         saved_align))
3711 {
3712 warned = true;
3713 inform (input_location,
3714 "the ABI for passing parameters with %d-byte"
3715 " alignment has changed in GCC 4.6",
3716 align / BITS_PER_UNIT);
3717 }
3718 }
3719
3720 return align;
3721}
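
/* For instance (illustrative): in 32-bit mode an __m128 argument keeps
   its 128-bit stack alignment, whereas any argument whose type alignment
   is below 128 bits is pushed back to PARM_BOUNDARY, and the -Wpsabi
   note above flags the cases where this differs from the pre-GCC-4.6
   behaviour.  */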
3722
/* Return true if REGNO is a possible register number of a function value.  */
3724
3725static bool
3726ix86_function_value_regno_p (const unsigned int regno)
3727{
3728 switch (regno)
3729 {
3730 case AX_REG:
3731 return true;
3732 case DX_REG:
3733 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3734 case DI_REG:
3735 case SI_REG:
3736 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3737
3738 /* Complex values are returned in %st(0)/%st(1) pair. */
3739 case ST0_REG:
3740 case ST1_REG:
3741 /* TODO: The function should depend on current function ABI but
3742 builtins.cc would need updating then. Therefore we use the
3743 default ABI. */
3744 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3745 return false;
3746 return TARGET_FLOAT_RETURNS_IN_80387;
3747
3748 /* Complex values are returned in %xmm0/%xmm1 pair. */
3749 case XMM0_REG:
3750 case XMM1_REG:
3751 return TARGET_SSE;
3752
3753 case MM0_REG:
3754 if (TARGET_MACHO || TARGET_64BIT)
3755 return false;
3756 return TARGET_MMX;
3757 }
3758
3759 return false;
3760}
3761
3762/* Check whether the register REGNO should be zeroed on X86.
3763 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3764    together, so there is no need to zero them again.
3765 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3766
3767static bool
3768zero_call_used_regno_p (const unsigned int regno,
3769 bool all_sse_zeroed,
3770 bool need_zero_mmx)
3771{
3772 return GENERAL_REGNO_P (regno)
3773 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3774 || MASK_REGNO_P (regno)
3775 || (need_zero_mmx && MMX_REGNO_P (regno));
3776}
3777
3778/* Return the machine_mode that is used to zero register REGNO. */
3779
3780static machine_mode
3781zero_call_used_regno_mode (const unsigned int regno)
3782{
3783 /* NB: We only need to zero the lower 32 bits for integer registers
3784      and the lower 128 bits for vector registers, since the destinations
3785      are zero-extended to the full register width.  */
3786 if (GENERAL_REGNO_P (regno))
3787 return SImode;
3788 else if (SSE_REGNO_P (regno))
3789 return V4SFmode;
3790 else if (MASK_REGNO_P (regno))
3791 return HImode;
3792 else if (MMX_REGNO_P (regno))
3793 return V2SImode;
3794 else
3795 gcc_unreachable ();
3796}
3797
3798/* Generate a rtx to zero all vector registers together if possible,
3799 otherwise, return NULL. */
3800
3801static rtx
3802zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3803{
3804 if (!TARGET_AVX)
3805 return NULL;
3806
3807 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3808 if ((LEGACY_SSE_REGNO_P (regno)
3809 || (TARGET_64BIT
3810 && (REX_SSE_REGNO_P (regno)
3811 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3812 	&& !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3813 return NULL;
3814
3815 return gen_avx_vzeroall ();
3816}
3817
3818/* Generate insns to zero all st registers together.
3819 Return true when zeroing instructions are generated.
3820 Assume the number of st registers that are zeroed is num_of_st,
3821 we will emit the following sequence to zero them together:
3822 fldz; \
3823 fldz; \
3824 ...
3825 fldz; \
3826 fstp %%st(0); \
3827 fstp %%st(0); \
3828 ...
3829 fstp %%st(0);
3830    i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3831    and mark the stack slots empty.
3832
3833 How to compute the num_of_st:
3834 There is no direct mapping from stack registers to hard register
3835 numbers. If one stack register needs to be cleared, we don't know
3836 where in the stack the value remains. So, if any stack register
3837 needs to be cleared, the whole stack should be cleared. However,
3838 x87 stack registers that hold the return value should be excluded.
3839 x87 returns in the top (two for complex values) register, so
3840 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3841    Return the value of num_of_st.  */
3842
3843
3844static int
3845zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3846{
3847
3848 /* If the FPU is disabled, no need to zero all st registers. */
3849 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3850 return 0;
3851
3852 unsigned int num_of_st = 0;
3853 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3854 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3855 	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3856 {
3857 num_of_st++;
3858 break;
3859 }
3860
3861 if (num_of_st == 0)
3862 return 0;
3863
3864 bool return_with_x87 = false;
3865 return_with_x87 = (crtl->return_rtx
3866 && (STACK_REG_P (crtl->return_rtx)));
3867
3868 bool complex_return = false;
3869 complex_return = (crtl->return_rtx
3870 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3871
3872 if (return_with_x87)
3873 if (complex_return)
3874 num_of_st = 6;
3875 else
3876 num_of_st = 7;
3877 else
3878 num_of_st = 8;
3879
3880 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
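  /* Push num_of_st zeros onto the x87 register stack; this is the fldz
     part of the sequence described above.  */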
3881 for (unsigned int i = 0; i < num_of_st; i++)
3882 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3883
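  /* Pop each of them again; this is the fstp %st(0) part.  The REG_DEAD
     note marks the value as dead here, so the reg-stack pass can emit the
     move as a pop that empties the slot.  */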
3884 for (unsigned int i = 0; i < num_of_st; i++)
3885 {
3886 rtx insn;
3887 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3888 add_reg_note (insn, REG_DEAD, st_reg);
3889 }
3890 return num_of_st;
3891}
3892
3893
3894 /* When the routine exits in MMX mode, if any ST register needs
3895 to be zeroed, we should clear all MMX registers except the
3896 RET_MMX_REGNO that holds the return value. */
3897static bool
3898zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3899 unsigned int ret_mmx_regno)
3900{
3901 bool need_zero_all_mm = false;
3902 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3903 if (STACK_REGNO_P (regno)
3904 	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3905 {
3906 need_zero_all_mm = true;
3907 break;
3908 }
3909
3910 if (!need_zero_all_mm)
3911 return false;
3912
3913 machine_mode mode = V2SImode;
3914 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3915 if (regno != ret_mmx_regno)
3916 {
3917 rtx reg = gen_rtx_REG (mode, regno);
3918 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3919 }
3920 return true;
3921}
3922
3923/* TARGET_ZERO_CALL_USED_REGS. */
3924/* Generate a sequence of instructions that zero registers specified by
3925 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3926 zeroed. */
3927static HARD_REG_SET
3928ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3929{
3930 HARD_REG_SET zeroed_hardregs;
3931 bool all_sse_zeroed = false;
3932 int all_st_zeroed_num = 0;
3933 bool all_mm_zeroed = false;
3934
3935   CLEAR_HARD_REG_SET (zeroed_hardregs);
3936
3937 /* first, let's see whether we can zero all vector registers together. */
3938 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3939 if (zero_all_vec_insn)
3940 {
3941 emit_insn (zero_all_vec_insn);
3942 all_sse_zeroed = true;
3943 }
3944
3945   /* mm/st registers are a shared register set; we should follow these
3946      rules to clear them:
3947 MMX exit mode x87 exit mode
3948 -------------|----------------------|---------------
3949 uses x87 reg | clear all MMX | clear all x87
3950 uses MMX reg | clear individual MMX | clear all x87
3951 x87 + MMX | clear all MMX | clear all x87
3952
3953      First, decide which mode (MMX mode or x87 mode) the function
3954      exits with.  */
3955
3956 bool exit_with_mmx_mode = (crtl->return_rtx
3957 && (MMX_REG_P (crtl->return_rtx)));
3958
3959 if (!exit_with_mmx_mode)
3960 /* x87 exit mode, we should zero all st registers together. */
3961 {
3962 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3963
3964 if (all_st_zeroed_num > 0)
3965 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
3966 /* x87 stack registers that hold the return value should be excluded.
3967 x87 returns in the top (two for complex values) register. */
3968 if (all_st_zeroed_num == 8
3969 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
3970 || (all_st_zeroed_num == 6
3971 && (regno == (REGNO (crtl->return_rtx) + 1)))))
3972 	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
3973 }
3974 else
3975 /* MMX exit mode, check whether we can zero all mm registers. */
3976 {
3977 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3978 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3979 					      exit_mmx_regno);
3980 if (all_mm_zeroed)
3981 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3982 if (regno != exit_mmx_regno)
3983 	      SET_HARD_REG_BIT (zeroed_hardregs, regno);
3984 }
3985
3986 /* Now, generate instructions to zero all the other registers. */
3987
3988 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3989 {
3990       if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3991 	continue;
3992       if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3993 				   exit_with_mmx_mode && !all_mm_zeroed))
3994 	continue;
3995 
3996       SET_HARD_REG_BIT (zeroed_hardregs, regno);
3997
3998 machine_mode mode = zero_call_used_regno_mode (regno);
3999
4000 rtx reg = gen_rtx_REG (mode, regno);
4001 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4002
4003 switch (mode)
4004 {
4005 case E_SImode:
4006 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4007 {
4008 rtx clob = gen_rtx_CLOBBER (VOIDmode,
4009 gen_rtx_REG (CCmode,
4010 FLAGS_REG));
4011 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4012 tmp,
4013 clob));
4014 }
4015 /* FALLTHRU. */
4016
4017 case E_V4SFmode:
4018 case E_HImode:
4019 case E_V2SImode:
4020 emit_insn (tmp);
4021 break;
4022
4023 default:
4024 gcc_unreachable ();
4025 }
4026 }
4027 return zeroed_hardregs;
4028}
4029
4030/* Define how to find the value returned by a function.
4031 VALTYPE is the data type of the value (as a tree).
4032 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4033 otherwise, FUNC is 0. */
4034
4035static rtx
4036function_value_32 (machine_mode orig_mode, machine_mode mode,
4037 const_tree fntype, const_tree fn)
4038{
4039 unsigned int regno;
4040
4041 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4042 we normally prevent this case when mmx is not available. However
4043 some ABIs may require the result to be returned like DImode. */
4044 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4045 regno = FIRST_MMX_REG;
4046
4047 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4048 we prevent this case when sse is not available. However some ABIs
4049 may require the result to be returned like integer TImode. */
4050 else if (mode == TImode
4051 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4052 regno = FIRST_SSE_REG;
4053
4054 /* 32-byte vector modes in %ymm0. */
4055 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4056 regno = FIRST_SSE_REG;
4057
4058 /* 64-byte vector modes in %zmm0. */
4059 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4060 regno = FIRST_SSE_REG;
4061
4062 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4063 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4064 regno = FIRST_FLOAT_REG;
4065 else
4066 /* Most things go in %eax. */
4067 regno = AX_REG;
4068
4069   /* Return __bf16/_Float16/_Complex _Float16 in an SSE register.  */
4070 if (mode == HFmode || mode == BFmode)
4071 {
4072 if (!TARGET_SSE2)
4073 {
4074 error ("SSE register return with SSE2 disabled");
4075 regno = AX_REG;
4076 }
4077 else
4078 regno = FIRST_SSE_REG;
4079 }
4080
4081 if (mode == HCmode)
4082 {
4083 if (!TARGET_SSE2)
4084 error ("SSE register return with SSE2 disabled");
4085
4086       rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4087 XVECEXP (ret, 0, 0)
4088 = gen_rtx_EXPR_LIST (VOIDmode,
4089 gen_rtx_REG (SImode,
4090 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4091 GEN_INT (0));
4092 return ret;
4093 }
4094
4095 /* Override FP return register with %xmm0 for local functions when
4096 SSE math is enabled or for functions with sseregparm attribute. */
4097 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4098 {
4099       int sse_level = ix86_function_sseregparm (fntype, fn, false);
4100 if (sse_level == -1)
4101 {
4102 error ("calling %qD with SSE calling convention without "
4103 "SSE/SSE2 enabled", fn);
4104 sorry ("this is a GCC bug that can be worked around by adding "
4105 "attribute used to function called");
4106 }
4107 else if ((sse_level >= 1 && mode == SFmode)
4108 || (sse_level == 2 && mode == DFmode))
4109 regno = FIRST_SSE_REG;
4110 }
4111
4112 /* OImode shouldn't be used directly. */
4113 gcc_assert (mode != OImode);
4114
4115 return gen_rtx_REG (orig_mode, regno);
4116}
4117
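/* Likewise for the 64-bit SysV ABI.  VALTYPE is null for libcalls, in
   which case the return register is chosen from MODE alone.  */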
4118static rtx
4119function_value_64 (machine_mode orig_mode, machine_mode mode,
4120 const_tree valtype)
4121{
4122 rtx ret;
4123
4124 /* Handle libcalls, which don't provide a type node. */
4125 if (valtype == NULL)
4126 {
4127 unsigned int regno;
4128
4129 switch (mode)
4130 {
4131 case E_BFmode:
4132 case E_HFmode:
4133 case E_HCmode:
4134 case E_SFmode:
4135 case E_SCmode:
4136 case E_DFmode:
4137 case E_DCmode:
4138 case E_TFmode:
4139 case E_SDmode:
4140 case E_DDmode:
4141 case E_TDmode:
4142 regno = FIRST_SSE_REG;
4143 break;
4144 case E_XFmode:
4145 case E_XCmode:
4146 regno = FIRST_FLOAT_REG;
4147 break;
4148 case E_TCmode:
4149 return NULL;
4150 default:
4151 regno = AX_REG;
4152 }
4153
4154 return gen_rtx_REG (mode, regno);
4155 }
4156 else if (POINTER_TYPE_P (valtype))
4157 {
4158 /* Pointers are always returned in word_mode. */
4159 mode = word_mode;
4160 }
4161
4162   ret = construct_container (mode, orig_mode, valtype, 1,
4163 			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4164 			     x86_64_int_return_registers, 0);
4165
4166   /* For zero-sized structures, construct_container returns NULL, but we need
4167      to keep the rest of the compiler happy by returning a meaningful value.  */
4168 if (!ret)
4169 ret = gen_rtx_REG (orig_mode, AX_REG);
4170
4171 return ret;
4172}
4173
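/* Likewise for the 32-bit MS ABI.  */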
4174static rtx
4175function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4176 const_tree fntype, const_tree fn, const_tree valtype)
4177{
4178 unsigned int regno;
4179
4180 /* Floating point return values in %st(0)
4181 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4182 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4183 && (GET_MODE_SIZE (mode) > 8
4184 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4185 {
4186 regno = FIRST_FLOAT_REG;
4187 return gen_rtx_REG (orig_mode, regno);
4188 }
4189 else
4190     return function_value_32 (orig_mode, mode, fntype, fn);
4191}
4192
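/* Likewise for the 64-bit MS ABI.  */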
4193static rtx
4194function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4195 const_tree valtype)
4196{
4197 unsigned int regno = AX_REG;
4198
4199 if (TARGET_SSE)
4200 {
4201 switch (GET_MODE_SIZE (mode))
4202 {
4203 case 16:
4204 if (valtype != NULL_TREE
4205 	      && !VECTOR_INTEGER_TYPE_P (valtype)
4207 && !INTEGRAL_TYPE_P (valtype)
4208 && !VECTOR_FLOAT_TYPE_P (valtype))
4209 break;
4210 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4211 && !COMPLEX_MODE_P (mode))
4212 regno = FIRST_SSE_REG;
4213 break;
4214 case 8:
4215 case 4:
4216 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4217 break;
4218 if (mode == SFmode || mode == DFmode)
4219 regno = FIRST_SSE_REG;
4220 break;
4221 default:
4222 break;
4223 }
4224 }
4225 return gen_rtx_REG (orig_mode, regno);
4226}
4227
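/* Worker for ix86_function_value and ix86_libcall_value: dispatch to one
   of the ABI-specific helpers above.  */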
4228static rtx
4229ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4230 machine_mode orig_mode, machine_mode mode)
4231{
4232 const_tree fn, fntype;
4233
4234 fn = NULL_TREE;
4235 if (fntype_or_decl && DECL_P (fntype_or_decl))
4236 fn = fntype_or_decl;
4237 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4238
4239 if (ix86_function_type_abi (fntype) == MS_ABI)
4240 {
4241 if (TARGET_64BIT)
4242 return function_value_ms_64 (orig_mode, mode, valtype);
4243 else
4244 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4245 }
4246 else if (TARGET_64BIT)
4247 return function_value_64 (orig_mode, mode, valtype);
4248 else
4249 return function_value_32 (orig_mode, mode, fntype, fn);
4250}
4251
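/* Implement TARGET_FUNCTION_VALUE.  */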
4252static rtx
4253ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4254{
4255 machine_mode mode, orig_mode;
4256
4257 orig_mode = TYPE_MODE (valtype);
4258   mode = type_natural_mode (valtype, NULL, true);
4259 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4260}
4261
4262/* Pointer function arguments and return values are promoted to
4263 word_mode for normal functions. */
4264
4265static machine_mode
4266ix86_promote_function_mode (const_tree type, machine_mode mode,
4267 int *punsignedp, const_tree fntype,
4268 int for_return)
4269{
4270 if (cfun->machine->func_type == TYPE_NORMAL
4271 && type != NULL_TREE
4272 && POINTER_TYPE_P (type))
4273 {
4274 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4275 return word_mode;
4276 }
4277 return default_promote_function_mode (type, mode, punsignedp, fntype,
4278 for_return);
4279}
4280
4281/* Return true if a structure, union or array with MODE containing FIELD
4282 should be accessed using BLKmode. */
4283
4284static bool
4285ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4286{
4287 /* Union with XFmode must be in BLKmode. */
4288 return (mode == XFmode
4289 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4290 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4291}
4292
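/* Define how to find the value returned by a library function, assuming
   the value has mode MODE.  */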
4293rtx
4294ix86_libcall_value (machine_mode mode)
4295{
4296   return ix86_function_value_1 (NULL, NULL, mode, mode);
4297}
4298
4299/* Return true iff type is returned in memory. */
4300
4301static bool
4302ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4303{
4304   const machine_mode mode = type_natural_mode (type, NULL, true);
4305 HOST_WIDE_INT size;
4306
4307 if (TARGET_64BIT)
4308 {
4309 if (ix86_function_type_abi (fntype) == MS_ABI)
4310 {
4311 size = int_size_in_bytes (type);
4312
4313 /* __m128 is returned in xmm0. */
4314 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4315 || INTEGRAL_TYPE_P (type)
4316 || VECTOR_FLOAT_TYPE_P (type))
4317 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4318 && !COMPLEX_MODE_P (mode)
4319 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4320 return false;
4321
4322 /* Otherwise, the size must be exactly in [1248]. */
4323 return size != 1 && size != 2 && size != 4 && size != 8;
4324 }
4325 else
4326 {
4327 int needed_intregs, needed_sseregs;
4328
4329 	  return examine_argument (mode, type, 1,
4330 				   &needed_intregs, &needed_sseregs);
4331 }
4332 }
4333 else
4334 {
4335 size = int_size_in_bytes (type);
4336
4337 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4338 bytes in registers. */
4339 if (TARGET_IAMCU)
4340 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4341
4342 if (mode == BLKmode)
4343 return true;
4344
4345 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4346 return false;
4347
4348 if (VECTOR_MODE_P (mode) || mode == TImode)
4349 {
4350 /* User-created vectors small enough to fit in EAX. */
4351 if (size < 8)
4352 return false;
4353
4354 	  /* Unless ABI prescribes otherwise,
4355 MMX/3dNow values are returned in MM0 if available. */
4356
4357 if (size == 8)
4358 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4359
4360 /* SSE values are returned in XMM0 if available. */
4361 if (size == 16)
4362 return !TARGET_SSE;
4363
4364 /* AVX values are returned in YMM0 if available. */
4365 if (size == 32)
4366 return !TARGET_AVX;
4367
4368 /* AVX512F values are returned in ZMM0 if available. */
4369 if (size == 64)
4370 return !TARGET_AVX512F || !TARGET_EVEX512;
4371 }
4372
4373 if (mode == XFmode)
4374 return false;
4375
4376 if (size > 12)
4377 return true;
4378
4379 /* OImode shouldn't be used directly. */
4380 gcc_assert (mode != OImode);
4381
4382 return false;
4383 }
4384}
4385
4386/* Implement TARGET_PUSH_ARGUMENT. */
4387
4388static bool
4389ix86_push_argument (unsigned int npush)
4390{
4391 /* If SSE2 is available, use vector move to put large argument onto
4392 stack. NB: In 32-bit mode, use 8-byte vector move. */
4393 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4394 && TARGET_PUSH_ARGS
4395 && !ACCUMULATE_OUTGOING_ARGS);
4396}
4397
4398
4399/* Create the va_list data type. */
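/* In C terms, the record built below for the 64-bit SysV ABI is:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */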
4400
4401static tree
4402ix86_build_builtin_va_list_64 (void)
4403{
4404 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4405
4406 record = lang_hooks.types.make_type (RECORD_TYPE);
4407 type_decl = build_decl (BUILTINS_LOCATION,
4408 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4409
4410 f_gpr = build_decl (BUILTINS_LOCATION,
4411 FIELD_DECL, get_identifier ("gp_offset"),
4412 unsigned_type_node);
4413 f_fpr = build_decl (BUILTINS_LOCATION,
4414 FIELD_DECL, get_identifier ("fp_offset"),
4415 unsigned_type_node);
4416 f_ovf = build_decl (BUILTINS_LOCATION,
4417 FIELD_DECL, get_identifier ("overflow_arg_area"),
4418 ptr_type_node);
4419 f_sav = build_decl (BUILTINS_LOCATION,
4420 FIELD_DECL, get_identifier ("reg_save_area"),
4421 ptr_type_node);
4422
4423 va_list_gpr_counter_field = f_gpr;
4424 va_list_fpr_counter_field = f_fpr;
4425
4426 DECL_FIELD_CONTEXT (f_gpr) = record;
4427 DECL_FIELD_CONTEXT (f_fpr) = record;
4428 DECL_FIELD_CONTEXT (f_ovf) = record;
4429 DECL_FIELD_CONTEXT (f_sav) = record;
4430
4431 TYPE_STUB_DECL (record) = type_decl;
4432 TYPE_NAME (record) = type_decl;
4433 TYPE_FIELDS (record) = f_gpr;
4434 DECL_CHAIN (f_gpr) = f_fpr;
4435 DECL_CHAIN (f_fpr) = f_ovf;
4436 DECL_CHAIN (f_ovf) = f_sav;
4437
4438 layout_type (record);
4439
4440 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4441 NULL_TREE, TYPE_ATTRIBUTES (record));
4442
4443 /* The correct type is an array type of one element. */
4444 return build_array_type (record, build_index_type (size_zero_node));
4445}
4446
4447/* Setup the builtin va_list data type and for 64-bit the additional
4448 calling convention specific va_list data types. */
4449
4450static tree
4451ix86_build_builtin_va_list (void)
4452{
4453 if (TARGET_64BIT)
4454 {
4455 /* Initialize ABI specific va_list builtin types.
4456
4457 In lto1, we can encounter two va_list types:
4458 - one as a result of the type-merge across TUs, and
4459 - the one constructed here.
4460 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4461 a type identity check in canonical_va_list_type based on
4462 TYPE_MAIN_VARIANT (which we used to have) will not work.
4463 Instead, we tag each va_list_type_node with its unique attribute, and
4464 look for the attribute in the type identity check in
4465 canonical_va_list_type.
4466
4467 Tagging sysv_va_list_type_node directly with the attribute is
4468        problematic since it's an array of one record, which will degrade into a
4469 pointer to record when used as parameter (see build_va_arg comments for
4470 an example), dropping the attribute in the process. So we tag the
4471 record instead. */
4472
4473 /* For SYSV_ABI we use an array of one record. */
4474 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4475
4476 /* For MS_ABI we use plain pointer to argument area. */
4477 tree char_ptr_type = build_pointer_type (char_type_node);
4478 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4479 TYPE_ATTRIBUTES (char_ptr_type));
4480 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4481
4482 return ((ix86_abi == MS_ABI)
4483 ? ms_va_list_type_node
4484 : sysv_va_list_type_node);
4485 }
4486 else
4487 {
4488 /* For i386 we use plain pointer to argument area. */
4489 return build_pointer_type (char_type_node);
4490 }
4491}
4492
4493/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4494
4495static void
4496setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4497{
4498 rtx save_area, mem;
4499 alias_set_type set;
4500 int i, max;
4501
4502 /* GPR size of varargs save area. */
4503 if (cfun->va_list_gpr_size)
4504 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4505 else
4506 ix86_varargs_gpr_size = 0;
4507
4508 /* FPR size of varargs save area. We don't need it if we don't pass
4509 anything in SSE registers. */
4510 if (TARGET_SSE && cfun->va_list_fpr_size)
4511 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4512 else
4513 ix86_varargs_fpr_size = 0;
4514
4515 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4516 return;
4517
4518 save_area = frame_pointer_rtx;
4519 set = get_varargs_alias_set ();
4520
4521 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4522 if (max > X86_64_REGPARM_MAX)
4523 max = X86_64_REGPARM_MAX;
4524
4525 for (i = cum->regno; i < max; i++)
4526 {
4527 mem = gen_rtx_MEM (word_mode,
4528 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4529 MEM_NOTRAP_P (mem) = 1;
4530 set_mem_alias_set (mem, set);
4531 emit_move_insn (mem,
4532 gen_rtx_REG (word_mode,
4533 x86_64_int_parameter_registers[i]));
4534 }
4535
4536 if (ix86_varargs_fpr_size)
4537 {
4538 machine_mode smode;
4539 rtx_code_label *label;
4540 rtx test;
4541
4542 /* Now emit code to save SSE registers. The AX parameter contains number
4543 of SSE parameter registers used to call this function, though all we
4544 actually check here is the zero/non-zero status. */
4545
4546 label = gen_label_rtx ();
4547 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4548 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4549 label));
4550
4551 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4552 we used movdqa (i.e. TImode) instead? Perhaps even better would
4553 be if we could determine the real mode of the data, via a hook
4554 into pass_stdarg. Ignore all that for now. */
4555 smode = V4SFmode;
4556 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4557 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4558
4559 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4560 if (max > X86_64_SSE_REGPARM_MAX)
4561 max = X86_64_SSE_REGPARM_MAX;
4562
4563 for (i = cum->sse_regno; i < max; ++i)
4564 {
4565 mem = plus_constant (Pmode, save_area,
4566 i * 16 + ix86_varargs_gpr_size);
4567 mem = gen_rtx_MEM (smode, mem);
4568 MEM_NOTRAP_P (mem) = 1;
4569 set_mem_alias_set (mem, set);
4570 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4571
4572 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4573 }
4574
4575 emit_label (label);
4576 }
4577}
4578
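/* Likewise for the 64-bit MS ABI: spill the named integer argument
   registers to their home slots in the caller-allocated parameter area.  */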
4579static void
4580setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4581{
4582 alias_set_type set = get_varargs_alias_set ();
4583 int i;
4584
4585   /* Reset to zero, as there might be a sysv va_arg used
4586      before.  */
4587 ix86_varargs_gpr_size = 0;
4588 ix86_varargs_fpr_size = 0;
4589
4590 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4591 {
4592 rtx reg, mem;
4593
4594 mem = gen_rtx_MEM (Pmode,
4595 plus_constant (Pmode, virtual_incoming_args_rtx,
4596 i * UNITS_PER_WORD));
4597 MEM_NOTRAP_P (mem) = 1;
4598 set_mem_alias_set (mem, set);
4599
4600 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4601 emit_move_insn (mem, reg);
4602 }
4603}
4604
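/* Implement TARGET_SETUP_INCOMING_VARARGS.  */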
4605static void
4606ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4607 const function_arg_info &arg,
4608 int *, int no_rtl)
4609{
4610   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4611 CUMULATIVE_ARGS next_cum;
4612 tree fntype;
4613
4614 /* This argument doesn't appear to be used anymore. Which is good,
4615 because the old code here didn't suppress rtl generation. */
4616 gcc_assert (!no_rtl);
4617
4618 if (!TARGET_64BIT)
4619 return;
4620
4621 fntype = TREE_TYPE (current_function_decl);
4622
4623 /* For varargs, we do not want to skip the dummy va_dcl argument.
4624 For stdargs, we do want to skip the last named argument. */
4625 next_cum = *cum;
4626 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4627 && stdarg_p (fntype))
4628     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4629
4630 if (cum->call_abi == MS_ABI)
4631 setup_incoming_varargs_ms_64 (&next_cum);
4632 else
4633 setup_incoming_varargs_64 (&next_cum);
4634}
4635
4636/* Checks if TYPE is of kind va_list char *. */
4637
4638static bool
4639is_va_list_char_pointer (tree type)
4640{
4641 tree canonic;
4642
4643 /* For 32-bit it is always true. */
4644 if (!TARGET_64BIT)
4645 return true;
4646 canonic = ix86_canonical_va_list_type (type);
4647 return (canonic == ms_va_list_type_node
4648 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4649}
4650
4651/* Implement va_start. */
4652
4653static void
4654ix86_va_start (tree valist, rtx nextarg)
4655{
4656 HOST_WIDE_INT words, n_gpr, n_fpr;
4657 tree f_gpr, f_fpr, f_ovf, f_sav;
4658 tree gpr, fpr, ovf, sav, t;
4659 tree type;
4660 rtx ovf_rtx;
4661
4662 if (flag_split_stack
4663 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4664 {
4665 unsigned int scratch_regno;
4666
4667 /* When we are splitting the stack, we can't refer to the stack
4668 arguments using internal_arg_pointer, because they may be on
4669 the old stack. The split stack prologue will arrange to
4670 leave a pointer to the old stack arguments in a scratch
4671 register, which we here copy to a pseudo-register. The split
4672 stack prologue can't set the pseudo-register directly because
4673 it (the prologue) runs before any registers have been saved. */
4674
4675 scratch_regno = split_stack_prologue_scratch_regno ();
4676 if (scratch_regno != INVALID_REGNUM)
4677 {
4678 rtx reg;
4679 rtx_insn *seq;
4680
4681 reg = gen_reg_rtx (Pmode);
4682 cfun->machine->split_stack_varargs_pointer = reg;
4683
4684 start_sequence ();
4685 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4686 seq = get_insns ();
4687 end_sequence ();
4688
4689 push_topmost_sequence ();
4690 emit_insn_after (seq, entry_of_function ());
4691 pop_topmost_sequence ();
4692 }
4693 }
4694
4695 /* Only 64bit target needs something special. */
4696 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4697 {
4698 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4699 std_expand_builtin_va_start (valist, nextarg);
4700 else
4701 {
4702 rtx va_r, next;
4703
4704 	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4705 next = expand_binop (ptr_mode, add_optab,
4706 cfun->machine->split_stack_varargs_pointer,
4707 crtl->args.arg_offset_rtx,
4708 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4709 convert_move (va_r, next, 0);
4710 }
4711 return;
4712 }
4713
4714 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4715 f_fpr = DECL_CHAIN (f_gpr);
4716 f_ovf = DECL_CHAIN (f_fpr);
4717 f_sav = DECL_CHAIN (f_ovf);
4718
4719 valist = build_simple_mem_ref (valist);
4720 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4721 /* The following should be folded into the MEM_REF offset. */
4722 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4723 f_gpr, NULL_TREE);
4724 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4725 f_fpr, NULL_TREE);
4726 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4727 f_ovf, NULL_TREE);
4728 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4729 f_sav, NULL_TREE);
4730
4731 /* Count number of gp and fp argument registers used. */
4732 words = crtl->args.info.words;
4733 n_gpr = crtl->args.info.regno;
4734 n_fpr = crtl->args.info.sse_regno;
4735
4736 if (cfun->va_list_gpr_size)
4737 {
4738 type = TREE_TYPE (gpr);
4739 t = build2 (MODIFY_EXPR, type,
4740 gpr, build_int_cst (type, n_gpr * 8));
4741 TREE_SIDE_EFFECTS (t) = 1;
4742       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4743 }
4744
4745 if (TARGET_SSE && cfun->va_list_fpr_size)
4746 {
4747 type = TREE_TYPE (fpr);
4748 t = build2 (MODIFY_EXPR, type, fpr,
4749 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4750 TREE_SIDE_EFFECTS (t) = 1;
4751       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4752 }
4753
4754 /* Find the overflow area. */
4755 type = TREE_TYPE (ovf);
4756 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4757 ovf_rtx = crtl->args.internal_arg_pointer;
4758 else
4759 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4760 t = make_tree (type, ovf_rtx);
4761 if (words != 0)
4762 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4763
4764 t = build2 (MODIFY_EXPR, type, ovf, t);
4765 TREE_SIDE_EFFECTS (t) = 1;
4766   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4767
4768 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4769 {
4770 /* Find the register save area.
4771 	 The function prologue saves it right above the stack frame.  */
4772 type = TREE_TYPE (sav);
4773 t = make_tree (type, frame_pointer_rtx);
4774 if (!ix86_varargs_gpr_size)
4775 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4776
4777 t = build2 (MODIFY_EXPR, type, sav, t);
4778 TREE_SIDE_EFFECTS (t) = 1;
4779       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4780 }
4781}
4782
4783/* Implement va_arg. */
4784
4785static tree
4786ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4787 gimple_seq *post_p)
4788{
4789 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4790 tree f_gpr, f_fpr, f_ovf, f_sav;
4791 tree gpr, fpr, ovf, sav, t;
4792 int size, rsize;
4793 tree lab_false, lab_over = NULL_TREE;
4794 tree addr, t2;
4795 rtx container;
4796 int indirect_p = 0;
4797 tree ptrtype;
4798 machine_mode nat_mode;
4799 unsigned int arg_boundary;
4800 unsigned int type_align;
4801
4802 /* Only 64bit target needs something special. */
4803 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4804 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4805
4806 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4807 f_fpr = DECL_CHAIN (f_gpr);
4808 f_ovf = DECL_CHAIN (f_fpr);
4809 f_sav = DECL_CHAIN (f_ovf);
4810
4811 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4812 valist, f_gpr, NULL_TREE);
4813
4814 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4815 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4816 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4817
4818 indirect_p = pass_va_arg_by_reference (type);
4819 if (indirect_p)
4820 type = build_pointer_type (type);
4821 size = arg_int_size_in_bytes (type);
4822 rsize = CEIL (size, UNITS_PER_WORD);
4823
4824   nat_mode = type_natural_mode (type, NULL, false);
4825 switch (nat_mode)
4826 {
4827 case E_V16HFmode:
4828 case E_V16BFmode:
4829 case E_V8SFmode:
4830 case E_V8SImode:
4831 case E_V32QImode:
4832 case E_V16HImode:
4833 case E_V4DFmode:
4834 case E_V4DImode:
4835 case E_V32HFmode:
4836 case E_V32BFmode:
4837 case E_V16SFmode:
4838 case E_V16SImode:
4839 case E_V64QImode:
4840 case E_V32HImode:
4841 case E_V8DFmode:
4842 case E_V8DImode:
4843 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4844 if (!TARGET_64BIT_MS_ABI)
4845 {
4846 container = NULL;
4847 break;
4848 }
4849 /* FALLTHRU */
4850
4851 default:
4852       container = construct_container (nat_mode, TYPE_MODE (type),
4853 				       type, 0, X86_64_REGPARM_MAX,
4854 				       X86_64_SSE_REGPARM_MAX, intreg,
4855 				       0);
4856 break;
4857 }
4858
4859 /* Pull the value out of the saved registers. */
4860
4861 addr = create_tmp_var (ptr_type_node, "addr");
4862 type_align = TYPE_ALIGN (type);
4863
4864 if (container)
4865 {
4866 int needed_intregs, needed_sseregs;
4867 bool need_temp;
4868 tree int_addr, sse_addr;
4869
4870 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4871 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4872
4873       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4874
4875 need_temp = (!REG_P (container)
4876 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4877 || TYPE_ALIGN (type) > 128));
4878
4879       /* If we are passing a structure, verify that it occupies a consecutive
4880 	 block of the register save area.  If not, we need to do moves.  */
4881 if (!need_temp && !REG_P (container))
4882 {
4883 /* Verify that all registers are strictly consecutive */
4884 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4885 {
4886 int i;
4887
4888 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4889 {
4890 rtx slot = XVECEXP (container, 0, i);
4891 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4892 || INTVAL (XEXP (slot, 1)) != i * 16)
4893 need_temp = true;
4894 }
4895 }
4896 else
4897 {
4898 int i;
4899
4900 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4901 {
4902 rtx slot = XVECEXP (container, 0, i);
4903 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4904 || INTVAL (XEXP (slot, 1)) != i * 8)
4905 need_temp = true;
4906 }
4907 }
4908 }
4909 if (!need_temp)
4910 {
4911 int_addr = addr;
4912 sse_addr = addr;
4913 }
4914 else
4915 {
4916 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4917 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4918 }
4919
4920 /* First ensure that we fit completely in registers. */
4921 if (needed_intregs)
4922 {
4923 t = build_int_cst (TREE_TYPE (gpr),
4924 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4925 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4926 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4927 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4928 gimplify_and_add (t, pre_p);
4929 }
4930 if (needed_sseregs)
4931 {
4932 t = build_int_cst (TREE_TYPE (fpr),
4933 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4934 + X86_64_REGPARM_MAX * 8);
4935 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4936 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4937 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4938 gimplify_and_add (t, pre_p);
4939 }
4940
4941 /* Compute index to start of area used for integer regs. */
4942 if (needed_intregs)
4943 {
4944 /* int_addr = gpr + sav; */
4945 t = fold_build_pointer_plus (sav, gpr);
4946 gimplify_assign (int_addr, t, pre_p);
4947 }
4948 if (needed_sseregs)
4949 {
4950 /* sse_addr = fpr + sav; */
4951 t = fold_build_pointer_plus (sav, fpr);
4952 gimplify_assign (sse_addr, t, pre_p);
4953 }
4954 if (need_temp)
4955 {
4956 int i, prev_size = 0;
4957 tree temp = create_tmp_var (type, "va_arg_tmp");
4958 TREE_ADDRESSABLE (temp) = 1;
4959
4960 /* addr = &temp; */
4961 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4962 gimplify_assign (addr, t, pre_p);
4963
4964 for (i = 0; i < XVECLEN (container, 0); i++)
4965 {
4966 rtx slot = XVECEXP (container, 0, i);
4967 rtx reg = XEXP (slot, 0);
4968 machine_mode mode = GET_MODE (reg);
4969 tree piece_type;
4970 tree addr_type;
4971 tree daddr_type;
4972 tree src_addr, src;
4973 int src_offset;
4974 tree dest_addr, dest;
4975 int cur_size = GET_MODE_SIZE (mode);
4976
4977 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4978 prev_size = INTVAL (XEXP (slot, 1));
4979 if (prev_size + cur_size > size)
4980 {
4981 cur_size = size - prev_size;
4982 unsigned int nbits = cur_size * BITS_PER_UNIT;
4983 		  if (!int_mode_for_size (nbits, 1).exists (&mode))
4984 mode = QImode;
4985 }
4986 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4987 if (mode == GET_MODE (reg))
4988 addr_type = build_pointer_type (piece_type);
4989 else
4990 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4991 true);
4992 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4993 true);
4994
4995 if (SSE_REGNO_P (REGNO (reg)))
4996 {
4997 src_addr = sse_addr;
4998 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4999 }
5000 else
5001 {
5002 src_addr = int_addr;
5003 src_offset = REGNO (reg) * 8;
5004 }
5005 src_addr = fold_convert (addr_type, src_addr);
5006 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5007
5008 dest_addr = fold_convert (daddr_type, addr);
5009 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5010 if (cur_size == GET_MODE_SIZE (mode))
5011 {
5012 src = build_va_arg_indirect_ref (src_addr);
5013 dest = build_va_arg_indirect_ref (dest_addr);
5014
5015 gimplify_assign (dest, src, pre_p);
5016 }
5017 else
5018 {
5019 tree copy
5020 		= build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5021 3, dest_addr, src_addr,
5022 size_int (cur_size));
5023 gimplify_and_add (copy, pre_p);
5024 }
5025 prev_size += cur_size;
5026 }
5027 }
5028
5029 if (needed_intregs)
5030 {
5031 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5032 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5033 gimplify_assign (gpr, t, pre_p);
5034 /* The GPR save area guarantees only 8-byte alignment. */
5035 if (!need_temp)
5036 type_align = MIN (type_align, 64);
5037 }
5038
5039 if (needed_sseregs)
5040 {
5041 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5042 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5043 gimplify_assign (unshare_expr (fpr), t, pre_p);
5044 }
5045
5046       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5047 
5048       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5049 }
5050
5051 /* ... otherwise out of the overflow area. */
5052
5053   /* When the caller aligns a parameter on the stack, alignment beyond
5054      MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
5055      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
5056      caller.  */
5057 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5058 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5059 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5060
5061 /* Care for on-stack alignment if needed. */
5062 if (arg_boundary <= 64 || size == 0)
5063 t = ovf;
5064 else
5065 {
5066 HOST_WIDE_INT align = arg_boundary / 8;
5067 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5068 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5069 build_int_cst (TREE_TYPE (t), -align));
5070 }
5071
5072 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5073 gimplify_assign (addr, t, pre_p);
5074
5075 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5076 gimplify_assign (unshare_expr (ovf), t, pre_p);
5077
5078 if (container)
5079     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5080
5081 type = build_aligned_type (type, type_align);
5082 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5083 addr = fold_convert (ptrtype, addr);
5084
5085 if (indirect_p)
5086 addr = build_va_arg_indirect_ref (addr);
5087 return build_va_arg_indirect_ref (addr);
5088}
5089
5090/* Return true if OPNUM's MEM should be matched
5091 in movabs* patterns. */
5092
5093bool
5094ix86_check_movabs (rtx insn, int opnum)
5095{
5096 rtx set, mem;
5097
5098 set = PATTERN (insn);
5099 if (GET_CODE (set) == PARALLEL)
5100 set = XVECEXP (set, 0, 0);
5101 gcc_assert (GET_CODE (set) == SET);
5102 mem = XEXP (set, opnum);
5103 while (SUBREG_P (mem))
5104 mem = SUBREG_REG (mem);
5105 gcc_assert (MEM_P (mem));
5106 return volatile_ok || !MEM_VOLATILE_P (mem);
5107}
5108
5109/* Return false if INSN contains a MEM with a non-default address space. */
5110bool
5111ix86_check_no_addr_space (rtx insn)
5112{
5113 subrtx_var_iterator::array_type array;
5114 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5115 {
5116 rtx x = *iter;
5117 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5118 return false;
5119 }
5120 return true;
5121}
5122
5123/* Initialize the table of extra 80387 mathematical constants. */
5124
5125static void
5126init_ext_80387_constants (void)
5127{
5128 static const char * cst[5] =
5129 {
5130 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5131 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5132 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5133 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5134 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5135 };
5136 int i;
5137
5138 for (i = 0; i < 5; i++)
5139 {
5140 real_from_string (&ext_80387_constants_table[i], cst[i]);
5141 /* Ensure each constant is rounded to XFmode precision. */
5142 real_convert (&ext_80387_constants_table[i],
5143 XFmode, &ext_80387_constants_table[i]);
5144 }
5145
5146 ext_80387_constants_init = 1;
5147}
5148
5149 /* Return a non-zero code if the constant X can be loaded with a special
5150    80387 instruction, 0 if it cannot, or -1 if X is not an X87 constant.  */
5151
5152int
5153standard_80387_constant_p (rtx x)
5154{
5155 machine_mode mode = GET_MODE (x);
5156
5157 const REAL_VALUE_TYPE *r;
5158
5159 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5160 return -1;
5161
5162 if (x == CONST0_RTX (mode))
5163 return 1;
5164 if (x == CONST1_RTX (mode))
5165 return 2;
5166
5167 r = CONST_DOUBLE_REAL_VALUE (x);
5168
5169 /* For XFmode constants, try to find a special 80387 instruction when
5170 optimizing for size or on those CPUs that benefit from them. */
5171 if (mode == XFmode
5172 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5173 && !flag_rounding_math)
5174 {
5175 int i;
5176
5177 if (! ext_80387_constants_init)
5178 init_ext_80387_constants ();
5179
5180 for (i = 0; i < 5; i++)
5181 if (real_identical (r, &ext_80387_constants_table[i]))
5182 return i + 3;
5183 }
5184
5185   /* A load of the constant -0.0 or -1.0 will be split into an
5186      fldz;fchs or fld1;fchs sequence.  */
5187 if (real_isnegzero (r))
5188 return 8;
5189 if (real_identical (r, &dconstm1))
5190 return 9;
5191
5192 return 0;
5193}
5194
5195/* Return the opcode of the special instruction to be used to load
5196 the constant X. */
5197
5198const char *
5199standard_80387_constant_opcode (rtx x)
5200{
5201 switch (standard_80387_constant_p (x))
5202 {
5203 case 1:
5204 return "fldz";
5205 case 2:
5206 return "fld1";
5207 case 3:
5208 return "fldlg2";
5209 case 4:
5210 return "fldln2";
5211 case 5:
5212 return "fldl2e";
5213 case 6:
5214 return "fldl2t";
5215 case 7:
5216 return "fldpi";
5217 case 8:
5218 case 9:
5219 return "#";
5220 default:
5221 gcc_unreachable ();
5222 }
5223}
5224
5225/* Return the CONST_DOUBLE representing the 80387 constant that is
5226 loaded by the specified special instruction. The argument IDX
5227 matches the return value from standard_80387_constant_p. */
5228
5229rtx
5230standard_80387_constant_rtx (int idx)
5231{
5232 int i;
5233
5234 if (! ext_80387_constants_init)
5235 init_ext_80387_constants ();
5236
5237 switch (idx)
5238 {
5239 case 3:
5240 case 4:
5241 case 5:
5242 case 6:
5243 case 7:
5244 i = idx - 3;
5245 break;
5246
5247 default:
5248 gcc_unreachable ();
5249 }
5250
5251 return const_double_from_real_value (ext_80387_constants_table[i],
5252 XFmode);
5253}
5254
5255/* Return 1 if X is all bits 0, 2 if X is all bits 1
5256 and 3 if X is all bits 1 with zero extend
5257 in supported SSE/AVX vector mode. */
5258
5259int
5260standard_sse_constant_p (rtx x, machine_mode pred_mode)
5261{
5262 machine_mode mode;
5263
5264 if (!TARGET_SSE)
5265 return 0;
5266
5267 mode = GET_MODE (x);
5268
5269 if (x == const0_rtx || const0_operand (x, mode))
5270 return 1;
5271
5272 if (x == constm1_rtx
5273 || vector_all_ones_operand (x, mode)
5274 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5275 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5276 && float_vector_all_ones_operand (x, mode)))
5277 {
5278 /* VOIDmode integer constant, get mode from the predicate. */
5279 if (mode == VOIDmode)
5280 mode = pred_mode;
5281
5282 switch (GET_MODE_SIZE (mode))
5283 {
5284 case 64:
5285 if (TARGET_AVX512F && TARGET_EVEX512)
5286 return 2;
5287 break;
5288 case 32:
5289 if (TARGET_AVX2)
5290 return 2;
5291 break;
5292 case 16:
5293 if (TARGET_SSE2)
5294 return 2;
5295 break;
5296 case 0:
5297 /* VOIDmode */
5298 gcc_unreachable ();
5299 default:
5300 break;
5301 }
5302 }
5303
5304 if (vector_all_ones_zero_extend_half_operand (x, mode)
5305 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5306 return 3;
5307
5308 return 0;
5309}
5310
5311/* Return the opcode of the special instruction to be used to load
5312 the constant operands[1] into operands[0]. */
5313
5314const char *
5315standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5316{
5317 machine_mode mode;
5318 rtx x = operands[1];
5319
5320 gcc_assert (TARGET_SSE);
5321
5322 mode = GET_MODE (x);
5323
5324 if (x == const0_rtx || const0_operand (x, mode))
5325 {
5326 switch (get_attr_mode (insn))
5327 {
5328 case MODE_TI:
5329 if (!EXT_REX_SSE_REG_P (operands[0]))
5330 return "%vpxor\t%0, %d0";
5331 /* FALLTHRU */
5332 case MODE_XI:
5333 case MODE_OI:
5334 if (EXT_REX_SSE_REG_P (operands[0]))
5335 {
5336 if (TARGET_AVX512VL)
5337 return "vpxord\t%x0, %x0, %x0";
5338 else if (TARGET_EVEX512)
5339 return "vpxord\t%g0, %g0, %g0";
5340 else
5341 gcc_unreachable ();
5342 }
5343 return "vpxor\t%x0, %x0, %x0";
5344
5345 case MODE_V2DF:
5346 if (!EXT_REX_SSE_REG_P (operands[0]))
5347 return "%vxorpd\t%0, %d0";
5348 /* FALLTHRU */
5349 case MODE_V8DF:
5350 case MODE_V4DF:
5351 if (EXT_REX_SSE_REG_P (operands[0]))
5352 {
5353 if (TARGET_AVX512DQ)
5354 {
5355 if (TARGET_AVX512VL)
5356 return "vxorpd\t%x0, %x0, %x0";
5357 else if (TARGET_EVEX512)
5358 return "vxorpd\t%g0, %g0, %g0";
5359 else
5360 gcc_unreachable ();
5361 }
5362 else
5363 {
5364 if (TARGET_AVX512VL)
5365 return "vpxorq\t%x0, %x0, %x0";
5366 else if (TARGET_EVEX512)
5367 return "vpxorq\t%g0, %g0, %g0";
5368 else
5369 gcc_unreachable ();
5370 }
5371 }
5372 return "vxorpd\t%x0, %x0, %x0";
5373
5374 case MODE_V4SF:
5375 if (!EXT_REX_SSE_REG_P (operands[0]))
5376 return "%vxorps\t%0, %d0";
5377 /* FALLTHRU */
5378 case MODE_V16SF:
5379 case MODE_V8SF:
5380 if (EXT_REX_SSE_REG_P (operands[0]))
5381 {
5382 if (TARGET_AVX512DQ)
5383 {
5384 if (TARGET_AVX512VL)
5385 return "vxorps\t%x0, %x0, %x0";
5386 else if (TARGET_EVEX512)
5387 return "vxorps\t%g0, %g0, %g0";
5388 else
5389 gcc_unreachable ();
5390 }
5391 else
5392 {
5393 if (TARGET_AVX512VL)
5394 return "vpxord\t%x0, %x0, %x0";
5395 else if (TARGET_EVEX512)
5396 return "vpxord\t%g0, %g0, %g0";
5397 else
5398 gcc_unreachable ();
5399 }
5400 }
5401 return "vxorps\t%x0, %x0, %x0";
5402
5403 default:
5404 gcc_unreachable ();
5405 }
5406 }
5407 else if (x == constm1_rtx
5408 || vector_all_ones_operand (x, mode)
5409 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5410 && float_vector_all_ones_operand (x, mode)))
5411 {
5412 enum attr_mode insn_mode = get_attr_mode (insn);
5413
5414 switch (insn_mode)
5415 {
5416 case MODE_XI:
5417 case MODE_V8DF:
5418 case MODE_V16SF:
5419 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5420 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5421
5422 case MODE_OI:
5423 case MODE_V4DF:
5424 case MODE_V8SF:
5425 gcc_assert (TARGET_AVX2);
5426 /* FALLTHRU */
5427 case MODE_TI:
5428 case MODE_V2DF:
5429 case MODE_V4SF:
5430 gcc_assert (TARGET_SSE2);
5431 if (EXT_REX_SSE_REG_P (operands[0]))
5432 {
5433 if (TARGET_AVX512VL)
5434 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5435 else if (TARGET_EVEX512)
5436 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5437 else
5438 gcc_unreachable ();
5439 }
5440 return (TARGET_AVX
5441 ? "vpcmpeqd\t%0, %0, %0"
5442 : "pcmpeqd\t%0, %0");
5443
5444 default:
5445 gcc_unreachable ();
5446 }
5447 }
5448 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5449 {
5450 if (GET_MODE_SIZE (mode) == 64)
5451 {
5452 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5453 return "vpcmpeqd\t%t0, %t0, %t0";
5454 }
5455 else if (GET_MODE_SIZE (mode) == 32)
5456 {
5457 gcc_assert (TARGET_AVX);
5458 return "vpcmpeqd\t%x0, %x0, %x0";
5459 }
5460 gcc_unreachable ();
5461 }
5462 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5463 {
5464 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5465 return "vpcmpeqd\t%x0, %x0, %x0";
5466 }
5467
5468 gcc_unreachable ();
5469}
5470
5471/* Returns true if INSN can be transformed from a memory load
5472 to a supported FP constant load. */
5473
5474bool
5475ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5476{
5477 rtx src = find_constant_src (insn);
5478
5479 gcc_assert (REG_P (dst));
5480
5481 if (src == NULL
5482 || (SSE_REGNO_P (REGNO (dst))
5483 	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5484       || (!TARGET_AVX512VL
5485 	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
5486 	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5487       || (STACK_REGNO_P (REGNO (dst))
5488 	  && standard_80387_constant_p (src) < 1))
5489 return false;
5490
5491 return true;
5492}
5493
5494/* Predicate for pre-reload splitters with associated instructions,
5495 which can match any time before the split1 pass (usually combine),
5496 then are unconditionally split in that pass and should not be
5497 matched again afterwards. */
5498
5499bool
5500ix86_pre_reload_split (void)
5501{
5502 return (can_create_pseudo_p ()
5503 && !(cfun->curr_properties & PROP_rtl_split_insns));
5504}
5505
5506/* Return the opcode of the TYPE_SSEMOV instruction. To move from
5507 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5508 TARGET_AVX512VL or it is a register to register move which can
5509 be done with zmm register move. */
5510
5511static const char *
5512ix86_get_ssemov (rtx *operands, unsigned size,
5513 enum attr_mode insn_mode, machine_mode mode)
5514{
5515 char buf[128];
5516 bool misaligned_p = (misaligned_operand (operands[0], mode)
5517 || misaligned_operand (operands[1], mode));
5518 bool evex_reg_p = (size == 64
5519 || EXT_REX_SSE_REG_P (operands[0])
5520 || EXT_REX_SSE_REG_P (operands[1]));
5521
5522 bool egpr_p = (TARGET_APX_EGPR
5523 && (x86_extended_rex2reg_mentioned_p (operands[0])
5524 || x86_extended_rex2reg_mentioned_p (operands[1])));
5525 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5526
5527 machine_mode scalar_mode;
5528
5529 const char *opcode = NULL;
5530 enum
5531 {
5532 opcode_int,
5533 opcode_float,
5534 opcode_double
5535 } type = opcode_int;
5536
5537 switch (insn_mode)
5538 {
5539 case MODE_V16SF:
5540 case MODE_V8SF:
5541 case MODE_V4SF:
5542 scalar_mode = E_SFmode;
5543 type = opcode_float;
5544 break;
5545 case MODE_V8DF:
5546 case MODE_V4DF:
5547 case MODE_V2DF:
5548 scalar_mode = E_DFmode;
5549 type = opcode_double;
5550 break;
5551 case MODE_XI:
5552 case MODE_OI:
5553 case MODE_TI:
5554 scalar_mode = GET_MODE_INNER (mode);
5555 break;
5556 default:
5557 gcc_unreachable ();
5558 }
5559
5560 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5561 we can only use zmm register move without memory operand. */
5562 if (evex_reg_p
5563 && !TARGET_AVX512VL
5564 && GET_MODE_SIZE (mode) < 64)
5565 {
5566 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5567 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5568 AVX512VL is disabled, LRA can still generate reg to
5569 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5570 modes. */
5571 if (memory_operand (operands[0], mode)
5572 || memory_operand (operands[1], mode))
5573 gcc_unreachable ();
5574 size = 64;
5575 /* We need TARGET_EVEX512 to move into zmm register. */
5576 gcc_assert (TARGET_EVEX512);
5577 switch (type)
5578 {
5579 case opcode_int:
5580 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5581 opcode = (misaligned_p
5582 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5583 : "vmovdqa64");
5584 else
5585 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5586 break;
5587 case opcode_float:
5588 opcode = misaligned_p ? "vmovups" : "vmovaps";
5589 break;
5590 case opcode_double:
5591 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5592 break;
5593 }
5594 }
5595 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5596 {
5597 switch (scalar_mode)
5598 {
5599 case E_HFmode:
5600 case E_BFmode:
5601 if (evex_reg_p || egpr_vl)
5602 opcode = (misaligned_p
5603 ? (TARGET_AVX512BW
5604 ? "vmovdqu16"
5605 : "vmovdqu64")
5606 : "vmovdqa64");
5607 else if (egpr_p)
5608 opcode = (misaligned_p
5609 ? (TARGET_AVX512BW
5610 ? "vmovdqu16"
5611 : "%vmovups")
5612 : "%vmovaps");
5613 else
5614 opcode = (misaligned_p
5615 ? (TARGET_AVX512BW
5616 ? "vmovdqu16"
5617 : "%vmovdqu")
5618 : "%vmovdqa");
5619 break;
5620 case E_SFmode:
5621 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5622 break;
5623 case E_DFmode:
5624 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5625 break;
5626 case E_TFmode:
5627 if (evex_reg_p || egpr_vl)
5628 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5629 else if (egpr_p)
5630 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5631 else
5632 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5633 break;
5634 default:
5635 gcc_unreachable ();
5636 }
5637 }
5638 else if (SCALAR_INT_MODE_P (scalar_mode))
5639 {
5640 switch (scalar_mode)
5641 {
5642 case E_QImode:
5643 if (evex_reg_p || egpr_vl)
5644 opcode = (misaligned_p
5645 ? (TARGET_AVX512BW
5646 ? "vmovdqu8"
5647 : "vmovdqu64")
5648 : "vmovdqa64");
5649 else if (egpr_p)
5650 opcode = (misaligned_p
5651 ? (TARGET_AVX512BW
5652 ? "vmovdqu8"
5653 : "%vmovups")
5654 : "%vmovaps");
5655 else
5656 opcode = (misaligned_p
5657 ? (TARGET_AVX512BW
5658 ? "vmovdqu8"
5659 : "%vmovdqu")
5660 : "%vmovdqa");
5661 break;
5662 case E_HImode:
5663 if (evex_reg_p || egpr_vl)
5664 opcode = (misaligned_p
5665 ? (TARGET_AVX512BW
5666 ? "vmovdqu16"
5667 : "vmovdqu64")
5668 : "vmovdqa64");
5669 else if (egpr_p)
5670 opcode = (misaligned_p
5671 ? (TARGET_AVX512BW
5672 ? "vmovdqu16"
5673 : "%vmovups")
5674 : "%vmovaps");
5675 else
5676 opcode = (misaligned_p
5677 ? (TARGET_AVX512BW
5678 ? "vmovdqu16"
5679 : "%vmovdqu")
5680 : "%vmovdqa");
5681 break;
5682 case E_SImode:
5683 if (evex_reg_p || egpr_vl)
5684 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5685 else if (egpr_p)
5686 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5687 else
5688 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5689 break;
5690 case E_DImode:
5691 case E_TImode:
5692 case E_OImode:
5693 if (evex_reg_p || egpr_vl)
5694 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5695 else if (egpr_p)
5696 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5697 else
5698 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5699 break;
5700 case E_XImode:
5701 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5702 break;
5703 default:
5704 gcc_unreachable ();
5705 }
5706 }
5707 else
5708 gcc_unreachable ();
5709
5710 switch (size)
5711 {
5712 case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
5723 break;
5724 default:
5725 gcc_unreachable ();
5726 }
5727 output_asm_insn (buf, operands);
5728 return "";
5729}
5730
5731/* Return the template of the TYPE_SSEMOV instruction to move
5732 operands[1] into operands[0]. */
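/* A note on dispatch: full-vector modes (MODE_XI/OI/TI and the V*SF/V*DF
   modes) are handed off to ix86_get_ssemov above, while scalar and
   partial-vector modes are handled by the cases below.  */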
5733
5734const char *
5735ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5736{
5737 machine_mode mode = GET_MODE (operands[0]);
5738 if (get_attr_type (insn) != TYPE_SSEMOV
5739 || mode != GET_MODE (operands[1]))
5740 gcc_unreachable ();
5741
5742 enum attr_mode insn_mode = get_attr_mode (insn);
5743
5744 switch (insn_mode)
5745 {
5746 case MODE_XI:
5747 case MODE_V8DF:
5748 case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);
5750
5751 case MODE_OI:
5752 case MODE_V4DF:
5753 case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);
5755
5756 case MODE_TI:
5757 case MODE_V2DF:
5758 case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);
5760
5761 case MODE_DI:
5762 /* Handle broken assemblers that require movd instead of movq. */
5763 if (GENERAL_REG_P (operands[0]))
5764 {
5765 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5766 return "%vmovq\t{%1, %q0|%q0, %1}";
5767 else
5768 return "%vmovd\t{%1, %q0|%q0, %1}";
5769 }
5770 else if (GENERAL_REG_P (operands[1]))
5771 {
5772 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5773 return "%vmovq\t{%q1, %0|%0, %q1}";
5774 else
5775 return "%vmovd\t{%q1, %0|%0, %q1}";
5776 }
5777 else
5778 return "%vmovq\t{%1, %0|%0, %1}";
5779
5780 case MODE_SI:
5781 if (GENERAL_REG_P (operands[0]))
5782 return "%vmovd\t{%1, %k0|%k0, %1}";
5783 else if (GENERAL_REG_P (operands[1]))
5784 return "%vmovd\t{%k1, %0|%0, %k1}";
5785 else
5786 return "%vmovd\t{%1, %0|%0, %1}";
5787
5788 case MODE_HI:
5789 if (GENERAL_REG_P (operands[0]))
5790 return "vmovw\t{%1, %k0|%k0, %1}";
5791 else if (GENERAL_REG_P (operands[1]))
5792 return "vmovw\t{%k1, %0|%0, %k1}";
5793 else
5794 return "vmovw\t{%1, %0|%0, %1}";
5795
5796 case MODE_DF:
5797 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5798 return "vmovsd\t{%d1, %0|%0, %d1}";
5799 else
5800 return "%vmovsd\t{%1, %0|%0, %1}";
5801
5802 case MODE_SF:
5803 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5804 return "vmovss\t{%d1, %0|%0, %d1}";
5805 else
5806 return "%vmovss\t{%1, %0|%0, %1}";
5807
5808 case MODE_HF:
5809 case MODE_BF:
5810 if (REG_P (operands[0]) && REG_P (operands[1]))
5811 return "vmovsh\t{%d1, %0|%0, %d1}";
5812 else
5813 return "vmovsh\t{%1, %0|%0, %1}";
5814
5815 case MODE_V1DF:
5816 gcc_assert (!TARGET_AVX);
5817 return "movlpd\t{%1, %0|%0, %1}";
5818
5819 case MODE_V2SF:
5820 if (TARGET_AVX && REG_P (operands[0]))
5821 return "vmovlps\t{%1, %d0|%d0, %1}";
5822 else
5823 return "%vmovlps\t{%1, %0|%0, %1}";
5824
5825 default:
5826 gcc_unreachable ();
5827 }
5828}
5829
/* Return true if OP contains a symbol reference.  */
5831
5832bool
5833symbolic_reference_mentioned_p (rtx op)
5834{
5835 const char *fmt;
5836 int i;
5837
5838 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5839 return true;
5840
5841 fmt = GET_RTX_FORMAT (GET_CODE (op));
5842 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5843 {
5844 if (fmt[i] == 'E')
5845 {
5846 int j;
5847
5848 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5849 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5850 return true;
5851 }
5852
5853 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5854 return true;
5855 }
5856
5857 return false;
5858}
5859
5860/* Return true if it is appropriate to emit `ret' instructions in the
5861 body of a function. Do this only if the epilogue is simple, needing a
5862 couple of insns. Prior to reloading, we can't tell how many registers
5863 must be saved, so return false then. Return false if there is no frame
5864 marker to de-allocate. */
5865
5866bool
5867ix86_can_use_return_insn_p (void)
5868{
  if (ix86_function_ms_hook_prologue (current_function_decl))
5870 return false;
5871
  if (ix86_function_naked (current_function_decl))
5873 return false;
5874
  /* Don't use `ret' before reload has completed, when a frame pointer
     is needed, or in an interrupt or exception handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;
5880
5881 /* Don't allow more than 32k pop, since that's all we can do
5882 with one instruction. */
5883 if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;
5885
5886 struct ix86_frame &frame = cfun->machine->frame;
5887 return (frame.stack_pointer_offset == UNITS_PER_WORD
5888 && (frame.nregs + frame.nsseregs) == 0);
5889}
5890
5891/* Return stack frame size. get_frame_size () returns used stack slots
5892 during compilation, which may be optimized out later. If stack frame
5893 is needed, stack_frame_required should be true. */
5894
5895static HOST_WIDE_INT
5896ix86_get_frame_size (void)
5897{
5898 if (cfun->machine->stack_frame_required)
5899 return get_frame_size ();
5900 else
5901 return 0;
5902}
5903
5904/* Value should be nonzero if functions must have frame pointers.
5905 Zero means the frame pointer need not be set up (and parms may
5906 be accessed via the stack pointer) in functions that seem suitable. */
5907
5908static bool
5909ix86_frame_pointer_required (void)
5910{
5911 /* If we accessed previous frames, then the generated code expects
5912 to be able to access the saved ebp value in our frame. */
5913 if (cfun->machine->accesses_prev_frame)
5914 return true;
5915
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
5918 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5919 return true;
5920
5921 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5922 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5923 return true;
5924
  /* Win64 SEH: very large frames need a frame pointer, since the maximum
     stack allocation is 4GB.  */
5927 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5928 return true;
5929
  /* SSE register saves require a frame pointer when the stack is
     misaligned.  */
5931 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5932 return true;
5933
5934 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5935 turns off the frame pointer by default. Turn it back on now if
5936 we've not got a leaf function. */
5937 if (TARGET_OMIT_LEAF_FRAME_POINTER
5938 && (!crtl->is_leaf
5939 || ix86_current_function_calls_tls_descriptor))
5940 return true;
5941
  /* Several versions of mcount for the x86 assume that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
5944 if (crtl->profile && !flag_fentry)
5945 return true;
5946
5947 return false;
5948}
5949
5950/* Record that the current function accesses previous call frames. */
5951
5952void
5953ix86_setup_frame_addresses (void)
5954{
5955 cfun->machine->accesses_prev_frame = 1;
5956}
5957
5958#ifndef USE_HIDDEN_LINKONCE
5959# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5960# define USE_HIDDEN_LINKONCE 1
5961# else
5962# define USE_HIDDEN_LINKONCE 0
5963# endif
5964#endif
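/* When USE_HIDDEN_LINKONCE is true, the thunks below are emitted as
   hidden comdat functions that the linker merges across objects;
   otherwise each object file gets its own internally-labelled copy.  */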
5965
5966/* Label count for call and return thunks. It is used to make unique
5967 labels in call and return thunks. */
5968static int indirectlabelno;
5969
5970/* True if call thunk function is needed. */
5971static bool indirect_thunk_needed = false;
5972
5973/* Bit masks of integer registers, which contain branch target, used
5974 by call thunk functions. */
5975static HARD_REG_SET indirect_thunks_used;
5976
5977/* True if return thunk function is needed. */
5978static bool indirect_return_needed = false;
5979
5980/* True if return thunk function via CX is needed. */
5981static bool indirect_return_via_cx;
5982
5983#ifndef INDIRECT_LABEL
5984# define INDIRECT_LABEL "LIND"
5985#endif
5986
5987/* Indicate what prefix is needed for an indirect branch. */
5988enum indirect_thunk_prefix
5989{
5990 indirect_thunk_prefix_none,
5991 indirect_thunk_prefix_nt
5992};
5993
5994/* Return the prefix needed for an indirect branch INSN. */
5995
5996enum indirect_thunk_prefix
5997indirect_thunk_need_prefix (rtx_insn *insn)
5998{
5999 enum indirect_thunk_prefix need_prefix;
6000 if ((cfun->machine->indirect_branch_type
6001 == indirect_branch_thunk_extern)
6002 && ix86_notrack_prefixed_insn_p (insn))
6003 {
6004 /* NOTRACK prefix is only used with external thunk so that it
6005 can be properly updated to support CET at run-time. */
6006 need_prefix = indirect_thunk_prefix_nt;
6007 }
6008 else
6009 need_prefix = indirect_thunk_prefix_none;
6010 return need_prefix;
6011}
6012
6013/* Fills in the label name that should be used for the indirect thunk. */
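/* With USE_HIDDEN_LINKONCE the resulting names look like
   "__x86_indirect_thunk", "__x86_indirect_thunk_rax" or
   "__x86_return_thunk", following the sprintf formats below.  */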
6014
6015static void
6016indirect_thunk_name (char name[32], unsigned int regno,
6017 enum indirect_thunk_prefix need_prefix,
6018 bool ret_p)
6019{
6020 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6021 gcc_unreachable ();
6022
6023 if (USE_HIDDEN_LINKONCE)
6024 {
6025 const char *prefix;
6026
6027 if (need_prefix == indirect_thunk_prefix_nt
6028 && regno != INVALID_REGNUM)
6029 {
6030 /* NOTRACK prefix is only used with external thunk via
6031 register so that NOTRACK prefix can be added to indirect
6032 branch via register to support CET at run-time. */
6033 prefix = "_nt";
6034 }
6035 else
6036 prefix = "";
6037
6038 const char *ret = ret_p ? "return" : "indirect";
6039
6040 if (regno != INVALID_REGNUM)
6041 {
6042 const char *reg_prefix;
6043 if (LEGACY_INT_REGNO_P (regno))
6044 reg_prefix = TARGET_64BIT ? "r" : "e";
6045 else
6046 reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
6048 ret, prefix, reg_prefix, reg_names[regno]);
6049 }
6050 else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6052 }
6053 else
6054 {
6055 if (regno != INVALID_REGNUM)
6056 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6057 else
6058 {
6059 if (ret_p)
6060 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6061 else
6062 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
6063 }
6064 }
6065}
6066
/* Output a call and return thunk for indirect branch.  If REGNO is not
   INVALID_REGNUM, the function address is in REGNO and the call and
   return thunk looks like:
6069
6070 call L2
6071 L1:
6072 pause
6073 lfence
6074 jmp L1
6075 L2:
6076 mov %REG, (%sp)
6077 ret
6078
6079 Otherwise, the function address is on the top of stack and the
6080 call and return thunk looks like:
6081
6082 call L2
6083 L1:
6084 pause
6085 lfence
6086 jmp L1
6087 L2:
6088 lea WORD_SIZE(%sp), %sp
6089 ret
6090 */
6091
6092static void
6093output_indirect_thunk (unsigned int regno)
6094{
6095 char indirectlabel1[32];
6096 char indirectlabel2[32];
6097
6098 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6099 indirectlabelno++);
6100 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6101 indirectlabelno++);
6102
6103 /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler; using both pause and lfence is a compromise.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);
6118
6119 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6120
6121 /* The above call insn pushed a word to stack. Adjust CFI info. */
6122 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6123 {
6124 if (! dwarf2out_do_cfi_asm ())
6125 {
6126 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6127 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6128 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
6136 }
6137
6138 if (regno != INVALID_REGNUM)
6139 {
6140 /* MOV. */
6141 rtx xops[2];
6142 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6143 xops[1] = gen_rtx_REG (word_mode, regno);
6144 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6145 }
6146 else
6147 {
6148 /* LEA. */
6149 rtx xops[2];
6150 xops[0] = stack_pointer_rtx;
6151 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6152 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6153 }
6154
  fputs ("\tret\n", asm_out_file);
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
6158}
6159
/* Output a function with a call and return thunk for indirect branch.
6161 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6162 Otherwise, the function address is on the top of stack. Thunk is
6163 used for function return if RET_P is true. */
6164
6165static void
6166output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6167 unsigned int regno, bool ret_p)
6168{
6169 char name[32];
6170 tree decl;
6171
6172 /* Create __x86_indirect_thunk. */
6173 indirect_thunk_name (name, regno, need_prefix, ret_p);
6174 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6175 get_identifier (name),
6176 build_function_type_list (void_type_node, NULL_TREE));
6177 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6178 NULL_TREE, void_type_node);
6179 TREE_PUBLIC (decl) = 1;
6180 TREE_STATIC (decl) = 1;
6181 DECL_IGNORED_P (decl) = 1;
6182
6183#if TARGET_MACHO
6184 if (TARGET_MACHO)
6185 {
6186 switch_to_section (darwin_sections[picbase_thunk_section]);
6187 fputs ("\t.weak_definition\t", asm_out_file);
6188 assemble_name (asm_out_file, name);
6189 fputs ("\n\t.private_extern\t", asm_out_file);
6190 assemble_name (asm_out_file, name);
6191 putc ('\n', asm_out_file);
6192 ASM_OUTPUT_LABEL (asm_out_file, name);
6193 DECL_WEAK (decl) = 1;
6194 }
6195 else
6196#endif
6197 if (USE_HIDDEN_LINKONCE)
6198 {
6199 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6200
6201 targetm.asm_out.unique_section (decl, 0);
6202 switch_to_section (get_named_section (decl, NULL, 0));
6203
6204 targetm.asm_out.globalize_label (asm_out_file, name);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
6208 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6209 }
6210 else
6211 {
6212 switch_to_section (text_section);
6213 ASM_OUTPUT_LABEL (asm_out_file, name);
6214 }
6215
6216 DECL_INITIAL (decl) = make_node (BLOCK);
6217 current_function_decl = decl;
6218 allocate_struct_function (decl, false);
6219 init_function_start (decl);
6220 /* We're about to hide the function body from callees of final_* by
6221 emitting it directly; tell them we're a thunk, if they care. */
6222 cfun->is_thunk = true;
6223 first_function_block_is_cold = false;
6224 /* Make sure unwind info is emitted for the thunk if needed. */
6225 final_start_function (emit_barrier (), asm_out_file, 1);
6226
6227 output_indirect_thunk (regno);
6228
6229 final_end_function ();
6230 init_insn_lengths ();
6231 free_after_compilation (cfun);
6232 set_cfun (NULL);
6233 current_function_decl = NULL;
6234}
6235
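/* Bit mask of integer registers for which a -fpic PC thunk
   (__x86.get_pc_thunk.REG) has been requested; bit N corresponds to
   hard register number N.  See output_set_got and ix86_code_end.  */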
6236static int pic_labels_used;
6237
6238/* Fills in the label name that should be used for a pc thunk for
6239 the given register. */
6240
6241static void
6242get_pc_thunk_name (char name[32], unsigned int regno)
6243{
6244 gcc_assert (!TARGET_64BIT);
6245
6246 if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6248 else
6249 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6250}
6251
6252
/* Output the indirect branch/return thunks and the -fpic PC thunks
   (which load a register with the return address of the caller and
   then return) that this translation unit needs.  */
6255
6256static void
6257ix86_code_end (void)
6258{
6259 rtx xops[2];
6260 unsigned int regno;
6261
  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);

  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
6294
6295 if (!(pic_labels_used & (1 << regno)))
6296 continue;
6297
6298 get_pc_thunk_name (name, regno);
6299
6300 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6301 get_identifier (name),
6302 build_function_type_list (void_type_node, NULL_TREE));
6303 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6304 NULL_TREE, void_type_node);
6305 TREE_PUBLIC (decl) = 1;
6306 TREE_STATIC (decl) = 1;
6307 DECL_IGNORED_P (decl) = 1;
6308
6309#if TARGET_MACHO
6310 if (TARGET_MACHO)
6311 {
6312 switch_to_section (darwin_sections[picbase_thunk_section]);
6313 fputs ("\t.weak_definition\t", asm_out_file);
6314 assemble_name (asm_out_file, name);
6315 fputs ("\n\t.private_extern\t", asm_out_file);
6316 assemble_name (asm_out_file, name);
6317 putc ('\n', asm_out_file);
6318 ASM_OUTPUT_LABEL (asm_out_file, name);
6319 DECL_WEAK (decl) = 1;
6320 }
6321 else
6322#endif
6323 if (USE_HIDDEN_LINKONCE)
6324 {
6325 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6326
6327 targetm.asm_out.unique_section (decl, 0);
6328 switch_to_section (get_named_section (decl, NULL, 0));
6329
6330 targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
6334 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6335 }
6336 else
6337 {
6338 switch_to_section (text_section);
6339 ASM_OUTPUT_LABEL (asm_out_file, name);
6340 }
6341
6342 DECL_INITIAL (decl) = make_node (BLOCK);
6343 current_function_decl = decl;
6344 allocate_struct_function (decl, false);
6345 init_function_start (decl);
6346 /* We're about to hide the function body from callees of final_* by
6347 emitting it directly; tell them we're a thunk, if they care. */
6348 cfun->is_thunk = true;
6349 first_function_block_is_cold = false;
6350 /* Make sure unwind info is emitted for the thunk if needed. */
6351 final_start_function (emit_barrier (), asm_out_file, 1);
6352
6353 /* Pad stack IP move with 4 instructions (two NOPs count
6354 as one instruction). */
6355 if (TARGET_PAD_SHORT_FUNCTION)
6356 {
6357 int i = 8;
6358
6359 while (i--)
	    fputs ("\tnop\n", asm_out_file);
6361 }
6362
6363 xops[0] = gen_rtx_REG (Pmode, regno);
6364 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6365 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
6367 final_end_function ();
6368 init_insn_lengths ();
6369 free_after_compilation (cfun);
6370 set_cfun (NULL);
6371 current_function_decl = NULL;
6372 }
6373
6374 if (flag_split_stack)
6375 file_end_indicate_split_stack ();
6376}
6377
6378/* Emit code for the SET_GOT patterns. */
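/* For the common 32-bit -fpic case this emits, roughly,

     call __x86.get_pc_thunk.bx
     addl $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk loads the return address (the address of the addl)
   into the PIC register.  */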
6379
6380const char *
6381output_set_got (rtx dest, rtx label)
6382{
6383 rtx xops[3];
6384
6385 xops[0] = dest;
6386
6387 if (TARGET_VXWORKS_RTP && flag_pic)
6388 {
6389 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6390 xops[2] = gen_rtx_MEM (Pmode,
6391 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6392 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6393
6394 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6395 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6396 an unadorned address. */
6397 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6398 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6399 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6400 return "";
6401 }
6402
6403 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6404
6405 if (flag_pic)
6406 {
6407 char name[32];
6408 get_pc_thunk_name (name, REGNO (dest));
6409 pic_labels_used |= 1 << REGNO (dest);
6410
6411 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6412 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6413 output_asm_insn ("%!call\t%X2", xops);
6414
6415#if TARGET_MACHO
6416 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6417 This is what will be referenced by the Mach-O PIC subsystem. */
6418 if (machopic_should_output_picbase_label () || !label)
6419 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6420
6421 /* When we are restoring the pic base at the site of a nonlocal label,
6422 and we decided to emit the pic base above, we will still output a
6423 local label used for calculating the correction offset (even though
6424 the offset will be 0 in that case). */
6425 if (label)
6426 targetm.asm_out.internal_label (asm_out_file, "L",
6427 CODE_LABEL_NUMBER (label));
6428#endif
6429 }
6430 else
6431 {
6432 if (TARGET_MACHO)
6433 /* We don't need a pic base, we're not producing pic. */
6434 gcc_unreachable ();
6435
6436 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6437 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6438 targetm.asm_out.internal_label (asm_out_file, "L",
6439 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6440 }
6441
6442 if (!TARGET_MACHO)
6443 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6444
6445 return "";
6446}
6447
/* Generate a "push" pattern for input ARG.  */
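/* The result is effectively
     (set (mem (pre_dec (reg sp))) (reg ARG))
   in word_mode, and the tracked frame state (cfa_offset / sp_offset)
   is updated to account for the implicit stack adjustment.  */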
6449
6450rtx
6451gen_push (rtx arg)
6452{
6453 struct machine_function *m = cfun->machine;
6454
6455 if (m->fs.cfa_reg == stack_pointer_rtx)
6456 m->fs.cfa_offset += UNITS_PER_WORD;
6457 m->fs.sp_offset += UNITS_PER_WORD;
6458
6459 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6460 arg = gen_rtx_REG (word_mode, REGNO (arg));
6461
6462 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6463 gen_rtx_PRE_DEC (Pmode,
6464 stack_pointer_rtx)),
6465 arg);
6466}
6467
/* Generate a "pop" pattern for input ARG.  */
6469
6470rtx
6471gen_pop (rtx arg)
6472{
6473 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6474 arg = gen_rtx_REG (word_mode, REGNO (arg));
6475
6476 return gen_rtx_SET (arg,
6477 gen_rtx_MEM (word_mode,
6478 gen_rtx_POST_INC (Pmode,
6479 stack_pointer_rtx)));
6480}
6481
6482/* Generate a "push2" pattern for input ARG. */
6483rtx
6484gen_push2 (rtx mem, rtx reg1, rtx reg2)
6485{
6486 struct machine_function *m = cfun->machine;
6487 const int offset = UNITS_PER_WORD * 2;
6488
6489 if (m->fs.cfa_reg == stack_pointer_rtx)
6490 m->fs.cfa_offset += offset;
6491 m->fs.sp_offset += offset;
6492
6493 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6494 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6495
6496 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6497 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6498
6499 return gen_push2_di (mem, reg1, reg2);
6500}
6501
/* Return the number of an unused call-clobbered register if one is
   available for the entire function, otherwise INVALID_REGNUM.  */
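/* Only the first three integer registers (regnos 0-2, i.e. %eax, %edx
   and %ecx) are considered, since the replacement PIC register must be
   call-clobbered.  */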
6504
6505static unsigned int
6506ix86_select_alt_pic_regnum (void)
6507{
6508 if (ix86_use_pseudo_pic_reg ())
6509 return INVALID_REGNUM;
6510
6511 if (crtl->is_leaf
6512 && !crtl->profile
6513 && !ix86_current_function_calls_tls_descriptor)
6514 {
6515 int i, drap;
6516 /* Can't use the same register for both PIC and DRAP. */
6517 if (crtl->drap_reg)
6518 drap = REGNO (crtl->drap_reg);
6519 else
6520 drap = -1;
6521 for (i = 2; i >= 0; --i)
6522 if (i != drap && !df_regs_ever_live_p (i))
6523 return i;
6524 }
6525
6526 return INVALID_REGNUM;
6527}
6528
6529/* Return true if REGNO is used by the epilogue. */
6530
6531bool
6532ix86_epilogue_uses (int regno)
6533{
6534 /* If there are no caller-saved registers, we preserve all registers,
6535 except for MMX and x87 registers which aren't supported when saving
6536 and restoring registers. Don't explicitly save SP register since
6537 it is always preserved. */
6538 return (epilogue_completed
6539 && cfun->machine->no_caller_saved_registers
6540 && !fixed_regs[regno]
6541 && !STACK_REGNO_P (regno)
6542 && !MMX_REGNO_P (regno));
6543}
6544
6545/* Return nonzero if register REGNO can be used as a scratch register
6546 in peephole2. */
6547
6548static bool
6549ix86_hard_regno_scratch_ok (unsigned int regno)
6550{
6551 /* If there are no caller-saved registers, we can't use any register
6552 as a scratch register after epilogue and use REGNO as scratch
6553 register only if it has been used before to avoid saving and
6554 restoring it. */
6555 return (!cfun->machine->no_caller_saved_registers
6556 || (!epilogue_completed
6557 && df_regs_ever_live_p (regno)));
6558}
6559
6560/* Return TRUE if we need to save REGNO. */
6561
6562bool
6563ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6564{
6565 /* If there are no caller-saved registers, we preserve all registers,
6566 except for MMX and x87 registers which aren't supported when saving
6567 and restoring registers. Don't explicitly save SP register since
6568 it is always preserved. */
6569 if (cfun->machine->no_caller_saved_registers)
6570 {
6571 /* Don't preserve registers used for function return value. */
6572 rtx reg = crtl->return_rtx;
6573 if (reg)
6574 {
6575 unsigned int i = REGNO (reg);
6576 unsigned int nregs = REG_NREGS (reg);
6577 while (nregs-- > 0)
6578 if ((i + nregs) == regno)
6579 return false;
6580 }
6581
6582 return (df_regs_ever_live_p (regno)
6583 && !fixed_regs[regno]
6584 && !STACK_REGNO_P (regno)
6585 && !MMX_REGNO_P (regno)
6586 && (regno != HARD_FRAME_POINTER_REGNUM
6587 || !frame_pointer_needed));
6588 }
6589
6590 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6591 && pic_offset_table_rtx)
6592 {
6593 if (ix86_use_pseudo_pic_reg ())
6594 {
6595 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6596 _mcount in prologue. */
6597 if (!TARGET_64BIT && flag_pic && crtl->profile)
6598 return true;
6599 }
6600 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6601 || crtl->profile
6602 || crtl->calls_eh_return
6603 || crtl->uses_const_pool
6604 || cfun->has_nonlocal_label)
6605 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6606 }
6607
6608 if (crtl->calls_eh_return && maybe_eh_return)
6609 {
6610 unsigned i;
6611 for (i = 0; ; i++)
6612 {
6613 unsigned test = EH_RETURN_DATA_REGNO (i);
6614 if (test == INVALID_REGNUM)
6615 break;
6616 if (test == regno)
6617 return true;
6618 }
6619 }
6620
6621 if (ignore_outlined && cfun->machine->call_ms2sysv)
6622 {
6623 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6624 + xlogue_layout::MIN_REGS;
6625 if (xlogue_layout::is_stub_managed_reg (regno, count))
6626 return false;
6627 }
6628
6629 if (crtl->drap_reg
6630 && regno == REGNO (crtl->drap_reg)
6631 && !cfun->machine->no_drap_save_restore)
6632 return true;
6633
6634 return (df_regs_ever_live_p (regno)
6635 && !call_used_or_fixed_reg_p (regno)
6636 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6637}
6638
/* Return number of saved general purpose registers.  */
6640
6641static int
6642ix86_nsaved_regs (void)
6643{
6644 int nregs = 0;
6645 int regno;
6646
6647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6649 nregs ++;
6650 return nregs;
6651}
6652
6653/* Return number of saved SSE registers. */
6654
6655static int
6656ix86_nsaved_sseregs (void)
6657{
6658 int nregs = 0;
6659 int regno;
6660
6661 if (!TARGET_64BIT_MS_ABI)
6662 return 0;
6663 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6665 nregs ++;
6666 return nregs;
6667}
6668
6669/* Given FROM and TO register numbers, say whether this elimination is
6670 allowed. If stack alignment is needed, we can only replace argument
6671 pointer with hard frame pointer, or replace frame pointer with stack
6672 pointer. Otherwise, frame pointer elimination is automatically
6673 handled and all other eliminations are valid. */
6674
6675static bool
6676ix86_can_eliminate (const int from, const int to)
6677{
6678 if (stack_realign_fp)
6679 return ((from == ARG_POINTER_REGNUM
6680 && to == HARD_FRAME_POINTER_REGNUM)
6681 || (from == FRAME_POINTER_REGNUM
6682 && to == STACK_POINTER_REGNUM));
6683 else
6684 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6685}
6686
6687/* Return the offset between two registers, one to be eliminated, and the other
6688 its replacement, at the start of a routine. */
6689
6690HOST_WIDE_INT
6691ix86_initial_elimination_offset (int from, int to)
6692{
6693 struct ix86_frame &frame = cfun->machine->frame;
6694
6695 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6696 return frame.hard_frame_pointer_offset;
6697 else if (from == FRAME_POINTER_REGNUM
6698 && to == HARD_FRAME_POINTER_REGNUM)
6699 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6700 else
6701 {
6702 gcc_assert (to == STACK_POINTER_REGNUM);
6703
6704 if (from == ARG_POINTER_REGNUM)
6705 return frame.stack_pointer_offset;
6706
6707 gcc_assert (from == FRAME_POINTER_REGNUM);
6708 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6709 }
6710}
6711
6712/* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6713void
6714warn_once_call_ms2sysv_xlogues (const char *feature)
6715{
6716 static bool warned_once = false;
6717 if (!warned_once)
6718 {
6719 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6720 feature);
6721 warned_once = true;
6722 }
6723}
6724
6725/* Return the probing interval for -fstack-clash-protection. */
6726
6727static HOST_WIDE_INT
6728get_probe_interval (void)
6729{
6730 if (flag_stack_clash_protection)
6731 return (HOST_WIDE_INT_1U
6732 << param_stack_clash_protection_probe_interval);
6733 else
6734 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6735}
6736
6737/* When using -fsplit-stack, the allocation routines set a field in
6738 the TCB to the bottom of the stack plus this much space, measured
6739 in bytes. */
6740
6741#define SPLIT_STACK_AVAILABLE 256
6742
6743/* Helper function to determine whether push2/pop2 can be used in prologue or
6744 epilogue for register save/restore. */
6745static bool
6746ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6747{
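  /* PUSH2/POP2 require a 16-byte aligned stack pointer at the paired
     operation.  If the current SP offset is not 16-byte aligned, one
     extra single push is needed first to align it, so pairing only pays
     off with at least three saved registers; with an aligned SP two are
     enough, hence the "nregs + aligned >= 3" test.  */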
6748 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6749 return TARGET_APX_PUSH2POP2
6750 && !cfun->machine->frame.save_regs_using_mov
6751 && cfun->machine->func_type == TYPE_NORMAL
6752 && (nregs + aligned) >= 3;
6753}
6754
6755/* Fill structure ix86_frame about frame of currently computed function. */
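/* Roughly, going from the incoming stack pointer towards lower
   addresses, the layout computed below is: return address (plus error
   code for exception handlers), optional pushed static chain, saved
   frame pointer, general register save area, SSE register save area
   (64-bit ms_abi only), va_arg save area, local variables and, last,
   the outgoing argument area.  */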
6756
6757static void
6758ix86_compute_frame_layout (void)
6759{
6760 struct ix86_frame *frame = &cfun->machine->frame;
6761 struct machine_function *m = cfun->machine;
6762 unsigned HOST_WIDE_INT stack_alignment_needed;
6763 HOST_WIDE_INT offset;
6764 unsigned HOST_WIDE_INT preferred_alignment;
6765 HOST_WIDE_INT size = ix86_get_frame_size ();
6766 HOST_WIDE_INT to_allocate;
6767
6768 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6769 * ms_abi functions that call a sysv function. We now need to prune away
6770 * cases where it should be disabled. */
6771 if (TARGET_64BIT && m->call_ms2sysv)
6772 {
6773 gcc_assert (TARGET_64BIT_MS_ABI);
6774 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6775 gcc_assert (!TARGET_SEH);
6776 gcc_assert (TARGET_SSE);
6777 gcc_assert (!ix86_using_red_zone ());
6778
6779 if (crtl->calls_eh_return)
6780 {
6781 gcc_assert (!reload_completed);
6782 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6784 }
6785
6786 else if (ix86_static_chain_on_stack)
6787 {
6788 gcc_assert (!reload_completed);
6789 m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
6791 }
6792
6793 /* Finally, compute which registers the stub will manage. */
6794 else
6795 {
6796 unsigned count = xlogue_layout::count_stub_managed_regs ();
6797 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6798 m->call_ms2sysv_pad_in = 0;
6799 }
6800 }
6801
6802 frame->nregs = ix86_nsaved_regs ();
6803 frame->nsseregs = ix86_nsaved_sseregs ();
6804
  /* The 64-bit MS ABI seems to require stack alignment to always be 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden on the command line or via
     the force_align_arg_pointer attribute.
6809
6810 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6811 at call sites, including profile function calls.
6812
6813 For APX push2/pop2, the stack also requires 128b alignment. */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
6815 && crtl->preferred_stack_boundary < 128)
6816 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6817 && crtl->preferred_stack_boundary < 128)
6818 && (!crtl->is_leaf || cfun->calls_alloca != 0
6819 || ix86_current_function_calls_tls_descriptor
6820 || (TARGET_MACHO && crtl->profile)
6821 || ix86_incoming_stack_boundary < 128)))
6822 {
6823 crtl->preferred_stack_boundary = 128;
6824 if (crtl->stack_alignment_needed < 128)
6825 crtl->stack_alignment_needed = 128;
6826 }
6827
6828 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6829 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6830
6831 gcc_assert (!size || stack_alignment_needed);
6832 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6833 gcc_assert (preferred_alignment <= stack_alignment_needed);
6834
6835 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6836 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6837 if (TARGET_64BIT && m->call_ms2sysv)
6838 {
6839 gcc_assert (stack_alignment_needed >= 16);
6840 gcc_assert (!frame->nsseregs);
6841 }
6842
6843 /* For SEH we have to limit the amount of code movement into the prologue.
6844 At present we do this via a BLOCKAGE, at which point there's very little
6845 scheduling that can be done, which means that there's very little point
6846 in doing anything except PUSHs. */
6847 if (TARGET_SEH)
6848 m->use_fast_prologue_epilogue = false;
6849 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6850 {
6851 int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);
6853
6854 /* The fast prologue uses move instead of push to save registers. This
6855 is significantly longer, but also executes faster as modern hardware
6856 can execute the moves in parallel, but can't do that for push/pop.
6857
	 Be careful about choosing which prologue to emit: when the function
	 takes many instructions to execute we may use the slow version, as
	 well as when the function is known to be outside a hot spot (this is
	 known with feedback only).  Weight the size of the function by the
	 number of registers to save, as it is cheap to use one or two push
	 instructions but very slow to use many of them.
6864
6865 Calling this hook multiple times with the same frame requirements
6866 must produce the same layout, since the RA might otherwise be
6867 unable to reach a fixed point or might fail its final sanity checks.
6868 This means that once we've assumed that a function does or doesn't
6869 have a particular size, we have to stick to that assumption
6870 regardless of how the function has changed since. */
6871 if (count)
6872 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6873 if (node->frequency < NODE_FREQUENCY_NORMAL
6874 || (flag_branch_probabilities
6875 && node->frequency < NODE_FREQUENCY_HOT))
6876 m->use_fast_prologue_epilogue = false;
6877 else
6878 {
6879 if (count != frame->expensive_count)
6880 {
6881 frame->expensive_count = count;
6882 frame->expensive_p = expensive_function_p (count);
6883 }
6884 m->use_fast_prologue_epilogue = !frame->expensive_p;
6885 }
6886 }
6887
6888 frame->save_regs_using_mov
6889 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6890
6891 /* Skip return address and error code in exception handler. */
6892 offset = INCOMING_FRAME_SP_OFFSET;
6893
6894 /* Skip pushed static chain. */
6895 if (ix86_static_chain_on_stack)
6896 offset += UNITS_PER_WORD;
6897
6898 /* Skip saved base pointer. */
6899 if (frame_pointer_needed)
6900 offset += UNITS_PER_WORD;
6901 frame->hfp_save_offset = offset;
6902
6903 /* The traditional frame pointer location is at the top of the frame. */
6904 frame->hard_frame_pointer_offset = offset;
6905
6906 /* Register save area */
6907 offset += frame->nregs * UNITS_PER_WORD;
6908 frame->reg_save_offset = offset;
6909
6910 /* Calculate the size of the va-arg area (not including padding, if any). */
6911 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6912
6913 /* Also adjust stack_realign_offset for the largest alignment of
6914 stack slot actually used. */
6915 if (stack_realign_fp
6916 || (cfun->machine->max_used_stack_alignment != 0
6917 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6918 {
6919 /* We may need a 16-byte aligned stack for the remainder of the
6920 register save area, but the stack frame for the local function
6921 may require a greater alignment if using AVX/2/512. In order
6922 to avoid wasting space, we first calculate the space needed for
6923 the rest of the register saves, add that to the stack pointer,
6924 and then realign the stack to the boundary of the start of the
6925 frame for the local function. */
6926 HOST_WIDE_INT space_needed = 0;
6927 HOST_WIDE_INT sse_reg_space_needed = 0;
6928
6929 if (TARGET_64BIT)
6930 {
6931 if (m->call_ms2sysv)
6932 {
6933 m->call_ms2sysv_pad_in = 0;
6934 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6935 }
6936
6937 else if (frame->nsseregs)
6938 /* The only ABI that has saved SSE registers (Win64) also has a
6939 16-byte aligned default stack. However, many programs violate
6940 the ABI, and Wine64 forces stack realignment to compensate. */
6941 space_needed = frame->nsseregs * 16;
6942
6943 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6944
	  /* 64-bit frame->va_arg_size should always be a multiple of 16,
	     but round up anyway to be pedantic.  */
6947 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6948 }
6949 else
6950 space_needed = frame->va_arg_size;
6951
6952 /* Record the allocation size required prior to the realignment AND. */
6953 frame->stack_realign_allocate = space_needed;
6954
6955 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6956 before this point are not directly comparable with values below
6957 this point. Use sp_valid_at to determine if the stack pointer is
6958 valid for a given offset, fp_valid_at for the frame pointer, or
6959 choose_baseaddr to have a base register chosen for you.
6960
6961 Note that the result of (frame->stack_realign_offset
6962 & (stack_alignment_needed - 1)) may not equal zero. */
6963 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6964 frame->stack_realign_offset = offset - space_needed;
6965 frame->sse_reg_save_offset = frame->stack_realign_offset
6966 + sse_reg_space_needed;
6967 }
6968 else
6969 {
6970 frame->stack_realign_offset = offset;
6971
6972 if (TARGET_64BIT && m->call_ms2sysv)
6973 {
6974 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6975 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6976 }
6977
6978 /* Align and set SSE register save area. */
6979 else if (frame->nsseregs)
6980 {
6981 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6982 required and the DRAP re-alignment boundary is at least 16 bytes,
6983 then we want the SSE register save area properly aligned. */
6984 if (ix86_incoming_stack_boundary >= 128
6985 || (stack_realign_drap && stack_alignment_needed >= 16))
6986 offset = ROUND_UP (offset, 16);
6987 offset += frame->nsseregs * 16;
6988 }
6989 frame->sse_reg_save_offset = offset;
6990 offset += frame->va_arg_size;
6991 }
6992
  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if arguments may
     be passed on the stack, we still need to align the stack when
     there is no tail call.  */
6997 if (m->call_ms2sysv
6998 || frame->va_arg_size != 0
6999 || size != 0
7000 || !crtl->is_leaf
7001 || (!crtl->tail_call_emit
7002 && cfun->machine->outgoing_args_on_stack)
7003 || cfun->calls_alloca
7004 || ix86_current_function_calls_tls_descriptor)
7005 offset = ROUND_UP (offset, stack_alignment_needed);
7006
7007 /* Frame pointer points here. */
7008 frame->frame_pointer_offset = offset;
7009
7010 offset += size;
7011
7012 /* Add outgoing arguments area. Can be skipped if we eliminated
7013 all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
7017 if (ACCUMULATE_OUTGOING_ARGS
7018 && (!crtl->is_leaf || cfun->calls_alloca
7019 || ix86_current_function_calls_tls_descriptor))
7020 {
7021 offset += crtl->outgoing_args_size;
7022 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7023 }
7024 else
7025 frame->outgoing_arguments_size = 0;
7026
7027 /* Align stack boundary. Only needed if we're calling another function
7028 or using alloca. */
7029 if (!crtl->is_leaf || cfun->calls_alloca
7030 || ix86_current_function_calls_tls_descriptor)
7031 offset = ROUND_UP (offset, preferred_alignment);
7032
7033 /* We've reached end of stack frame. */
7034 frame->stack_pointer_offset = offset;
7035
7036 /* Size prologue needs to allocate. */
7037 to_allocate = offset - frame->sse_reg_save_offset;
7038
7039 if ((!to_allocate && frame->nregs <= 1)
7040 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7041 /* If static stack checking is enabled and done with probes,
7042 the registers need to be saved before allocating the frame. */
7043 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7044 /* If stack clash probing needs a loop, then it needs a
7045 scratch register. But the returned register is only guaranteed
7046 to be safe to use after register saves are complete. So if
7047 stack clash protections are enabled and the allocated frame is
7048 larger than the probe interval, then use pushes to save
7049 callee saved registers. */
7050 || (flag_stack_clash_protection
7051 && !ix86_target_stack_probe ()
7052 && to_allocate > get_probe_interval ()))
7053 frame->save_regs_using_mov = false;
7054
7055 if (ix86_using_red_zone ()
7056 && crtl->sp_is_unchanging
7057 && crtl->is_leaf
7058 && !ix86_pc_thunk_call_expanded
7059 && !ix86_current_function_calls_tls_descriptor)
7060 {
7061 frame->red_zone_size = to_allocate;
7062 if (frame->save_regs_using_mov)
7063 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7064 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7065 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7066 }
7067 else
7068 frame->red_zone_size = 0;
7069 frame->stack_pointer_offset -= frame->red_zone_size;
7070
7071 /* The SEH frame pointer location is near the bottom of the frame.
7072 This is enforced by the fact that the difference between the
7073 stack pointer and the frame pointer is limited to 240 bytes in
7074 the unwind data structure. */
7075 if (TARGET_SEH)
7076 {
7077 /* Force the frame pointer to point at or below the lowest register save
7078 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7079 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7080
7081 /* If we can leave the frame pointer where it is, do so; however return
7082 the establisher frame for __builtin_frame_address (0) or else if the
7083 frame overflows the SEH maximum frame size.
7084
7085 Note that the value returned by __builtin_frame_address (0) is quite
7086 constrained, because setjmp is piggybacked on the SEH machinery with
7087 recent versions of MinGW:
7088
7089 # elif defined(__SEH__)
7090 # if defined(__aarch64__) || defined(_ARM64_)
7091 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7092 # elif (__MINGW_GCC_VERSION < 40702)
7093 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7094 # else
7095 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7096 # endif
7097
7098 and the second argument passed to _setjmp, if not null, is forwarded
7099 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7100 built an ExceptionRecord on the fly describing the setjmp buffer). */
7101 const HOST_WIDE_INT diff
7102 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7103 if (diff <= 255 && !crtl->accesses_prior_frames)
7104 {
7105 /* The resulting diff will be a multiple of 16 lower than 255,
7106 i.e. at most 240 as required by the unwind data structure. */
7107 frame->hard_frame_pointer_offset += (diff & 15);
7108 }
7109 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7110 {
7111 /* Ideally we'd determine what portion of the local stack frame
7112 (within the constraint of the lowest 240) is most heavily used.
7113 But without that complication, simply bias the frame pointer
7114 by 128 bytes so as to maximize the amount of the local stack
7115 frame that is addressable with 8-bit offsets. */
7116 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7117 }
7118 else
7119 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7120 }
7121}
7122
7123/* This is semi-inlined memory_address_length, but simplified
7124 since we know that we're always dealing with reg+offset, and
7125 to avoid having to create and discard all that rtl. */
7126
7127static inline int
7128choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7129{
7130 int len = 4;
7131
7132 if (offset == 0)
7133 {
7134 /* EBP and R13 cannot be encoded without an offset. */
7135 len = (regno == BP_REG || regno == R13_REG);
7136 }
7137 else if (IN_RANGE (offset, -128, 127))
7138 len = 1;
7139
7140 /* ESP and R12 must be encoded with a SIB byte. */
7141 if (regno == SP_REG || regno == R12_REG)
7142 len++;
7143
7144 return len;
7145}
7146
7147/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7148 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7149
7150static bool
7151sp_valid_at (HOST_WIDE_INT cfa_offset)
7152{
7153 const struct machine_frame_state &fs = cfun->machine->fs;
7154 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7155 {
7156 /* Validate that the cfa_offset isn't in a "no-man's land". */
7157 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7158 return false;
7159 }
7160 return fs.sp_valid;
7161}
7162
7163/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7164 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7165
7166static inline bool
7167fp_valid_at (HOST_WIDE_INT cfa_offset)
7168{
7169 const struct machine_frame_state &fs = cfun->machine->fs;
7170 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7171 {
7172 /* Validate that the cfa_offset isn't in a "no-man's land". */
7173 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7174 return false;
7175 }
7176 return fs.fp_valid;
7177}
7178
7179/* Choose a base register based upon alignment requested, speed and/or
7180 size. */
7181
7182static void
7183choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7184 HOST_WIDE_INT &base_offset,
		unsigned int align_requested, unsigned int *align)
7186{
7187 const struct machine_function *m = cfun->machine;
7188 unsigned int hfp_align;
7189 unsigned int drap_align;
7190 unsigned int sp_align;
7191 bool hfp_ok = fp_valid_at (cfa_offset);
7192 bool drap_ok = m->fs.drap_valid;
7193 bool sp_ok = sp_valid_at (cfa_offset);
7194
7195 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7196
7197 /* Filter out any registers that don't meet the requested alignment
7198 criteria. */
  if (align_requested)
7200 {
7201 if (m->fs.realigned)
7202 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_requested;
      drap_ok = drap_ok && drap_align >= align_requested;
      sp_ok = sp_ok && sp_align >= align_requested;
7212 }
7213
7214 if (m->use_fast_prologue_epilogue)
7215 {
7216 /* Choose the base register most likely to allow the most scheduling
7217 opportunities. Generally FP is valid throughout the function,
7218 while DRAP must be reloaded within the epilogue. But choose either
7219 over the SP due to increased encoding size. */
7220
7221 if (hfp_ok)
7222 {
7223 base_reg = hard_frame_pointer_rtx;
7224 base_offset = m->fs.fp_offset - cfa_offset;
7225 }
7226 else if (drap_ok)
7227 {
7228 base_reg = crtl->drap_reg;
7229 base_offset = 0 - cfa_offset;
7230 }
7231 else if (sp_ok)
7232 {
7233 base_reg = stack_pointer_rtx;
7234 base_offset = m->fs.sp_offset - cfa_offset;
7235 }
7236 }
7237 else
7238 {
7239 HOST_WIDE_INT toffset;
7240 int len = 16, tlen;
7241
7242 /* Choose the base register with the smallest address encoding.
7243 With a tie, choose FP > DRAP > SP. */
7244 if (sp_ok)
7245 {
7246 base_reg = stack_pointer_rtx;
7247 base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7249 }
7250 if (drap_ok)
7251 {
7252 toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7254 if (tlen <= len)
7255 {
7256 base_reg = crtl->drap_reg;
7257 base_offset = toffset;
7258 len = tlen;
7259 }
7260 }
7261 if (hfp_ok)
7262 {
7263 toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7265 if (tlen <= len)
7266 {
7267 base_reg = hard_frame_pointer_rtx;
7268 base_offset = toffset;
7269 }
7270 }
7271 }
7272
7273 /* Set the align return value. */
7274 if (align)
7275 {
7276 if (base_reg == stack_pointer_rtx)
7277 *align = sp_align;
7278 else if (base_reg == crtl->drap_reg)
7279 *align = drap_align;
7280 else if (base_reg == hard_frame_pointer_rtx)
7281 *align = hfp_align;
7282 }
7283}
7284
7285/* Return an RTX that points to CFA_OFFSET within the stack frame and
7286 the alignment of address. If ALIGN is non-null, it should point to
7287 an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
7290 alignment value. If it is possible for the base register offset to be
7291 non-immediate then SCRATCH_REGNO should specify a scratch register to
7292 use.
7293
7294 The valid base registers are taken from CFUN->MACHINE->FS. */
7295
7296static rtx
7297choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7298 unsigned int scratch_regno = INVALID_REGNUM)
7299{
7300 rtx base_reg = NULL;
7301 HOST_WIDE_INT base_offset = 0;
7302
7303 /* If a specific alignment is requested, try to get a base register
7304 with that alignment first. */
7305 if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7307
7308 if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7310
7311 gcc_assert (base_reg != NULL);
7312
7313 rtx base_offset_rtx = GEN_INT (base_offset);
7314
7315 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7316 {
7317 gcc_assert (scratch_regno != INVALID_REGNUM);
7318
7319 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7320 emit_move_insn (scratch_reg, base_offset_rtx);
7321
7322 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7323 }
7324
7325 return plus_constant (Pmode, base_reg, base_offset);
7326}
7327
7328/* Emit code to save registers in the prologue. */
7329
7330static void
7331ix86_emit_save_regs (void)
7332{
7333 int regno;
7334 rtx_insn *insn;
7335
7336 if (!TARGET_APX_PUSH2POP2 || cfun->machine->func_type != TYPE_NORMAL)
7337 {
7338 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
      if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
7342 RTX_FRAME_RELATED_P (insn) = 1;
7343 }
7344 }
7345 else
7346 {
7347 int regno_list[2];
7348 regno_list[0] = regno_list[1] = -1;
7349 int loaded_regnum = 0;
7350 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7351
7352 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
      if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7354 {
7355 if (aligned)
7356 {
7357 regno_list[loaded_regnum++] = regno;
7358 if (loaded_regnum == 2)
7359 {
7360 gcc_assert (regno_list[0] != -1
7361 && regno_list[1] != -1
7362 && regno_list[0] != regno_list[1]);
7363 const int offset = UNITS_PER_WORD * 2;
7364 rtx mem = gen_rtx_MEM (TImode,
7365 gen_rtx_PRE_DEC (Pmode,
7366 stack_pointer_rtx));
		  insn = emit_insn (gen_push2 (mem,
					       gen_rtx_REG (word_mode,
							    regno_list[0]),
					       gen_rtx_REG (word_mode,
							    regno_list[1])));
7372 RTX_FRAME_RELATED_P (insn) = 1;
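		  /* Describe the push2 to the unwinder as one SP
		     adjustment plus two individual register stores.  */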
7373 rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7374
7375 for (int i = 0; i < 2; i++)
7376 {
7377 rtx dwarf_reg = gen_rtx_REG (word_mode,
7378 regno_list[i]);
7379 rtx sp_offset = plus_constant (Pmode,
7380 stack_pointer_rtx,
7381 + UNITS_PER_WORD
7382 * (1 - i));
7383 rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7384 sp_offset),
7385 dwarf_reg);
7386 RTX_FRAME_RELATED_P (tmp) = 1;
7387 XVECEXP (dwarf, 0, i + 1) = tmp;
7388 }
7389 rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7390 plus_constant (Pmode,
7391 stack_pointer_rtx,
7392 -offset));
7393 RTX_FRAME_RELATED_P (sp_tmp) = 1;
7394 XVECEXP (dwarf, 0, 0) = sp_tmp;
7395 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7396
7397 loaded_regnum = 0;
7398 regno_list[0] = regno_list[1] = -1;
7399 }
7400 }
7401 else
7402 {
	      insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
7404 RTX_FRAME_RELATED_P (insn) = 1;
7405 aligned = true;
7406 }
7407 }
7408 if (loaded_regnum == 1)
7409 {
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0])));
7412 RTX_FRAME_RELATED_P (insn) = 1;
7413 }
7414 }
7415}
7416
7417/* Emit a single register save at CFA - CFA_OFFSET. */
7418
7419static void
7420ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7421 HOST_WIDE_INT cfa_offset)
7422{
7423 struct machine_function *m = cfun->machine;
7424 rtx reg = gen_rtx_REG (mode, regno);
7425 rtx mem, addr, base, insn;
7426 unsigned int align = GET_MODE_ALIGNMENT (mode);
7427
7428 addr = choose_baseaddr (cfa_offset, align: &align);
7429 mem = gen_frame_mem (mode, addr);
7430
7431 /* The location alignment depends upon the base register. */
7432 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7433 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7434 set_mem_align (mem, align);
7435
7436 insn = emit_insn (gen_rtx_SET (mem, reg));
7437 RTX_FRAME_RELATED_P (insn) = 1;
7438
7439 base = addr;
7440 if (GET_CODE (base) == PLUS)
7441 base = XEXP (base, 0);
7442 gcc_checking_assert (REG_P (base));
7443
7444 /* When saving registers into a re-aligned local stack frame, avoid
7445 any tricky guessing by dwarf2out. */
7446 if (m->fs.realigned)
7447 {
7448 gcc_checking_assert (stack_realign_drap);
7449
7450 if (regno == REGNO (crtl->drap_reg))
7451 {
7452 /* A bit of a hack. We force the DRAP register to be saved in
7453 the re-aligned stack frame, which provides us with a copy
7454 of the CFA that will last past the prologue. Install it. */
7455 gcc_checking_assert (cfun->machine->fs.fp_valid);
7456 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7457 cfun->machine->fs.fp_offset - cfa_offset);
7458 mem = gen_rtx_MEM (mode, addr);
7459 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7460 }
7461 else
7462 {
7463 /* The frame pointer is a stable reference within the
7464 aligned frame. Use it. */
7465 gcc_checking_assert (cfun->machine->fs.fp_valid);
7466 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7467 cfun->machine->fs.fp_offset - cfa_offset);
7468 mem = gen_rtx_MEM (mode, addr);
7469 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7470 }
7471 }
7472
7473 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7474 && cfa_offset >= m->fs.sp_realigned_offset)
7475 {
7476 gcc_checking_assert (stack_realign_fp);
7477 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7478 }
7479
7480 /* The memory may not be relative to the current CFA register,
7481 which means that we may need to generate a new pattern for
7482 use by the unwind info. */
7483 else if (base != m->fs.cfa_reg)
7484 {
7485 addr = plus_constant (Pmode, m->fs.cfa_reg,
7486 m->fs.cfa_offset - cfa_offset);
7487 mem = gen_rtx_MEM (mode, addr);
7488 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7489 }
7490}
7491
7492/* Emit code to save registers using MOV insns.
7493 First register is stored at CFA - CFA_OFFSET. */
7494static void
7495ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7496{
7497 unsigned int regno;
7498
7499 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7500 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7501 {
7502 ix86_emit_save_reg_using_mov (mode: word_mode, regno, cfa_offset);
7503 cfa_offset -= UNITS_PER_WORD;
7504 }
7505}
7506
7507/* Emit code to save SSE registers using MOV insns.
7508 First register is stored at CFA - CFA_OFFSET. */
7509static void
7510ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7511{
7512 unsigned int regno;
7513
7514 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7515 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return: true, ignore_outlined: true))
7516 {
7517 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7518 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7519 }
7520}
7521
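/* REG_CFA_RESTORE notes queued until they can be attached to the next
   stack manipulation insn. */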
7522static GTY(()) rtx queued_cfa_restores;
7523
7524/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
7525 stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
7526 Don't add the note if the previously saved value will be left untouched
7527 within the stack red zone until return, as unwinders can find the same
7528 value in the register and on the stack. */
7529
7530static void
7531ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7532{
7533 if (!crtl->shrink_wrapped
7534 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7535 return;
7536
7537 if (insn)
7538 {
7539 add_reg_note (insn, REG_CFA_RESTORE, reg);
7540 RTX_FRAME_RELATED_P (insn) = 1;
7541 }
7542 else
7543 queued_cfa_restores
7544 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7545}
7546
7547/* Add queued REG_CFA_RESTORE notes if any to INSN. */
7548
7549static void
7550ix86_add_queued_cfa_restore_notes (rtx insn)
7551{
7552 rtx last;
7553 if (!queued_cfa_restores)
7554 return;
7555 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7556 ;
7557 XEXP (last, 1) = REG_NOTES (insn);
7558 REG_NOTES (insn) = queued_cfa_restores;
7559 queued_cfa_restores = NULL_RTX;
7560 RTX_FRAME_RELATED_P (insn) = 1;
7561}
7562
7563/* Expand prologue or epilogue stack adjustment.
7564 The pattern exists to put a dependency on all ebp-based memory accesses.
7565 STYLE should be negative if instructions should be marked as frame related,
7566 zero if the %r11 register is live and cannot be freely used, and positive
7567 otherwise. */
7568
7569static rtx
7570pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7571 int style, bool set_cfa)
7572{
7573 struct machine_function *m = cfun->machine;
7574 rtx addend = offset;
7575 rtx insn;
7576 bool add_frame_related_expr = false;
7577
7578 if (!x86_64_immediate_operand (offset, Pmode))
7579 {
7580 /* r11 is used by indirect sibcall return as well, set before the
7581 epilogue and used after the epilogue. */
7582 if (style)
7583 addend = gen_rtx_REG (Pmode, R11_REG);
7584 else
7585 {
7586 gcc_assert (src != hard_frame_pointer_rtx
7587 && dest != hard_frame_pointer_rtx);
7588 addend = hard_frame_pointer_rtx;
7589 }
7590 emit_insn (gen_rtx_SET (addend, offset));
7591 if (style < 0)
7592 add_frame_related_expr = true;
7593 }
7594
7595 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7596 (Pmode, x0: dest, x1: src, x2: addend));
7597 if (style >= 0)
7598 ix86_add_queued_cfa_restore_notes (insn);
7599
7600 if (set_cfa)
7601 {
7602 rtx r;
7603
7604 gcc_assert (m->fs.cfa_reg == src);
7605 m->fs.cfa_offset += INTVAL (offset);
7606 m->fs.cfa_reg = dest;
7607
7608 r = gen_rtx_PLUS (Pmode, src, offset);
7609 r = gen_rtx_SET (dest, r);
7610 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7611 RTX_FRAME_RELATED_P (insn) = 1;
7612 }
7613 else if (style < 0)
7614 {
7615 RTX_FRAME_RELATED_P (insn) = 1;
7616 if (add_frame_related_expr)
7617 {
7618 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7619 r = gen_rtx_SET (dest, r);
7620 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7621 }
7622 }
7623
7624 if (dest == stack_pointer_rtx)
7625 {
7626 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7627 bool valid = m->fs.sp_valid;
7628 bool realigned = m->fs.sp_realigned;
7629
7630 if (src == hard_frame_pointer_rtx)
7631 {
7632 valid = m->fs.fp_valid;
7633 realigned = false;
7634 ooffset = m->fs.fp_offset;
7635 }
7636 else if (src == crtl->drap_reg)
7637 {
7638 valid = m->fs.drap_valid;
7639 realigned = false;
7640 ooffset = 0;
7641 }
7642 else
7643 {
7644 /* Else there are two possibilities: SP itself, which we set
7645 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
7646 handled by hand along the eh_return path. */
7647 gcc_checking_assert (src == stack_pointer_rtx
7648 || offset == const0_rtx);
7649 }
7650
7651 m->fs.sp_offset = ooffset - INTVAL (offset);
7652 m->fs.sp_valid = valid;
7653 m->fs.sp_realigned = realigned;
7654 }
7655 return insn;
7656}
7657
7658/* Find an available register to be used as the dynamic realign argument
7659 pointer register.  Such a register will be written in the prologue and
7660 used at the beginning of the body, so it must not be
7661 1. a parameter passing register.
7662 2. the GOT pointer.
7663 We reuse static-chain register if it is available. Otherwise, we
7664 use DI for i386 and R13 for x86-64. We chose R13 since it has
7665 shorter encoding.
7666
7667 Return: the regno of chosen register. */
7668
7669static unsigned int
7670find_drap_reg (void)
7671{
7672 tree decl = cfun->decl;
7673
7674 /* Always use callee-saved register if there are no caller-saved
7675 registers. */
7676 if (TARGET_64BIT)
7677 {
7678 /* Use R13 for a nested function or a function that needs a static
7679 chain.  Since a function with a tail call may use any caller-saved
7680 register in the epilogue, DRAP must not use a caller-saved
7681 register in such a case. */
7682 if (DECL_STATIC_CHAIN (decl)
7683 || cfun->machine->no_caller_saved_registers
7684 || crtl->tail_call_emit)
7685 return R13_REG;
7686
7687 return R10_REG;
7688 }
7689 else
7690 {
7691 /* Use DI for a nested function or a function that needs a static
7692 chain.  Since a function with a tail call may use any caller-saved
7693 register in the epilogue, DRAP must not use a caller-saved
7694 register in such a case. */
7695 if (DECL_STATIC_CHAIN (decl)
7696 || cfun->machine->no_caller_saved_registers
7697 || crtl->tail_call_emit
7698 || crtl->calls_eh_return)
7699 return DI_REG;
7700
7701 /* Reuse static chain register if it isn't used for parameter
7702 passing. */
7703 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7704 {
7705 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7706 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7707 return CX_REG;
7708 }
7709 return DI_REG;
7710 }
7711}
7712
7713/* Return minimum incoming stack alignment. */
7714
7715static unsigned int
7716ix86_minimum_incoming_stack_boundary (bool sibcall)
7717{
7718 unsigned int incoming_stack_boundary;
7719
7720 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
7721 if (cfun->machine->func_type != TYPE_NORMAL)
7722 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7723 /* Prefer the one specified at command line. */
7724 else if (ix86_user_incoming_stack_boundary)
7725 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7726 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
7727 boundary if -mstackrealign is used, this isn't a sibcall check, and
7728 the estimated stack alignment is 128 bits. */
7729 else if (!sibcall
7730 && ix86_force_align_arg_pointer
7731 && crtl->stack_alignment_estimated == 128)
7732 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7733 else
7734 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7735
7736 /* Incoming stack alignment can be changed on individual functions
7737 via force_align_arg_pointer attribute. We use the smallest
7738 incoming stack boundary. */
7739 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7740 && lookup_attribute (attr_name: "force_align_arg_pointer",
7741 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7742 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7743
7744 /* The incoming stack frame has to be aligned at least at
7745 parm_stack_boundary. */
7746 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7747 incoming_stack_boundary = crtl->parm_stack_boundary;
7748
7749 /* The stack at the entry of main is aligned by the runtime. We use the
7750 smallest incoming stack boundary. */
7751 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7752 && DECL_NAME (current_function_decl)
7753 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7754 && DECL_FILE_SCOPE_P (current_function_decl))
7755 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7756
7757 return incoming_stack_boundary;
7758}
7759
7760/* Update incoming stack boundary and estimated stack alignment. */
7761
7762static void
7763ix86_update_stack_boundary (void)
7764{
7765 ix86_incoming_stack_boundary
7766 = ix86_minimum_incoming_stack_boundary (sibcall: false);
7767
7768 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
7769 if (TARGET_64BIT
7770 && cfun->stdarg
7771 && crtl->stack_alignment_estimated < 128)
7772 crtl->stack_alignment_estimated = 128;
7773
7774 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7775 if (ix86_tls_descriptor_calls_expanded_in_cfun
7776 && crtl->preferred_stack_boundary < 128)
7777 crtl->preferred_stack_boundary = 128;
7778}
7779
7780/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7781 needed or an rtx for DRAP otherwise. */
7782
7783static rtx
7784ix86_get_drap_rtx (void)
7785{
7786 /* We must use DRAP if there are outgoing arguments on the stack or
7787 the stack pointer register is clobbered by an asm statement, and
7788 ACCUMULATE_OUTGOING_ARGS is false. */
7789 if (ix86_force_drap
7790 || ((cfun->machine->outgoing_args_on_stack
7791 || crtl->sp_is_clobbered_by_asm)
7792 && !ACCUMULATE_OUTGOING_ARGS))
7793 crtl->need_drap = true;
7794
7795 if (stack_realign_drap)
7796 {
7797 /* Assign DRAP to vDRAP and return vDRAP. */
7798 unsigned int regno = find_drap_reg ();
7799 rtx drap_vreg;
7800 rtx arg_ptr;
7801 rtx_insn *seq, *insn;
7802
7803 arg_ptr = gen_rtx_REG (Pmode, regno);
7804 crtl->drap_reg = arg_ptr;
7805
7806 start_sequence ();
7807 drap_vreg = copy_to_reg (arg_ptr);
7808 seq = get_insns ();
7809 end_sequence ();
7810
7811 insn = emit_insn_before (seq, NEXT_INSN (insn: entry_of_function ()));
7812 if (!optimize)
7813 {
7814 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7815 RTX_FRAME_RELATED_P (insn) = 1;
7816 }
7817 return drap_vreg;
7818 }
7819 else
7820 return NULL;
7821}
7822
7823/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7824
7825static rtx
7826ix86_internal_arg_pointer (void)
7827{
7828 return virtual_incoming_args_rtx;
7829}
7830
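/* Descriptor for a scratch register obtained on function entry: the
   register itself and whether it had to be pushed to free it up. */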
7831struct scratch_reg {
7832 rtx reg;
7833 bool saved;
7834};
7835
7836/* Return a short-lived scratch register for use on function entry.
7837 In 32-bit mode, it is valid only after the registers are saved
7838 in the prologue. This register must be released by means of
7839 release_scratch_register_on_entry once it is dead. */
7840
7841static void
7842get_scratch_register_on_entry (struct scratch_reg *sr)
7843{
7844 int regno;
7845
7846 sr->saved = false;
7847
7848 if (TARGET_64BIT)
7849 {
7850 /* We always use R11 in 64-bit mode. */
7851 regno = R11_REG;
7852 }
7853 else
7854 {
7855 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7856 bool fastcall_p
7857 = lookup_attribute (attr_name: "fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7858 bool thiscall_p
7859 = lookup_attribute (attr_name: "thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7860 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7861 int regparm = ix86_function_regparm (type: fntype, decl);
7862 int drap_regno
7863 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7864
7865 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7866 for the static chain register. */
7867 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7868 && drap_regno != AX_REG)
7869 regno = AX_REG;
7870 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7871 for the static chain register. */
7872 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7873 regno = AX_REG;
7874 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7875 regno = DX_REG;
7876 /* ecx is the static chain register. */
7877 else if (regparm < 3 && !fastcall_p && !thiscall_p
7878 && !static_chain_p
7879 && drap_regno != CX_REG)
7880 regno = CX_REG;
7881 else if (ix86_save_reg (BX_REG, maybe_eh_return: true, ignore_outlined: false))
7882 regno = BX_REG;
7883 /* esi is the static chain register. */
7884 else if (!(regparm == 3 && static_chain_p)
7885 && ix86_save_reg (SI_REG, maybe_eh_return: true, ignore_outlined: false))
7886 regno = SI_REG;
7887 else if (ix86_save_reg (DI_REG, maybe_eh_return: true, ignore_outlined: false))
7888 regno = DI_REG;
7889 else
7890 {
7891 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7892 sr->saved = true;
7893 }
7894 }
7895
7896 sr->reg = gen_rtx_REG (Pmode, regno);
7897 if (sr->saved)
7898 {
7899 rtx_insn *insn = emit_insn (gen_push (arg: sr->reg));
7900 RTX_FRAME_RELATED_P (insn) = 1;
7901 }
7902}
7903
7904/* Release a scratch register obtained from the preceding function.
7905
7906 If RELEASE_VIA_POP is true, we just pop the register off the stack
7907 to release it. This is what non-Linux systems use with -fstack-check.
7908
7909 Otherwise we use OFFSET to locate the saved register and the
7910 allocated stack space becomes part of the local frame and is
7911 deallocated by the epilogue. */
7912
7913static void
7914release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7915 bool release_via_pop)
7916{
7917 if (sr->saved)
7918 {
7919 if (release_via_pop)
7920 {
7921 struct machine_function *m = cfun->machine;
7922 rtx x, insn = emit_insn (gen_pop (arg: sr->reg));
7923
7924 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7925 RTX_FRAME_RELATED_P (insn) = 1;
7926 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7927 x = gen_rtx_SET (stack_pointer_rtx, x);
7928 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7929 m->fs.sp_offset -= UNITS_PER_WORD;
7930 }
7931 else
7932 {
7933 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7934 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7935 emit_insn (x);
7936 }
7937 }
7938}
7939
7940/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7941
7942 If INT_REGISTERS_SAVED is true, then integer registers have already been
7943 pushed on the stack.
7944
7945 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7946 beyond SIZE bytes.
7947
7948 This assumes no knowledge of the current probing state, i.e. it is never
7949 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7950 a suitable probe. */
7951
7952static void
7953ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7954 const bool int_registers_saved,
7955 const bool protection_area)
7956{
7957 struct machine_function *m = cfun->machine;
7958
7959 /* If this function does not statically allocate stack space, then
7960 no probes are needed. */
7961 if (!size)
7962 {
7963 /* However, the allocation of space via pushes for register
7964 saves could be viewed as allocating space, but without the
7965 need to probe. */
7966 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7967 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7968 else
7969 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7970 return;
7971 }
7972
7973 /* If we are a noreturn function, then we have to consider the
7974 possibility that we're called via a jump rather than a call.
7975
7976 Thus we don't have the implicit probe generated by saving the
7977 return address into the stack at the call. Thus, the stack
7978 pointer could be anywhere in the guard page. The safe thing
7979 to do is emit a probe now.
7980
7981 The probe can be avoided if we have already emitted any callee
7982 register saves into the stack or have a frame pointer (which will
7983 have been saved as well). Those saves will function as implicit
7984 probes.
7985
7986 ?!? This should be revamped to work like aarch64 and s390 where
7987 we track the offset from the most recent probe. Normally that
7988 offset would be zero. For a noreturn function we would reset
7989 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7990 we just probe when we cross PROBE_INTERVAL. */
7991 if (TREE_THIS_VOLATILE (cfun->decl)
7992 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7993 {
7994 /* We can safely use any register here since we're just going to push
7995 its value and immediately pop it back. But we do try and avoid
7996 argument passing registers so as not to introduce dependencies in
7997 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7998 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7999 rtx_insn *insn_push = emit_insn (gen_push (arg: dummy_reg));
8000 rtx_insn *insn_pop = emit_insn (gen_pop (arg: dummy_reg));
8001 m->fs.sp_offset -= UNITS_PER_WORD;
8002 if (m->fs.cfa_reg == stack_pointer_rtx)
8003 {
8004 m->fs.cfa_offset -= UNITS_PER_WORD;
8005 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8006 x = gen_rtx_SET (stack_pointer_rtx, x);
8007 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8008 RTX_FRAME_RELATED_P (insn_push) = 1;
8009 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8010 x = gen_rtx_SET (stack_pointer_rtx, x);
8011 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8012 RTX_FRAME_RELATED_P (insn_pop) = 1;
8013 }
8014 emit_insn (gen_blockage ());
8015 }
8016
8017 const HOST_WIDE_INT probe_interval = get_probe_interval ();
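  /* Extra space ("dope") allocated beyond the probe interval when a
     protection area must be maintained; it is released again once the
     allocation is done. */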
8018 const int dope = 4 * UNITS_PER_WORD;
8019
8020 /* If there is protection area, take it into account in the size. */
8021 if (protection_area)
8022 size += probe_interval + dope;
8023
8024 /* If we allocate less than the size of the guard statically,
8025 then no probing is necessary, but we do need to allocate
8026 the stack. */
8027 else if (size < (1 << param_stack_clash_protection_guard_size))
8028 {
8029 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8030 GEN_INT (-size), style: -1,
8031 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8032 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8033 return;
8034 }
8035
8036 /* We're allocating a large enough stack frame that we need to
8037 emit probes. Either emit them inline or in a loop depending
8038 on the size. */
8039 if (size <= 4 * probe_interval)
8040 {
8041 HOST_WIDE_INT i;
8042 for (i = probe_interval; i <= size; i += probe_interval)
8043 {
8044 /* Allocate PROBE_INTERVAL bytes. */
8045 rtx insn
8046 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8047 GEN_INT (-probe_interval), style: -1,
8048 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8049 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8050
8051 /* And probe at *sp. */
8052 emit_stack_probe (stack_pointer_rtx);
8053 emit_insn (gen_blockage ());
8054 }
8055
8056 /* We need to allocate space for the residual, but we do not need
8057 to probe the residual... */
8058 HOST_WIDE_INT residual = (i - probe_interval - size);
8059 if (residual)
8060 {
8061 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8062 GEN_INT (residual), style: -1,
8063 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8064
8065 /* ...except if there is a protection area to maintain. */
8066 if (protection_area)
8067 emit_stack_probe (stack_pointer_rtx);
8068 }
8069
8070 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8071 }
8072 else
8073 {
8074 /* We expect the GP registers to be saved when probes are used
8075 as the probing sequences might need a scratch register and
8076 the routine to allocate one assumes the integer registers
8077 have already been saved. */
8078 gcc_assert (int_registers_saved);
8079
8080 struct scratch_reg sr;
8081 get_scratch_register_on_entry (sr: &sr);
8082
8083 /* If we needed to save a register, then account for any space
8084 that was pushed (we are not going to pop the register when
8085 we do the restore). */
8086 if (sr.saved)
8087 size -= UNITS_PER_WORD;
8088
8089 /* Step 1: round SIZE down to a multiple of the interval. */
8090 HOST_WIDE_INT rounded_size = size & -probe_interval;
8091
8092 /* Step 2: compute final value of the loop counter. Use lea if
8093 possible. */
8094 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8095 rtx insn;
8096 if (address_no_seg_operand (addr, Pmode))
8097 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8098 else
8099 {
8100 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8101 insn = emit_insn (gen_rtx_SET (sr.reg,
8102 gen_rtx_PLUS (Pmode, sr.reg,
8103 stack_pointer_rtx)));
8104 }
8105 if (m->fs.cfa_reg == stack_pointer_rtx)
8106 {
8107 add_reg_note (insn, REG_CFA_DEF_CFA,
8108 plus_constant (Pmode, sr.reg,
8109 m->fs.cfa_offset + rounded_size));
8110 RTX_FRAME_RELATED_P (insn) = 1;
8111 }
8112
8113 /* Step 3: the loop. */
8114 rtx size_rtx = GEN_INT (rounded_size);
8115 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, x0: sr.reg, x1: sr.reg,
8116 x2: size_rtx));
8117 if (m->fs.cfa_reg == stack_pointer_rtx)
8118 {
8119 m->fs.cfa_offset += rounded_size;
8120 add_reg_note (insn, REG_CFA_DEF_CFA,
8121 plus_constant (Pmode, stack_pointer_rtx,
8122 m->fs.cfa_offset));
8123 RTX_FRAME_RELATED_P (insn) = 1;
8124 }
8125 m->fs.sp_offset += rounded_size;
8126 emit_insn (gen_blockage ());
8127
8128 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8129 is equal to ROUNDED_SIZE. */
8130
8131 if (size != rounded_size)
8132 {
8133 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8134 GEN_INT (rounded_size - size), style: -1,
8135 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8136
8137 if (protection_area)
8138 emit_stack_probe (stack_pointer_rtx);
8139 }
8140
8141 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8142
8143 /* This does not deallocate the space reserved for the scratch
8144 register. That will be deallocated in the epilogue. */
8145 release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: false);
8146 }
8147
8148 /* Adjust back to account for the protection area. */
8149 if (protection_area)
8150 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8151 GEN_INT (probe_interval + dope), style: -1,
8152 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
8153
8154 /* Make sure nothing is scheduled before we are done. */
8155 emit_insn (gen_blockage ());
8156}
8157
8158/* Adjust the stack pointer up to REG while probing it. */
8159
8160const char *
8161output_adjust_stack_and_probe (rtx reg)
8162{
8163 static int labelno = 0;
8164 char loop_lab[32];
8165 rtx xops[2];
8166
8167 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8168
8169 /* Loop. */
8170 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8171
8172 /* SP = SP + PROBE_INTERVAL. */
8173 xops[0] = stack_pointer_rtx;
8174 xops[1] = GEN_INT (get_probe_interval ());
8175 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8176
8177 /* Probe at SP. */
8178 xops[1] = const0_rtx;
8179 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
8180
8181 /* Test if SP == LAST_ADDR. */
8182 xops[0] = stack_pointer_rtx;
8183 xops[1] = reg;
8184 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8185
8186 /* Branch. */
8187 fputs (s: "\tjne\t", stream: asm_out_file);
8188 assemble_name_raw (asm_out_file, loop_lab);
8189 fputc (c: '\n', stream: asm_out_file);
8190
8191 return "";
8192}
8193
8194/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8195 inclusive. These are offsets from the current stack pointer.
8196
8197 INT_REGISTERS_SAVED is true if integer registers have already been
8198 pushed on the stack. */
8199
8200static void
8201ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8202 const bool int_registers_saved)
8203{
8204 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8205
8206 /* See if we have a constant small number of probes to generate. If so,
8207 that's the easy case. The run-time loop is made up of 6 insns in the
8208 generic case while the compile-time loop is made up of n insns for n #
8209 of intervals. */
8210 if (size <= 6 * probe_interval)
8211 {
8212 HOST_WIDE_INT i;
8213
8214 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8215 it exceeds SIZE. If only one probe is needed, this will not
8216 generate any code. Then probe at FIRST + SIZE. */
8217 for (i = probe_interval; i < size; i += probe_interval)
8218 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8219 -(first + i)));
8220
8221 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8222 -(first + size)));
8223 }
8224
8225 /* Otherwise, do the same as above, but in a loop. Note that we must be
8226 extra careful with variables wrapping around because we might be at
8227 the very top (or the very bottom) of the address space and we have
8228 to be able to handle this case properly; in particular, we use an
8229 equality test for the loop condition. */
8230 else
8231 {
8232 /* We expect the GP registers to be saved when probes are used
8233 as the probing sequences might need a scratch register and
8234 the routine to allocate one assumes the integer registers
8235 have already been saved. */
8236 gcc_assert (int_registers_saved);
8237
8238 HOST_WIDE_INT rounded_size, last;
8239 struct scratch_reg sr;
8240
8241 get_scratch_register_on_entry (sr: &sr);
8242
8243
8244 /* Step 1: round SIZE to the previous multiple of the interval. */
8245
8246 rounded_size = ROUND_DOWN (size, probe_interval);
8247
8248
8249 /* Step 2: compute initial and final value of the loop counter. */
8250
8251 /* TEST_OFFSET = FIRST. */
8252 emit_move_insn (sr.reg, GEN_INT (-first));
8253
8254 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8255 last = first + rounded_size;
8256
8257
8258 /* Step 3: the loop
8259
8260 do
8261 {
8262 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8263 probe at TEST_ADDR
8264 }
8265 while (TEST_ADDR != LAST_ADDR)
8266
8267 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8268 until it is equal to ROUNDED_SIZE. */
8269
8270 emit_insn
8271 (gen_probe_stack_range (Pmode, x0: sr.reg, x1: sr.reg, GEN_INT (-last)));
8272
8273
8274 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8275 that SIZE is equal to ROUNDED_SIZE. */
8276
8277 if (size != rounded_size)
8278 emit_stack_probe (plus_constant (Pmode,
8279 gen_rtx_PLUS (Pmode,
8280 stack_pointer_rtx,
8281 sr.reg),
8282 rounded_size - size));
8283
8284 release_scratch_register_on_entry (sr: &sr, offset: size, release_via_pop: true);
8285 }
8286
8287 /* Make sure nothing is scheduled before we are done. */
8288 emit_insn (gen_blockage ());
8289}
8290
8291/* Probe a range of stack addresses from REG to END, inclusive. These are
8292 offsets from the current stack pointer. */
8293
8294const char *
8295output_probe_stack_range (rtx reg, rtx end)
8296{
8297 static int labelno = 0;
8298 char loop_lab[32];
8299 rtx xops[3];
8300
8301 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8302
8303 /* Loop. */
8304 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8305
8306 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8307 xops[0] = reg;
8308 xops[1] = GEN_INT (get_probe_interval ());
8309 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8310
8311 /* Probe at TEST_ADDR. */
8312 xops[0] = stack_pointer_rtx;
8313 xops[1] = reg;
8314 xops[2] = const0_rtx;
8315 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8316
8317 /* Test if TEST_ADDR == LAST_ADDR. */
8318 xops[0] = reg;
8319 xops[1] = end;
8320 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8321
8322 /* Branch. */
8323 fputs (s: "\tjne\t", stream: asm_out_file);
8324 assemble_name_raw (asm_out_file, loop_lab);
8325 fputc (c: '\n', stream: asm_out_file);
8326
8327 return "";
8328}
8329
8330/* Set stack_frame_required to false if a stack frame isn't required.
8331 Update STACK_ALIGNMENT to the largest alignment, in bits, of any stack
8332 slot used, if a stack frame is required and CHECK_STACK_SLOT is true. */
8333
8334static void
8335ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8336 bool check_stack_slot)
8337{
8338 HARD_REG_SET set_up_by_prologue, prologue_used;
8339 basic_block bb;
8340
8341 CLEAR_HARD_REG_SET (set&: prologue_used);
8342 CLEAR_HARD_REG_SET (set&: set_up_by_prologue);
8343 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8344 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8345 add_to_hard_reg_set (regs: &set_up_by_prologue, Pmode,
8346 HARD_FRAME_POINTER_REGNUM);
8347
8348 /* The preferred stack alignment is the minimum stack alignment. */
8349 if (stack_alignment > crtl->preferred_stack_boundary)
8350 stack_alignment = crtl->preferred_stack_boundary;
8351
8352 bool require_stack_frame = false;
8353
8354 FOR_EACH_BB_FN (bb, cfun)
8355 {
8356 rtx_insn *insn;
8357 FOR_BB_INSNS (bb, insn)
8358 if (NONDEBUG_INSN_P (insn)
8359 && requires_stack_frame_p (insn, prologue_used,
8360 set_up_by_prologue))
8361 {
8362 require_stack_frame = true;
8363
8364 if (check_stack_slot)
8365 {
8366 /* Find the maximum stack alignment. */
8367 subrtx_iterator::array_type array;
8368 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8369 if (MEM_P (*iter)
8370 && (reg_mentioned_p (stack_pointer_rtx,
8371 *iter)
8372 || reg_mentioned_p (frame_pointer_rtx,
8373 *iter)))
8374 {
8375 unsigned int alignment = MEM_ALIGN (*iter);
8376 if (alignment > stack_alignment)
8377 stack_alignment = alignment;
8378 }
8379 }
8380 }
8381 }
8382
8383 cfun->machine->stack_frame_required = require_stack_frame;
8384}
8385
8386/* Finalize stack_realign_needed and frame_pointer_needed flags, which
8387 will guide prologue/epilogue to be generated in correct form. */
8388
8389static void
8390ix86_finalize_stack_frame_flags (void)
8391{
8392 /* Check if stack realignment is really needed after reload, and
8393 store the result in cfun. */
8394 unsigned int incoming_stack_boundary
8395 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8396 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8397 unsigned int stack_alignment
8398 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8399 ? crtl->max_used_stack_slot_alignment
8400 : crtl->stack_alignment_needed);
8401 unsigned int stack_realign
8402 = (incoming_stack_boundary < stack_alignment);
8403 bool recompute_frame_layout_p = false;
8404
8405 if (crtl->stack_realign_finalized)
8406 {
8407 /* After stack_realign_needed is finalized, we can no longer
8408 change it. */
8409 gcc_assert (crtl->stack_realign_needed == stack_realign);
8410 return;
8411 }
8412
8413 /* It is always safe to compute max_used_stack_alignment.  We
8414 compute it only if a 128-bit aligned load/store may be generated
8415 on a misaligned stack slot, which would lead to a segfault. */
8416 bool check_stack_slot
8417 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8418 ix86_find_max_used_stack_alignment (stack_alignment,
8419 check_stack_slot);
8420
8421 /* If the only reason for frame_pointer_needed is that we conservatively
8422 assumed stack realignment might be needed or -fno-omit-frame-pointer
8423 is used, but in the end nothing that needed the stack alignment was
8424 spilled and no such stack access was needed, clear frame_pointer_needed
8425 and say we don't need stack realignment.
8426
8427 When a vector register is used for piecewise moves and stores, we don't
8428 increase stack_alignment_needed as there is no register spill for
8429 piecewise moves and stores. Since stack_realign_needed is set to true
8430 by checking stack_alignment_estimated, which is updated by pseudo
8431 vector register usage, we also need to check stack_realign_needed to
8432 eliminate the frame pointer. */
8433 if ((stack_realign
8434 || (!flag_omit_frame_pointer && optimize)
8435 || crtl->stack_realign_needed)
8436 && frame_pointer_needed
8437 && crtl->is_leaf
8438 && crtl->sp_is_unchanging
8439 && !ix86_current_function_calls_tls_descriptor
8440 && !crtl->accesses_prior_frames
8441 && !cfun->calls_alloca
8442 && !crtl->calls_eh_return
8443 /* See ira_setup_eliminable_regset for the rationale. */
8444 && !(STACK_CHECK_MOVING_SP
8445 && flag_stack_check
8446 && flag_exceptions
8447 && cfun->can_throw_non_call_exceptions)
8448 && !ix86_frame_pointer_required ()
8449 && ix86_get_frame_size () == 0
8450 && ix86_nsaved_sseregs () == 0
8451 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8452 {
8453 if (cfun->machine->stack_frame_required)
8454 {
8455 /* A stack frame is required.  If the stack alignment needed is less
8456 than the incoming stack boundary, don't realign the stack. */
8457 stack_realign = incoming_stack_boundary < stack_alignment;
8458 if (!stack_realign)
8459 {
8460 crtl->max_used_stack_slot_alignment
8461 = incoming_stack_boundary;
8462 crtl->stack_alignment_needed
8463 = incoming_stack_boundary;
8464 /* Also update preferred_stack_boundary for leaf
8465 functions. */
8466 crtl->preferred_stack_boundary
8467 = incoming_stack_boundary;
8468 }
8469 }
8470 else
8471 {
8472 /* If drap has been set, but it actually isn't live at the
8473 start of the function, there is no reason to set it up. */
8474 if (crtl->drap_reg)
8475 {
8476 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8477 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8478 REGNO (crtl->drap_reg)))
8479 {
8480 crtl->drap_reg = NULL_RTX;
8481 crtl->need_drap = false;
8482 }
8483 }
8484 else
8485 cfun->machine->no_drap_save_restore = true;
8486
8487 frame_pointer_needed = false;
8488 stack_realign = false;
8489 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8490 crtl->stack_alignment_needed = incoming_stack_boundary;
8491 crtl->stack_alignment_estimated = incoming_stack_boundary;
8492 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8493 crtl->preferred_stack_boundary = incoming_stack_boundary;
8494 df_finish_pass (true);
8495 df_scan_alloc (NULL);
8496 df_scan_blocks ();
8497 df_compute_regs_ever_live (true);
8498 df_analyze ();
8499
8500 if (flag_var_tracking)
8501 {
8502 /* Since frame pointer is no longer available, replace it with
8503 stack pointer - UNITS_PER_WORD in debug insns. */
8504 df_ref ref, next;
8505 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8506 ref; ref = next)
8507 {
8508 next = DF_REF_NEXT_REG (ref);
8509 if (!DF_REF_INSN_INFO (ref))
8510 continue;
8511
8512 /* Make sure the next ref is for a different instruction,
8513 so that we're not affected by the rescan. */
8514 rtx_insn *insn = DF_REF_INSN (ref);
8515 while (next && DF_REF_INSN (next) == insn)
8516 next = DF_REF_NEXT_REG (next);
8517
8518 if (DEBUG_INSN_P (insn))
8519 {
8520 bool changed = false;
8521 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8522 {
8523 rtx *loc = DF_REF_LOC (ref);
8524 if (*loc == hard_frame_pointer_rtx)
8525 {
8526 *loc = plus_constant (Pmode,
8527 stack_pointer_rtx,
8528 -UNITS_PER_WORD);
8529 changed = true;
8530 }
8531 }
8532 if (changed)
8533 df_insn_rescan (insn);
8534 }
8535 }
8536 }
8537
8538 recompute_frame_layout_p = true;
8539 }
8540 }
8541 else if (crtl->max_used_stack_slot_alignment >= 128
8542 && cfun->machine->stack_frame_required)
8543 {
8544 /* We don't need to realign the stack.  max_used_stack_alignment is
8545 used to decide how the stack frame should be aligned. This is
8546 independent of any psABI and of 32-bit vs 64-bit. */
8547 cfun->machine->max_used_stack_alignment
8548 = stack_alignment / BITS_PER_UNIT;
8549 }
8550
8551 if (crtl->stack_realign_needed != stack_realign)
8552 recompute_frame_layout_p = true;
8553 crtl->stack_realign_needed = stack_realign;
8554 crtl->stack_realign_finalized = true;
8555 if (recompute_frame_layout_p)
8556 ix86_compute_frame_layout ();
8557}
8558
8559/* Delete SET_GOT right after entry block if it is allocated to reg. */
8560
8561static void
8562ix86_elim_entry_set_got (rtx reg)
8563{
8564 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8565 rtx_insn *c_insn = BB_HEAD (bb);
8566 if (!NONDEBUG_INSN_P (c_insn))
8567 c_insn = next_nonnote_nondebug_insn (c_insn);
8568 if (c_insn && NONJUMP_INSN_P (c_insn))
8569 {
8570 rtx pat = PATTERN (insn: c_insn);
8571 if (GET_CODE (pat) == PARALLEL)
8572 {
8573 rtx vec = XVECEXP (pat, 0, 0);
8574 if (GET_CODE (vec) == SET
8575 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8576 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8577 delete_insn (c_insn);
8578 }
8579 }
8580}
8581
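/* Return a SET rtx that stores REG into, or loads REG from, the frame
   slot at FRAME_REG + OFFSET, depending on STORE. */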
8582static rtx
8583gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8584{
8585 rtx addr, mem;
8586
8587 if (offset)
8588 addr = plus_constant (Pmode, frame_reg, offset);
8589 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8590 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8591}
8592
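/* Return a SET rtx that loads REG from the frame slot at
   FRAME_REG + OFFSET. */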
8593static inline rtx
8594gen_frame_load (rtx reg, rtx frame_reg, int offset)
8595{
8596 return gen_frame_set (reg, frame_reg, offset, store: false);
8597}
8598
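/* Return a SET rtx that stores REG into the frame slot at
   FRAME_REG + OFFSET. */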
8599static inline rtx
8600gen_frame_store (rtx reg, rtx frame_reg, int offset)
8601{
8602 return gen_frame_set (reg, frame_reg, offset, store: true);
8603}
8604
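/* Emit the out-of-line save of the MS-to-SysV clobbered registers: point
   RAX at the save area and emit a single frame-related PARALLEL that
   references the save stub and describes the register stores it performs. */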
8605static void
8606ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8607{
8608 struct machine_function *m = cfun->machine;
8609 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8610 + m->call_ms2sysv_extra_regs;
8611 rtvec v = rtvec_alloc (ncregs + 1);
8612 unsigned int align, i, vi = 0;
8613 rtx_insn *insn;
8614 rtx sym, addr;
8615 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8616 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8617
8618 /* AL should only be live with sysv_abi. */
8619 gcc_assert (!ix86_eax_live_at_start_p ());
8620 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8621
8622 /* Set up RAX as the stub's base pointer.  We use stack_realign_offset
8623 regardless of whether we've actually realigned the stack or not. */
8624 align = GET_MODE_ALIGNMENT (V4SFmode);
8625 addr = choose_baseaddr (cfa_offset: frame.stack_realign_offset
8626 + xlogue.get_stub_ptr_offset (), align: &align, AX_REG);
8627 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8628
8629 emit_insn (gen_rtx_SET (rax, addr));
8630
8631 /* Get the stub symbol. */
8632 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8633 : XLOGUE_STUB_SAVE);
8634 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8635
8636 for (i = 0; i < ncregs; ++i)
8637 {
8638 const xlogue_layout::reginfo &r = xlogue.get_reginfo (reg: i);
8639 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8640 r.regno);
8641 RTVEC_ELT (v, vi++) = gen_frame_store (reg, frame_reg: rax, offset: -r.offset);
8642 }
8643
8644 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8645
8646 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8647 RTX_FRAME_RELATED_P (insn) = true;
8648}
8649
8650/* Generate and return an insn body to AND X with Y. */
8651
8652static rtx_insn *
8653gen_and2_insn (rtx x, rtx y)
8654{
8655 enum insn_code icode = optab_handler (op: and_optab, GET_MODE (x));
8656
8657 gcc_assert (insn_operand_matches (icode, 0, x));
8658 gcc_assert (insn_operand_matches (icode, 1, x));
8659 gcc_assert (insn_operand_matches (icode, 2, y));
8660
8661 return GEN_FCN (icode) (x, x, y);
8662}
8663
8664/* Expand the prologue into a bunch of separate insns. */
8665
8666void
8667ix86_expand_prologue (void)
8668{
8669 struct machine_function *m = cfun->machine;
8670 rtx insn, t;
8671 HOST_WIDE_INT allocate;
8672 bool int_registers_saved;
8673 bool sse_registers_saved;
8674 bool save_stub_call_needed;
8675 rtx static_chain = NULL_RTX;
8676
8677 ix86_last_zero_store_uid = 0;
8678 if (ix86_function_naked (fn: current_function_decl))
8679 {
8680 if (flag_stack_usage_info)
8681 current_function_static_stack_size = 0;
8682 return;
8683 }
8684
8685 ix86_finalize_stack_frame_flags ();
8686
8687 /* DRAP should not coexist with stack_realign_fp */
8688 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8689
8690 memset (s: &m->fs, c: 0, n: sizeof (m->fs));
8691
8692 /* Initialize CFA state for before the prologue. */
8693 m->fs.cfa_reg = stack_pointer_rtx;
8694 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8695
8696 /* Track SP offset to the CFA. We continue tracking this after we've
8697 swapped the CFA register away from SP. In the case of re-alignment
8698 this is fudged; we're interested in offsets within the local frame. */
8699 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8700 m->fs.sp_valid = true;
8701 m->fs.sp_realigned = false;
8702
8703 const struct ix86_frame &frame = cfun->machine->frame;
8704
8705 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (fn: current_function_decl))
8706 {
8707 /* We should have already generated an error for any use of
8708 ms_hook on a nested function. */
8709 gcc_checking_assert (!ix86_static_chain_on_stack);
8710
8711 /* Check if profiling is active and we should use the profiling
8712 before prologue variant. If so, sorry. */
8713 if (crtl->profile && flag_fentry != 0)
8714 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8715 "with %<-mfentry%> for 32-bit");
8716
8717 /* In ix86_asm_output_function_label we emitted:
8718 8b ff movl.s %edi,%edi
8719 55 push %ebp
8720 8b ec movl.s %esp,%ebp
8721
8722 This matches the hookable function prologue in Win32 API
8723 functions in Microsoft Windows XP Service Pack 2 and newer.
8724 Wine uses this to enable Windows apps to hook the Win32 API
8725 functions provided by Wine.
8726
8727 What that means is that we've already set up the frame pointer. */
8728
8729 if (frame_pointer_needed
8730 && !(crtl->drap_reg && crtl->stack_realign_needed))
8731 {
8732 rtx push, mov;
8733
8734 /* We've decided to use the frame pointer already set up.
8735 Describe this to the unwinder by pretending that both
8736 push and mov insns happen right here.
8737
8738 Putting the unwind info here at the end of the ms_hook
8739 is done so that we can make absolutely certain we get
8740 the required byte sequence at the start of the function,
8741 rather than relying on an assembler that can produce
8742 the exact encoding required.
8743
8744 However it does mean (in the unpatched case) that we have
8745 a 1 insn window where the asynchronous unwind info is
8746 incorrect. However, if we placed the unwind info at
8747 its correct location we would have incorrect unwind info
8748 in the patched case. Which is probably all moot since
8749 I don't expect Wine generates dwarf2 unwind info for the
8750 system libraries that use this feature. */
8751
8752 insn = emit_insn (gen_blockage ());
8753
8754 push = gen_push (hard_frame_pointer_rtx);
8755 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8756 stack_pointer_rtx);
8757 RTX_FRAME_RELATED_P (push) = 1;
8758 RTX_FRAME_RELATED_P (mov) = 1;
8759
8760 RTX_FRAME_RELATED_P (insn) = 1;
8761 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8762 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8763
8764 /* Note that gen_push incremented m->fs.cfa_offset, even
8765 though we didn't emit the push insn here. */
8766 m->fs.cfa_reg = hard_frame_pointer_rtx;
8767 m->fs.fp_offset = m->fs.cfa_offset;
8768 m->fs.fp_valid = true;
8769 }
8770 else
8771 {
8772 /* The frame pointer is not needed so pop %ebp again.
8773 This leaves us with a pristine state. */
8774 emit_insn (gen_pop (hard_frame_pointer_rtx));
8775 }
8776 }
8777
8778 /* The first insn of a function that accepts its static chain on the
8779 stack is to push the register that would be filled in by a direct
8780 call. This insn will be skipped by the trampoline. */
8781 else if (ix86_static_chain_on_stack)
8782 {
8783 static_chain = ix86_static_chain (cfun->decl, false);
8784 insn = emit_insn (gen_push (arg: static_chain));
8785 emit_insn (gen_blockage ());
8786
8787 /* We don't want to interpret this push insn as a register save,
8788 only as a stack adjustment. The real copy of the register as
8789 a save will be done later, if needed. */
8790 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8791 t = gen_rtx_SET (stack_pointer_rtx, t);
8792 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8793 RTX_FRAME_RELATED_P (insn) = 1;
8794 }
8795
8796 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8797 DRAP is needed and stack realignment is really needed after reload. */
8798 if (stack_realign_drap)
8799 {
8800 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8801
8802 /* Can't use DRAP in interrupt function. */
8803 if (cfun->machine->func_type != TYPE_NORMAL)
8804 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8805 "in interrupt service routine. This may be worked "
8806 "around by avoiding functions with aggregate return.");
8807
8808 /* Only need to push parameter pointer reg if it is caller saved. */
8809 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8810 {
8811 /* Push arg pointer reg */
8812 insn = emit_insn (gen_push (crtl->drap_reg));
8813 RTX_FRAME_RELATED_P (insn) = 1;
8814 }
8815
8816 /* Grab the argument pointer. */
8817 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8818 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8819 RTX_FRAME_RELATED_P (insn) = 1;
8820 m->fs.cfa_reg = crtl->drap_reg;
8821 m->fs.cfa_offset = 0;
8822
8823 /* Align the stack. */
8824 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8825 GEN_INT (-align_bytes)));
8826 RTX_FRAME_RELATED_P (insn) = 1;
8827
8828 /* Replicate the return address on the stack so that the return
8829 address can be reached via the (argp - 1) slot. This is needed
8830 to implement the macro RETURN_ADDR_RTX and the intrinsic function
8831 expand_builtin_return_addr, etc. */
8832 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8833 t = gen_frame_mem (word_mode, t);
8834 insn = emit_insn (gen_push (arg: t));
8835 RTX_FRAME_RELATED_P (insn) = 1;
8836
8837 /* For the purposes of frame and register save area addressing,
8838 we've started over with a new frame. */
8839 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8840 m->fs.realigned = true;
8841
8842 if (static_chain)
8843 {
8844 /* Replicate the static chain on the stack so that the static chain
8845 can be reached via the (argp - 2) slot. This is needed for
8846 nested functions with stack realignment. */
8847 insn = emit_insn (gen_push (arg: static_chain));
8848 RTX_FRAME_RELATED_P (insn) = 1;
8849 }
8850 }
8851
8852 int_registers_saved = (frame.nregs == 0);
8853 sse_registers_saved = (frame.nsseregs == 0);
8854 save_stub_call_needed = (m->call_ms2sysv);
8855 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8856
8857 if (frame_pointer_needed && !m->fs.fp_valid)
8858 {
8859 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8860 slower on all targets. Also sdb didn't like it. */
8861 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8862 RTX_FRAME_RELATED_P (insn) = 1;
8863
8864 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8865 {
8866 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8867 RTX_FRAME_RELATED_P (insn) = 1;
8868
8869 if (m->fs.cfa_reg == stack_pointer_rtx)
8870 m->fs.cfa_reg = hard_frame_pointer_rtx;
8871 m->fs.fp_offset = m->fs.sp_offset;
8872 m->fs.fp_valid = true;
8873 }
8874 }
8875
8876 if (!int_registers_saved)
8877 {
8878 /* If saving registers via PUSH, do so now. */
8879 if (!frame.save_regs_using_mov)
8880 {
8881 ix86_emit_save_regs ();
8882 int_registers_saved = true;
8883 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8884 }
8885
8886 /* When using the red zone we may start register saving before allocating
8887 the stack frame, saving one cycle of the prologue. However, avoid
8888 doing this if we have to probe the stack; at least on x86_64 the
8889 stack probe can turn into a call that clobbers a red zone location. */
8890 else if (ix86_using_red_zone ()
8891 && (! TARGET_STACK_PROBE
8892 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8893 {
8894 ix86_emit_save_regs_using_mov (cfa_offset: frame.reg_save_offset);
8895 cfun->machine->red_zone_used = true;
8896 int_registers_saved = true;
8897 }
8898 }
8899
8900 if (frame.red_zone_size != 0)
8901 cfun->machine->red_zone_used = true;
8902
8903 if (stack_realign_fp)
8904 {
8905 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8906 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8907
8908 /* Record last valid frame pointer offset. */
8909 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8910
8911 /* The computation of the size of the re-aligned stack frame means
8912 that we must allocate the size of the register save area before
8913 performing the actual alignment. Otherwise we cannot guarantee
8914 that there's enough storage above the realignment point. */
8915 allocate = frame.reg_save_offset - m->fs.sp_offset
8916 + frame.stack_realign_allocate;
8917 if (allocate)
8918 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8919 GEN_INT (-allocate), style: -1, set_cfa: false);
8920
8921 /* Align the stack. */
8922 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8923 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8924 m->fs.sp_realigned_offset = m->fs.sp_offset
8925 - frame.stack_realign_allocate;
8926 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8927 Beyond this point, stack access should be done via choose_baseaddr or
8928 by using sp_valid_at and fp_valid_at to determine the correct base
8929 register. Henceforth, any CFA offset should be thought of as logical
8930 and not physical. */
8931 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8932 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8933 m->fs.sp_realigned = true;
8934
8935 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8936 is needed to describe where a register is saved using a realigned
8937 stack pointer, so we need to invalidate the stack pointer for that
8938 target. */
8939 if (TARGET_SEH)
8940 m->fs.sp_valid = false;
8941
8942 /* If SP offset is non-immediate after allocation of the stack frame,
8943 then emit SSE saves or stub call prior to allocating the rest of the
8944 stack frame. This is less efficient for the out-of-line stub because
8945 we can't combine allocations across the call barrier, but it's better
8946 than using a scratch register. */
8947 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8948 - m->fs.sp_realigned_offset),
8949 Pmode))
8950 {
8951 if (!sse_registers_saved)
8952 {
8953 ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset);
8954 sse_registers_saved = true;
8955 }
8956 else if (save_stub_call_needed)
8957 {
8958 ix86_emit_outlined_ms2sysv_save (frame);
8959 save_stub_call_needed = false;
8960 }
8961 }
8962 }
8963
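  /* Remaining allocation needed to reach the final stack pointer offset,
     beyond what the register saves above have already consumed. */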
8964 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8965
8966 if (flag_stack_usage_info)
8967 {
8968 /* We start to count from ARG_POINTER. */
8969 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8970
8971 /* If it was realigned, take into account the fake frame. */
8972 if (stack_realign_drap)
8973 {
8974 if (ix86_static_chain_on_stack)
8975 stack_size += UNITS_PER_WORD;
8976
8977 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8978 stack_size += UNITS_PER_WORD;
8979
8980 /* This over-estimates by 1 minimal-stack-alignment-unit but
8981 mitigates that by counting in the new return address slot. */
8982 current_function_dynamic_stack_size
8983 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8984 }
8985
8986 current_function_static_stack_size = stack_size;
8987 }
8988
8989 /* On SEH targets with a very large frame size, allocate an area to save
8990 SSE registers (as the very large allocation won't be described). */
8991 if (TARGET_SEH
8992 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8993 && !sse_registers_saved)
8994 {
8995 HOST_WIDE_INT sse_size
8996 = frame.sse_reg_save_offset - frame.reg_save_offset;
8997
8998 gcc_assert (int_registers_saved);
8999
9000 /* No need to do stack checking as the area will be immediately
9001 written. */
9002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9003 GEN_INT (-sse_size), style: -1,
9004 set_cfa: m->fs.cfa_reg == stack_pointer_rtx);
9005 allocate -= sse_size;
9006 ix86_emit_save_sse_regs_using_mov (cfa_offset: frame.sse_reg_save_offset);
9007 sse_registers_saved = true;
9008 }
9009
9010 /* If stack clash protection is requested, then probe the stack, unless it
9011 is already probed on the target. */
9012 if (allocate >= 0
9013 && flag_stack_clash_protection
9014 && !ix86_target_stack_probe ())
9015 {
9016 ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: false);
9017 allocate = 0;
9018 }
9019
9020 /* The stack has already been decremented by the instruction calling us
9021 so probe if the size is non-negative to preserve the protection area. */
9022 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9023 {
9024 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9025
9026 if (STACK_CHECK_MOVING_SP)
9027 {
9028 if (crtl->is_leaf
9029 && !cfun->calls_alloca
9030 && allocate <= probe_interval)
9031 ;
9032
9033 else
9034 {
9035 ix86_adjust_stack_and_probe (size: allocate, int_registers_saved, protection_area: true);
9036 allocate = 0;
9037 }
9038 }
9039
9040 else
9041 {
9042 HOST_WIDE_INT size = allocate;
9043
9044 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9045 size = 0x80000000 - get_stack_check_protect () - 1;
9046
9047 if (TARGET_STACK_PROBE)
9048 {
9049 if (crtl->is_leaf && !cfun->calls_alloca)
9050 {
9051 if (size > probe_interval)
9052 ix86_emit_probe_stack_range (first: 0, size, int_registers_saved);
9053 }
9054 else
9055 ix86_emit_probe_stack_range (first: 0,
9056 size: size + get_stack_check_protect (),
9057 int_registers_saved);
9058 }
9059 else
9060 {
9061 if (crtl->is_leaf && !cfun->calls_alloca)
9062 {
9063 if (size > probe_interval
9064 && size > get_stack_check_protect ())
9065 		    ix86_emit_probe_stack_range (get_stack_check_protect (),
9066 						 (size
9067 						  - get_stack_check_protect ()),
9068 						 int_registers_saved);
9069 }
9070 else
9071 		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9072 int_registers_saved);
9073 }
9074 }
9075 }
9076
9077 if (allocate == 0)
9078 ;
9079 else if (!ix86_target_stack_probe ()
9080 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9081 {
9082 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9083 				 GEN_INT (-allocate), -1,
9084 				 m->fs.cfa_reg == stack_pointer_rtx);
9085 }
9086 else
9087 {
9088 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9089 rtx r10 = NULL;
9090 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9091 bool eax_live = ix86_eax_live_at_start_p ();
9092 bool r10_live = false;
9093
9094 if (TARGET_64BIT)
9095 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9096
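      /* The probed allocation below clobbers %eax (it carries the allocation
	 size to the stack probe worker) and, for 64-bit nested functions,
	 %r10 (the static chain register), so push whichever of them is live
	 on entry and reload it from the newly allocated frame afterwards.  */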
9097 if (eax_live)
9098 {
9099 	  insn = emit_insn (gen_push (eax));
9100 allocate -= UNITS_PER_WORD;
9101 /* Note that SEH directives need to continue tracking the stack
9102 pointer even after the frame pointer has been set up. */
9103 if (sp_is_cfa_reg || TARGET_SEH)
9104 {
9105 if (sp_is_cfa_reg)
9106 m->fs.cfa_offset += UNITS_PER_WORD;
9107 RTX_FRAME_RELATED_P (insn) = 1;
9108 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9109 gen_rtx_SET (stack_pointer_rtx,
9110 plus_constant (Pmode,
9111 stack_pointer_rtx,
9112 -UNITS_PER_WORD)));
9113 }
9114 }
9115
9116 if (r10_live)
9117 {
9118 r10 = gen_rtx_REG (Pmode, R10_REG);
9119 	  insn = emit_insn (gen_push (r10));
9120 allocate -= UNITS_PER_WORD;
9121 if (sp_is_cfa_reg || TARGET_SEH)
9122 {
9123 if (sp_is_cfa_reg)
9124 m->fs.cfa_offset += UNITS_PER_WORD;
9125 RTX_FRAME_RELATED_P (insn) = 1;
9126 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9127 gen_rtx_SET (stack_pointer_rtx,
9128 plus_constant (Pmode,
9129 stack_pointer_rtx,
9130 -UNITS_PER_WORD)));
9131 }
9132 }
9133
9134 emit_move_insn (eax, GEN_INT (allocate));
9135       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9136
9137 /* Use the fact that AX still contains ALLOCATE. */
9138 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9139 			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9140
9141 if (sp_is_cfa_reg || TARGET_SEH)
9142 {
9143 if (sp_is_cfa_reg)
9144 m->fs.cfa_offset += allocate;
9145 RTX_FRAME_RELATED_P (insn) = 1;
9146 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9147 gen_rtx_SET (stack_pointer_rtx,
9148 plus_constant (Pmode, stack_pointer_rtx,
9149 -allocate)));
9150 }
9151 m->fs.sp_offset += allocate;
9152
9153 /* Use stack_pointer_rtx for relative addressing so that code works for
9154 realigned stack. But this means that we need a blockage to prevent
9155 stores based on the frame pointer from being scheduled before. */
9156 if (r10_live && eax_live)
9157 {
9158 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9159 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9160 gen_frame_mem (word_mode, t));
9161 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9162 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9163 gen_frame_mem (word_mode, t));
9164 emit_insn (gen_memory_blockage ());
9165 }
9166 else if (eax_live || r10_live)
9167 {
9168 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9169 emit_move_insn (gen_rtx_REG (word_mode,
9170 (eax_live ? AX_REG : R10_REG)),
9171 gen_frame_mem (word_mode, t));
9172 emit_insn (gen_memory_blockage ());
9173 }
9174 }
9175 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9176
9177   /* If we haven't already set up the frame pointer, do so now.  */
9178 if (frame_pointer_needed && !m->fs.fp_valid)
9179 {
9180 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9181 GEN_INT (frame.stack_pointer_offset
9182 - frame.hard_frame_pointer_offset));
9183 insn = emit_insn (insn);
9184 RTX_FRAME_RELATED_P (insn) = 1;
9185 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9186
9187 if (m->fs.cfa_reg == stack_pointer_rtx)
9188 m->fs.cfa_reg = hard_frame_pointer_rtx;
9189 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9190 m->fs.fp_valid = true;
9191 }
9192
9193 if (!int_registers_saved)
9194     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9195   if (!sse_registers_saved)
9196     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9197 else if (save_stub_call_needed)
9198 ix86_emit_outlined_ms2sysv_save (frame);
9199
9200   /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
9201      in the prologue.  */
9202 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9203 {
9204 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9205 insn = emit_insn (gen_set_got (pic));
9206 RTX_FRAME_RELATED_P (insn) = 1;
9207 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9208 emit_insn (gen_prologue_use (pic));
9209       /* Delete any already-emitted SET_GOT, if one exists and was allocated
9210 	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
9211       ix86_elim_entry_set_got (pic);
9212 }
9213
9214 if (crtl->drap_reg && !crtl->stack_realign_needed)
9215 {
9216       /* vDRAP was set up, but after reload it turns out that stack
9217 	 realignment isn't necessary; emit the prologue code that sets up
9218 	 the DRAP without the stack realignment adjustment.  */
9219       t = choose_baseaddr (0, NULL);
9220 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9221 }
9222
9223 /* Prevent instructions from being scheduled into register save push
9224 sequence when access to the redzone area is done through frame pointer.
9225 The offset between the frame pointer and the stack pointer is calculated
9226 relative to the value of the stack pointer at the end of the function
9227 prologue, and moving instructions that access redzone area via frame
9228 pointer inside push sequence violates this assumption. */
9229 if (frame_pointer_needed && frame.red_zone_size)
9230 emit_insn (gen_memory_blockage ());
9231
9232 /* SEH requires that the prologue end within 256 bytes of the start of
9233 the function. Prevent instruction schedules that would extend that.
9234 Further, prevent alloca modifications to the stack pointer from being
9235 combined with prologue modifications. */
9236 if (TARGET_SEH)
9237 emit_insn (gen_prologue_use (stack_pointer_rtx));
9238}
9239
9240/* Emit code to restore REG using a POP insn. */
9241
9242static void
9243ix86_emit_restore_reg_using_pop (rtx reg)
9244{
9245 struct machine_function *m = cfun->machine;
9246   rtx_insn *insn = emit_insn (gen_pop (reg));
9247
9248   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9249 m->fs.sp_offset -= UNITS_PER_WORD;
9250
9251 if (m->fs.cfa_reg == crtl->drap_reg
9252 && REGNO (reg) == REGNO (crtl->drap_reg))
9253 {
9254 /* Previously we'd represented the CFA as an expression
9255 like *(%ebp - 8). We've just popped that value from
9256 the stack, which means we need to reset the CFA to
9257 the drap register. This will remain until we restore
9258 the stack pointer. */
9259 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9260 RTX_FRAME_RELATED_P (insn) = 1;
9261
9262 /* This means that the DRAP register is valid for addressing too. */
9263 m->fs.drap_valid = true;
9264 return;
9265 }
9266
9267 if (m->fs.cfa_reg == stack_pointer_rtx)
9268 {
9269 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9270 x = gen_rtx_SET (stack_pointer_rtx, x);
9271 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9272 RTX_FRAME_RELATED_P (insn) = 1;
9273
9274 m->fs.cfa_offset -= UNITS_PER_WORD;
9275 }
9276
9277 /* When the frame pointer is the CFA, and we pop it, we are
9278 swapping back to the stack pointer as the CFA. This happens
9279 for stack frames that don't allocate other data, so we assume
9280 the stack pointer is now pointing at the return address, i.e.
9281 the function entry state, which makes the offset be 1 word. */
9282 if (reg == hard_frame_pointer_rtx)
9283 {
9284 m->fs.fp_valid = false;
9285 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9286 {
9287 m->fs.cfa_reg = stack_pointer_rtx;
9288 m->fs.cfa_offset -= UNITS_PER_WORD;
9289
9290 add_reg_note (insn, REG_CFA_DEF_CFA,
9291 plus_constant (Pmode, stack_pointer_rtx,
9292 m->fs.cfa_offset));
9293 RTX_FRAME_RELATED_P (insn) = 1;
9294 }
9295 }
9296}
9297
9298 /* Emit code to restore REG1 and REG2 using a POP2 insn.  */
9299static void
9300ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2)
9301{
9302 struct machine_function *m = cfun->machine;
9303 const int offset = UNITS_PER_WORD * 2;
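  /* The two registers are restored with a single 16-byte (TImode) memory
     access that post-increments the stack pointer, so the frame state is
     adjusted by two words at once.  */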
9304
9305 rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9306 stack_pointer_rtx));
9307 rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9308
9309 RTX_FRAME_RELATED_P (insn) = 1;
9310
9311 rtx dwarf = NULL_RTX;
9312 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9313 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9314 REG_NOTES (insn) = dwarf;
9315 m->fs.sp_offset -= offset;
9316
9317 if (m->fs.cfa_reg == crtl->drap_reg
9318 && (REGNO (reg1) == REGNO (crtl->drap_reg)
9319 || REGNO (reg2) == REGNO (crtl->drap_reg)))
9320 {
9321 /* Previously we'd represented the CFA as an expression
9322 like *(%ebp - 8). We've just popped that value from
9323 the stack, which means we need to reset the CFA to
9324 the drap register. This will remain until we restore
9325 the stack pointer. */
9326 add_reg_note (insn, REG_CFA_DEF_CFA,
9327 REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9328 RTX_FRAME_RELATED_P (insn) = 1;
9329
9330 /* This means that the DRAP register is valid for addressing too. */
9331 m->fs.drap_valid = true;
9332 return;
9333 }
9334
9335 if (m->fs.cfa_reg == stack_pointer_rtx)
9336 {
9337 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9338 x = gen_rtx_SET (stack_pointer_rtx, x);
9339 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9340 RTX_FRAME_RELATED_P (insn) = 1;
9341
9342 m->fs.cfa_offset -= offset;
9343 }
9344
9345 /* When the frame pointer is the CFA, and we pop it, we are
9346 swapping back to the stack pointer as the CFA. This happens
9347 for stack frames that don't allocate other data, so we assume
9348 the stack pointer is now pointing at the return address, i.e.
9349 the function entry state, which makes the offset be 1 word. */
9350 if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9351 {
9352 m->fs.fp_valid = false;
9353 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9354 {
9355 m->fs.cfa_reg = stack_pointer_rtx;
9356 m->fs.cfa_offset -= offset;
9357
9358 add_reg_note (insn, REG_CFA_DEF_CFA,
9359 plus_constant (Pmode, stack_pointer_rtx,
9360 m->fs.cfa_offset));
9361 RTX_FRAME_RELATED_P (insn) = 1;
9362 }
9363 }
9364}
9365
9366/* Emit code to restore saved registers using POP insns. */
9367
9368static void
9369ix86_emit_restore_regs_using_pop (void)
9370{
9371 unsigned int regno;
9372
9373 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9374     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9375       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
9376}
9377
9378/* Emit code to restore saved registers using POP2 insns. */
9379
9380static void
9381ix86_emit_restore_regs_using_pop2 (void)
9382{
9383 int regno;
9384 int regno_list[2];
9385 regno_list[0] = regno_list[1] = -1;
9386 int loaded_regnum = 0;
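  /* POP2 restores a 16-byte stack slot, and we only use it when the stack
     pointer is known to be 16-byte aligned; until that alignment is reached
     we fall back to a single POP below.  */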
9387 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9388
9389 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9390     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9391 {
9392 if (aligned)
9393 {
9394 regno_list[loaded_regnum++] = regno;
9395 if (loaded_regnum == 2)
9396 {
9397 gcc_assert (regno_list[0] != -1
9398 && regno_list[1] != -1
9399 && regno_list[0] != regno_list[1]);
9400
9401 	      ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
9402 							      regno_list[0]),
9403 						gen_rtx_REG (word_mode,
9404 							     regno_list[1]));
9405 loaded_regnum = 0;
9406 regno_list[0] = regno_list[1] = -1;
9407 }
9408 }
9409 else
9410 {
9411 	    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
9412 aligned = true;
9413 }
9414 }
9415
9416 if (loaded_regnum == 1)
9417     ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]));
9418}
9419
9420 /* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
9421    skip the emit and only attach the notes to INSN.  */
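/* LEAVE is equivalent to the two-instruction sequence
       mov  %rbp, %rsp
       pop  %rbp
   so after it the stack pointer is valid again, pointing just above the slot
   that held the saved frame pointer, and the frame pointer is no longer
   valid; the frame-state updates below track exactly that.  */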
9422
9423static void
9424ix86_emit_leave (rtx_insn *insn)
9425{
9426 struct machine_function *m = cfun->machine;
9427
9428 if (!insn)
9429     insn = emit_insn (gen_leave (word_mode));
9430
9431 ix86_add_queued_cfa_restore_notes (insn);
9432
9433 gcc_assert (m->fs.fp_valid);
9434 m->fs.sp_valid = true;
9435 m->fs.sp_realigned = false;
9436 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9437 m->fs.fp_valid = false;
9438
9439 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9440 {
9441 m->fs.cfa_reg = stack_pointer_rtx;
9442 m->fs.cfa_offset = m->fs.sp_offset;
9443
9444 add_reg_note (insn, REG_CFA_DEF_CFA,
9445 plus_constant (Pmode, stack_pointer_rtx,
9446 m->fs.sp_offset));
9447 RTX_FRAME_RELATED_P (insn) = 1;
9448 }
9449 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9450 			     m->fs.fp_offset);
9451}
9452
9453/* Emit code to restore saved registers using MOV insns.
9454 First register is restored from CFA - CFA_OFFSET. */
9455static void
9456ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9457 bool maybe_eh_return)
9458{
9459 struct machine_function *m = cfun->machine;
9460 unsigned int regno;
9461
9462 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9463     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9464 {
9465 rtx reg = gen_rtx_REG (word_mode, regno);
9466 rtx mem;
9467 rtx_insn *insn;
9468
9469 mem = choose_baseaddr (cfa_offset, NULL);
9470 mem = gen_frame_mem (word_mode, mem);
9471 insn = emit_move_insn (reg, mem);
9472
9473 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9474 {
9475 /* Previously we'd represented the CFA as an expression
9476 like *(%ebp - 8). We've just popped that value from
9477 the stack, which means we need to reset the CFA to
9478 the drap register. This will remain until we restore
9479 the stack pointer. */
9480 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9481 RTX_FRAME_RELATED_P (insn) = 1;
9482
9483 /* This means that the DRAP register is valid for addressing. */
9484 m->fs.drap_valid = true;
9485 }
9486 else
9487 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9488
9489 cfa_offset -= UNITS_PER_WORD;
9490 }
9491}
9492
9493 /* Emit code to restore saved SSE registers using MOV insns.
9494    First register is restored from CFA - CFA_OFFSET.  */
9495static void
9496ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9497 bool maybe_eh_return)
9498{
9499 unsigned int regno;
9500
9501 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9502     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9503 {
9504 rtx reg = gen_rtx_REG (V4SFmode, regno);
9505 rtx mem;
9506 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9507
9508       mem = choose_baseaddr (cfa_offset, &align);
9509 mem = gen_rtx_MEM (V4SFmode, mem);
9510
9511       /* The location alignment depends upon the base register.  */
9512 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9513 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9514 set_mem_align (mem, align);
9515 emit_insn (gen_rtx_SET (reg, mem));
9516
9517 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9518
9519 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9520 }
9521}
9522
9523static void
9524ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9525 bool use_call, int style)
9526{
9527 struct machine_function *m = cfun->machine;
9528 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9529 + m->call_ms2sysv_extra_regs;
9530 rtvec v;
9531 unsigned int elems_needed, align, i, vi = 0;
9532 rtx_insn *insn;
9533 rtx sym, tmp;
9534 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9535 rtx r10 = NULL_RTX;
9536 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9537 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9538 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9539 rtx rsi_frame_load = NULL_RTX;
9540 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9541 enum xlogue_stub stub;
9542
9543 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9544
9545 /* If using a realigned stack, we should never start with padding. */
9546 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9547
9548 /* Setup RSI as the stub's base pointer. */
9549 align = GET_MODE_ALIGNMENT (V4SFmode);
9550   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9551 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9552
9553 emit_insn (gen_rtx_SET (rsi, tmp));
9554
9555 /* Get a symbol for the stub. */
9556 if (frame_pointer_needed)
9557 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9558 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9559 else
9560 stub = use_call ? XLOGUE_STUB_RESTORE
9561 : XLOGUE_STUB_RESTORE_TAIL;
9562 sym = xlogue.get_stub_rtx (stub);
9563
9564 elems_needed = ncregs;
9565 if (use_call)
9566 elems_needed += 1;
9567 else
9568 elems_needed += frame_pointer_needed ? 5 : 3;
9569 v = rtvec_alloc (elems_needed);
9570
9571 /* We call the epilogue stub when we need to pop incoming args or we are
9572 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9573 epilogue stub and it is the tail-call. */
9574 if (use_call)
9575 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9576 else
9577 {
9578 RTVEC_ELT (v, vi++) = ret_rtx;
9579 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9580 if (frame_pointer_needed)
9581 {
9582 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9583 gcc_assert (m->fs.fp_valid);
9584 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9585
9586 tmp = plus_constant (DImode, rbp, 8);
9587 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9588 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9589 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9590 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9591 }
9592 else
9593 {
9594 /* If no hard frame pointer, we set R10 to the SP restore value. */
9595 gcc_assert (!m->fs.fp_valid);
9596 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9597 gcc_assert (m->fs.sp_valid);
9598
9599 r10 = gen_rtx_REG (DImode, R10_REG);
9600 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9601 emit_insn (gen_rtx_SET (r10, tmp));
9602
9603 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9604 }
9605 }
9606
9607 /* Generate frame load insns and restore notes. */
9608 for (i = 0; i < ncregs; ++i)
9609 {
9610       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9611 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9612 rtx reg, frame_load;
9613
9614 reg = gen_rtx_REG (mode, r.regno);
9615       frame_load = gen_frame_load (reg, rsi, r.offset);
9616
9617 /* Save RSI frame load insn & note to add last. */
9618 if (r.regno == SI_REG)
9619 {
9620 gcc_assert (!rsi_frame_load);
9621 rsi_frame_load = frame_load;
9622 rsi_restore_offset = r.offset;
9623 }
9624 else
9625 {
9626 RTVEC_ELT (v, vi++) = frame_load;
9627 	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
9628 }
9629 }
9630
9631 /* Add RSI frame load & restore note at the end. */
9632 gcc_assert (rsi_frame_load);
9633 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9634 RTVEC_ELT (v, vi++) = rsi_frame_load;
9635   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9636 			     rsi_restore_offset);
9637
9638 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9639 if (!use_call && !frame_pointer_needed)
9640 {
9641 gcc_assert (m->fs.sp_valid);
9642 gcc_assert (!m->fs.sp_realigned);
9643
9644 /* At this point, R10 should point to frame.stack_realign_offset. */
9645 if (m->fs.cfa_reg == stack_pointer_rtx)
9646 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9647 m->fs.sp_offset = frame.stack_realign_offset;
9648 }
9649
9650 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9651 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9652 if (use_call)
9653 insn = emit_insn (tmp);
9654 else
9655 {
9656 insn = emit_jump_insn (tmp);
9657 JUMP_LABEL (insn) = ret_rtx;
9658
9659 if (frame_pointer_needed)
9660 ix86_emit_leave (insn);
9661 else
9662 {
9663 /* Need CFA adjust note. */
9664 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9665 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9666 }
9667 }
9668
9669 RTX_FRAME_RELATED_P (insn) = true;
9670 ix86_add_queued_cfa_restore_notes (insn);
9671
9672 /* If we're not doing a tail-call, we need to adjust the stack. */
9673 if (use_call && m->fs.sp_valid)
9674 {
9675 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9676 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9677 GEN_INT (dealloc), style,
9678 				 m->fs.cfa_reg == stack_pointer_rtx);
9679 }
9680}
9681
9682/* Restore function stack, frame, and registers. */
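/* STYLE is 1 for a normal epilogue, 0 for a sibcall epilogue (in which case
   no return instruction is emitted), and 2 for an eh_return epilogue, as the
   tests against STYLE below assume.  */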
9683
9684void
9685ix86_expand_epilogue (int style)
9686{
9687 struct machine_function *m = cfun->machine;
9688 struct machine_frame_state frame_state_save = m->fs;
9689 bool restore_regs_via_mov;
9690 bool using_drap;
9691 bool restore_stub_is_tail = false;
9692
9693   if (ix86_function_naked (current_function_decl))
9694 {
9695 /* The program should not reach this point. */
9696 emit_insn (gen_ud2 ());
9697 return;
9698 }
9699
9700 ix86_finalize_stack_frame_flags ();
9701 const struct ix86_frame &frame = cfun->machine->frame;
9702
9703 m->fs.sp_realigned = stack_realign_fp;
9704 m->fs.sp_valid = stack_realign_fp
9705 || !frame_pointer_needed
9706 || crtl->sp_is_unchanging;
9707 gcc_assert (!m->fs.sp_valid
9708 || m->fs.sp_offset == frame.stack_pointer_offset);
9709
9710 /* The FP must be valid if the frame pointer is present. */
9711 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9712 gcc_assert (!m->fs.fp_valid
9713 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9714
9715 /* We must have *some* valid pointer to the stack frame. */
9716 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9717
9718 /* The DRAP is never valid at this point. */
9719 gcc_assert (!m->fs.drap_valid);
9720
9721 /* See the comment about red zone and frame
9722 pointer usage in ix86_expand_prologue. */
9723 if (frame_pointer_needed && frame.red_zone_size)
9724 emit_insn (gen_memory_blockage ());
9725
9726 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9727 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9728
9729 /* Determine the CFA offset of the end of the red-zone. */
9730 m->fs.red_zone_offset = 0;
9731 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9732 {
9733 /* The red-zone begins below return address and error code in
9734 exception handler. */
9735 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9736
9737 /* When the register save area is in the aligned portion of
9738 the stack, determine the maximum runtime displacement that
9739 matches up with the aligned frame. */
9740 if (stack_realign_drap)
9741 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9742 + UNITS_PER_WORD);
9743 }
9744
9745 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9746
9747 /* Special care must be taken for the normal return case of a function
9748 using eh_return: the eax and edx registers are marked as saved, but
9749 not restored along this path. Adjust the save location to match. */
9750 if (crtl->calls_eh_return && style != 2)
9751 reg_save_offset -= 2 * UNITS_PER_WORD;
9752
9753 /* EH_RETURN requires the use of moves to function properly. */
9754 if (crtl->calls_eh_return)
9755 restore_regs_via_mov = true;
9756 /* SEH requires the use of pops to identify the epilogue. */
9757 else if (TARGET_SEH)
9758 restore_regs_via_mov = false;
9759   /* If we're only restoring one register and sp cannot be used then
9760      use a move instruction to restore the register, since it's
9761      less work than reloading sp and popping the register.  */
9762   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9763     restore_regs_via_mov = true;
9764 else if (TARGET_EPILOGUE_USING_MOVE
9765 && cfun->machine->use_fast_prologue_epilogue
9766 && (frame.nregs > 1
9767 || m->fs.sp_offset != reg_save_offset))
9768 restore_regs_via_mov = true;
9769 else if (frame_pointer_needed
9770 && !frame.nregs
9771 && m->fs.sp_offset != reg_save_offset)
9772 restore_regs_via_mov = true;
9773 else if (frame_pointer_needed
9774 && TARGET_USE_LEAVE
9775 && cfun->machine->use_fast_prologue_epilogue
9776 && frame.nregs == 1)
9777 restore_regs_via_mov = true;
9778 else
9779 restore_regs_via_mov = false;
9780
9781 if (restore_regs_via_mov || frame.nsseregs)
9782 {
9783 /* Ensure that the entire register save area is addressable via
9784 the stack pointer, if we will restore SSE regs via sp. */
9785 if (TARGET_64BIT
9786 && m->fs.sp_offset > 0x7fffffff
9787 	  && sp_valid_at (frame.stack_realign_offset + 1)
9788 && (frame.nsseregs + frame.nregs) != 0)
9789 {
9790 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9791 GEN_INT (m->fs.sp_offset
9792 - frame.sse_reg_save_offset),
9793 style,
9794 				     m->fs.cfa_reg == stack_pointer_rtx);
9795 }
9796 }
9797
9798 /* If there are any SSE registers to restore, then we have to do it
9799 via moves, since there's obviously no pop for SSE regs. */
9800 if (frame.nsseregs)
9801     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9802 					  style == 2);
9803
9804 if (m->call_ms2sysv)
9805 {
9806 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9807
9808 /* We cannot use a tail-call for the stub if:
9809 1. We have to pop incoming args,
9810 2. We have additional int regs to restore, or
9811 3. A sibling call will be the tail-call, or
9812 4. We are emitting an eh_return_internal epilogue.
9813
9814 	 TODO: Item 4 has not yet been tested!
9815
9816 If any of the above are true, we will call the stub rather than
9817 jump to it. */
9818 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9819       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9820 }
9821
9822   /* If using an out-of-line stub that is a tail-call, then... */
9823 if (m->call_ms2sysv && restore_stub_is_tail)
9824 {
9825       /* TODO: paranoid tests.  (remove eventually) */
9826 gcc_assert (m->fs.sp_valid);
9827 gcc_assert (!m->fs.sp_realigned);
9828 gcc_assert (!m->fs.fp_valid);
9829 gcc_assert (!m->fs.realigned);
9830 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9831 gcc_assert (!crtl->drap_reg);
9832 gcc_assert (!frame.nregs);
9833 }
9834 else if (restore_regs_via_mov)
9835 {
9836 rtx t;
9837
9838 if (frame.nregs)
9839 	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9840
9841 /* eh_return epilogues need %ecx added to the stack pointer. */
9842 if (style == 2)
9843 {
9844 rtx sa = EH_RETURN_STACKADJ_RTX;
9845 rtx_insn *insn;
9846
9847 /* Stack realignment doesn't work with eh_return. */
9848 if (crtl->stack_realign_needed)
9849 sorry ("Stack realignment not supported with "
9850 "%<__builtin_eh_return%>");
9851
9852 /* regparm nested functions don't work with eh_return. */
9853 if (ix86_static_chain_on_stack)
9854 sorry ("regparm nested function not supported with "
9855 "%<__builtin_eh_return%>");
9856
9857 if (frame_pointer_needed)
9858 {
9859 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9860 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9861 emit_insn (gen_rtx_SET (sa, t));
9862
9863 /* NB: eh_return epilogues must restore the frame pointer
9864 in word_mode since the upper 32 bits of RBP register
9865 can have any values. */
9866 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9867 rtx frame_reg = gen_rtx_REG (word_mode,
9868 HARD_FRAME_POINTER_REGNUM);
9869 insn = emit_move_insn (frame_reg, t);
9870
9871 /* Note that we use SA as a temporary CFA, as the return
9872 address is at the proper place relative to it. We
9873 pretend this happens at the FP restore insn because
9874 prior to this insn the FP would be stored at the wrong
9875 offset relative to SA, and after this insn we have no
9876 other reasonable register to use for the CFA. We don't
9877 bother resetting the CFA to the SP for the duration of
9878 the return insn, unless the control flow instrumentation
9879 is done. In this case the SP is used later and we have
9880 to reset CFA to SP. */
9881 add_reg_note (insn, REG_CFA_DEF_CFA,
9882 plus_constant (Pmode, sa, UNITS_PER_WORD));
9883 ix86_add_queued_cfa_restore_notes (insn);
9884 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9885 RTX_FRAME_RELATED_P (insn) = 1;
9886
9887 m->fs.cfa_reg = sa;
9888 m->fs.cfa_offset = UNITS_PER_WORD;
9889 m->fs.fp_valid = false;
9890
9891 	  pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9892 const0_rtx, style,
9893 flag_cf_protection);
9894 }
9895 else
9896 {
9897 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9898 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9899 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9900 ix86_add_queued_cfa_restore_notes (insn);
9901
9902 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9903 if (m->fs.cfa_offset != UNITS_PER_WORD)
9904 {
9905 m->fs.cfa_offset = UNITS_PER_WORD;
9906 add_reg_note (insn, REG_CFA_DEF_CFA,
9907 plus_constant (Pmode, stack_pointer_rtx,
9908 UNITS_PER_WORD));
9909 RTX_FRAME_RELATED_P (insn) = 1;
9910 }
9911 }
9912 m->fs.sp_offset = UNITS_PER_WORD;
9913 m->fs.sp_valid = true;
9914 m->fs.sp_realigned = false;
9915 }
9916 }
9917 else
9918 {
9919 /* SEH requires that the function end with (1) a stack adjustment
9920 if necessary, (2) a sequence of pops, and (3) a return or
9921 jump instruction. Prevent insns from the function body from
9922 being scheduled into this sequence. */
9923 if (TARGET_SEH)
9924 {
9925 /* Prevent a catch region from being adjacent to the standard
9926 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9927 nor several other flags that would be interesting to test are
9928 set up yet. */
9929 if (flag_non_call_exceptions)
9930 emit_insn (gen_nops (const1_rtx));
9931 else
9932 emit_insn (gen_blockage ());
9933 }
9934
9935 /* First step is to deallocate the stack frame so that we can
9936 pop the registers. If the stack pointer was realigned, it needs
9937 to be restored now. Also do it on SEH target for very large
9938 frame as the emitted instructions aren't allowed by the ABI
9939 in epilogues. */
9940 if (!m->fs.sp_valid || m->fs.sp_realigned
9941 || (TARGET_SEH
9942 && (m->fs.sp_offset - reg_save_offset
9943 >= SEH_MAX_FRAME_SIZE)))
9944 {
9945 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9946 GEN_INT (m->fs.fp_offset
9947 - reg_save_offset),
9948 				     style, false);
9949 }
9950 else if (m->fs.sp_offset != reg_save_offset)
9951 {
9952 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9953 GEN_INT (m->fs.sp_offset
9954 - reg_save_offset),
9955 style,
9956 				     m->fs.cfa_reg == stack_pointer_rtx);
9957 }
9958
9959 if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL)
9960 ix86_emit_restore_regs_using_pop2 ();
9961 else
9962 ix86_emit_restore_regs_using_pop ();
9963 }
9964
9965   /* If we used a frame pointer and haven't already got rid of it,
9966      then do so now.  */
9967 if (m->fs.fp_valid)
9968 {
9969 /* If the stack pointer is valid and pointing at the frame
9970 pointer store address, then we only need a pop. */
9971       if (sp_valid_at (frame.hfp_save_offset)
9972 && m->fs.sp_offset == frame.hfp_save_offset)
9973 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9974 /* Leave results in shorter dependency chains on CPUs that are
9975 able to grok it fast. */
9976 else if (TARGET_USE_LEAVE
9977 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9978 || !cfun->machine->use_fast_prologue_epilogue)
9979 ix86_emit_leave (NULL);
9980 else
9981 {
9982 pro_epilogue_adjust_stack (stack_pointer_rtx,
9983 hard_frame_pointer_rtx,
9984 				     const0_rtx, style, !using_drap);
9985 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9986 }
9987 }
9988
9989 if (using_drap)
9990 {
9991 int param_ptr_offset = UNITS_PER_WORD;
9992 rtx_insn *insn;
9993
9994 gcc_assert (stack_realign_drap);
9995
9996 if (ix86_static_chain_on_stack)
9997 param_ptr_offset += UNITS_PER_WORD;
9998 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9999 param_ptr_offset += UNITS_PER_WORD;
10000
10001 insn = emit_insn (gen_rtx_SET
10002 (stack_pointer_rtx,
10003 plus_constant (Pmode, crtl->drap_reg,
10004 -param_ptr_offset)));
10005 m->fs.cfa_reg = stack_pointer_rtx;
10006 m->fs.cfa_offset = param_ptr_offset;
10007 m->fs.sp_offset = param_ptr_offset;
10008 m->fs.realigned = false;
10009
10010 add_reg_note (insn, REG_CFA_DEF_CFA,
10011 plus_constant (Pmode, stack_pointer_rtx,
10012 param_ptr_offset));
10013 RTX_FRAME_RELATED_P (insn) = 1;
10014
10015 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10016 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10017 }
10018
10019 /* At this point the stack pointer must be valid, and we must have
10020 restored all of the registers. We may not have deallocated the
10021 entire stack frame. We've delayed this until now because it may
10022 be possible to merge the local stack deallocation with the
10023 deallocation forced by ix86_static_chain_on_stack. */
10024 gcc_assert (m->fs.sp_valid);
10025 gcc_assert (!m->fs.sp_realigned);
10026 gcc_assert (!m->fs.fp_valid);
10027 gcc_assert (!m->fs.realigned);
10028 if (m->fs.sp_offset != UNITS_PER_WORD)
10029 {
10030 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10031 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10032 				 style, true);
10033 }
10034 else
10035     ix86_add_queued_cfa_restore_notes (get_last_insn ());
10036
10037 /* Sibcall epilogues don't want a return instruction. */
10038 if (style == 0)
10039 {
10040 m->fs = frame_state_save;
10041 return;
10042 }
10043
10044 if (cfun->machine->func_type != TYPE_NORMAL)
10045 emit_jump_insn (gen_interrupt_return ());
10046 else if (crtl->args.pops_args && crtl->args.size)
10047 {
10048 rtx popc = GEN_INT (crtl->args.pops_args);
10049
10050 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10051 address, do explicit add, and jump indirectly to the caller. */
10052
10053 if (crtl->args.pops_args >= 65536)
10054 {
10055 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10056 rtx_insn *insn;
10057
10058 /* There is no "pascal" calling convention in any 64bit ABI. */
10059 gcc_assert (!TARGET_64BIT);
10060
10061 	  insn = emit_insn (gen_pop (ecx));
10062 m->fs.cfa_offset -= UNITS_PER_WORD;
10063 m->fs.sp_offset -= UNITS_PER_WORD;
10064
10065 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10066 x = gen_rtx_SET (stack_pointer_rtx, x);
10067 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10068 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10069 RTX_FRAME_RELATED_P (insn) = 1;
10070
10071 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10072 				     popc, -1, true);
10073 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10074 }
10075 else
10076 emit_jump_insn (gen_simple_return_pop_internal (popc));
10077 }
10078 else if (!m->call_ms2sysv || !restore_stub_is_tail)
10079 {
10080 /* In case of return from EH a simple return cannot be used
10081 as a return address will be compared with a shadow stack
10082 return address. Use indirect jump instead. */
10083 if (style == 2 && flag_cf_protection)
10084 {
10085 /* Register used in indirect jump must be in word_mode. But
10086 Pmode may not be the same as word_mode for x32. */
10087 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10088 rtx_insn *insn;
10089
10090 	  insn = emit_insn (gen_pop (ecx));
10091 m->fs.cfa_offset -= UNITS_PER_WORD;
10092 m->fs.sp_offset -= UNITS_PER_WORD;
10093
10094 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10095 x = gen_rtx_SET (stack_pointer_rtx, x);
10096 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10097 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10098 RTX_FRAME_RELATED_P (insn) = 1;
10099
10100 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10101 }
10102 else
10103 emit_jump_insn (gen_simple_return_internal ());
10104 }
10105
10106 /* Restore the state back to the state from the prologue,
10107 so that it's correct for the next epilogue. */
10108 m->fs = frame_state_save;
10109}
10110
10111/* Reset from the function's potential modifications. */
10112
10113static void
10114ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10115{
10116 if (pic_offset_table_rtx
10117 && !ix86_use_pseudo_pic_reg ())
10118 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10119
10120 if (TARGET_MACHO)
10121 {
10122 rtx_insn *insn = get_last_insn ();
10123 rtx_insn *deleted_debug_label = NULL;
10124
10125 /* Mach-O doesn't support labels at the end of objects, so if
10126 it looks like we might want one, take special action.
10127 First, collect any sequence of deleted debug labels. */
10128 while (insn
10129 && NOTE_P (insn)
10130 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10131 {
10132 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10133 notes only, instead set their CODE_LABEL_NUMBER to -1,
10134 otherwise there would be code generation differences
10135 in between -g and -g0. */
10136 if (NOTE_P (insn) && NOTE_KIND (insn)
10137 == NOTE_INSN_DELETED_DEBUG_LABEL)
10138 deleted_debug_label = insn;
10139 insn = PREV_INSN (insn);
10140 }
10141
10142 /* If we have:
10143 label:
10144 barrier
10145 then this needs to be detected, so skip past the barrier. */
10146
10147 if (insn && BARRIER_P (insn))
10148 insn = PREV_INSN (insn);
10149
10150 /* Up to now we've only seen notes or barriers. */
10151 if (insn)
10152 {
10153 if (LABEL_P (insn)
10154 || (NOTE_P (insn)
10155 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10156 /* Trailing label. */
10157 	    fputs ("\tnop\n", file);
10158 else if (cfun && ! cfun->is_thunk)
10159 {
10160 /* See if we have a completely empty function body, skipping
10161 the special case of the picbase thunk emitted as asm. */
10162 while (insn && ! INSN_P (insn))
10163 insn = PREV_INSN (insn);
10164 /* If we don't find any insns, we've got an empty function body;
10165 I.e. completely empty - without a return or branch. This is
10166 taken as the case where a function body has been removed
10167 because it contains an inline __builtin_unreachable(). GCC
10168 declares that reaching __builtin_unreachable() means UB so
10169 we're not obliged to do anything special; however, we want
10170 non-zero-sized function bodies. To meet this, and help the
10171 user out, let's trap the case. */
10172 if (insn == NULL)
10173 	      fputs ("\tud2\n", file);
10174 }
10175 }
10176 else if (deleted_debug_label)
10177 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10178 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10179 CODE_LABEL_NUMBER (insn) = -1;
10180 }
10181}
10182
10183/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10184
10185void
10186ix86_print_patchable_function_entry (FILE *file,
10187 unsigned HOST_WIDE_INT patch_area_size,
10188 bool record_p)
10189{
10190 if (cfun->machine->function_label_emitted)
10191 {
10192 /* NB: When ix86_print_patchable_function_entry is called after
10193 	 the function label has been emitted, we have inserted or queued
10194 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10195 place. There is nothing to do here. */
10196 return;
10197 }
10198
10199 default_print_patchable_function_entry (file, patch_area_size,
10200 record_p);
10201}
10202
10203/* Output patchable area. NB: default_print_patchable_function_entry
10204 isn't available in i386.md. */
10205
10206void
10207ix86_output_patchable_area (unsigned int patch_area_size,
10208 bool record_p)
10209{
10210 default_print_patchable_function_entry (asm_out_file,
10211 patch_area_size,
10212 record_p);
10213}
10214
10215/* Return a scratch register to use in the split stack prologue. The
10216 split stack prologue is used for -fsplit-stack. It is the first
10217 instructions in the function, even before the regular prologue.
10218 The scratch register can be any caller-saved register which is not
10219 used for parameters or for the static chain. */
10220
10221static unsigned int
10222split_stack_prologue_scratch_regno (void)
10223{
10224 if (TARGET_64BIT)
10225 return R11_REG;
10226 else
10227 {
10228 bool is_fastcall, is_thiscall;
10229 int regparm;
10230
10231       is_fastcall = (lookup_attribute ("fastcall",
10232 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10233 != NULL);
10234       is_thiscall = (lookup_attribute ("thiscall",
10235 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10236 != NULL);
10237 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10238
10239 if (is_fastcall)
10240 {
10241 if (DECL_STATIC_CHAIN (cfun->decl))
10242 {
10243 sorry ("%<-fsplit-stack%> does not support fastcall with "
10244 "nested function");
10245 return INVALID_REGNUM;
10246 }
10247 return AX_REG;
10248 }
10249 else if (is_thiscall)
10250 {
10251 if (!DECL_STATIC_CHAIN (cfun->decl))
10252 return DX_REG;
10253 return AX_REG;
10254 }
10255 else if (regparm < 3)
10256 {
10257 if (!DECL_STATIC_CHAIN (cfun->decl))
10258 return CX_REG;
10259 else
10260 {
10261 if (regparm >= 2)
10262 {
10263 sorry ("%<-fsplit-stack%> does not support 2 register "
10264 "parameters for a nested function");
10265 return INVALID_REGNUM;
10266 }
10267 return DX_REG;
10268 }
10269 }
10270 else
10271 {
10272 /* FIXME: We could make this work by pushing a register
10273 around the addition and comparison. */
10274 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10275 return INVALID_REGNUM;
10276 }
10277 }
10278}
10279
10280 /* A SYMBOL_REF for the function which allocates new stack space for
10281 -fsplit-stack. */
10282
10283static GTY(()) rtx split_stack_fn;
10284
10285/* A SYMBOL_REF for the more stack function when using the large
10286 model. */
10287
10288static GTY(()) rtx split_stack_fn_large;
10289
10290/* Return location of the stack guard value in the TLS block. */
10291
10292rtx
10293ix86_split_stack_guard (void)
10294{
10295 int offset;
10296 addr_space_t as = DEFAULT_TLS_SEG_REG;
10297 rtx r;
10298
10299 gcc_assert (flag_split_stack);
10300
10301#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10302 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10303#else
10304 gcc_unreachable ();
10305#endif
10306
10307 r = GEN_INT (offset);
10308 r = gen_const_mem (Pmode, r);
10309 set_mem_addr_space (r, as);
10310
10311 return r;
10312}
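/* As an illustration (not a statement about every target): on glibc-based
   GNU/Linux targets the guard is the __private_ss slot of the thread control
   block, so the memory returned above ends up being addressed as a constant
   offset from the TLS segment register (%fs in 64-bit mode, %gs in 32-bit
   mode), with TARGET_THREAD_SPLIT_STACK_OFFSET supplying that offset.  */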
10313
10314/* Handle -fsplit-stack. These are the first instructions in the
10315 function, even before the regular prologue. */
10316
10317void
10318ix86_expand_split_stack_prologue (void)
10319{
10320 HOST_WIDE_INT allocate;
10321 unsigned HOST_WIDE_INT args_size;
10322 rtx_code_label *label;
10323 rtx limit, current, allocate_rtx, call_fusage;
10324 rtx_insn *call_insn;
10325 rtx scratch_reg = NULL_RTX;
10326 rtx_code_label *varargs_label = NULL;
10327 rtx fn;
10328
10329 gcc_assert (flag_split_stack && reload_completed);
10330
10331 ix86_finalize_stack_frame_flags ();
10332 struct ix86_frame &frame = cfun->machine->frame;
10333 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10334
10335 /* This is the label we will branch to if we have enough stack
10336 space. We expect the basic block reordering pass to reverse this
10337 branch if optimizing, so that we branch in the unlikely case. */
10338 label = gen_label_rtx ();
10339
10340 /* We need to compare the stack pointer minus the frame size with
10341 the stack boundary in the TCB. The stack boundary always gives
10342 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10343 can compare directly. Otherwise we need to do an addition. */
10344
10345 limit = ix86_split_stack_guard ();
10346
10347 if (allocate < SPLIT_STACK_AVAILABLE)
10348 current = stack_pointer_rtx;
10349 else
10350 {
10351 unsigned int scratch_regno;
10352 rtx offset;
10353
10354 /* We need a scratch register to hold the stack pointer minus
10355 the required frame size. Since this is the very start of the
10356 function, the scratch register can be any caller-saved
10357 register which is not used for parameters. */
10358 offset = GEN_INT (- allocate);
10359 scratch_regno = split_stack_prologue_scratch_regno ();
10360 if (scratch_regno == INVALID_REGNUM)
10361 return;
10362 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10363 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10364 {
10365 /* We don't use gen_add in this case because it will
10366 want to split to lea, but when not optimizing the insn
10367 will not be split after this point. */
10368 emit_insn (gen_rtx_SET (scratch_reg,
10369 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10370 offset)));
10371 }
10372 else
10373 {
10374 emit_move_insn (scratch_reg, offset);
10375 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10376 }
10377 current = scratch_reg;
10378 }
10379
10380 ix86_expand_branch (GEU, current, limit, label);
10381 rtx_insn *jump_insn = get_last_insn ();
10382 JUMP_LABEL (jump_insn) = label;
10383
10384 /* Mark the jump as very likely to be taken. */
10385 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10386
10387 if (split_stack_fn == NULL_RTX)
10388 {
10389 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10390 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10391 }
10392 fn = split_stack_fn;
10393
10394 /* Get more stack space. We pass in the desired stack space and the
10395 size of the arguments to copy to the new stack. In 32-bit mode
10396 we push the parameters; __morestack will return on a new stack
10397 anyhow. In 64-bit mode we pass the parameters in r10 and
10398 r11. */
10399 allocate_rtx = GEN_INT (allocate);
10400 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10401 call_fusage = NULL_RTX;
10402 rtx pop = NULL_RTX;
10403 if (TARGET_64BIT)
10404 {
10405 rtx reg10, reg11;
10406
10407 reg10 = gen_rtx_REG (Pmode, R10_REG);
10408 reg11 = gen_rtx_REG (Pmode, R11_REG);
10409
10410 /* If this function uses a static chain, it will be in %r10.
10411 Preserve it across the call to __morestack. */
10412 if (DECL_STATIC_CHAIN (cfun->decl))
10413 {
10414 rtx rax;
10415
10416 rax = gen_rtx_REG (word_mode, AX_REG);
10417 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10418 	  use_reg (&call_fusage, rax);
10419 }
10420
10421 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10422 && !TARGET_PECOFF)
10423 {
10424 HOST_WIDE_INT argval;
10425
10426 gcc_assert (Pmode == DImode);
10427 /* When using the large model we need to load the address
10428 into a register, and we've run out of registers. So we
10429 switch to a different calling convention, and we call a
10430 different function: __morestack_large. We pass the
10431 argument size in the upper 32 bits of r10 and pass the
10432 frame size in the lower 32 bits. */
10433 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10434 gcc_assert ((args_size & 0xffffffff) == args_size);
10435
10436 if (split_stack_fn_large == NULL_RTX)
10437 {
10438 split_stack_fn_large
10439 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10440 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10441 }
10442 if (ix86_cmodel == CM_LARGE_PIC)
10443 {
10444 rtx_code_label *label;
10445 rtx x;
10446
10447 label = gen_label_rtx ();
10448 emit_label (label);
10449 LABEL_PRESERVE_P (label) = 1;
10450 emit_insn (gen_set_rip_rex64 (reg10, label));
10451 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10452 emit_insn (gen_add2_insn (reg10, reg11));
10453 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10454 UNSPEC_GOT);
10455 x = gen_rtx_CONST (Pmode, x);
10456 emit_move_insn (reg11, x);
10457 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10458 x = gen_const_mem (Pmode, x);
10459 emit_move_insn (reg11, x);
10460 }
10461 else
10462 emit_move_insn (reg11, split_stack_fn_large);
10463
10464 fn = reg11;
10465
10466 argval = ((args_size << 16) << 16) + allocate;
10467 emit_move_insn (reg10, GEN_INT (argval));
10468 }
10469 else
10470 {
10471 emit_move_insn (reg10, allocate_rtx);
10472 emit_move_insn (reg11, GEN_INT (args_size));
10473 	  use_reg (&call_fusage, reg11);
10474 }
10475
10476       use_reg (&call_fusage, reg10);
10477 }
10478 else
10479 {
10480 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10481 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10482       insn = emit_insn (gen_push (allocate_rtx));
10483 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10484 pop = GEN_INT (2 * UNITS_PER_WORD);
10485 }
10486 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10487 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10488 pop, false);
10489 add_function_usage_to (call_insn, call_fusage);
10490 if (!TARGET_64BIT)
10491 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10492 /* Indicate that this function can't jump to non-local gotos. */
10493 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10494
10495 /* In order to make call/return prediction work right, we now need
10496 to execute a return instruction. See
10497 libgcc/config/i386/morestack.S for the details on how this works.
10498
10499 For flow purposes gcc must not see this as a return
10500 instruction--we need control flow to continue at the subsequent
10501 label. Therefore, we use an unspec. */
10502 gcc_assert (crtl->args.pops_args < 65536);
10503 rtx_insn *ret_insn
10504 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10505
10506 if ((flag_cf_protection & CF_BRANCH))
10507 {
10508 /* Insert ENDBR since __morestack will jump back here via indirect
10509 call. */
10510 rtx cet_eb = gen_nop_endbr ();
10511 emit_insn_after (cet_eb, ret_insn);
10512 }
10513
10514 /* If we are in 64-bit mode and this function uses a static chain,
10515      we saved %r10 in %rax before calling __morestack.  */
10516 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10517 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10518 gen_rtx_REG (word_mode, AX_REG));
10519
10520 /* If this function calls va_start, we need to store a pointer to
10521 the arguments on the old stack, because they may not have been
10522 all copied to the new stack. At this point the old stack can be
10523 found at the frame pointer value used by __morestack, because
10524 __morestack has set that up before calling back to us. Here we
10525 store that pointer in a scratch register, and in
10526 ix86_expand_prologue we store the scratch register in a stack
10527 slot. */
10528 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10529 {
10530 unsigned int scratch_regno;
10531 rtx frame_reg;
10532 int words;
10533
10534 scratch_regno = split_stack_prologue_scratch_regno ();
10535 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10536 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10537
10538 /* 64-bit:
10539 fp -> old fp value
10540 return address within this function
10541 return address of caller of this function
10542 stack arguments
10543 So we add three words to get to the stack arguments.
10544
10545 32-bit:
10546 fp -> old fp value
10547 return address within this function
10548 first argument to __morestack
10549 second argument to __morestack
10550 return address of caller of this function
10551 stack arguments
10552 So we add five words to get to the stack arguments.
10553 */
10554 words = TARGET_64BIT ? 3 : 5;
10555 emit_insn (gen_rtx_SET (scratch_reg,
10556 plus_constant (Pmode, frame_reg,
10557 words * UNITS_PER_WORD)));
10558
10559 varargs_label = gen_label_rtx ();
10560 emit_jump_insn (gen_jump (varargs_label));
10561 JUMP_LABEL (get_last_insn ()) = varargs_label;
10562
10563 emit_barrier ();
10564 }
10565
10566 emit_label (label);
10567 LABEL_NUSES (label) = 1;
10568
10569 /* If this function calls va_start, we now have to set the scratch
10570 register for the case where we do not call __morestack. In this
10571 case we need to set it based on the stack pointer. */
10572 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10573 {
10574 emit_insn (gen_rtx_SET (scratch_reg,
10575 plus_constant (Pmode, stack_pointer_rtx,
10576 UNITS_PER_WORD)));
10577
10578 emit_label (varargs_label);
10579 LABEL_NUSES (varargs_label) = 1;
10580 }
10581}
10582
10583/* We may have to tell the dataflow pass that the split stack prologue
10584 is initializing a scratch register. */
10585
10586static void
10587ix86_live_on_entry (bitmap regs)
10588{
10589 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10590 {
10591 gcc_assert (flag_split_stack);
10592 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10593 }
10594}
10595
10596/* Extract the parts of an RTL expression that is a valid memory address
10597 for an instruction. Return false if the structure of the address is
10598 grossly off. */
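/* For example (a sketch, not tied to a particular RTL shape), an address
   computed as %ebx + %ecx*4 + 12 decomposes into base = %ebx, index = %ecx,
   scale = 4, disp = 12, i.e. the x86 addressing form 12(%ebx,%ecx,4); a
   thread-local reference may additionally carry an %fs/%gs segment in SEG.  */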
10599
10600bool
10601ix86_decompose_address (rtx addr, struct ix86_address *out)
10602{
10603 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10604 rtx base_reg, index_reg;
10605 HOST_WIDE_INT scale = 1;
10606 rtx scale_rtx = NULL_RTX;
10607 rtx tmp;
10608 addr_space_t seg = ADDR_SPACE_GENERIC;
10609
10610 /* Allow zero-extended SImode addresses,
10611 they will be emitted with addr32 prefix. */
10612 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10613 {
10614 if (GET_CODE (addr) == ZERO_EXTEND
10615 && GET_MODE (XEXP (addr, 0)) == SImode)
10616 {
10617 addr = XEXP (addr, 0);
10618 if (CONST_INT_P (addr))
10619 return false;
10620 }
10621 else if (GET_CODE (addr) == AND
10622 && const_32bit_mask (XEXP (addr, 1), DImode))
10623 {
10624 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10625 if (addr == NULL_RTX)
10626 return false;
10627
10628 if (CONST_INT_P (addr))
10629 return false;
10630 }
10631 else if (GET_CODE (addr) == AND)
10632 {
10633 /* For ASHIFT inside AND, combine will not generate
10634 canonical zero-extend. Merge mask for AND and shift_count
10635 to check if it is canonical zero-extend. */
10636 tmp = XEXP (addr, 0);
10637 rtx mask = XEXP (addr, 1);
10638 	  if (tmp && GET_CODE (tmp) == ASHIFT)
10639 	    {
10640 	      rtx shift_val = XEXP (tmp, 1);
10641 	      if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10642 		  && (((unsigned HOST_WIDE_INT) INTVAL (mask)
10643 		       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
10644 		      == 0xffffffff))
10645 {
10646 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10647 DImode);
10648 }
10649 }
10650
10651 }
10652 }
10653
10654 /* Allow SImode subregs of DImode addresses,
10655 they will be emitted with addr32 prefix. */
10656 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10657 {
10658 if (SUBREG_P (addr)
10659 && GET_MODE (SUBREG_REG (addr)) == DImode)
10660 {
10661 addr = SUBREG_REG (addr);
10662 if (CONST_INT_P (addr))
10663 return false;
10664 }
10665 }
10666
10667 if (REG_P (addr))
10668 base = addr;
10669 else if (SUBREG_P (addr))
10670 {
10671 if (REG_P (SUBREG_REG (addr)))
10672 base = addr;
10673 else
10674 return false;
10675 }
10676 else if (GET_CODE (addr) == PLUS)
10677 {
10678 rtx addends[4], op;
10679 int n = 0, i;
10680
10681 op = addr;
10682 do
10683 {
10684 if (n >= 4)
10685 return false;
10686 addends[n++] = XEXP (op, 1);
10687 op = XEXP (op, 0);
10688 }
10689 while (GET_CODE (op) == PLUS);
10690 if (n >= 4)
10691 return false;
10692 addends[n] = op;
10693
10694 for (i = n; i >= 0; --i)
10695 {
10696 op = addends[i];
10697 switch (GET_CODE (op))
10698 {
10699 case MULT:
10700 if (index)
10701 return false;
10702 index = XEXP (op, 0);
10703 scale_rtx = XEXP (op, 1);
10704 break;
10705
10706 case ASHIFT:
10707 if (index)
10708 return false;
10709 index = XEXP (op, 0);
10710 tmp = XEXP (op, 1);
10711 if (!CONST_INT_P (tmp))
10712 return false;
10713 scale = INTVAL (tmp);
10714 if ((unsigned HOST_WIDE_INT) scale > 3)
10715 return false;
10716 scale = 1 << scale;
10717 break;
10718
10719 case ZERO_EXTEND:
10720 op = XEXP (op, 0);
10721 if (GET_CODE (op) != UNSPEC)
10722 return false;
10723 /* FALLTHRU */
10724
10725 case UNSPEC:
10726 if (XINT (op, 1) == UNSPEC_TP
10727 && TARGET_TLS_DIRECT_SEG_REFS
10728 && seg == ADDR_SPACE_GENERIC)
10729 seg = DEFAULT_TLS_SEG_REG;
10730 else
10731 return false;
10732 break;
10733
10734 case SUBREG:
10735 if (!REG_P (SUBREG_REG (op)))
10736 return false;
10737 /* FALLTHRU */
10738
10739 case REG:
10740 if (!base)
10741 base = op;
10742 else if (!index)
10743 index = op;
10744 else
10745 return false;
10746 break;
10747
10748 case CONST:
10749 case CONST_INT:
10750 case SYMBOL_REF:
10751 case LABEL_REF:
10752 if (disp)
10753 return false;
10754 disp = op;
10755 break;
10756
10757 default:
10758 return false;
10759 }
10760 }
10761 }
10762 else if (GET_CODE (addr) == MULT)
10763 {
10764 index = XEXP (addr, 0); /* index*scale */
10765 scale_rtx = XEXP (addr, 1);
10766 }
10767 else if (GET_CODE (addr) == ASHIFT)
10768 {
10769 /* We're called for lea too, which implements ashift on occasion. */
10770 index = XEXP (addr, 0);
10771 tmp = XEXP (addr, 1);
10772 if (!CONST_INT_P (tmp))
10773 return false;
10774 scale = INTVAL (tmp);
10775 if ((unsigned HOST_WIDE_INT) scale > 3)
10776 return false;
10777 scale = 1 << scale;
10778 }
10779 else
10780 disp = addr; /* displacement */
10781
10782 if (index)
10783 {
10784 if (REG_P (index))
10785 ;
10786 else if (SUBREG_P (index)
10787 && REG_P (SUBREG_REG (index)))
10788 ;
10789 else
10790 return false;
10791 }
10792
10793 /* Extract the integral value of scale. */
10794 if (scale_rtx)
10795 {
10796 if (!CONST_INT_P (scale_rtx))
10797 return false;
10798 scale = INTVAL (scale_rtx);
10799 }
10800
10801 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10802 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10803
10804 /* Avoid useless 0 displacement. */
10805 if (disp == const0_rtx && (base || index))
10806 disp = NULL_RTX;
10807
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
10809 if (base_reg && index_reg && scale == 1
10810 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10811 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10812 || REGNO (index_reg) == SP_REG))
10813 {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
10816 }
10817
10818 /* Special case: %ebp cannot be encoded as a base without a displacement.
10819 Similarly %r13. */
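  /* (With mod == 00, the ModR/M or SIB base encoding 101 does not mean
     %ebp/%r13 but "disp32 follows", so these registers can only serve as
     a base together with an explicit, possibly zero, displacement.)  */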
10820 if (!disp && base_reg
10821 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10822 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10823 || REGNO (base_reg) == BP_REG
10824 || REGNO (base_reg) == R13_REG))
10825 disp = const0_rtx;
10826
10827 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10828 Avoid this by transforming to [%esi+0].
10829 Reload calls address legitimization without cfun defined, so we need
10830 to test cfun for being non-NULL. */
10831 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10832 && base_reg && !index_reg && !disp
10833 && REGNO (base_reg) == SI_REG)
10834 disp = const0_rtx;
10835
10836 /* Special case: encode reg+reg instead of reg*2. */
10837 if (!base && index && scale == 2)
10838 base = index, base_reg = index_reg, scale = 1;
10839
10840 /* Special case: scaling cannot be encoded without base or displacement. */
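  /* (An index without a base is encoded with a SIB byte whose base field
     is 101 and mod == 00, which always carries a 32-bit displacement
     field, hence the explicit zero.)  */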
10841 if (!base && !disp && index && scale != 1)
10842 disp = const0_rtx;
10843
10844 out->base = base;
10845 out->index = index;
10846 out->disp = disp;
10847 out->scale = scale;
10848 out->seg = seg;
10849
10850 return true;
10851}
10852
10853/* Return cost of the memory address x.
10854 For i386, it is better to use a complex address than let gcc copy
10855 the address into a reg and make a new pseudo. But not if the address
   requires two regs - that would mean more pseudos with longer
10857 lifetimes. */
10858static int
10859ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10860{
10861 struct ix86_address parts;
10862 int cost = 1;
  int ok = ix86_decompose_address (x, &parts);
10864
10865 gcc_assert (ok);
10866
10867 if (parts.base && SUBREG_P (parts.base))
10868 parts.base = SUBREG_REG (parts.base);
10869 if (parts.index && SUBREG_P (parts.index))
10870 parts.index = SUBREG_REG (parts.index);
10871
  /* Attempt to minimize the number of registers in the address by
     increasing the address cost for each used register.  We don't
     increase the address cost for "pic_offset_table_rtx".  When a memory
     operand using "pic_offset_table_rtx" is not invariant itself, it most
     likely means that the base or index is not invariant.  Therefore only
     "pic_offset_table_rtx" could be hoisted out, which is not profitable
     for x86.  */
10878 if (parts.base
10879 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10880 && (current_pass->type == GIMPLE_PASS
10881 || !pic_offset_table_rtx
10882 || !REG_P (parts.base)
10883 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10884 cost++;
10885
10886 if (parts.index
10887 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10888 && (current_pass->type == GIMPLE_PASS
10889 || !pic_offset_table_rtx
10890 || !REG_P (parts.index)
10891 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10892 cost++;
10893
  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of such instructions
     and decoding degenerates to vector decoding.  Increase the cost of
     such addresses here.  The penalty is at least 2 cycles.  It may be
     worthwhile to split such addresses or even to refuse them entirely.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding a zero
     displacement in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
10908
10909 if (TARGET_CPU_P (K6)
10910 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10911 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10912 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10913 cost += 10;
10914
10915 return cost;
10916}
10917
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
10921
10922static bool
10923darwin_local_data_pic (rtx disp)
10924{
10925 return (GET_CODE (disp) == UNSPEC
10926 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10927}
10928
10929/* True if the function symbol operand X should be loaded from GOT.
10930 If CALL_P is true, X is a call operand.
10931
10932 NB: -mno-direct-extern-access doesn't force load from GOT for
10933 call.
10934
10935 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
10936 statements, since a PIC register could not be available at the
10937 call site. */
10938
10939bool
10940ix86_force_load_from_GOT_p (rtx x, bool call_p)
10941{
10942 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
10943 && !TARGET_PECOFF && !TARGET_MACHO
10944 && (!flag_pic || this_is_asm_operands)
10945 && ix86_cmodel != CM_LARGE
10946 && ix86_cmodel != CM_LARGE_PIC
10947 && GET_CODE (x) == SYMBOL_REF
10948 && ((!call_p
10949 && (!ix86_direct_extern_access
10950 || (SYMBOL_REF_DECL (x)
		    && lookup_attribute ("nodirect_extern_access",
10952 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
10953 || (SYMBOL_REF_FUNCTION_P (x)
10954 && (!flag_plt
10955 || (SYMBOL_REF_DECL (x)
		     && lookup_attribute ("noplt",
10957 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
10958 && !SYMBOL_REF_LOCAL_P (x));
10959}
10960
10961/* Determine if a given RTX is a valid constant. We already know this
10962 satisfies CONSTANT_P. */
10963
10964static bool
10965ix86_legitimate_constant_p (machine_mode mode, rtx x)
10966{
10967 switch (GET_CODE (x))
10968 {
10969 case CONST:
10970 x = XEXP (x, 0);
10971
10972 if (GET_CODE (x) == PLUS)
10973 {
10974 if (!CONST_INT_P (XEXP (x, 1)))
10975 return false;
10976 x = XEXP (x, 0);
10977 }
10978
      if (TARGET_MACHO && darwin_local_data_pic (x))
10980 return true;
10981
10982 /* Only some unspecs are valid as "constants". */
10983 if (GET_CODE (x) == UNSPEC)
10984 switch (XINT (x, 1))
10985 {
10986 case UNSPEC_GOT:
10987 case UNSPEC_GOTOFF:
10988 case UNSPEC_PLTOFF:
10989 return TARGET_64BIT;
10990 case UNSPEC_TPOFF:
10991 case UNSPEC_NTPOFF:
10992 x = XVECEXP (x, 0, 0);
10993 return (GET_CODE (x) == SYMBOL_REF
10994 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10995 case UNSPEC_DTPOFF:
10996 x = XVECEXP (x, 0, 0);
10997 return (GET_CODE (x) == SYMBOL_REF
10998 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10999 default:
11000 return false;
11001 }
11002
11003 /* We must have drilled down to a symbol. */
11004 if (GET_CODE (x) == LABEL_REF)
11005 return true;
11006 if (GET_CODE (x) != SYMBOL_REF)
11007 return false;
11008 /* FALLTHRU */
11009
11010 case SYMBOL_REF:
11011 /* TLS symbols are never valid. */
11012 if (SYMBOL_REF_TLS_MODEL (x))
11013 return false;
11014
11015 /* DLLIMPORT symbols are never valid. */
11016 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11017 && SYMBOL_REF_DLLIMPORT_P (x))
11018 return false;
11019
11020#if TARGET_MACHO
11021 /* mdynamic-no-pic */
11022 if (MACHO_DYNAMIC_NO_PIC_P)
11023 return machopic_symbol_defined_p (x);
11024#endif
11025
11026 /* External function address should be loaded
11027 via the GOT slot to avoid PLT. */
11028 if (ix86_force_load_from_GOT_p (x))
11029 return false;
11030
11031 break;
11032
11033 CASE_CONST_SCALAR_INT:
11034 if (ix86_endbr_immediate_operand (x, VOIDmode))
11035 return false;
11036
11037 switch (mode)
11038 {
11039 case E_TImode:
11040 if (TARGET_64BIT)
11041 return true;
11042 /* FALLTHRU */
11043 case E_OImode:
11044 case E_XImode:
	  if (!standard_sse_constant_p (x, mode)
11046 && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
11047 ? XImode
11048 : (TARGET_AVX
11049 ? OImode
11050 : (TARGET_SSE2
11051 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11052 return false;
11053 default:
11054 break;
11055 }
11056 break;
11057
11058 case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
11060 return false;
11061 break;
11062
11063 case CONST_DOUBLE:
11064 if (mode == E_BFmode)
11065 return false;
11066
11067 default:
11068 break;
11069 }
11070
11071 /* Otherwise we handle everything else in the move patterns. */
11072 return true;
11073}
11074
11075/* Determine if it's legal to put X into the constant pool. This
11076 is not possible for the address of thread-local symbols, which
11077 is checked above. */
11078
11079static bool
11080ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11081{
11082 /* We can put any immediate constant in memory. */
11083 switch (GET_CODE (x))
11084 {
11085 CASE_CONST_ANY:
11086 return false;
11087
11088 default:
11089 break;
11090 }
11091
11092 return !ix86_legitimate_constant_p (mode, x);
11093}
11094
/* Return true if the symbol is marked as dllimport, or as a stub-variable,
   otherwise return false.  */
11097
11098static bool
11099is_imported_p (rtx x)
11100{
11101 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
11102 || GET_CODE (x) != SYMBOL_REF)
11103 return false;
11104
11105 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
11106}
11107
11108
11109/* Nonzero if the constant value X is a legitimate general operand
11110 when generating PIC code. It is given that flag_pic is on and
11111 that X satisfies CONSTANT_P. */
11112
11113bool
11114legitimate_pic_operand_p (rtx x)
11115{
11116 rtx inner;
11117
11118 switch (GET_CODE (x))
11119 {
11120 case CONST:
11121 inner = XEXP (x, 0);
11122 if (GET_CODE (inner) == PLUS
11123 && CONST_INT_P (XEXP (inner, 1)))
11124 inner = XEXP (inner, 0);
11125
11126 /* Only some unspecs are valid as "constants". */
11127 if (GET_CODE (inner) == UNSPEC)
11128 switch (XINT (inner, 1))
11129 {
11130 case UNSPEC_GOT:
11131 case UNSPEC_GOTOFF:
11132 case UNSPEC_PLTOFF:
11133 return TARGET_64BIT;
11134 case UNSPEC_TPOFF:
11135 x = XVECEXP (inner, 0, 0);
11136 return (GET_CODE (x) == SYMBOL_REF
11137 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11138 case UNSPEC_MACHOPIC_OFFSET:
11139 return legitimate_pic_address_disp_p (x);
11140 default:
11141 return false;
11142 }
11143 /* FALLTHRU */
11144
11145 case SYMBOL_REF:
11146 case LABEL_REF:
11147 return legitimate_pic_address_disp_p (x);
11148
11149 default:
11150 return true;
11151 }
11152}
11153
11154/* Determine if a given CONST RTX is a valid memory displacement
11155 in PIC mode. */
11156
11157bool
11158legitimate_pic_address_disp_p (rtx disp)
11159{
11160 bool saw_plus;
11161
11162 /* In 64bit mode we can allow direct addresses of symbols and labels
11163 when they are not dynamic symbols. */
11164 if (TARGET_64BIT)
11165 {
11166 rtx op0 = disp, op1;
11167
11168 switch (GET_CODE (disp))
11169 {
11170 case LABEL_REF:
11171 return true;
11172
11173 case CONST:
11174 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11175 break;
11176 op0 = XEXP (XEXP (disp, 0), 0);
11177 op1 = XEXP (XEXP (disp, 0), 1);
11178 if (!CONST_INT_P (op1))
11179 break;
11180 if (GET_CODE (op0) == UNSPEC
11181 && (XINT (op0, 1) == UNSPEC_DTPOFF
11182 || XINT (op0, 1) == UNSPEC_NTPOFF)
11183 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11184 return true;
11185 if (INTVAL (op1) >= 16*1024*1024
11186 || INTVAL (op1) < -16*1024*1024)
11187 break;
11188 if (GET_CODE (op0) == LABEL_REF)
11189 return true;
11190 if (GET_CODE (op0) == CONST
11191 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11192 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11193 return true;
11194 if (GET_CODE (op0) == UNSPEC
11195 && XINT (op0, 1) == UNSPEC_PCREL)
11196 return true;
11197 if (GET_CODE (op0) != SYMBOL_REF)
11198 break;
11199 /* FALLTHRU */
11200
11201 case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol always needs to be resolved.  */
11204 if (SYMBOL_REF_TLS_MODEL (op0)
11205 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11206 return false;
11207
11208 if (TARGET_PECOFF)
11209 {
	      if (is_imported_p (op0))
11211 return true;
11212
11213 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11214 break;
11215
11216 /* Non-external-weak function symbols need to be resolved only
11217 for the large model. Non-external symbols don't need to be
11218 resolved for large and medium models. For the small model,
11219 we don't need to resolve anything here. */
11220 if ((ix86_cmodel != CM_LARGE_PIC
11221 && SYMBOL_REF_FUNCTION_P (op0)
11222 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11223 || !SYMBOL_REF_EXTERNAL_P (op0)
11224 || ix86_cmodel == CM_SMALL_PIC)
11225 return true;
11226 }
11227 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11228 && (SYMBOL_REF_LOCAL_P (op0)
11229 || ((ix86_direct_extern_access
11230 && !(SYMBOL_REF_DECL (op0)
			    && lookup_attribute ("nodirect_extern_access",
11232 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11233 && HAVE_LD_PIE_COPYRELOC
11234 && flag_pie
11235 && !SYMBOL_REF_WEAK (op0)
11236 && !SYMBOL_REF_FUNCTION_P (op0)))
11237 && ix86_cmodel != CM_LARGE_PIC)
11238 return true;
11239 break;
11240
11241 default:
11242 break;
11243 }
11244 }
11245 if (GET_CODE (disp) != CONST)
11246 return false;
11247 disp = XEXP (disp, 0);
11248
11249 if (TARGET_64BIT)
11250 {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
	 distance of GOT table references.  We should not need these anyway.  */
11253 if (GET_CODE (disp) != UNSPEC
11254 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11255 && XINT (disp, 1) != UNSPEC_GOTOFF
11256 && XINT (disp, 1) != UNSPEC_PCREL
11257 && XINT (disp, 1) != UNSPEC_PLTOFF))
11258 return false;
11259
11260 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11261 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11262 return false;
11263 return true;
11264 }
11265
11266 saw_plus = false;
11267 if (GET_CODE (disp) == PLUS)
11268 {
11269 if (!CONST_INT_P (XEXP (disp, 1)))
11270 return false;
11271 disp = XEXP (disp, 0);
11272 saw_plus = true;
11273 }
11274
11275 if (TARGET_MACHO && darwin_local_data_pic (disp))
11276 return true;
11277
11278 if (GET_CODE (disp) != UNSPEC)
11279 return false;
11280
11281 switch (XINT (disp, 1))
11282 {
11283 case UNSPEC_GOT:
11284 if (saw_plus)
11285 return false;
11286 /* We need to check for both symbols and labels because VxWorks loads
11287 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11288 details. */
11289 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11290 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11291 case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
11295 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11296 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11297 && !TARGET_64BIT)
11298 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11299 return false;
11300 case UNSPEC_GOTTPOFF:
11301 case UNSPEC_GOTNTPOFF:
11302 case UNSPEC_INDNTPOFF:
11303 if (saw_plus)
11304 return false;
11305 disp = XVECEXP (disp, 0, 0);
11306 return (GET_CODE (disp) == SYMBOL_REF
11307 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11308 case UNSPEC_NTPOFF:
11309 disp = XVECEXP (disp, 0, 0);
11310 return (GET_CODE (disp) == SYMBOL_REF
11311 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11312 case UNSPEC_DTPOFF:
11313 disp = XVECEXP (disp, 0, 0);
11314 return (GET_CODE (disp) == SYMBOL_REF
11315 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11316 }
11317
11318 return false;
11319}
11320
11321/* Determine if op is suitable RTX for an address register.
11322 Return naked register if a register or a register subreg is
11323 found, otherwise return NULL_RTX. */
11324
11325static rtx
11326ix86_validate_address_register (rtx op)
11327{
11328 machine_mode mode = GET_MODE (op);
11329
11330 /* Only SImode or DImode registers can form the address. */
11331 if (mode != SImode && mode != DImode)
11332 return NULL_RTX;
11333
11334 if (REG_P (op))
11335 return op;
11336 else if (SUBREG_P (op))
11337 {
11338 rtx reg = SUBREG_REG (op);
11339
11340 if (!REG_P (reg))
11341 return NULL_RTX;
11342
11343 mode = GET_MODE (reg);
11344
11345 /* Don't allow SUBREGs that span more than a word. It can
11346 lead to spill failures when the register is one word out
11347 of a two word structure. */
11348 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11349 return NULL_RTX;
11350
11351 /* Allow only SUBREGs of non-eliminable hard registers. */
11352 if (register_no_elim_operand (reg, mode))
11353 return reg;
11354 }
11355
11356 /* Op is not a register. */
11357 return NULL_RTX;
11358}
11359
11360/* Determine which memory address register set insn can use. */
11361
11362static enum attr_addr
11363ix86_memory_address_reg_class (rtx_insn* insn)
11364{
11365 /* LRA can do some initialization with NULL insn,
11366 return maximum register class in this case. */
11367 enum attr_addr addr_rclass = ADDR_GPR32;
11368
11369 if (!insn)
11370 return addr_rclass;
11371
11372 if (asm_noperands (PATTERN (insn)) >= 0
11373 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
11374 return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
11375
11376 /* Return maximum register class for unrecognized instructions. */
11377 if (INSN_CODE (insn) < 0)
11378 return addr_rclass;
11379
11380 /* Try to recognize the insn before calling get_attr_addr.
11381 Save current recog_data and current alternative. */
11382 struct recog_data_d saved_recog_data = recog_data;
11383 int saved_alternative = which_alternative;
11384
11385 /* Update recog_data for processing of alternatives. */
11386 extract_insn_cached (insn);
11387
  /* If the current alternative is not set, loop through the enabled
     alternatives and get the most limited register class.  */
11390 if (saved_alternative == -1)
11391 {
11392 alternative_mask enabled = get_enabled_alternatives (insn);
11393
11394 for (int i = 0; i < recog_data.n_alternatives; i++)
11395 {
11396 if (!TEST_BIT (enabled, i))
11397 continue;
11398
11399 which_alternative = i;
11400 addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
11401 }
11402 }
11403 else
11404 {
11405 which_alternative = saved_alternative;
11406 addr_rclass = get_attr_addr (insn);
11407 }
11408
11409 recog_data = saved_recog_data;
11410 which_alternative = saved_alternative;
11411
11412 return addr_rclass;
11413}
11414
11415/* Return memory address register class insn can use. */
11416
11417enum reg_class
11418ix86_insn_base_reg_class (rtx_insn* insn)
11419{
11420 switch (ix86_memory_address_reg_class (insn))
11421 {
11422 case ADDR_GPR8:
11423 return LEGACY_GENERAL_REGS;
11424 case ADDR_GPR16:
11425 return GENERAL_GPR16;
11426 case ADDR_GPR32:
11427 break;
11428 default:
11429 gcc_unreachable ();
11430 }
11431
11432 return BASE_REG_CLASS;
11433}
11434
11435bool
11436ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11437{
11438 switch (ix86_memory_address_reg_class (insn))
11439 {
11440 case ADDR_GPR8:
11441 return LEGACY_INT_REGNO_P (regno);
11442 case ADDR_GPR16:
11443 return GENERAL_GPR16_REGNO_P (regno);
11444 case ADDR_GPR32:
11445 break;
11446 default:
11447 gcc_unreachable ();
11448 }
11449
11450 return GENERAL_REGNO_P (regno);
11451}
11452
11453enum reg_class
11454ix86_insn_index_reg_class (rtx_insn* insn)
11455{
11456 switch (ix86_memory_address_reg_class (insn))
11457 {
11458 case ADDR_GPR8:
11459 return LEGACY_INDEX_REGS;
11460 case ADDR_GPR16:
11461 return INDEX_GPR16;
11462 case ADDR_GPR32:
11463 break;
11464 default:
11465 gcc_unreachable ();
11466 }
11467
11468 return INDEX_REG_CLASS;
11469}
11470
11471/* Recognizes RTL expressions that are valid memory addresses for an
11472 instruction. The MODE argument is the machine mode for the MEM
11473 expression that wants to use this address.
11474
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11476 convert common non-canonical forms to canonical form so that they will
11477 be recognized. */
11478
11479static bool
11480ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11481 code_helper = ERROR_MARK)
11482{
11483 struct ix86_address parts;
11484 rtx base, index, disp;
11485 HOST_WIDE_INT scale;
11486 addr_space_t seg;
11487
  if (ix86_decompose_address (addr, &parts) == 0)
11489 /* Decomposition failed. */
11490 return false;
11491
11492 base = parts.base;
11493 index = parts.index;
11494 disp = parts.disp;
11495 scale = parts.scale;
11496 seg = parts.seg;
11497
11498 /* Validate base register. */
11499 if (base)
11500 {
      rtx reg = ix86_validate_address_register (base);
11502
11503 if (reg == NULL_RTX)
11504 return false;
11505
11506 unsigned int regno = REGNO (reg);
11507 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11508 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11509 /* Base is not valid. */
11510 return false;
11511 }
11512
11513 /* Validate index register. */
11514 if (index)
11515 {
      rtx reg = ix86_validate_address_register (index);
11517
11518 if (reg == NULL_RTX)
11519 return false;
11520
11521 unsigned int regno = REGNO (reg);
11522 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11523 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11524 /* Index is not valid. */
11525 return false;
11526 }
11527
11528 /* Index and base should have the same mode. */
11529 if (base && index
11530 && GET_MODE (base) != GET_MODE (index))
11531 return false;
11532
11533 /* Address override works only on the (%reg) part of %fs:(%reg). */
11534 if (seg != ADDR_SPACE_GENERIC
11535 && ((base && GET_MODE (base) != word_mode)
11536 || (index && GET_MODE (index) != word_mode)))
11537 return false;
11538
11539 /* Validate scale factor. */
11540 if (scale != 1)
11541 {
11542 if (!index)
11543 /* Scale without index. */
11544 return false;
11545
11546 if (scale != 2 && scale != 4 && scale != 8)
11547 /* Scale is not a valid multiplier. */
11548 return false;
11549 }
11550
11551 /* Validate displacement. */
11552 if (disp)
11553 {
11554 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11555 return false;
11556
11557 if (GET_CODE (disp) == CONST
11558 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11559 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11560 switch (XINT (XEXP (disp, 0), 1))
11561 {
	    /* Refuse GOTOFF and GOT in 64bit mode since they are always
	       64bit when used.  While the ABI also specifies 32bit
	       relocations, we don't produce them at all and use IP-relative
	       addressing instead.
	       Allow GOT in 32bit mode for both PIC and non-PIC if the
	       symbol should be loaded via the GOT.  */
11567 case UNSPEC_GOT:
11568 if (!TARGET_64BIT
11569 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11570 goto is_legitimate_pic;
11571 /* FALLTHRU */
11572 case UNSPEC_GOTOFF:
11573 gcc_assert (flag_pic);
11574 if (!TARGET_64BIT)
11575 goto is_legitimate_pic;
11576
11577 /* 64bit address unspec. */
11578 return false;
11579
11580 case UNSPEC_GOTPCREL:
11581 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11582 goto is_legitimate_pic;
11583 /* FALLTHRU */
11584 case UNSPEC_PCREL:
11585 gcc_assert (flag_pic);
11586 goto is_legitimate_pic;
11587
11588 case UNSPEC_GOTTPOFF:
11589 case UNSPEC_GOTNTPOFF:
11590 case UNSPEC_INDNTPOFF:
11591 case UNSPEC_NTPOFF:
11592 case UNSPEC_DTPOFF:
11593 break;
11594
11595 default:
11596 /* Invalid address unspec. */
11597 return false;
11598 }
11599
11600 else if (SYMBOLIC_CONST (disp)
11601 && (flag_pic
11602#if TARGET_MACHO
11603 || (MACHOPIC_INDIRECT
11604 && !machopic_operand_p (disp))
11605#endif
11606 ))
11607 {
11608
11609 is_legitimate_pic:
11610 if (TARGET_64BIT && (index || base))
11611 {
11612 /* foo@dtpoff(%rX) is ok. */
11613 if (GET_CODE (disp) != CONST
11614 || GET_CODE (XEXP (disp, 0)) != PLUS
11615 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11616 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11617 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11618 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11619 /* Non-constant pic memory reference. */
11620 return false;
11621 }
11622 else if ((!TARGET_MACHO || flag_pic)
11623 && ! legitimate_pic_address_disp_p (disp))
11624 /* Displacement is an invalid pic construct. */
11625 return false;
11626#if TARGET_MACHO
11627 else if (MACHO_DYNAMIC_NO_PIC_P
11628 && !ix86_legitimate_constant_p (Pmode, disp))
	  /* The displacement must be referenced via a non_lazy_pointer.  */
11630 return false;
11631#endif
11632
11633 /* This code used to verify that a symbolic pic displacement
11634 includes the pic_offset_table_rtx register.
11635
	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:
11639
11640 int a;
11641 int foo(int i)
11642 {
11643 return *(&a+i);
11644 }
11645
	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While
	     this could be handled by a separate addsi pattern for
	     this case that never results in lea, disabling this test
	     seems to be the easier and correct fix for the crash.  */
11654 }
11655 else if (GET_CODE (disp) != LABEL_REF
11656 && !CONST_INT_P (disp)
11657 && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
11661 /* Displacement is not constant. */
11662 return false;
11663 else if (TARGET_64BIT
11664 && !x86_64_immediate_operand (disp, VOIDmode))
11665 /* Displacement is out of range. */
11666 return false;
11667 /* In x32 mode, constant addresses are sign extended to 64bit, so
11668 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11669 else if (TARGET_X32 && !(index || base)
11670 && CONST_INT_P (disp)
11671 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11672 return false;
11673 }
11674
11675 /* Everything looks valid. */
11676 return true;
11677}
11678
11679/* Determine if a given RTX is a valid constant address. */
11680
11681bool
11682constant_address_p (rtx x)
11683{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11685}
11686
11687/* Return a unique alias set for the GOT. */
11688
11689alias_set_type
11690ix86_GOT_alias_set (void)
11691{
11692 static alias_set_type set = -1;
11693 if (set == -1)
11694 set = new_alias_set ();
11695 return set;
11696}
11697
11698/* Return a legitimate reference for ORIG (an address) using the
11699 register REG. If REG is 0, a new pseudo is generated.
11700
11701 There are two types of references that must be handled:
11702
11703 1. Global data references must load the address from the GOT, via
11704 the PIC reg. An insn is emitted to do this load, and the reg is
11705 returned.
11706
11707 2. Static data references, constant pool addresses, and code labels
11708 compute the address as an offset from the GOT, whose base is in
11709 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11710 differentiate them from global data objects. The returned
11711 address is the PIC reg + an unspec constant.
11712
11713 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11714 reg also appears in the address. */
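/* For example (illustrative), in 32-bit PIC code a global symbol SYM is
   rewritten below to
     (mem (plus pic_offset_table_rtx (const (unspec [SYM] UNSPEC_GOT))))
   i.e. a load from SYM@GOT(%ebx), while a local symbol becomes
     (plus pic_offset_table_rtx (const (unspec [SYM] UNSPEC_GOTOFF)))
   i.e. SYM@GOTOFF(%ebx).  */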
11715
11716rtx
11717legitimize_pic_address (rtx orig, rtx reg)
11718{
11719 rtx addr = orig;
11720 rtx new_rtx = orig;
11721
11722#if TARGET_MACHO
11723 if (TARGET_MACHO && !TARGET_64BIT)
11724 {
11725 if (reg == 0)
11726 reg = gen_reg_rtx (Pmode);
11727 /* Use the generic Mach-O PIC machinery. */
11728 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11729 }
11730#endif
11731
11732 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11733 {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
11735 if (tmp)
11736 return tmp;
11737 }
11738
  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11740 new_rtx = addr;
11741 else if ((!TARGET_64BIT
11742 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11743 && !TARGET_PECOFF
11744 && gotoff_operand (addr, Pmode))
11745 {
11746 /* This symbol may be referenced via a displacement
11747 from the PIC base address (@GOTOFF). */
11748 if (GET_CODE (addr) == CONST)
11749 addr = XEXP (addr, 0);
11750
11751 if (GET_CODE (addr) == PLUS)
11752 {
11753 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11754 UNSPEC_GOTOFF);
11755 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11756 }
11757 else
11758 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11759
11760 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11761
11762 if (TARGET_64BIT)
11763 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11764
11765 if (reg != 0)
11766 {
11767 gcc_assert (REG_P (reg));
11768 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11769 new_rtx, reg, 1, OPTAB_DIRECT);
11770 }
11771 else
11772 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11773 }
11774 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11775 /* We can't always use @GOTOFF for text labels
11776 on VxWorks, see gotoff_operand. */
11777 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11778 {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
11780 if (tmp)
11781 return tmp;
11782
      /* For x64 PE-COFF there is no GOT table,
	 so we use the address directly.  */
11785 if (TARGET_64BIT && TARGET_PECOFF)
11786 {
11787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11788 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11789 }
11790 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11791 {
11792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11793 UNSPEC_GOTPCREL);
11794 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11795 new_rtx = gen_const_mem (Pmode, new_rtx);
11796 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11797 }
11798 else
11799 {
11800 /* This symbol must be referenced via a load
11801 from the Global Offset Table (@GOT). */
11802 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11803 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11804
11805 if (TARGET_64BIT)
11806 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11807
11808 if (reg != 0)
11809 {
11810 gcc_assert (REG_P (reg));
11811 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11812 new_rtx, reg, 1, OPTAB_DIRECT);
11813 }
11814 else
11815 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11816
11817 new_rtx = gen_const_mem (Pmode, new_rtx);
11818 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11819 }
11820
11821 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11822 }
11823 else
11824 {
11825 if (CONST_INT_P (addr)
11826 && !x86_64_immediate_operand (addr, VOIDmode))
11827 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11828 else if (GET_CODE (addr) == CONST)
11829 {
11830 addr = XEXP (addr, 0);
11831
11832 /* We must match stuff we generate before. Assume the only
11833 unspecs that can get here are ours. Not that we could do
11834 anything with them anyway.... */
11835 if (GET_CODE (addr) == UNSPEC
11836 || (GET_CODE (addr) == PLUS
11837 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11838 return orig;
11839 gcc_assert (GET_CODE (addr) == PLUS);
11840 }
11841
11842 if (GET_CODE (addr) == PLUS)
11843 {
11844 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11845
11846 /* Check first to see if this is a constant
11847 offset from a @GOTOFF symbol reference. */
11848 if (!TARGET_PECOFF
11849 && gotoff_operand (op0, Pmode)
11850 && CONST_INT_P (op1))
11851 {
11852 if (!TARGET_64BIT)
11853 {
11854 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11855 UNSPEC_GOTOFF);
11856 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11857 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11858
11859 if (reg != 0)
11860 {
11861 gcc_assert (REG_P (reg));
11862 new_rtx = expand_simple_binop (Pmode, PLUS,
11863 pic_offset_table_rtx,
11864 new_rtx, reg, 1,
11865 OPTAB_DIRECT);
11866 }
11867 else
11868 new_rtx
11869 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11870 }
11871 else
11872 {
11873 if (INTVAL (op1) < -16*1024*1024
11874 || INTVAL (op1) >= 16*1024*1024)
11875 {
11876 if (!x86_64_immediate_operand (op1, Pmode))
11877 op1 = force_reg (Pmode, op1);
11878
11879 new_rtx
11880 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11881 }
11882 }
11883 }
11884 else
11885 {
	      rtx base = legitimize_pic_address (op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11890
11891 if (CONST_INT_P (new_rtx))
11892 {
11893 if (INTVAL (new_rtx) < -16*1024*1024
11894 || INTVAL (new_rtx) >= 16*1024*1024)
11895 {
11896 if (!x86_64_immediate_operand (new_rtx, mode))
11897 new_rtx = force_reg (mode, new_rtx);
11898
11899 new_rtx
11900 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11901 }
11902 else
11903 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11904 }
11905 else
11906 {
11907 /* For %rip addressing, we have to use
11908 just disp32, not base nor index. */
11909 if (TARGET_64BIT
11910 && (GET_CODE (base) == SYMBOL_REF
11911 || GET_CODE (base) == LABEL_REF))
11912 base = force_reg (mode, base);
11913 if (GET_CODE (new_rtx) == PLUS
11914 && CONSTANT_P (XEXP (new_rtx, 1)))
11915 {
11916 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11917 new_rtx = XEXP (new_rtx, 1);
11918 }
11919 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11920 }
11921 }
11922 }
11923 }
11924 return new_rtx;
11925}
11926
11927/* Load the thread pointer. If TO_REG is true, force it into a register. */
11928
11929static rtx
11930get_thread_pointer (machine_mode tp_mode, bool to_reg)
11931{
11932 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11933
11934 if (GET_MODE (tp) != tp_mode)
11935 {
11936 gcc_assert (GET_MODE (tp) == SImode);
11937 gcc_assert (tp_mode == DImode);
11938
11939 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11940 }
11941
11942 if (to_reg)
11943 tp = copy_to_mode_reg (tp_mode, tp);
11944
11945 return tp;
11946}
11947
11948/* Construct the SYMBOL_REF for the tls_get_addr function. */
11949
11950static GTY(()) rtx ix86_tls_symbol;
11951
11952static rtx
11953ix86_tls_get_addr (void)
11954{
11955 if (!ix86_tls_symbol)
11956 {
11957 const char *sym
11958 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11959 ? "___tls_get_addr" : "__tls_get_addr");
11960
11961 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11962 }
11963
11964 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11965 {
11966 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11967 UNSPEC_PLTOFF);
11968 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11969 gen_rtx_CONST (Pmode, unspec));
11970 }
11971
11972 return ix86_tls_symbol;
11973}
11974
11975/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11976
11977static GTY(()) rtx ix86_tls_module_base_symbol;
11978
11979rtx
11980ix86_tls_module_base (void)
11981{
11982 if (!ix86_tls_module_base_symbol)
11983 {
11984 ix86_tls_module_base_symbol
11985 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11986
11987 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11988 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11989 }
11990
11991 return ix86_tls_module_base_symbol;
11992}
11993
11994/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11995 false if we expect this to be used for a memory address and true if
11996 we expect to load the address into a register. */
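/* For example (illustrative), in the local-exec model with GNU TLS the
   result built below is (plus tp (const (unspec [x] UNSPEC_NTPOFF))),
   where tp is the UNSPEC_TP thread pointer; it is later printed as a
   thread-pointer-relative (%fs/%gs segment) address.  */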
11997
11998rtx
11999legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12000{
12001 rtx dest, base, off;
12002 rtx pic = NULL_RTX, tp = NULL_RTX;
12003 machine_mode tp_mode = Pmode;
12004 int type;
12005
12006 /* Fall back to global dynamic model if tool chain cannot support local
12007 dynamic. */
12008 if (TARGET_SUN_TLS && !TARGET_64BIT
12009 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12010 && model == TLS_MODEL_LOCAL_DYNAMIC)
12011 model = TLS_MODEL_GLOBAL_DYNAMIC;
12012
12013 switch (model)
12014 {
12015 case TLS_MODEL_GLOBAL_DYNAMIC:
12016 if (!TARGET_64BIT)
12017 {
12018 if (flag_pic && !TARGET_PECOFF)
12019 pic = pic_offset_table_rtx;
12020 else
12021 {
12022 pic = gen_reg_rtx (Pmode);
12023 emit_insn (gen_set_got (pic));
12024 }
12025 }
12026
12027 if (TARGET_GNU2_TLS)
12028 {
12029 dest = gen_reg_rtx (ptr_mode);
12030 if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12032 else
12033 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12034
	  tp = get_thread_pointer (ptr_mode, true);
12036 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12037 if (GET_MODE (dest) != Pmode)
12038 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12039 dest = force_reg (Pmode, dest);
12040
12041 if (GET_MODE (x) != Pmode)
12042 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12043
12044 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12045 }
12046 else
12047 {
12048 rtx caddr = ix86_tls_get_addr ();
12049
12050 dest = gen_reg_rtx (Pmode);
12051 if (TARGET_64BIT)
12052 {
12053 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12054 rtx_insn *insns;
12055
12056 start_sequence ();
12057 emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
12059 insns = get_insns ();
12060 end_sequence ();
12061
12062 if (GET_MODE (x) != Pmode)
12063 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12064
12065 RTL_CONST_CALL_P (insns) = 1;
12066 emit_libcall_block (insns, dest, rax, x);
12067 }
12068 else
12069 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12070 }
12071 break;
12072
12073 case TLS_MODEL_LOCAL_DYNAMIC:
12074 if (!TARGET_64BIT)
12075 {
12076 if (flag_pic)
12077 pic = pic_offset_table_rtx;
12078 else
12079 {
12080 pic = gen_reg_rtx (Pmode);
12081 emit_insn (gen_set_got (pic));
12082 }
12083 }
12084
12085 if (TARGET_GNU2_TLS)
12086 {
12087 rtx tmp = ix86_tls_module_base ();
12088
12089 base = gen_reg_rtx (ptr_mode);
12090 if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12092 else
12093 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12094
	  tp = get_thread_pointer (ptr_mode, true);
12096 if (GET_MODE (base) != Pmode)
12097 base = gen_rtx_ZERO_EXTEND (Pmode, base);
12098 base = force_reg (Pmode, base);
12099 }
12100 else
12101 {
12102 rtx caddr = ix86_tls_get_addr ();
12103
12104 base = gen_reg_rtx (Pmode);
12105 if (TARGET_64BIT)
12106 {
12107 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12108 rtx_insn *insns;
12109 rtx eqv;
12110
12111 start_sequence ();
12112 emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
12114 insns = get_insns ();
12115 end_sequence ();
12116
12117 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12118 share the LD_BASE result with other LD model accesses. */
12119 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12120 UNSPEC_TLS_LD_BASE);
12121
12122 RTL_CONST_CALL_P (insns) = 1;
12123 emit_libcall_block (insns, base, rax, eqv);
12124 }
12125 else
12126 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12127 }
12128
12129 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12130 off = gen_rtx_CONST (Pmode, off);
12131
12132 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12133
12134 if (TARGET_GNU2_TLS)
12135 {
12136 if (GET_MODE (tp) != Pmode)
12137 {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
12139 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12140 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12141 }
12142 else
12143 dest = gen_rtx_PLUS (Pmode, tp, dest);
12144 dest = force_reg (Pmode, dest);
12145
12146 if (GET_MODE (x) != Pmode)
12147 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12148
12149 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12150 }
12151 break;
12152
12153 case TLS_MODEL_INITIAL_EXEC:
12154 if (TARGET_64BIT)
12155 {
12156 if (TARGET_SUN_TLS && !TARGET_X32)
12157 {
12158 /* The Sun linker took the AMD64 TLS spec literally
12159 and can only handle %rax as destination of the
12160 initial executable code sequence. */
12161
12162 dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12164 return dest;
12165 }
12166
12167 /* Generate DImode references to avoid %fs:(%reg32)
12168 problems and linker IE->LE relaxation bug. */
12169 tp_mode = DImode;
12170 pic = NULL;
12171 type = UNSPEC_GOTNTPOFF;
12172 }
12173 else if (flag_pic)
12174 {
12175 pic = pic_offset_table_rtx;
12176 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12177 }
12178 else if (!TARGET_ANY_GNU_TLS)
12179 {
12180 pic = gen_reg_rtx (Pmode);
12181 emit_insn (gen_set_got (pic));
12182 type = UNSPEC_GOTTPOFF;
12183 }
12184 else
12185 {
12186 pic = NULL;
12187 type = UNSPEC_INDNTPOFF;
12188 }
12189
12190 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12191 off = gen_rtx_CONST (tp_mode, off);
12192 if (pic)
12193 off = gen_rtx_PLUS (tp_mode, pic, off);
12194 off = gen_const_mem (tp_mode, off);
12195 set_mem_alias_set (off, ix86_GOT_alias_set ());
12196
12197 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12198 {
12199 base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12201 off = force_reg (tp_mode, off);
12202 dest = gen_rtx_PLUS (tp_mode, base, off);
12203 if (tp_mode != Pmode)
12204 dest = convert_to_mode (Pmode, dest, 1);
12205 }
12206 else
12207 {
	  base = get_thread_pointer (Pmode, true);
12209 dest = gen_reg_rtx (Pmode);
12210 emit_insn (gen_sub3_insn (dest, base, off));
12211 }
12212 break;
12213
12214 case TLS_MODEL_LOCAL_EXEC:
12215 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12216 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12217 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12218 off = gen_rtx_CONST (Pmode, off);
12219
12220 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12221 {
12222 base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12224 return gen_rtx_PLUS (Pmode, base, off);
12225 }
12226 else
12227 {
	  base = get_thread_pointer (Pmode, true);
12229 dest = gen_reg_rtx (Pmode);
12230 emit_insn (gen_sub3_insn (dest, base, off));
12231 }
12232 break;
12233
12234 default:
12235 gcc_unreachable ();
12236 }
12237
12238 return dest;
12239}
12240
/* Return true if the TLS address requires an insn using integer registers.
   This is used to prevent KMOV/VMOV in TLS code sequences, which require
   integer MOV instructions; refer to PR103275.  */
12244bool
12245ix86_gpr_tls_address_pattern_p (rtx mem)
12246{
12247 gcc_assert (MEM_P (mem));
12248
12249 rtx addr = XEXP (mem, 0);
12250 subrtx_var_iterator::array_type array;
12251 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12252 {
12253 rtx op = *iter;
12254 if (GET_CODE (op) == UNSPEC)
12255 switch (XINT (op, 1))
12256 {
12257 case UNSPEC_GOTNTPOFF:
12258 return true;
12259 case UNSPEC_TPOFF:
12260 if (!TARGET_64BIT)
12261 return true;
12262 break;
12263 default:
12264 break;
12265 }
12266 }
12267
12268 return false;
12269}
12270
12271/* Return true if OP refers to a TLS address. */
12272bool
12273ix86_tls_address_pattern_p (rtx op)
12274{
12275 subrtx_var_iterator::array_type array;
12276 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12277 {
12278 rtx op = *iter;
12279 if (MEM_P (op))
12280 {
12281 rtx *x = &XEXP (op, 0);
12282 while (GET_CODE (*x) == PLUS)
12283 {
12284 int i;
12285 for (i = 0; i < 2; i++)
12286 {
12287 rtx u = XEXP (*x, i);
12288 if (GET_CODE (u) == ZERO_EXTEND)
12289 u = XEXP (u, 0);
12290 if (GET_CODE (u) == UNSPEC
12291 && XINT (u, 1) == UNSPEC_TP)
12292 return true;
12293 }
12294 x = &XEXP (*x, 0);
12295 }
12296
12297 iter.skip_subrtxes ();
12298 }
12299 }
12300
12301 return false;
12302}
12303
12304/* Rewrite *LOC so that it refers to a default TLS address space. */
12305void
12306ix86_rewrite_tls_address_1 (rtx *loc)
12307{
12308 subrtx_ptr_iterator::array_type array;
12309 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
12310 {
12311 rtx *loc = *iter;
12312 if (MEM_P (*loc))
12313 {
12314 rtx addr = XEXP (*loc, 0);
12315 rtx *x = &addr;
12316 while (GET_CODE (*x) == PLUS)
12317 {
12318 int i;
12319 for (i = 0; i < 2; i++)
12320 {
12321 rtx u = XEXP (*x, i);
12322 if (GET_CODE (u) == ZERO_EXTEND)
12323 u = XEXP (u, 0);
12324 if (GET_CODE (u) == UNSPEC
12325 && XINT (u, 1) == UNSPEC_TP)
12326 {
12327 addr_space_t as = DEFAULT_TLS_SEG_REG;
12328
12329 *x = XEXP (*x, 1 - i);
12330
12331 *loc = replace_equiv_address_nv (*loc, addr, true);
12332 set_mem_addr_space (*loc, as);
12333 return;
12334 }
12335 }
12336 x = &XEXP (*x, 0);
12337 }
12338
12339 iter.skip_subrtxes ();
12340 }
12341 }
12342}
12343
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to a default TLS address space.  */
12346rtx
12347ix86_rewrite_tls_address (rtx pattern)
12348{
12349 pattern = copy_insn (pattern);
  ix86_rewrite_tls_address_1 (&pattern);
12351 return pattern;
12352}
12353
12354/* Create or return the unique __imp_DECL dllimport symbol corresponding
12355 to symbol DECL if BEIMPORT is true. Otherwise create or return the
12356 unique refptr-DECL symbol corresponding to symbol DECL. */
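/* (Background: for a dllimport'ed DECL the import library only provides a
   pointer __imp_DECL, filled in by the dynamic linker with DECL's address,
   so every reference goes through one extra indirection -- hence the
   const mem wrapping of the symbol created below.)  */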
12357
12358struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
12359{
12360 static inline hashval_t hash (tree_map *m) { return m->hash; }
12361 static inline bool
12362 equal (tree_map *a, tree_map *b)
12363 {
12364 return a->base.from == b->base.from;
12365 }
12366
12367 static int
12368 keep_cache_entry (tree_map *&m)
12369 {
12370 return ggc_marked_p (m->base.from);
12371 }
12372};
12373
12374static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
12375
12376static tree
12377get_dllimport_decl (tree decl, bool beimport)
12378{
12379 struct tree_map *h, in;
12380 const char *name;
12381 const char *prefix;
12382 size_t namelen, prefixlen;
12383 char *imp_name;
12384 tree to;
12385 rtx rtl;
12386
12387 if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
12389
12390 in.hash = htab_hash_pointer (decl);
12391 in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
12393 h = *loc;
12394 if (h)
12395 return h->to;
12396
12397 *loc = h = ggc_alloc<tree_map> ();
12398 h->hash = in.hash;
12399 h->base.from = decl;
12400 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12401 VAR_DECL, NULL, ptr_type_node);
12402 DECL_ARTIFICIAL (to) = 1;
12403 DECL_IGNORED_P (to) = 1;
12404 DECL_EXTERNAL (to) = 1;
12405 TREE_READONLY (to) = 1;
12406
12407 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12408 name = targetm.strip_name_encoding (name);
12409 if (beimport)
12410 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12411 ? "*__imp_" : "*__imp__";
12412 else
12413 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);
12419
  name = ggc_alloc_string (imp_name, namelen + prefixlen);
12421 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12422 SET_SYMBOL_REF_DECL (rtl, to);
12423 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
12424 if (!beimport)
12425 {
12426 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
12427#ifdef SUB_TARGET_RECORD_STUB
12428 SUB_TARGET_RECORD_STUB (name);
12429#endif
12430 }
12431
12432 rtl = gen_const_mem (Pmode, rtl);
12433 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12434
12435 SET_DECL_RTL (to, rtl);
12436 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12437
12438 return to;
12439}
12440
12441/* Expand SYMBOL into its corresponding far-address symbol.
12442 WANT_REG is true if we require the result be a register. */
12443
12444static rtx
12445legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
12446{
12447 tree imp_decl;
12448 rtx x;
12449
12450 gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
12452
12453 x = DECL_RTL (imp_decl);
12454 if (want_reg)
12455 x = force_reg (Pmode, x);
12456 return x;
12457}
12458
12459/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12460 true if we require the result be a register. */
12461
12462static rtx
12463legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12464{
12465 tree imp_decl;
12466 rtx x;
12467
12468 gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
12470
12471 x = DECL_RTL (imp_decl);
12472 if (want_reg)
12473 x = force_reg (Pmode, x);
12474 return x;
12475}
12476
12477/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12478 is true if we require the result be a register. */
12479
12480rtx
12481legitimize_pe_coff_symbol (rtx addr, bool inreg)
12482{
12483 if (!TARGET_PECOFF)
12484 return NULL_RTX;
12485
12486 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12487 {
12488 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
12490 if (GET_CODE (addr) == CONST
12491 && GET_CODE (XEXP (addr, 0)) == PLUS
12492 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12493 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12494 {
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
12496 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12497 }
12498 }
12499
12500 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
12501 return NULL_RTX;
12502 if (GET_CODE (addr) == SYMBOL_REF
12503 && !is_imported_p (x: addr)
12504 && SYMBOL_REF_EXTERNAL_P (addr)
12505 && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);
12507
12508 if (GET_CODE (addr) == CONST
12509 && GET_CODE (XEXP (addr, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12511 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
12512 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
12513 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
12514 {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
12516 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12517 }
12518 return NULL_RTX;
12519}
12520
12521/* Try machine-dependent ways of modifying an illegitimate address
12522 to be legitimate. If we find one, return the new, valid address.
12523 This macro is used in only one place: `memory_address' in explow.cc.
12524
12525 OLDX is the address as it was before break_out_memory_refs was called.
12526 In some cases it is useful to look at this to decide what needs to be done.
12527
12528 It is always safe for this macro to do nothing. It exists to recognize
12529 opportunities to optimize the output.
12530
12531 For the 80386, we handle X+REG by loading X into a register R and
12532 using R+REG. R will go in a general reg and indexing will be used.
12533 However, if REG is a broken-out memory address or multiplication,
12534 nothing needs to be done because REG can certainly go in a general reg.
12535
12536 When -fpic is used, special handling is needed for symbolic references.
12537 See comments by legitimize_pic_address in i386.cc for details. */
12538
12539static rtx
12540ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12541{
12542 bool changed = false;
12543 unsigned log;
12544
12545 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12546 if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
12548 if (GET_CODE (x) == CONST
12549 && GET_CODE (XEXP (x, 0)) == PLUS
12550 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12551 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12552 {
12553 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
12555 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12556 }
12557
12558 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12559 {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
12561 if (tmp)
12562 return tmp;
12563 }
12564
12565 if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);
12567
12568#if TARGET_MACHO
12569 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12570 return machopic_indirect_data_reference (x, 0);
12571#endif
12572
12573 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
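  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     matching the index*scale form that ix86_decompose_address expects.  */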
12574 if (GET_CODE (x) == ASHIFT
12575 && CONST_INT_P (XEXP (x, 1))
12576 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12577 {
12578 changed = true;
12579 log = INTVAL (XEXP (x, 1));
12580 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12581 GEN_INT (1 << log));
12582 }
12583
12584 if (GET_CODE (x) == PLUS)
12585 {
12586 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12587
12588 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12589 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12590 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12591 {
12592 changed = true;
12593 log = INTVAL (XEXP (XEXP (x, 0), 1));
12594 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12595 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12596 GEN_INT (1 << log));
12597 }
12598
12599 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12600 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12601 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12602 {
12603 changed = true;
12604 log = INTVAL (XEXP (XEXP (x, 1), 1));
12605 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12606 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12607 GEN_INT (1 << log));
12608 }
12609
12610 /* Put multiply first if it isn't already. */
12611 if (GET_CODE (XEXP (x, 1)) == MULT)
12612 {
12613 std::swap (XEXP (x, 0), XEXP (x, 1));
12614 changed = true;
12615 }
12616
12617 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12618 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12619 created by virtual register instantiation, register elimination, and
12620 similar optimizations. */
12621 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12622 {
12623 changed = true;
12624 x = gen_rtx_PLUS (Pmode,
12625 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12626 XEXP (XEXP (x, 1), 0)),
12627 XEXP (XEXP (x, 1), 1));
12628 }
12629
12630 /* Canonicalize
12631 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12632 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12633 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12634 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12635 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12636 && CONSTANT_P (XEXP (x, 1)))
12637 {
12638 rtx constant;
12639 rtx other = NULL_RTX;
12640
12641 if (CONST_INT_P (XEXP (x, 1)))
12642 {
12643 constant = XEXP (x, 1);
12644 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12645 }
12646 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12647 {
12648 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12649 other = XEXP (x, 1);
12650 }
12651 else
12652 constant = 0;
12653
12654 if (constant)
12655 {
12656 changed = true;
12657 x = gen_rtx_PLUS (Pmode,
12658 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12659 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12660 plus_constant (Pmode, other,
12661 INTVAL (constant)));
12662 }
12663 }
12664
12665 if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
12666 return x;
12667
12668 if (GET_CODE (XEXP (x, 0)) == MULT)
12669 {
12670 changed = true;
12671 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12672 }
12673
12674 if (GET_CODE (XEXP (x, 1)) == MULT)
12675 {
12676 changed = true;
12677 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12678 }
12679
12680 if (changed
12681 && REG_P (XEXP (x, 1))
12682 && REG_P (XEXP (x, 0)))
12683 return x;
12684
12685 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12686 {
12687 changed = true;
12688 x = legitimize_pic_address (orig: x, reg: 0);
12689 }
12690
12691 if (changed && ix86_legitimate_address_p (mode, addr: x, strict: false))
12692 return x;
12693
12694 if (REG_P (XEXP (x, 0)))
12695 {
12696 rtx temp = gen_reg_rtx (Pmode);
12697 rtx val = force_operand (XEXP (x, 1), temp);
12698 if (val != temp)
12699 {
12700 val = convert_to_mode (Pmode, val, 1);
12701 emit_move_insn (temp, val);
12702 }
12703
12704 XEXP (x, 1) = temp;
12705 return x;
12706 }
12707
12708 else if (REG_P (XEXP (x, 1)))
12709 {
12710 rtx temp = gen_reg_rtx (Pmode);
12711 rtx val = force_operand (XEXP (x, 0), temp);
12712 if (val != temp)
12713 {
12714 val = convert_to_mode (Pmode, val, 1);
12715 emit_move_insn (temp, val);
12716 }
12717
12718 XEXP (x, 0) = temp;
12719 return x;
12720 }
12721 }
12722
12723 return x;
12724}
12725
12726/* Print an integer constant expression in assembler syntax. Addition
12727 and subtraction are the only arithmetic that may appear in these
12728 expressions. FILE is the stdio stream to write to, X is the rtx, and
12729 CODE is the operand print code from the output string. */
12730
12731static void
12732output_pic_addr_const (FILE *file, rtx x, int code)
12733{
12734 char buf[256];
12735
12736 switch (GET_CODE (x))
12737 {
12738 case PC:
12739 gcc_assert (flag_pic);
12740 putc (c: '.', stream: file);
12741 break;
12742
12743 case SYMBOL_REF:
12744 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12745 output_addr_const (file, x);
12746 else
12747 {
12748 const char *name = XSTR (x, 0);
12749
12750 /* Mark the decl as referenced so that cgraph will
12751 output the function. */
12752 if (SYMBOL_REF_DECL (x))
12753 mark_decl_referenced (SYMBOL_REF_DECL (x));
12754
12755#if TARGET_MACHO
12756 if (MACHOPIC_INDIRECT
12757 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12758 name = machopic_indirection_name (x, /*stub_p=*/true);
12759#endif
12760 assemble_name (file, name);
12761 }
12762 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12763 && code == 'P' && ix86_call_use_plt_p (x))
12764 fputs (s: "@PLT", stream: file);
12765 break;
12766
12767 case LABEL_REF:
12768 x = XEXP (x, 0);
12769 /* FALLTHRU */
12770 case CODE_LABEL:
12771 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12772 assemble_name (asm_out_file, buf);
12773 break;
12774
12775 CASE_CONST_SCALAR_INT:
12776 output_addr_const (file, x);
12777 break;
12778
12779 case CONST:
12780 /* This used to output parentheses around the expression,
12781 but that does not work on the 386 (either ATT or BSD assembler). */
12782 output_pic_addr_const (file, XEXP (x, 0), code);
12783 break;
12784
12785 case CONST_DOUBLE:
12786 /* We can't handle floating point constants;
12787 TARGET_PRINT_OPERAND must handle them. */
12788 output_operand_lossage ("floating constant misused");
12789 break;
12790
12791 case PLUS:
12792 /* Some assemblers need integer constants to appear first. */
12793 if (CONST_INT_P (XEXP (x, 0)))
12794 {
12795 output_pic_addr_const (file, XEXP (x, 0), code);
12796 putc (c: '+', stream: file);
12797 output_pic_addr_const (file, XEXP (x, 1), code);
12798 }
12799 else
12800 {
12801 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12802 output_pic_addr_const (file, XEXP (x, 1), code);
12803 putc (c: '+', stream: file);
12804 output_pic_addr_const (file, XEXP (x, 0), code);
12805 }
12806 break;
12807
12808 case MINUS:
12809 if (!TARGET_MACHO)
12810 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', stream: file);
12811 output_pic_addr_const (file, XEXP (x, 0), code);
12812 putc (c: '-', stream: file);
12813 output_pic_addr_const (file, XEXP (x, 1), code);
12814 if (!TARGET_MACHO)
12815 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', stream: file);
12816 break;
12817
12818 case UNSPEC:
12819 gcc_assert (XVECLEN (x, 0) == 1);
12820 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12821 switch (XINT (x, 1))
12822 {
12823 case UNSPEC_GOT:
12824 fputs (s: "@GOT", stream: file);
12825 break;
12826 case UNSPEC_GOTOFF:
12827 fputs (s: "@GOTOFF", stream: file);
12828 break;
12829 case UNSPEC_PLTOFF:
12830 fputs (s: "@PLTOFF", stream: file);
12831 break;
12832 case UNSPEC_PCREL:
12833 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12834 "(%rip)" : "[rip]", stream: file);
12835 break;
12836 case UNSPEC_GOTPCREL:
12837 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12838 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", stream: file);
12839 break;
12840 case UNSPEC_GOTTPOFF:
12841 /* FIXME: This might be @TPOFF in Sun ld too. */
12842 fputs (s: "@gottpoff", stream: file);
12843 break;
12844 case UNSPEC_TPOFF:
12845 fputs (s: "@tpoff", stream: file);
12846 break;
12847 case UNSPEC_NTPOFF:
12848 if (TARGET_64BIT)
12849 fputs (s: "@tpoff", stream: file);
12850 else
12851 fputs (s: "@ntpoff", stream: file);
12852 break;
12853 case UNSPEC_DTPOFF:
12854 fputs (s: "@dtpoff", stream: file);
12855 break;
12856 case UNSPEC_GOTNTPOFF:
12857 if (TARGET_64BIT)
12858 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12859 "@gottpoff(%rip)": "@gottpoff[rip]", stream: file);
12860 else
12861 fputs (s: "@gotntpoff", stream: file);
12862 break;
12863 case UNSPEC_INDNTPOFF:
12864 fputs (s: "@indntpoff", stream: file);
12865 break;
12866#if TARGET_MACHO
12867 case UNSPEC_MACHOPIC_OFFSET:
12868 putc ('-', file);
12869 machopic_output_function_base_name (file);
12870 break;
12871#endif
12872 default:
12873 output_operand_lossage ("invalid UNSPEC as operand");
12874 break;
12875 }
12876 break;
12877
12878 default:
12879 output_operand_lossage ("invalid expression as operand");
12880 }
12881}
12882
12883/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12884 We need to emit DTP-relative relocations. */
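/* For example, assuming ASM_LONG expands to ".long" as on typical ELF
   targets, a 4-byte entry for symbol X comes out as ".long x@dtpoff",
   and an 8-byte entry as ".long x@dtpoff, 0", the upper half being a
   plain zero.  */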
12885
12886static void ATTRIBUTE_UNUSED
12887i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12888{
12889 fputs (ASM_LONG, stream: file);
12890 output_addr_const (file, x);
12891 fputs (s: "@dtpoff", stream: file);
12892 switch (size)
12893 {
12894 case 4:
12895 break;
12896 case 8:
12897 fputs (s: ", 0", stream: file);
12898 break;
12899 default:
12900 gcc_unreachable ();
12901 }
12902}
12903
12904/* Return true if X is a representation of the PIC register. This copes
12905 with calls from ix86_find_base_term, where the register might have
12906 been replaced by a cselib value. */
12907
12908static bool
12909ix86_pic_register_p (rtx x)
12910{
12911 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12912 return (pic_offset_table_rtx
12913 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12914 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12915 return true;
12916 else if (!REG_P (x))
12917 return false;
12918 else if (pic_offset_table_rtx)
12919 {
12920 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12921 return true;
12922 if (HARD_REGISTER_P (x)
12923 && !HARD_REGISTER_P (pic_offset_table_rtx)
12924 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12925 return true;
12926 return false;
12927 }
12928 else
12929 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12930}
12931
12932/* Helper function for ix86_delegitimize_address.
12933 Attempt to delegitimize TLS local-exec accesses. */
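/* Roughly: a local-exec access such as %fs:x@tpoff is represented as a
   memory reference in the TLS segment whose displacement wraps
   (const (unspec [x] UNSPEC_NTPOFF)); this rebuilds an equivalent
   address in terms of the symbol X itself (plus any base, index and
   constant parts) so the callers can see the underlying object.  */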
12934
12935static rtx
12936ix86_delegitimize_tls_address (rtx orig_x)
12937{
12938 rtx x = orig_x, unspec;
12939 struct ix86_address addr;
12940
12941 if (!TARGET_TLS_DIRECT_SEG_REFS)
12942 return orig_x;
12943 if (MEM_P (x))
12944 x = XEXP (x, 0);
12945 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12946 return orig_x;
12947 if (ix86_decompose_address (addr: x, out: &addr) == 0
12948 || addr.seg != DEFAULT_TLS_SEG_REG
12949 || addr.disp == NULL_RTX
12950 || GET_CODE (addr.disp) != CONST)
12951 return orig_x;
12952 unspec = XEXP (addr.disp, 0);
12953 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12954 unspec = XEXP (unspec, 0);
12955 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12956 return orig_x;
12957 x = XVECEXP (unspec, 0, 0);
12958 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12959 if (unspec != XEXP (addr.disp, 0))
12960 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12961 if (addr.index)
12962 {
12963 rtx idx = addr.index;
12964 if (addr.scale != 1)
12965 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12966 x = gen_rtx_PLUS (Pmode, idx, x);
12967 }
12968 if (addr.base)
12969 x = gen_rtx_PLUS (Pmode, addr.base, x);
12970 if (MEM_P (orig_x))
12971 x = replace_equiv_address_nv (orig_x, x);
12972 return x;
12973}
12974
12975/* In the name of slightly smaller debug output, and to cater to
12976 general assembler lossage, recognize PIC+GOTOFF and turn it back
12977 into a direct symbol reference.
12978
12979 On Darwin, this is necessary to avoid a crash, because Darwin
12980 has a different PIC label for each routine but the DWARF debugging
12981 information is not associated with any particular routine, so it's
12982 necessary to remove references to the PIC label from RTL stored by
12983 the DWARF output code.
12984
12985 This helper is used in the normal ix86_delegitimize_address
12986 entrypoint (e.g. used in the target delegitimization hook) and
12987   in ix86_find_base_term.  As a compile-time memory optimization, we
12988   avoid allocating rtxes that would not change the outcome for the
12989   callers (find_base_value and find_base_term).  */
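/* Illustrative example: with 32-bit PIC, the expression
   (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF))) is turned back
   into a plain reference to FOO, with any extra register or constant
   addend re-applied around the result.  */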
12990
12991static inline rtx
12992ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12993{
12994 rtx orig_x = delegitimize_mem_from_attrs (x);
12995 /* addend is NULL or some rtx if x is something+GOTOFF where
12996 something doesn't include the PIC register. */
12997 rtx addend = NULL_RTX;
12998 /* reg_addend is NULL or a multiple of some register. */
12999 rtx reg_addend = NULL_RTX;
13000 /* const_addend is NULL or a const_int. */
13001 rtx const_addend = NULL_RTX;
13002 /* This is the result, or NULL. */
13003 rtx result = NULL_RTX;
13004
13005 x = orig_x;
13006
13007 if (MEM_P (x))
13008 x = XEXP (x, 0);
13009
13010 if (TARGET_64BIT)
13011 {
13012 if (GET_CODE (x) == CONST
13013 && GET_CODE (XEXP (x, 0)) == PLUS
13014 && GET_MODE (XEXP (x, 0)) == Pmode
13015 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13016 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13017 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13018 {
13019 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13020 base. A CONST can't be arg_pointer_rtx based. */
13021 if (base_term_p && MEM_P (orig_x))
13022 return orig_x;
13023 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13024 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13025 if (MEM_P (orig_x))
13026 x = replace_equiv_address_nv (orig_x, x);
13027 return x;
13028 }
13029
13030 if (GET_CODE (x) == CONST
13031 && GET_CODE (XEXP (x, 0)) == UNSPEC
13032 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13033 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13034 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13035 {
13036 x = XVECEXP (XEXP (x, 0), 0, 0);
13037 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13038 {
13039 x = lowpart_subreg (GET_MODE (orig_x), op: x, GET_MODE (x));
13040 if (x == NULL_RTX)
13041 return orig_x;
13042 }
13043 return x;
13044 }
13045
13046 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13047 return ix86_delegitimize_tls_address (orig_x);
13048
13049 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13050 and -mcmodel=medium -fpic. */
13051 }
13052
13053 if (GET_CODE (x) != PLUS
13054 || GET_CODE (XEXP (x, 1)) != CONST)
13055 return ix86_delegitimize_tls_address (orig_x);
13056
13057 if (ix86_pic_register_p (XEXP (x, 0)))
13058 /* %ebx + GOT/GOTOFF */
13059 ;
13060 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13061 {
13062 /* %ebx + %reg * scale + GOT/GOTOFF */
13063 reg_addend = XEXP (x, 0);
13064 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13065 reg_addend = XEXP (reg_addend, 1);
13066 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13067 reg_addend = XEXP (reg_addend, 0);
13068 else
13069 {
13070 reg_addend = NULL_RTX;
13071 addend = XEXP (x, 0);
13072 }
13073 }
13074 else
13075 addend = XEXP (x, 0);
13076
13077 x = XEXP (XEXP (x, 1), 0);
13078 if (GET_CODE (x) == PLUS
13079 && CONST_INT_P (XEXP (x, 1)))
13080 {
13081 const_addend = XEXP (x, 1);
13082 x = XEXP (x, 0);
13083 }
13084
13085 if (GET_CODE (x) == UNSPEC
13086 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13087 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13088 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13089 && !MEM_P (orig_x) && !addend)))
13090 result = XVECEXP (x, 0, 0);
13091
13092 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (disp: x)
13093 && !MEM_P (orig_x))
13094 result = XVECEXP (x, 0, 0);
13095
13096 if (! result)
13097 return ix86_delegitimize_tls_address (orig_x);
13098
13099 /* For (PLUS something CONST_INT) both find_base_{value,term} just
13100 recurse on the first operand. */
13101 if (const_addend && !base_term_p)
13102 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13103 if (reg_addend)
13104 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13105 if (addend)
13106 {
13107 /* If the rest of original X doesn't involve the PIC register, add
13108 addend and subtract pic_offset_table_rtx. This can happen e.g.
13109 for code like:
13110 leal (%ebx, %ecx, 4), %ecx
13111 ...
13112 movl foo@GOTOFF(%ecx), %edx
13113 in which case we return (%ecx - %ebx) + foo
13114 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13115 and reload has completed. Don't do the latter for debug,
13116 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13117 if (pic_offset_table_rtx
13118 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13119 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13120 pic_offset_table_rtx),
13121 result);
13122 else if (base_term_p
13123 && pic_offset_table_rtx
13124 && !TARGET_MACHO
13125 && !TARGET_VXWORKS_RTP)
13126 {
13127 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13128 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13129 result = gen_rtx_PLUS (Pmode, tmp, result);
13130 }
13131 else
13132 return orig_x;
13133 }
13134 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13135 {
13136 result = lowpart_subreg (GET_MODE (orig_x), op: result, Pmode);
13137 if (result == NULL_RTX)
13138 return orig_x;
13139 }
13140 return result;
13141}
13142
13143/* The normal instantiation of the above template. */
13144
13145static rtx
13146ix86_delegitimize_address (rtx x)
13147{
13148 return ix86_delegitimize_address_1 (x, base_term_p: false);
13149}
13150
13151/* If X is a machine specific address (i.e. a symbol or label being
13152 referenced as a displacement from the GOT implemented using an
13153 UNSPEC), then return the base term. Otherwise return X. */
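/* E.g. on x86-64 an address of the form
   (const (plus (unspec [sym] UNSPEC_GOTPCREL) (const_int 8))) has base
   term SYM; anything that is not one of these GOT/PCREL forms is
   returned unchanged, or, for 32-bit, handed to
   ix86_delegitimize_address_1.  */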
13154
13155rtx
13156ix86_find_base_term (rtx x)
13157{
13158 rtx term;
13159
13160 if (TARGET_64BIT)
13161 {
13162 if (GET_CODE (x) != CONST)
13163 return x;
13164 term = XEXP (x, 0);
13165 if (GET_CODE (term) == PLUS
13166 && CONST_INT_P (XEXP (term, 1)))
13167 term = XEXP (term, 0);
13168 if (GET_CODE (term) != UNSPEC
13169 || (XINT (term, 1) != UNSPEC_GOTPCREL
13170 && XINT (term, 1) != UNSPEC_PCREL))
13171 return x;
13172
13173 return XVECEXP (term, 0, 0);
13174 }
13175
13176 return ix86_delegitimize_address_1 (x, base_term_p: true);
13177}
13178
13179/* Return true if X shouldn't be emitted into the debug info.
13180   Disallow UNSPECs other than @gotoff - we can't easily emit the
13181   _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we do not
13182   delegitimize it, but instead assemble it as @gotoff.
13183   Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13184   assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
13185
13186static bool
13187ix86_const_not_ok_for_debug_p (rtx x)
13188{
13189 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13190 return true;
13191
13192 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13193 return true;
13194
13195 return false;
13196}
13197
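/* Print to FILE the instruction suffix for comparison CODE in flags
   mode MODE (e.g. "e", "ge", "nbe").  If REVERSE, print the suffix for
   the reversed condition; FP selects the spelling needed after
   fcmov-style instructions.  */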
13198static void
13199put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13200 bool fp, FILE *file)
13201{
13202 const char *suffix;
13203
13204 if (mode == CCFPmode)
13205 {
13206 code = ix86_fp_compare_code_to_integer (code);
13207 mode = CCmode;
13208 }
13209 if (reverse)
13210 code = reverse_condition (code);
13211
13212 switch (code)
13213 {
13214 case EQ:
13215 gcc_assert (mode != CCGZmode);
13216 switch (mode)
13217 {
13218 case E_CCAmode:
13219 suffix = "a";
13220 break;
13221 case E_CCCmode:
13222 suffix = "c";
13223 break;
13224 case E_CCOmode:
13225 suffix = "o";
13226 break;
13227 case E_CCPmode:
13228 suffix = "p";
13229 break;
13230 case E_CCSmode:
13231 suffix = "s";
13232 break;
13233 default:
13234 suffix = "e";
13235 break;
13236 }
13237 break;
13238 case NE:
13239 gcc_assert (mode != CCGZmode);
13240 switch (mode)
13241 {
13242 case E_CCAmode:
13243 suffix = "na";
13244 break;
13245 case E_CCCmode:
13246 suffix = "nc";
13247 break;
13248 case E_CCOmode:
13249 suffix = "no";
13250 break;
13251 case E_CCPmode:
13252 suffix = "np";
13253 break;
13254 case E_CCSmode:
13255 suffix = "ns";
13256 break;
13257 default:
13258 suffix = "ne";
13259 break;
13260 }
13261 break;
13262 case GT:
13263 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13264 suffix = "g";
13265 break;
13266 case GTU:
13267 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13268 Those same assemblers have the same but opposite lossage on cmov. */
13269 if (mode == CCmode)
13270 suffix = fp ? "nbe" : "a";
13271 else
13272 gcc_unreachable ();
13273 break;
13274 case LT:
13275 switch (mode)
13276 {
13277 case E_CCNOmode:
13278 case E_CCGOCmode:
13279 suffix = "s";
13280 break;
13281
13282 case E_CCmode:
13283 case E_CCGCmode:
13284 case E_CCGZmode:
13285 suffix = "l";
13286 break;
13287
13288 default:
13289 gcc_unreachable ();
13290 }
13291 break;
13292 case LTU:
13293 if (mode == CCmode || mode == CCGZmode)
13294 suffix = "b";
13295 else if (mode == CCCmode)
13296 suffix = fp ? "b" : "c";
13297 else
13298 gcc_unreachable ();
13299 break;
13300 case GE:
13301 switch (mode)
13302 {
13303 case E_CCNOmode:
13304 case E_CCGOCmode:
13305 suffix = "ns";
13306 break;
13307
13308 case E_CCmode:
13309 case E_CCGCmode:
13310 case E_CCGZmode:
13311 suffix = "ge";
13312 break;
13313
13314 default:
13315 gcc_unreachable ();
13316 }
13317 break;
13318 case GEU:
13319 if (mode == CCmode || mode == CCGZmode)
13320 suffix = "nb";
13321 else if (mode == CCCmode)
13322 suffix = fp ? "nb" : "nc";
13323 else
13324 gcc_unreachable ();
13325 break;
13326 case LE:
13327 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13328 suffix = "le";
13329 break;
13330 case LEU:
13331 if (mode == CCmode)
13332 suffix = "be";
13333 else
13334 gcc_unreachable ();
13335 break;
13336 case UNORDERED:
13337 suffix = fp ? "u" : "p";
13338 break;
13339 case ORDERED:
13340 suffix = fp ? "nu" : "np";
13341 break;
13342 default:
13343 gcc_unreachable ();
13344 }
13345 fputs (s: suffix, stream: file);
13346}
13347
13348/* Print the name of register X to FILE based on its machine mode and number.
13349 If CODE is 'w', pretend the mode is HImode.
13350 If CODE is 'b', pretend the mode is QImode.
13351 If CODE is 'k', pretend the mode is SImode.
13352 If CODE is 'q', pretend the mode is DImode.
13353 If CODE is 'x', pretend the mode is V4SFmode.
13354 If CODE is 't', pretend the mode is V8SFmode.
13355 If CODE is 'g', pretend the mode is V16SFmode.
13356 If CODE is 'h', pretend the reg is the 'high' byte register.
13357 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13358 If CODE is 'd', duplicate the operand for AVX instruction.
13359 If CODE is 'V', print naked full integer register name without %.
13360 */
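/* For example, given the DImode hard register for %rax: code 0 prints
   "rax", 'k' prints "eax", 'w' prints "ax", 'b' prints "al" and 'h'
   prints "ah"; in the AT&T dialect each name is preceded by '%'.  */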
13361
13362void
13363print_reg (rtx x, int code, FILE *file)
13364{
13365 const char *reg;
13366 int msize;
13367 unsigned int regno;
13368 bool duplicated;
13369
13370 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13371 putc (c: '%', stream: file);
13372
13373 if (x == pc_rtx)
13374 {
13375 gcc_assert (TARGET_64BIT);
13376 fputs (s: "rip", stream: file);
13377 return;
13378 }
13379
13380 if (code == 'y' && STACK_TOP_P (x))
13381 {
13382 fputs (s: "st(0)", stream: file);
13383 return;
13384 }
13385
13386 if (code == 'w')
13387 msize = 2;
13388 else if (code == 'b')
13389 msize = 1;
13390 else if (code == 'k')
13391 msize = 4;
13392 else if (code == 'q')
13393 msize = 8;
13394 else if (code == 'h')
13395 msize = 0;
13396 else if (code == 'x')
13397 msize = 16;
13398 else if (code == 't')
13399 msize = 32;
13400 else if (code == 'g')
13401 msize = 64;
13402 else
13403 msize = GET_MODE_SIZE (GET_MODE (x));
13404
13405 regno = REGNO (x);
13406
13407 if (regno == ARG_POINTER_REGNUM
13408 || regno == FRAME_POINTER_REGNUM
13409 || regno == FPSR_REG)
13410 {
13411 output_operand_lossage
13412 ("invalid use of register '%s'", reg_names[regno]);
13413 return;
13414 }
13415 else if (regno == FLAGS_REG)
13416 {
13417 output_operand_lossage ("invalid use of asm flag output");
13418 return;
13419 }
13420
13421 if (code == 'V')
13422 {
13423 if (GENERAL_REGNO_P (regno))
13424 msize = GET_MODE_SIZE (word_mode);
13425 else
13426 error ("%<V%> modifier on non-integer register");
13427 }
13428
13429 duplicated = code == 'd' && TARGET_AVX;
13430
13431 switch (msize)
13432 {
13433 case 16:
13434 case 12:
13435 case 8:
13436 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13437 warning (0, "unsupported size for integer register");
13438 /* FALLTHRU */
13439 case 4:
13440 if (LEGACY_INT_REGNO_P (regno))
13441 putc (c: msize > 4 && TARGET_64BIT ? 'r' : 'e', stream: file);
13442 /* FALLTHRU */
13443 case 2:
13444 normal:
13445 reg = hi_reg_name[regno];
13446 break;
13447 case 1:
13448 if (regno >= ARRAY_SIZE (qi_reg_name))
13449 goto normal;
13450 if (!ANY_QI_REGNO_P (regno))
13451 error ("unsupported size for integer register");
13452 reg = qi_reg_name[regno];
13453 break;
13454 case 0:
13455 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13456 goto normal;
13457 reg = qi_high_reg_name[regno];
13458 break;
13459 case 32:
13460 case 64:
13461 if (SSE_REGNO_P (regno))
13462 {
13463 gcc_assert (!duplicated);
13464 putc (c: msize == 32 ? 'y' : 'z', stream: file);
13465 reg = hi_reg_name[regno] + 1;
13466 break;
13467 }
13468 goto normal;
13469 default:
13470 gcc_unreachable ();
13471 }
13472
13473 fputs (s: reg, stream: file);
13474
13475  /* Irritatingly, AMD extended registers use a
13476     different naming convention: "r%d[bwd]".  */
13477 if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
13478 {
13479 gcc_assert (TARGET_64BIT);
13480 switch (msize)
13481 {
13482 case 0:
13483 error ("extended registers have no high halves");
13484 break;
13485 case 1:
13486 putc (c: 'b', stream: file);
13487 break;
13488 case 2:
13489 putc (c: 'w', stream: file);
13490 break;
13491 case 4:
13492 putc (c: 'd', stream: file);
13493 break;
13494 case 8:
13495 /* no suffix */
13496 break;
13497 default:
13498 error ("unsupported operand size for extended register");
13499 break;
13500 }
13501 return;
13502 }
13503
13504 if (duplicated)
13505 {
13506 if (ASSEMBLER_DIALECT == ASM_ATT)
13507 fprintf (stream: file, format: ", %%%s", reg);
13508 else
13509 fprintf (stream: file, format: ", %s", reg);
13510 }
13511}
13512
13513/* Meaning of CODE:
13514 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13515 C -- print opcode suffix for set/cmov insn.
13516 c -- like C, but print reversed condition
13517 F,f -- likewise, but for floating-point.
13518 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13519 otherwise nothing
13520 R -- print embedded rounding and sae.
13521 r -- print only sae.
13522 z -- print the opcode suffix for the size of the current operand.
13523 Z -- likewise, with special suffixes for x87 instructions.
13524 * -- print a star (in certain assembler syntax)
13525 A -- print an absolute memory reference.
13526 E -- print address with DImode register names if TARGET_64BIT.
13527 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13528 s -- print a shift double count, followed by the assembler's argument
13529 delimiter.
13530 b -- print the QImode name of the register for the indicated operand.
13531 %b0 would print %al if operands[0] is reg 0.
13532 w -- likewise, print the HImode name of the register.
13533 k -- likewise, print the SImode name of the register.
13534 q -- likewise, print the DImode name of the register.
13535 x -- likewise, print the V4SFmode name of the register.
13536 t -- likewise, print the V8SFmode name of the register.
13537 g -- likewise, print the V16SFmode name of the register.
13538 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13539 y -- print "st(0)" instead of "st" as a register.
13540 d -- print duplicated register operand for AVX instruction.
13541 D -- print condition for SSE cmp instruction.
13542 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13543 address from GOT.
13544 p -- print raw symbol name.
13545 X -- don't print any sort of PIC '@' suffix for a symbol.
13546 & -- print some in-use local-dynamic symbol name.
13547 H -- print a memory address offset by 8; used for sse high-parts
13548 Y -- print condition for XOP pcom* instruction.
13549 V -- print naked full integer register name without %.
13550 + -- print a branch hint as 'cs' or 'ds' prefix
13551 ; -- print a semicolon (after prefixes due to bug in older gas).
13552 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13553 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13554 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13555 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13556 N -- print maskz if it's constant 0 operand.
13557 */
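/* Illustrative (hypothetical) use of a modifier from extended asm:

     asm ("movl %k1, %k0" : "=r" (dst) : "r" (src));

   prints the 32-bit register names (e.g. "movl %esi, %eax") even when
   DST and SRC are 64-bit values; '%q1' would likewise force the 64-bit
   name.  DST and SRC are placeholder variable names.  */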
13558
13559void
13560ix86_print_operand (FILE *file, rtx x, int code)
13561{
13562 if (code)
13563 {
13564 switch (code)
13565 {
13566 case 'A':
13567 switch (ASSEMBLER_DIALECT)
13568 {
13569 case ASM_ATT:
13570 putc (c: '*', stream: file);
13571 break;
13572
13573 case ASM_INTEL:
13574	  /* Intel syntax.  For absolute addresses, registers should not
13575	     be surrounded by brackets.  */
13576 if (!REG_P (x))
13577 {
13578 putc (c: '[', stream: file);
13579 ix86_print_operand (file, x, code: 0);
13580 putc (c: ']', stream: file);
13581 return;
13582 }
13583 break;
13584
13585 default:
13586 gcc_unreachable ();
13587 }
13588
13589 ix86_print_operand (file, x, code: 0);
13590 return;
13591
13592 case 'E':
13593 /* Wrap address in an UNSPEC to declare special handling. */
13594 if (TARGET_64BIT)
13595 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13596
13597 output_address (VOIDmode, x);
13598 return;
13599
13600 case 'L':
13601 if (ASSEMBLER_DIALECT == ASM_ATT)
13602 putc (c: 'l', stream: file);
13603 return;
13604
13605 case 'W':
13606 if (ASSEMBLER_DIALECT == ASM_ATT)
13607 putc (c: 'w', stream: file);
13608 return;
13609
13610 case 'B':
13611 if (ASSEMBLER_DIALECT == ASM_ATT)
13612 putc (c: 'b', stream: file);
13613 return;
13614
13615 case 'Q':
13616 if (ASSEMBLER_DIALECT == ASM_ATT)
13617 putc (c: 'l', stream: file);
13618 return;
13619
13620 case 'S':
13621 if (ASSEMBLER_DIALECT == ASM_ATT)
13622 putc (c: 's', stream: file);
13623 return;
13624
13625 case 'T':
13626 if (ASSEMBLER_DIALECT == ASM_ATT)
13627 putc (c: 't', stream: file);
13628 return;
13629
13630 case 'O':
13631#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13632 if (ASSEMBLER_DIALECT != ASM_ATT)
13633 return;
13634
13635 switch (GET_MODE_SIZE (GET_MODE (x)))
13636 {
13637 case 2:
13638 putc ('w', file);
13639 break;
13640
13641 case 4:
13642 putc ('l', file);
13643 break;
13644
13645 case 8:
13646 putc ('q', file);
13647 break;
13648
13649 default:
13650 output_operand_lossage ("invalid operand size for operand "
13651 "code 'O'");
13652 return;
13653 }
13654
13655 putc ('.', file);
13656#endif
13657 return;
13658
13659 case 'z':
13660 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13661 {
13662 /* Opcodes don't get size suffixes if using Intel opcodes. */
13663 if (ASSEMBLER_DIALECT == ASM_INTEL)
13664 return;
13665
13666 switch (GET_MODE_SIZE (GET_MODE (x)))
13667 {
13668 case 1:
13669 putc (c: 'b', stream: file);
13670 return;
13671
13672 case 2:
13673 putc (c: 'w', stream: file);
13674 return;
13675
13676 case 4:
13677 putc (c: 'l', stream: file);
13678 return;
13679
13680 case 8:
13681 putc (c: 'q', stream: file);
13682 return;
13683
13684 default:
13685 output_operand_lossage ("invalid operand size for operand "
13686 "code 'z'");
13687 return;
13688 }
13689 }
13690
13691 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13692 {
13693 if (this_is_asm_operands)
13694 warning_for_asm (this_is_asm_operands,
13695 "non-integer operand used with operand code %<z%>");
13696 else
13697 warning (0, "non-integer operand used with operand code %<z%>");
13698 }
13699 /* FALLTHRU */
13700
13701 case 'Z':
13702 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13703 if (ASSEMBLER_DIALECT == ASM_INTEL)
13704 return;
13705
13706 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13707 {
13708 switch (GET_MODE_SIZE (GET_MODE (x)))
13709 {
13710 case 2:
13711#ifdef HAVE_AS_IX86_FILDS
13712 putc (c: 's', stream: file);
13713#endif
13714 return;
13715
13716 case 4:
13717 putc (c: 'l', stream: file);
13718 return;
13719
13720 case 8:
13721#ifdef HAVE_AS_IX86_FILDQ
13722 putc (c: 'q', stream: file);
13723#else
13724 fputs ("ll", file);
13725#endif
13726 return;
13727
13728 default:
13729 break;
13730 }
13731 }
13732 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13733 {
13734 /* 387 opcodes don't get size suffixes
13735 if the operands are registers. */
13736 if (STACK_REG_P (x))
13737 return;
13738
13739 switch (GET_MODE_SIZE (GET_MODE (x)))
13740 {
13741 case 4:
13742 putc (c: 's', stream: file);
13743 return;
13744
13745 case 8:
13746 putc (c: 'l', stream: file);
13747 return;
13748
13749 case 12:
13750 case 16:
13751 putc (c: 't', stream: file);
13752 return;
13753
13754 default:
13755 break;
13756 }
13757 }
13758 else
13759 {
13760 output_operand_lossage ("invalid operand type used with "
13761 "operand code '%c'", code);
13762 return;
13763 }
13764
13765 output_operand_lossage ("invalid operand size for operand code '%c'",
13766 code);
13767 return;
13768
13769 case 'd':
13770 case 'b':
13771 case 'w':
13772 case 'k':
13773 case 'q':
13774 case 'h':
13775 case 't':
13776 case 'g':
13777 case 'y':
13778 case 'x':
13779 case 'X':
13780 case 'P':
13781 case 'p':
13782 case 'V':
13783 break;
13784
13785 case 's':
13786 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13787 {
13788 ix86_print_operand (file, x, code: 0);
13789 fputs (s: ", ", stream: file);
13790 }
13791 return;
13792
13793 case 'Y':
13794 switch (GET_CODE (x))
13795 {
13796 case NE:
13797 fputs (s: "neq", stream: file);
13798 break;
13799 case EQ:
13800 fputs (s: "eq", stream: file);
13801 break;
13802 case GE:
13803 case GEU:
13804 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", stream: file);
13805 break;
13806 case GT:
13807 case GTU:
13808 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", stream: file);
13809 break;
13810 case LE:
13811 case LEU:
13812 fputs (s: "le", stream: file);
13813 break;
13814 case LT:
13815 case LTU:
13816 fputs (s: "lt", stream: file);
13817 break;
13818 case UNORDERED:
13819 fputs (s: "unord", stream: file);
13820 break;
13821 case ORDERED:
13822 fputs (s: "ord", stream: file);
13823 break;
13824 case UNEQ:
13825 fputs (s: "ueq", stream: file);
13826 break;
13827 case UNGE:
13828 fputs (s: "nlt", stream: file);
13829 break;
13830 case UNGT:
13831 fputs (s: "nle", stream: file);
13832 break;
13833 case UNLE:
13834 fputs (s: "ule", stream: file);
13835 break;
13836 case UNLT:
13837 fputs (s: "ult", stream: file);
13838 break;
13839 case LTGT:
13840 fputs (s: "une", stream: file);
13841 break;
13842 default:
13843 output_operand_lossage ("operand is not a condition code, "
13844 "invalid operand code 'Y'");
13845 return;
13846 }
13847 return;
13848
13849 case 'D':
13850	  /* A little bit of braindamage here.  The SSE compare instructions
13851	     use completely different names for the comparisons than the
13852	     fp conditional moves do.  */
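	  /* E.g. an EQ comparison is printed as "eq", giving mnemonics of
	     the cmpeqss/cmpeqps kind, whereas put_condition_code would
	     print the same comparison as the plain "e" suffix used by
	     set/cmov.  */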
13853 switch (GET_CODE (x))
13854 {
13855 case UNEQ:
13856 if (TARGET_AVX)
13857 {
13858 fputs (s: "eq_us", stream: file);
13859 break;
13860 }
13861 /* FALLTHRU */
13862 case EQ:
13863 fputs (s: "eq", stream: file);
13864 break;
13865 case UNLT:
13866 if (TARGET_AVX)
13867 {
13868 fputs (s: "nge", stream: file);
13869 break;
13870 }
13871 /* FALLTHRU */
13872 case LT:
13873 fputs (s: "lt", stream: file);
13874 break;
13875 case UNLE:
13876 if (TARGET_AVX)
13877 {
13878 fputs (s: "ngt", stream: file);
13879 break;
13880 }
13881 /* FALLTHRU */
13882 case LE:
13883 fputs (s: "le", stream: file);
13884 break;
13885 case UNORDERED:
13886 fputs (s: "unord", stream: file);
13887 break;
13888 case LTGT:
13889 if (TARGET_AVX)
13890 {
13891 fputs (s: "neq_oq", stream: file);
13892 break;
13893 }
13894 /* FALLTHRU */
13895 case NE:
13896 fputs (s: "neq", stream: file);
13897 break;
13898 case GE:
13899 if (TARGET_AVX)
13900 {
13901 fputs (s: "ge", stream: file);
13902 break;
13903 }
13904 /* FALLTHRU */
13905 case UNGE:
13906 fputs (s: "nlt", stream: file);
13907 break;
13908 case GT:
13909 if (TARGET_AVX)
13910 {
13911 fputs (s: "gt", stream: file);
13912 break;
13913 }
13914 /* FALLTHRU */
13915 case UNGT:
13916 fputs (s: "nle", stream: file);
13917 break;
13918 case ORDERED:
13919 fputs (s: "ord", stream: file);
13920 break;
13921 default:
13922 output_operand_lossage ("operand is not a condition code, "
13923 "invalid operand code 'D'");
13924 return;
13925 }
13926 return;
13927
13928 case 'F':
13929 case 'f':
13930#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13931 if (ASSEMBLER_DIALECT == ASM_ATT)
13932 putc ('.', file);
13933 gcc_fallthrough ();
13934#endif
13935
13936 case 'C':
13937 case 'c':
13938 if (!COMPARISON_P (x))
13939 {
13940 output_operand_lossage ("operand is not a condition code, "
13941 "invalid operand code '%c'", code);
13942 return;
13943 }
13944 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13945 reverse: code == 'c' || code == 'f',
13946 fp: code == 'F' || code == 'f',
13947 file);
13948 return;
13949
13950 case 'H':
13951 if (!offsettable_memref_p (x))
13952 {
13953 output_operand_lossage ("operand is not an offsettable memory "
13954 "reference, invalid operand code 'H'");
13955 return;
13956 }
13957 /* It doesn't actually matter what mode we use here, as we're
13958 only going to use this for printing. */
13959 x = adjust_address_nv (x, DImode, 8);
13960 /* Output 'qword ptr' for intel assembler dialect. */
13961 if (ASSEMBLER_DIALECT == ASM_INTEL)
13962 code = 'q';
13963 break;
13964
13965 case 'K':
13966 if (!CONST_INT_P (x))
13967 {
13968 output_operand_lossage ("operand is not an integer, invalid "
13969 "operand code 'K'");
13970 return;
13971 }
13972
13973 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13974#ifdef HAVE_AS_IX86_HLE
13975 fputs (s: "xacquire ", stream: file);
13976#else
13977 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13978#endif
13979 else if (INTVAL (x) & IX86_HLE_RELEASE)
13980#ifdef HAVE_AS_IX86_HLE
13981 fputs (s: "xrelease ", stream: file);
13982#else
13983 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13984#endif
13985 /* We do not want to print value of the operand. */
13986 return;
13987
13988 case 'N':
13989 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13990 fputs (s: "{z}", stream: file);
13991 return;
13992
13993 case 'r':
13994 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13995 {
13996 output_operand_lossage ("operand is not a specific integer, "
13997 "invalid operand code 'r'");
13998 return;
13999 }
14000
14001 if (ASSEMBLER_DIALECT == ASM_INTEL)
14002 fputs (s: ", ", stream: file);
14003
14004 fputs (s: "{sae}", stream: file);
14005
14006 if (ASSEMBLER_DIALECT == ASM_ATT)
14007 fputs (s: ", ", stream: file);
14008
14009 return;
14010
14011 case 'R':
14012 if (!CONST_INT_P (x))
14013 {
14014 output_operand_lossage ("operand is not an integer, invalid "
14015 "operand code 'R'");
14016 return;
14017 }
14018
14019 if (ASSEMBLER_DIALECT == ASM_INTEL)
14020 fputs (s: ", ", stream: file);
14021
14022 switch (INTVAL (x))
14023 {
14024 case ROUND_NEAREST_INT | ROUND_SAE:
14025 fputs (s: "{rn-sae}", stream: file);
14026 break;
14027 case ROUND_NEG_INF | ROUND_SAE:
14028 fputs (s: "{rd-sae}", stream: file);
14029 break;
14030 case ROUND_POS_INF | ROUND_SAE:
14031 fputs (s: "{ru-sae}", stream: file);
14032 break;
14033 case ROUND_ZERO | ROUND_SAE:
14034 fputs (s: "{rz-sae}", stream: file);
14035 break;
14036 default:
14037 output_operand_lossage ("operand is not a specific integer, "
14038 "invalid operand code 'R'");
14039 }
14040
14041 if (ASSEMBLER_DIALECT == ASM_ATT)
14042 fputs (s: ", ", stream: file);
14043
14044 return;
14045
14046 case '*':
14047 if (ASSEMBLER_DIALECT == ASM_ATT)
14048 putc (c: '*', stream: file);
14049 return;
14050
14051 case '&':
14052 {
14053 const char *name = get_some_local_dynamic_name ();
14054 if (name == NULL)
14055 output_operand_lossage ("'%%&' used without any "
14056 "local dynamic TLS references");
14057 else
14058 assemble_name (file, name);
14059 return;
14060 }
14061
14062 case '+':
14063 {
14064 rtx x;
14065
14066 if (!optimize
14067 || optimize_function_for_size_p (cfun)
14068 || !TARGET_BRANCH_PREDICTION_HINTS)
14069 return;
14070
14071 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14072 if (x)
14073 {
14074 int pred_val = profile_probability::from_reg_br_prob_note
14075 (XINT (x, 0)).to_reg_br_prob_base ();
14076
14077 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14078 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14079 {
14080 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14081 bool cputaken
14082 = final_forward_branch_p (current_output_insn) == 0;
14083
14084		  /* Emit hints only when the default branch prediction
14085 heuristics would fail. */
14086 if (taken != cputaken)
14087 {
14088 /* We use 3e (DS) prefix for taken branches and
14089 2e (CS) prefix for not taken branches. */
14090 if (taken)
14091 fputs (s: "ds ; ", stream: file);
14092 else
14093 fputs (s: "cs ; ", stream: file);
14094 }
14095 }
14096 }
14097 return;
14098 }
14099
14100 case ';':
14101#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14102 putc (';', file);
14103#endif
14104 return;
14105
14106 case '~':
14107 putc (TARGET_AVX2 ? 'i' : 'f', stream: file);
14108 return;
14109
14110 case 'M':
14111 if (TARGET_X32)
14112 {
14113	    /* NB: 32-bit indices in a VSIB address are sign-extended
14114	       to 64 bits.  In x32, the 32-bit address 0xf7fa3010 is
14115	       sign-extended to 0xfffffffff7fa3010, which is an invalid
14116	       address.  Add the addr32 prefix if there is no base
14117	       register and no symbol.  */
14118 bool ok;
14119 struct ix86_address parts;
14120 ok = ix86_decompose_address (addr: x, out: &parts);
14121 gcc_assert (ok && parts.index == NULL_RTX);
14122 if (parts.base == NULL_RTX
14123 && (parts.disp == NULL_RTX
14124 || !symbolic_operand (parts.disp,
14125 GET_MODE (parts.disp))))
14126 fputs (s: "addr32 ", stream: file);
14127 }
14128 return;
14129
14130 case '^':
14131 if (TARGET_64BIT && Pmode != word_mode)
14132 fputs (s: "addr32 ", stream: file);
14133 return;
14134
14135 case '!':
14136 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14137 fputs (s: "notrack ", stream: file);
14138 return;
14139
14140 default:
14141 output_operand_lossage ("invalid operand code '%c'", code);
14142 }
14143 }
14144
14145 if (REG_P (x))
14146 print_reg (x, code, file);
14147
14148 else if (MEM_P (x))
14149 {
14150 rtx addr = XEXP (x, 0);
14151
14152 /* No `byte ptr' prefix for call instructions ... */
14153 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14154 {
14155 machine_mode mode = GET_MODE (x);
14156 const char *size;
14157
14158 /* Check for explicit size override codes. */
14159 if (code == 'b')
14160 size = "BYTE";
14161 else if (code == 'w')
14162 size = "WORD";
14163 else if (code == 'k')
14164 size = "DWORD";
14165 else if (code == 'q')
14166 size = "QWORD";
14167 else if (code == 'x')
14168 size = "XMMWORD";
14169 else if (code == 't')
14170 size = "YMMWORD";
14171 else if (code == 'g')
14172 size = "ZMMWORD";
14173 else if (mode == BLKmode)
14174 /* ... or BLKmode operands, when not overridden. */
14175 size = NULL;
14176 else
14177 switch (GET_MODE_SIZE (mode))
14178 {
14179 case 1: size = "BYTE"; break;
14180 case 2: size = "WORD"; break;
14181 case 4: size = "DWORD"; break;
14182 case 8: size = "QWORD"; break;
14183 case 12: size = "TBYTE"; break;
14184 case 16:
14185 if (mode == XFmode)
14186 size = "TBYTE";
14187 else
14188 size = "XMMWORD";
14189 break;
14190 case 32: size = "YMMWORD"; break;
14191 case 64: size = "ZMMWORD"; break;
14192 default:
14193 gcc_unreachable ();
14194 }
14195 if (size)
14196 {
14197 fputs (s: size, stream: file);
14198 fputs (s: " PTR ", stream: file);
14199 }
14200 }
14201
14202 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14203 output_operand_lossage ("invalid constraints for operand");
14204 else
14205 ix86_print_operand_address_as
14206 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14207 }
14208
14209 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14210 {
14211 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14212 REAL_MODE_FORMAT (HFmode));
14213 if (ASSEMBLER_DIALECT == ASM_ATT)
14214 putc (c: '$', stream: file);
14215 fprintf (stream: file, format: "0x%04x", (unsigned int) l);
14216 }
14217
14218 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14219 {
14220 long l;
14221
14222 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14223
14224 if (ASSEMBLER_DIALECT == ASM_ATT)
14225 putc (c: '$', stream: file);
14226      /* Sign-extend the 32-bit SFmode immediate to 8 bytes.  */
14227 if (code == 'q')
14228 fprintf (stream: file, format: "0x%08" HOST_LONG_LONG_FORMAT "x",
14229 (unsigned long long) (int) l);
14230 else
14231 fprintf (stream: file, format: "0x%08x", (unsigned int) l);
14232 }
14233
14234 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14235 {
14236 long l[2];
14237
14238 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14239
14240 if (ASSEMBLER_DIALECT == ASM_ATT)
14241 putc (c: '$', stream: file);
14242 fprintf (stream: file, format: "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14243 }
14244
14245 /* These float cases don't actually occur as immediate operands. */
14246 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14247 {
14248 char dstr[30];
14249
14250 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14251 fputs (s: dstr, stream: file);
14252 }
14253
14254 /* Print bcst_mem_operand. */
14255 else if (GET_CODE (x) == VEC_DUPLICATE)
14256 {
14257 machine_mode vmode = GET_MODE (x);
14258 /* Must be bcst_memory_operand. */
14259 gcc_assert (bcst_mem_operand (x, vmode));
14260
14261 rtx mem = XEXP (x,0);
14262 ix86_print_operand (file, x: mem, code: 0);
14263
14264 switch (vmode)
14265 {
14266 case E_V2DImode:
14267 case E_V2DFmode:
14268 fputs (s: "{1to2}", stream: file);
14269 break;
14270 case E_V4SImode:
14271 case E_V4SFmode:
14272 case E_V4DImode:
14273 case E_V4DFmode:
14274 fputs (s: "{1to4}", stream: file);
14275 break;
14276 case E_V8SImode:
14277 case E_V8SFmode:
14278 case E_V8DFmode:
14279 case E_V8DImode:
14280 case E_V8HFmode:
14281 fputs (s: "{1to8}", stream: file);
14282 break;
14283 case E_V16SFmode:
14284 case E_V16SImode:
14285 case E_V16HFmode:
14286 fputs (s: "{1to16}", stream: file);
14287 break;
14288 case E_V32HFmode:
14289 fputs (s: "{1to32}", stream: file);
14290 break;
14291 default:
14292 gcc_unreachable ();
14293 }
14294 }
14295
14296 else
14297 {
14298 /* We have patterns that allow zero sets of memory, for instance.
14299 In 64-bit mode, we should probably support all 8-byte vectors,
14300 since we can in fact encode that into an immediate. */
14301 if (GET_CODE (x) == CONST_VECTOR)
14302 {
14303 if (x != CONST0_RTX (GET_MODE (x)))
14304 output_operand_lossage ("invalid vector immediate");
14305 x = const0_rtx;
14306 }
14307
14308 if (code == 'P')
14309 {
14310 if (ix86_force_load_from_GOT_p (x, call_p: true))
14311 {
14312	      /* For an inline assembly statement, load the function address
14313		 from the GOT with the 'P' operand modifier to avoid the PLT.  */
14314 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14315 (TARGET_64BIT
14316 ? UNSPEC_GOTPCREL
14317 : UNSPEC_GOT));
14318 x = gen_rtx_CONST (Pmode, x);
14319 x = gen_const_mem (Pmode, x);
14320 ix86_print_operand (file, x, code: 'A');
14321 return;
14322 }
14323 }
14324 else if (code != 'p')
14325 {
14326 if (CONST_INT_P (x))
14327 {
14328 if (ASSEMBLER_DIALECT == ASM_ATT)
14329 putc (c: '$', stream: file);
14330 }
14331 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14332 || GET_CODE (x) == LABEL_REF)
14333 {
14334 if (ASSEMBLER_DIALECT == ASM_ATT)
14335 putc (c: '$', stream: file);
14336 else
14337 fputs (s: "OFFSET FLAT:", stream: file);
14338 }
14339 }
14340 if (CONST_INT_P (x))
14341 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14342 else if (flag_pic || MACHOPIC_INDIRECT)
14343 output_pic_addr_const (file, x, code);
14344 else
14345 output_addr_const (file, x);
14346 }
14347}
14348
14349static bool
14350ix86_print_operand_punct_valid_p (unsigned char code)
14351{
14352 return (code == '*' || code == '+' || code == '&' || code == ';'
14353 || code == '~' || code == '^' || code == '!');
14354}
14355
14356/* Print a memory operand whose address is ADDR. */
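/* For instance, a base + index*scale + displacement address comes out
   as "disp(%base,%index,scale)" in AT&T syntax and roughly as
   "[base+index*scale+disp]" in Intel syntax; a non-generic segment such
   as %fs:/fs: is printed first unless RAW is set.  */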
14357
14358static void
14359ix86_print_operand_address_as (FILE *file, rtx addr,
14360 addr_space_t as, bool raw)
14361{
14362 struct ix86_address parts;
14363 rtx base, index, disp;
14364 int scale;
14365 int ok;
14366 bool vsib = false;
14367 int code = 0;
14368
14369 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14370 {
14371 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
14372 gcc_assert (parts.index == NULL_RTX);
14373 parts.index = XVECEXP (addr, 0, 1);
14374 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14375 addr = XVECEXP (addr, 0, 0);
14376 vsib = true;
14377 }
14378 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14379 {
14380 gcc_assert (TARGET_64BIT);
14381 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), out: &parts);
14382 code = 'q';
14383 }
14384 else
14385 ok = ix86_decompose_address (addr, out: &parts);
14386
14387 gcc_assert (ok);
14388
14389 base = parts.base;
14390 index = parts.index;
14391 disp = parts.disp;
14392 scale = parts.scale;
14393
14394 if (ADDR_SPACE_GENERIC_P (as))
14395 as = parts.seg;
14396 else
14397 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
14398
14399 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
14400 {
14401 if (ASSEMBLER_DIALECT == ASM_ATT)
14402 putc (c: '%', stream: file);
14403
14404 switch (as)
14405 {
14406 case ADDR_SPACE_SEG_FS:
14407 fputs (s: "fs:", stream: file);
14408 break;
14409 case ADDR_SPACE_SEG_GS:
14410 fputs (s: "gs:", stream: file);
14411 break;
14412 default:
14413 gcc_unreachable ();
14414 }
14415 }
14416
14417  /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
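  /* E.g. a reference to a non-TLS symbol FOO with no base or index
     register gets a pc_rtx base here, so it is printed as "foo(%rip)"
     (AT&T) or "foo[rip]" (Intel) rather than as an absolute address.  */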
14418 if (TARGET_64BIT && !base && !index && !raw)
14419 {
14420 rtx symbol = disp;
14421
14422 if (GET_CODE (disp) == CONST
14423 && GET_CODE (XEXP (disp, 0)) == PLUS
14424 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14425 symbol = XEXP (XEXP (disp, 0), 0);
14426
14427 if (GET_CODE (symbol) == LABEL_REF
14428 || (GET_CODE (symbol) == SYMBOL_REF
14429 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14430 base = pc_rtx;
14431 }
14432
14433 if (!base && !index)
14434 {
14435      /* A displacement-only address requires special attention.  */
14436 if (CONST_INT_P (disp))
14437 {
14438 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14439 fputs (s: "ds:", stream: file);
14440 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14441 }
14442 /* Load the external function address via the GOT slot to avoid PLT. */
14443 else if (GET_CODE (disp) == CONST
14444 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14445 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14446 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14447 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14448 output_pic_addr_const (file, x: disp, code: 0);
14449 else if (flag_pic)
14450 output_pic_addr_const (file, x: disp, code: 0);
14451 else
14452 output_addr_const (file, disp);
14453 }
14454 else
14455 {
14456 /* Print SImode register names to force addr32 prefix. */
14457 if (SImode_address_operand (addr, VOIDmode))
14458 {
14459 if (flag_checking)
14460 {
14461 gcc_assert (TARGET_64BIT);
14462 switch (GET_CODE (addr))
14463 {
14464 case SUBREG:
14465 gcc_assert (GET_MODE (addr) == SImode);
14466 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14467 break;
14468 case ZERO_EXTEND:
14469 case AND:
14470 gcc_assert (GET_MODE (addr) == DImode);
14471 break;
14472 default:
14473 gcc_unreachable ();
14474 }
14475 }
14476 gcc_assert (!code);
14477 code = 'k';
14478 }
14479 else if (code == 0
14480 && TARGET_X32
14481 && disp
14482 && CONST_INT_P (disp)
14483 && INTVAL (disp) < -16*1024*1024)
14484 {
14485 /* X32 runs in 64-bit mode, where displacement, DISP, in
14486 address DISP(%r64), is encoded as 32-bit immediate sign-
14487 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14488 address is %r64 + 0xffffffffbffffd00. When %r64 <
14489 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14490 which is invalid for x32. The correct address is %r64
14491 - 0x40000300 == 0xf7ffdd64. To properly encode
14492 -0x40000300(%r64) for x32, we zero-extend negative
14493 displacement by forcing addr32 prefix which truncates
14494 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14495 zero-extend all negative displacements, including -1(%rsp).
14496 However, for small negative displacements, sign-extension
14497 won't cause overflow. We only zero-extend negative
14498	     displacements if they are < -16*1024*1024, which is also used
14499 to check legitimate address displacements for PIC. */
14500 code = 'k';
14501 }
14502
14503 /* Since the upper 32 bits of RSP are always zero for x32,
14504 we can encode %esp as %rsp to avoid 0x67 prefix if
14505 there is no index register. */
14506 if (TARGET_X32 && Pmode == SImode
14507 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14508 code = 'q';
14509
14510 if (ASSEMBLER_DIALECT == ASM_ATT)
14511 {
14512 if (disp)
14513 {
14514 if (flag_pic)
14515 output_pic_addr_const (file, x: disp, code: 0);
14516 else if (GET_CODE (disp) == LABEL_REF)
14517 output_asm_label (disp);
14518 else
14519 output_addr_const (file, disp);
14520 }
14521
14522 putc (c: '(', stream: file);
14523 if (base)
14524 print_reg (x: base, code, file);
14525 if (index)
14526 {
14527 putc (c: ',', stream: file);
14528 print_reg (x: index, code: vsib ? 0 : code, file);
14529 if (scale != 1 || vsib)
14530 fprintf (stream: file, format: ",%d", scale);
14531 }
14532 putc (c: ')', stream: file);
14533 }
14534 else
14535 {
14536 rtx offset = NULL_RTX;
14537
14538 if (disp)
14539 {
14540 /* Pull out the offset of a symbol; print any symbol itself. */
14541 if (GET_CODE (disp) == CONST
14542 && GET_CODE (XEXP (disp, 0)) == PLUS
14543 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14544 {
14545 offset = XEXP (XEXP (disp, 0), 1);
14546 disp = gen_rtx_CONST (VOIDmode,
14547 XEXP (XEXP (disp, 0), 0));
14548 }
14549
14550 if (flag_pic)
14551 output_pic_addr_const (file, x: disp, code: 0);
14552 else if (GET_CODE (disp) == LABEL_REF)
14553 output_asm_label (disp);
14554 else if (CONST_INT_P (disp))
14555 offset = disp;
14556 else
14557 output_addr_const (file, disp);
14558 }
14559
14560 putc (c: '[', stream: file);
14561 if (base)
14562 {
14563 print_reg (x: base, code, file);
14564 if (offset)
14565 {
14566 if (INTVAL (offset) >= 0)
14567 putc (c: '+', stream: file);
14568 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14569 }
14570 }
14571 else if (offset)
14572 fprintf (stream: file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14573 else
14574 putc (c: '0', stream: file);
14575
14576 if (index)
14577 {
14578 putc (c: '+', stream: file);
14579 print_reg (x: index, code: vsib ? 0 : code, file);
14580 if (scale != 1 || vsib)
14581 fprintf (stream: file, format: "*%d", scale);
14582 }
14583 putc (c: ']', stream: file);
14584 }
14585 }
14586}
14587
14588static void
14589ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14590{
14591 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14592 output_operand_lossage ("invalid constraints for operand");
14593 else
14594 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, raw: false);
14595}
14596
14597/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14598
14599static bool
14600i386_asm_output_addr_const_extra (FILE *file, rtx x)
14601{
14602 rtx op;
14603
14604 if (GET_CODE (x) != UNSPEC)
14605 return false;
14606
14607 op = XVECEXP (x, 0, 0);
14608 switch (XINT (x, 1))
14609 {
14610 case UNSPEC_GOTOFF:
14611 output_addr_const (file, op);
14612 fputs (s: "@gotoff", stream: file);
14613 break;
14614 case UNSPEC_GOTTPOFF:
14615 output_addr_const (file, op);
14616 /* FIXME: This might be @TPOFF in Sun ld. */
14617 fputs (s: "@gottpoff", stream: file);
14618 break;
14619 case UNSPEC_TPOFF:
14620 output_addr_const (file, op);
14621 fputs (s: "@tpoff", stream: file);
14622 break;
14623 case UNSPEC_NTPOFF:
14624 output_addr_const (file, op);
14625 if (TARGET_64BIT)
14626 fputs (s: "@tpoff", stream: file);
14627 else
14628 fputs (s: "@ntpoff", stream: file);
14629 break;
14630 case UNSPEC_DTPOFF:
14631 output_addr_const (file, op);
14632 fputs (s: "@dtpoff", stream: file);
14633 break;
14634 case UNSPEC_GOTNTPOFF:
14635 output_addr_const (file, op);
14636 if (TARGET_64BIT)
14637 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14638 "@gottpoff(%rip)" : "@gottpoff[rip]", stream: file);
14639 else
14640 fputs (s: "@gotntpoff", stream: file);
14641 break;
14642 case UNSPEC_INDNTPOFF:
14643 output_addr_const (file, op);
14644 fputs (s: "@indntpoff", stream: file);
14645 break;
14646#if TARGET_MACHO
14647 case UNSPEC_MACHOPIC_OFFSET:
14648 output_addr_const (file, op);
14649 putc ('-', file);
14650 machopic_output_function_base_name (file);
14651 break;
14652#endif
14653
14654 default:
14655 return false;
14656 }
14657
14658 return true;
14659}
14660
14661
14662/* Output code to perform a 387 binary operation in INSN, one of PLUS,
14663 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14664 is the expression of the binary operation. The output may either be
14665 emitted here, or returned to the caller, like all output_* functions.
14666
14667 There is no guarantee that the operands are the same mode, as they
14668 might be within FLOAT or FLOAT_EXTEND expressions. */
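/* Illustrative example (the exact output depends on operand placement
   and on which operands die): for st(0) = st(0) + st(1) with both
   registers live, the returned template is of the form
   "fadd\t{%y2, %0|%0, %y2}", which prints as "fadd %st(1), %st" in
   AT&T syntax.  */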
14669
14670#ifndef SYSV386_COMPAT
14671/* Set to 1 for compatibility with brain-damaged assemblers. No-one
14672 wants to fix the assemblers because that causes incompatibility
14673 with gcc. No-one wants to fix gcc because that causes
14674 incompatibility with assemblers... You can use the option of
14675 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14676#define SYSV386_COMPAT 1
14677#endif
14678
14679const char *
14680output_387_binary_op (rtx_insn *insn, rtx *operands)
14681{
14682 static char buf[40];
14683 const char *p;
14684 bool is_sse
14685 = (SSE_REG_P (operands[0])
14686 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14687
14688 if (is_sse)
14689 p = "%v";
14690 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14691 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14692 p = "fi";
14693 else
14694 p = "f";
14695
14696 strcpy (buf, p);
14697
14698 switch (GET_CODE (operands[3]))
14699 {
14700 case PLUS:
14701 p = "add"; break;
14702 case MINUS:
14703 p = "sub"; break;
14704 case MULT:
14705 p = "mul"; break;
14706 case DIV:
14707 p = "div"; break;
14708 default:
14709 gcc_unreachable ();
14710 }
14711
14712 strcat (buf, p);
14713
14714 if (is_sse)
14715 {
14716 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14717 strcat (buf, p);
14718
14719 if (TARGET_AVX)
14720 p = "\t{%2, %1, %0|%0, %1, %2}";
14721 else
14722 p = "\t{%2, %0|%0, %2}";
14723
14724 strcat (buf, p);
14725 return buf;
14726 }
14727
14728 /* Even if we do not want to check the inputs, this documents input
14729 constraints, which helps in understanding the following code. */
14730 if (flag_checking)
14731 {
14732 if (STACK_REG_P (operands[0])
14733 && ((REG_P (operands[1])
14734 && REGNO (operands[0]) == REGNO (operands[1])
14735 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14736 || (REG_P (operands[2])
14737 && REGNO (operands[0]) == REGNO (operands[2])
14738 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14739 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14740 ; /* ok */
14741 else
14742 gcc_unreachable ();
14743 }
14744
14745 switch (GET_CODE (operands[3]))
14746 {
14747 case MULT:
14748 case PLUS:
14749 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14750 std::swap (operands[1], operands[2]);
14751
14752 /* We know operands[0] == operands[1]. */
14753
14754 if (MEM_P (operands[2]))
14755 {
14756 p = "%Z2\t%2";
14757 break;
14758 }
14759
14760 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14761 {
14762 if (STACK_TOP_P (operands[0]))
14763 /* How is it that we are storing to a dead operand[2]?
14764 Well, presumably operands[1] is dead too. We can't
14765 store the result to st(0) as st(0) gets popped on this
14766 instruction. Instead store to operands[2] (which I
14767 think has to be st(1)). st(1) will be popped later.
14768 gcc <= 2.8.1 didn't have this check and generated
14769 assembly code that the Unixware assembler rejected. */
14770 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14771 else
14772 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14773 break;
14774 }
14775
14776 if (STACK_TOP_P (operands[0]))
14777 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14778 else
14779 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14780 break;
14781
14782 case MINUS:
14783 case DIV:
14784 if (MEM_P (operands[1]))
14785 {
14786 p = "r%Z1\t%1";
14787 break;
14788 }
14789
14790 if (MEM_P (operands[2]))
14791 {
14792 p = "%Z2\t%2";
14793 break;
14794 }
14795
14796 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14797 {
14798#if SYSV386_COMPAT
14799 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14800 derived assemblers, confusingly reverse the direction of
14801 the operation for fsub{r} and fdiv{r} when the
14802 destination register is not st(0). The Intel assembler
14803 doesn't have this brain damage. Read !SYSV386_COMPAT to
14804 figure out what the hardware really does. */
14805 if (STACK_TOP_P (operands[0]))
14806 p = "{p\t%0, %2|rp\t%2, %0}";
14807 else
14808 p = "{rp\t%2, %0|p\t%0, %2}";
14809#else
14810 if (STACK_TOP_P (operands[0]))
14811 /* As above for fmul/fadd, we can't store to st(0). */
14812 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14813 else
14814 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14815#endif
14816 break;
14817 }
14818
14819 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14820 {
14821#if SYSV386_COMPAT
14822 if (STACK_TOP_P (operands[0]))
14823 p = "{rp\t%0, %1|p\t%1, %0}";
14824 else
14825 p = "{p\t%1, %0|rp\t%0, %1}";
14826#else
14827 if (STACK_TOP_P (operands[0]))
14828 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14829 else
14830 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14831#endif
14832 break;
14833 }
14834
14835 if (STACK_TOP_P (operands[0]))
14836 {
14837 if (STACK_TOP_P (operands[1]))
14838 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14839 else
14840 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14841 break;
14842 }
14843 else if (STACK_TOP_P (operands[1]))
14844 {
14845#if SYSV386_COMPAT
14846 p = "{\t%1, %0|r\t%0, %1}";
14847#else
14848 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14849#endif
14850 }
14851 else
14852 {
14853#if SYSV386_COMPAT
14854 p = "{r\t%2, %0|\t%0, %2}";
14855#else
14856 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14857#endif
14858 }
14859 break;
14860
14861 default:
14862 gcc_unreachable ();
14863 }
14864
14865 strcat (buf, p);
14866 return buf;
14867}
14868
14869/* Return needed mode for entity in optimize_mode_switching pass. */
14870
14871static int
14872ix86_dirflag_mode_needed (rtx_insn *insn)
14873{
14874 if (CALL_P (insn))
14875 {
14876 if (cfun->machine->func_type == TYPE_NORMAL)
14877 return X86_DIRFLAG_ANY;
14878 else
14879 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14880 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14881 }
14882
14883 if (recog_memoized (insn) < 0)
14884 return X86_DIRFLAG_ANY;
14885
14886 if (get_attr_type (insn) == TYPE_STR)
14887 {
14888 /* Emit cld instruction if stringops are used in the function. */
14889 if (cfun->machine->func_type == TYPE_NORMAL)
14890 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14891 else
14892 return X86_DIRFLAG_RESET;
14893 }
14894
14895 return X86_DIRFLAG_ANY;
14896}
14897
14898/* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
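/* For instance, any reference to %ymm0 or %zmm0 in a mode wider than 128
   bits counts, while a 128-bit use of %xmm0 or any use of the EXT_REX
   registers %xmm16-%xmm31 does not.  */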
14899
14900static bool
14901ix86_check_avx_upper_register (const_rtx exp)
14902{
14903 return (SSE_REG_P (exp)
14904 && !EXT_REX_SSE_REG_P (exp)
14905 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
14906}
14907
14908/* Check if a 256bit or 512bit AVX register is referenced in stores. */
14909
14910static void
14911ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14912{
14913 if (ix86_check_avx_upper_register (dest))
14914 {
14915 bool *used = (bool *) data;
14916 *used = true;
14917 }
14918}
14919
14920/* Return needed mode for entity in optimize_mode_switching pass. */
14921
14922static int
14923ix86_avx_u128_mode_needed (rtx_insn *insn)
14924{
14925 if (DEBUG_INSN_P (insn))
14926 return AVX_U128_ANY;
14927
14928 if (CALL_P (insn))
14929 {
14930 rtx link;
14931
14932 /* Needed mode is set to AVX_U128_CLEAN if there are
14933 no 256bit or 512bit modes used in function arguments. */
14934 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14935 link;
14936 link = XEXP (link, 1))
14937 {
14938 if (GET_CODE (XEXP (link, 0)) == USE)
14939 {
14940 rtx arg = XEXP (XEXP (link, 0), 0);
14941
14942 if (ix86_check_avx_upper_register (arg))
14943 return AVX_U128_DIRTY;
14944 }
14945 }
14946
14947 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
14948 or 512bit registers used in the function return register. */
14949 bool avx_upper_reg_found = false;
14950 note_stores (insn, ix86_check_avx_upper_stores,
14951 &avx_upper_reg_found);
14952 if (avx_upper_reg_found)
14953 return AVX_U128_DIRTY;
14954
14955 /* If the function is known to preserve some SSE registers,
14956 RA and previous passes can legitimately rely on that for
14957 modes wider than 256 bits. It's only safe to issue a
14958 vzeroupper if all SSE registers are clobbered. */
14959 const function_abi &abi = insn_callee_abi (insn);
14960 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
14961 || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14962 abi.mode_clobbers (V4DImode)))
14963 return AVX_U128_ANY;
14964
14965 return AVX_U128_CLEAN;
14966 }
14967
14968 subrtx_iterator::array_type array;
14969
14970 rtx set = single_set (insn);
14971 if (set)
14972 {
14973 rtx dest = SET_DEST (set);
14974 rtx src = SET_SRC (set);
14975 if (ix86_check_avx_upper_register (dest))
14976 {
14977 /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
14978 source isn't zero. */
14979 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
14980 return AVX_U128_DIRTY;
14981 else
14982 return AVX_U128_ANY;
14983 }
14984 else
14985 {
14986 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
14987 if (ix86_check_avx_upper_register (*iter))
14988 return AVX_U128_DIRTY;
14989 }
14990
14991 /* This isn't YMM/ZMM load/store. */
14992 return AVX_U128_ANY;
14993 }
14994
14995 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14996 Hardware changes state only when a 256bit register is written to,
14997 but we need to prevent the compiler from moving the optimal insertion
14998 point above an eventual read from a 256bit or 512bit register. */
14999 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15000 if (ix86_check_avx_upper_register (*iter))
15001 return AVX_U128_DIRTY;
15002
15003 return AVX_U128_ANY;
15004}
15005
15006/* Return mode that i387 must be switched into
15007 prior to the execution of insn. */
15008
15009static int
15010ix86_i387_mode_needed (int entity, rtx_insn *insn)
15011{
15012 enum attr_i387_cw mode;
15013
15014 /* The mode UNINITIALIZED is used to store the control word after a
15015 function call or ASM pattern. The mode ANY specifies that the
15016 function has no requirements on the control word and makes no
15017 changes to the bits we are interested in. */
15018
15019 if (CALL_P (insn)
15020 || (NONJUMP_INSN_P (insn)
15021 && (asm_noperands (PATTERN (insn)) >= 0
15022 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15023 return I387_CW_UNINITIALIZED;
15024
15025 if (recog_memoized (insn) < 0)
15026 return I387_CW_ANY;
15027
15028 mode = get_attr_i387_cw (insn);
15029
15030 switch (entity)
15031 {
15032 case I387_ROUNDEVEN:
15033 if (mode == I387_CW_ROUNDEVEN)
15034 return mode;
15035 break;
15036
15037 case I387_TRUNC:
15038 if (mode == I387_CW_TRUNC)
15039 return mode;
15040 break;
15041
15042 case I387_FLOOR:
15043 if (mode == I387_CW_FLOOR)
15044 return mode;
15045 break;
15046
15047 case I387_CEIL:
15048 if (mode == I387_CW_CEIL)
15049 return mode;
15050 break;
15051
15052 default:
15053 gcc_unreachable ();
15054 }
15055
15056 return I387_CW_ANY;
15057}
15058
15059/* Return mode that entity must be switched into
15060 prior to the execution of insn. */
15061
15062static int
15063ix86_mode_needed (int entity, rtx_insn *insn)
15064{
15065 switch (entity)
15066 {
15067 case X86_DIRFLAG:
15068 return ix86_dirflag_mode_needed (insn);
15069 case AVX_U128:
15070 return ix86_avx_u128_mode_needed (insn);
15071 case I387_ROUNDEVEN:
15072 case I387_TRUNC:
15073 case I387_FLOOR:
15074 case I387_CEIL:
15075 return ix86_i387_mode_needed (entity, insn);
15076 default:
15077 gcc_unreachable ();
15078 }
15079 return 0;
15080}
15081
15082/* Calculate mode of upper 128bit AVX registers after the insn. */
15083
15084static int
15085ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15086{
15087 rtx pat = PATTERN (insn);
15088
15089 if (vzeroupper_pattern (pat, VOIDmode)
15090 || vzeroall_pattern (pat, VOIDmode))
15091 return AVX_U128_CLEAN;
15092
15093 /* We know that state is clean after CALL insn if there are no
15094 256bit or 512bit registers used in the function return register. */
15095 if (CALL_P (insn))
15096 {
15097 bool avx_upper_reg_found = false;
15098 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15099
15100 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
15101 }
15102
15103 /* Otherwise, return current mode. Remember that if insn
15104 references AVX 256bit or 512bit registers, the mode was already
15105 changed to DIRTY from MODE_NEEDED. */
15106 return mode;
15107}
15108
15109/* Return the mode that an insn results in. */
15110
15111static int
15112ix86_mode_after (int entity, int mode, rtx_insn *insn)
15113{
15114 switch (entity)
15115 {
15116 case X86_DIRFLAG:
15117 return mode;
15118 case AVX_U128:
15119 return ix86_avx_u128_mode_after (mode, insn);
15120 case I387_ROUNDEVEN:
15121 case I387_TRUNC:
15122 case I387_FLOOR:
15123 case I387_CEIL:
15124 return mode;
15125 default:
15126 gcc_unreachable ();
15127 }
15128}
15129
15130static int
15131ix86_dirflag_mode_entry (void)
15132{
15133 /* For TARGET_CLD or in the interrupt handler we can't assume
15134 direction flag state at function entry. */
15135 if (TARGET_CLD
15136 || cfun->machine->func_type != TYPE_NORMAL)
15137 return X86_DIRFLAG_ANY;
15138
15139 return X86_DIRFLAG_RESET;
15140}
15141
15142static int
15143ix86_avx_u128_mode_entry (void)
15144{
15145 tree arg;
15146
15147 /* Entry mode is set to AVX_U128_DIRTY if there are
15148 256bit or 512bit modes used in function arguments. */
15149 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15150 arg = TREE_CHAIN (arg))
15151 {
15152 rtx incoming = DECL_INCOMING_RTL (arg);
15153
15154 if (incoming && ix86_check_avx_upper_register (exp: incoming))
15155 return AVX_U128_DIRTY;
15156 }
15157
15158 return AVX_U128_CLEAN;
15159}
15160
15161/* Return a mode that ENTITY is assumed to be
15162 switched to at function entry. */
15163
15164static int
15165ix86_mode_entry (int entity)
15166{
15167 switch (entity)
15168 {
15169 case X86_DIRFLAG:
15170 return ix86_dirflag_mode_entry ();
15171 case AVX_U128:
15172 return ix86_avx_u128_mode_entry ();
15173 case I387_ROUNDEVEN:
15174 case I387_TRUNC:
15175 case I387_FLOOR:
15176 case I387_CEIL:
15177 return I387_CW_ANY;
15178 default:
15179 gcc_unreachable ();
15180 }
15181}
15182
15183static int
15184ix86_avx_u128_mode_exit (void)
15185{
15186 rtx reg = crtl->return_rtx;
15187
15188 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15189 or 512bit modes used in the function return register. */
15190 if (reg && ix86_check_avx_upper_register (exp: reg))
15191 return AVX_U128_DIRTY;
15192
15193 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15194 modes used in function arguments, otherwise return
15195 AVX_U128_CLEAN. */
15196 return ix86_avx_u128_mode_entry ();
15197}
15198
15199/* Return a mode that ENTITY is assumed to be
15200 switched to at function exit. */
15201
15202static int
15203ix86_mode_exit (int entity)
15204{
15205 switch (entity)
15206 {
15207 case X86_DIRFLAG:
15208 return X86_DIRFLAG_ANY;
15209 case AVX_U128:
15210 return ix86_avx_u128_mode_exit ();
15211 case I387_ROUNDEVEN:
15212 case I387_TRUNC:
15213 case I387_FLOOR:
15214 case I387_CEIL:
15215 return I387_CW_ANY;
15216 default:
15217 gcc_unreachable ();
15218 }
15219}
15220
15221static int
15222ix86_mode_priority (int, int n)
15223{
15224 return n;
15225}
15226
15227/* Output code to initialize control word copies used by trunc?f?i and
15228 rounding patterns. CURRENT_MODE is set to current control word,
15229 while NEW_MODE is set to new control word. */
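/* The x87 rounding-control field is bits 11:10 of the control word:
   00 = round to nearest even, 01 = round down toward -inf, 10 = round up
   toward +inf, 11 = round toward zero (truncate).  Hence the 0x0c00 mask
   and the 0x0400/0x0800/0x0c00 values used below.  */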
15230
15231static void
15232emit_i387_cw_initialization (int mode)
15233{
15234 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15235 rtx new_mode;
15236
15237 enum ix86_stack_slot slot;
15238
15239 rtx reg = gen_reg_rtx (HImode);
15240
15241 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15242 emit_move_insn (reg, copy_rtx (stored_mode));
15243
15244 switch (mode)
15245 {
15246 case I387_CW_ROUNDEVEN:
15247 /* round to nearest */
15248 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15249 slot = SLOT_CW_ROUNDEVEN;
15250 break;
15251
15252 case I387_CW_TRUNC:
15253 /* round toward zero (truncate) */
15254 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15255 slot = SLOT_CW_TRUNC;
15256 break;
15257
15258 case I387_CW_FLOOR:
15259 /* round down toward -oo */
15260 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15261 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15262 slot = SLOT_CW_FLOOR;
15263 break;
15264
15265 case I387_CW_CEIL:
15266 /* round up toward +oo */
15267 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15268 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15269 slot = SLOT_CW_CEIL;
15270 break;
15271
15272 default:
15273 gcc_unreachable ();
15274 }
15275
15276 gcc_assert (slot < MAX_386_STACK_LOCALS);
15277
15278 new_mode = assign_386_stack_local (HImode, slot);
15279 emit_move_insn (new_mode, reg);
15280}
15281
15282/* Generate one or more insns to set ENTITY to MODE. */
15283
15284static void
15285ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15286 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15287{
15288 switch (entity)
15289 {
15290 case X86_DIRFLAG:
15291 if (mode == X86_DIRFLAG_RESET)
15292 emit_insn (gen_cld ());
15293 break;
15294 case AVX_U128:
15295 if (mode == AVX_U128_CLEAN)
15296 ix86_expand_avx_vzeroupper ();
15297 break;
15298 case I387_ROUNDEVEN:
15299 case I387_TRUNC:
15300 case I387_FLOOR:
15301 case I387_CEIL:
15302 if (mode != I387_CW_ANY
15303 && mode != I387_CW_UNINITIALIZED)
15304 emit_i387_cw_initialization (mode);
15305 break;
15306 default:
15307 gcc_unreachable ();
15308 }
15309}
15310
15311/* Output code for INSN to convert a float to a signed int. OPERANDS
15312 are the insn operands. The output may be [HSD]Imode and the input
15313 operand may be [SDX]Fmode. */
15314
15315const char *
15316output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15317{
15318 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15319 bool dimode_p = GET_MODE (operands[0]) == DImode;
15320 int round_mode = get_attr_i387_cw (insn);
15321
15322 static char buf[40];
15323 const char *p;
15324
15325 /* Jump through a hoop or two for DImode, since the hardware has no
15326 non-popping instruction. We used to do this a different way, but
15327 that was somewhat fragile and broke with post-reload splitters. */
15328 if ((dimode_p || fisttp) && !stack_top_dies)
15329 output_asm_insn ("fld\t%y1", operands);
15330
15331 gcc_assert (STACK_TOP_P (operands[1]));
15332 gcc_assert (MEM_P (operands[0]));
15333 gcc_assert (GET_MODE (operands[1]) != TFmode);
15334
15335 if (fisttp)
15336 return "fisttp%Z0\t%0";
15337
15338 strcpy (buf, "fist");
15339
15340 if (round_mode != I387_CW_ANY)
15341 output_asm_insn ("fldcw\t%3", operands);
15342
15343 p = "p%Z0\t%0";
15344 strcat (buf, p + !(stack_top_dies || dimode_p));
15345
15346 output_asm_insn (buf, operands);
15347
15348 if (round_mode != I387_CW_ANY)
15349 output_asm_insn ("fldcw\t%2", operands);
15350
15351 return "";
15352}
15353
15354/* Output code for x87 ffreep insn. The OPNO argument, which may only
15355 have the values zero or one, indicates the ffreep insn's operand
15356 from the OPERANDS array. */
15357
15358static const char *
15359output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15360{
15361 if (TARGET_USE_FFREEP)
15362#ifdef HAVE_AS_IX86_FFREEP
15363 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15364#else
15365 {
15366 static char retval[32];
15367 int regno = REGNO (operands[opno]);
15368
15369 gcc_assert (STACK_REGNO_P (regno));
15370
15371 regno -= FIRST_STACK_REG;
15372
15373 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15374 return retval;
15375 }
15376#endif
15377
15378 return opno ? "fstp\t%y1" : "fstp\t%y0";
15379}
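/* The raw ASM_SHORT encoding above relies on x86 being little-endian: the
   16-bit value 0xc<N>df is emitted as the byte sequence 0xdf, 0xc0+N,
   which is the machine encoding of "ffreep %st(N)" for assemblers that
   lack the mnemonic.  */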
15380
15381
15382/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15383 should be used. UNORDERED_P is true when fucom should be used. */
15384
15385const char *
15386output_fp_compare (rtx_insn *insn, rtx *operands,
15387 bool eflags_p, bool unordered_p)
15388{
15389 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15390 bool stack_top_dies;
15391
15392 static char buf[40];
15393 const char *p;
15394
15395 gcc_assert (STACK_TOP_P (xops[0]));
15396
15397 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15398
15399 if (eflags_p)
15400 {
15401 p = unordered_p ? "fucomi" : "fcomi";
15402 strcpy (buf, p);
15403
15404 p = "p\t{%y1, %0|%0, %y1}";
15405 strcat (buf, p + !stack_top_dies);
15406
15407 return buf;
15408 }
15409
15410 if (STACK_REG_P (xops[1])
15411 && stack_top_dies
15412 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15413 {
15414 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15415
15416 /* If the top of the 387 stack dies, and the other operand is
15417 also a stack register that dies, then this must be a
15418 `fcompp' float compare. */
15419 p = unordered_p ? "fucompp" : "fcompp";
15420 strcpy (buf, p);
15421 }
15422 else if (const0_operand (xops[1], VOIDmode))
15423 {
15424 gcc_assert (!unordered_p);
15425 strcpy (buf, "ftst");
15426 }
15427 else
15428 {
15429 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15430 {
15431 gcc_assert (!unordered_p);
15432 p = "ficom";
15433 }
15434 else
15435 p = unordered_p ? "fucom" : "fcom";
15436
15437 strcpy (buf, p);
15438
15439 p = "p%Z2\t%y2";
15440 strcat (buf, p + !stack_top_dies);
15441 }
15442
15443 output_asm_insn (buf, operands);
15444 return "fnstsw\t%0";
15445}
15446
15447void
15448ix86_output_addr_vec_elt (FILE *file, int value)
15449{
15450 const char *directive = ASM_LONG;
15451
15452#ifdef ASM_QUAD
15453 if (TARGET_LP64)
15454 directive = ASM_QUAD;
15455#else
15456 gcc_assert (!TARGET_64BIT);
15457#endif
15458
15459 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15460}
15461
15462void
15463ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15464{
15465 const char *directive = ASM_LONG;
15466
15467#ifdef ASM_QUAD
15468 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15469 directive = ASM_QUAD;
15470#else
15471 gcc_assert (!TARGET_64BIT);
15472#endif
15473 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15474 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15475 fprintf (file, "%s%s%d-%s%d\n",
15476 directive, LPREFIX, value, LPREFIX, rel);
15477#if TARGET_MACHO
15478 else if (TARGET_MACHO)
15479 {
15480 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15481 machopic_output_function_base_name (file);
15482 putc ('\n', file);
15483 }
15484#endif
15485 else if (HAVE_AS_GOTOFF_IN_DATA)
15486 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15487 else
15488 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15489 GOT_SYMBOL_NAME, LPREFIX, value);
15490}
15491
15492#define LEA_MAX_STALL (3)
15493#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
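/* The distances accumulated below are measured in half-cycles, so
   LEA_SEARCH_THRESHOLD corresponds to LEA_MAX_STALL full cycles of
   lookahead.  */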
15494
15495/* Increase given DISTANCE in half-cycles according to
15496 dependencies between PREV and NEXT instructions.
15497 Add 1 half-cycle if there is no dependency and
15498 go to the next cycle if there is some dependency. */
15499
15500static unsigned int
15501increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15502{
15503 df_ref def, use;
15504
15505 if (!prev || !next)
15506 return distance + (distance & 1) + 2;
15507
15508 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15509 return distance + 1;
15510
15511 FOR_EACH_INSN_USE (use, next)
15512 FOR_EACH_INSN_DEF (def, prev)
15513 if (!DF_REF_IS_ARTIFICIAL (def)
15514 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15515 return distance + (distance & 1) + 2;
15516
15517 return distance + 1;
15518}
15519
15520/* Function checks if instruction INSN defines register number
15521 REGNO1 or REGNO2. */
15522
15523bool
15524insn_defines_reg (unsigned int regno1, unsigned int regno2,
15525 rtx_insn *insn)
15526{
15527 df_ref def;
15528
15529 FOR_EACH_INSN_DEF (def, insn)
15530 if (DF_REF_REG_DEF_P (def)
15531 && !DF_REF_IS_ARTIFICIAL (def)
15532 && (regno1 == DF_REF_REGNO (def)
15533 || regno2 == DF_REF_REGNO (def)))
15534 return true;
15535
15536 return false;
15537}
15538
15539/* Function checks if instruction INSN uses register number
15540 REGNO as a part of address expression. */
15541
15542static bool
15543insn_uses_reg_mem (unsigned int regno, rtx insn)
15544{
15545 df_ref use;
15546
15547 FOR_EACH_INSN_USE (use, insn)
15548 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15549 return true;
15550
15551 return false;
15552}
15553
15554/* Search backward for non-agu definition of register number REGNO1
15555 or register number REGNO2 in basic block starting from instruction
15556 START up to head of basic block or instruction INSN.
15557
15558 Function puts true value into *FOUND var if definition was found
15559 and false otherwise.
15560
15561 Distance in half-cycles between START and found instruction or head
15562 of BB is added to DISTANCE and returned. */
15563
15564static int
15565distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15566 rtx_insn *insn, int distance,
15567 rtx_insn *start, bool *found)
15568{
15569 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15570 rtx_insn *prev = start;
15571 rtx_insn *next = NULL;
15572
15573 *found = false;
15574
15575 while (prev
15576 && prev != insn
15577 && distance < LEA_SEARCH_THRESHOLD)
15578 {
15579 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15580 {
15581 distance = increase_distance (prev, next, distance);
15582 if (insn_defines_reg (regno1, regno2, prev))
15583 {
15584 if (recog_memoized (prev) < 0
15585 || get_attr_type (prev) != TYPE_LEA)
15586 {
15587 *found = true;
15588 return distance;
15589 }
15590 }
15591
15592 next = prev;
15593 }
15594 if (prev == BB_HEAD (bb))
15595 break;
15596
15597 prev = PREV_INSN (prev);
15598 }
15599
15600 return distance;
15601}
15602
15603/* Search backward for non-agu definition of register number REGNO1
15604 or register number REGNO2 in INSN's basic block until
15605 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15606 2. Reach neighbor BBs boundary, or
15607 3. Reach agu definition.
15608 Returns the distance between the non-agu definition point and INSN.
15609 If no definition point, returns -1. */
15610
15611static int
15612distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15613 rtx_insn *insn)
15614{
15615 basic_block bb = BLOCK_FOR_INSN (insn);
15616 int distance = 0;
15617 bool found = false;
15618
15619 if (insn != BB_HEAD (bb))
15620 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15621 distance, PREV_INSN (insn),
15622 &found);
15623
15624 if (!found && distance < LEA_SEARCH_THRESHOLD)
15625 {
15626 edge e;
15627 edge_iterator ei;
15628 bool simple_loop = false;
15629
15630 FOR_EACH_EDGE (e, ei, bb->preds)
15631 if (e->src == bb)
15632 {
15633 simple_loop = true;
15634 break;
15635 }
15636
15637 if (simple_loop)
15638 distance = distance_non_agu_define_in_bb (regno1, regno2,
15639 insn, distance,
15640 BB_END (bb), &found);
15641 else
15642 {
15643 int shortest_dist = -1;
15644 bool found_in_bb = false;
15645
15646 FOR_EACH_EDGE (e, ei, bb->preds)
15647 {
15648 int bb_dist
15649 = distance_non_agu_define_in_bb (regno1, regno2,
15650 insn, distance,
15651 BB_END (e->src),
15652 found: &found_in_bb);
15653 if (found_in_bb)
15654 {
15655 if (shortest_dist < 0)
15656 shortest_dist = bb_dist;
15657 else if (bb_dist > 0)
15658 shortest_dist = MIN (bb_dist, shortest_dist);
15659
15660 found = true;
15661 }
15662 }
15663
15664 distance = shortest_dist;
15665 }
15666 }
15667
15668 if (!found)
15669 return -1;
15670
15671 return distance >> 1;
15672}
15673
15674/* Return the distance in half-cycles between INSN and the next
15675 insn that uses register number REGNO in a memory address added
15676 to DISTANCE. Return -1 if REGNO is set.
15677
15678 Put true value into *FOUND if register usage was found and
15679 false otherwise.
15680 Put true value into *REDEFINED if register redefinition was
15681 found and false otherwise. */
15682
15683static int
15684distance_agu_use_in_bb (unsigned int regno,
15685 rtx_insn *insn, int distance, rtx_insn *start,
15686 bool *found, bool *redefined)
15687{
15688 basic_block bb = NULL;
15689 rtx_insn *next = start;
15690 rtx_insn *prev = NULL;
15691
15692 *found = false;
15693 *redefined = false;
15694
15695 if (start != NULL_RTX)
15696 {
15697 bb = BLOCK_FOR_INSN (start);
15698 if (start != BB_HEAD (bb))
15699 /* If insn and start belong to the same bb, set prev to insn,
15700 so the call to increase_distance will increase the distance
15701 between insns by 1. */
15702 prev = insn;
15703 }
15704
15705 while (next
15706 && next != insn
15707 && distance < LEA_SEARCH_THRESHOLD)
15708 {
15709 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15710 {
15711 distance = increase_distance(prev, next, distance);
15712 if (insn_uses_reg_mem (regno, next))
15713 {
15714 /* Return DISTANCE if OP0 is used in memory
15715 address in NEXT. */
15716 *found = true;
15717 return distance;
15718 }
15719
15720 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15721 {
15722 /* Return -1 if OP0 is set in NEXT. */
15723 *redefined = true;
15724 return -1;
15725 }
15726
15727 prev = next;
15728 }
15729
15730 if (next == BB_END (bb))
15731 break;
15732
15733 next = NEXT_INSN (next);
15734 }
15735
15736 return distance;
15737}
15738
15739/* Return the distance between INSN and the next insn that uses
15740 register number REGNO0 in a memory address. Return -1 if no such
15741 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15742
15743static int
15744distance_agu_use (unsigned int regno0, rtx_insn *insn)
15745{
15746 basic_block bb = BLOCK_FOR_INSN (insn);
15747 int distance = 0;
15748 bool found = false;
15749 bool redefined = false;
15750
15751 if (insn != BB_END (bb))
15752 distance = distance_agu_use_in_bb (regno0, insn, distance,
15753 NEXT_INSN (insn),
15754 &found, &redefined);
15755
15756 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15757 {
15758 edge e;
15759 edge_iterator ei;
15760 bool simple_loop = false;
15761
15762 FOR_EACH_EDGE (e, ei, bb->succs)
15763 if (e->dest == bb)
15764 {
15765 simple_loop = true;
15766 break;
15767 }
15768
15769 if (simple_loop)
15770 distance = distance_agu_use_in_bb (regno0, insn,
15771 distance, BB_HEAD (bb),
15772 &found, &redefined);
15773 else
15774 {
15775 int shortest_dist = -1;
15776 bool found_in_bb = false;
15777 bool redefined_in_bb = false;
15778
15779 FOR_EACH_EDGE (e, ei, bb->succs)
15780 {
15781 int bb_dist
15782 = distance_agu_use_in_bb (regno0, insn,
15783 distance, BB_HEAD (e->dest),
15784 &found_in_bb, &redefined_in_bb);
15785 if (found_in_bb)
15786 {
15787 if (shortest_dist < 0)
15788 shortest_dist = bb_dist;
15789 else if (bb_dist > 0)
15790 shortest_dist = MIN (bb_dist, shortest_dist);
15791
15792 found = true;
15793 }
15794 }
15795
15796 distance = shortest_dist;
15797 }
15798 }
15799
15800 if (!found || redefined)
15801 return -1;
15802
15803 return distance >> 1;
15804}
15805
15806/* Define this macro to tune LEA priority vs ADD; it takes effect when
15807 there is a dilemma of choosing between LEA and ADD.
15808 Negative value: ADD is preferred over LEA
15809 Zero: Neutral
15810 Positive value: LEA is preferred over ADD. */
15811#define IX86_LEA_PRIORITY 0
15812
15813/* Return true if using the lea INSN has a performance advantage
15814 over a sequence of instructions. The instruction sequence has
15815 SPLIT_COST cycles higher latency than the lea. */
15816
15817static bool
15818ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15819 unsigned int regno2, int split_cost, bool has_scale)
15820{
15821 int dist_define, dist_use;
15822
15823 /* For Atom processors newer than Bonnell, if using a 2-source or
15824 3-source LEA for non-destructive destination purposes, or due to
15825 wanting ability to use SCALE, the use of LEA is justified. */
15826 if (!TARGET_CPU_P (BONNELL))
15827 {
15828 if (has_scale)
15829 return true;
15830 if (split_cost < 1)
15831 return false;
15832 if (regno0 == regno1 || regno0 == regno2)
15833 return false;
15834 return true;
15835 }
15836
15837 /* Remember recog_data content. */
15838 struct recog_data_d recog_data_save = recog_data;
15839
15840 dist_define = distance_non_agu_define (regno1, regno2, insn);
15841 dist_use = distance_agu_use (regno0, insn);
15842
15843 /* distance_non_agu_define can call get_attr_type which can call
15844 recog_memoized, restore recog_data back to previous content. */
15845 recog_data = recog_data_save;
15846
15847 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15848 {
15849 /* If there is no non-AGU operand definition, no AGU
15850 operand usage and the split cost is 0, then both the lea
15851 and non-lea variants have the same priority. Currently
15852 we prefer lea for 64-bit code and non-lea for 32-bit
15853 code. */
15854 if (dist_use < 0 && split_cost == 0)
15855 return TARGET_64BIT || IX86_LEA_PRIORITY;
15856 else
15857 return true;
15858 }
15859
15860 /* The longer the distance to the definition, the more preferable
15861 lea becomes. Adjust it to take the splitting cost and
15862 lea priority into account. */
15863 dist_define += split_cost + IX86_LEA_PRIORITY;
15864
15865 /* If there is no use in a memory address then we just check
15866 that the split cost exceeds the AGU stall. */
15867 if (dist_use < 0)
15868 return dist_define > LEA_MAX_STALL;
15869
15870 /* If this insn has both backward non-agu dependence and forward
15871 agu dependence, the one with the shorter distance takes effect. */
15872 return dist_define >= dist_use;
15873}
15874
15875/* Return true if we need to split op0 = op1 + op2 into a sequence of
15876 move and add to avoid AGU stalls. */
15877
15878bool
15879ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15880{
15881 unsigned int regno0, regno1, regno2;
15882
15883 /* Check if we need to optimize. */
15884 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15885 return false;
15886
15887 regno0 = true_regnum (operands[0]);
15888 regno1 = true_regnum (operands[1]);
15889 regno2 = true_regnum (operands[2]);
15890
15891 /* We need to split only adds with a non-destructive
15892 destination operand. */
15893 if (regno0 == regno1 || regno0 == regno2)
15894 return false;
15895 else
15896 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15897}
15898
15899/* Return true if we should emit lea instruction instead of mov
15900 instruction. */
15901
15902bool
15903ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15904{
15905 unsigned int regno0, regno1;
15906
15907 /* Check if we need to optimize. */
15908 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15909 return false;
15910
15911 /* Use lea for reg to reg moves only. */
15912 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15913 return false;
15914
15915 regno0 = true_regnum (operands[0]);
15916 regno1 = true_regnum (operands[1]);
15917
15918 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15919}
15920
15921/* Return true if we need to split lea into a sequence of
15922 instructions to avoid AGU stalls during peephole2. */
15923
15924bool
15925ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15926{
15927 unsigned int regno0, regno1, regno2;
15928 int split_cost;
15929 struct ix86_address parts;
15930 int ok;
15931
15932 /* The "at least two components" test below might not catch simple
15933 move or zero extension insns if parts.base is non-NULL and parts.disp
15934 is const0_rtx as the only components in the address, e.g. if the
15935 register is %rbp or %r13. As this test is much cheaper and moves or
15936 zero extensions are the common case, do this check first. */
15937 if (REG_P (operands[1])
15938 || (SImode_address_operand (operands[1], VOIDmode)
15939 && REG_P (XEXP (operands[1], 0))))
15940 return false;
15941
15942 ok = ix86_decompose_address (operands[1], &parts);
15943 gcc_assert (ok);
15944
15945 /* There should be at least two components in the address. */
15946 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15947 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15948 return false;
15949
15950 /* We should not split into add if a non-legitimate pic
15951 operand is used as the displacement. */
15952 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15953 return false;
15954
15955 regno0 = true_regnum (operands[0]);
15956 regno1 = INVALID_REGNUM;
15957 regno2 = INVALID_REGNUM;
15958
15959 if (parts.base)
15960 regno1 = true_regnum (parts.base);
15961 if (parts.index)
15962 regno2 = true_regnum (parts.index);
15963
15964 /* Use add for a = a + b and a = b + a since it is faster and shorter
15965 than lea for most processors. For the processors like BONNELL, if
15966 the destination register of LEA holds an actual address which will
15967 be used soon, LEA is better and otherwise ADD is better. */
15968 if (!TARGET_CPU_P (BONNELL)
15969 && parts.scale == 1
15970 && (!parts.disp || parts.disp == const0_rtx)
15971 && (regno0 == regno1 || regno0 == regno2))
15972 return true;
15973
15974 /* Split with -Oz if the encoding requires fewer bytes. */
15975 if (optimize_size > 1
15976 && parts.scale > 1
15977 && !parts.base
15978 && (!parts.disp || parts.disp == const0_rtx))
15979 return true;
15980
15981 /* Check if we need to optimize. */
15982 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15983 return false;
15984
15985 split_cost = 0;
15986
15987 /* Compute how many cycles we will add to execution time
15988 if split lea into a sequence of instructions. */
15989 if (parts.base || parts.index)
15990 {
15991 /* Have to use a mov instruction if the non-destructive
15992 destination form is used. */
15993 if (regno1 != regno0 && regno2 != regno0)
15994 split_cost += 1;
15995
15996 /* Have to add index to base if both exist. */
15997 if (parts.base && parts.index)
15998 split_cost += 1;
15999
16000 /* Have to use shift and adds if scale is 2 or greater. */
16001 if (parts.scale > 1)
16002 {
16003 if (regno0 != regno1)
16004 split_cost += 1;
16005 else if (regno2 == regno0)
16006 split_cost += 4;
16007 else
16008 split_cost += parts.scale;
16009 }
16010
16011 /* Have to use add instruction with immediate if
16012 disp is non zero. */
16013 if (parts.disp && parts.disp != const0_rtx)
16014 split_cost += 1;
16015
16016 /* Subtract the price of lea. */
16017 split_cost -= 1;
16018 }
16019
16020 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16021 parts.scale > 1);
16022}
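/* As a worked example of the costing above, splitting
   "lea 4(%rbx,%rcx,2), %rax" needs a mov, a shift or add for the scale,
   an add of the base and an add of the displacement; minus the lea
   itself this gives a split cost of 3, which ix86_lea_outperforms then
   weighs against the measured AGU stall distances.  */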
16023
16024/* Return true if it is ok to optimize an ADD operation into an LEA
16025 operation to avoid flag register consumption. For most processors,
16026 ADD is faster than LEA. For processors like BONNELL, if the
16027 destination register of the LEA holds an actual address which will be
16028 used soon, LEA is better; otherwise ADD is better. */
16029
16030bool
16031ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16032{
16033 unsigned int regno0 = true_regnum (operands[0]);
16034 unsigned int regno1 = true_regnum (operands[1]);
16035 unsigned int regno2 = true_regnum (operands[2]);
16036
16037 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16038 if (regno0 != regno1 && regno0 != regno2)
16039 return true;
16040
16041 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16042 return false;
16043
16044 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16045}
16046
16047/* Return true if destination reg of SET_BODY is shift count of
16048 USE_BODY. */
16049
16050static bool
16051ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16052{
16053 rtx set_dest;
16054 rtx shift_rtx;
16055 int i;
16056
16057 /* Retrieve destination of SET_BODY. */
16058 switch (GET_CODE (set_body))
16059 {
16060 case SET:
16061 set_dest = SET_DEST (set_body);
16062 if (!set_dest || !REG_P (set_dest))
16063 return false;
16064 break;
16065 case PARALLEL:
16066 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16067 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16068 use_body))
16069 return true;
16070 /* FALLTHROUGH */
16071 default:
16072 return false;
16073 }
16074
16075 /* Retrieve shift count of USE_BODY. */
16076 switch (GET_CODE (use_body))
16077 {
16078 case SET:
16079 shift_rtx = XEXP (use_body, 1);
16080 break;
16081 case PARALLEL:
16082 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16083 if (ix86_dep_by_shift_count_body (set_body,
16084 XVECEXP (use_body, 0, i)))
16085 return true;
16086 /* FALLTHROUGH */
16087 default:
16088 return false;
16089 }
16090
16091 if (shift_rtx
16092 && (GET_CODE (shift_rtx) == ASHIFT
16093 || GET_CODE (shift_rtx) == LSHIFTRT
16094 || GET_CODE (shift_rtx) == ASHIFTRT
16095 || GET_CODE (shift_rtx) == ROTATE
16096 || GET_CODE (shift_rtx) == ROTATERT))
16097 {
16098 rtx shift_count = XEXP (shift_rtx, 1);
16099
16100 /* Return true if shift count is dest of SET_BODY. */
16101 if (REG_P (shift_count))
16102 {
16103 /* This check is needed since this function can be invoked before
16104 register allocation in the pre-reload scheduler. */
16105 if (reload_completed
16106 && true_regnum (set_dest) == true_regnum (shift_count))
16107 return true;
16108 else if (REGNO(set_dest) == REGNO(shift_count))
16109 return true;
16110 }
16111 }
16112
16113 return false;
16114}
16115
16116/* Return true if destination reg of SET_INSN is shift count of
16117 USE_INSN. */
16118
16119bool
16120ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16121{
16122 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16123 PATTERN (use_insn));
16124}
16125
16126/* Return TRUE or FALSE depending on whether the unary operator meets the
16127 appropriate constraints. */
16128
16129bool
16130ix86_unary_operator_ok (enum rtx_code,
16131 machine_mode,
16132 rtx operands[2])
16133{
16134 /* If one of operands is memory, source and destination must match. */
16135 if ((MEM_P (operands[0])
16136 || MEM_P (operands[1]))
16137 && ! rtx_equal_p (operands[0], operands[1]))
16138 return false;
16139 return true;
16140}
16141
16142/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16143 are ok, keeping in mind the possible movddup alternative. */
16144
16145bool
16146ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16147{
16148 if (MEM_P (operands[0]))
16149 return rtx_equal_p (operands[0], operands[1 + high]);
16150 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16151 return false;
16152 return true;
16153}
16154
16155/* A subroutine of ix86_build_signbit_mask. If VECT is true,
16156 then replicate the value for all elements of the vector
16157 register. */
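/* For example, ix86_build_const_vector (V4SFmode, true, x) yields the
   vector {x, x, x, x}, while with VECT false only the first element is
   X and the rest are zero.  */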
16158
16159rtx
16160ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16161{
16162 int i, n_elt;
16163 rtvec v;
16164 machine_mode scalar_mode;
16165
16166 switch (mode)
16167 {
16168 case E_V64QImode:
16169 case E_V32QImode:
16170 case E_V16QImode:
16171 case E_V32HImode:
16172 case E_V16HImode:
16173 case E_V8HImode:
16174 case E_V16SImode:
16175 case E_V8SImode:
16176 case E_V4SImode:
16177 case E_V2SImode:
16178 case E_V8DImode:
16179 case E_V4DImode:
16180 case E_V2DImode:
16181 gcc_assert (vect);
16182 /* FALLTHRU */
16183 case E_V2HFmode:
16184 case E_V4HFmode:
16185 case E_V8HFmode:
16186 case E_V16HFmode:
16187 case E_V32HFmode:
16188 case E_V16SFmode:
16189 case E_V8SFmode:
16190 case E_V4SFmode:
16191 case E_V2SFmode:
16192 case E_V8DFmode:
16193 case E_V4DFmode:
16194 case E_V2DFmode:
16195 n_elt = GET_MODE_NUNITS (mode);
16196 v = rtvec_alloc (n_elt);
16197 scalar_mode = GET_MODE_INNER (mode);
16198
16199 RTVEC_ELT (v, 0) = value;
16200
16201 for (i = 1; i < n_elt; ++i)
16202 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16203
16204 return gen_rtx_CONST_VECTOR (mode, v);
16205
16206 default:
16207 gcc_unreachable ();
16208 }
16209}
16210
16211/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16212 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16213 for an SSE register. If VECT is true, then replicate the mask for
16214 all elements of the vector register. If INVERT is true, then create
16215 a mask excluding the sign bit. */
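/* For example, for V4SFmode with VECT true the result is a register
   holding four copies of 0x80000000 (i.e. {-0.0, -0.0, -0.0, -0.0});
   with INVERT true each element is 0x7fffffff instead.  */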
16216
16217rtx
16218ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16219{
16220 machine_mode vec_mode, imode;
16221 wide_int w;
16222 rtx mask, v;
16223
16224 switch (mode)
16225 {
16226 case E_V2HFmode:
16227 case E_V4HFmode:
16228 case E_V8HFmode:
16229 case E_V16HFmode:
16230 case E_V32HFmode:
16231 vec_mode = mode;
16232 imode = HImode;
16233 break;
16234
16235 case E_V16SImode:
16236 case E_V16SFmode:
16237 case E_V8SImode:
16238 case E_V4SImode:
16239 case E_V8SFmode:
16240 case E_V4SFmode:
16241 case E_V2SFmode:
16242 case E_V2SImode:
16243 vec_mode = mode;
16244 imode = SImode;
16245 break;
16246
16247 case E_V8DImode:
16248 case E_V4DImode:
16249 case E_V2DImode:
16250 case E_V8DFmode:
16251 case E_V4DFmode:
16252 case E_V2DFmode:
16253 vec_mode = mode;
16254 imode = DImode;
16255 break;
16256
16257 case E_TImode:
16258 case E_TFmode:
16259 vec_mode = VOIDmode;
16260 imode = TImode;
16261 break;
16262
16263 default:
16264 gcc_unreachable ();
16265 }
16266
16267 machine_mode inner_mode = GET_MODE_INNER (mode);
16268 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16269 GET_MODE_BITSIZE (inner_mode));
16270 if (invert)
16271 w = wi::bit_not (w);
16272
16273 /* Force this value into the low part of a fp vector constant. */
16274 mask = immed_wide_int_const (w, imode);
16275 mask = gen_lowpart (inner_mode, mask);
16276
16277 if (vec_mode == VOIDmode)
16278 return force_reg (inner_mode, mask);
16279
16280 v = ix86_build_const_vector (vec_mode, vect, mask);
16281 return force_reg (vec_mode, v);
16282}
16283
16284/* Return HOST_WIDE_INT for const vector OP in MODE. */
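/* E.g. a V4QImode vector {1, 2, 3, 4} is returned as 0x04030201, with
   element 0 placed in the least significant bits.  */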
16285
16286HOST_WIDE_INT
16287ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16288{
16289 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16290 gcc_unreachable ();
16291
16292 int nunits = GET_MODE_NUNITS (mode);
16293 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16294 machine_mode innermode = GET_MODE_INNER (mode);
16295 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16296
16297 switch (mode)
16298 {
16299 case E_V2QImode:
16300 case E_V4QImode:
16301 case E_V2HImode:
16302 case E_V8QImode:
16303 case E_V4HImode:
16304 case E_V2SImode:
16305 for (int i = 0; i < nunits; ++i)
16306 {
16307 int v = INTVAL (XVECEXP (op, 0, i));
16308 wide_int wv = wi::shwi (v, innermode_bits);
16309 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16310 }
16311 break;
16312 case E_V2HFmode:
16313 case E_V2BFmode:
16314 case E_V4HFmode:
16315 case E_V4BFmode:
16316 case E_V2SFmode:
16317 for (int i = 0; i < nunits; ++i)
16318 {
16319 rtx x = XVECEXP (op, 0, i);
16320 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16321 REAL_MODE_FORMAT (innermode));
16322 wide_int wv = wi::shwi (v, innermode_bits);
16323 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16324 }
16325 break;
16326 default:
16327 gcc_unreachable ();
16328 }
16329
16330 return val.to_shwi ();
16331}
16332
16333/* Return TRUE or FALSE depending on whether the first SET in INSN
16334 has source and destination with matching CC modes, and that the
16335 CC mode is at least as constrained as REQ_MODE. */
16336
16337bool
16338ix86_match_ccmode (rtx insn, machine_mode req_mode)
16339{
16340 rtx set;
16341 machine_mode set_mode;
16342
16343 set = PATTERN (insn);
16344 if (GET_CODE (set) == PARALLEL)
16345 set = XVECEXP (set, 0, 0);
16346 gcc_assert (GET_CODE (set) == SET);
16347 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16348
16349 set_mode = GET_MODE (SET_DEST (set));
16350 switch (set_mode)
16351 {
16352 case E_CCNOmode:
16353 if (req_mode != CCNOmode
16354 && (req_mode != CCmode
16355 || XEXP (SET_SRC (set), 1) != const0_rtx))
16356 return false;
16357 break;
16358 case E_CCmode:
16359 if (req_mode == CCGCmode)
16360 return false;
16361 /* FALLTHRU */
16362 case E_CCGCmode:
16363 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16364 return false;
16365 /* FALLTHRU */
16366 case E_CCGOCmode:
16367 if (req_mode == CCZmode)
16368 return false;
16369 /* FALLTHRU */
16370 case E_CCZmode:
16371 break;
16372
16373 case E_CCGZmode:
16374
16375 case E_CCAmode:
16376 case E_CCCmode:
16377 case E_CCOmode:
16378 case E_CCPmode:
16379 case E_CCSmode:
16380 if (set_mode != req_mode)
16381 return false;
16382 break;
16383
16384 default:
16385 gcc_unreachable ();
16386 }
16387
16388 return GET_MODE (SET_SRC (set)) == set_mode;
16389}
16390
16391machine_mode
16392ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16393{
16394 machine_mode mode = GET_MODE (op0);
16395
16396 if (SCALAR_FLOAT_MODE_P (mode))
16397 {
16398 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16399 return CCFPmode;
16400 }
16401
16402 switch (code)
16403 {
16404 /* Only zero flag is needed. */
16405 case EQ: /* ZF=0 */
16406 case NE: /* ZF!=0 */
16407 return CCZmode;
16408 /* Codes needing carry flag. */
16409 case GEU: /* CF=0 */
16410 case LTU: /* CF=1 */
16411 rtx geu;
16412 /* Detect overflow checks. They need just the carry flag. */
16413 if (GET_CODE (op0) == PLUS
16414 && (rtx_equal_p (op1, XEXP (op0, 0))
16415 || rtx_equal_p (op1, XEXP (op0, 1))))
16416 return CCCmode;
16417 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
16418 Match LTU of op0
16419 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
16420 and op1
16421 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
16422 where CC_CCC is either CC or CCC. */
16423 else if (code == LTU
16424 && GET_CODE (op0) == NEG
16425 && GET_CODE (geu = XEXP (op0, 0)) == GEU
16426 && REG_P (XEXP (geu, 0))
16427 && (GET_MODE (XEXP (geu, 0)) == CCCmode
16428 || GET_MODE (XEXP (geu, 0)) == CCmode)
16429 && REGNO (XEXP (geu, 0)) == FLAGS_REG
16430 && XEXP (geu, 1) == const0_rtx
16431 && GET_CODE (op1) == LTU
16432 && REG_P (XEXP (op1, 0))
16433 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
16434 && REGNO (XEXP (op1, 0)) == FLAGS_REG
16435 && XEXP (op1, 1) == const0_rtx)
16436 return CCCmode;
16437 /* Similarly for *x86_cmc pattern.
16438 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16439 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16440 It is sufficient to test that the operand modes are CCCmode. */
16441 else if (code == LTU
16442 && GET_CODE (op0) == NEG
16443 && GET_CODE (XEXP (op0, 0)) == LTU
16444 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16445 && GET_CODE (op1) == GEU
16446 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16447 return CCCmode;
16448 else
16449 return CCmode;
16450 case GTU: /* CF=0 & ZF=0 */
16451 case LEU: /* CF=1 | ZF=1 */
16452 return CCmode;
16453 /* Codes possibly doable only with sign flag when
16454 comparing against zero. */
16455 case GE: /* SF=OF or SF=0 */
16456 case LT: /* SF<>OF or SF=1 */
16457 if (op1 == const0_rtx)
16458 return CCGOCmode;
16459 else
16460 /* For other cases Carry flag is not required. */
16461 return CCGCmode;
16462 /* Codes doable only with the sign flag when comparing
16463 against zero, but we lack a jump instruction for them,
16464 so we need to use relational tests against the overflow
16465 flag, which thus needs to be zero. */
16466 case GT: /* ZF=0 & SF=OF */
16467 case LE: /* ZF=1 | SF<>OF */
16468 if (op1 == const0_rtx)
16469 return CCNOmode;
16470 else
16471 return CCGCmode;
16472 /* The strcmp pattern does (use flags), and combine may ask us for
16473 a proper mode. */
16474 case USE:
16475 return CCmode;
16476 default:
16477 gcc_unreachable ();
16478 }
16479}
16480
16481/* Return TRUE or FALSE depending on whether the ptest instruction
16482 INSN has source and destination with suitable matching CC modes. */
16483
16484bool
16485ix86_match_ptest_ccmode (rtx insn)
16486{
16487 rtx set, src;
16488 machine_mode set_mode;
16489
16490 set = PATTERN (insn);
16491 gcc_assert (GET_CODE (set) == SET);
16492 src = SET_SRC (set);
16493 gcc_assert (GET_CODE (src) == UNSPEC
16494 && XINT (src, 1) == UNSPEC_PTEST);
16495
16496 set_mode = GET_MODE (src);
16497 if (set_mode != CCZmode
16498 && set_mode != CCCmode
16499 && set_mode != CCmode)
16500 return false;
16501 return GET_MODE (SET_DEST (set)) == set_mode;
16502}
16503
16504/* Return the fixed registers used for condition codes. */
16505
16506static bool
16507ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16508{
16509 *p1 = FLAGS_REG;
16510 *p2 = INVALID_REGNUM;
16511 return true;
16512}
16513
16514/* If two condition code modes are compatible, return a condition code
16515 mode which is compatible with both. Otherwise, return
16516 VOIDmode. */
16517
16518static machine_mode
16519ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16520{
16521 if (m1 == m2)
16522 return m1;
16523
16524 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16525 return VOIDmode;
16526
16527 if ((m1 == CCGCmode && m2 == CCGOCmode)
16528 || (m1 == CCGOCmode && m2 == CCGCmode))
16529 return CCGCmode;
16530
16531 if ((m1 == CCNOmode && m2 == CCGOCmode)
16532 || (m1 == CCGOCmode && m2 == CCNOmode))
16533 return CCNOmode;
16534
16535 if (m1 == CCZmode
16536 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16537 return m2;
16538 else if (m2 == CCZmode
16539 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16540 return m1;
16541
16542 switch (m1)
16543 {
16544 default:
16545 gcc_unreachable ();
16546
16547 case E_CCmode:
16548 case E_CCGCmode:
16549 case E_CCGOCmode:
16550 case E_CCNOmode:
16551 case E_CCAmode:
16552 case E_CCCmode:
16553 case E_CCOmode:
16554 case E_CCPmode:
16555 case E_CCSmode:
16556 case E_CCZmode:
16557 switch (m2)
16558 {
16559 default:
16560 return VOIDmode;
16561
16562 case E_CCmode:
16563 case E_CCGCmode:
16564 case E_CCGOCmode:
16565 case E_CCNOmode:
16566 case E_CCAmode:
16567 case E_CCCmode:
16568 case E_CCOmode:
16569 case E_CCPmode:
16570 case E_CCSmode:
16571 case E_CCZmode:
16572 return CCmode;
16573 }
16574
16575 case E_CCFPmode:
16576 /* These are only compatible with themselves, which we already
16577 checked above. */
16578 return VOIDmode;
16579 }
16580}
16581
16582/* Return strategy to use for floating-point. We assume that fcomi is always
16583 preferable where available, since that is also true when looking at size
16584 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16585
16586enum ix86_fpcmp_strategy
16587ix86_fp_comparison_strategy (enum rtx_code)
16588{
16589 /* Do fcomi/sahf based test when profitable. */
16590
16591 if (TARGET_CMOVE)
16592 return IX86_FPCMP_COMI;
16593
16594 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16595 return IX86_FPCMP_SAHF;
16596
16597 return IX86_FPCMP_ARITH;
16598}
16599
16600/* Convert comparison codes we use to represent FP comparison to integer
16601 code that will result in proper branch. Return UNKNOWN if no such code
16602 is available. */
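/* The mapping below works because fcomi/fucomi (and fnstsw + sahf) leave
   the floating-point result in CF and ZF exactly as an unsigned integer
   comparison would: e.g. GT becomes GTU and UNLT becomes LTU, while
   ORDERED and UNORDERED test PF and pass through unchanged.  */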
16603
16604enum rtx_code
16605ix86_fp_compare_code_to_integer (enum rtx_code code)
16606{
16607 switch (code)
16608 {
16609 case GT:
16610 return GTU;
16611 case GE:
16612 return GEU;
16613 case ORDERED:
16614 case UNORDERED:
16615 return code;
16616 case UNEQ:
16617 return EQ;
16618 case UNLT:
16619 return LTU;
16620 case UNLE:
16621 return LEU;
16622 case LTGT:
16623 return NE;
16624 default:
16625 return UNKNOWN;
16626 }
16627}
16628
16629/* Zero extend possibly SImode EXP to Pmode register. */
16630rtx
16631ix86_zero_extend_to_Pmode (rtx exp)
16632{
16633 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16634}
16635
16636/* Return true if the function is called via PLT. */
16637
16638bool
16639ix86_call_use_plt_p (rtx call_op)
16640{
16641 if (SYMBOL_REF_LOCAL_P (call_op))
16642 {
16643 if (SYMBOL_REF_DECL (call_op)
16644 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16645 {
16646 /* NB: All ifunc functions must be called via PLT. */
16647 cgraph_node *node
16648 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16649 if (node && node->ifunc_resolver)
16650 return true;
16651 }
16652 return false;
16653 }
16654 return true;
16655}
16656
16657/* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16658 the PLT entry will be used as the function address for local IFUNC
16659 functions. When the PIC register is needed for a PLT call, an indirect
16660 call via the PLT entry will fail since the PIC register may not be
16661 set up properly for the indirect call. In this case, we should return
16662 false. */
16663
16664static bool
16665ix86_ifunc_ref_local_ok (void)
16666{
16667 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16668}
16669
16670/* Return true if the function being called was marked with attribute
16671 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16672 to handle the non-PIC case in the backend because there is no easy
16673 interface for the front-end to force non-PLT calls to use the GOT.
16674 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16675 to call the function marked "noplt" indirectly. */
16676
16677static bool
16678ix86_nopic_noplt_attribute_p (rtx call_op)
16679{
16680 if (flag_pic || ix86_cmodel == CM_LARGE
16681 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16682 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16683 || SYMBOL_REF_LOCAL_P (call_op))
16684 return false;
16685
16686 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16687
16688 if (!flag_plt
16689 || (symbol_decl != NULL_TREE
16690	  && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16691 return true;
16692
16693 return false;
16694}
16695
16696/* Helper to output the jmp/call. */
16697static void
16698ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16699{
16700 if (thunk_name != NULL)
16701 {
16702 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16703 && ix86_indirect_branch_cs_prefix)
16704	fprintf (asm_out_file, "\tcs\n");
16705      fprintf (asm_out_file, "\tjmp\t");
16706      assemble_name (asm_out_file, thunk_name);
16707      putc ('\n', asm_out_file);
16708      if ((ix86_harden_sls & harden_sls_indirect_jmp))
16709	fputs ("\tint3\n", asm_out_file);
16710 }
16711 else
16712 output_indirect_thunk (regno);
16713}
16714
16715/* Output indirect branch via a call and return thunk. CALL_OP is a
16716 register which contains the branch target. XASM is the assembly
16717 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16718 A normal call is converted to:
16719
16720 call __x86_indirect_thunk_reg
16721
16722 and a tail call is converted to:
16723
16724 jmp __x86_indirect_thunk_reg
16725 */
16726
16727static void
16728ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16729{
16730 char thunk_name_buf[32];
16731 char *thunk_name;
16732 enum indirect_thunk_prefix need_prefix
16733    = indirect_thunk_need_prefix (current_output_insn);
16734 int regno = REGNO (call_op);
16735
16736 if (cfun->machine->indirect_branch_type
16737 != indirect_branch_thunk_inline)
16738 {
16739 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16740	SET_HARD_REG_BIT (indirect_thunks_used, regno);
16741
16742      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16743 thunk_name = thunk_name_buf;
16744 }
16745 else
16746 thunk_name = NULL;
16747
16748 if (sibcall_p)
16749 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16750 else
16751 {
16752 if (thunk_name != NULL)
16753 {
16754	  if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16755	      && ix86_indirect_branch_cs_prefix)
16756	    fprintf (asm_out_file, "\tcs\n");
16757	  fprintf (asm_out_file, "\tcall\t");
16758	  assemble_name (asm_out_file, thunk_name);
16759	  putc ('\n', asm_out_file);
16760 return;
16761 }
16762
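	  /* The thunk body is emitted inline just before the call: jump
	     over it to L2, then "call L1" enters the inline thunk, which
	     dispatches to the target register.  */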
16763 char indirectlabel1[32];
16764 char indirectlabel2[32];
16765
16766 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16767 INDIRECT_LABEL,
16768 indirectlabelno++);
16769 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16770 INDIRECT_LABEL,
16771 indirectlabelno++);
16772
16773 /* Jump. */
16774      fputs ("\tjmp\t", asm_out_file);
16775      assemble_name_raw (asm_out_file, indirectlabel2);
16776      fputc ('\n', asm_out_file);
16777
16778 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16779
16780 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16781
16782 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16783
16784 /* Call. */
16785      fputs ("\tcall\t", asm_out_file);
16786      assemble_name_raw (asm_out_file, indirectlabel1);
16787      fputc ('\n', asm_out_file);
16788 }
16789}
16790
16791/* Output indirect branch via a call and return thunk. CALL_OP is
16792 the branch target. XASM is the assembly template for CALL_OP.
16793 Branch is a tail call if SIBCALL_P is true. A normal call is
16794 converted to:
16795
16796 jmp L2
16797 L1:
16798 push CALL_OP
16799 jmp __x86_indirect_thunk
16800 L2:
16801 call L1
16802
16803 and a tail call is converted to:
16804
16805 push CALL_OP
16806 jmp __x86_indirect_thunk
16807 */
16808
16809static void
16810ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16811 bool sibcall_p)
16812{
16813 char thunk_name_buf[32];
16814 char *thunk_name;
16815 char push_buf[64];
16816 enum indirect_thunk_prefix need_prefix
16817    = indirect_thunk_need_prefix (current_output_insn);
16818 int regno = -1;
16819
16820 if (cfun->machine->indirect_branch_type
16821 != indirect_branch_thunk_inline)
16822 {
16823 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16824 indirect_thunk_needed = true;
16825      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16826 thunk_name = thunk_name_buf;
16827 }
16828 else
16829 thunk_name = NULL;
16830
16831  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16832	    TARGET_64BIT ? 'q' : 'l', xasm);
16833
16834 if (sibcall_p)
16835 {
16836 output_asm_insn (push_buf, &call_op);
16837 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16838 }
16839 else
16840 {
16841 char indirectlabel1[32];
16842 char indirectlabel2[32];
16843
16844 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16845 INDIRECT_LABEL,
16846 indirectlabelno++);
16847 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16848 INDIRECT_LABEL,
16849 indirectlabelno++);
16850
16851 /* Jump. */
16852      fputs ("\tjmp\t", asm_out_file);
16853      assemble_name_raw (asm_out_file, indirectlabel2);
16854      fputc ('\n', asm_out_file);
16855
16856 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16857
16858 /* An external function may be called via GOT, instead of PLT. */
16859 if (MEM_P (call_op))
16860 {
16861 struct ix86_address parts;
16862 rtx addr = XEXP (call_op, 0);
16863	  if (ix86_decompose_address (addr, &parts)
16864 && parts.base == stack_pointer_rtx)
16865 {
16866 /* Since call will adjust stack by -UNITS_PER_WORD,
16867 we must convert "disp(stack, index, scale)" to
16868 "disp+UNITS_PER_WORD(stack, index, scale)". */
16869 if (parts.index)
16870 {
16871 addr = gen_rtx_MULT (Pmode, parts.index,
16872 GEN_INT (parts.scale));
16873 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16874 addr);
16875 }
16876 else
16877 addr = stack_pointer_rtx;
16878
16879 rtx disp;
16880 if (parts.disp != NULL_RTX)
16881 disp = plus_constant (Pmode, parts.disp,
16882 UNITS_PER_WORD);
16883 else
16884 disp = GEN_INT (UNITS_PER_WORD);
16885
16886 addr = gen_rtx_PLUS (Pmode, addr, disp);
16887 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
16888 }
16889 }
16890
16891 output_asm_insn (push_buf, &call_op);
16892
16893 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16894
16895 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16896
16897 /* Call. */
16898      fputs ("\tcall\t", asm_out_file);
16899      assemble_name_raw (asm_out_file, indirectlabel1);
16900      fputc ('\n', asm_out_file);
16901 }
16902}
16903
16904/* Output indirect branch via a call and return thunk. CALL_OP is
16905 the branch target. XASM is the assembly template for CALL_OP.
16906 Branch is a tail call if SIBCALL_P is true. */
16907
16908static void
16909ix86_output_indirect_branch (rtx call_op, const char *xasm,
16910 bool sibcall_p)
16911{
16912 if (REG_P (call_op))
16913 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
16914 else
16915 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
16916}
16917
16918/* Output indirect jump. CALL_OP is the jump target. */
16919
16920const char *
16921ix86_output_indirect_jmp (rtx call_op)
16922{
16923 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
16924 {
16925      /* We can't use the red zone, since the "call" in the indirect thunk
16926	 pushes the return address onto the stack, clobbering the red zone.  */
16927 if (ix86_red_zone_used)
16928 gcc_unreachable ();
16929
16930      ix86_output_indirect_branch (call_op, "%0", true);
16931 }
16932 else
16933 output_asm_insn ("%!jmp\t%A0", &call_op);
16934 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
16935}
16936
16937/* Output return instrumentation for current function if needed. */
16938
16939static void
16940output_return_instrumentation (void)
16941{
16942 if (ix86_instrument_return != instrument_return_none
16943 && flag_fentry
16944 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
16945 {
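      /* Emit a local label so that the location of the instrumentation can
	 be recorded in the __return_loc section below.  */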
16946 if (ix86_flag_record_return)
16947	fprintf (asm_out_file, "1:\n");
16948 switch (ix86_instrument_return)
16949 {
16950 case instrument_return_call:
16951	  fprintf (asm_out_file, "\tcall\t__return__\n");
16952 break;
16953 case instrument_return_nop5:
16954 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
16955	  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
16956 break;
16957 case instrument_return_none:
16958 break;
16959 }
16960
16961 if (ix86_flag_record_return)
16962 {
16963	  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
16964	  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
16965	  fprintf (asm_out_file, "\t.previous\n");
16966 }
16967 }
16968}
16969
16970/* Output function return. CALL_OP is the jump target. Add a REP
16971 prefix to RET if LONG_P is true and function return is kept. */
16972
16973const char *
16974ix86_output_function_return (bool long_p)
16975{
16976 output_return_instrumentation ();
16977
16978 if (cfun->machine->function_return_type != indirect_branch_keep)
16979 {
16980 char thunk_name[32];
16981 enum indirect_thunk_prefix need_prefix
16982	= indirect_thunk_need_prefix (current_output_insn);
16983
16984 if (cfun->machine->function_return_type
16985 != indirect_branch_thunk_inline)
16986 {
16987 bool need_thunk = (cfun->machine->function_return_type
16988 == indirect_branch_thunk);
16989	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
16990			       true);
16991 indirect_return_needed |= need_thunk;
16992	  fprintf (asm_out_file, "\tjmp\t");
16993	  assemble_name (asm_out_file, thunk_name);
16994	  putc ('\n', asm_out_file);
16995 }
16996 else
16997 output_indirect_thunk (INVALID_REGNUM);
16998
16999 return "";
17000 }
17001
17002 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17003 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17004}
17005
17006/* Output indirect function return. RET_OP is the function return
17007 target. */
17008
17009const char *
17010ix86_output_indirect_function_return (rtx ret_op)
17011{
17012 if (cfun->machine->function_return_type != indirect_branch_keep)
17013 {
17014 char thunk_name[32];
17015 enum indirect_thunk_prefix need_prefix
17016	= indirect_thunk_need_prefix (current_output_insn);
17017 unsigned int regno = REGNO (ret_op);
17018 gcc_assert (regno == CX_REG);
17019
17020 if (cfun->machine->function_return_type
17021 != indirect_branch_thunk_inline)
17022 {
17023 bool need_thunk = (cfun->machine->function_return_type
17024 == indirect_branch_thunk);
17025	  indirect_thunk_name (thunk_name, regno, need_prefix, true);
17026
17027 if (need_thunk)
17028 {
17029 indirect_return_via_cx = true;
17030	      SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17031	    }
17032	  fprintf (asm_out_file, "\tjmp\t");
17033	  assemble_name (asm_out_file, thunk_name);
17034	  putc ('\n', asm_out_file);
17035 }
17036 else
17037 output_indirect_thunk (regno);
17038 }
17039 else
17040 {
17041 output_asm_insn ("%!jmp\t%A0", &ret_op);
17042 if (ix86_harden_sls & harden_sls_indirect_jmp)
17043	fputs ("\tint3\n", asm_out_file);
17044 }
17045 return "";
17046}
17047
17048/* Output the assembly for a call instruction. */
17049
17050const char *
17051ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17052{
17053 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17054 bool output_indirect_p
17055 = (!TARGET_SEH
17056 && cfun->machine->indirect_branch_type != indirect_branch_keep);
17057 bool seh_nop_p = false;
17058 const char *xasm;
17059
17060 if (SIBLING_CALL_P (insn))
17061 {
17062 output_return_instrumentation ();
17063 if (direct_p)
17064 {
17065 if (ix86_nopic_noplt_attribute_p (call_op))
17066 {
17067 direct_p = false;
17068 if (TARGET_64BIT)
17069 {
17070 if (output_indirect_p)
17071 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17072 else
17073 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17074 }
17075 else
17076 {
17077 if (output_indirect_p)
17078 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17079 else
17080 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17081 }
17082 }
17083 else
17084 xasm = "%!jmp\t%P0";
17085 }
17086 /* SEH epilogue detection requires the indirect branch case
17087 to include REX.W. */
17088 else if (TARGET_SEH)
17089 xasm = "%!rex.W jmp\t%A0";
17090 else
17091 {
17092 if (output_indirect_p)
17093 xasm = "%0";
17094 else
17095 xasm = "%!jmp\t%A0";
17096 }
17097
17098 if (output_indirect_p && !direct_p)
17099	ix86_output_indirect_branch (call_op, xasm, true);
17100 else
17101 {
17102 output_asm_insn (xasm, &call_op);
17103 if (!direct_p
17104 && (ix86_harden_sls & harden_sls_indirect_jmp))
17105 return "int3";
17106 }
17107 return "";
17108 }
17109
17110 /* SEH unwinding can require an extra nop to be emitted in several
17111 circumstances. Determine if we have one of those. */
17112 if (TARGET_SEH)
17113 {
17114 rtx_insn *i;
17115
17116      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17117 {
17118 /* Prevent a catch region from being adjacent to a jump that would
17119 be interpreted as an epilogue sequence by the unwinder. */
17120 if (JUMP_P(i) && CROSSING_JUMP_P (i))
17121 {
17122 seh_nop_p = true;
17123 break;
17124 }
17125
17126 /* If we get to another real insn, we don't need the nop. */
17127 if (INSN_P (i))
17128 break;
17129
17130 /* If we get to the epilogue note, prevent a catch region from
17131 being adjacent to the standard epilogue sequence. Note that,
17132 if non-call exceptions are enabled, we already did it during
17133 epilogue expansion, or else, if the insn can throw internally,
17134 we already did it during the reorg pass. */
17135 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17136 && !flag_non_call_exceptions
17137 && !can_throw_internal (insn))
17138 {
17139 seh_nop_p = true;
17140 break;
17141 }
17142 }
17143
17144 /* If we didn't find a real insn following the call, prevent the
17145 unwinder from looking into the next function. */
17146 if (i == NULL)
17147 seh_nop_p = true;
17148 }
17149
17150 if (direct_p)
17151 {
17152 if (ix86_nopic_noplt_attribute_p (call_op))
17153 {
17154 direct_p = false;
17155 if (TARGET_64BIT)
17156 {
17157 if (output_indirect_p)
17158 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17159 else
17160 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17161 }
17162 else
17163 {
17164 if (output_indirect_p)
17165 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17166 else
17167 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17168 }
17169 }
17170 else
17171 xasm = "%!call\t%P0";
17172 }
17173 else
17174 {
17175 if (output_indirect_p)
17176 xasm = "%0";
17177 else
17178 xasm = "%!call\t%A0";
17179 }
17180
17181 if (output_indirect_p && !direct_p)
17182    ix86_output_indirect_branch (call_op, xasm, false);
17183 else
17184 output_asm_insn (xasm, &call_op);
17185
17186 if (seh_nop_p)
17187 return "nop";
17188
17189 return "";
17190}
17191
17192/* Return a MEM corresponding to a stack slot with mode MODE.
17193 Allocate a new slot if necessary.
17194
17195 The RTL for a function can have several slots available: N is
17196 which slot to use. */
17197
17198rtx
17199assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17200{
17201 struct stack_local_entry *s;
17202
17203 gcc_assert (n < MAX_386_STACK_LOCALS);
17204
17205 for (s = ix86_stack_locals; s; s = s->next)
17206 if (s->mode == mode && s->n == n)
17207 return validize_mem (copy_rtx (s->rtl));
17208
17209 int align = 0;
17210 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17211 alignment with -m32 -mpreferred-stack-boundary=2. */
17212 if (mode == DImode
17213 && !TARGET_64BIT
17214 && n == SLOT_FLOATxFDI_387
17215 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17216 align = 32;
17217 s = ggc_alloc<stack_local_entry> ();
17218 s->n = n;
17219 s->mode = mode;
17220 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17221
17222 s->next = ix86_stack_locals;
17223 ix86_stack_locals = s;
17224 return validize_mem (copy_rtx (s->rtl));
17225}
17226
17227static void
17228ix86_instantiate_decls (void)
17229{
17230 struct stack_local_entry *s;
17231
17232 for (s = ix86_stack_locals; s; s = s->next)
17233 if (s->rtl != NULL_RTX)
17234      instantiate_decl_rtl (s->rtl);
17235}
17236
17237/* Check whether x86 address PARTS is a pc-relative address. */
17238
17239bool
17240ix86_rip_relative_addr_p (struct ix86_address *parts)
17241{
17242 rtx base, index, disp;
17243
17244 base = parts->base;
17245 index = parts->index;
17246 disp = parts->disp;
17247
17248 if (disp && !base && !index)
17249 {
17250 if (TARGET_64BIT)
17251 {
17252 rtx symbol = disp;
17253
17254 if (GET_CODE (disp) == CONST)
17255 symbol = XEXP (disp, 0);
17256 if (GET_CODE (symbol) == PLUS
17257 && CONST_INT_P (XEXP (symbol, 1)))
17258 symbol = XEXP (symbol, 0);
17259
17260 if (GET_CODE (symbol) == LABEL_REF
17261 || (GET_CODE (symbol) == SYMBOL_REF
17262 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17263 || (GET_CODE (symbol) == UNSPEC
17264 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17265 || XINT (symbol, 1) == UNSPEC_PCREL
17266 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17267 return true;
17268 }
17269 }
17270 return false;
17271}
17272
17273/* Calculate the length of the memory address in the instruction encoding.
17274 Includes addr32 prefix, does not include the one-byte modrm, opcode,
17275 or other prefixes. We never generate addr32 prefix for LEA insn. */
17276
17277int
17278memory_address_length (rtx addr, bool lea)
17279{
17280 struct ix86_address parts;
17281 rtx base, index, disp;
17282 int len;
17283 int ok;
17284
17285 if (GET_CODE (addr) == PRE_DEC
17286 || GET_CODE (addr) == POST_INC
17287 || GET_CODE (addr) == PRE_MODIFY
17288 || GET_CODE (addr) == POST_MODIFY)
17289 return 0;
17290
17291  ok = ix86_decompose_address (addr, &parts);
17292 gcc_assert (ok);
17293
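  /* A non-default segment needs a one-byte segment override prefix.  */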
17294 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17295
17296 /* If this is not LEA instruction, add the length of addr32 prefix. */
17297 if (TARGET_64BIT && !lea
17298 && (SImode_address_operand (addr, VOIDmode)
17299 || (parts.base && GET_MODE (parts.base) == SImode)
17300 || (parts.index && GET_MODE (parts.index) == SImode)))
17301 len++;
17302
17303 base = parts.base;
17304 index = parts.index;
17305 disp = parts.disp;
17306
17307 if (base && SUBREG_P (base))
17308 base = SUBREG_REG (base);
17309 if (index && SUBREG_P (index))
17310 index = SUBREG_REG (index);
17311
17312 gcc_assert (base == NULL_RTX || REG_P (base));
17313 gcc_assert (index == NULL_RTX || REG_P (index));
17314
17315 /* Rule of thumb:
17316 - esp as the base always wants an index,
17317 - ebp as the base always wants a displacement,
17318 - r12 as the base always wants an index,
17319 - r13 as the base always wants a displacement. */
17320
17321 /* Register Indirect. */
17322 if (base && !index && !disp)
17323 {
17324 /* esp (for its index) and ebp (for its displacement) need
17325 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
17326 code. */
17327 if (base == arg_pointer_rtx
17328 || base == frame_pointer_rtx
17329 || REGNO (base) == SP_REG
17330 || REGNO (base) == BP_REG
17331 || REGNO (base) == R12_REG
17332 || REGNO (base) == R13_REG)
17333 len++;
17334 }
17335
17336 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
17337 is not disp32, but disp32(%rip), so for disp32
17338 SIB byte is needed, unless print_operand_address
17339 optimizes it into disp32(%rip) or (%rip) is implied
17340 by UNSPEC. */
17341 else if (disp && !base && !index)
17342 {
17343 len += 4;
17344      if (!ix86_rip_relative_addr_p (&parts))
17345 len++;
17346 }
17347 else
17348 {
17349 /* Find the length of the displacement constant. */
17350 if (disp)
17351 {
17352	  if (base && satisfies_constraint_K (disp))
17353 len += 1;
17354 else
17355 len += 4;
17356 }
17357 /* ebp always wants a displacement. Similarly r13. */
17358 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
17359 len++;
17360
17361 /* An index requires the two-byte modrm form.... */
17362 if (index
17363 /* ...like esp (or r12), which always wants an index. */
17364 || base == arg_pointer_rtx
17365 || base == frame_pointer_rtx
17366 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
17367 len++;
17368 }
17369
17370 return len;
17371}
17372
17373 /* Compute the default value for the "length_immediate" attribute.  When
17374    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
17375int
17376ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
17377{
17378 int len = 0;
17379 int i;
17380 extract_insn_cached (insn);
17381 for (i = recog_data.n_operands - 1; i >= 0; --i)
17382 if (CONSTANT_P (recog_data.operand[i]))
17383 {
17384 enum attr_mode mode = get_attr_mode (insn);
17385
17386 gcc_assert (!len);
17387 if (shortform && CONST_INT_P (recog_data.operand[i]))
17388 {
17389 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
17390 switch (mode)
17391 {
17392 case MODE_QI:
17393 len = 1;
17394 continue;
17395 case MODE_HI:
17396 ival = trunc_int_for_mode (ival, HImode);
17397 break;
17398 case MODE_SI:
17399 ival = trunc_int_for_mode (ival, SImode);
17400 break;
17401 default:
17402 break;
17403 }
17404 if (IN_RANGE (ival, -128, 127))
17405 {
17406 len = 1;
17407 continue;
17408 }
17409 }
17410 switch (mode)
17411 {
17412 case MODE_QI:
17413 len = 1;
17414 break;
17415 case MODE_HI:
17416 len = 2;
17417 break;
17418 case MODE_SI:
17419 len = 4;
17420 break;
17421 /* Immediates for DImode instructions are encoded
17422 as 32bit sign extended values. */
17423 case MODE_DI:
17424 len = 4;
17425 break;
17426 default:
17427 fatal_insn ("unknown insn mode", insn);
17428 }
17429 }
17430 return len;
17431}
17432
17433/* Compute default value for "length_address" attribute. */
17434int
17435ix86_attr_length_address_default (rtx_insn *insn)
17436{
17437 int i;
17438
17439 if (get_attr_type (insn) == TYPE_LEA)
17440 {
17441 rtx set = PATTERN (insn), addr;
17442
17443 if (GET_CODE (set) == PARALLEL)
17444 set = XVECEXP (set, 0, 0);
17445
17446 gcc_assert (GET_CODE (set) == SET);
17447
17448 addr = SET_SRC (set);
17449
17450      return memory_address_length (addr, true);
17451 }
17452
17453 extract_insn_cached (insn);
17454 for (i = recog_data.n_operands - 1; i >= 0; --i)
17455 {
17456 rtx op = recog_data.operand[i];
17457 if (MEM_P (op))
17458 {
17459 constrain_operands_cached (insn, reload_completed);
17460 if (which_alternative != -1)
17461 {
17462 const char *constraints = recog_data.constraints[i];
17463 int alt = which_alternative;
17464
17465 while (*constraints == '=' || *constraints == '+')
17466 constraints++;
17467 while (alt-- > 0)
17468 while (*constraints++ != ',')
17469 ;
17470 /* Skip ignored operands. */
17471 if (*constraints == 'X')
17472 continue;
17473 }
17474
17475	  int len = memory_address_length (XEXP (op, 0), false);
17476
17477 /* Account for segment prefix for non-default addr spaces. */
17478 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17479 len++;
17480
17481 return len;
17482 }
17483 }
17484 return 0;
17485}
17486
17487/* Compute default value for "length_vex" attribute. It includes
17488 2 or 3 byte VEX prefix and 1 opcode byte. */
17489
17490int
17491ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17492 bool has_vex_w)
17493{
17494 int i, reg_only = 2 + 1;
17495 bool has_mem = false;
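  /* Assume a 2-byte VEX prefix plus one opcode byte; the checks below may
     force the longer forms.  */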
17496
17497 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
17498 byte VEX prefix. */
17499 if (!has_0f_opcode || has_vex_w)
17500 return 3 + 1;
17501
17502 /* We can always use 2 byte VEX prefix in 32bit. */
17503 if (!TARGET_64BIT)
17504 return 2 + 1;
17505
17506 extract_insn_cached (insn);
17507
17508 for (i = recog_data.n_operands - 1; i >= 0; --i)
17509 if (REG_P (recog_data.operand[i]))
17510 {
17511	/* The REX.W bit requires the 3-byte VEX prefix.
17512	   REX2 with VEX uses the extended EVEX prefix, whose length is 4 bytes.  */
17513 if (GET_MODE (recog_data.operand[i]) == DImode
17514 && GENERAL_REG_P (recog_data.operand[i]))
17515 return 3 + 1;
17516
17517	/* The REX.B bit requires the 3-byte VEX prefix.  Right here we don't know
17518	   which operand will be encoded using VEX.B, so be conservative.
17519	   REX2 with VEX uses the extended EVEX prefix, whose length is 4 bytes.  */
17520 if (REX_INT_REGNO_P (recog_data.operand[i])
17521 || REX2_INT_REGNO_P (recog_data.operand[i])
17522 || REX_SSE_REGNO_P (recog_data.operand[i]))
17523 reg_only = 3 + 1;
17524 }
17525 else if (MEM_P (recog_data.operand[i]))
17526 {
17527 /* REX2.X or REX2.B bits use 3 byte VEX prefix. */
17528 if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
17529 return 4;
17530
17531 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17532 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17533 return 3 + 1;
17534
17535 has_mem = true;
17536 }
17537
17538 return has_mem ? 2 + 1 : reg_only;
17539}
17540
17541
17542static bool
17543ix86_class_likely_spilled_p (reg_class_t);
17544
17545 /* Return true if the lhs of INSN is a HW function argument register, and set
17546    IS_SPILLED to true if it is a likely-spilled HW register.  */
17547static bool
17548insn_is_function_arg (rtx insn, bool* is_spilled)
17549{
17550 rtx dst;
17551
17552 if (!NONDEBUG_INSN_P (insn))
17553 return false;
17554  /* Call instructions are not movable, ignore them.  */
17555 if (CALL_P (insn))
17556 return false;
17557 insn = PATTERN (insn);
17558 if (GET_CODE (insn) == PARALLEL)
17559 insn = XVECEXP (insn, 0, 0);
17560 if (GET_CODE (insn) != SET)
17561 return false;
17562 dst = SET_DEST (insn);
17563 if (REG_P (dst) && HARD_REGISTER_P (dst)
17564 && ix86_function_arg_regno_p (REGNO (dst)))
17565 {
17566 /* Is it likely spilled HW register? */
17567 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17568 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17569 *is_spilled = true;
17570 return true;
17571 }
17572 return false;
17573}
17574
17575 /* Add output dependencies for a chain of adjacent function-argument moves, but
17576    only if there is a move to a likely-spilled HW register.  Return the first
17577    argument if at least one dependence was added, or NULL otherwise.  */
17578static rtx_insn *
17579add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17580{
17581 rtx_insn *insn;
17582 rtx_insn *last = call;
17583 rtx_insn *first_arg = NULL;
17584 bool is_spilled = false;
17585
17586  head = PREV_INSN (head);
17587
17588 /* Find nearest to call argument passing instruction. */
17589 while (true)
17590 {
17591      last = PREV_INSN (last);
17592 if (last == head)
17593 return NULL;
17594 if (!NONDEBUG_INSN_P (last))
17595 continue;
17596      if (insn_is_function_arg (last, &is_spilled))
17597 break;
17598 return NULL;
17599 }
17600
17601 first_arg = last;
17602 while (true)
17603 {
17604      insn = PREV_INSN (last);
17605 if (!INSN_P (insn))
17606 break;
17607 if (insn == head)
17608 break;
17609 if (!NONDEBUG_INSN_P (insn))
17610 {
17611 last = insn;
17612 continue;
17613 }
17614      if (insn_is_function_arg (insn, &is_spilled))
17615 {
17616	  /* Add an output dependence between two function arguments if the chain
17617	     of output arguments contains likely-spilled HW registers.  */
17618 if (is_spilled)
17619 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17620 first_arg = last = insn;
17621 }
17622 else
17623 break;
17624 }
17625 if (!is_spilled)
17626 return NULL;
17627 return first_arg;
17628}
17629
17630 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
17631    code motion.  */
17632static void
17633avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17634{
17635 rtx set;
17636 rtx tmp;
17637
17638 set = single_set (insn);
17639 if (!set)
17640 return;
17641 tmp = SET_DEST (set);
17642 if (REG_P (tmp))
17643 {
17644 /* Add output dependency to the first function argument. */
17645 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17646 return;
17647 }
17648 /* Add anti dependency. */
17649 add_dependence (first_arg, insn, REG_DEP_ANTI);
17650}
17651
17652 /* Avoid cross-block motion of a function argument by adding a dependency
17653    from the first non-jump instruction in BB.  */
17654static void
17655add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17656{
17657 rtx_insn *insn = BB_END (bb);
17658
17659 while (insn)
17660 {
17661 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17662 {
17663 rtx set = single_set (insn);
17664 if (set)
17665 {
17666	      avoid_func_arg_motion (arg, insn);
17667 return;
17668 }
17669 }
17670 if (insn == BB_HEAD (bb))
17671 return;
17672 insn = PREV_INSN (insn);
17673 }
17674}
17675
17676/* Hook for pre-reload schedule - avoid motion of function arguments
17677 passed in likely spilled HW registers. */
17678static void
17679ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17680{
17681 rtx_insn *insn;
17682 rtx_insn *first_arg = NULL;
17683 if (reload_completed)
17684 return;
17685 while (head != tail && DEBUG_INSN_P (head))
17686    head = NEXT_INSN (head);
17687 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17688 if (INSN_P (insn) && CALL_P (insn))
17689 {
17690	first_arg = add_parameter_dependencies (insn, head);
17691 if (first_arg)
17692 {
17693	    /* Add a dependee for the first argument to predecessors, but only
17694	       if the region contains more than one block.  */
17695 basic_block bb = BLOCK_FOR_INSN (insn);
17696 int rgn = CONTAINING_RGN (bb->index);
17697 int nr_blks = RGN_NR_BLOCKS (rgn);
17698 /* Skip trivial regions and region head blocks that can have
17699 predecessors outside of region. */
17700 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17701 {
17702 edge e;
17703 edge_iterator ei;
17704
17705 /* Regions are SCCs with the exception of selective
17706 scheduling with pipelining of outer blocks enabled.
17707 So also check that immediate predecessors of a non-head
17708 block are in the same region. */
17709 FOR_EACH_EDGE (e, ei, bb->preds)
17710 {
17711 /* Avoid creating of loop-carried dependencies through
17712 using topological ordering in the region. */
17713 if (rgn == CONTAINING_RGN (e->src->index)
17714 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17715		  add_dependee_for_func_arg (first_arg, e->src);
17716 }
17717 }
17718 insn = first_arg;
17719 if (insn == head)
17720 break;
17721 }
17722 }
17723 else if (first_arg)
17724 avoid_func_arg_motion (first_arg, insn);
17725}
17726
17727/* Hook for pre-reload schedule - set priority of moves from likely spilled
17728    HW registers to the maximum, to schedule them as soon as possible.  These are
17729 moves from function argument registers at the top of the function entry
17730 and moves from function return value registers after call. */
17731static int
17732ix86_adjust_priority (rtx_insn *insn, int priority)
17733{
17734 rtx set;
17735
17736 if (reload_completed)
17737 return priority;
17738
17739 if (!NONDEBUG_INSN_P (insn))
17740 return priority;
17741
17742 set = single_set (insn);
17743 if (set)
17744 {
17745 rtx tmp = SET_SRC (set);
17746 if (REG_P (tmp)
17747 && HARD_REGISTER_P (tmp)
17748 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17749 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17750 return current_sched_info->sched_max_insns_priority;
17751 }
17752
17753 return priority;
17754}
17755
17756/* Prepare for scheduling pass. */
17757static void
17758ix86_sched_init_global (FILE *, int, int)
17759{
17760 /* Install scheduling hooks for current CPU. Some of these hooks are used
17761 in time-critical parts of the scheduler, so we only set them up when
17762 they are actually used. */
17763 switch (ix86_tune)
17764 {
17765 case PROCESSOR_CORE2:
17766 case PROCESSOR_NEHALEM:
17767 case PROCESSOR_SANDYBRIDGE:
17768 case PROCESSOR_HASWELL:
17769 case PROCESSOR_TREMONT:
17770 case PROCESSOR_ALDERLAKE:
17771 case PROCESSOR_GENERIC:
17772 /* Do not perform multipass scheduling for pre-reload schedule
17773 to save compile time. */
17774 if (reload_completed)
17775 {
17776 ix86_core2i7_init_hooks ();
17777 break;
17778 }
17779 /* Fall through. */
17780 default:
17781 targetm.sched.dfa_post_advance_cycle = NULL;
17782 targetm.sched.first_cycle_multipass_init = NULL;
17783 targetm.sched.first_cycle_multipass_begin = NULL;
17784 targetm.sched.first_cycle_multipass_issue = NULL;
17785 targetm.sched.first_cycle_multipass_backtrack = NULL;
17786 targetm.sched.first_cycle_multipass_end = NULL;
17787 targetm.sched.first_cycle_multipass_fini = NULL;
17788 break;
17789 }
17790}
17791
17792
17793/* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17794
17795static HOST_WIDE_INT
17796ix86_static_rtx_alignment (machine_mode mode)
17797{
17798 if (mode == DFmode)
17799 return 64;
17800 if (ALIGN_MODE_128 (mode))
17801 return MAX (128, GET_MODE_ALIGNMENT (mode));
17802 return GET_MODE_ALIGNMENT (mode);
17803}
17804
17805/* Implement TARGET_CONSTANT_ALIGNMENT. */
17806
17807static HOST_WIDE_INT
17808ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17809{
17810 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17811 || TREE_CODE (exp) == INTEGER_CST)
17812 {
17813 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17814 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17815 return MAX (mode_align, align);
17816 }
17817 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17818 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17819 return BITS_PER_WORD;
17820
17821 return align;
17822}
17823
17824/* Implement TARGET_EMPTY_RECORD_P. */
17825
17826static bool
17827ix86_is_empty_record (const_tree type)
17828{
17829 if (!TARGET_64BIT)
17830 return false;
17831 return default_is_empty_record (type);
17832}
17833
17834/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17835
17836static void
17837ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17838{
17839  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17840
17841 if (!cum->warn_empty)
17842 return;
17843
17844 if (!TYPE_EMPTY_P (type))
17845 return;
17846
17847 /* Don't warn if the function isn't visible outside of the TU. */
17848 if (cum->decl && !TREE_PUBLIC (cum->decl))
17849 return;
17850
17851 const_tree ctx = get_ultimate_context (cum->decl);
17852 if (ctx != NULL_TREE
17853 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17854 return;
17855
17856 /* If the actual size of the type is zero, then there is no change
17857 in how objects of this size are passed. */
17858 if (int_size_in_bytes (type) == 0)
17859 return;
17860
17861 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17862 "changes in %<-fabi-version=12%> (GCC 8)", type);
17863
17864 /* Only warn once. */
17865 cum->warn_empty = false;
17866}
17867
17868/* This hook returns name of multilib ABI. */
17869
17870static const char *
17871ix86_get_multilib_abi_name (void)
17872{
17873 if (!(TARGET_64BIT_P (ix86_isa_flags)))
17874 return "i386";
17875 else if (TARGET_X32_P (ix86_isa_flags))
17876 return "x32";
17877 else
17878 return "x86_64";
17879}
17880
17881/* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17882 the data type, and ALIGN is the alignment that the object would
17883 ordinarily have. */
17884
17885static int
17886iamcu_alignment (tree type, int align)
17887{
17888 machine_mode mode;
17889
17890 if (align < 32 || TYPE_USER_ALIGN (type))
17891 return align;
17892
17893   /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
17894      aligned to 4 bytes.  */
17895 type = strip_array_types (type);
17896 if (TYPE_ATOMIC (type))
17897 return align;
17898
17899 mode = TYPE_MODE (type);
17900 switch (GET_MODE_CLASS (mode))
17901 {
17902 case MODE_INT:
17903 case MODE_COMPLEX_INT:
17904 case MODE_COMPLEX_FLOAT:
17905 case MODE_FLOAT:
17906 case MODE_DECIMAL_FLOAT:
17907 return 32;
17908 default:
17909 return align;
17910 }
17911}
17912
17913/* Compute the alignment for a static variable.
17914 TYPE is the data type, and ALIGN is the alignment that
17915 the object would ordinarily have. The value of this function is used
17916 instead of that alignment to align the object. */
17917
17918int
17919ix86_data_alignment (tree type, unsigned int align, bool opt)
17920{
17921 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17922 for symbols from other compilation units or symbols that don't need
17923 to bind locally. In order to preserve some ABI compatibility with
17924 those compilers, ensure we don't decrease alignment from what we
17925 used to assume. */
17926
17927 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
17928
17929   /* A data structure equal to or greater than the size of a cache line
17930 (64 bytes in the Pentium 4 and other recent Intel processors, including
17931 processors based on Intel Core microarchitecture) should be aligned
17932 so that its base address is a multiple of a cache line size. */
17933
17934 unsigned int max_align
17935 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
17936
17937 if (max_align < BITS_PER_WORD)
17938 max_align = BITS_PER_WORD;
17939
17940 switch (ix86_align_data_type)
17941 {
17942 case ix86_align_data_type_abi: opt = false; break;
17943 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
17944 case ix86_align_data_type_cacheline: break;
17945 }
17946
17947 if (TARGET_IAMCU)
17948 align = iamcu_alignment (type, align);
17949
17950 if (opt
17951 && AGGREGATE_TYPE_P (type)
17952 && TYPE_SIZE (type)
17953 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
17954 {
17955      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
17956 && align < max_align_compat)
17957 align = max_align_compat;
17958      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
17959 && align < max_align)
17960 align = max_align;
17961 }
17962
17963   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
17964      to a 16-byte boundary.  */
17965 if (TARGET_64BIT)
17966 {
17967 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
17968 && TYPE_SIZE (type)
17969 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17970	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17971 && align < 128)
17972 return 128;
17973 }
17974
17975 if (!opt)
17976 return align;
17977
17978 if (TREE_CODE (type) == ARRAY_TYPE)
17979 {
17980 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17981 return 64;
17982 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17983 return 128;
17984 }
17985 else if (TREE_CODE (type) == COMPLEX_TYPE)
17986 {
17987
17988 if (TYPE_MODE (type) == DCmode && align < 64)
17989 return 64;
17990 if ((TYPE_MODE (type) == XCmode
17991 || TYPE_MODE (type) == TCmode) && align < 128)
17992 return 128;
17993 }
17994 else if (RECORD_OR_UNION_TYPE_P (type)
17995 && TYPE_FIELDS (type))
17996 {
17997 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17998 return 64;
17999 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18000 return 128;
18001 }
18002 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18003 || TREE_CODE (type) == INTEGER_TYPE)
18004 {
18005 if (TYPE_MODE (type) == DFmode && align < 64)
18006 return 64;
18007 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18008 return 128;
18009 }
18010
18011 return align;
18012}
18013
18014 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */
18015static void
18016ix86_lower_local_decl_alignment (tree decl)
18017{
18018 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18019 DECL_ALIGN (decl), true);
18020 if (new_align < DECL_ALIGN (decl))
18021 SET_DECL_ALIGN (decl, new_align);
18022}
18023
18024/* Compute the alignment for a local variable or a stack slot. EXP is
18025 the data type or decl itself, MODE is the widest mode available and
18026 ALIGN is the alignment that the object would ordinarily have. The
18027 value of this macro is used instead of that alignment to align the
18028 object. */
18029
18030unsigned int
18031ix86_local_alignment (tree exp, machine_mode mode,
18032 unsigned int align, bool may_lower)
18033{
18034 tree type, decl;
18035
18036 if (exp && DECL_P (exp))
18037 {
18038 type = TREE_TYPE (exp);
18039 decl = exp;
18040 }
18041 else
18042 {
18043 type = exp;
18044 decl = NULL;
18045 }
18046
18047 /* Don't do dynamic stack realignment for long long objects with
18048 -mpreferred-stack-boundary=2. */
18049 if (may_lower
18050 && !TARGET_64BIT
18051 && align == 64
18052 && ix86_preferred_stack_boundary < 64
18053 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18054 && (!type || (!TYPE_USER_ALIGN (type)
18055 && !TYPE_ATOMIC (strip_array_types (type))))
18056 && (!decl || !DECL_USER_ALIGN (decl)))
18057 align = 32;
18058
18059 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18060 register in MODE. We will return the largest alignment of XF
18061 and DF. */
18062 if (!type)
18063 {
18064 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18065 align = GET_MODE_ALIGNMENT (DFmode);
18066 return align;
18067 }
18068
18069 /* Don't increase alignment for Intel MCU psABI. */
18070 if (TARGET_IAMCU)
18071 return align;
18072
18073   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18074      to a 16-byte boundary.  The exact wording is:
18075
18076 An array uses the same alignment as its elements, except that a local or
18077 global array variable of length at least 16 bytes or
18078 a C99 variable-length array variable always has alignment of at least 16 bytes.
18079
18080      This was added to allow use of aligned SSE instructions on arrays.  This
18081      rule is meant for static storage (where the compiler cannot do the analysis
18082      by itself).  We follow it for automatic variables only when convenient.
18083      We fully control everything in the function being compiled, and functions
18084      from other units cannot rely on the alignment.
18085
18086 Exclude va_list type. It is the common case of local array where
18087 we cannot benefit from the alignment.
18088
18089 TODO: Probably one should optimize for size only when var is not escaping. */
18090 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18091 && TARGET_SSE)
18092 {
18093 if (AGGREGATE_TYPE_P (type)
18094 && (va_list_type_node == NULL_TREE
18095 || (TYPE_MAIN_VARIANT (type)
18096 != TYPE_MAIN_VARIANT (va_list_type_node)))
18097 && TYPE_SIZE (type)
18098 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18099	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18100 && align < 128)
18101 return 128;
18102 }
18103 if (TREE_CODE (type) == ARRAY_TYPE)
18104 {
18105 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18106 return 64;
18107 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18108 return 128;
18109 }
18110 else if (TREE_CODE (type) == COMPLEX_TYPE)
18111 {
18112 if (TYPE_MODE (type) == DCmode && align < 64)
18113 return 64;
18114 if ((TYPE_MODE (type) == XCmode
18115 || TYPE_MODE (type) == TCmode) && align < 128)
18116 return 128;
18117 }
18118 else if (RECORD_OR_UNION_TYPE_P (type)
18119 && TYPE_FIELDS (type))
18120 {
18121 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18122 return 64;
18123 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18124 return 128;
18125 }
18126 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18127 || TREE_CODE (type) == INTEGER_TYPE)
18128 {
18129
18130 if (TYPE_MODE (type) == DFmode && align < 64)
18131 return 64;
18132 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18133 return 128;
18134 }
18135 return align;
18136}
18137
18138/* Compute the minimum required alignment for dynamic stack realignment
18139 purposes for a local variable, parameter or a stack slot. EXP is
18140 the data type or decl itself, MODE is its mode and ALIGN is the
18141 alignment that the object would ordinarily have. */
18142
18143unsigned int
18144ix86_minimum_alignment (tree exp, machine_mode mode,
18145 unsigned int align)
18146{
18147 tree type, decl;
18148
18149 if (exp && DECL_P (exp))
18150 {
18151 type = TREE_TYPE (exp);
18152 decl = exp;
18153 }
18154 else
18155 {
18156 type = exp;
18157 decl = NULL;
18158 }
18159
18160 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18161 return align;
18162
18163 /* Don't do dynamic stack realignment for long long objects with
18164 -mpreferred-stack-boundary=2. */
18165 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18166 && (!type || (!TYPE_USER_ALIGN (type)
18167 && !TYPE_ATOMIC (strip_array_types (type))))
18168 && (!decl || !DECL_USER_ALIGN (decl)))
18169 {
18170 gcc_checking_assert (!TARGET_STV);
18171 return 32;
18172 }
18173
18174 return align;
18175}
18176
18177/* Find a location for the static chain incoming to a nested function.
18178 This is a register, unless all free registers are used by arguments. */
18179
18180static rtx
18181ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18182{
18183 unsigned regno;
18184
18185 if (TARGET_64BIT)
18186 {
18187 /* We always use R10 in 64-bit mode. */
18188 regno = R10_REG;
18189 }
18190 else
18191 {
18192 const_tree fntype, fndecl;
18193 unsigned int ccvt;
18194
18195 /* By default in 32-bit mode we use ECX to pass the static chain. */
18196 regno = CX_REG;
18197
18198 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18199 {
18200 fntype = TREE_TYPE (fndecl_or_type);
18201 fndecl = fndecl_or_type;
18202 }
18203 else
18204 {
18205 fntype = fndecl_or_type;
18206 fndecl = NULL;
18207 }
18208
18209      ccvt = ix86_get_callcvt (fntype);
18210 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18211 {
18212 /* Fastcall functions use ecx/edx for arguments, which leaves
18213 us with EAX for the static chain.
18214 Thiscall functions use ecx for arguments, which also
18215 leaves us with EAX for the static chain. */
18216 regno = AX_REG;
18217 }
18218 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18219 {
18220 /* Thiscall functions use ecx for arguments, which leaves
18221 us with EAX and EDX for the static chain.
18222 We are using for abi-compatibility EAX. */
18223 regno = AX_REG;
18224 }
18225      else if (ix86_function_regparm (fntype, fndecl) == 3)
18226 {
18227 /* For regparm 3, we have no free call-clobbered registers in
18228 which to store the static chain. In order to implement this,
18229 we have the trampoline push the static chain to the stack.
18230 However, we can't push a value below the return address when
18231 we call the nested function directly, so we have to use an
18232 alternate entry point. For this we use ESI, and have the
18233 alternate entry point push ESI, so that things appear the
18234 same once we're executing the nested function. */
18235 if (incoming_p)
18236 {
18237 if (fndecl == current_function_decl
18238 && !ix86_static_chain_on_stack)
18239 {
18240 gcc_assert (!reload_completed);
18241 ix86_static_chain_on_stack = true;
18242 }
18243 return gen_frame_mem (SImode,
18244 plus_constant (Pmode,
18245 arg_pointer_rtx, -8));
18246 }
18247 regno = SI_REG;
18248 }
18249 }
18250
18251 return gen_rtx_REG (Pmode, regno);
18252}
18253
18254/* Emit RTL insns to initialize the variable parts of a trampoline.
18255 FNDECL is the decl of the target address; M_TRAMP is a MEM for
18256 the trampoline, and CHAIN_VALUE is an RTX for the static chain
18257 to be passed to the target function. */
18258
18259static void
18260ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18261{
18262 rtx mem, fnaddr;
18263 int opcode;
18264 int offset = 0;
18265 bool need_endbr = (flag_cf_protection & CF_BRANCH);
18266
18267 fnaddr = XEXP (DECL_RTL (fndecl), 0);
18268
18269 if (TARGET_64BIT)
18270 {
18271 int size;
18272
18273 if (need_endbr)
18274 {
18275 /* Insert ENDBR64. */
18276 mem = adjust_address (m_tramp, SImode, offset);
18277 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18278 offset += 4;
18279 }
18280
18281 /* Load the function address to r11. Try to load address using
18282 the shorter movl instead of movabs. We may want to support
18283 movq for kernel mode, but kernel does not use trampolines at
18284 the moment. FNADDR is a 32bit address and may not be in
18285 DImode when ptr_mode == SImode. Always use movl in this
18286 case. */
18287 if (ptr_mode == SImode
18288 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18289 {
18290 fnaddr = copy_addr_to_reg (fnaddr);
18291
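	  /* 0x41 0xbb is "movl $imm32, %r11d" (REX.B prefix + 0xbb).  */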
18292 mem = adjust_address (m_tramp, HImode, offset);
18293 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18294
18295 mem = adjust_address (m_tramp, SImode, offset + 2);
18296 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18297 offset += 6;
18298 }
18299 else
18300 {
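	  /* 0x49 0xbb is "movabsq $imm64, %r11" (REX.W+B prefix + 0xbb).  */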
18301 mem = adjust_address (m_tramp, HImode, offset);
18302 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18303
18304 mem = adjust_address (m_tramp, DImode, offset + 2);
18305 emit_move_insn (mem, fnaddr);
18306 offset += 10;
18307 }
18308
18309 /* Load static chain using movabs to r10. Use the shorter movl
18310 instead of movabs when ptr_mode == SImode. */
18311 if (ptr_mode == SImode)
18312 {
18313 opcode = 0xba41;
18314 size = 6;
18315 }
18316 else
18317 {
18318 opcode = 0xba49;
18319 size = 10;
18320 }
18321
18322 mem = adjust_address (m_tramp, HImode, offset);
18323 emit_move_insn (mem, gen_int_mode (opcode, HImode));
18324
18325 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18326 emit_move_insn (mem, chain_value);
18327 offset += size;
18328
18329 /* Jump to r11; the last (unused) byte is a nop, only there to
18330 pad the write out to a single 32-bit store. */
18331 mem = adjust_address (m_tramp, SImode, offset);
18332 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18333 offset += 4;
18334 }
18335 else
18336 {
18337 rtx disp, chain;
18338
18339 /* Depending on the static chain location, either load a register
18340 with a constant, or push the constant to the stack. All of the
18341 instructions are the same size. */
18342      chain = ix86_static_chain (fndecl, true);
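      /* Opcode 0xb8 is "movl $imm32, %eax", 0xb9 is "movl $imm32, %ecx",
	 and 0x68 is "pushl $imm32".  */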
18343 if (REG_P (chain))
18344 {
18345 switch (REGNO (chain))
18346 {
18347 case AX_REG:
18348 opcode = 0xb8; break;
18349 case CX_REG:
18350 opcode = 0xb9; break;
18351 default:
18352 gcc_unreachable ();
18353 }
18354 }
18355 else
18356 opcode = 0x68;
18357
18358 if (need_endbr)
18359 {
18360 /* Insert ENDBR32. */
18361 mem = adjust_address (m_tramp, SImode, offset);
18362 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18363 offset += 4;
18364 }
18365
18366 mem = adjust_address (m_tramp, QImode, offset);
18367 emit_move_insn (mem, gen_int_mode (opcode, QImode));
18368
18369 mem = adjust_address (m_tramp, SImode, offset + 1);
18370 emit_move_insn (mem, chain_value);
18371 offset += 5;
18372
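      /* 0xe9 is a direct jmp with a 32-bit relative displacement.  */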
18373 mem = adjust_address (m_tramp, QImode, offset);
18374 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18375
18376 mem = adjust_address (m_tramp, SImode, offset + 1);
18377
18378 /* Compute offset from the end of the jmp to the target function.
18379 In the case in which the trampoline stores the static chain on
18380 the stack, we need to skip the first insn which pushes the
18381 (call-saved) register static chain; this push is 1 byte. */
18382 offset += 5;
18383 int skip = MEM_P (chain) ? 1 : 0;
18384 /* Skip ENDBR32 at the entry of the target function. */
18385 if (need_endbr
18386	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
18387 skip += 4;
18388 disp = expand_binop (SImode, sub_optab, fnaddr,
18389 plus_constant (Pmode, XEXP (m_tramp, 0),
18390 offset - skip),
18391 NULL_RTX, 1, OPTAB_DIRECT);
18392 emit_move_insn (mem, disp);
18393 }
18394
18395 gcc_assert (offset <= TRAMPOLINE_SIZE);
18396
18397#ifdef HAVE_ENABLE_EXECUTE_STACK
18398#ifdef CHECK_EXECUTE_STACK_ENABLED
18399 if (CHECK_EXECUTE_STACK_ENABLED)
18400#endif
18401 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18402 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18403#endif
18404}
18405
18406static bool
18407ix86_allocate_stack_slots_for_args (void)
18408{
18409 /* Naked functions should not allocate stack slots for arguments. */
18410  return !ix86_function_naked (current_function_decl);
18411}
18412
18413static bool
18414ix86_warn_func_return (tree decl)
18415{
18416 /* Naked functions are implemented entirely in assembly, including the
18417 return sequence, so suppress warnings about this. */
18418  return !ix86_function_naked (decl);
18419}
18420
18421/* Return the shift count of a vector by scalar shift builtin second argument
18422 ARG1. */
18423static tree
18424ix86_vector_shift_count (tree arg1)
18425{
18426 if (tree_fits_uhwi_p (arg1))
18427 return arg1;
18428 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18429 {
18430 /* The count argument is weird, passed in as various 128-bit
18431 (or 64-bit) vectors, the low 64 bits from it are the count. */
18432 unsigned char buf[16];
18433 int len = native_encode_expr (arg1, buf, 16);
18434 if (len == 0)
18435 return NULL_TREE;
18436 tree t = native_interpret_expr (uint64_type_node, buf, len);
18437 if (t && tree_fits_uhwi_p (t))
18438 return t;
18439 }
18440 return NULL_TREE;
18441}
18442
18443/* Return true if arg_mask is all ones, ELEMS is elements number of
18444 corresponding vector. */
18445static bool
18446ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18447{
18448 if (TREE_CODE (arg_mask) != INTEGER_CST)
18449 return false;
18450
18451 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18452 if (elems == HOST_BITS_PER_WIDE_INT)
18453 return mask == HOST_WIDE_INT_M1U;
18454 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18455 return false;
18456
18457 return true;
18458}
18459
18460static tree
18461ix86_fold_builtin (tree fndecl, int n_args,
18462 tree *args, bool ignore ATTRIBUTE_UNUSED)
18463{
18464 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18465 {
18466 enum ix86_builtins fn_code
18467	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18468 enum rtx_code rcode;
18469 bool is_vshift;
18470 unsigned HOST_WIDE_INT mask;
18471
18472 switch (fn_code)
18473 {
18474 case IX86_BUILTIN_CPU_IS:
18475 case IX86_BUILTIN_CPU_SUPPORTS:
18476 gcc_assert (n_args == 1);
18477 return fold_builtin_cpu (fndecl, args);
18478
18479 case IX86_BUILTIN_NANQ:
18480 case IX86_BUILTIN_NANSQ:
18481 {
18482 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18483 const char *str = c_getstr (*args);
18484 int quiet = fn_code == IX86_BUILTIN_NANQ;
18485 REAL_VALUE_TYPE real;
18486
18487 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18488 return build_real (type, real);
18489 return NULL_TREE;
18490 }
18491
18492 case IX86_BUILTIN_INFQ:
18493 case IX86_BUILTIN_HUGE_VALQ:
18494 {
18495 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18496 REAL_VALUE_TYPE inf;
18497 real_inf (&inf);
18498 return build_real (type, inf);
18499 }
18500
18501 case IX86_BUILTIN_TZCNT16:
18502 case IX86_BUILTIN_CTZS:
18503 case IX86_BUILTIN_TZCNT32:
18504 case IX86_BUILTIN_TZCNT64:
18505 gcc_assert (n_args == 1);
18506 if (TREE_CODE (args[0]) == INTEGER_CST)
18507 {
18508 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18509 tree arg = args[0];
18510 if (fn_code == IX86_BUILTIN_TZCNT16
18511 || fn_code == IX86_BUILTIN_CTZS)
18512 arg = fold_convert (short_unsigned_type_node, arg);
18513 if (integer_zerop (arg))
18514 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18515 else
18516 return fold_const_call (CFN_CTZ, type, arg);
18517 }
18518 break;
18519
18520 case IX86_BUILTIN_LZCNT16:
18521 case IX86_BUILTIN_CLZS:
18522 case IX86_BUILTIN_LZCNT32:
18523 case IX86_BUILTIN_LZCNT64:
18524 gcc_assert (n_args == 1);
18525 if (TREE_CODE (args[0]) == INTEGER_CST)
18526 {
18527 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18528 tree arg = args[0];
18529 if (fn_code == IX86_BUILTIN_LZCNT16
18530 || fn_code == IX86_BUILTIN_CLZS)
18531 arg = fold_convert (short_unsigned_type_node, arg);
18532 if (integer_zerop (arg))
18533 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18534 else
18535 return fold_const_call (CFN_CLZ, type, arg);
18536 }
18537 break;
18538
18539 case IX86_BUILTIN_BEXTR32:
18540 case IX86_BUILTIN_BEXTR64:
18541 case IX86_BUILTIN_BEXTRI32:
18542 case IX86_BUILTIN_BEXTRI64:
18543 gcc_assert (n_args == 2);
18544 if (tree_fits_uhwi_p (args[1]))
18545 {
18546 unsigned HOST_WIDE_INT res = 0;
18547 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
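	      /* The selector operand encodes the start bit in bits [7:0]
		 and the field length in bits [15:8], as for the BEXTR
		 instruction.  */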
18548 unsigned int start = tree_to_uhwi (args[1]);
18549 unsigned int len = (start & 0xff00) >> 8;
18550 start &= 0xff;
18551 if (start >= prec || len == 0)
18552 res = 0;
18553 else if (!tree_fits_uhwi_p (args[0]))
18554 break;
18555 else
18556 res = tree_to_uhwi (args[0]) >> start;
18557 if (len > prec)
18558 len = prec;
18559 if (len < HOST_BITS_PER_WIDE_INT)
18560 res &= (HOST_WIDE_INT_1U << len) - 1;
18561 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18562 }
18563 break;
18564
18565 case IX86_BUILTIN_BZHI32:
18566 case IX86_BUILTIN_BZHI64:
18567 gcc_assert (n_args == 2);
18568 if (tree_fits_uhwi_p (args[1]))
18569 {
18570 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18571 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18572 return args[0];
18573 if (idx == 0)
18574 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
18575 if (!tree_fits_uhwi_p (args[0]))
18576 break;
18577 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18578 res &= ~(HOST_WIDE_INT_M1U << idx);
18579 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18580 }
18581 break;
18582
18583 case IX86_BUILTIN_PDEP32:
18584 case IX86_BUILTIN_PDEP64:
18585 gcc_assert (n_args == 2);
18586 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18587 {
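 /* pdep: deposit the low-order bits of SRC at the bit positions
 selected by MASK. */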
18588 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18589 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18590 unsigned HOST_WIDE_INT res = 0;
18591 unsigned HOST_WIDE_INT m, k = 1;
18592 for (m = 1; m; m <<= 1)
18593 if ((mask & m) != 0)
18594 {
18595 if ((src & k) != 0)
18596 res |= m;
18597 k <<= 1;
18598 }
18599 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18600 }
18601 break;
18602
18603 case IX86_BUILTIN_PEXT32:
18604 case IX86_BUILTIN_PEXT64:
18605 gcc_assert (n_args == 2);
18606 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18607 {
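 /* pext: gather the bits of SRC selected by MASK into the low-order
 bits of the result. */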
18608 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18609 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18610 unsigned HOST_WIDE_INT res = 0;
18611 unsigned HOST_WIDE_INT m, k = 1;
18612 for (m = 1; m; m <<= 1)
18613 if ((mask & m) != 0)
18614 {
18615 if ((src & m) != 0)
18616 res |= k;
18617 k <<= 1;
18618 }
18619 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18620 }
18621 break;
18622
18623 case IX86_BUILTIN_MOVMSKPS:
18624 case IX86_BUILTIN_PMOVMSKB:
18625 case IX86_BUILTIN_MOVMSKPD:
18626 case IX86_BUILTIN_PMOVMSKB128:
18627 case IX86_BUILTIN_MOVMSKPD256:
18628 case IX86_BUILTIN_MOVMSKPS256:
18629 case IX86_BUILTIN_PMOVMSKB256:
18630 gcc_assert (n_args == 1);
18631 if (TREE_CODE (args[0]) == VECTOR_CST)
18632 {
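 /* movmsk collects the sign bit of each element into the
 corresponding bit of the scalar result. */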
18633 HOST_WIDE_INT res = 0;
18634 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18635 {
18636 tree e = VECTOR_CST_ELT (args[0], i);
18637 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18638 {
18639 if (wi::neg_p (wi::to_wide (e)))
18640 res |= HOST_WIDE_INT_1 << i;
18641 }
18642 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18643 {
18644 if (TREE_REAL_CST (e).sign)
18645 res |= HOST_WIDE_INT_1 << i;
18646 }
18647 else
18648 return NULL_TREE;
18649 }
18650 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18651 }
18652 break;
18653
18654 case IX86_BUILTIN_PSLLD:
18655 case IX86_BUILTIN_PSLLD128:
18656 case IX86_BUILTIN_PSLLD128_MASK:
18657 case IX86_BUILTIN_PSLLD256:
18658 case IX86_BUILTIN_PSLLD256_MASK:
18659 case IX86_BUILTIN_PSLLD512:
18660 case IX86_BUILTIN_PSLLDI:
18661 case IX86_BUILTIN_PSLLDI128:
18662 case IX86_BUILTIN_PSLLDI128_MASK:
18663 case IX86_BUILTIN_PSLLDI256:
18664 case IX86_BUILTIN_PSLLDI256_MASK:
18665 case IX86_BUILTIN_PSLLDI512:
18666 case IX86_BUILTIN_PSLLQ:
18667 case IX86_BUILTIN_PSLLQ128:
18668 case IX86_BUILTIN_PSLLQ128_MASK:
18669 case IX86_BUILTIN_PSLLQ256:
18670 case IX86_BUILTIN_PSLLQ256_MASK:
18671 case IX86_BUILTIN_PSLLQ512:
18672 case IX86_BUILTIN_PSLLQI:
18673 case IX86_BUILTIN_PSLLQI128:
18674 case IX86_BUILTIN_PSLLQI128_MASK:
18675 case IX86_BUILTIN_PSLLQI256:
18676 case IX86_BUILTIN_PSLLQI256_MASK:
18677 case IX86_BUILTIN_PSLLQI512:
18678 case IX86_BUILTIN_PSLLW:
18679 case IX86_BUILTIN_PSLLW128:
18680 case IX86_BUILTIN_PSLLW128_MASK:
18681 case IX86_BUILTIN_PSLLW256:
18682 case IX86_BUILTIN_PSLLW256_MASK:
18683 case IX86_BUILTIN_PSLLW512_MASK:
18684 case IX86_BUILTIN_PSLLWI:
18685 case IX86_BUILTIN_PSLLWI128:
18686 case IX86_BUILTIN_PSLLWI128_MASK:
18687 case IX86_BUILTIN_PSLLWI256:
18688 case IX86_BUILTIN_PSLLWI256_MASK:
18689 case IX86_BUILTIN_PSLLWI512_MASK:
18690 rcode = ASHIFT;
18691 is_vshift = false;
18692 goto do_shift;
18693 case IX86_BUILTIN_PSRAD:
18694 case IX86_BUILTIN_PSRAD128:
18695 case IX86_BUILTIN_PSRAD128_MASK:
18696 case IX86_BUILTIN_PSRAD256:
18697 case IX86_BUILTIN_PSRAD256_MASK:
18698 case IX86_BUILTIN_PSRAD512:
18699 case IX86_BUILTIN_PSRADI:
18700 case IX86_BUILTIN_PSRADI128:
18701 case IX86_BUILTIN_PSRADI128_MASK:
18702 case IX86_BUILTIN_PSRADI256:
18703 case IX86_BUILTIN_PSRADI256_MASK:
18704 case IX86_BUILTIN_PSRADI512:
18705 case IX86_BUILTIN_PSRAQ128_MASK:
18706 case IX86_BUILTIN_PSRAQ256_MASK:
18707 case IX86_BUILTIN_PSRAQ512:
18708 case IX86_BUILTIN_PSRAQI128_MASK:
18709 case IX86_BUILTIN_PSRAQI256_MASK:
18710 case IX86_BUILTIN_PSRAQI512:
18711 case IX86_BUILTIN_PSRAW:
18712 case IX86_BUILTIN_PSRAW128:
18713 case IX86_BUILTIN_PSRAW128_MASK:
18714 case IX86_BUILTIN_PSRAW256:
18715 case IX86_BUILTIN_PSRAW256_MASK:
18716 case IX86_BUILTIN_PSRAW512:
18717 case IX86_BUILTIN_PSRAWI:
18718 case IX86_BUILTIN_PSRAWI128:
18719 case IX86_BUILTIN_PSRAWI128_MASK:
18720 case IX86_BUILTIN_PSRAWI256:
18721 case IX86_BUILTIN_PSRAWI256_MASK:
18722 case IX86_BUILTIN_PSRAWI512:
18723 rcode = ASHIFTRT;
18724 is_vshift = false;
18725 goto do_shift;
18726 case IX86_BUILTIN_PSRLD:
18727 case IX86_BUILTIN_PSRLD128:
18728 case IX86_BUILTIN_PSRLD128_MASK:
18729 case IX86_BUILTIN_PSRLD256:
18730 case IX86_BUILTIN_PSRLD256_MASK:
18731 case IX86_BUILTIN_PSRLD512:
18732 case IX86_BUILTIN_PSRLDI:
18733 case IX86_BUILTIN_PSRLDI128:
18734 case IX86_BUILTIN_PSRLDI128_MASK:
18735 case IX86_BUILTIN_PSRLDI256:
18736 case IX86_BUILTIN_PSRLDI256_MASK:
18737 case IX86_BUILTIN_PSRLDI512:
18738 case IX86_BUILTIN_PSRLQ:
18739 case IX86_BUILTIN_PSRLQ128:
18740 case IX86_BUILTIN_PSRLQ128_MASK:
18741 case IX86_BUILTIN_PSRLQ256:
18742 case IX86_BUILTIN_PSRLQ256_MASK:
18743 case IX86_BUILTIN_PSRLQ512:
18744 case IX86_BUILTIN_PSRLQI:
18745 case IX86_BUILTIN_PSRLQI128:
18746 case IX86_BUILTIN_PSRLQI128_MASK:
18747 case IX86_BUILTIN_PSRLQI256:
18748 case IX86_BUILTIN_PSRLQI256_MASK:
18749 case IX86_BUILTIN_PSRLQI512:
18750 case IX86_BUILTIN_PSRLW:
18751 case IX86_BUILTIN_PSRLW128:
18752 case IX86_BUILTIN_PSRLW128_MASK:
18753 case IX86_BUILTIN_PSRLW256:
18754 case IX86_BUILTIN_PSRLW256_MASK:
18755 case IX86_BUILTIN_PSRLW512:
18756 case IX86_BUILTIN_PSRLWI:
18757 case IX86_BUILTIN_PSRLWI128:
18758 case IX86_BUILTIN_PSRLWI128_MASK:
18759 case IX86_BUILTIN_PSRLWI256:
18760 case IX86_BUILTIN_PSRLWI256_MASK:
18761 case IX86_BUILTIN_PSRLWI512:
18762 rcode = LSHIFTRT;
18763 is_vshift = false;
18764 goto do_shift;
18765 case IX86_BUILTIN_PSLLVV16HI:
18766 case IX86_BUILTIN_PSLLVV16SI:
18767 case IX86_BUILTIN_PSLLVV2DI:
18768 case IX86_BUILTIN_PSLLVV2DI_MASK:
18769 case IX86_BUILTIN_PSLLVV32HI:
18770 case IX86_BUILTIN_PSLLVV4DI:
18771 case IX86_BUILTIN_PSLLVV4DI_MASK:
18772 case IX86_BUILTIN_PSLLVV4SI:
18773 case IX86_BUILTIN_PSLLVV4SI_MASK:
18774 case IX86_BUILTIN_PSLLVV8DI:
18775 case IX86_BUILTIN_PSLLVV8HI:
18776 case IX86_BUILTIN_PSLLVV8SI:
18777 case IX86_BUILTIN_PSLLVV8SI_MASK:
18778 rcode = ASHIFT;
18779 is_vshift = true;
18780 goto do_shift;
18781 case IX86_BUILTIN_PSRAVQ128:
18782 case IX86_BUILTIN_PSRAVQ256:
18783 case IX86_BUILTIN_PSRAVV16HI:
18784 case IX86_BUILTIN_PSRAVV16SI:
18785 case IX86_BUILTIN_PSRAVV32HI:
18786 case IX86_BUILTIN_PSRAVV4SI:
18787 case IX86_BUILTIN_PSRAVV4SI_MASK:
18788 case IX86_BUILTIN_PSRAVV8DI:
18789 case IX86_BUILTIN_PSRAVV8HI:
18790 case IX86_BUILTIN_PSRAVV8SI:
18791 case IX86_BUILTIN_PSRAVV8SI_MASK:
18792 rcode = ASHIFTRT;
18793 is_vshift = true;
18794 goto do_shift;
18795 case IX86_BUILTIN_PSRLVV16HI:
18796 case IX86_BUILTIN_PSRLVV16SI:
18797 case IX86_BUILTIN_PSRLVV2DI:
18798 case IX86_BUILTIN_PSRLVV2DI_MASK:
18799 case IX86_BUILTIN_PSRLVV32HI:
18800 case IX86_BUILTIN_PSRLVV4DI:
18801 case IX86_BUILTIN_PSRLVV4DI_MASK:
18802 case IX86_BUILTIN_PSRLVV4SI:
18803 case IX86_BUILTIN_PSRLVV4SI_MASK:
18804 case IX86_BUILTIN_PSRLVV8DI:
18805 case IX86_BUILTIN_PSRLVV8HI:
18806 case IX86_BUILTIN_PSRLVV8SI:
18807 case IX86_BUILTIN_PSRLVV8SI_MASK:
18808 rcode = LSHIFTRT;
18809 is_vshift = true;
18810 goto do_shift;
18811
18812 do_shift:
18813 gcc_assert (n_args >= 2);
18814 if (TREE_CODE (args[0]) != VECTOR_CST)
18815 break;
18816 mask = HOST_WIDE_INT_M1U;
18817 if (n_args > 2)
18818 {
18819 /* This is a masked shift. */
18820 if (!tree_fits_uhwi_p (args[n_args - 1])
18821 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18822 break;
18823 mask = tree_to_uhwi (args[n_args - 1]);
18824 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18825 mask |= HOST_WIDE_INT_M1U << elems;
18826 if (mask != HOST_WIDE_INT_M1U
18827 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18828 break;
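 /* If every element is masked off, the result is just the
 pass-through operand. */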
18829 if (mask == (HOST_WIDE_INT_M1U << elems))
18830 return args[n_args - 2];
18831 }
18832 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18833 break;
18834 if (tree tem = (is_vshift ? integer_one_node
18835 : ix86_vector_shift_count (args[1])))
18836 {
18837 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18838 unsigned HOST_WIDE_INT prec
18839 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18840 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18841 return args[0];
18842 if (count >= prec)
18843 {
18844 if (rcode == ASHIFTRT)
18845 count = prec - 1;
18846 else if (mask == HOST_WIDE_INT_M1U)
18847 return build_zero_cst (TREE_TYPE (args[0]));
18848 }
18849 tree countt = NULL_TREE;
18850 if (!is_vshift)
18851 {
18852 if (count >= prec)
18853 countt = integer_zero_node;
18854 else
18855 countt = build_int_cst (integer_type_node, count);
18856 }
18857 tree_vector_builder builder;
18858 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18859 builder.new_vector (TREE_TYPE (args[0]),
18860 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18861 1);
18862 else
18863 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18864 false);
18865 unsigned int cnt = builder.encoded_nelts ();
18866 for (unsigned int i = 0; i < cnt; ++i)
18867 {
18868 tree elt = VECTOR_CST_ELT (args[0], i);
18869 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18870 return NULL_TREE;
18871 tree type = TREE_TYPE (elt);
18872 if (rcode == LSHIFTRT)
18873 elt = fold_convert (unsigned_type_for (type), elt);
18874 if (is_vshift)
18875 {
18876 countt = VECTOR_CST_ELT (args[1], i);
18877 if (TREE_CODE (countt) != INTEGER_CST
18878 || TREE_OVERFLOW (countt))
18879 return NULL_TREE;
18880 if (wi::neg_p (wi::to_wide (countt))
18881 || wi::to_widest (countt) >= prec)
18882 {
18883 if (rcode == ASHIFTRT)
18884 countt = build_int_cst (TREE_TYPE (countt),
18885 prec - 1);
18886 else
18887 {
18888 elt = build_zero_cst (TREE_TYPE (elt));
18889 countt = build_zero_cst (TREE_TYPE (countt));
18890 }
18891 }
18892 }
18893 else if (count >= prec)
18894 elt = build_zero_cst (TREE_TYPE (elt));
18895 elt = const_binop (rcode == ASHIFT
18896 ? LSHIFT_EXPR : RSHIFT_EXPR,
18897 TREE_TYPE (elt), elt, countt);
18898 if (!elt || TREE_CODE (elt) != INTEGER_CST)
18899 return NULL_TREE;
18900 if (rcode == LSHIFTRT)
18901 elt = fold_convert (type, elt);
18902 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
18903 {
18904 elt = VECTOR_CST_ELT (args[n_args - 2], i);
18905 if (TREE_CODE (elt) != INTEGER_CST
18906 || TREE_OVERFLOW (elt))
18907 return NULL_TREE;
18908 }
18909 builder.quick_push (elt);
18910 }
18911 return builder.build ();
18912 }
18913 break;
18914
18915 default:
18916 break;
18917 }
18918 }
18919
18920#ifdef SUBTARGET_FOLD_BUILTIN
18921 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
18922#endif
18923
18924 return NULL_TREE;
18925}
18926
18927/* Fold a MD builtin (use ix86_fold_builtin for folding into
18928 constant) in GIMPLE. */
18929
18930bool
18931ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
18932{
18933 gimple *stmt = gsi_stmt (*gsi), *g;
18934 gimple_seq stmts = NULL;
18935 tree fndecl = gimple_call_fndecl (stmt);
18936 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
18937 int n_args = gimple_call_num_args (stmt);
18938 enum ix86_builtins fn_code
18939 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18940 tree decl = NULL_TREE;
18941 tree arg0, arg1, arg2;
18942 enum rtx_code rcode;
18943 enum tree_code tcode;
18944 unsigned HOST_WIDE_INT count;
18945 bool is_vshift;
18946 unsigned HOST_WIDE_INT elems;
18947 location_t loc;
18948
18949 /* Don't fold when there's an ISA mismatch. */
18950 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
18951 return false;
18952
18953 switch (fn_code)
18954 {
18955 case IX86_BUILTIN_TZCNT32:
18956 decl = builtin_decl_implicit (BUILT_IN_CTZ);
18957 goto fold_tzcnt_lzcnt;
18958
18959 case IX86_BUILTIN_TZCNT64:
18960 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
18961 goto fold_tzcnt_lzcnt;
18962
18963 case IX86_BUILTIN_LZCNT32:
18964 decl = builtin_decl_implicit (BUILT_IN_CLZ);
18965 goto fold_tzcnt_lzcnt;
18966
18967 case IX86_BUILTIN_LZCNT64:
18968 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
18969 goto fold_tzcnt_lzcnt;
18970
18971 fold_tzcnt_lzcnt:
18972 gcc_assert (n_args == 1);
18973 arg0 = gimple_call_arg (stmt, 0);
18974 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
18975 {
18976 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
18977 /* If arg0 is provably non-zero, optimize into generic
18978 __builtin_c[tl]z{,ll} function the middle-end handles
18979 better. */
18980 if (!expr_not_equal_to (arg0, wi::zero (prec)))
18981 return false;
18982
18983 loc = gimple_location (stmt);
18984 g = gimple_build_call (decl, 1, arg0);
18985 gimple_set_location (g, loc);
18986 tree lhs = make_ssa_name (integer_type_node);
18987 gimple_call_set_lhs (g, lhs);
18988 gsi_insert_before (gsi, g, GSI_SAME_STMT);
18989 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
18990 gimple_set_location (g, loc);
18991 gsi_replace (gsi, g, false);
18992 return true;
18993 }
18994 break;
18995
18996 case IX86_BUILTIN_BZHI32:
18997 case IX86_BUILTIN_BZHI64:
18998 gcc_assert (n_args == 2);
18999 arg1 = gimple_call_arg (stmt, 1);
19000 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19001 {
19002 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19003 arg0 = gimple_call_arg (stmt, 0);
19004 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19005 break;
19006 loc = gimple_location (stmt);
19007 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19008 gimple_set_location (g, loc);
19009 gsi_replace (gsi, g, false);
19010 return true;
19011 }
19012 break;
19013
19014 case IX86_BUILTIN_PDEP32:
19015 case IX86_BUILTIN_PDEP64:
19016 case IX86_BUILTIN_PEXT32:
19017 case IX86_BUILTIN_PEXT64:
19018 gcc_assert (n_args == 2);
19019 arg1 = gimple_call_arg (stmt, 1);
19020 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19021 {
19022 loc = gimple_location (stmt);
19023 arg0 = gimple_call_arg (stmt, 0);
19024 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19025 gimple_set_location (g, loc);
19026 gsi_replace (gsi, g, false);
19027 return true;
19028 }
19029 break;
19030
19031 case IX86_BUILTIN_PBLENDVB256:
19032 case IX86_BUILTIN_BLENDVPS256:
19033 case IX86_BUILTIN_BLENDVPD256:
19034 /* pcmpeqb/d/q is under avx2; w/o avx2, it's veclowered
19035 to scalar operations and not combined back. */
19036 if (!TARGET_AVX2)
19037 break;
19038
19039 /* FALLTHRU. */
19040 case IX86_BUILTIN_BLENDVPD:
19041 /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2;
19042 w/o sse4.2, it's veclowered to scalar operations and
19043 not combined back. */
19044 if (!TARGET_SSE4_2)
19045 break;
19046 /* FALLTHRU. */
19047 case IX86_BUILTIN_PBLENDVB128:
19048 case IX86_BUILTIN_BLENDVPS:
19049 gcc_assert (n_args == 3);
19050 arg0 = gimple_call_arg (stmt, 0);
19051 arg1 = gimple_call_arg (stmt, 1);
19052 arg2 = gimple_call_arg (stmt, 2);
19053 if (gimple_call_lhs (stmt))
19054 {
19055 loc = gimple_location (stmt);
19056 tree type = TREE_TYPE (arg2);
19057 if (VECTOR_FLOAT_TYPE_P (type))
19058 {
19059 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19060 ? intSI_type_node : intDI_type_node;
19061 type = get_same_sized_vectype (itype, type);
19062 }
19063 else
19064 type = signed_type_for (type);
19065 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19066 tree zero_vec = build_zero_cst (type);
19067 tree cmp_type = truth_type_for (type);
19068 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19069 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19070 g = gimple_build_assign (gimple_call_lhs (stmt),
19071 VEC_COND_EXPR, cmp,
19072 arg1, arg0);
19073 gimple_set_location (g, loc);
19074 gsi_replace (gsi, g, false);
19075 }
19076 else
19077 gsi_replace (gsi, gimple_build_nop (), false);
19078 return true;
19079
19080
19081 case IX86_BUILTIN_PCMPEQB128:
19082 case IX86_BUILTIN_PCMPEQW128:
19083 case IX86_BUILTIN_PCMPEQD128:
19084 case IX86_BUILTIN_PCMPEQQ:
19085 case IX86_BUILTIN_PCMPEQB256:
19086 case IX86_BUILTIN_PCMPEQW256:
19087 case IX86_BUILTIN_PCMPEQD256:
19088 case IX86_BUILTIN_PCMPEQQ256:
19089 tcode = EQ_EXPR;
19090 goto do_cmp;
19091
19092 case IX86_BUILTIN_PCMPGTB128:
19093 case IX86_BUILTIN_PCMPGTW128:
19094 case IX86_BUILTIN_PCMPGTD128:
19095 case IX86_BUILTIN_PCMPGTQ:
19096 case IX86_BUILTIN_PCMPGTB256:
19097 case IX86_BUILTIN_PCMPGTW256:
19098 case IX86_BUILTIN_PCMPGTD256:
19099 case IX86_BUILTIN_PCMPGTQ256:
19100 tcode = GT_EXPR;
19101
19102 do_cmp:
19103 gcc_assert (n_args == 2);
19104 arg0 = gimple_call_arg (stmt, 0);
19105 arg1 = gimple_call_arg (stmt, 1);
19106 if (gimple_call_lhs (stmt))
19107 {
19108 loc = gimple_location (stmt);
19109 tree type = TREE_TYPE (arg0);
19110 tree zero_vec = build_zero_cst (type);
19111 tree minus_one_vec = build_minus_one_cst (type);
19112 tree cmp_type = truth_type_for (type);
19113 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19114 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19115 g = gimple_build_assign (gimple_call_lhs (stmt),
19116 VEC_COND_EXPR, cmp,
19117 minus_one_vec, zero_vec);
19118 gimple_set_location (g, loc);
19119 gsi_replace (gsi, g, false);
19120 }
19121 else
19122 gsi_replace (gsi, gimple_build_nop (), false);
19123 return true;
19124
19125 case IX86_BUILTIN_PSLLD:
19126 case IX86_BUILTIN_PSLLD128:
19127 case IX86_BUILTIN_PSLLD128_MASK:
19128 case IX86_BUILTIN_PSLLD256:
19129 case IX86_BUILTIN_PSLLD256_MASK:
19130 case IX86_BUILTIN_PSLLD512:
19131 case IX86_BUILTIN_PSLLDI:
19132 case IX86_BUILTIN_PSLLDI128:
19133 case IX86_BUILTIN_PSLLDI128_MASK:
19134 case IX86_BUILTIN_PSLLDI256:
19135 case IX86_BUILTIN_PSLLDI256_MASK:
19136 case IX86_BUILTIN_PSLLDI512:
19137 case IX86_BUILTIN_PSLLQ:
19138 case IX86_BUILTIN_PSLLQ128:
19139 case IX86_BUILTIN_PSLLQ128_MASK:
19140 case IX86_BUILTIN_PSLLQ256:
19141 case IX86_BUILTIN_PSLLQ256_MASK:
19142 case IX86_BUILTIN_PSLLQ512:
19143 case IX86_BUILTIN_PSLLQI:
19144 case IX86_BUILTIN_PSLLQI128:
19145 case IX86_BUILTIN_PSLLQI128_MASK:
19146 case IX86_BUILTIN_PSLLQI256:
19147 case IX86_BUILTIN_PSLLQI256_MASK:
19148 case IX86_BUILTIN_PSLLQI512:
19149 case IX86_BUILTIN_PSLLW:
19150 case IX86_BUILTIN_PSLLW128:
19151 case IX86_BUILTIN_PSLLW128_MASK:
19152 case IX86_BUILTIN_PSLLW256:
19153 case IX86_BUILTIN_PSLLW256_MASK:
19154 case IX86_BUILTIN_PSLLW512_MASK:
19155 case IX86_BUILTIN_PSLLWI:
19156 case IX86_BUILTIN_PSLLWI128:
19157 case IX86_BUILTIN_PSLLWI128_MASK:
19158 case IX86_BUILTIN_PSLLWI256:
19159 case IX86_BUILTIN_PSLLWI256_MASK:
19160 case IX86_BUILTIN_PSLLWI512_MASK:
19161 rcode = ASHIFT;
19162 is_vshift = false;
19163 goto do_shift;
19164 case IX86_BUILTIN_PSRAD:
19165 case IX86_BUILTIN_PSRAD128:
19166 case IX86_BUILTIN_PSRAD128_MASK:
19167 case IX86_BUILTIN_PSRAD256:
19168 case IX86_BUILTIN_PSRAD256_MASK:
19169 case IX86_BUILTIN_PSRAD512:
19170 case IX86_BUILTIN_PSRADI:
19171 case IX86_BUILTIN_PSRADI128:
19172 case IX86_BUILTIN_PSRADI128_MASK:
19173 case IX86_BUILTIN_PSRADI256:
19174 case IX86_BUILTIN_PSRADI256_MASK:
19175 case IX86_BUILTIN_PSRADI512:
19176 case IX86_BUILTIN_PSRAQ128_MASK:
19177 case IX86_BUILTIN_PSRAQ256_MASK:
19178 case IX86_BUILTIN_PSRAQ512:
19179 case IX86_BUILTIN_PSRAQI128_MASK:
19180 case IX86_BUILTIN_PSRAQI256_MASK:
19181 case IX86_BUILTIN_PSRAQI512:
19182 case IX86_BUILTIN_PSRAW:
19183 case IX86_BUILTIN_PSRAW128:
19184 case IX86_BUILTIN_PSRAW128_MASK:
19185 case IX86_BUILTIN_PSRAW256:
19186 case IX86_BUILTIN_PSRAW256_MASK:
19187 case IX86_BUILTIN_PSRAW512:
19188 case IX86_BUILTIN_PSRAWI:
19189 case IX86_BUILTIN_PSRAWI128:
19190 case IX86_BUILTIN_PSRAWI128_MASK:
19191 case IX86_BUILTIN_PSRAWI256:
19192 case IX86_BUILTIN_PSRAWI256_MASK:
19193 case IX86_BUILTIN_PSRAWI512:
19194 rcode = ASHIFTRT;
19195 is_vshift = false;
19196 goto do_shift;
19197 case IX86_BUILTIN_PSRLD:
19198 case IX86_BUILTIN_PSRLD128:
19199 case IX86_BUILTIN_PSRLD128_MASK:
19200 case IX86_BUILTIN_PSRLD256:
19201 case IX86_BUILTIN_PSRLD256_MASK:
19202 case IX86_BUILTIN_PSRLD512:
19203 case IX86_BUILTIN_PSRLDI:
19204 case IX86_BUILTIN_PSRLDI128:
19205 case IX86_BUILTIN_PSRLDI128_MASK:
19206 case IX86_BUILTIN_PSRLDI256:
19207 case IX86_BUILTIN_PSRLDI256_MASK:
19208 case IX86_BUILTIN_PSRLDI512:
19209 case IX86_BUILTIN_PSRLQ:
19210 case IX86_BUILTIN_PSRLQ128:
19211 case IX86_BUILTIN_PSRLQ128_MASK:
19212 case IX86_BUILTIN_PSRLQ256:
19213 case IX86_BUILTIN_PSRLQ256_MASK:
19214 case IX86_BUILTIN_PSRLQ512:
19215 case IX86_BUILTIN_PSRLQI:
19216 case IX86_BUILTIN_PSRLQI128:
19217 case IX86_BUILTIN_PSRLQI128_MASK:
19218 case IX86_BUILTIN_PSRLQI256:
19219 case IX86_BUILTIN_PSRLQI256_MASK:
19220 case IX86_BUILTIN_PSRLQI512:
19221 case IX86_BUILTIN_PSRLW:
19222 case IX86_BUILTIN_PSRLW128:
19223 case IX86_BUILTIN_PSRLW128_MASK:
19224 case IX86_BUILTIN_PSRLW256:
19225 case IX86_BUILTIN_PSRLW256_MASK:
19226 case IX86_BUILTIN_PSRLW512:
19227 case IX86_BUILTIN_PSRLWI:
19228 case IX86_BUILTIN_PSRLWI128:
19229 case IX86_BUILTIN_PSRLWI128_MASK:
19230 case IX86_BUILTIN_PSRLWI256:
19231 case IX86_BUILTIN_PSRLWI256_MASK:
19232 case IX86_BUILTIN_PSRLWI512:
19233 rcode = LSHIFTRT;
19234 is_vshift = false;
19235 goto do_shift;
19236 case IX86_BUILTIN_PSLLVV16HI:
19237 case IX86_BUILTIN_PSLLVV16SI:
19238 case IX86_BUILTIN_PSLLVV2DI:
19239 case IX86_BUILTIN_PSLLVV2DI_MASK:
19240 case IX86_BUILTIN_PSLLVV32HI:
19241 case IX86_BUILTIN_PSLLVV4DI:
19242 case IX86_BUILTIN_PSLLVV4DI_MASK:
19243 case IX86_BUILTIN_PSLLVV4SI:
19244 case IX86_BUILTIN_PSLLVV4SI_MASK:
19245 case IX86_BUILTIN_PSLLVV8DI:
19246 case IX86_BUILTIN_PSLLVV8HI:
19247 case IX86_BUILTIN_PSLLVV8SI:
19248 case IX86_BUILTIN_PSLLVV8SI_MASK:
19249 rcode = ASHIFT;
19250 is_vshift = true;
19251 goto do_shift;
19252 case IX86_BUILTIN_PSRAVQ128:
19253 case IX86_BUILTIN_PSRAVQ256:
19254 case IX86_BUILTIN_PSRAVV16HI:
19255 case IX86_BUILTIN_PSRAVV16SI:
19256 case IX86_BUILTIN_PSRAVV32HI:
19257 case IX86_BUILTIN_PSRAVV4SI:
19258 case IX86_BUILTIN_PSRAVV4SI_MASK:
19259 case IX86_BUILTIN_PSRAVV8DI:
19260 case IX86_BUILTIN_PSRAVV8HI:
19261 case IX86_BUILTIN_PSRAVV8SI:
19262 case IX86_BUILTIN_PSRAVV8SI_MASK:
19263 rcode = ASHIFTRT;
19264 is_vshift = true;
19265 goto do_shift;
19266 case IX86_BUILTIN_PSRLVV16HI:
19267 case IX86_BUILTIN_PSRLVV16SI:
19268 case IX86_BUILTIN_PSRLVV2DI:
19269 case IX86_BUILTIN_PSRLVV2DI_MASK:
19270 case IX86_BUILTIN_PSRLVV32HI:
19271 case IX86_BUILTIN_PSRLVV4DI:
19272 case IX86_BUILTIN_PSRLVV4DI_MASK:
19273 case IX86_BUILTIN_PSRLVV4SI:
19274 case IX86_BUILTIN_PSRLVV4SI_MASK:
19275 case IX86_BUILTIN_PSRLVV8DI:
19276 case IX86_BUILTIN_PSRLVV8HI:
19277 case IX86_BUILTIN_PSRLVV8SI:
19278 case IX86_BUILTIN_PSRLVV8SI_MASK:
19279 rcode = LSHIFTRT;
19280 is_vshift = true;
19281 goto do_shift;
19282
19283 do_shift:
19284 gcc_assert (n_args >= 2);
19285 if (!gimple_call_lhs (stmt))
19286 break;
19287 arg0 = gimple_call_arg (stmt, 0);
19288 arg1 = gimple_call_arg (stmt, 1);
19289 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19290 /* For masked shift, only optimize if the mask is all ones. */
19291 if (n_args > 2
19292 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19293 break;
19294 if (is_vshift)
19295 {
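 /* For variable shifts, only handle the cases where every count is
 zero or, except for arithmetic right shifts, where every count is
 out of range (the result then folds to zero below). */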
19296 if (TREE_CODE (arg1) != VECTOR_CST)
19297 break;
19298 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19299 if (integer_zerop (arg1))
19300 count = 0;
19301 else if (rcode == ASHIFTRT)
19302 break;
19303 else
19304 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19305 {
19306 tree elt = VECTOR_CST_ELT (arg1, i);
19307 if (!wi::neg_p (wi::to_wide (elt))
19308 && wi::to_widest (elt) < count)
19309 return false;
19310 }
19311 }
19312 else
19313 {
19314 arg1 = ix86_vector_shift_count (arg1);
19315 if (!arg1)
19316 break;
19317 count = tree_to_uhwi (arg1);
19318 }
19319 if (count == 0)
19320 {
19321 /* Just return the first argument for shift by 0. */
19322 loc = gimple_location (stmt);
19323 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19324 gimple_set_location (g, loc);
19325 gsi_replace (gsi, g, false);
19326 return true;
19327 }
19328 if (rcode != ASHIFTRT
19329 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19330 {
19331 /* For shift counts equal to or greater than the precision, the
19332 result is zero, except for arithmetic right shifts. */
19333 loc = gimple_location (stmt);
19334 g = gimple_build_assign (gimple_call_lhs (stmt),
19335 build_zero_cst (TREE_TYPE (arg0)));
19336 gimple_set_location (g, loc);
19337 gsi_replace (gsi, g, false);
19338 return true;
19339 }
19340 break;
19341
19342 case IX86_BUILTIN_SHUFPD512:
19343 case IX86_BUILTIN_SHUFPS512:
19344 case IX86_BUILTIN_SHUFPD:
19345 case IX86_BUILTIN_SHUFPD256:
19346 case IX86_BUILTIN_SHUFPS:
19347 case IX86_BUILTIN_SHUFPS256:
19348 arg0 = gimple_call_arg (stmt, 0);
19349 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19350 /* This is a masked shuffle. Only optimize if the mask is all ones. */
19351 if (n_args > 3
19352 && !ix86_masked_all_ones (elems,
19353 gimple_call_arg (stmt, n_args - 1)))
19354 break;
19355 arg2 = gimple_call_arg (stmt, 2);
19356 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
19357 {
19358 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19359 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19360 if (shuffle_mask > 255)
19361 return false;
19362
19363 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
19364 loc = gimple_location (stmt);
19365 tree itype = (imode == E_DFmode
19366 ? long_long_integer_type_node : integer_type_node);
19367 tree vtype = build_vector_type (itype, elems);
19368 tree_vector_builder elts (vtype, elems, 1);
19369
19370
19371 /* Transform the integer shuffle_mask into the vector perm_mask used
19372 by vec_perm_expr; refer to shuflp[sd]256/512 in sse.md. */
19373 for (unsigned i = 0; i != elems; i++)
19374 {
19375 unsigned sel_idx;
19376 /* Imm[1:0] (if VL > 128, then use Imm[3:2], Imm[5:4], Imm[7:6])
19377 provides 2 select controls for each element of the
19378 destination. */
19379 if (imode == E_DFmode)
19380 sel_idx = (i & 1) * elems + (i & ~1)
19381 + ((shuffle_mask >> i) & 1);
19382 else
19383 {
19384 /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
19385 controls for each element of the destination. */
19386 unsigned j = i % 4;
19387 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
19388 + ((shuffle_mask >> 2 * j) & 3);
19389 }
19390 elts.quick_push (build_int_cst (itype, sel_idx));
19391 }
19392
19393 tree perm_mask = elts.build ();
19394 arg1 = gimple_call_arg (stmt, 1);
19395 g = gimple_build_assign (gimple_call_lhs (stmt),
19396 VEC_PERM_EXPR,
19397 arg0, arg1, perm_mask);
19398 gimple_set_location (g, loc);
19399 gsi_replace (gsi, g, false);
19400 return true;
19401 }
19402 // Do not error yet, the constant could be propagated later?
19403 break;
19404
19405 case IX86_BUILTIN_PABSB:
19406 case IX86_BUILTIN_PABSW:
19407 case IX86_BUILTIN_PABSD:
19408 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
19409 if (!TARGET_MMX_WITH_SSE)
19410 break;
19411 /* FALLTHRU. */
19412 case IX86_BUILTIN_PABSB128:
19413 case IX86_BUILTIN_PABSB256:
19414 case IX86_BUILTIN_PABSB512:
19415 case IX86_BUILTIN_PABSW128:
19416 case IX86_BUILTIN_PABSW256:
19417 case IX86_BUILTIN_PABSW512:
19418 case IX86_BUILTIN_PABSD128:
19419 case IX86_BUILTIN_PABSD256:
19420 case IX86_BUILTIN_PABSD512:
19421 case IX86_BUILTIN_PABSQ128:
19422 case IX86_BUILTIN_PABSQ256:
19423 case IX86_BUILTIN_PABSQ512:
19424 case IX86_BUILTIN_PABSB128_MASK:
19425 case IX86_BUILTIN_PABSB256_MASK:
19426 case IX86_BUILTIN_PABSW128_MASK:
19427 case IX86_BUILTIN_PABSW256_MASK:
19428 case IX86_BUILTIN_PABSD128_MASK:
19429 case IX86_BUILTIN_PABSD256_MASK:
19430 gcc_assert (n_args >= 1);
19431 if (!gimple_call_lhs (stmt))
19432 break;
19433 arg0 = gimple_call_arg (stmt, 0);
19434 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19435 /* For masked ABS, only optimize if the mask is all ones. */
19436 if (n_args > 1
19437 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19438 break;
19439 {
19440 tree utype, ures, vce;
19441 utype = unsigned_type_for (TREE_TYPE (arg0));
19442 /* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
19443 instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
19444 ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
19445 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19446 loc = gimple_location (stmt);
19447 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
19448 g = gimple_build_assign (gimple_call_lhs (stmt),
19449 VIEW_CONVERT_EXPR, vce);
19450 gsi_replace (gsi, g, false);
19451 }
19452 return true;
19453
19454 default:
19455 break;
19456 }
19457
19458 return false;
19459}
19460
19461/* Handler for an SVML-style interface to
19462 a library with vectorized intrinsics. */
19463
19464tree
19465ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19466{
19467 char name[20];
19468 tree fntype, new_fndecl, args;
19469 unsigned arity;
19470 const char *bname;
19471 machine_mode el_mode, in_mode;
19472 int n, in_n;
19473
19474 /* The SVML is suitable for unsafe math only. */
19475 if (!flag_unsafe_math_optimizations)
19476 return NULL_TREE;
19477
19478 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19479 n = TYPE_VECTOR_SUBPARTS (type_out);
19480 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19481 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19482 if (el_mode != in_mode
19483 || n != in_n)
19484 return NULL_TREE;
19485
19486 switch (fn)
19487 {
19488 CASE_CFN_EXP:
19489 CASE_CFN_LOG:
19490 CASE_CFN_LOG10:
19491 CASE_CFN_POW:
19492 CASE_CFN_TANH:
19493 CASE_CFN_TAN:
19494 CASE_CFN_ATAN:
19495 CASE_CFN_ATAN2:
19496 CASE_CFN_ATANH:
19497 CASE_CFN_CBRT:
19498 CASE_CFN_SINH:
19499 CASE_CFN_SIN:
19500 CASE_CFN_ASINH:
19501 CASE_CFN_ASIN:
19502 CASE_CFN_COSH:
19503 CASE_CFN_COS:
19504 CASE_CFN_ACOSH:
19505 CASE_CFN_ACOS:
19506 if ((el_mode != DFmode || n != 2)
19507 && (el_mode != SFmode || n != 4))
19508 return NULL_TREE;
19509 break;
19510
19511 default:
19512 return NULL_TREE;
19513 }
19514
19515 tree fndecl = mathfn_built_in (el_mode == DFmode
19516 ? double_type_node : float_type_node, fn);
19517 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19518
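 /* bname is the "__builtin_"-prefixed name; bname + 10 below skips
 that 10-character prefix when forming the SVML name. */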
19519 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
19520 strcpy (name, "vmlsLn4");
19521 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
19522 strcpy (name, "vmldLn2");
19523 else if (n == 4)
19524 {
19525 sprintf (name, "vmls%s", bname+10);
19526 name[strlen (name)-1] = '4';
19527 }
19528 else
19529 sprintf (name, "vmld%s2", bname+10);
19530
19531 /* Convert to uppercase. */
19532 name[4] &= ~0x20;
19533
19534 arity = 0;
19535 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19536 arity++;
19537
19538 if (arity == 1)
19539 fntype = build_function_type_list (type_out, type_in, NULL);
19540 else
19541 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19542
19543 /* Build a function declaration for the vectorized function. */
19544 new_fndecl = build_decl (BUILTINS_LOCATION,
19545 FUNCTION_DECL, get_identifier (name), fntype);
19546 TREE_PUBLIC (new_fndecl) = 1;
19547 DECL_EXTERNAL (new_fndecl) = 1;
19548 DECL_IS_NOVOPS (new_fndecl) = 1;
19549 TREE_READONLY (new_fndecl) = 1;
19550
19551 return new_fndecl;
19552}
19553
19554/* Handler for an ACML-style interface to
19555 a library with vectorized intrinsics. */
19556
19557tree
19558ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
19559{
19560 char name[20] = "__vr.._";
19561 tree fntype, new_fndecl, args;
19562 unsigned arity;
19563 const char *bname;
19564 machine_mode el_mode, in_mode;
19565 int n, in_n;
19566
19567 /* The ACML is 64-bit only and suitable for unsafe math only, as
19568 it does not correctly support parts of IEEE with the required
19569 precision such as denormals. */
19570 if (!TARGET_64BIT
19571 || !flag_unsafe_math_optimizations)
19572 return NULL_TREE;
19573
19574 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19575 n = TYPE_VECTOR_SUBPARTS (type_out);
19576 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19577 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19578 if (el_mode != in_mode
19579 || n != in_n)
19580 return NULL_TREE;
19581
19582 switch (fn)
19583 {
19584 CASE_CFN_SIN:
19585 CASE_CFN_COS:
19586 CASE_CFN_EXP:
19587 CASE_CFN_LOG:
19588 CASE_CFN_LOG2:
19589 CASE_CFN_LOG10:
19590 if (el_mode == DFmode && n == 2)
19591 {
19592 name[4] = 'd';
19593 name[5] = '2';
19594 }
19595 else if (el_mode == SFmode && n == 4)
19596 {
19597 name[4] = 's';
19598 name[5] = '4';
19599 }
19600 else
19601 return NULL_TREE;
19602 break;
19603
19604 default:
19605 return NULL_TREE;
19606 }
19607
19608 tree fndecl = mathfn_built_in (el_mode == DFmode
19609 ? double_type_node : float_type_node, fn);
19610 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19611 sprintf (name + 7, "%s", bname+10);
19612
19613 arity = 0;
19614 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19615 arity++;
19616
19617 if (arity == 1)
19618 fntype = build_function_type_list (type_out, type_in, NULL);
19619 else
19620 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19621
19622 /* Build a function declaration for the vectorized function. */
19623 new_fndecl = build_decl (BUILTINS_LOCATION,
19624 FUNCTION_DECL, get_identifier (name), fntype);
19625 TREE_PUBLIC (new_fndecl) = 1;
19626 DECL_EXTERNAL (new_fndecl) = 1;
19627 DECL_IS_NOVOPS (new_fndecl) = 1;
19628 TREE_READONLY (new_fndecl) = 1;
19629
19630 return new_fndecl;
19631}
19632
19633/* Returns a decl of a function that implements scatter store with
19634 register type VECTYPE and index type INDEX_TYPE and SCALE.
19635 Return NULL_TREE if it is not available. */
19636
19637static tree
19638ix86_vectorize_builtin_scatter (const_tree vectype,
19639 const_tree index_type, int scale)
19640{
19641 bool si;
19642 enum ix86_builtins code;
19643 const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
19644
19645 if (!TARGET_AVX512F)
19646 return NULL_TREE;
19647
19648 if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
19649 return NULL_TREE;
19650
19651 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19652 ? !TARGET_USE_SCATTER_2PARTS
19653 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19654 ? !TARGET_USE_SCATTER_4PARTS
19655 : !TARGET_USE_SCATTER_8PARTS))
19656 return NULL_TREE;
19657
19658 if ((TREE_CODE (index_type) != INTEGER_TYPE
19659 && !POINTER_TYPE_P (index_type))
19660 || (TYPE_MODE (index_type) != SImode
19661 && TYPE_MODE (index_type) != DImode))
19662 return NULL_TREE;
19663
19664 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19665 return NULL_TREE;
19666
19667 /* v*scatter* insn sign extends index to pointer mode. */
19668 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19669 && TYPE_UNSIGNED (index_type))
19670 return NULL_TREE;
19671
19672 /* Scale can be 1, 2, 4 or 8. */
19673 if (scale <= 0
19674 || scale > 8
19675 || (scale & (scale - 1)) != 0)
19676 return NULL_TREE;
19677
19678 si = TYPE_MODE (index_type) == SImode;
19679 switch (TYPE_MODE (vectype))
19680 {
19681 case E_V8DFmode:
19682 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19683 break;
19684 case E_V8DImode:
19685 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19686 break;
19687 case E_V16SFmode:
19688 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19689 break;
19690 case E_V16SImode:
19691 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19692 break;
19693 case E_V4DFmode:
19694 if (TARGET_AVX512VL)
19695 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19696 else
19697 return NULL_TREE;
19698 break;
19699 case E_V4DImode:
19700 if (TARGET_AVX512VL)
19701 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19702 else
19703 return NULL_TREE;
19704 break;
19705 case E_V8SFmode:
19706 if (TARGET_AVX512VL)
19707 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19708 else
19709 return NULL_TREE;
19710 break;
19711 case E_V8SImode:
19712 if (TARGET_AVX512VL)
19713 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19714 else
19715 return NULL_TREE;
19716 break;
19717 case E_V2DFmode:
19718 if (TARGET_AVX512VL)
19719 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19720 else
19721 return NULL_TREE;
19722 break;
19723 case E_V2DImode:
19724 if (TARGET_AVX512VL)
19725 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19726 else
19727 return NULL_TREE;
19728 break;
19729 case E_V4SFmode:
19730 if (TARGET_AVX512VL)
19731 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19732 else
19733 return NULL_TREE;
19734 break;
19735 case E_V4SImode:
19736 if (TARGET_AVX512VL)
19737 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19738 else
19739 return NULL_TREE;
19740 break;
19741 default:
19742 return NULL_TREE;
19743 }
19744
19745 return get_ix86_builtin (code);
19746}
19747
19748/* Return true if it is safe to use the rsqrt optabs to optimize
19749 1.0/sqrt. */
19750
19751static bool
19752use_rsqrt_p (machine_mode mode)
19753{
19754 return ((mode == HFmode
19755 || (TARGET_SSE && TARGET_SSE_MATH))
19756 && flag_finite_math_only
19757 && !flag_trapping_math
19758 && flag_unsafe_math_optimizations);
19759}
19760
19761/* Helper for avx_vpermilps256_operand et al. This is also used by
19762 the expansion functions to turn the parallel back into a mask.
19763 The return value is 0 for no match and the imm8+1 for a match. */
19764
19765int
19766avx_vpermilp_parallel (rtx par, machine_mode mode)
19767{
19768 unsigned i, nelt = GET_MODE_NUNITS (mode);
19769 unsigned mask = 0;
19770 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19771
19772 if (XVECLEN (par, 0) != (int) nelt)
19773 return 0;
19774
19775 /* Validate that all of the elements are constants, and not totally
19776 out of range. Copy the data into an integral array to make the
19777 subsequent checks easier. */
19778 for (i = 0; i < nelt; ++i)
19779 {
19780 rtx er = XVECEXP (par, 0, i);
19781 unsigned HOST_WIDE_INT ei;
19782
19783 if (!CONST_INT_P (er))
19784 return 0;
19785 ei = INTVAL (er);
19786 if (ei >= nelt)
19787 return 0;
19788 ipar[i] = ei;
19789 }
19790
19791 switch (mode)
19792 {
19793 case E_V8DFmode:
19794 /* In the 512-bit DFmode case, we can only move elements within
19795 a 128-bit lane. First fill the second part of the mask,
19796 then fallthru. */
19797 for (i = 4; i < 6; ++i)
19798 {
19799 if (ipar[i] < 4 || ipar[i] >= 6)
19800 return 0;
19801 mask |= (ipar[i] - 4) << i;
19802 }
19803 for (i = 6; i < 8; ++i)
19804 {
19805 if (ipar[i] < 6)
19806 return 0;
19807 mask |= (ipar[i] - 6) << i;
19808 }
19809 /* FALLTHRU */
19810
19811 case E_V4DFmode:
19812 /* In the 256-bit DFmode case, we can only move elements within
19813 a 128-bit lane. */
19814 for (i = 0; i < 2; ++i)
19815 {
19816 if (ipar[i] >= 2)
19817 return 0;
19818 mask |= ipar[i] << i;
19819 }
19820 for (i = 2; i < 4; ++i)
19821 {
19822 if (ipar[i] < 2)
19823 return 0;
19824 mask |= (ipar[i] - 2) << i;
19825 }
19826 break;
19827
19828 case E_V16SFmode:
19829 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19830 must mirror the permutation in the lower 256 bits. */
19831 for (i = 0; i < 8; ++i)
19832 if (ipar[i] + 8 != ipar[i + 8])
19833 return 0;
19834 /* FALLTHRU */
19835
19836 case E_V8SFmode:
19837 /* In the 256-bit SFmode case, we have full freedom of
19838 movement within the low 128-bit lane, but the high 128-bit
19839 lane must mirror the exact same pattern. */
19840 for (i = 0; i < 4; ++i)
19841 if (ipar[i] + 4 != ipar[i + 4])
19842 return 0;
19843 nelt = 4;
19844 /* FALLTHRU */
19845
19846 case E_V2DFmode:
19847 case E_V4SFmode:
19848 /* In the 128-bit case, we've full freedom in the placement of
19849 the elements from the source operand. */
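 /* Each element index takes nelt/2 bits of the immediate: 2 bits per
 element for the SFmode cases, 1 bit for V2DFmode. */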
19850 for (i = 0; i < nelt; ++i)
19851 mask |= ipar[i] << (i * (nelt / 2));
19852 break;
19853
19854 default:
19855 gcc_unreachable ();
19856 }
19857
19858 /* Make sure success has a non-zero value by adding one. */
19859 return mask + 1;
19860}
19861
19862/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19863 the expansion functions to turn the parallel back into a mask.
19864 The return value is 0 for no match and the imm8+1 for a match. */
19865
19866int
19867avx_vperm2f128_parallel (rtx par, machine_mode mode)
19868{
19869 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
19870 unsigned mask = 0;
19871 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
19872
19873 if (XVECLEN (par, 0) != (int) nelt)
19874 return 0;
19875
19876 /* Validate that all of the elements are constants, and not totally
19877 out of range. Copy the data into an integral array to make the
19878 subsequent checks easier. */
19879 for (i = 0; i < nelt; ++i)
19880 {
19881 rtx er = XVECEXP (par, 0, i);
19882 unsigned HOST_WIDE_INT ei;
19883
19884 if (!CONST_INT_P (er))
19885 return 0;
19886 ei = INTVAL (er);
19887 if (ei >= 2 * nelt)
19888 return 0;
19889 ipar[i] = ei;
19890 }
19891
19892 /* Validate that each half of the permute consists of consecutive elements. */
19893 for (i = 0; i < nelt2 - 1; ++i)
19894 if (ipar[i] + 1 != ipar[i + 1])
19895 return 0;
19896 for (i = nelt2; i < nelt - 1; ++i)
19897 if (ipar[i] + 1 != ipar[i + 1])
19898 return 0;
19899
19900 /* Reconstruct the mask. */
19901 for (i = 0; i < 2; ++i)
19902 {
19903 unsigned e = ipar[i * nelt2];
19904 if (e % nelt2)
19905 return 0;
19906 e /= nelt2;
19907 mask |= e << (i * 4);
19908 }
19909
19910 /* Make sure success has a non-zero value by adding one. */
19911 return mask + 1;
19912}
19913
19914/* Return a mask of VPTERNLOG operands that do not affect output. */
19915
19916int
19917vpternlog_redundant_operand_mask (rtx pternlog_imm)
19918{
19919 int mask = 0;
19920 int imm8 = INTVAL (pternlog_imm);
19921
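 /* The immediate is the truth table of the three-operand boolean
 function: the first operand selects bit 2 of the table index, the
 second bit 1 and the third bit 0. An operand does not affect the
 output if both table halves selected by its bit are identical. */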
19922 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
19923 mask |= 1;
19924 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
19925 mask |= 2;
19926 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
19927 mask |= 4;
19928
19929 return mask;
19930}
19931
19932/* Eliminate false dependencies on operands that do not affect output
19933 by substituting other operands of a VPTERNLOG. */
19934
19935void
19936substitute_vpternlog_operands (rtx *operands)
19937{
19938 int mask = vpternlog_redundant_operand_mask (operands[4]);
19939
19940 if (mask & 1) /* The first operand is redundant. */
19941 operands[1] = operands[2];
19942
19943 if (mask & 2) /* The second operand is redundant. */
19944 operands[2] = operands[1];
19945
19946 if (mask & 4) /* The third operand is redundant. */
19947 operands[3] = operands[1];
19948 else if (REG_P (operands[3]))
19949 {
19950 if (mask & 1)
19951 operands[1] = operands[3];
19952 if (mask & 2)
19953 operands[2] = operands[3];
19954 }
19955}
19956
19957/* Return a register priority for hard reg REGNO. */
19958static int
19959ix86_register_priority (int hard_regno)
19960{
19961 /* ebp and r13 as the base always want a displacement, r12 as the
19962 base always wants an index. So discourage their usage in an
19963 address. */
19964 if (hard_regno == R12_REG || hard_regno == R13_REG)
19965 return 0;
19966 if (hard_regno == BP_REG)
19967 return 1;
19968 /* New x86-64 int registers result in bigger code size. Discourage them. */
19969 if (REX_INT_REGNO_P (hard_regno))
19970 return 2;
19971 if (REX2_INT_REGNO_P (hard_regno))
19972 return 2;
19973 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
19974 if (REX_SSE_REGNO_P (hard_regno))
19975 return 2;
19976 if (EXT_REX_SSE_REGNO_P (hard_regno))
19977 return 1;
19978 /* Usage of AX register results in smaller code. Prefer it. */
19979 if (hard_regno == AX_REG)
19980 return 4;
19981 return 3;
19982}
19983
19984/* Implement TARGET_PREFERRED_RELOAD_CLASS.
19985
19986 Put float CONST_DOUBLE in the constant pool instead of fp regs.
19987 QImode must go into class Q_REGS.
19988 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19989 movdf to do mem-to-mem moves through integer regs. */
19990
19991static reg_class_t
19992ix86_preferred_reload_class (rtx x, reg_class_t regclass)
19993{
19994 machine_mode mode = GET_MODE (x);
19995
19996 /* We're only allowed to return a subclass of CLASS. Many of the
19997 following checks fail for NO_REGS, so eliminate that early. */
19998 if (regclass == NO_REGS)
19999 return NO_REGS;
20000
20001 /* All classes can load zeros. */
20002 if (x == CONST0_RTX (mode))
20003 return regclass;
20004
20005 /* Force constants into memory if we are loading a (nonzero) constant into
20006 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20007 instructions to load from a constant. */
20008 if (CONSTANT_P (x)
20009 && (MAYBE_MMX_CLASS_P (regclass)
20010 || MAYBE_SSE_CLASS_P (regclass)
20011 || MAYBE_MASK_CLASS_P (regclass)))
20012 return NO_REGS;
20013
20014 /* Floating-point constants need more complex checks. */
20015 if (CONST_DOUBLE_P (x))
20016 {
20017 /* General regs can load everything. */
20018 if (INTEGER_CLASS_P (regclass))
20019 return regclass;
20020
20021 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20022 zero above. We only want to wind up preferring 80387 registers if
20023 we plan on doing computation with them. */
20024 if (IS_STACK_MODE (mode)
20025 && standard_80387_constant_p (x) > 0)
20026 {
20027 /* Limit class to FP regs. */
20028 if (FLOAT_CLASS_P (regclass))
20029 return FLOAT_REGS;
20030 }
20031
20032 return NO_REGS;
20033 }
20034
20035 /* Prefer SSE if we can use them for math. Also allow integer regs
20036 when moves between register units are cheap. */
20037 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20038 {
20039 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20040 && TARGET_INTER_UNIT_MOVES_TO_VEC
20041 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20042 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20043 else
20044 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20045 }
20046
20047 /* Generally when we see PLUS here, it's the function invariant
20048 (plus soft-fp const_int), which can only be computed into general
20049 regs. */
20050 if (GET_CODE (x) == PLUS)
20051 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20052
20053 /* QImode constants are easy to load, but non-constant QImode data
20054 must go into Q_REGS or ALL_MASK_REGS. */
20055 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20056 {
20057 if (Q_CLASS_P (regclass))
20058 return regclass;
20059 else if (reg_class_subset_p (Q_REGS, regclass))
20060 return Q_REGS;
20061 else if (MASK_CLASS_P (regclass))
20062 return regclass;
20063 else
20064 return NO_REGS;
20065 }
20066
20067 return regclass;
20068}
20069
20070/* Discourage putting floating-point values in SSE registers unless
20071 SSE math is being used, and likewise for the 387 registers. */
20072static reg_class_t
20073ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20074{
20075 /* Restrict the output reload class to the register bank that we are doing
20076 math on. If we would like not to return a subset of CLASS, reject this
20077 alternative: if reload cannot do this, it will still use its choice. */
20078 machine_mode mode = GET_MODE (x);
20079 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20080 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20081
20082 if (IS_STACK_MODE (mode))
20083 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20084
20085 return regclass;
20086}
20087
20088static reg_class_t
20089ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
20090 machine_mode mode, secondary_reload_info *sri)
20091{
20092 /* Double-word spills from general registers to non-offsettable memory
20093 references (zero-extended addresses) require special handling. */
20094 if (TARGET_64BIT
20095 && MEM_P (x)
20096 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
20097 && INTEGER_CLASS_P (rclass)
20098 && !offsettable_memref_p (x))
20099 {
20100 sri->icode = (in_p
20101 ? CODE_FOR_reload_noff_load
20102 : CODE_FOR_reload_noff_store);
20103 /* Add the cost of moving address to a temporary. */
20104 sri->extra_cost = 1;
20105
20106 return NO_REGS;
20107 }
20108
20109 /* QImode spills from non-QI registers require an
20110 intermediate register on 32-bit targets. */
20111 if (mode == QImode
20112 && ((!TARGET_64BIT && !in_p
20113 && INTEGER_CLASS_P (rclass)
20114 && MAYBE_NON_Q_CLASS_P (rclass))
20115 || (!TARGET_AVX512DQ
20116 && MAYBE_MASK_CLASS_P (rclass))))
20117 {
20118 int regno = true_regnum (x);
20119
20120 /* Return Q_REGS if the operand is in memory. */
20121 if (regno == -1)
20122 return Q_REGS;
20123
20124 return NO_REGS;
20125 }
20126
20127 /* Require a move to a GPR, and then a store to memory. */
20128 if ((mode == HFmode || mode == HImode || mode == V2QImode
20129 || mode == BFmode)
20130 && !TARGET_SSE4_1
20131 && SSE_CLASS_P (rclass)
20132 && !in_p && MEM_P (x))
20133 {
20134 sri->extra_cost = 1;
20135 return GENERAL_REGS;
20136 }
20137
20138 /* This condition handles the corner case where an expression involving
20139 pointers gets vectorized. We're trying to use the address of a
20140 stack slot as a vector initializer.
20141
20142 (set (reg:V2DI 74 [ vect_cst_.2 ])
20143 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
20144
20145 Eventually frame gets turned into sp+offset like this:
20146
20147 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20148 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20149 (const_int 392 [0x188]))))
20150
20151 That later gets turned into:
20152
20153 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20154 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20155 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
20156
20157 We'll have the following reload recorded:
20158
20159 Reload 0: reload_in (DI) =
20160 (plus:DI (reg/f:DI 7 sp)
20161 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
20162 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20163 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
20164 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
20165 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20166 reload_reg_rtx: (reg:V2DI 22 xmm1)
20167
20168 Which isn't going to work since SSE instructions can't handle scalar
20169 additions. Returning GENERAL_REGS forces the addition into integer
20170 register and reload can handle subsequent reloads without problems. */
20171
20172 if (in_p && GET_CODE (x) == PLUS
20173 && SSE_CLASS_P (rclass)
20174 && SCALAR_INT_MODE_P (mode))
20175 return GENERAL_REGS;
20176
20177 return NO_REGS;
20178}
20179
20180/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
20181
20182static bool
20183ix86_class_likely_spilled_p (reg_class_t rclass)
20184{
20185 switch (rclass)
20186 {
20187 case AREG:
20188 case DREG:
20189 case CREG:
20190 case BREG:
20191 case AD_REGS:
20192 case SIREG:
20193 case DIREG:
20194 case SSE_FIRST_REG:
20195 case FP_TOP_REG:
20196 case FP_SECOND_REG:
20197 return true;
20198
20199 default:
20200 break;
20201 }
20202
20203 return false;
20204}
20205
20206/* Return true if a set of DST by the expression SRC should be allowed.
20207 This prevents complex sets of likely_spilled hard regs before reload. */
20208
20209bool
20210ix86_hardreg_mov_ok (rtx dst, rtx src)
20211{
20212 /* Avoid complex sets of likely_spilled hard registers before reload. */
20213 if (REG_P (dst) && HARD_REGISTER_P (dst)
20214 && !REG_P (src) && !MEM_P (src)
20215 && !(VECTOR_MODE_P (GET_MODE (dst))
20216 ? standard_sse_constant_p (src, GET_MODE (dst))
20217 : x86_64_immediate_operand (src, GET_MODE (dst)))
20218 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
20219 && !reload_completed)
20220 return false;
20221 return true;
20222}
20223
20224/* If we are copying between registers from different register sets
20225 (e.g. FP and integer), we may need a memory location.
20226
20227 The function can't work reliably when one of the CLASSES is a class
20228 containing registers from multiple sets. We avoid this by never combining
20229 different sets in a single alternative in the machine description.
20230 Ensure that this constraint holds to avoid unexpected surprises.
20231
20232 When STRICT is false, we are being called from REGISTER_MOVE_COST,
20233 so do not enforce these sanity checks.
20234
20235 To optimize register_move_cost performance, define inline variant. */
20236
20237static inline bool
20238inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20239 reg_class_t class2, int strict)
20240{
20241 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
20242 return false;
20243
20244 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20245 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20246 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20247 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20248 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20249 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
20250 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
20251 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
20252 {
20253 gcc_assert (!strict || lra_in_progress);
20254 return true;
20255 }
20256
20257 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20258 return true;
20259
20260 /* ??? This is a lie. We do have moves between mmx/general, and for
20261 mmx/sse2. But by saying we need secondary memory we discourage the
20262 register allocator from using the mmx registers unless needed. */
20263 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20264 return true;
20265
20266 /* Between mask and general, we have moves no larger than word size. */
20267 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20268 {
20269 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
20270 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20271 return true;
20272 }
20273
20274 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20275 {
20276 /* SSE1 doesn't have any direct moves from other classes. */
20277 if (!TARGET_SSE2)
20278 return true;
20279
20280 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
20281 return true;
20282
20283 int msize = GET_MODE_SIZE (mode);
20284
20285 /* Between SSE and general, we have moves no larger than word size. */
20286 if (msize > UNITS_PER_WORD)
20287 return true;
20288
20289 /* In addition to SImode moves, HImode moves are supported for SSE2 and above.
20290 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
20291 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
20292
20293 if (msize < minsize)
20294 return true;
20295
20296 /* If the target says that inter-unit moves are more expensive
20297 than moving through memory, then don't generate them. */
20298 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
20299 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
20300 return true;
20301 }
20302
20303 return false;
20304}
20305
20306/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
20307
20308static bool
20309ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20310 reg_class_t class2)
20311{
  return inline_secondary_memory_needed (mode, class1, class2, true);
20313}
20314
20315/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
20316
20317 get_secondary_mem widens integral modes to BITS_PER_WORD.
20318 There is no need to emit full 64 bit move on 64 bit targets
20319 for integral modes that can be moved using 32 bit move. */
20320
20321static machine_mode
20322ix86_secondary_memory_needed_mode (machine_mode mode)
20323{
20324 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
20325 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
20326 return mode;
20327}
20328
20329/* Implement the TARGET_CLASS_MAX_NREGS hook.
20330
20331 On the 80386, this is the size of MODE in words,
20332 except in the FP regs, where a single reg is always enough. */
20333
20334static unsigned char
20335ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
20336{
20337 if (MAYBE_INTEGER_CLASS_P (rclass))
20338 {
20339 if (mode == XFmode)
20340 return (TARGET_64BIT ? 2 : 3);
20341 else if (mode == XCmode)
20342 return (TARGET_64BIT ? 4 : 6);
20343 else
20344 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20345 }
20346 else
20347 {
20348 if (COMPLEX_MODE_P (mode))
20349 return 2;
20350 else
20351 return 1;
20352 }
20353}
20354
20355/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20356
20357static bool
20358ix86_can_change_mode_class (machine_mode from, machine_mode to,
20359 reg_class_t regclass)
20360{
20361 if (from == to)
20362 return true;
20363
20364 /* x87 registers can't do subreg at all, as all values are reformatted
20365 to extended precision. */
20366 if (MAYBE_FLOAT_CLASS_P (regclass))
20367 return false;
20368
20369 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20370 {
20371 /* Vector registers do not support QI or HImode loads. If we don't
20372 disallow a change to these modes, reload will assume it's ok to
20373 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20374 the vec_dupv4hi pattern.
20375 NB: SSE2 can load 16bit data to sse register via pinsrw. */
20376 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
20377 if (GET_MODE_SIZE (from) < mov_size
20378 || GET_MODE_SIZE (to) < mov_size)
20379 return false;
20380 }
20381
20382 return true;
20383}
20384
20385/* Return index of MODE in the sse load/store tables. */
20386
20387static inline int
20388sse_store_index (machine_mode mode)
20389{
20390 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
20391 costs to processor_costs, which requires changes to all entries in
20392 processor cost table. */
20393 if (mode == E_HFmode)
20394 mode = E_SFmode;
20395
20396 switch (GET_MODE_SIZE (mode))
20397 {
20398 case 4:
20399 return 0;
20400 case 8:
20401 return 1;
20402 case 16:
20403 return 2;
20404 case 32:
20405 return 3;
20406 case 64:
20407 return 4;
20408 default:
20409 return -1;
20410 }
20411}
20412
20413/* Return the cost of moving data of mode M between a
20414 register and memory. A value of 2 is the default; this cost is
20415 relative to those in `REGISTER_MOVE_COST'.
20416
   This function is used extensively by register_move_cost, which is used
   to build tables at startup, so make it inline here.
   When IN is 2, return the maximum of the in and out move costs.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in non
   Q_REGS classes.  */
20428static inline int
20429inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
20430{
20431 int cost;
20432
20433 if (FLOAT_CLASS_P (regclass))
20434 {
20435 int index;
20436 switch (mode)
20437 {
20438 case E_SFmode:
20439 index = 0;
20440 break;
20441 case E_DFmode:
20442 index = 1;
20443 break;
20444 case E_XFmode:
20445 index = 2;
20446 break;
20447 default:
20448 return 100;
20449 }
20450 if (in == 2)
20451 return MAX (ix86_cost->hard_register.fp_load [index],
20452 ix86_cost->hard_register.fp_store [index]);
20453 return in ? ix86_cost->hard_register.fp_load [index]
20454 : ix86_cost->hard_register.fp_store [index];
20455 }
20456 if (SSE_CLASS_P (regclass))
20457 {
20458 int index = sse_store_index (mode);
20459 if (index == -1)
20460 return 100;
20461 if (in == 2)
20462 return MAX (ix86_cost->hard_register.sse_load [index],
20463 ix86_cost->hard_register.sse_store [index]);
20464 return in ? ix86_cost->hard_register.sse_load [index]
20465 : ix86_cost->hard_register.sse_store [index];
20466 }
20467 if (MASK_CLASS_P (regclass))
20468 {
20469 int index;
20470 switch (GET_MODE_SIZE (mode))
20471 {
20472 case 1:
20473 index = 0;
20474 break;
20475 case 2:
20476 index = 1;
20477 break;
20478 /* DImode loads and stores assumed to cost the same as SImode. */
20479 case 4:
20480 case 8:
20481 index = 2;
20482 break;
20483 default:
20484 return 100;
20485 }
20486
20487 if (in == 2)
20488 return MAX (ix86_cost->hard_register.mask_load[index],
20489 ix86_cost->hard_register.mask_store[index]);
      return in ? ix86_cost->hard_register.mask_load[index]
		: ix86_cost->hard_register.mask_store[index];
20492 }
20493 if (MMX_CLASS_P (regclass))
20494 {
20495 int index;
20496 switch (GET_MODE_SIZE (mode))
20497 {
20498 case 4:
20499 index = 0;
20500 break;
20501 case 8:
20502 index = 1;
20503 break;
20504 default:
20505 return 100;
20506 }
20507 if (in == 2)
20508 return MAX (ix86_cost->hard_register.mmx_load [index],
20509 ix86_cost->hard_register.mmx_store [index]);
20510 return in ? ix86_cost->hard_register.mmx_load [index]
20511 : ix86_cost->hard_register.mmx_store [index];
20512 }
20513 switch (GET_MODE_SIZE (mode))
20514 {
20515 case 1:
20516 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20517 {
20518 if (!in)
20519 return ix86_cost->hard_register.int_store[0];
20520 if (TARGET_PARTIAL_REG_DEPENDENCY
20521 && optimize_function_for_speed_p (cfun))
20522 cost = ix86_cost->hard_register.movzbl_load;
20523 else
20524 cost = ix86_cost->hard_register.int_load[0];
20525 if (in == 2)
20526 return MAX (cost, ix86_cost->hard_register.int_store[0]);
20527 return cost;
20528 }
20529 else
20530 {
20531 if (in == 2)
20532 return MAX (ix86_cost->hard_register.movzbl_load,
20533 ix86_cost->hard_register.int_store[0] + 4);
20534 if (in)
20535 return ix86_cost->hard_register.movzbl_load;
20536 else
20537 return ix86_cost->hard_register.int_store[0] + 4;
20538 }
20539 break;
20540 case 2:
20541 {
20542 int cost;
20543 if (in == 2)
20544 cost = MAX (ix86_cost->hard_register.int_load[1],
20545 ix86_cost->hard_register.int_store[1]);
20546 else
20547 cost = in ? ix86_cost->hard_register.int_load[1]
20548 : ix86_cost->hard_register.int_store[1];
20549
20550 if (mode == E_HFmode)
20551 {
20552 /* Prefer SSE over GPR for HFmode. */
20553 int sse_cost;
20554 int index = sse_store_index (mode);
20555 if (in == 2)
20556 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
20557 ix86_cost->hard_register.sse_store[index]);
20558 else
20559 sse_cost = (in
20560 ? ix86_cost->hard_register.sse_load [index]
20561 : ix86_cost->hard_register.sse_store [index]);
20562 if (sse_cost >= cost)
20563 cost = sse_cost + 1;
20564 }
20565 return cost;
20566 }
20567 default:
20568 if (in == 2)
20569 cost = MAX (ix86_cost->hard_register.int_load[2],
20570 ix86_cost->hard_register.int_store[2]);
20571 else if (in)
20572 cost = ix86_cost->hard_register.int_load[2];
20573 else
20574 cost = ix86_cost->hard_register.int_store[2];
20575 /* Multiply with the number of GPR moves needed. */
20576 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
20577 }
20578}
20579
20580static int
20581ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
20582{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
				  in ? 1 : 0);
20584}
20585
20586
20587/* Return the cost of moving data from a register in class CLASS1 to
20588 one in class CLASS2.
20589
20590 It is not required that the cost always equal 2 when FROM is the same as TO;
20591 on some machines it is expensive to move between registers if they are not
20592 general registers. */
20593
20594static int
20595ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
20596 reg_class_t class2_i)
20597{
20598 enum reg_class class1 = (enum reg_class) class1_i;
20599 enum reg_class class2 = (enum reg_class) class2_i;
20600
20601 /* In case we require secondary memory, compute cost of the store followed
20602 by load. In order to avoid bad register allocation choices, we need
20603 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20604
  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);
20611
20612 /* In case of copying from general_purpose_register we may emit multiple
20613 stores followed by single load causing memory size mismatch stall.
20614 Count this as arbitrarily high cost of 20. */
20615 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
20616 && TARGET_MEMORY_MISMATCH_STALL
20617 && targetm.class_max_nregs (class1, mode)
20618 > targetm.class_max_nregs (class2, mode))
20619 cost += 20;
20620
20621 /* In the case of FP/MMX moves, the registers actually overlap, and we
20622 have to switch modes in order to treat them differently. */
20623 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20624 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20625 cost += 20;
20626
20627 return cost;
20628 }
20629
20630 /* Moves between MMX and non-MMX units require secondary memory. */
20631 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20632 gcc_unreachable ();
20633
20634 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20635 return (SSE_CLASS_P (class1)
20636 ? ix86_cost->hard_register.sse_to_integer
20637 : ix86_cost->hard_register.integer_to_sse);
20638
20639 /* Moves between mask register and GPR. */
20640 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20641 {
20642 return (MASK_CLASS_P (class1)
20643 ? ix86_cost->hard_register.mask_to_integer
20644 : ix86_cost->hard_register.integer_to_mask);
20645 }
20646 /* Moving between mask registers. */
20647 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20648 return ix86_cost->hard_register.mask_move;
20649
20650 if (MAYBE_FLOAT_CLASS_P (class1))
20651 return ix86_cost->hard_register.fp_move;
20652 if (MAYBE_SSE_CLASS_P (class1))
20653 {
20654 if (GET_MODE_BITSIZE (mode) <= 128)
20655 return ix86_cost->hard_register.xmm_move;
20656 if (GET_MODE_BITSIZE (mode) <= 256)
20657 return ix86_cost->hard_register.ymm_move;
20658 return ix86_cost->hard_register.zmm_move;
20659 }
20660 if (MAYBE_MMX_CLASS_P (class1))
20661 return ix86_cost->hard_register.mmx_move;
20662 return 2;
20663}
20664
20665/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20666 words of a value of mode MODE but can be less for certain modes in
20667 special long registers.
20668
20669 Actually there are no two word move instructions for consecutive
20670 registers. And only registers 0-3 may have mov byte instructions
20671 applied to them. */
20672
20673static unsigned int
20674ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20675{
20676 if (GENERAL_REGNO_P (regno))
20677 {
20678 if (mode == XFmode)
20679 return TARGET_64BIT ? 2 : 3;
20680 if (mode == XCmode)
20681 return TARGET_64BIT ? 4 : 6;
20682 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20683 }
20684 if (COMPLEX_MODE_P (mode))
20685 return 2;
20686 /* Register pair for mask registers. */
20687 if (mode == P2QImode || mode == P2HImode)
20688 return 2;
20689 if (mode == V64SFmode || mode == V64SImode)
20690 return 4;
20691 return 1;
20692}
20693
20694/* Implement REGMODE_NATURAL_SIZE(MODE). */
20695unsigned int
20696ix86_regmode_natural_size (machine_mode mode)
20697{
20698 if (mode == P2HImode || mode == P2QImode)
20699 return GET_MODE_SIZE (mode) / 2;
20700 return UNITS_PER_WORD;
20701}
20702
20703/* Implement TARGET_HARD_REGNO_MODE_OK. */
20704
20705static bool
20706ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20707{
  /* The flags register, and only the flags register, can hold CCmode
     values.  */
20709 if (CC_REGNO_P (regno))
20710 return GET_MODE_CLASS (mode) == MODE_CC;
20711 if (GET_MODE_CLASS (mode) == MODE_CC
20712 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20713 return false;
20714 if (STACK_REGNO_P (regno))
20715 return VALID_FP_MODE_P (mode);
20716 if (MASK_REGNO_P (regno))
20717 {
20718 /* Register pair only starts at even register number. */
20719 if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P (regno);
20721
20722 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20723 || (TARGET_AVX512BW && mode == SImode)
20724 || (TARGET_AVX512BW && TARGET_EVEX512 && mode == DImode));
20725 }
20726
20727 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20728 return false;
20729
20730 if (SSE_REGNO_P (regno))
20731 {
20732 /* We implement the move patterns for all vector modes into and
20733 out of SSE registers, even when no operation instructions
20734 are available. */
20735
20736 /* For AVX-512 we allow, regardless of regno:
20737 - XI mode
20738 - any of 512-bit wide vector mode
20739 - any scalar mode. */
20740 if (TARGET_AVX512F
20741 && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
20742 || VALID_AVX512F_SCALAR_MODE (mode)))
20743 return true;
20744
20745 /* For AVX-5124FMAPS or AVX-5124VNNIW
20746 allow V64SF and V64SI modes for special regnos. */
20747 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
20748 && (mode == V64SFmode || mode == V64SImode)
20749 && MOD4_SSE_REGNO_P (regno))
20750 return true;
20751
20752 /* TODO check for QI/HI scalars. */
      /* AVX512VL allows SSE regs 16+ for 128/256 bit modes.  */
20754 if (TARGET_AVX512VL
20755 && (VALID_AVX256_REG_OR_OI_MODE (mode)
20756 || VALID_AVX512VL_128_REG_MODE (mode)))
20757 return true;
20758
20759 /* xmm16-xmm31 are only available for AVX-512. */
20760 if (EXT_REX_SSE_REGNO_P (regno))
20761 return false;
20762
20763 /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer. */
20764 if (TARGET_SSE2 && mode == HImode)
20765 return true;
20766
20767 /* OImode and AVX modes are available only when AVX is enabled. */
20768 return ((TARGET_AVX
20769 && VALID_AVX256_REG_OR_OI_MODE (mode))
20770 || VALID_SSE_REG_MODE (mode)
20771 || VALID_SSE2_REG_MODE (mode)
20772 || VALID_MMX_REG_MODE (mode)
20773 || VALID_MMX_REG_MODE_3DNOW (mode));
20774 }
20775 if (MMX_REGNO_P (regno))
20776 {
20777 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20778 so if the register is available at all, then we can move data of
20779 the given mode into or out of it. */
20780 return (VALID_MMX_REG_MODE (mode)
20781 || VALID_MMX_REG_MODE_3DNOW (mode));
20782 }
20783
20784 if (mode == QImode)
20785 {
20786 /* Take care for QImode values - they can be in non-QI regs,
20787 but then they do cause partial register stalls. */
20788 if (ANY_QI_REGNO_P (regno))
20789 return true;
20790 if (!TARGET_PARTIAL_REG_STALL)
20791 return true;
20792 /* LRA checks if the hard register is OK for the given mode.
20793 QImode values can live in non-QI regs, so we allow all
20794 registers here. */
20795 if (lra_in_progress)
20796 return true;
20797 return !can_create_pseudo_p ();
20798 }
20799 /* We handle both integer and floats in the general purpose registers. */
20800 else if (VALID_INT_MODE_P (mode)
20801 || VALID_FP_MODE_P (mode))
20802 return true;
20803 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20804 on to use that value in smaller contexts, this can easily force a
20805 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20806 supporting DImode, allow it. */
20807 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20808 return true;
20809
20810 return false;
20811}
20812
20813/* Implement TARGET_INSN_CALLEE_ABI. */
20814
20815const predefined_function_abi &
20816ix86_insn_callee_abi (const rtx_insn *insn)
20817{
20818 unsigned int abi_id = 0;
20819 rtx pat = PATTERN (insn);
20820 if (vzeroupper_pattern (pat, VOIDmode))
20821 abi_id = ABI_VZEROUPPER;
20822
20823 return function_abis[abi_id];
20824}
20825
/* Initialize function_abis with the corresponding abi_id;
   currently we only handle vzeroupper.  */
20828void
20829ix86_initialize_callee_abi (unsigned int abi_id)
20830{
20831 gcc_assert (abi_id == ABI_VZEROUPPER);
20832 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20833 if (!vzeroupper_abi.initialized_p ())
20834 {
20835 HARD_REG_SET full_reg_clobbers;
      CLEAR_HARD_REG_SET (full_reg_clobbers);
20837 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20838 }
20839}
20840
20841void
20842ix86_expand_avx_vzeroupper (void)
20843{
20844 /* Initialize vzeroupper_abi here. */
20845 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20846 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20847 /* Return false for non-local goto in can_nonlocal_goto. */
  make_reg_eh_region_note (insn, 0, INT_MIN);
20849 /* Flag used for call_insn indicates it's a fake call. */
20850 RTX_FLAG (insn, used) = 1;
20851}
20852
20853
20854/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20855 saves SSE registers across calls is Win64 (thus no need to check the
20856 current ABI here), and with AVX enabled Win64 only guarantees that
20857 the low 16 bytes are saved. */
20858
20859static bool
20860ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
20861 machine_mode mode)
20862{
  /* Special ABI for vzeroupper, which only clobbers the higher part of
     SSE regs.  */
20864 if (abi_id == ABI_VZEROUPPER)
20865 return (GET_MODE_SIZE (mode) > 16
20866 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
20867 || LEGACY_SSE_REGNO_P (regno)));
20868
20869 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
20870}
20871
20872/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20873 tieable integer mode. */
20874
20875static bool
20876ix86_tieable_integer_mode_p (machine_mode mode)
20877{
20878 switch (mode)
20879 {
20880 case E_HImode:
20881 case E_SImode:
20882 return true;
20883
20884 case E_QImode:
20885 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20886
20887 case E_DImode:
20888 return TARGET_64BIT;
20889
20890 default:
20891 return false;
20892 }
20893}
20894
20895/* Implement TARGET_MODES_TIEABLE_P.
20896
20897 Return true if MODE1 is accessible in a register that can hold MODE2
20898 without copying. That is, all register classes that can hold MODE2
20899 can also hold MODE1. */
20900
20901static bool
20902ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
20903{
20904 if (mode1 == mode2)
20905 return true;
20906
  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
20909 return true;
20910
20911 /* MODE2 being XFmode implies fp stack or general regs, which means we
20912 can tie any smaller floating point modes to it. Note that we do not
20913 tie this with TFmode. */
20914 if (mode2 == XFmode)
20915 return mode1 == SFmode || mode1 == DFmode;
20916
20917 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20918 that we can tie it with SFmode. */
20919 if (mode2 == DFmode)
20920 return mode1 == SFmode;
20921
20922 /* If MODE2 is only appropriate for an SSE register, then tie with
20923 any other mode acceptable to SSE registers. */
  if (GET_MODE_SIZE (mode2) == 64
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 64
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20943
20944 /* SCmode and DImode can be tied. */
20945 if ((mode1 == E_SCmode && mode2 == E_DImode)
20946 || (mode1 == E_DImode && mode2 == E_SCmode))
20947 return TARGET_64BIT;
20948
20949 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
20950 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
20951 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
20952 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
20953 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
20954 return true;
20955
20956 return false;
20957}
20958
20959/* Return the cost of moving between two registers of mode MODE. */
20960
20961static int
20962ix86_set_reg_reg_cost (machine_mode mode)
20963{
20964 unsigned int units = UNITS_PER_WORD;
20965
20966 switch (GET_MODE_CLASS (mode))
20967 {
20968 default:
20969 break;
20970
20971 case MODE_CC:
20972 units = GET_MODE_SIZE (CCmode);
20973 break;
20974
20975 case MODE_FLOAT:
20976 if ((TARGET_SSE && mode == TFmode)
20977 || (TARGET_80387 && mode == XFmode)
20978 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
20979 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
20980 units = GET_MODE_SIZE (mode);
20981 break;
20982
20983 case MODE_COMPLEX_FLOAT:
20984 if ((TARGET_SSE && mode == TCmode)
20985 || (TARGET_80387 && mode == XCmode)
20986 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
20987 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
20988 units = GET_MODE_SIZE (mode);
20989 break;
20990
20991 case MODE_VECTOR_INT:
20992 case MODE_VECTOR_FLOAT:
20993 if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
20994 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20995 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20996 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20997 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
20998 && VALID_MMX_REG_MODE (mode)))
20999 units = GET_MODE_SIZE (mode);
21000 }
21001
21002 /* Return the cost of moving between two registers of mode MODE,
21003 assuming that the move will be in pieces of at most UNITS bytes. */
21004 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
21005}
21006
21007/* Return cost of vector operation in MODE given that scalar version has
21008 COST. */
21009
21010static int
21011ix86_vec_cost (machine_mode mode, int cost)
21012{
21013 if (!VECTOR_MODE_P (mode))
21014 return cost;
21015
21016 if (GET_MODE_BITSIZE (mode) == 128
21017 && TARGET_SSE_SPLIT_REGS)
21018 return cost * GET_MODE_BITSIZE (mode) / 64;
21019 else if (GET_MODE_BITSIZE (mode) > 128
21020 && TARGET_AVX256_SPLIT_REGS)
21021 return cost * GET_MODE_BITSIZE (mode) / 128;
21022 else if (GET_MODE_BITSIZE (mode) > 256
21023 && TARGET_AVX512_SPLIT_REGS)
21024 return cost * GET_MODE_BITSIZE (mode) / 256;
21025 return cost;
21026}
21027
/* Return the cost of vec_widen_<s>mult_hi/lo_<mode>;
   vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  */
21030static int
21031ix86_widen_mult_cost (const struct processor_costs *cost,
21032 enum machine_mode mode, bool uns_p)
21033{
21034 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
21035 int extra_cost = 0;
21036 int basic_cost = 0;
21037 switch (mode)
21038 {
21039 case V8HImode:
21040 case V16HImode:
21041 if (!uns_p || mode == V16HImode)
21042 extra_cost = cost->sse_op * 2;
21043 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21044 break;
21045 case V4SImode:
21046 case V8SImode:
21047 /* pmulhw/pmullw can be used. */
21048 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
21049 break;
21050 case V2DImode:
21051 /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
21052 require extra 4 mul, 4 add, 4 cmp and 2 shift. */
21053 if (!TARGET_SSE4_1 && !uns_p)
21054 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
21055 + cost->sse_op * 2;
21056 /* Fallthru. */
21057 case V4DImode:
21058 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21059 break;
21060 default:
21061 /* Not implemented. */
21062 return 100;
21063 }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
21065}
21066
21067/* Return cost of multiplication in MODE. */
21068
21069static int
21070ix86_multiplication_cost (const struct processor_costs *cost,
21071 enum machine_mode mode)
21072{
21073 machine_mode inner_mode = mode;
21074 if (VECTOR_MODE_P (mode))
21075 inner_mode = GET_MODE_INNER (mode);
21076
21077 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21078 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
21079 else if (X87_FLOAT_MODE_P (mode))
21080 return cost->fmul;
21081 else if (FLOAT_MODE_P (mode))
21082 return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
21084 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21085 {
21086 int nmults, nops;
21087 /* Cost of reading the memory. */
21088 int extra;
21089
21090 switch (mode)
21091 {
21092 case V4QImode:
21093 case V8QImode:
21094 /* Partial V*QImode is emulated with 4-6 insns. */
21095 nmults = 1;
21096 nops = 3;
21097 extra = 0;
21098
21099 if (TARGET_AVX512BW && TARGET_AVX512VL)
21100 ;
21101 else if (TARGET_AVX2)
21102 nops += 2;
21103 else if (TARGET_XOP)
21104 extra += cost->sse_load[2];
21105 else
21106 {
21107 nops += 1;
21108 extra += cost->sse_load[2];
21109 }
21110 goto do_qimode;
21111
21112 case V16QImode:
21113 /* V*QImode is emulated with 4-11 insns. */
21114 nmults = 1;
21115 nops = 3;
21116 extra = 0;
21117
21118 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
21119 {
21120 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
21121 nops += 3;
21122 }
21123 else if (TARGET_XOP)
21124 {
21125 nmults += 1;
21126 nops += 2;
21127 extra += cost->sse_load[2];
21128 }
21129 else
21130 {
21131 nmults += 1;
21132 nops += 4;
21133 extra += cost->sse_load[2];
21134 }
21135 goto do_qimode;
21136
21137 case V32QImode:
21138 nmults = 1;
21139 nops = 3;
21140 extra = 0;
21141
21142 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
21143 {
21144 nmults += 1;
21145 nops += 4;
21146 extra += cost->sse_load[3] * 2;
21147 }
21148 goto do_qimode;
21149
21150 case V64QImode:
21151 nmults = 2;
21152 nops = 9;
21153 extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
21154
21155 do_qimode:
	  return ix86_vec_cost (mode, cost->mulss * nmults
21157 + cost->sse_op * nops) + extra;
21158
21159 case V4SImode:
21160 /* pmulld is used in this case. No emulation is needed. */
21161 if (TARGET_SSE4_1)
21162 goto do_native;
21163 /* V4SImode is emulated with 7 insns. */
21164 else
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21166
21167 case V2DImode:
21168 case V4DImode:
21169 /* vpmullq is used in this case. No emulation is needed. */
21170 if (TARGET_AVX512DQ && TARGET_AVX512VL)
21171 goto do_native;
21172 /* V*DImode is emulated with 6-8 insns. */
21173 else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21175 /* FALLTHRU */
21176 case V8DImode:
21177 /* vpmullq is used in this case. No emulation is needed. */
21178 if (TARGET_AVX512DQ && mode == V8DImode)
21179 goto do_native;
21180 else
	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21182
21183 default:
21184 do_native:
	  return ix86_vec_cost (mode, cost->mulss);
21186 }
21187 }
21188 else
21189 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
21190}
21191
/* Return cost of division in MODE.  */
21193
21194static int
21195ix86_division_cost (const struct processor_costs *cost,
21196 enum machine_mode mode)
21197{
21198 machine_mode inner_mode = mode;
21199 if (VECTOR_MODE_P (mode))
21200 inner_mode = GET_MODE_INNER (mode);
21201
21202 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21203 return inner_mode == DFmode ? cost->divsd : cost->divss;
21204 else if (X87_FLOAT_MODE_P (mode))
21205 return cost->fdiv;
21206 else if (FLOAT_MODE_P (mode))
21207 return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->divsd : cost->divss);
21209 else
21210 return cost->divide[MODE_INDEX (mode)];
21211}
21212
21213/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, op1's value is known and given in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a subreg of an AND.
21217
21218 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
21219
21220static int
21221ix86_shift_rotate_cost (const struct processor_costs *cost,
21222 enum rtx_code code,
21223 enum machine_mode mode, bool constant_op1,
21224 HOST_WIDE_INT op1_val,
21225 bool and_in_op1,
21226 bool shift_and_truncate,
21227 bool *skip_op0, bool *skip_op1)
21228{
21229 if (skip_op0)
21230 *skip_op0 = *skip_op1 = false;
21231
21232 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21233 {
21234 int count;
21235 /* Cost of reading the memory. */
21236 int extra;
21237
21238 switch (mode)
21239 {
21240 case V4QImode:
21241 case V8QImode:
21242 if (TARGET_AVX2)
21243 /* Use vpbroadcast. */
21244 extra = cost->sse_op;
21245 else
21246 extra = cost->sse_load[2];
21247
21248 if (constant_op1)
21249 {
21250 if (code == ASHIFTRT)
21251 {
21252 count = 4;
21253 extra *= 2;
21254 }
21255 else
21256 count = 2;
21257 }
21258 else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost->sse_op * 4);
21260 else if (TARGET_SSE4_1)
21261 count = 5;
21262 else if (code == ASHIFTRT)
21263 count = 6;
21264 else
21265 count = 5;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21267
21268 case V16QImode:
21269 if (TARGET_XOP)
21270 {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we could perform the
		 shift with one insn, set the cost so that paddb is
		 preferred.  */
21275 if (constant_op1)
21276 {
21277 extra = cost->sse_load[2];
		  return ix86_vec_cost (mode, cost->sse_op) + extra;
21279 }
21280 else
21281 {
21282 count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost->sse_op * count);
21284 }
21285 }
21286 /* FALLTHRU */
21287 case V32QImode:
21288 if (TARGET_AVX2)
21289 /* Use vpbroadcast. */
21290 extra = cost->sse_op;
21291 else
21292 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
21293
21294 if (constant_op1)
21295 {
21296 if (code == ASHIFTRT)
21297 {
21298 count = 4;
21299 extra *= 2;
21300 }
21301 else
21302 count = 2;
21303 }
21304 else if (TARGET_AVX512BW
21305 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
21306 || (mode == V16QImode && TARGET_AVX512VL
21307 && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost->sse_op * 4);
21309 else if (TARGET_AVX2
21310 && mode == V16QImode && !TARGET_PREFER_AVX128)
21311 count = 6;
21312 else if (TARGET_SSE4_1)
21313 count = 9;
21314 else if (code == ASHIFTRT)
21315 count = 10;
21316 else
21317 count = 9;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21319
21320 case V2DImode:
21321 case V4DImode:
21322 /* V*DImode arithmetic right shift is emulated. */
21323 if (code == ASHIFTRT && !TARGET_AVX512VL)
21324 {
21325 if (constant_op1)
21326 {
21327 if (op1_val == 63)
21328 count = TARGET_SSE4_2 ? 1 : 2;
21329 else if (TARGET_XOP)
21330 count = 2;
21331 else if (TARGET_SSE4_1)
21332 count = 3;
21333 else
21334 count = 4;
21335 }
21336 else if (TARGET_XOP)
21337 count = 3;
21338 else if (TARGET_SSE4_2)
21339 count = 4;
21340 else
21341 count = 5;
21342
	      return ix86_vec_cost (mode, cost->sse_op * count);
21344 }
21345 /* FALLTHRU */
21346 default:
	  return ix86_vec_cost (mode, cost->sse_op);
21348 }
21349 }
21350
21351 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21352 {
21353 if (constant_op1)
21354 {
21355 if (op1_val > 32)
21356 return cost->shift_const + COSTS_N_INSNS (2);
21357 else
21358 return cost->shift_const * 2;
21359 }
21360 else
21361 {
21362 if (and_in_op1)
21363 return cost->shift_var * 2;
21364 else
21365 return cost->shift_var * 6 + COSTS_N_INSNS (2);
21366 }
21367 }
21368 else
21369 {
21370 if (constant_op1)
21371 return cost->shift_const;
21372 else if (shift_and_truncate)
21373 {
21374 if (skip_op0)
21375 *skip_op0 = *skip_op1 = true;
21376 /* Return the cost after shift-and truncation. */
21377 return cost->shift_var;
21378 }
21379 else
21380 return cost->shift_var;
21381 }
21382}
21383
21384/* Compute a (partial) cost for rtx X. Return true if the complete
21385 cost has been computed, and false if subexpressions should be
21386 scanned. In either case, *TOTAL contains the cost result. */
21387
21388static bool
21389ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
21390 int *total, bool speed)
21391{
21392 rtx mask;
21393 enum rtx_code code = GET_CODE (x);
21394 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
21395 const struct processor_costs *cost
21396 = speed ? ix86_tune_cost : &ix86_size_cost;
21397 int src_cost;
21398
21399 switch (code)
21400 {
21401 case SET:
21402 if (register_operand (SET_DEST (x), VOIDmode)
21403 && register_operand (SET_SRC (x), VOIDmode))
21404 {
21405 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
21406 return true;
21407 }
21408
21409 if (register_operand (SET_SRC (x), VOIDmode))
21410 /* Avoid potentially incorrect high cost from rtx_costs
21411 for non-tieable SUBREGs. */
21412 src_cost = 0;
21413 else
21414 {
21415 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
21416
21417 if (CONSTANT_P (SET_SRC (x)))
21418 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
21419 a small value, possibly zero for cheap constants. */
21420 src_cost += COSTS_N_INSNS (1);
21421 }
21422
21423 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
21424 return true;
21425
21426 case CONST_INT:
21427 case CONST:
21428 case LABEL_REF:
21429 case SYMBOL_REF:
21430 if (x86_64_immediate_operand (x, VOIDmode))
21431 *total = 0;
21432 else
21433 *total = 1;
21434 return true;
21435
21436 case CONST_DOUBLE:
21437 if (IS_STACK_MODE (mode))
21438 switch (standard_80387_constant_p (x))
21439 {
21440 case -1:
21441 case 0:
21442 break;
21443 case 1: /* 0.0 */
21444 *total = 1;
21445 return true;
21446 default: /* Other constants */
21447 *total = 2;
21448 return true;
21449 }
21450 /* FALLTHRU */
21451
21452 case CONST_VECTOR:
      switch (standard_sse_constant_p (x, mode))
21454 {
21455 case 0:
21456 break;
21457 case 1: /* 0: xor eliminates false dependency */
21458 *total = 0;
21459 return true;
21460 default: /* -1: cmp contains false dependency */
21461 *total = 1;
21462 return true;
21463 }
21464 /* FALLTHRU */
21465
21466 case CONST_WIDE_INT:
21467 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21468 it'll probably end up. Add a penalty for size. */
21469 *total = (COSTS_N_INSNS (1)
21470 + (!TARGET_64BIT && flag_pic)
21471 + (GET_MODE_SIZE (mode) <= 4
21472 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
21473 return true;
21474
21475 case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
21478 if (TARGET_64BIT && mode == DImode
21479 && GET_MODE (XEXP (x, 0)) == SImode)
21480 *total = 1;
21481 else if (TARGET_ZERO_EXTEND_WITH_AND)
21482 *total = cost->add;
21483 else
21484 *total = cost->movzx;
21485 return false;
21486
21487 case SIGN_EXTEND:
21488 *total = cost->movsx;
21489 return false;
21490
21491 case ASHIFT:
21492 if (SCALAR_INT_MODE_P (mode)
21493 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
21494 && CONST_INT_P (XEXP (x, 1)))
21495 {
21496 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21497 if (value == 1)
21498 {
21499 *total = cost->add;
21500 return false;
21501 }
21502 if ((value == 2 || value == 3)
21503 && cost->lea <= cost->shift_const)
21504 {
21505 *total = cost->lea;
21506 return false;
21507 }
21508 }
21509 /* FALLTHRU */
21510
21511 case ROTATE:
21512 case ASHIFTRT:
21513 case LSHIFTRT:
21514 case ROTATERT:
21515 bool skip_op0, skip_op1;
21516 *total = ix86_shift_rotate_cost (cost, code, mode,
21517 CONSTANT_P (XEXP (x, 1)),
21518 CONST_INT_P (XEXP (x, 1))
21519 ? INTVAL (XEXP (x, 1)) : -1,
21520 GET_CODE (XEXP (x, 1)) == AND,
21521 SUBREG_P (XEXP (x, 1))
21522 && GET_CODE (XEXP (XEXP (x, 1),
21523 0)) == AND,
					&skip_op0, &skip_op1);
21525 if (skip_op0 || skip_op1)
21526 {
21527 if (!skip_op0)
21528 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21529 if (!skip_op1)
21530 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
21531 return true;
21532 }
21533 return false;
21534
21535 case FMA:
21536 {
21537 rtx sub;
21538
21539 gcc_assert (FLOAT_MODE_P (mode));
21540 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
21541
21542 *total = ix86_vec_cost (mode,
21543 GET_MODE_INNER (mode) == SFmode
21544 ? cost->fmass : cost->fmasd);
21545 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
21546
21547 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
21548 sub = XEXP (x, 0);
21549 if (GET_CODE (sub) == NEG)
21550 sub = XEXP (sub, 0);
21551 *total += rtx_cost (sub, mode, FMA, 0, speed);
21552
21553 sub = XEXP (x, 2);
21554 if (GET_CODE (sub) == NEG)
21555 sub = XEXP (sub, 0);
21556 *total += rtx_cost (sub, mode, FMA, 2, speed);
21557 return true;
21558 }
21559
21560 case MULT:
21561 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
21562 {
21563 rtx op0 = XEXP (x, 0);
21564 rtx op1 = XEXP (x, 1);
21565 int nbits;
21566 if (CONST_INT_P (XEXP (x, 1)))
21567 {
21568 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21569 for (nbits = 0; value != 0; value &= value - 1)
21570 nbits++;
21571 }
21572 else
21573 /* This is arbitrary. */
21574 nbits = 7;
21575
21576 /* Compute costs correctly for widening multiplication. */
21577 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21578 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21579 == GET_MODE_SIZE (mode))
21580 {
21581 int is_mulwiden = 0;
21582 machine_mode inner_mode = GET_MODE (op0);
21583
21584 if (GET_CODE (op0) == GET_CODE (op1))
21585 is_mulwiden = 1, op1 = XEXP (op1, 0);
21586 else if (CONST_INT_P (op1))
21587 {
21588 if (GET_CODE (op0) == SIGN_EXTEND)
21589 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21590 == INTVAL (op1);
21591 else
21592 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21593 }
21594
21595 if (is_mulwiden)
21596 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21597 }
21598
21599 int mult_init;
21600 // Double word multiplication requires 3 mults and 2 adds.
21601 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21602 {
21603 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
21604 + 2 * cost->add;
21605 nbits *= 3;
21606 }
21607 else mult_init = cost->mult_init[MODE_INDEX (mode)];
21608
21609 *total = (mult_init
21610 + nbits * cost->mult_bit
21611 + rtx_cost (op0, mode, outer_code, opno, speed)
21612 + rtx_cost (op1, mode, outer_code, opno, speed));
21613
21614 return true;
21615 }
21616 *total = ix86_multiplication_cost (cost, mode);
21617 return false;
21618
21619 case DIV:
21620 case UDIV:
21621 case MOD:
21622 case UMOD:
21623 *total = ix86_division_cost (cost, mode);
21624 return false;
21625
21626 case PLUS:
21627 if (GET_MODE_CLASS (mode) == MODE_INT
21628 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21629 {
21630 if (GET_CODE (XEXP (x, 0)) == PLUS
21631 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21632 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21633 && CONSTANT_P (XEXP (x, 1)))
21634 {
21635 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21636 if (val == 2 || val == 4 || val == 8)
21637 {
21638 *total = cost->lea;
21639 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21640 outer_code, opno, speed);
21641 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
21642 outer_code, opno, speed);
21643 *total += rtx_cost (XEXP (x, 1), mode,
21644 outer_code, opno, speed);
21645 return true;
21646 }
21647 }
21648 else if (GET_CODE (XEXP (x, 0)) == MULT
21649 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21650 {
21651 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21652 if (val == 2 || val == 4 || val == 8)
21653 {
21654 *total = cost->lea;
21655 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21656 outer_code, opno, speed);
21657 *total += rtx_cost (XEXP (x, 1), mode,
21658 outer_code, opno, speed);
21659 return true;
21660 }
21661 }
21662 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21663 {
21664 rtx op = XEXP (XEXP (x, 0), 0);
21665
21666 /* Add with carry, ignore the cost of adding a carry flag. */
21667 if (ix86_carry_flag_operator (op, mode)
21668 || ix86_carry_flag_unset_operator (op, mode))
21669 *total = cost->add;
21670 else
21671 {
21672 *total = cost->lea;
21673 *total += rtx_cost (op, mode,
21674 outer_code, opno, speed);
21675 }
21676
21677 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21678 outer_code, opno, speed);
21679 *total += rtx_cost (XEXP (x, 1), mode,
21680 outer_code, opno, speed);
21681 return true;
21682 }
21683 }
21684 /* FALLTHRU */
21685
21686 case MINUS:
21687 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21688 if (GET_MODE_CLASS (mode) == MODE_INT
21689 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21690 && GET_CODE (XEXP (x, 0)) == MINUS
21691 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21692 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21693 {
21694 *total = cost->add;
21695 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21696 outer_code, opno, speed);
21697 *total += rtx_cost (XEXP (x, 1), mode,
21698 outer_code, opno, speed);
21699 return true;
21700 }
21701
21702 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21703 *total = cost->addss;
21704 else if (X87_FLOAT_MODE_P (mode))
21705 *total = cost->fadd;
21706 else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->addss);
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op);
21710 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21711 *total = cost->add * 2;
21712 else
21713 *total = cost->add;
21714 return false;
21715
21716 case IOR:
21717 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21718 || SSE_FLOAT_MODE_P (mode))
21719 {
21720 /* (ior (not ...) ...) can be a single insn in AVX512. */
21721 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
21722 && ((TARGET_EVEX512
21723 && GET_MODE_SIZE (mode) == 64)
21724 || (TARGET_AVX512VL
21725 && (GET_MODE_SIZE (mode) == 32
21726 || GET_MODE_SIZE (mode) == 16))))
21727 {
21728 rtx right = GET_CODE (XEXP (x, 1)) != NOT
21729 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
21730
	      *total = ix86_vec_cost (mode, cost->sse_op)
21732 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21733 outer_code, opno, speed)
21734 + rtx_cost (right, mode, outer_code, opno, speed);
21735 return true;
21736 }
	  *total = ix86_vec_cost (mode, cost->sse_op);
21738 }
21739 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21740 *total = cost->add * 2;
21741 else
21742 *total = cost->add;
21743 return false;
21744
21745 case XOR:
21746 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21747 || SSE_FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
21749 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21750 *total = cost->add * 2;
21751 else
21752 *total = cost->add;
21753 return false;
21754
21755 case AND:
21756 if (address_no_seg_operand (x, mode))
21757 {
21758 *total = cost->lea;
21759 return true;
21760 }
21761 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21762 || SSE_FLOAT_MODE_P (mode))
21763 {
21764 /* pandn is a single instruction. */
21765 if (GET_CODE (XEXP (x, 0)) == NOT)
21766 {
21767 rtx right = XEXP (x, 1);
21768
21769 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
21770 if (GET_CODE (right) == NOT && TARGET_AVX512F
21771 && ((TARGET_EVEX512
21772 && GET_MODE_SIZE (mode) == 64)
21773 || (TARGET_AVX512VL
21774 && (GET_MODE_SIZE (mode) == 32
21775 || GET_MODE_SIZE (mode) == 16))))
21776 right = XEXP (right, 0);
21777
	      *total = ix86_vec_cost (mode, cost->sse_op)
21779 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21780 outer_code, opno, speed)
21781 + rtx_cost (right, mode, outer_code, opno, speed);
21782 return true;
21783 }
21784 else if (GET_CODE (XEXP (x, 1)) == NOT)
21785 {
	      *total = ix86_vec_cost (mode, cost->sse_op)
21787 + rtx_cost (XEXP (x, 0), mode,
21788 outer_code, opno, speed)
21789 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21790 outer_code, opno, speed);
21791 return true;
21792 }
	  *total = ix86_vec_cost (mode, cost->sse_op);
21794 }
21795 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21796 {
21797 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21798 {
21799 *total = cost->add * 2
21800 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21801 outer_code, opno, speed)
21802 + rtx_cost (XEXP (x, 1), mode,
21803 outer_code, opno, speed);
21804 return true;
21805 }
21806 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
21807 {
21808 *total = cost->add * 2
21809 + rtx_cost (XEXP (x, 0), mode,
21810 outer_code, opno, speed)
21811 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21812 outer_code, opno, speed);
21813 return true;
21814 }
21815 *total = cost->add * 2;
21816 }
21817 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21818 {
21819 *total = cost->add
21820 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21821 outer_code, opno, speed)
21822 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
21823 return true;
21824 }
21825 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
21826 {
21827 *total = cost->add
21828 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
21829 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21830 outer_code, opno, speed);
21831 return true;
21832 }
21833 else
21834 *total = cost->add;
21835 return false;
21836
21837 case NOT:
21838 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21839 {
21840 /* (not (xor ...)) can be a single insn in AVX512. */
21841 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
21842 && ((TARGET_EVEX512
21843 && GET_MODE_SIZE (mode) == 64)
21844 || (TARGET_AVX512VL
21845 && (GET_MODE_SIZE (mode) == 32
21846 || GET_MODE_SIZE (mode) == 16))))
21847 {
	      *total = ix86_vec_cost (mode, cost->sse_op)
21849 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21850 outer_code, opno, speed)
21851 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21852 outer_code, opno, speed);
21853 return true;
21854 }
21855
21856 // vnot is pxor -1.
	  *total = ix86_vec_cost (mode, cost->sse_op) + 1;
21858 }
21859 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21860 *total = cost->add * 2;
21861 else
21862 *total = cost->add;
21863 return false;
21864
21865 case NEG:
21866 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21867 *total = cost->sse_op;
21868 else if (X87_FLOAT_MODE_P (mode))
21869 *total = cost->fchs;
21870 else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op);
21874 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21875 *total = cost->add * 3;
21876 else
21877 *total = cost->add;
21878 return false;
21879
21880 case COMPARE:
21881 rtx op0, op1;
21882 op0 = XEXP (x, 0);
21883 op1 = XEXP (x, 1);
21884 if (GET_CODE (op0) == ZERO_EXTRACT
21885 && XEXP (op0, 1) == const1_rtx
21886 && CONST_INT_P (XEXP (op0, 2))
21887 && op1 == const0_rtx)
21888 {
21889 /* This kind of construct is implemented using test[bwl].
21890 Treat it as if we had an AND. */
21891 mode = GET_MODE (XEXP (op0, 0));
21892 *total = (cost->add
21893 + rtx_cost (XEXP (op0, 0), mode, outer_code,
21894 opno, speed)
21895 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
21896 return true;
21897 }
21898
21899 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
21900 {
21901 /* This is an overflow detection, count it as a normal compare. */
21902 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
21903 return true;
21904 }
21905
21906 rtx geu;
21907 /* Match x
21908 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
21909 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
21910 if (mode == CCCmode
21911 && GET_CODE (op0) == NEG
21912 && GET_CODE (geu = XEXP (op0, 0)) == GEU
21913 && REG_P (XEXP (geu, 0))
21914 && (GET_MODE (XEXP (geu, 0)) == CCCmode
21915 || GET_MODE (XEXP (geu, 0)) == CCmode)
21916 && REGNO (XEXP (geu, 0)) == FLAGS_REG
21917 && XEXP (geu, 1) == const0_rtx
21918 && GET_CODE (op1) == LTU
21919 && REG_P (XEXP (op1, 0))
21920 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
21921 && REGNO (XEXP (op1, 0)) == FLAGS_REG
21922 && XEXP (op1, 1) == const0_rtx)
21923 {
21924 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
21925 *total = 0;
21926 return true;
21927 }
21928 /* Match x
21929 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
21930 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
21931 if (mode == CCCmode
21932 && GET_CODE (op0) == NEG
21933 && GET_CODE (XEXP (op0, 0)) == LTU
21934 && REG_P (XEXP (XEXP (op0, 0), 0))
21935 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
21936 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
21937 && XEXP (XEXP (op0, 0), 1) == const0_rtx
21938 && GET_CODE (op1) == GEU
21939 && REG_P (XEXP (op1, 0))
21940 && GET_MODE (XEXP (op1, 0)) == CCCmode
21941 && REGNO (XEXP (op1, 0)) == FLAGS_REG
21942 && XEXP (op1, 1) == const0_rtx)
21943 {
21944 /* This is *x86_cmc. */
21945 if (!speed)
21946 *total = COSTS_N_BYTES (1);
21947 else if (TARGET_SLOW_STC)
21948 *total = COSTS_N_INSNS (2);
21949 else
21950 *total = COSTS_N_INSNS (1);
21951 return true;
21952 }
21953
21954 if (SCALAR_INT_MODE_P (GET_MODE (op0))
21955 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
21956 {
21957 if (op1 == const0_rtx)
21958 *total = cost->add
21959 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
21960 else
21961 *total = 3*cost->add
21962 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
21963 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
21964 return true;
21965 }
21966
21967 /* The embedded comparison operand is completely free. */
21968 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
21969 *total = 0;
21970
21971 return false;
21972
21973 case FLOAT_EXTEND:
21974 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21975 *total = 0;
21976 else
	*total = ix86_vec_cost (mode, cost->addss);
21978 return false;
21979
21980 case FLOAT_TRUNCATE:
21981 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21982 *total = cost->fadd;
21983 else
	*total = ix86_vec_cost (mode, cost->addss);
21985 return false;
21986
21987 case ABS:
21988 /* SSE requires memory load for the constant operand. It may make
21989 sense to account for this. Of course the constant operand may or
21990 may not be reused. */
21991 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21992 *total = cost->sse_op;
21993 else if (X87_FLOAT_MODE_P (mode))
21994 *total = cost->fabs;
21995 else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
21997 return false;
21998
21999 case SQRT:
22000 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22001 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
22002 else if (X87_FLOAT_MODE_P (mode))
22003 *total = cost->fsqrt;
22004 else if (FLOAT_MODE_P (mode))
22005 *total = ix86_vec_cost (mode,
				mode == SFmode ? cost->sqrtss : cost->sqrtsd);
22007 return false;
22008
22009 case UNSPEC:
22010 if (XINT (x, 1) == UNSPEC_TP)
22011 *total = 0;
22012 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
22013 {
22014 *total = cost->sse_op;
22015 return true;
22016 }
22017 else if (XINT (x, 1) == UNSPEC_PTEST)
22018 {
22019 *total = cost->sse_op;
22020 rtx test_op0 = XVECEXP (x, 0, 0);
22021 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
22022 return false;
22023 if (GET_CODE (test_op0) == AND)
22024 {
22025 rtx and_op0 = XEXP (test_op0, 0);
22026 if (GET_CODE (and_op0) == NOT)
22027 and_op0 = XEXP (and_op0, 0);
22028 *total += rtx_cost (and_op0, GET_MODE (and_op0),
22029 AND, 0, speed)
22030 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
22031 AND, 1, speed);
22032 }
22033 else
22034 *total = rtx_cost (test_op0, GET_MODE (test_op0),
22035 UNSPEC, 0, speed);
22036 return true;
22037 }
22038 return false;
22039
22040 case VEC_SELECT:
22041 case VEC_CONCAT:
22042 case VEC_DUPLICATE:
22043 /* ??? Assume all of these vector manipulation patterns are
22044 recognizable. In which case they all pretty much have the
22045 same cost. */
22046 *total = cost->sse_op;
22047 return true;
22048 case VEC_MERGE:
22049 mask = XEXP (x, 2);
      /* This is a masked instruction; assume the same cost as the
	 unmasked variant.  */
22052 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
22053 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
22054 else
22055 *total = cost->sse_op;
22056 return true;
22057
22058 case MEM:
22059 /* An insn that accesses memory is slightly more expensive
22060 than one that does not. */
22061 if (speed)
22062 *total += 1;
22063 return false;
22064
22065 case ZERO_EXTRACT:
22066 if (XEXP (x, 1) == const1_rtx
22067 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
22068 && GET_MODE (XEXP (x, 2)) == SImode
22069 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
22070 {
22071 /* Ignore cost of zero extension and masking of last argument. */
22072 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22073 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22074 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
22075 return true;
22076 }
22077 return false;
22078
22079 case IF_THEN_ELSE:
22080 if (TARGET_XOP
22081 && VECTOR_MODE_P (mode)
22082 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
22083 {
22084 /* vpcmov. */
22085 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
22086 if (!REG_P (XEXP (x, 0)))
22087 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22088 if (!REG_P (XEXP (x, 1)))
22089 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22090 if (!REG_P (XEXP (x, 2)))
22091 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22092 return true;
22093 }
22094 else if (TARGET_CMOVE
22095 && SCALAR_INT_MODE_P (mode)
22096 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22097 {
22098 /* cmov. */
22099 *total = COSTS_N_INSNS (1);
22100 if (!REG_P (XEXP (x, 0)))
22101 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22102 if (!REG_P (XEXP (x, 1)))
22103 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22104 if (!REG_P (XEXP (x, 2)))
22105 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22106 return true;
22107 }
22108 return false;
22109
22110 default:
22111 return false;
22112 }
22113}
22114
22115#if TARGET_MACHO
22116
22117static int current_machopic_label_num;
22118
22119/* Given a symbol name and its associated stub, write out the
22120 definition of the stub. */
22121
22122void
22123machopic_output_stub (FILE *file, const char *symb, const char *stub)
22124{
22125 unsigned int length;
22126 char *binder_name, *symbol_name, lazy_ptr_name[32];
22127 int label = ++current_machopic_label_num;
22128
22129 /* For 64-bit we shouldn't get here. */
22130 gcc_assert (!TARGET_64BIT);
22131
22132 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22133 symb = targetm.strip_name_encoding (symb);
22134
22135 length = strlen (stub);
22136 binder_name = XALLOCAVEC (char, length + 32);
22137 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22138
22139 length = strlen (symb);
22140 symbol_name = XALLOCAVEC (char, length + 32);
22141 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22142
22143 sprintf (lazy_ptr_name, "L%d$lz", label);
22144
22145 if (MACHOPIC_ATT_STUB)
22146 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
22147 else if (MACHOPIC_PURE)
22148 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
22149 else
22150 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22151
22152 fprintf (file, "%s:\n", stub);
22153 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22154
22155 if (MACHOPIC_ATT_STUB)
22156 {
22157 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
22158 }
22159 else if (MACHOPIC_PURE)
22160 {
22161 /* PIC stub. */
22162 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22163 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22164 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
22165 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
22166 label, lazy_ptr_name, label);
22167 fprintf (file, "\tjmp\t*%%ecx\n");
22168 }
22169 else
22170 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22171
22172 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
22173 it needs no stub-binding-helper. */
22174 if (MACHOPIC_ATT_STUB)
22175 return;
22176
22177 fprintf (file, "%s:\n", binder_name);
22178
22179 if (MACHOPIC_PURE)
22180 {
22181 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
22182 fprintf (file, "\tpushl\t%%ecx\n");
22183 }
22184 else
22185 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22186
22187 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
22188
22189 /* N.B. Keep the correspondence of these
22190 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
22191 old-pic/new-pic/non-pic stubs; altering this will break
22192 compatibility with existing dylibs. */
22193 if (MACHOPIC_PURE)
22194 {
22195 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22196 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
22197 }
22198 else
22199 /* 16-byte -mdynamic-no-pic stub. */
22200 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
22201
22202 fprintf (file, "%s:\n", lazy_ptr_name);
22203 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22204 fprintf (file, ASM_LONG "%s\n", binder_name);
22205}
22206#endif /* TARGET_MACHO */
22207
/* Order the registers for the register allocator.  */
22209
22210void
22211x86_order_regs_for_local_alloc (void)
22212{
22213 int pos = 0;
22214 int i;
22215
22216 /* First allocate the local general purpose registers. */
22217 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
22219 reg_alloc_order [pos++] = i;
22220
22221 /* Global general purpose registers. */
22222 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
22224 reg_alloc_order [pos++] = i;
22225
22226 /* x87 registers come first in case we are doing FP math
22227 using them. */
22228 if (!TARGET_SSE_MATH)
22229 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22230 reg_alloc_order [pos++] = i;
22231
22232 /* SSE registers. */
22233 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22234 reg_alloc_order [pos++] = i;
22235 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22236 reg_alloc_order [pos++] = i;
22237
22238 /* Extended REX SSE registers. */
22239 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
22240 reg_alloc_order [pos++] = i;
22241
22242 /* Mask register. */
22243 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
22244 reg_alloc_order [pos++] = i;
22245
22246 /* x87 registers. */
22247 if (TARGET_SSE_MATH)
22248 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22249 reg_alloc_order [pos++] = i;
22250
22251 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22252 reg_alloc_order [pos++] = i;
22253
  /* Initialize the rest of the array, since some registers are never
     allocated at all.  */
22256 while (pos < FIRST_PSEUDO_REGISTER)
22257 reg_alloc_order [pos++] = 0;
22258}
22259
22260static bool
22261ix86_ms_bitfield_layout_p (const_tree record_type)
22262{
22263 return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
22266}
22267
/* Return an RTX indicating where the "this" parameter is located
   on entry to FUNCTION.  */
22270
22271static rtx
22272x86_this_parameter (tree function)
22273{
22274 tree type = TREE_TYPE (function);
22275 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22276 int nregs;
22277
22278 if (TARGET_64BIT)
22279 {
22280 const int *parm_regs;
22281
      if (ix86_function_type_abi (type) == MS_ABI)
22283 parm_regs = x86_64_ms_abi_int_parameter_registers;
22284 else
22285 parm_regs = x86_64_int_parameter_registers;
22286 return gen_rtx_REG (Pmode, parm_regs[aggr]);
22287 }
22288
  nregs = ix86_function_regparm (type, function);
22290
22291 if (nregs > 0 && !stdarg_p (type))
22292 {
22293 int regno;
22294 unsigned int ccvt = ix86_get_callcvt (type);
22295
22296 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22297 regno = aggr ? DX_REG : CX_REG;
22298 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22299 {
22300 regno = CX_REG;
22301 if (aggr)
22302 return gen_rtx_MEM (SImode,
22303 plus_constant (Pmode, stack_pointer_rtx, 4));
22304 }
22305 else
22306 {
22307 regno = AX_REG;
22308 if (aggr)
22309 {
22310 regno = DX_REG;
22311 if (nregs == 1)
22312 return gen_rtx_MEM (SImode,
22313 plus_constant (Pmode,
22314 stack_pointer_rtx, 4));
22315 }
22316 }
22317 return gen_rtx_REG (SImode, regno);
22318 }
22319
22320 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
22321 aggr ? 8 : 4));
22322}
22323
22324/* Determine whether x86_output_mi_thunk can succeed. */
22325
22326static bool
22327x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
22328 const_tree function)
22329{
22330 /* 64-bit can handle anything. */
22331 if (TARGET_64BIT)
22332 return true;
22333
22334 /* For 32-bit, everything's fine if we have one free register. */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22336 return true;
22337
22338 /* Need a free register for vcall_offset. */
22339 if (vcall_offset)
22340 return false;
22341
22342 /* Need a free register for GOT references. */
22343 if (flag_pic && !targetm.binds_local_p (function))
22344 return false;
22345
22346 /* Otherwise ok. */
22347 return true;
22348}
22349
22350/* Output the assembler code for a thunk function. THUNK_DECL is the
22351 declaration for the thunk function itself, FUNCTION is the decl for
22352 the target function. DELTA is an immediate constant offset to be
22353 added to THIS. If VCALL_OFFSET is nonzero, the word at
22354 *(*this + vcall_offset) should be added to THIS. */
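/* For example (illustrative only), a non-virtual thunk with DELTA == 8 and
   no VCALL_OFFSET on x86-64 (SysV ABI, where "this" arrives in %rdi) comes
   out roughly as:

	addq	$8, %rdi
	jmp	target_function  */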
22355
22356static void
22357x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
22358 HOST_WIDE_INT vcall_offset, tree function)
22359{
22360 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
22361 rtx this_param = x86_this_parameter (function);
22362 rtx this_reg, tmp, fnaddr;
22363 unsigned int tmp_regno;
22364 rtx_insn *insn;
22365 int saved_flag_force_indirect_call = flag_force_indirect_call;
22366
22367 if (TARGET_64BIT)
22368 tmp_regno = R10_REG;
22369 else
22370 {
22371 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
22372 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22373 tmp_regno = AX_REG;
22374 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22375 tmp_regno = DX_REG;
22376 else
22377 tmp_regno = CX_REG;
22378
22379 if (flag_pic)
22380 flag_force_indirect_call = 0;
22381 }
22382
22383 emit_note (NOTE_INSN_PROLOGUE_END);
22384
  /* If CET is enabled, insert an ENDBR instruction.  */
22386 if ((flag_cf_protection & CF_BRANCH))
22387 emit_insn (gen_nop_endbr ());
22388
22389 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22390 pull it in now and let DELTA benefit. */
22391 if (REG_P (this_param))
22392 this_reg = this_param;
22393 else if (vcall_offset)
22394 {
22395 /* Put the this parameter into %eax. */
22396 this_reg = gen_rtx_REG (Pmode, AX_REG);
22397 emit_move_insn (this_reg, this_param);
22398 }
22399 else
22400 this_reg = NULL_RTX;
22401
22402 /* Adjust the this parameter by a fixed constant. */
22403 if (delta)
22404 {
22405 rtx delta_rtx = GEN_INT (delta);
22406 rtx delta_dst = this_reg ? this_reg : this_param;
22407
22408 if (TARGET_64BIT)
22409 {
22410 if (!x86_64_general_operand (delta_rtx, Pmode))
22411 {
22412 tmp = gen_rtx_REG (Pmode, tmp_regno);
22413 emit_move_insn (tmp, delta_rtx);
22414 delta_rtx = tmp;
22415 }
22416 }
22417
      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
22419 }
22420
22421 /* Adjust the this parameter by a value stored in the vtable. */
22422 if (vcall_offset)
22423 {
22424 rtx vcall_addr, vcall_mem, this_mem;
22425
22426 tmp = gen_rtx_REG (Pmode, tmp_regno);
22427
22428 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
22429 if (Pmode != ptr_mode)
22430 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
22431 emit_move_insn (tmp, this_mem);
22432
22433 /* Adjust the this parameter. */
22434 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
22435 if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
22437 {
22438 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
22439 emit_move_insn (tmp2, GEN_INT (vcall_offset));
22440 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
22441 }
22442
22443 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
22444 if (Pmode != ptr_mode)
22445 emit_insn (gen_addsi_1_zext (this_reg,
22446 gen_rtx_REG (ptr_mode,
22447 REGNO (this_reg)),
22448 vcall_mem));
22449 else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
22451 }
22452
22453 /* If necessary, drop THIS back to its stack slot. */
22454 if (this_reg && this_reg != this_param)
22455 emit_move_insn (this_param, this_reg);
22456
22457 fnaddr = XEXP (DECL_RTL (function), 0);
22458 if (TARGET_64BIT)
22459 {
22460 if (!flag_pic || targetm.binds_local_p (function)
22461 || TARGET_PECOFF)
22462 ;
22463 else
22464 {
22465 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
22466 tmp = gen_rtx_CONST (Pmode, tmp);
22467 fnaddr = gen_const_mem (Pmode, tmp);
22468 }
22469 }
22470 else
22471 {
22472 if (!flag_pic || targetm.binds_local_p (function))
22473 ;
22474#if TARGET_MACHO
22475 else if (TARGET_MACHO)
22476 {
22477 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
22478 fnaddr = XEXP (fnaddr, 0);
22479 }
22480#endif /* TARGET_MACHO */
22481 else
22482 {
22483 tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);
22485
22486 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
22487 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
22488 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
22489 fnaddr = gen_const_mem (Pmode, fnaddr);
22490 }
22491 }
22492
22493 /* Our sibling call patterns do not allow memories, because we have no
22494 predicate that can distinguish between frame and non-frame memory.
22495 For our purposes here, we can get away with (ab)using a jump pattern,
22496 because we're going to do no optimization. */
22497 if (MEM_P (fnaddr))
22498 {
22499 if (sibcall_insn_operand (fnaddr, word_mode))
22500 {
22501 fnaddr = XEXP (DECL_RTL (function), 0);
22502 tmp = gen_rtx_MEM (QImode, fnaddr);
22503 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22504 tmp = emit_call_insn (tmp);
22505 SIBLING_CALL_P (tmp) = 1;
22506 }
22507 else
22508 emit_jump_insn (gen_indirect_jump (fnaddr));
22509 }
22510 else
22511 {
22512 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
22513 {
22514 // CM_LARGE_PIC always uses pseudo PIC register which is
22515 // uninitialized. Since FUNCTION is local and calling it
22516 // doesn't go through PLT, we use scratch register %r11 as
22517 // PIC register and initialize it here.
22518 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
22519 ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
22522 }
22523
22524 if (!sibcall_insn_operand (fnaddr, word_mode))
22525 {
22526 tmp = gen_rtx_REG (word_mode, tmp_regno);
22527 if (GET_MODE (fnaddr) != word_mode)
22528 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
22529 emit_move_insn (tmp, fnaddr);
22530 fnaddr = tmp;
22531 }
22532
22533 tmp = gen_rtx_MEM (QImode, fnaddr);
22534 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22535 tmp = emit_call_insn (tmp);
22536 SIBLING_CALL_P (tmp) = 1;
22537 }
22538 emit_barrier ();
22539
22540 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22541 insn = get_insns ();
22542 shorten_branches (insn);
22543 assemble_start_function (thunk_fndecl, fnname);
22544 final_start_function (insn, file, 1);
22545 final (insn, file, 1);
22546 final_end_function ();
22547 assemble_end_function (thunk_fndecl, fnname);
22548
22549 flag_force_indirect_call = saved_flag_force_indirect_call;
22550}
22551
22552static void
22553x86_file_start (void)
22554{
22555 default_file_start ();
22556 if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
22558#if TARGET_MACHO
22559 darwin_file_start ();
22560#endif
22561 if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22563 if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
22565 if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22567}
22568
22569int
22570x86_field_alignment (tree type, int computed)
22571{
22572 machine_mode mode;
22573
22574 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22575 return computed;
22576 if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
22578 type = strip_array_types (type);
22579 mode = TYPE_MODE (type);
22580 if (mode == DFmode || mode == DCmode
22581 || GET_MODE_CLASS (mode) == MODE_INT
22582 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22583 {
22584 if (TYPE_ATOMIC (type) && computed > 32)
22585 {
22586 static bool warned;
22587
22588 if (!warned && warn_psabi)
22589 {
22590 const char *url
22591 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
22592
22593 warned = true;
22594 inform (input_location, "the alignment of %<_Atomic %T%> "
22595 "fields changed in %{GCC 11.1%}",
22596 TYPE_MAIN_VARIANT (type), url);
22597 }
22598 }
22599 else
22600 return MIN (32, computed);
22601 }
22602 return computed;
22603}
22604
22605/* Print call to TARGET to FILE. */
22606
22607static void
22608x86_print_call_or_nop (FILE *file, const char *target)
22609{
  if (flag_nop_mcount || !strcmp (target, "nop"))
22611 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
22613 else
    fprintf (file, "1:\tcall\t%s\n", target);
22615}
22616
22617static bool
22618current_fentry_name (const char **name)
22619{
  tree attr = lookup_attribute ("fentry_name",
22621 DECL_ATTRIBUTES (current_function_decl));
22622 if (!attr)
22623 return false;
22624 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22625 return true;
22626}
22627
22628static bool
22629current_fentry_section (const char **name)
22630{
  tree attr = lookup_attribute ("fentry_section",
22632 DECL_ATTRIBUTES (current_function_decl));
22633 if (!attr)
22634 return false;
22635 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22636 return true;
22637}
22638
22639/* Output assembler code to FILE to increment profiler label # LABELNO
22640 for profiling a function entry. */
22641void
22642x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22643{
22644 if (cfun->machine->insn_queued_at_entrance)
22645 {
22646 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
22648 unsigned int patch_area_size
22649 = crtl->patch_area_size - crtl->patch_area_entry;
22650 if (patch_area_size)
22651 ix86_output_patchable_area (patch_area_size,
22652 crtl->patch_area_entry == 0);
22653 }
22654
22655 const char *mcount_name = MCOUNT_NAME;
22656
  if (current_fentry_name (&mcount_name))
22658 ;
22659 else if (fentry_name)
22660 mcount_name = fentry_name;
22661 else if (flag_fentry)
22662 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
22663
22664 if (TARGET_64BIT)
22665 {
22666#ifndef NO_PROFILE_COUNTERS
22667 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
22668#endif
22669
22670 if (!TARGET_PECOFF)
22671 {
22672 switch (ix86_cmodel)
22673 {
22674 case CM_LARGE:
22675 /* NB: R10 is caller-saved. Although it can be used as a
22676 static chain register, it is preserved when calling
22677 mcount for nested functions. */
	    fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
22679 mcount_name);
22680 break;
22681 case CM_LARGE_PIC:
22682#ifdef NO_PROFILE_COUNTERS
	    fprintf (file, "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
	    fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
	    fprintf (file, "\taddq\t%%r11, %%r10\n");
	    fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
	    fprintf (file, "\taddq\t%%r11, %%r10\n");
	    fprintf (file, "\tcall\t*%%r10\n");
22689#else
22690 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
22691#endif
22692 break;
22693 case CM_SMALL_PIC:
22694 case CM_MEDIUM_PIC:
22695 if (!ix86_direct_extern_access)
22696 {
		fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
22698 break;
22699 }
22700 /* fall through */
22701 default:
	      x86_print_call_or_nop (file, mcount_name);
22703 break;
22704 }
22705 }
22706 else
	x86_print_call_or_nop (file, mcount_name);
22708 }
22709 else if (flag_pic)
22710 {
22711#ifndef NO_PROFILE_COUNTERS
22712 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
22713 LPREFIX, labelno);
22714#endif
      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
22716 }
22717 else
22718 {
22719#ifndef NO_PROFILE_COUNTERS
22720 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
22721 LPREFIX, labelno);
22722#endif
      x86_print_call_or_nop (file, mcount_name);
22724 }
22725
22726 if (flag_record_mcount
      || lookup_attribute ("fentry_section",
22728 DECL_ATTRIBUTES (current_function_decl)))
22729 {
22730 const char *sname = "__mcount_loc";
22731
      if (current_fentry_section (&sname))
22733 ;
22734 else if (fentry_section)
22735 sname = fentry_section;
22736
      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
22740 }
22741}
22742
22743/* We don't have exact information about the insn sizes, but we may assume
22744 quite safely that we are informed about all 1 byte insns and memory
22745 address sizes. This is enough to eliminate unnecessary padding in
22746 99% of cases. */
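/* For instance, a direct call to a symbol is always counted as exactly
   5 bytes (1 opcode byte plus a 4-byte displacement); see the CALL_P case
   below.  */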
22747
22748int
22749ix86_min_insn_size (rtx_insn *insn)
22750{
22751 int l = 0, len;
22752
22753 if (!INSN_P (insn) || !active_insn_p (insn))
22754 return 0;
22755
  /* Discard alignments we've emitted, and jump instructions.  */
22757 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22758 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22759 return 0;
22760
  /* Important case: calls are always 5 bytes.
     It is common to have many calls in a row.  */
22763 if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
22765 && !SIBLING_CALL_P (insn))
22766 return 5;
22767 len = get_attr_length (insn);
22768 if (len <= 1)
22769 return 1;
22770
22771 /* For normal instructions we rely on get_attr_length being exact,
22772 with a few exceptions. */
22773 if (!JUMP_P (insn))
22774 {
22775 enum attr_type type = get_attr_type (insn);
22776
22777 switch (type)
22778 {
22779 case TYPE_MULTI:
22780 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
22781 || asm_noperands (PATTERN (insn)) >= 0)
22782 return 0;
22783 break;
22784 case TYPE_OTHER:
22785 case TYPE_FCMP:
22786 break;
22787 default:
22788 /* Otherwise trust get_attr_length. */
22789 return len;
22790 }
22791
22792 l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22794 l = 4;
22795 }
22796 if (l)
22797 return 1+l;
22798 else
22799 return 2;
22800}
22801
22802#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22803
22804/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22805 window. */
22806
22807static void
22808ix86_avoid_jump_mispredicts (void)
22809{
22810 rtx_insn *insn, *start = get_insns ();
22811 int nbytes = 0, njumps = 0;
22812 bool isjump = false;
22813
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval including INSN and not
     including START.  When NBYTES is smaller than 16 bytes, it is
     possible that the ends of START and INSN fall into the same
     16-byte page.

     The smallest offset in the page at which INSN can start is the case
     where START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
     maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider an asm goto as a jump: while it can contain a jump, it
     doesn't have to, since control transfer to its label(s) can be
     performed through other means; also, we estimate the minimum length
     of all asm stmts as 0.  */
22827 for (insn = start; insn; insn = NEXT_INSN (insn))
22828 {
22829 int min_size;
22830
22831 if (LABEL_P (insn))
22832 {
22833 align_flags alignment = label_to_alignment (insn);
22834 int align = alignment.levels[0].log;
22835 int max_skip = alignment.levels[0].maxskip;
22836
22837 if (max_skip > 15)
22838 max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can
	     already be in the current 16-byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach a 16-byte boundary.  */
22843 if (align <= 0
22844 || (align <= 3 && max_skip != (1 << align) - 1))
22845 max_skip = 0;
22846 if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
22848 INSN_UID (insn), max_skip);
22849 if (max_skip)
22850 {
22851 while (nbytes + max_skip >= 16)
22852 {
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22855 || CALL_P (start))
22856 njumps--, isjump = true;
22857 else
22858 isjump = false;
		  nbytes -= ix86_min_insn_size (start);
22860 }
22861 }
22862 continue;
22863 }
22864
22865 min_size = ix86_min_insn_size (insn);
22866 nbytes += min_size;
22867 if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
22869 INSN_UID (insn), min_size);
22870 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
22871 || CALL_P (insn))
22872 njumps++;
22873 else
22874 continue;
22875
22876 while (njumps > 3)
22877 {
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22880 || CALL_P (start))
22881 njumps--, isjump = true;
22882 else
22883 isjump = false;
	  nbytes -= ix86_min_insn_size (start);
22885 }
22886 gcc_assert (njumps >= 0);
22887 if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);
22890
22891 if (njumps == 3 && isjump && nbytes < 16)
22892 {
22893 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
22894
22895 if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22897 INSN_UID (insn), padsize);
22898 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
22899 }
22900 }
22901}
22902#endif
22903
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting a NOP just before the RET instruction in such cases.  */
22908static void
22909ix86_pad_returns (void)
22910{
22911 edge e;
22912 edge_iterator ei;
22913
22914 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22915 {
22916 basic_block bb = e->src;
22917 rtx_insn *ret = BB_END (bb);
22918 rtx_insn *prev;
22919 bool replace = false;
22920
22921 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
22922 || optimize_bb_for_size_p (bb))
22923 continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22925 if (active_insn_p (prev) || LABEL_P (prev))
22926 break;
22927 if (prev && LABEL_P (prev))
22928 {
22929 edge e;
22930 edge_iterator ei;
22931
22932 FOR_EACH_EDGE (e, ei, bb->preds)
22933 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22934 && !(e->flags & EDGE_FALLTHRU))
22935 {
22936 replace = true;
22937 break;
22938 }
22939 }
22940 if (!replace)
22941 {
22942 prev = prev_active_insn (ret);
22943 if (prev
22944 && ((JUMP_P (prev) && any_condjump_p (prev))
22945 || CALL_P (prev)))
22946 replace = true;
22947 /* Empty functions get branch mispredict even when
22948 the jump destination is not visible to us. */
22949 if (!prev && !optimize_function_for_size_p (cfun))
22950 replace = true;
22951 }
22952 if (replace)
22953 {
22954 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
22955 delete_insn (ret);
22956 }
22957 }
22958}
22959
22960/* Count the minimum number of instructions in BB. Return 4 if the
22961 number of instructions >= 4. */
22962
22963static int
22964ix86_count_insn_bb (basic_block bb)
22965{
22966 rtx_insn *insn;
22967 int insn_count = 0;
22968
22969 /* Count number of instructions in this block. Return 4 if the number
22970 of instructions >= 4. */
22971 FOR_BB_INSNS (bb, insn)
22972 {
      /* This only happens in exit blocks.  */
22974 if (JUMP_P (insn)
22975 && ANY_RETURN_P (PATTERN (insn)))
22976 break;
22977
22978 if (NONDEBUG_INSN_P (insn)
22979 && GET_CODE (PATTERN (insn)) != USE
22980 && GET_CODE (PATTERN (insn)) != CLOBBER)
22981 {
22982 insn_count++;
22983 if (insn_count >= 4)
22984 return insn_count;
22985 }
22986 }
22987
22988 return insn_count;
22989}
22990
22991
22992/* Count the minimum number of instructions in code path in BB.
22993 Return 4 if the number of instructions >= 4. */
22994
22995static int
22996ix86_count_insn (basic_block bb)
22997{
22998 edge e;
22999 edge_iterator ei;
23000 int min_prev_count;
23001
23002 /* Only bother counting instructions along paths with no
23003 more than 2 basic blocks between entry and exit. Given
23004 that BB has an edge to exit, determine if a predecessor
23005 of BB has an edge from entry. If so, compute the number
23006 of instructions in the predecessor block. If there
23007 happen to be multiple such blocks, compute the minimum. */
23008 min_prev_count = 4;
23009 FOR_EACH_EDGE (e, ei, bb->preds)
23010 {
23011 edge prev_e;
23012 edge_iterator prev_ei;
23013
23014 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23015 {
23016 min_prev_count = 0;
23017 break;
23018 }
23019 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
23020 {
23021 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23022 {
	      int count = ix86_count_insn_bb (e->src);
23024 if (count < min_prev_count)
23025 min_prev_count = count;
23026 break;
23027 }
23028 }
23029 }
23030
23031 if (min_prev_count < 4)
23032 min_prev_count += ix86_count_insn_bb (bb);
23033
23034 return min_prev_count;
23035}
23036
23037/* Pad short function to 4 instructions. */
23038
23039static void
23040ix86_pad_short_function (void)
23041{
23042 edge e;
23043 edge_iterator ei;
23044
23045 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23046 {
23047 rtx_insn *ret = BB_END (e->src);
23048 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
23049 {
	  int insn_count = ix86_count_insn (e->src);
23051
23052 /* Pad short function. */
23053 if (insn_count < 4)
23054 {
23055 rtx_insn *insn = ret;
23056
23057 /* Find epilogue. */
23058 while (insn
23059 && (!NOTE_P (insn)
23060 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
23061 insn = PREV_INSN (insn);
23062
23063 if (!insn)
23064 insn = ret;
23065
23066 /* Two NOPs count as one instruction. */
23067 insn_count = 2 * (4 - insn_count);
23068 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
23069 }
23070 }
23071 }
23072}
23073
23074/* Fix up a Windows system unwinder issue. If an EH region falls through into
23075 the epilogue, the Windows system unwinder will apply epilogue logic and
23076 produce incorrect offsets. This can be avoided by adding a nop between
23077 the last insn that can throw and the first insn of the epilogue. */
23078
23079static void
23080ix86_seh_fixup_eh_fallthru (void)
23081{
23082 edge e;
23083 edge_iterator ei;
23084
23085 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23086 {
23087 rtx_insn *insn, *next;
23088
23089 /* Find the beginning of the epilogue. */
23090 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
23091 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
23092 break;
23093 if (insn == NULL)
23094 continue;
23095
23096 /* We only care about preceding insns that can throw. */
23097 insn = prev_active_insn (insn);
23098 if (insn == NULL || !can_throw_internal (insn))
23099 continue;
23100
23101 /* Do not separate calls from their debug information. */
    for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
23103 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
23104 insn = next;
23105 else
23106 break;
23107
23108 emit_insn_after (gen_nops (const1_rtx), insn);
23109 }
23110}
/* Split a vector load from a parm_decl into elemental loads to avoid
   STLF (store-to-load forwarding) stalls.  */
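/* Illustratively (the exact mnemonics depend on the insn patterns chosen),
   a V2DF load such as

	movapd	16(%rsp), %xmm0

   is replaced by two scalar loads of the low and high halves:

	movsd	16(%rsp), %xmm0
	movhpd	24(%rsp), %xmm0  */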
23113static void
23114ix86_split_stlf_stall_load ()
23115{
23116 rtx_insn* insn, *start = get_insns ();
23117 unsigned window = 0;
23118
23119 for (insn = start; insn; insn = NEXT_INSN (insn))
23120 {
23121 if (!NONDEBUG_INSN_P (insn))
23122 continue;
23123 window++;
      /* If 64 independent insns (e.g. vaddps %xmm18, %xmm19, %xmm20, with
	 no dependence between each other, just to keep the pipeline busy)
	 are inserted before the stalled load, the STLF-stall case is as
	 fast as the no-stall case on CLX.
	 Since the CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the insn layout.  */
23129 if (window > (unsigned) x86_stlf_window_ninsns)
23130 return;
23131
23132 if (any_uncondjump_p (insn)
23133 || ANY_RETURN_P (PATTERN (insn))
23134 || CALL_P (insn))
23135 return;
23136
23137 rtx set = single_set (insn);
23138 if (!set)
23139 continue;
23140 rtx src = SET_SRC (set);
23141 if (!MEM_P (src)
23142 /* Only handle V2DFmode load since it doesn't need any scratch
23143 register. */
23144 || GET_MODE (src) != E_V2DFmode
23145 || !MEM_EXPR (src)
23146 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
23147 continue;
23148
23149 rtx zero = CONST0_RTX (V2DFmode);
23150 rtx dest = SET_DEST (set);
23151 rtx m = adjust_address (src, DFmode, 0);
23152 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
23153 emit_insn_before (loadlpd, insn);
23154 m = adjust_address (src, DFmode, 8);
23155 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
23156 if (dump_file && (dump_flags & TDF_DETAILS))
23157 {
	  fputs ("Due to potential STLF stall, split instruction:\n",
		 dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs ("To:\n", dump_file);
23162 print_rtl_single (dump_file, loadlpd);
23163 print_rtl_single (dump_file, loadhpd);
23164 }
23165 PATTERN (insn) = loadhpd;
23166 INSN_CODE (insn) = -1;
23167 gcc_assert (recog_memoized (insn) != -1);
23168 }
23169}
23170
/* Implement machine-specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
23173static void
23174ix86_reorg (void)
23175{
23176 /* We are freeing block_for_insn in the toplev to keep compatibility
23177 with old MDEP_REORGS that are not CFG based. Recompute it now. */
23178 compute_bb_for_insn ();
23179
23180 if (TARGET_SEH && current_function_has_exception_handlers ())
23181 ix86_seh_fixup_eh_fallthru ();
23182
23183 if (optimize && optimize_function_for_speed_p (cfun))
23184 {
23185 if (TARGET_SSE2)
23186 ix86_split_stlf_stall_load ();
23187 if (TARGET_PAD_SHORT_FUNCTION)
23188 ix86_pad_short_function ();
23189 else if (TARGET_PAD_RETURNS)
23190 ix86_pad_returns ();
23191#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23192 if (TARGET_FOUR_JUMP_LIMIT)
23193 ix86_avoid_jump_mispredicts ();
23194#endif
23195 }
23196}
23197
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
23200bool
23201x86_extended_QIreg_mentioned_p (rtx_insn *insn)
23202{
23203 int i;
23204 extract_insn_cached (insn);
23205 for (i = 0; i < recog_data.n_operands; i++)
23206 if (GENERAL_REG_P (recog_data.operand[i])
23207 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
23208 return true;
23209 return false;
23210}
23211
/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
23214bool
23215x86_extended_reg_mentioned_p (rtx insn)
23216{
23217 subrtx_iterator::array_type array;
23218 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23219 {
23220 const_rtx x = *iter;
23221 if (REG_P (x)
23222 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
23223 || REX2_INT_REGNO_P (REGNO (x))))
23224 return true;
23225 }
23226 return false;
23227}
23228
/* Return true when INSN mentions a register that must be encoded using a
   REX2 prefix.  */
23231bool
23232x86_extended_rex2reg_mentioned_p (rtx insn)
23233{
23234 subrtx_iterator::array_type array;
23235 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23236 {
23237 const_rtx x = *iter;
23238 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
23239 return true;
23240 }
23241 return false;
23242}
23243
/* Return true when the rtx operands mention a register that must be encoded
   using an EVEX prefix.  */
23246bool
23247x86_evex_reg_mentioned_p (rtx operands[], int nops)
23248{
23249 int i;
23250 for (i = 0; i < nops; i++)
23251 if (EXT_REX_SSE_REG_P (operands[i])
	|| x86_extended_rex2reg_mentioned_p (operands[i]))
23253 return true;
23254 return false;
23255}
23256
23257/* If profitable, negate (without causing overflow) integer constant
23258 of mode MODE at location LOC. Return true in this case. */
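/* E.g. (illustrative) the caller can then emit "subl $4, %eax" instead of
   "addl $-4, %eax" once the constant has been negated here.  */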
23259bool
23260x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
23261{
23262 HOST_WIDE_INT val;
23263
23264 if (!CONST_INT_P (*loc))
23265 return false;
23266
23267 switch (mode)
23268 {
23269 case E_DImode:
23270 /* DImode x86_64 constants must fit in 32 bits. */
23271 gcc_assert (x86_64_immediate_operand (*loc, mode));
23272
23273 mode = SImode;
23274 break;
23275
23276 case E_SImode:
23277 case E_HImode:
23278 case E_QImode:
23279 break;
23280
23281 default:
23282 gcc_unreachable ();
23283 }
23284
23285 /* Avoid overflows. */
23286 if (mode_signbit_p (mode, *loc))
23287 return false;
23288
23289 val = INTVAL (*loc);
23290
23291 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
23292 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
23293 if ((val < 0 && val != -128)
23294 || val == 128)
23295 {
23296 *loc = GEN_INT (-val);
23297 return true;
23298 }
23299
23300 return false;
23301}
23302
23303/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23304 optabs would emit if we didn't have TFmode patterns. */
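/* Conceptually (an illustrative sketch only, not the emitted RTL):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
	 return (double) (int64_t) u;
       uint64_t half = (u >> 1) | (u & 1);
       return 2.0 * (double) (int64_t) half;
     }  */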
23305
23306void
23307x86_emit_floatuns (rtx operands[2])
23308{
23309 rtx_code_label *neglab, *donelab;
23310 rtx i0, i1, f0, in, out;
23311 machine_mode mode, inmode;
23312
23313 inmode = GET_MODE (operands[1]);
23314 gcc_assert (inmode == SImode || inmode == DImode);
23315
23316 out = operands[0];
23317 in = force_reg (inmode, operands[1]);
23318 mode = GET_MODE (out);
23319 neglab = gen_label_rtx ();
23320 donelab = gen_label_rtx ();
23321 f0 = gen_reg_rtx (mode);
23322
23323 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23324
23325 expand_float (out, in, 0);
23326
23327 emit_jump_insn (gen_jump (donelab));
23328 emit_barrier ();
23329
23330 emit_label (neglab);
23331
23332 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23333 1, OPTAB_DIRECT);
23334 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23335 1, OPTAB_DIRECT);
23336 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23337
23338 expand_float (f0, i0, 0);
23339
23340 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
23341
23342 emit_label (donelab);
23343}
23344
23345/* Return the diagnostic message string if conversion from FROMTYPE to
23346 TOTYPE is not allowed, NULL otherwise. */
23347
23348static const char *
23349ix86_invalid_conversion (const_tree fromtype, const_tree totype)
23350{
23351 machine_mode from_mode = element_mode (fromtype);
23352 machine_mode to_mode = element_mode (totype);
23353
23354 if (!TARGET_SSE2 && from_mode != to_mode)
23355 {
      /* Do not allow conversions to/from BFmode/HFmode scalar types
	 when TARGET_SSE2 is not available.  */
23358 if (from_mode == BFmode)
23359 return N_("invalid conversion from type %<__bf16%> "
23360 "without option %<-msse2%>");
23361 if (from_mode == HFmode)
23362 return N_("invalid conversion from type %<_Float16%> "
23363 "without option %<-msse2%>");
23364 if (to_mode == BFmode)
23365 return N_("invalid conversion to type %<__bf16%> "
23366 "without option %<-msse2%>");
23367 if (to_mode == HFmode)
23368 return N_("invalid conversion to type %<_Float16%> "
23369 "without option %<-msse2%>");
23370 }
23371
  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 has been redefined as the real __bf16 type instead
     of short since GCC 13.  */
23375 if (element_mode (fromtype) != element_mode (totype)
23376 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
23377 {
23378 /* Warn for silent implicit conversion where user may expect
23379 a bitcast. */
23380 if ((TYPE_MODE (fromtype) == BFmode
23381 && TYPE_MODE (totype) == HImode)
23382 || (TYPE_MODE (totype) == BFmode
23383 && TYPE_MODE (fromtype) == HImode))
23384 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
23385 "to real %<__bf16%> since GCC 13.1, be careful of "
23386 "implicit conversion between %<__bf16%> and %<short%>; "
23387 "an explicit bitcast may be needed here");
23388 }
23389
23390 /* Conversion allowed. */
23391 return NULL;
23392}
23393
23394/* Return the diagnostic message string if the unary operation OP is
23395 not permitted on TYPE, NULL otherwise. */
23396
23397static const char *
23398ix86_invalid_unary_op (int op, const_tree type)
23399{
23400 machine_mode mmode = element_mode (type);
  /* Reject all single-operand operations on BFmode/HFmode except for
     taking the address (&) when TARGET_SSE2 is not available.  */
23403 if (!TARGET_SSE2 && op != ADDR_EXPR)
23404 {
23405 if (mmode == BFmode)
23406 return N_("operation not permitted on type %<__bf16%> "
23407 "without option %<-msse2%>");
23408 if (mmode == HFmode)
23409 return N_("operation not permitted on type %<_Float16%> "
23410 "without option %<-msse2%>");
23411 }
23412
23413 /* Operation allowed. */
23414 return NULL;
23415}
23416
23417/* Return the diagnostic message string if the binary operation OP is
23418 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23419
23420static const char *
23421ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
23422 const_tree type2)
23423{
23424 machine_mode type1_mode = element_mode (type1);
23425 machine_mode type2_mode = element_mode (type2);
23426 /* Reject all 2-operand operations on BFmode or HFmode
23427 when TARGET_SSE2 is not available. */
23428 if (!TARGET_SSE2)
23429 {
23430 if (type1_mode == BFmode || type2_mode == BFmode)
23431 return N_("operation not permitted on type %<__bf16%> "
23432 "without option %<-msse2%>");
23433
23434 if (type1_mode == HFmode || type2_mode == HFmode)
23435 return N_("operation not permitted on type %<_Float16%> "
23436 "without option %<-msse2%>");
23437 }
23438
23439 /* Operation allowed. */
23440 return NULL;
23441}
23442
23443
23444/* Target hook for scalar_mode_supported_p. */
23445static bool
23446ix86_scalar_mode_supported_p (scalar_mode mode)
23447{
23448 if (DECIMAL_FLOAT_MODE_P (mode))
23449 return default_decimal_float_supported_p ();
23450 else if (mode == TFmode)
23451 return true;
23452 else if (mode == HFmode || mode == BFmode)
23453 return true;
23454 else
23455 return default_scalar_mode_supported_p (mode);
23456}
23457
/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode or BFmode, and punt to the generic implementation
   otherwise.  */
23460
23461static bool
23462ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23463{
23464 /* NB: Always return TRUE for HFmode so that the _Float16 type will
23465 be defined by the C front-end for AVX512FP16 intrinsics. We will
23466 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
23467 enabled. */
23468 return ((mode == HFmode || mode == BFmode)
23469 ? true
23470 : default_libgcc_floating_mode_supported_p (mode));
23471}
23472
23473/* Implements target hook vector_mode_supported_p. */
23474static bool
23475ix86_vector_mode_supported_p (machine_mode mode)
23476{
23477 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
23478 either. */
23479 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
23480 return false;
23481 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23482 return true;
23483 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23484 return true;
23485 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
23486 return true;
23487 if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
23488 return true;
23489 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
23490 && VALID_MMX_REG_MODE (mode))
23491 return true;
23492 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
23493 && VALID_MMX_REG_MODE_3DNOW (mode))
23494 return true;
23495 if (mode == V2QImode)
23496 return true;
23497 return false;
23498}
23499
23500/* Target hook for c_mode_for_suffix. */
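/* E.g. (illustrative) a literal such as 1.0q has __float128 type (TFmode)
   and 1.0w has __float80 type (XFmode) on x86.  */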
23501static machine_mode
23502ix86_c_mode_for_suffix (char suffix)
23503{
23504 if (suffix == 'q')
23505 return TFmode;
23506 if (suffix == 'w')
23507 return XFmode;
23508
23509 return VOIDmode;
23510}
23511
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have a "j" prefix: "j" plus an upper-case
   letter means the constraint is strictly EGPR-enabled, while "j" plus
   a lower-case letter indicates the constraint is strictly gpr16 only.

   The "g" constraint is special: split it into r, m and i, as there is
   no corresponding general constraint defined for the backend.

   Here is the full list of mappings for constraints that may involve
   a GPR to the "j"-prefixed forms.
23522
23523 "g" -> "jrjmi"
23524 "r" -> "jr"
23525 "m" -> "jm"
23526 "<" -> "j<"
23527 ">" -> "j>"
23528 "o" -> "jo"
23529 "V" -> "jV"
23530 "p" -> "jp"
23531 "Bm" -> "ja"
23532*/
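/* For instance (illustrative), an operand constraint string "=rm" is
   rewritten to "=jrjm", while "=@cc..." flag outputs are left untouched.  */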
23533
23534static void map_egpr_constraints (vec<const char *> &constraints)
23535{
23536 for (size_t i = 0; i < constraints.length(); i++)
23537 {
23538 const char *cur = constraints[i];
23539
      if (startswith (cur, "=@cc"))
23541 continue;
23542
      int len = strlen (cur);
23544 auto_vec<char> buf;
23545
23546 for (int j = 0; j < len; j++)
23547 {
23548 switch (cur[j])
23549 {
23550 case 'g':
	      buf.safe_push ('j');
	      buf.safe_push ('r');
	      buf.safe_push ('j');
	      buf.safe_push ('m');
	      buf.safe_push ('i');
23556 break;
23557 case 'r':
23558 case 'm':
23559 case '<':
23560 case '>':
23561 case 'o':
23562 case 'V':
23563 case 'p':
	      buf.safe_push ('j');
	      buf.safe_push (cur[j]);
23566 break;
23567 case 'B':
23568 if (cur[j + 1] == 'm')
23569 {
		  buf.safe_push ('j');
		  buf.safe_push ('a');
23572 j++;
23573 }
23574 else
23575 {
		  buf.safe_push (cur[j]);
		  buf.safe_push (cur[j + 1]);
23578 j++;
23579 }
23580 break;
23581 case 'T':
23582 case 'Y':
23583 case 'W':
23584 case 'j':
	      buf.safe_push (cur[j]);
	      buf.safe_push (cur[j + 1]);
23587 j++;
23588 break;
23589 default:
	      buf.safe_push (cur[j]);
23591 break;
23592 }
23593 }
      buf.safe_push ('\0');
23595 constraints[i] = xstrdup (buf.address ());
23596 }
23597}
23598
23599/* Worker function for TARGET_MD_ASM_ADJUST.
23600
23601 We implement asm flag outputs, and maintain source compatibility
23602 with the old cc0-based compiler. */
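/* For example (illustrative), with asm flag outputs a user can write

     bool carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (dst) : "r" (src));

   and CARRY receives the state of the carry flag after the addition.  */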
23603
23604static rtx_insn *
23605ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
23606 vec<machine_mode> & /*input_modes*/,
23607 vec<const char *> &constraints, vec<rtx> &clobbers,
23608 HARD_REG_SET &clobbered_regs, location_t loc)
23609{
23610 bool saw_asm_flag = false;
23611
23612 start_sequence ();
23613
23614 if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
23615 map_egpr_constraints (constraints);
23616
23617 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
23618 {
23619 const char *con = constraints[i];
      if (!startswith (con, "=@cc"))
23621 continue;
23622 con += 4;
      if (strchr (con, ',') != NULL)
23624 {
23625 error_at (loc, "alternatives not allowed in %<asm%> flag output");
23626 continue;
23627 }
23628
23629 bool invert = false;
23630 if (con[0] == 'n')
23631 invert = true, con++;
23632
23633 machine_mode mode = CCmode;
23634 rtx_code code = UNKNOWN;
23635
23636 switch (con[0])
23637 {
23638 case 'a':
23639 if (con[1] == 0)
23640 mode = CCAmode, code = EQ;
23641 else if (con[1] == 'e' && con[2] == 0)
23642 mode = CCCmode, code = NE;
23643 break;
23644 case 'b':
23645 if (con[1] == 0)
23646 mode = CCCmode, code = EQ;
23647 else if (con[1] == 'e' && con[2] == 0)
23648 mode = CCAmode, code = NE;
23649 break;
23650 case 'c':
23651 if (con[1] == 0)
23652 mode = CCCmode, code = EQ;
23653 break;
23654 case 'e':
23655 if (con[1] == 0)
23656 mode = CCZmode, code = EQ;
23657 break;
23658 case 'g':
23659 if (con[1] == 0)
23660 mode = CCGCmode, code = GT;
23661 else if (con[1] == 'e' && con[2] == 0)
23662 mode = CCGCmode, code = GE;
23663 break;
23664 case 'l':
23665 if (con[1] == 0)
23666 mode = CCGCmode, code = LT;
23667 else if (con[1] == 'e' && con[2] == 0)
23668 mode = CCGCmode, code = LE;
23669 break;
23670 case 'o':
23671 if (con[1] == 0)
23672 mode = CCOmode, code = EQ;
23673 break;
23674 case 'p':
23675 if (con[1] == 0)
23676 mode = CCPmode, code = EQ;
23677 break;
23678 case 's':
23679 if (con[1] == 0)
23680 mode = CCSmode, code = EQ;
23681 break;
23682 case 'z':
23683 if (con[1] == 0)
23684 mode = CCZmode, code = EQ;
23685 break;
23686 }
23687 if (code == UNKNOWN)
23688 {
23689 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
23690 continue;
23691 }
23692 if (invert)
23693 code = reverse_condition (code);
23694
23695 rtx dest = outputs[i];
23696 if (!saw_asm_flag)
23697 {
23698 /* This is the first asm flag output. Here we put the flags
23699 register in as the real output and adjust the condition to
23700 allow it. */
23701 constraints[i] = "=Bf";
23702 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
23703 saw_asm_flag = true;
23704 }
23705 else
23706 {
23707 /* We don't need the flags register as output twice. */
23708 constraints[i] = "=X";
23709 outputs[i] = gen_rtx_SCRATCH (SImode);
23710 }
23711
23712 rtx x = gen_rtx_REG (mode, FLAGS_REG);
23713 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
23714
23715 machine_mode dest_mode = GET_MODE (dest);
23716 if (!SCALAR_INT_MODE_P (dest_mode))
23717 {
23718 error_at (loc, "invalid type for %<asm%> flag output");
23719 continue;
23720 }
23721
23722 if (dest_mode == QImode)
23723 emit_insn (gen_rtx_SET (dest, x));
23724 else
23725 {
23726 rtx reg = gen_reg_rtx (QImode);
23727 emit_insn (gen_rtx_SET (reg, x));
23728
23729 reg = convert_to_mode (dest_mode, reg, 1);
23730 emit_move_insn (dest, reg);
23731 }
23732 }
23733
23734 rtx_insn *seq = get_insns ();
23735 end_sequence ();
23736
23737 if (saw_asm_flag)
23738 return seq;
23739 else
23740 {
23741 /* If we had no asm flag outputs, clobber the flags. */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
23744 return NULL;
23745 }
23746}
23747
/* Implement the TARGET_ENCODE_SECTION_INFO target hook.  */
23749
23750static void ATTRIBUTE_UNUSED
23751ix86_encode_section_info (tree decl, rtx rtl, int first)
23752{
23753 default_encode_section_info (decl, rtl, first);
23754
  if (ix86_in_large_data_p (decl))
23756 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23757}
23758
23759/* Worker function for REVERSE_CONDITION. */
23760
23761enum rtx_code
23762ix86_reverse_condition (enum rtx_code code, machine_mode mode)
23763{
23764 return (mode == CCFPmode
23765 ? reverse_condition_maybe_unordered (code)
23766 : reverse_condition (code));
23767}
23768
23769/* Output code to perform an x87 FP register move, from OPERANDS[1]
23770 to OPERANDS[0]. */
23771
23772const char *
23773output_387_reg_move (rtx_insn *insn, rtx *operands)
23774{
23775 if (REG_P (operands[0]))
23776 {
23777 if (REG_P (operands[1])
23778 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23779 {
23780 if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
23782 return "fstp\t%y0";
23783 }
23784 if (STACK_TOP_P (operands[0]))
23785 return "fld%Z1\t%y1";
23786 return "fst\t%y0";
23787 }
23788 else if (MEM_P (operands[0]))
23789 {
23790 gcc_assert (REG_P (operands[1]));
23791 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23792 return "fstp%Z0\t%y0";
23793 else
23794 {
23795 /* There is no non-popping store to memory for XFmode.
23796 So if we need one, follow the store with a load. */
23797 if (GET_MODE (operands[0]) == XFmode)
23798 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
23799 else
23800 return "fst%Z0\t%y0";
23801 }
23802 }
23803 else
23804 gcc_unreachable();
23805}
23806#ifdef TARGET_SOLARIS
23807/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
23808
23809static void
23810i386_solaris_elf_named_section (const char *name, unsigned int flags,
23811 tree decl)
23812{
23813 /* With Binutils 2.15, the "@unwind" marker must be specified on
23814 every occurrence of the ".eh_frame" section, not just the first
23815 one. */
23816 if (TARGET_64BIT
23817 && strcmp (name, ".eh_frame") == 0)
23818 {
23819 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
23820 flags & SECTION_WRITE ? "aw" : "a");
23821 return;
23822 }
23823
23824#ifndef USE_GAS
23825 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
23826 {
23827 solaris_elf_asm_comdat_section (name, flags, decl);
23828 return;
23829 }
23830
23831 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
23832 SPARC assembler. One cannot mix single-letter flags and #exclude, so
23833 only emit the latter here. */
23834 if (flags & SECTION_EXCLUDE)
23835 {
23836 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
23837 return;
23838 }
23839#endif
23840
23841 default_elf_asm_named_section (name, flags, decl);
23842}
23843#endif /* TARGET_SOLARIS */
23844
23845/* Return the mangling of TYPE if it is an extended fundamental type. */
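/* For example (illustrative), "void f (__float128)" mangles as "_Z1fg"
   and "void f (_Float16)" as "_Z1fDF16_".  */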
23846
23847static const char *
23848ix86_mangle_type (const_tree type)
23849{
23850 type = TYPE_MAIN_VARIANT (type);
23851
23852 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
23853 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
23854 return NULL;
23855
23856 if (type == float128_type_node || type == float64x_type_node)
23857 return NULL;
23858
23859 switch (TYPE_MODE (type))
23860 {
23861 case E_BFmode:
23862 return "DF16b";
23863 case E_HFmode:
23864 /* _Float16 is "DF16_".
23865 Align with clang's decision in https://reviews.llvm.org/D33719. */
23866 return "DF16_";
23867 case E_TFmode:
23868 /* __float128 is "g". */
23869 return "g";
23870 case E_XFmode:
23871 /* "long double" or __float80 is "e". */
23872 return "e";
23873 default:
23874 return NULL;
23875 }
23876}
23877
/* Create C++ tinfo symbols only for conditionally available fundamental
   types.  */
23880
23881static void
23882ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
23883{
23884 extern tree ix86_float16_type_node;
23885 extern tree ix86_bf16_type_node;
23886
23887 if (!TARGET_SSE2)
23888 {
23889 if (!float16_type_node)
23890 float16_type_node = ix86_float16_type_node;
23891 if (!bfloat16_type_node)
23892 bfloat16_type_node = ix86_bf16_type_node;
23893 callback (float16_type_node);
23894 callback (bfloat16_type_node);
23895 float16_type_node = NULL_TREE;
23896 bfloat16_type_node = NULL_TREE;
23897 }
23898}
23899
23900static GTY(()) tree ix86_tls_stack_chk_guard_decl;
23901
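/* Implement TARGET_STACK_PROTECT_GUARD.  With a TLS-based guard the canary
   is read from a fixed offset in the thread block (e.g. %fs:40 on x86-64
   glibc; illustrative), otherwise fall back to the default global
   __stack_chk_guard.  */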
23902static tree
23903ix86_stack_protect_guard (void)
23904{
23905 if (TARGET_SSP_TLS_GUARD)
23906 {
23907 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
23908 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
23909 tree type = build_qualified_type (type_node, qual);
23910 tree t;
23911
23912 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
23913 {
23914 t = ix86_tls_stack_chk_guard_decl;
23915
23916 if (t == NULL)
23917 {
23918 rtx x;
23919
23920 t = build_decl
23921 (UNKNOWN_LOCATION, VAR_DECL,
23922 get_identifier (ix86_stack_protector_guard_symbol_str),
23923 type);
23924 TREE_STATIC (t) = 1;
23925 TREE_PUBLIC (t) = 1;
23926 DECL_EXTERNAL (t) = 1;
23927 TREE_USED (t) = 1;
23928 TREE_THIS_VOLATILE (t) = 1;
23929 DECL_ARTIFICIAL (t) = 1;
23930 DECL_IGNORED_P (t) = 1;
23931
23932 /* Do not share RTL as the declaration is visible outside of
23933 current function. */
23934 x = DECL_RTL (t);
23935 RTX_FLAG (x, used) = 1;
23936
23937 ix86_tls_stack_chk_guard_decl = t;
23938 }
23939 }
23940 else
23941 {
23942 tree asptrtype = build_pointer_type (type);
23943
23944 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
23945 t = build2 (MEM_REF, asptrtype, t,
23946 build_int_cst (asptrtype, 0));
23947 TREE_THIS_VOLATILE (t) = 1;
23948 }
23949
23950 return t;
23951 }
23952
23953 return default_stack_protect_guard ();
23954}
23955
23956/* For 32-bit code we can save PIC register setup by using
23957 __stack_chk_fail_local hidden function instead of calling
23958 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
23959 register, so it is better to call __stack_chk_fail directly. */
23960
23961static tree ATTRIBUTE_UNUSED
23962ix86_stack_protect_fail (void)
23963{
23964 return TARGET_64BIT
23965 ? default_external_stack_protect_fail ()
23966 : default_hidden_stack_protect_fail ();
23967}
23968
23969/* Select a format to encode pointers in exception handling data. CODE
23970 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
23971 true if the symbol may be affected by dynamic relocations.
23972
23973 ??? All x86 object file formats are capable of representing this.
23974 After all, the relocation needed is the same as for the call insn.
23975 Whether or not a particular assembler allows us to enter such, I
23976 guess we'll have to see. */
23977
23978int
23979asm_preferred_eh_data_format (int code, int global)
23980{
23981 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
23982 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
23983 {
23984 int type = DW_EH_PE_sdata8;
23985 if (ptr_mode == SImode
23986 || ix86_cmodel == CM_SMALL_PIC
23987 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
23988 type = DW_EH_PE_sdata4;
23989 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
23990 }
23991
23992 if (ix86_cmodel == CM_SMALL
23993 || (ix86_cmodel == CM_MEDIUM && code))
23994 return DW_EH_PE_udata4;
23995
23996 return DW_EH_PE_absptr;
23997}
23998
23999/* Implement targetm.vectorize.builtin_vectorization_cost. */
24000static int
24001ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
24002 tree vectype, int)
24003{
24004 bool fp = false;
24005 machine_mode mode = TImode;
24006 int index;
24007 if (vectype != NULL)
24008 {
24009 fp = FLOAT_TYPE_P (vectype);
24010 mode = TYPE_MODE (vectype);
24011 }
24012
24013 switch (type_of_cost)
24014 {
24015 case scalar_stmt:
24016 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
24017
24018 case scalar_load:
      /* Load/store costs are relative to a register move, which is 2.
	 Recompute them as COSTS_N_INSNS so everything has the same base.  */
24021 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
24022 : ix86_cost->int_load [2]) / 2;
24023
24024 case scalar_store:
24025 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
24026 : ix86_cost->int_store [2]) / 2;
24027
24028 case vector_stmt:
24029 return ix86_vec_cost (mode,
24030 			    fp ? ix86_cost->addss : ix86_cost->sse_op);
24031
24032 case vector_load:
24033 index = sse_store_index (mode);
24034 /* See PR82713 - we may end up being called on non-vector type. */
24035 if (index < 0)
24036 index = 2;
24037 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
24038
24039 case vector_store:
24040 index = sse_store_index (mode);
24041 /* See PR82713 - we may end up being called on non-vector type. */
24042 if (index < 0)
24043 index = 2;
24044 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
24045
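    /* Moving a value between a vector lane and a scalar register is
       costed as a single generic SSE operation.  */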
24046 case vec_to_scalar:
24047 case scalar_to_vec:
24048       return ix86_vec_cost (mode, ix86_cost->sse_op);
24049
24050 /* We should have separate costs for unaligned loads and gather/scatter.
24051 Do that incrementally. */
24052 case unaligned_load:
24053 index = sse_store_index (mode);
24054 /* See PR82713 - we may end up being called on non-vector type. */
24055 if (index < 0)
24056 index = 2;
24057 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
24058
24059 case unaligned_store:
24060 index = sse_store_index (mode);
24061 /* See PR82713 - we may end up being called on non-vector type. */
24062 if (index < 0)
24063 index = 2;
24064 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
24065
24066 case vector_gather_load:
24067 return ix86_vec_cost (mode,
24068 COSTS_N_INSNS
24069 (ix86_cost->gather_static
24070 + ix86_cost->gather_per_elt
24071 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24072
24073 case vector_scatter_store:
24074 return ix86_vec_cost (mode,
24075 COSTS_N_INSNS
24076 (ix86_cost->scatter_static
24077 + ix86_cost->scatter_per_elt
24078 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24079
24080 case cond_branch_taken:
24081 return ix86_cost->cond_taken_branch_cost;
24082
24083 case cond_branch_not_taken:
24084 return ix86_cost->cond_not_taken_branch_cost;
24085
24086 case vec_perm:
24087 case vec_promote_demote:
24088       return ix86_vec_cost (mode, ix86_cost->sse_op);
24089
24090 case vec_construct:
24091 {
24092 	int n = TYPE_VECTOR_SUBPARTS (vectype);
24093 /* N - 1 element inserts into an SSE vector, the possible
24094 GPR -> XMM move is accounted for in add_stmt_cost. */
24095 if (GET_MODE_BITSIZE (mode) <= 128)
24096 return (n - 1) * ix86_cost->sse_op;
24097 /* One vinserti128 for combining two SSE vectors for AVX256. */
24098 else if (GET_MODE_BITSIZE (mode) == 256)
24099 return ((n - 2) * ix86_cost->sse_op
24100 		  + ix86_vec_cost (mode, ix86_cost->addss));
24101 /* One vinserti64x4 and two vinserti128 for combining SSE
24102 and AVX256 vectors to AVX512. */
24103 else if (GET_MODE_BITSIZE (mode) == 512)
24104 return ((n - 4) * ix86_cost->sse_op
24105 		  + 3 * ix86_vec_cost (mode, ix86_cost->addss));
24106 gcc_unreachable ();
24107 }
24108
24109 default:
24110 gcc_unreachable ();
24111 }
24112}
24113
24114
24115/* This function returns the calling-ABI-specific va_list type node
24116   for FNDECL.  */
24117
24118static tree
24119ix86_fn_abi_va_list (tree fndecl)
24120{
24121 if (!TARGET_64BIT)
24122 return va_list_type_node;
24123 gcc_assert (fndecl != NULL_TREE);
24124
24125   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
24126 return ms_va_list_type_node;
24127 else
24128 return sysv_va_list_type_node;
24129}
24130
24131/* Returns the canonical va_list type specified by TYPE.  If there
24132   is no valid TYPE provided, it returns NULL_TREE.  */
24133
24134static tree
24135ix86_canonical_va_list_type (tree type)
24136{
24137 if (TARGET_64BIT)
24138 {
24139       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
24140 return ms_va_list_type_node;
24141
24142 if ((TREE_CODE (type) == ARRAY_TYPE
24143 && integer_zerop (array_type_nelts (type)))
24144 || POINTER_TYPE_P (type))
24145 {
24146 tree elem_type = TREE_TYPE (type);
24147 if (TREE_CODE (elem_type) == RECORD_TYPE
24148 	      && lookup_attribute ("sysv_abi va_list",
24149 TYPE_ATTRIBUTES (elem_type)))
24150 return sysv_va_list_type_node;
24151 }
24152
24153 return NULL_TREE;
24154 }
24155
24156 return std_canonical_va_list_type (type);
24157}
24158
24159/* Iterate through the target-specific builtin types for va_list.
24160 IDX denotes the iterator, *PTREE is set to the result type of
24161 the va_list builtin, and *PNAME to its internal type.
24162 Returns zero if there is no element for this index, otherwise
24163 IDX should be increased upon the next call.
24164 Note, do not iterate a base builtin's name like __builtin_va_list.
24165 Used from c_common_nodes_and_builtins. */
24166
24167static int
24168ix86_enum_va_list (int idx, const char **pname, tree *ptree)
24169{
24170 if (TARGET_64BIT)
24171 {
24172 switch (idx)
24173 {
24174 default:
24175 break;
24176
24177 case 0:
24178 *ptree = ms_va_list_type_node;
24179 *pname = "__builtin_ms_va_list";
24180 return 1;
24181
24182 case 1:
24183 *ptree = sysv_va_list_type_node;
24184 *pname = "__builtin_sysv_va_list";
24185 return 1;
24186 }
24187 }
24188
24189 return 0;
24190}
24191
24192#undef TARGET_SCHED_DISPATCH
24193#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
24194#undef TARGET_SCHED_DISPATCH_DO
24195#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
24196#undef TARGET_SCHED_REASSOCIATION_WIDTH
24197#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
24198#undef TARGET_SCHED_REORDER
24199#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
24200#undef TARGET_SCHED_ADJUST_PRIORITY
24201#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
24202#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
24203#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
24204 ix86_dependencies_evaluation_hook
24205
24206
24207/* Implementation of the reassociation_width target hook, used by the
24208   reassoc pass to identify the level of parallelism in a reassociated
24209   tree.  The operation's tree code is passed in OP and the operands'
24210   machine mode in MODE.  */
24211
24212static int
24213ix86_reassociation_width (unsigned int op, machine_mode mode)
24214{
24215 int width = 1;
24216 /* Vector part. */
24217 if (VECTOR_MODE_P (mode))
24218 {
24219 int div = 1;
24220 if (INTEGRAL_MODE_P (mode))
24221 width = ix86_cost->reassoc_vec_int;
24222 else if (FLOAT_MODE_P (mode))
24223 width = ix86_cost->reassoc_vec_fp;
24224
24225 if (width == 1)
24226 return 1;
24227
24228 /* Integer vector instructions execute in FP unit
24229 and can execute 3 additions and one multiplication per cycle. */
24230 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
24231 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
24232 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
24233 return 1;
24234
24235       /* Account for targets that split wide vectors into multiple parts.  */
24236 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
24237 div = GET_MODE_BITSIZE (mode) / 256;
24238 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
24239 div = GET_MODE_BITSIZE (mode) / 128;
24240 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
24241 div = GET_MODE_BITSIZE (mode) / 64;
24242 width = (width + div - 1) / div;
24243 }
24244 /* Scalar part. */
24245 else if (INTEGRAL_MODE_P (mode))
24246 width = ix86_cost->reassoc_int;
24247 else if (FLOAT_MODE_P (mode))
24248 width = ix86_cost->reassoc_fp;
24249
24250 /* Avoid using too many registers in 32bit mode. */
24251 if (!TARGET_64BIT && width > 2)
24252 width = 2;
24253 return width;
24254}
24255
24256/* ??? No autovectorization into MMX or 3DNOW until we can reliably
24257 place emms and femms instructions. */
24258
24259static machine_mode
24260ix86_preferred_simd_mode (scalar_mode mode)
24261{
24262 if (!TARGET_SSE)
24263 return word_mode;
24264
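  /* Pick the widest vector the enabled ISA and preference flags allow:
     512 bits with AVX512 (unless 256-bit vectors are preferred),
     256 bits with AVX (unless 128-bit vectors are preferred),
     otherwise 128 bits.  */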
24265 switch (mode)
24266 {
24267 case E_QImode:
24268 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24269 return V64QImode;
24270 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24271 return V32QImode;
24272 else
24273 return V16QImode;
24274
24275 case E_HImode:
24276 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24277 return V32HImode;
24278 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24279 return V16HImode;
24280 else
24281 return V8HImode;
24282
24283 case E_SImode:
24284 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24285 return V16SImode;
24286 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24287 return V8SImode;
24288 else
24289 return V4SImode;
24290
24291 case E_DImode:
24292 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24293 return V8DImode;
24294 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24295 return V4DImode;
24296 else
24297 return V2DImode;
24298
24299 case E_HFmode:
24300 if (TARGET_AVX512FP16)
24301 {
24302 if (TARGET_AVX512VL)
24303 {
24304 if (TARGET_PREFER_AVX128)
24305 return V8HFmode;
24306 else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
24307 return V16HFmode;
24308 }
24309 if (TARGET_EVEX512)
24310 return V32HFmode;
24311 }
24312 return word_mode;
24313
24314 case E_SFmode:
24315 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24316 return V16SFmode;
24317 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24318 return V8SFmode;
24319 else
24320 return V4SFmode;
24321
24322 case E_DFmode:
24323 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24324 return V8DFmode;
24325 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24326 return V4DFmode;
24327 else if (TARGET_SSE2)
24328 return V2DFmode;
24329 /* FALLTHRU */
24330
24331 default:
24332 return word_mode;
24333 }
24334}
24335
24336/* If AVX is enabled then try vectorizing with both 256bit and 128bit
24337 vectors. If AVX512F is enabled then try vectorizing with 512bit,
24338 256bit and 128bit vectors. */
24339
24340static unsigned int
24341ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
24342{
24343 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24344 {
24345 modes->safe_push (V64QImode);
24346 modes->safe_push (V32QImode);
24347 modes->safe_push (V16QImode);
24348 }
24349 else if (TARGET_AVX512F && TARGET_EVEX512 && all)
24350 {
24351 modes->safe_push (V32QImode);
24352 modes->safe_push (V16QImode);
24353 modes->safe_push (V64QImode);
24354 }
24355 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24356 {
24357 modes->safe_push (V32QImode);
24358 modes->safe_push (V16QImode);
24359 }
24360 else if (TARGET_AVX && all)
24361 {
24362 modes->safe_push (V16QImode);
24363 modes->safe_push (V32QImode);
24364 }
24365 else if (TARGET_SSE2)
24366 modes->safe_push (V16QImode);
24367
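  /* Additionally expose the 64-bit (MMX emulated with SSE) and 32-bit
     QImode vector modes.  */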
24368 if (TARGET_MMX_WITH_SSE)
24369 modes->safe_push (V8QImode);
24370
24371 if (TARGET_SSE2)
24372 modes->safe_push (V4QImode);
24373
24374 return 0;
24375}
24376
24377/* Implementation of targetm.vectorize.get_mask_mode.  */
24378
24379static opt_machine_mode
24380ix86_get_mask_mode (machine_mode data_mode)
24381{
24382 unsigned vector_size = GET_MODE_SIZE (data_mode);
24383 unsigned nunits = GET_MODE_NUNITS (data_mode);
24384 unsigned elem_size = vector_size / nunits;
24385
24386 /* Scalar mask case. */
24387 if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
24388 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
24389 /* AVX512FP16 only supports vector comparison
24390 to kmask for _Float16. */
24391 || (TARGET_AVX512VL && TARGET_AVX512FP16
24392 && GET_MODE_INNER (data_mode) == E_HFmode))
24393 {
24394 if (elem_size == 4
24395 || elem_size == 8
24396 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
24397 	return smallest_int_mode_for_size (nunits);
24398 }
24399
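  /* Otherwise fall back to an integer vector mask of the same overall
     size as the data vector, with elements matching the data element
     size.  */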
24400 scalar_int_mode elem_mode
24401     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
24402
24403 gcc_assert (elem_size * nunits == vector_size);
24404
24405 return mode_for_vector (elem_mode, nunits);
24406}
24407
24408
24409
24410/* Return class of registers which could be used for pseudo of MODE
24411 and of class RCLASS for spilling instead of memory. Return NO_REGS
24412 if it is not possible or non-profitable. */
24413
24414/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
24415
24416static reg_class_t
24417ix86_spill_class (reg_class_t rclass, machine_mode mode)
24418{
24419 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
24420 && TARGET_SSE2
24421 && TARGET_INTER_UNIT_MOVES_TO_VEC
24422 && TARGET_INTER_UNIT_MOVES_FROM_VEC
24423 && (mode == SImode || (TARGET_64BIT && mode == DImode))
24424 && INTEGER_CLASS_P (rclass))
24425 return ALL_SSE_REGS;
24426 return NO_REGS;
24427}
24428
24429/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
24430 but returns a lower bound. */
24431
24432static unsigned int
24433ix86_max_noce_ifcvt_seq_cost (edge e)
24434{
24435 bool predictable_p = predictable_edge_p (e);
24436 if (predictable_p)
24437 {
24438 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
24439 return param_max_rtl_if_conversion_predictable_cost;
24440 }
24441 else
24442 {
24443 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
24444 return param_max_rtl_if_conversion_unpredictable_cost;
24445 }
24446
24447 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
24448}
24449
24450/* Return true if SEQ is a good candidate as a replacement for the
24451 if-convertible sequence described in IF_INFO. */
24452
24453static bool
24454ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
24455{
24456 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
24457 {
24458 int cmov_cnt = 0;
24459 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
24460 Maybe we should allow even more conditional moves as long as they
24461 are used far enough not to stall the CPU, or also consider
24462 IF_INFO->TEST_BB succ edge probabilities. */
24463 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
24464 {
24465 rtx set = single_set (insn);
24466 if (!set)
24467 continue;
24468 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
24469 continue;
24470 rtx src = SET_SRC (set);
24471 machine_mode mode = GET_MODE (src);
24472 if (GET_MODE_CLASS (mode) != MODE_INT
24473 && GET_MODE_CLASS (mode) != MODE_FLOAT)
24474 continue;
24475 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
24476 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
24477 continue;
24478 /* insn is CMOV or FCMOV. */
24479 if (++cmov_cnt > 1)
24480 return false;
24481 }
24482 }
24483 return default_noce_conversion_profitable_p (seq, if_info);
24484}
24485
24486/* x86-specific vector costs. */
24487class ix86_vector_costs : public vector_costs
24488{
24489 using vector_costs::vector_costs;
24490
24491 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
24492 stmt_vec_info stmt_info, slp_tree node,
24493 tree vectype, int misalign,
24494 vect_cost_model_location where) override;
24495 void finish_cost (const vector_costs *) override;
24496};
24497
24498/* Implement targetm.vectorize.create_costs. */
24499
24500static vector_costs *
24501ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
24502{
24503 return new ix86_vector_costs (vinfo, costing_for_scalar);
24504}
24505
24506unsigned
24507ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
24508 stmt_vec_info stmt_info, slp_tree node,
24509 tree vectype, int misalign,
24510 vect_cost_model_location where)
24511{
24512 unsigned retval = 0;
24513 bool scalar_p
24514 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
24515   int stmt_cost = -1;
24516
24517 bool fp = false;
24518 machine_mode mode = scalar_p ? SImode : TImode;
24519
24520 if (vectype != NULL)
24521 {
24522 fp = FLOAT_TYPE_P (vectype);
24523 mode = TYPE_MODE (vectype);
24524 if (scalar_p)
24525 mode = TYPE_MODE (TREE_TYPE (vectype));
24526 }
24527
24528 if ((kind == vector_stmt || kind == scalar_stmt)
24529 && stmt_info
24530       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
24531 {
24532       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
24533 /*machine_mode inner_mode = mode;
24534 if (VECTOR_MODE_P (mode))
24535 inner_mode = GET_MODE_INNER (mode);*/
24536
24537 switch (subcode)
24538 {
24539 case PLUS_EXPR:
24540 case POINTER_PLUS_EXPR:
24541 case MINUS_EXPR:
24542 if (kind == scalar_stmt)
24543 {
24544 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24545 stmt_cost = ix86_cost->addss;
24546 else if (X87_FLOAT_MODE_P (mode))
24547 stmt_cost = ix86_cost->fadd;
24548 else
24549 stmt_cost = ix86_cost->add;
24550 }
24551 else
24552 	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
24553 : ix86_cost->sse_op);
24554 break;
24555
24556 case MULT_EXPR:
24557 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
24558 take it as MULT_EXPR. */
24559 case MULT_HIGHPART_EXPR:
24560 	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
24561 break;
24562 /* There's no direct instruction for WIDEN_MULT_EXPR,
24563 take emulation into account. */
24564 case WIDEN_MULT_EXPR:
24565 	  stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
24566 TYPE_UNSIGNED (vectype));
24567 break;
24568
24569 case NEGATE_EXPR:
24570 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24571 stmt_cost = ix86_cost->sse_op;
24572 else if (X87_FLOAT_MODE_P (mode))
24573 stmt_cost = ix86_cost->fchs;
24574 else if (VECTOR_MODE_P (mode))
24575 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
24576 else
24577 stmt_cost = ix86_cost->add;
24578 break;
24579 case TRUNC_DIV_EXPR:
24580 case CEIL_DIV_EXPR:
24581 case FLOOR_DIV_EXPR:
24582 case ROUND_DIV_EXPR:
24583 case TRUNC_MOD_EXPR:
24584 case CEIL_MOD_EXPR:
24585 case FLOOR_MOD_EXPR:
24586 case RDIV_EXPR:
24587 case ROUND_MOD_EXPR:
24588 case EXACT_DIV_EXPR:
24589 	  stmt_cost = ix86_division_cost (ix86_cost, mode);
24590 break;
24591
24592 case RSHIFT_EXPR:
24593 case LSHIFT_EXPR:
24594 case LROTATE_EXPR:
24595 case RROTATE_EXPR:
24596 {
24597 	    tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
24598 	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
24599 	    stmt_cost = ix86_shift_rotate_cost
24600 			   (ix86_cost,
24601 			    (subcode == RSHIFT_EXPR
24602 			     && !TYPE_UNSIGNED (TREE_TYPE (op1)))
24603 			    ? ASHIFTRT : LSHIFTRT, mode,
24604 			    TREE_CODE (op2) == INTEGER_CST,
24605 			    cst_and_fits_in_hwi (op2)
24606 			    ? int_cst_value (op2) : -1,
24607 			    false, false, NULL, NULL);
24608 }
24609 break;
24610 case NOP_EXPR:
24611 /* Only sign-conversions are free. */
24612 if (tree_nop_conversion_p
24613 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
24614 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
24615 stmt_cost = 0;
24616 break;
24617
24618 case BIT_IOR_EXPR:
24619 case ABS_EXPR:
24620 case ABSU_EXPR:
24621 case MIN_EXPR:
24622 case MAX_EXPR:
24623 case BIT_XOR_EXPR:
24624 case BIT_AND_EXPR:
24625 case BIT_NOT_EXPR:
24626 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24627 stmt_cost = ix86_cost->sse_op;
24628 else if (VECTOR_MODE_P (mode))
24629 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
24630 else
24631 stmt_cost = ix86_cost->add;
24632 break;
24633 default:
24634 break;
24635 }
24636 }
24637
24638 combined_fn cfn;
24639 if ((kind == vector_stmt || kind == scalar_stmt)
24640 && stmt_info
24641 && stmt_info->stmt
24642 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
24643 switch (cfn)
24644 {
24645 case CFN_FMA:
24646 stmt_cost = ix86_vec_cost (mode,
24647 				 mode == SFmode ? ix86_cost->fmass
24648 : ix86_cost->fmasd);
24649 break;
24650 case CFN_MULH:
24651       stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
24652 break;
24653 default:
24654 break;
24655 }
24656
24657 /* If we do elementwise loads into a vector then we are bound by
24658 latency and execution resources for the many scalar loads
24659 (AGU and load ports). Try to account for this by scaling the
24660 construction cost by the number of elements involved. */
24661 if ((kind == vec_construct || kind == vec_to_scalar)
24662 && stmt_info
24663 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
24664 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
24665 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
24666 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
24667 != INTEGER_CST))
24668 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
24669 {
24670       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24671       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
24672 }
24673 else if ((kind == vec_construct || kind == scalar_to_vec)
24674 && node
24675 && SLP_TREE_DEF_TYPE (node) == vect_external_def
24676 && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
24677 {
24678       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24679 unsigned i;
24680 tree op;
24681 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24682 if (TREE_CODE (op) == SSA_NAME)
24683 TREE_VISITED (op) = 0;
24684 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24685 {
24686 if (TREE_CODE (op) != SSA_NAME
24687 || TREE_VISITED (op))
24688 continue;
24689 TREE_VISITED (op) = 1;
24690 gimple *def = SSA_NAME_DEF_STMT (op);
24691 tree tem;
24692 	  if (is_gimple_assign (def)
24693 	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
24694 	      && ((tem = gimple_assign_rhs1 (def)), true)
24695 && TREE_CODE (tem) == SSA_NAME
24696 /* A sign-change expands to nothing. */
24697 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
24698 TREE_TYPE (tem)))
24699 def = SSA_NAME_DEF_STMT (tem);
24700 /* When the component is loaded from memory we can directly
24701 move it to a vector register, otherwise we have to go
24702 via a GPR or via vpinsr which involves similar cost.
24703 Likewise with a BIT_FIELD_REF extracting from a vector
24704 register we can hope to avoid using a GPR. */
24705 	  if (!is_gimple_assign (def)
24706 || ((!gimple_assign_load_p (def)
24707 || (!TARGET_SSE4_1
24708 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
24709 		  && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
24710 || !VECTOR_TYPE_P (TREE_TYPE
24711 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
24712 stmt_cost += ix86_cost->sse_to_integer;
24713 }
24714 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24715 if (TREE_CODE (op) == SSA_NAME)
24716 TREE_VISITED (op) = 0;
24717 }
24718 if (stmt_cost == -1)
24719     stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24720
24721 /* Penalize DFmode vector operations for Bonnell. */
24722 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
24723 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
24724 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
24725
24726 /* Statements in an inner loop relative to the loop being
24727 vectorized are weighted more heavily. The value here is
24728 arbitrary and could potentially be improved with analysis. */
24729 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
24730
24731 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
24732 for Silvermont as it has out of order integer pipeline and can execute
24733 2 scalar instruction per tick, but has in order SIMD pipeline. */
24734 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
24735 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
24736 && stmt_info && stmt_info->stmt)
24737 {
24738 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
24739 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
24740 retval = (retval * 17) / 10;
24741 }
24742
24743 m_costs[where] += retval;
24744
24745 return retval;
24746}
24747
24748void
24749ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
24750{
24751   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
24752 if (loop_vinfo && !m_costing_for_scalar)
24753 {
24754 /* We are currently not asking the vectorizer to compare costs
24755 between different vector mode sizes. When using predication
24756 	 that will end up always choosing the preferred mode size even
24757 if there's a smaller mode covering all lanes. Test for this
24758 situation and artificially reject the larger mode attempt.
24759 ??? We currently lack masked ops for sub-SSE sized modes,
24760 so we could restrict this rejection to AVX and AVX512 modes
24761 but error on the safe side for now. */
24762 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
24763 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
24764 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
24765 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
24766 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
24767 m_costs[vect_body] = INT_MAX;
24768 }
24769
24770 vector_costs::finish_cost (scalar_costs);
24771}
24772
24773/* Validate target specific memory model bits in VAL. */
24774
24775static unsigned HOST_WIDE_INT
24776ix86_memmodel_check (unsigned HOST_WIDE_INT val)
24777{
24778 enum memmodel model = memmodel_from_int (val);
24779 bool strong;
24780
24781 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
24782 |MEMMODEL_MASK)
24783 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
24784 {
24785 warning (OPT_Winvalid_memory_model,
24786 "unknown architecture specific memory model");
24787 return MEMMODEL_SEQ_CST;
24788 }
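  /* The HLE bits only make sense together with sufficiently strong
     memory ordering: HLE_ACQUIRE needs at least ACQUIRE semantics and
     HLE_RELEASE at least RELEASE semantics.  */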
24789 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
24790 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
24791 {
24792 warning (OPT_Winvalid_memory_model,
24793 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
24794 "memory model");
24795 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
24796 }
24797 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
24798 {
24799 warning (OPT_Winvalid_memory_model,
24800 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
24801 "memory model");
24802 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
24803 }
24804 return val;
24805}
24806
24807/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
24808 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
24809 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
24810 or number of vecsize_mangle variants that should be emitted. */
24811
24812static int
24813ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
24814 struct cgraph_simd_clone *clonei,
24815 tree base_type, int num,
24816 bool explicit_p)
24817{
24818 int ret = 1;
24819
24820 if (clonei->simdlen
24821 && (clonei->simdlen < 2
24822 || clonei->simdlen > 1024
24823 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
24824 {
24825 if (explicit_p)
24826 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24827 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
24828 return 0;
24829 }
24830
24831 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
24832 if (TREE_CODE (ret_type) != VOID_TYPE)
24833 switch (TYPE_MODE (ret_type))
24834 {
24835 case E_QImode:
24836 case E_HImode:
24837 case E_SImode:
24838 case E_DImode:
24839 case E_SFmode:
24840 case E_DFmode:
24841 /* case E_SCmode: */
24842 /* case E_DCmode: */
24843 if (!AGGREGATE_TYPE_P (ret_type))
24844 break;
24845 /* FALLTHRU */
24846 default:
24847 if (explicit_p)
24848 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24849 "unsupported return type %qT for simd", ret_type);
24850 return 0;
24851 }
24852
24853 tree t;
24854 int i;
24855 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
24856 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
24857
24858 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
24859 t && t != void_list_node; t = TREE_CHAIN (t), i++)
24860 {
24861 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
24862 switch (TYPE_MODE (arg_type))
24863 {
24864 case E_QImode:
24865 case E_HImode:
24866 case E_SImode:
24867 case E_DImode:
24868 case E_SFmode:
24869 case E_DFmode:
24870 /* case E_SCmode: */
24871 /* case E_DCmode: */
24872 if (!AGGREGATE_TYPE_P (arg_type))
24873 break;
24874 /* FALLTHRU */
24875 default:
24876 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
24877 break;
24878 if (explicit_p)
24879 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24880 "unsupported argument type %qT for simd", arg_type);
24881 return 0;
24882 }
24883 }
24884
24885 if (!TREE_PUBLIC (node->decl) || !explicit_p)
24886 {
24887 /* If the function isn't exported, we can pick up just one ISA
24888 for the clones. */
24889 if (TARGET_AVX512F && TARGET_EVEX512)
24890 clonei->vecsize_mangle = 'e';
24891 else if (TARGET_AVX2)
24892 clonei->vecsize_mangle = 'd';
24893 else if (TARGET_AVX)
24894 clonei->vecsize_mangle = 'c';
24895 else
24896 clonei->vecsize_mangle = 'b';
24897 ret = 1;
24898 }
24899 else
24900 {
24901 clonei->vecsize_mangle = "bcde"[num];
24902 ret = 4;
24903 }
24904 clonei->mask_mode = VOIDmode;
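  /* The mangling letters map to ISA levels: 'b' is SSE2, 'c' AVX,
     'd' AVX2 and 'e' AVX512, each with the vector sizes set below.  */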
24905 switch (clonei->vecsize_mangle)
24906 {
24907 case 'b':
24908 clonei->vecsize_int = 128;
24909 clonei->vecsize_float = 128;
24910 break;
24911 case 'c':
24912 clonei->vecsize_int = 128;
24913 clonei->vecsize_float = 256;
24914 break;
24915 case 'd':
24916 clonei->vecsize_int = 256;
24917 clonei->vecsize_float = 256;
24918 break;
24919 case 'e':
24920 clonei->vecsize_int = 512;
24921 clonei->vecsize_float = 512;
24922 if (TYPE_MODE (base_type) == QImode)
24923 clonei->mask_mode = DImode;
24924 else
24925 clonei->mask_mode = SImode;
24926 break;
24927 }
24928 if (clonei->simdlen == 0)
24929 {
24930 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
24931 clonei->simdlen = clonei->vecsize_int;
24932 else
24933 clonei->simdlen = clonei->vecsize_float;
24934 clonei->simdlen = clonei->simdlen
24935 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
24936 }
24937 else if (clonei->simdlen > 16)
24938 {
24939 /* For compatibility with ICC, use the same upper bounds
24940 for simdlen. In particular, for CTYPE below, use the return type,
24941 	 unless the function returns void, in which case use the characteristic
24942 	 type.  If it is possible for the given SIMDLEN to pass a CTYPE value
24943 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
24944 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
24945 emit corresponding clone. */
24946 tree ctype = ret_type;
24947 if (VOID_TYPE_P (ret_type))
24948 ctype = base_type;
24949 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
24950 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
24951 cnt /= clonei->vecsize_int;
24952 else
24953 cnt /= clonei->vecsize_float;
24954 if (cnt > (TARGET_64BIT ? 16 : 8))
24955 {
24956 if (explicit_p)
24957 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24958 "unsupported simdlen %wd",
24959 clonei->simdlen.to_constant ());
24960 return 0;
24961 }
24962 }
24963 return ret;
24964}
24965
24966/* If SIMD clone NODE can't be used in a vectorized loop
24967 in current function, return -1, otherwise return a badness of using it
24968 (0 if it is most desirable from vecsize_mangle point of view, 1
24969 slightly less desirable, etc.). */
24970
24971static int
24972ix86_simd_clone_usable (struct cgraph_node *node)
24973{
24974 switch (node->simdclone->vecsize_mangle)
24975 {
24976 case 'b':
24977 if (!TARGET_SSE2)
24978 return -1;
24979 if (!TARGET_AVX)
24980 return 0;
24981 return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
24982 case 'c':
24983 if (!TARGET_AVX)
24984 return -1;
24985 return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
24986 case 'd':
24987 if (!TARGET_AVX2)
24988 return -1;
24989 return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
24990 case 'e':
24991 if (!TARGET_AVX512F || !TARGET_EVEX512)
24992 return -1;
24993 return 0;
24994 default:
24995 gcc_unreachable ();
24996 }
24997}
24998
24999/* This function adjusts the unroll factor based on
25000   the hardware capabilities.  For example, bdver3 has
25001 a loop buffer which makes unrolling of smaller
25002 loops less important. This function decides the
25003 unroll factor using number of memory references
25004 (value 32 is used) as a heuristic. */
25005
25006static unsigned
25007ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
25008{
25009 basic_block *bbs;
25010 rtx_insn *insn;
25011 unsigned i;
25012 unsigned mem_count = 0;
25013
25014   /* Unroll small loops when the unroll factor is not explicitly
25015      specified.  */
25016 if (ix86_unroll_only_small_loops && !loop->unroll)
25017 {
25018 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
25019 return MIN (nunroll, ix86_cost->small_unroll_factor);
25020 else
25021 return 1;
25022 }
25023
25024 if (!TARGET_ADJUST_UNROLL)
25025 return nunroll;
25026
25027 /* Count the number of memory references within the loop body.
25028 This value determines the unrolling factor for bdver3 and bdver4
25029 architectures. */
25030 subrtx_iterator::array_type array;
25031 bbs = get_loop_body (loop);
25032 for (i = 0; i < loop->num_nodes; i++)
25033 FOR_BB_INSNS (bbs[i], insn)
25034 if (NONDEBUG_INSN_P (insn))
25035 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
25036 if (const_rtx x = *iter)
25037 if (MEM_P (x))
25038 {
25039 machine_mode mode = GET_MODE (x);
25040 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25041 if (n_words > 4)
25042 mem_count += 2;
25043 else
25044 mem_count += 1;
25045 }
25046   free (bbs);
25047
25048   if (mem_count && mem_count <= 32)
25049 return MIN (nunroll, 32 / mem_count);
25050
25051 return nunroll;
25052}
25053
25054
25055/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
25056
25057static bool
25058ix86_float_exceptions_rounding_supported_p (void)
25059{
25060 /* For x87 floating point with standard excess precision handling,
25061 there is no adddf3 pattern (since x87 floating point only has
25062 XFmode operations) so the default hook implementation gets this
25063 wrong. */
25064 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
25065}
25066
25067/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
25068
25069static void
25070ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25071{
25072 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
25073 return;
25074 tree exceptions_var = create_tmp_var_raw (integer_type_node);
25075 if (TARGET_80387)
25076 {
25077 tree fenv_index_type = build_index_type (size_int (6));
25078 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
25079 tree fenv_var = create_tmp_var_raw (fenv_type);
25080 TREE_ADDRESSABLE (fenv_var) = 1;
25081 tree fenv_ptr = build_pointer_type (fenv_type);
25082 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
25083 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
25084       tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
25085       tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
25086       tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
25087       tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
25088 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
25089 tree hold_fnclex = build_call_expr (fnclex, 0);
25090 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
25091 NULL_TREE, NULL_TREE);
25092 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
25093 hold_fnclex);
25094 *clear = build_call_expr (fnclex, 0);
25095 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
25096 tree fnstsw_call = build_call_expr (fnstsw, 0);
25097 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
25098 fnstsw_call, NULL_TREE, NULL_TREE);
25099 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
25100 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
25101 exceptions_var, exceptions_x87,
25102 NULL_TREE, NULL_TREE);
25103 *update = build2 (COMPOUND_EXPR, integer_type_node,
25104 sw_mod, update_mod);
25105 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
25106 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
25107 }
25108 if (TARGET_SSE && TARGET_SSE_MATH)
25109 {
25110 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
25111 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
25112       tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
25113       tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
25114 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
25115 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
25116 mxcsr_orig_var, stmxcsr_hold_call,
25117 NULL_TREE, NULL_TREE);
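      /* In the MXCSR value used while the atomic operation executes,
	 set all exception mask bits (0x1f80) and clear the sticky
	 exception flag bits (the low six bits).  */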
25118 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
25119 mxcsr_orig_var,
25120 build_int_cst (unsigned_type_node, 0x1f80));
25121 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
25122 build_int_cst (unsigned_type_node, 0xffffffc0));
25123 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
25124 mxcsr_mod_var, hold_mod_val,
25125 NULL_TREE, NULL_TREE);
25126 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25127 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
25128 hold_assign_orig, hold_assign_mod);
25129 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
25130 ldmxcsr_hold_call);
25131 if (*hold)
25132 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
25133 else
25134 *hold = hold_all;
25135 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25136 if (*clear)
25137 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
25138 ldmxcsr_clear_call);
25139 else
25140 *clear = ldmxcsr_clear_call;
25141 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
25142 tree exceptions_sse = fold_convert (integer_type_node,
25143 stxmcsr_update_call);
25144 if (*update)
25145 {
25146 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
25147 exceptions_var, exceptions_sse);
25148 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
25149 exceptions_var, exceptions_mod);
25150 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
25151 exceptions_assign);
25152 }
25153 else
25154 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
25155 exceptions_sse, NULL_TREE, NULL_TREE);
25156 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
25157 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25158 ldmxcsr_update_call);
25159 }
25160 tree atomic_feraiseexcept
25161     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
25162 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
25163 1, exceptions_var);
25164 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25165 atomic_feraiseexcept_call);
25166}
25167
25168#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25169/* For i386, a common symbol is local only for non-PIE binaries.  For
25170   x86-64, a common symbol is local only for non-PIE binaries or if the
25171   linker supports copy relocations in PIE binaries.  */
25172
25173static bool
25174ix86_binds_local_p (const_tree exp)
25175{
25176 bool direct_extern_access
25177 = (ix86_direct_extern_access
25178 && !(VAR_OR_FUNCTION_DECL_P (exp)
25179 	 && lookup_attribute ("nodirect_extern_access",
25180 DECL_ATTRIBUTES (exp))));
25181 if (!direct_extern_access)
25182 ix86_has_no_direct_extern_access = true;
25183 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
25184 direct_extern_access,
25185 (direct_extern_access
25186 && (!flag_pic
25187 || (TARGET_64BIT
25188 && HAVE_LD_PIE_COPYRELOC != 0))));
25189}
25190
25191/* If flag_pic or ix86_direct_extern_access is false, then neither
25192 local nor global relocs should be placed in readonly memory. */
25193
25194static int
25195ix86_reloc_rw_mask (void)
25196{
25197 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
25198}
25199#endif
25200
25201/* Return true iff ADDR can be used as a symbolic base address. */
25202
25203static bool
25204symbolic_base_address_p (rtx addr)
25205{
25206 if (GET_CODE (addr) == SYMBOL_REF)
25207 return true;
25208
25209 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
25210 return true;
25211
25212 return false;
25213}
25214
25215/* Return true iff ADDR can be used as a base address. */
25216
25217static bool
25218base_address_p (rtx addr)
25219{
25220 if (REG_P (addr))
25221 return true;
25222
25223 if (symbolic_base_address_p (addr))
25224 return true;
25225
25226 return false;
25227}
25228
25229/* If MEM is in the form of [(base+symbase)+offset], extract the three
25230   parts of the address and store them in BASE, SYMBASE and OFFSET;
25231   otherwise return false.  */
25232
25233static bool
25234extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
25235{
25236 rtx addr;
25237
25238 gcc_assert (MEM_P (mem));
25239
25240 addr = XEXP (mem, 0);
25241
25242 if (GET_CODE (addr) == CONST)
25243 addr = XEXP (addr, 0);
25244
25245 if (base_address_p (addr))
25246 {
25247 *base = addr;
25248 *symbase = const0_rtx;
25249 *offset = const0_rtx;
25250 return true;
25251 }
25252
25253 if (GET_CODE (addr) == PLUS
25254 && base_address_p (XEXP (addr, 0)))
25255 {
25256 rtx addend = XEXP (addr, 1);
25257
25258 if (GET_CODE (addend) == CONST)
25259 addend = XEXP (addend, 0);
25260
25261 if (CONST_INT_P (addend))
25262 {
25263 *base = XEXP (addr, 0);
25264 *symbase = const0_rtx;
25265 *offset = addend;
25266 return true;
25267 }
25268
25269 /* Also accept REG + symbolic ref, with or without a CONST_INT
25270 offset. */
25271 if (REG_P (XEXP (addr, 0)))
25272 {
25273 	  if (symbolic_base_address_p (addend))
25274 {
25275 *base = XEXP (addr, 0);
25276 *symbase = addend;
25277 *offset = const0_rtx;
25278 return true;
25279 }
25280
25281 if (GET_CODE (addend) == PLUS
25282 && symbolic_base_address_p (XEXP (addend, 0))
25283 && CONST_INT_P (XEXP (addend, 1)))
25284 {
25285 *base = XEXP (addr, 0);
25286 *symbase = XEXP (addend, 0);
25287 *offset = XEXP (addend, 1);
25288 return true;
25289 }
25290 }
25291 }
25292
25293 return false;
25294}
25295
25296/* Given OPERANDS of consecutive load/store, check if we can merge
25297 them into move multiple. LOAD is true if they are load instructions.
25298 MODE is the mode of memory operands. */
25299
25300bool
25301ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
25302 machine_mode mode)
25303{
25304 HOST_WIDE_INT offval_1, offval_2, msize;
25305 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
25306 symbase_1, symbase_2, offset_1, offset_2;
25307
25308 if (load)
25309 {
25310 mem_1 = operands[1];
25311 mem_2 = operands[3];
25312 reg_1 = operands[0];
25313 reg_2 = operands[2];
25314 }
25315 else
25316 {
25317 mem_1 = operands[0];
25318 mem_2 = operands[2];
25319 reg_1 = operands[1];
25320 reg_2 = operands[3];
25321 }
25322
25323 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
25324
25325 if (REGNO (reg_1) != REGNO (reg_2))
25326 return false;
25327
25328 /* Check if the addresses are in the form of [base+offset]. */
25329   if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
25330     return false;
25331   if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
25332 return false;
25333
25334 /* Check if the bases are the same. */
25335 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
25336 return false;
25337
25338 offval_1 = INTVAL (offset_1);
25339 offval_2 = INTVAL (offset_2);
25340 msize = GET_MODE_SIZE (mode);
25341 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
25342 if (offval_1 + msize != offval_2)
25343 return false;
25344
25345 return true;
25346}
25347
25348/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
25349
25350static bool
25351ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
25352 optimization_type opt_type)
25353{
25354 switch (op)
25355 {
25356 case asin_optab:
25357 case acos_optab:
25358 case log1p_optab:
25359 case exp_optab:
25360 case exp10_optab:
25361 case exp2_optab:
25362 case expm1_optab:
25363 case ldexp_optab:
25364 case scalb_optab:
25365 case round_optab:
25366 case lround_optab:
25367 return opt_type == OPTIMIZE_FOR_SPEED;
25368
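    /* rint is always supported, but without SSE4.1 the SSE-math
       expansion is long enough that it is only worth using when
       optimizing for speed.  */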
25369 case rint_optab:
25370 if (SSE_FLOAT_MODE_P (mode1)
25371 && TARGET_SSE_MATH
25372 && !flag_trapping_math
25373 && !TARGET_SSE4_1
25374 && mode1 != HFmode)
25375 return opt_type == OPTIMIZE_FOR_SPEED;
25376 return true;
25377
25378 case floor_optab:
25379 case ceil_optab:
25380 case btrunc_optab:
25381 if (((SSE_FLOAT_MODE_P (mode1)
25382 && TARGET_SSE_MATH
25383 && TARGET_SSE4_1)
25384 || mode1 == HFmode)
25385 && !flag_trapping_math)
25386 return true;
25387 return opt_type == OPTIMIZE_FOR_SPEED;
25388
25389 case rsqrt_optab:
25390       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
25391
25392 default:
25393 return true;
25394 }
25395}
25396
25397/* Address space support.
25398
25399 This is not "far pointers" in the 16-bit sense, but an easy way
25400 to use %fs and %gs segment prefixes. Therefore:
25401
25402 (a) All address spaces have the same modes,
25403    (b) All address spaces have the same address forms,
25404 (c) While %fs and %gs are technically subsets of the generic
25405 address space, they are probably not subsets of each other.
25406 (d) Since we have no access to the segment base register values
25407 without resorting to a system call, we cannot convert a
25408 non-default address space to a default address space.
25409 Therefore we do not claim %fs or %gs are subsets of generic.
25410
25411 Therefore we can (mostly) use the default hooks. */
25412
25413/* All use of segmentation is assumed to make address 0 valid. */
25414
25415static bool
25416ix86_addr_space_zero_address_valid (addr_space_t as)
25417{
25418 return as != ADDR_SPACE_GENERIC;
25419}
25420
25421static void
25422ix86_init_libfuncs (void)
25423{
25424 if (TARGET_64BIT)
25425 {
25426 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
25427 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
25428 }
25429 else
25430 {
25431 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
25432 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
25433 }
25434
25435#if TARGET_MACHO
25436 darwin_rename_builtins ();
25437#endif
25438}
25439
25440/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
25441 FPU, assume that the fpcw is set to extended precision; when using
25442 only SSE, rounding is correct; when using both SSE and the FPU,
25443 the rounding precision is indeterminate, since either may be chosen
25444 apparently at random. */
25445
25446static enum flt_eval_method
25447ix86_get_excess_precision (enum excess_precision_type type)
25448{
25449 switch (type)
25450 {
25451 case EXCESS_PRECISION_TYPE_FAST:
25452 /* The fastest type to promote to will always be the native type,
25453 whether that occurs with implicit excess precision or
25454 otherwise. */
25455 return TARGET_AVX512FP16
25456 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25457 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25458 case EXCESS_PRECISION_TYPE_STANDARD:
25459 case EXCESS_PRECISION_TYPE_IMPLICIT:
25460 /* Otherwise, the excess precision we want when we are
25461 in a standards compliant mode, and the implicit precision we
25462 provide would be identical were it not for the unpredictable
25463 cases. */
25464 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
25465 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25466 else if (!TARGET_80387)
25467 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25468 else if (!TARGET_MIX_SSE_I387)
25469 {
25470 if (!(TARGET_SSE && TARGET_SSE_MATH))
25471 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
25472 else if (TARGET_SSE2)
25473 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25474 }
25475
25476 /* If we are in standards compliant mode, but we know we will
25477 calculate in unpredictable precision, return
25478 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
25479 excess precision if the target can't guarantee it will honor
25480 it. */
25481 return (type == EXCESS_PRECISION_TYPE_STANDARD
25482 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
25483 : FLT_EVAL_METHOD_UNPREDICTABLE);
25484 case EXCESS_PRECISION_TYPE_FLOAT16:
25485 if (TARGET_80387
25486 && !(TARGET_SSE_MATH && TARGET_SSE))
25487 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
25488 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25489 default:
25490 gcc_unreachable ();
25491 }
25492
25493 return FLT_EVAL_METHOD_UNPREDICTABLE;
25494}
25495
25496/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
25497bool
25498ix86_bitint_type_info (int n, struct bitint_info *info)
25499{
25500 if (!TARGET_64BIT)
25501 return false;
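  /* Use the smallest integer mode that holds N bits as the limb mode
     for narrow _BitInt types; anything wider uses DImode limbs.  */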
25502 if (n <= 8)
25503 info->limb_mode = QImode;
25504 else if (n <= 16)
25505 info->limb_mode = HImode;
25506 else if (n <= 32)
25507 info->limb_mode = SImode;
25508 else
25509 info->limb_mode = DImode;
25510 info->big_endian = false;
25511 info->extended = false;
25512 return true;
25513}
25514
25515/* Implement PUSH_ROUNDING.  On 386, we have a pushw instruction that
25516   decrements by exactly 2 no matter what the position was; there is no pushb.
25517
25518 But as CIE data alignment factor on this arch is -4 for 32bit targets
25519 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
25520 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
25521
25522poly_int64
25523ix86_push_rounding (poly_int64 bytes)
25524{
25525 return ROUND_UP (bytes, UNITS_PER_WORD);
25526}
25527
25528/* Use 8 bits of metadata starting from bit 48 for LAM_U48,
25529   6 bits of metadata starting from bit 57 for LAM_U57.  */
25530#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
25531 ? 48 \
25532 : (ix86_lam_type == lam_u57 ? 57 : 0))
25533#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
25534 ? 8 \
25535 : (ix86_lam_type == lam_u57 ? 6 : 0))
25536
25537/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
25538bool
25539ix86_memtag_can_tag_addresses ()
25540{
25541 return ix86_lam_type != lam_none && TARGET_LP64;
25542}
25543
25544/* Implement TARGET_MEMTAG_TAG_SIZE. */
25545unsigned char
25546ix86_memtag_tag_size ()
25547{
25548 return IX86_HWASAN_TAG_SIZE;
25549}
25550
25551/* Implement TARGET_MEMTAG_SET_TAG. */
25552rtx
25553ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
25554{
25555   /* default_memtag_insert_random_tag may
25556      generate a tag whose value needs more than 6 bits.  */
25557 if (ix86_lam_type == lam_u57)
25558 {
25559 unsigned HOST_WIDE_INT and_imm
25560 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25561
25562 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
25563 }
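  /* Shift the tag into the LAM metadata bits and OR it into the
     untagged pointer.  */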
25564 tag = expand_simple_binop (Pmode, ASHIFT, tag,
25565 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
25566 /* unsignedp = */1, OPTAB_WIDEN);
25567 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
25568 /* unsignedp = */1, OPTAB_DIRECT);
25569 return ret;
25570}
25571
25572/* Implement TARGET_MEMTAG_EXTRACT_TAG. */
25573rtx
25574ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
25575{
25576 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
25577 GEN_INT (IX86_HWASAN_SHIFT), target,
25578 /* unsignedp = */0,
25579 OPTAB_DIRECT);
25580 rtx ret = gen_reg_rtx (QImode);
25581 /* Mask off bit63 when LAM_U57. */
25582 if (ix86_lam_type == lam_u57)
25583 {
25584 unsigned HOST_WIDE_INT and_imm
25585 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25586 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
25587 gen_int_mode (and_imm, QImode)));
25588 }
25589 else
25590 emit_move_insn (ret, gen_lowpart (QImode, tag));
25591 return ret;
25592}
25593
25594/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
25595rtx
25596ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
25597{
25598 /* Leave bit63 alone. */
25599 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
25600 + (HOST_WIDE_INT_1U << 63) - 1),
25601 Pmode);
25602 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
25603 tag_mask, target, true,
25604 OPTAB_DIRECT);
25605 gcc_assert (untagged_base);
25606 return untagged_base;
25607}
25608
25609/* Implement TARGET_MEMTAG_ADD_TAG. */
25610rtx
25611ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
25612{
25613 rtx base_tag = gen_reg_rtx (QImode);
25614 rtx base_addr = gen_reg_rtx (Pmode);
25615 rtx tagged_addr = gen_reg_rtx (Pmode);
25616 rtx new_tag = gen_reg_rtx (QImode);
25617 unsigned HOST_WIDE_INT and_imm
25618 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
25619
25620   /* When the tag addition "overflows", the bits above the tag width
25621      need to be masked off.  */
25622   emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
25623   emit_move_insn (base_addr,
25624 		  ix86_memtag_untagged_pointer (base, NULL_RTX));
25625 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
25626 emit_move_insn (new_tag, base_tag);
25627 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
25628 emit_move_insn (tagged_addr,
25629 		  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
25630 return plus_constant (Pmode, tagged_addr, offset);
25631}
25632
25633/* Target-specific selftests. */
25634
25635#if CHECKING_P
25636
25637namespace selftest {
25638
25639/* Verify that hard regs are dumped as expected (in compact mode). */
25640
25641static void
25642ix86_test_dumping_hard_regs ()
25643{
25644 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
25645 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
25646}
25647
25648/* Test dumping an insn with repeated references to the same SCRATCH,
25649 to verify the rtx_reuse code. */
25650
25651static void
25652ix86_test_dumping_memory_blockage ()
25653{
25654 set_new_first_and_last_insn (NULL, NULL);
25655
25656 rtx pat = gen_memory_blockage ();
25657 rtx_reuse_manager r;
25658   r.preprocess (pat);
25659
25660   /* Verify that the repeated references to the SCRATCH use reuse IDs.
25661      The first should be prefixed with a reuse ID,
25662 and the second should be dumped as a "reuse_rtx" of that ID.
25663 The expected string assumes Pmode == DImode. */
25664 if (Pmode == DImode)
25665 ASSERT_RTL_DUMP_EQ_WITH_REUSE
25666 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
25667 " (unspec:BLK [\n"
25668 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
25669 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
25670}
25671
25672/* Verify loading an RTL dump; specifically a dump of copying
25673 a param on x86_64 from a hard reg into the frame.
25674 This test is target-specific since the dump contains target-specific
25675 hard reg names. */
25676
25677static void
25678ix86_test_loading_dump_fragment_1 ()
25679{
25680 rtl_dump_test t (SELFTEST_LOCATION,
25681 		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
25682
25683   rtx_insn *insn = get_insn_by_uid (1);
25684
25685 /* The block structure and indentation here is purely for
25686 readability; it mirrors the structure of the rtx. */
25687 tree mem_expr;
25688 {
25689 rtx pat = PATTERN (insn);
25690 ASSERT_EQ (SET, GET_CODE (pat));
25691 {
25692 rtx dest = SET_DEST (pat);
25693 ASSERT_EQ (MEM, GET_CODE (dest));
25694 /* Verify the "/c" was parsed. */
25695 ASSERT_TRUE (RTX_FLAG (dest, call));
25696 ASSERT_EQ (SImode, GET_MODE (dest));
25697 {
25698 rtx addr = XEXP (dest, 0);
25699 ASSERT_EQ (PLUS, GET_CODE (addr));
25700 ASSERT_EQ (DImode, GET_MODE (addr));
25701 {
25702 rtx lhs = XEXP (addr, 0);
25703 /* Verify that the "frame" REG was consolidated. */
25704 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
25705 }
25706 {
25707 rtx rhs = XEXP (addr, 1);
25708 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
25709 ASSERT_EQ (-4, INTVAL (rhs));
25710 }
25711 }
25712 /* Verify the "[1 i+0 S4 A32]" was parsed. */
25713 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
25714 /* "i" should have been handled by synthesizing a global int
25715 variable named "i". */
25716 mem_expr = MEM_EXPR (dest);
25717 ASSERT_NE (mem_expr, NULL);
25718 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
25719 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
25720 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
25721 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
25722 /* "+0". */
25723 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
25724 ASSERT_EQ (0, MEM_OFFSET (dest));
25725 /* "S4". */
25726 ASSERT_EQ (4, MEM_SIZE (dest));
25727     /* "A32".  */
25728 ASSERT_EQ (32, MEM_ALIGN (dest));
25729 }
25730 {
25731 rtx src = SET_SRC (pat);
25732 ASSERT_EQ (REG, GET_CODE (src));
25733 ASSERT_EQ (SImode, GET_MODE (src));
25734 ASSERT_EQ (5, REGNO (src));
25735 tree reg_expr = REG_EXPR (src);
25736 /* "i" here should point to the same var as for the MEM_EXPR. */
25737 ASSERT_EQ (reg_expr, mem_expr);
25738 }
25739 }
25740}
25741
25742/* Verify that the RTL loader copes with a call_insn dump.
25743 This test is target-specific since the dump contains a target-specific
25744 hard reg name. */
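/* Sketch of the insn the assertions below expect, for orientation
   (illustrative only; the authoritative text is in the .rtl file):

     (call_insn/j ... (set (...) (call ...)) ...
        (expr_list:REG_CALL_DECL ...
           (expr_list:REG_EH_REGION (const_int 0) (nil)))
        (expr_list:DF (use (reg:DF xmm0))))

   i.e. a sibling call whose only CALL_INSN_FUNCTION_USAGE entry is a use
   of xmm0 in DFmode.  */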
25745
25746static void
25747ix86_test_loading_call_insn ()
25748{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
25751 if (!TARGET_SSE)
25752 return;
25753
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
25755
25756 rtx_insn *insn = get_insns ();
25757 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
25758
25759 /* "/j". */
25760 ASSERT_TRUE (RTX_FLAG (insn, jump));
25761
25762 rtx pat = PATTERN (insn);
25763 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
25764
25765 /* Verify REG_NOTES. */
25766 {
25767 /* "(expr_list:REG_CALL_DECL". */
25768 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
25769 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
25770 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
25771
25772 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
25773 rtx_expr_list *note1 = note0->next ();
25774 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
25775
25776 ASSERT_EQ (NULL, note1->next ());
25777 }
25778
25779 /* Verify CALL_INSN_FUNCTION_USAGE. */
25780 {
25781 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
25782 rtx_expr_list *usage
25783 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
25784 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
25785 ASSERT_EQ (DFmode, GET_MODE (usage));
25786 ASSERT_EQ (USE, GET_CODE (usage->element ()));
25787 ASSERT_EQ (NULL, usage->next ());
25788 }
25789}
25790
/* Verify that the RTL loader copes with a dump from print_rtx_function.
25792 This test is target-specific since the dump contains target-specific
25793 hard reg names. */
25794
25795static void
25796ix86_test_loading_full_dump ()
25797{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
25810 ASSERT_EQ (INSN, GET_CODE (insn_15));
25811 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
25812
25813 /* Verify crtl->return_rtx. */
25814 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
25815 ASSERT_EQ (0, REGNO (crtl->return_rtx));
25816 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
25817}
25818
25819/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
25820 In particular, verify that it correctly loads the 2nd operand.
25821 This test is target-specific since these are machine-specific
25822 operands (and enums). */
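/* For orientation, the dump is expected to contain two insns of roughly
   this shape (illustrative only; the authoritative text is in the .rtl
   file):

     (set (mem/v:BLK (scratch) ...)
          (unspec:BLK [(mem/v:BLK (scratch) ...)] UNSPEC_MEMORY_BLOCKAGE))

     (set (...) (unspec_volatile ... UNSPECV_RDTSCP))

   The assertions below check the operands, the pointer equality of the
   repeated SCRATCH, and that the first insn is recognized.  */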
25823
25824static void
25825ix86_test_loading_unspec ()
25826{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
25828
25829 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
25830
25831 ASSERT_TRUE (cfun);
25832
25833 /* Test of an UNSPEC. */
25834 rtx_insn *insn = get_insns ();
25835 ASSERT_EQ (INSN, GET_CODE (insn));
25836 rtx set = single_set (insn);
25837 ASSERT_NE (NULL, set);
25838 rtx dst = SET_DEST (set);
25839 ASSERT_EQ (MEM, GET_CODE (dst));
25840 rtx src = SET_SRC (set);
25841 ASSERT_EQ (UNSPEC, GET_CODE (src));
25842 ASSERT_EQ (BLKmode, GET_MODE (src));
25843 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
25844
25845 rtx v0 = XVECEXP (src, 0, 0);
25846
25847 /* Verify that the two uses of the first SCRATCH have pointer
25848 equality. */
25849 rtx scratch_a = XEXP (dst, 0);
25850 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
25851
25852 rtx scratch_b = XEXP (v0, 0);
25853 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
25854
25855 ASSERT_EQ (scratch_a, scratch_b);
25856
25857 /* Verify that the two mems are thus treated as equal. */
25858 ASSERT_TRUE (rtx_equal_p (dst, v0));
25859
25860 /* Verify that the insn is recognized. */
  ASSERT_NE (-1, recog_memoized (insn));
25862
25863 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
25864 insn = NEXT_INSN (insn);
25865 ASSERT_EQ (INSN, GET_CODE (insn));
25866
25867 set = single_set (insn);
25868 ASSERT_NE (NULL, set);
25869
25870 src = SET_SRC (set);
25871 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
25872 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
25873}
25874
25875/* Run all target-specific selftests. */
25876
25877static void
25878ix86_run_selftests (void)
25879{
25880 ix86_test_dumping_hard_regs ();
25881 ix86_test_dumping_memory_blockage ();
25882
25883 /* Various tests of loading RTL dumps, here because they contain
25884 ix86-isms (e.g. names of hard regs). */
25885 ix86_test_loading_dump_fragment_1 ();
25886 ix86_test_loading_call_insn ();
25887 ix86_test_loading_full_dump ();
25888 ix86_test_loading_unspec ();
25889}
25890
25891} // namespace selftest
25892
25893#endif /* CHECKING_P */
25894
25895/* Initialize the GCC target structure. */
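/* The block below follows the usual targetm-initialization idiom: for
   each hook, the default provided by target-def.h is #undef'd and
   redefined to the i386-specific implementation, and TARGET_INITIALIZER
   (used at the bottom of this file to initialize "targetm") then picks
   up the overridden values.  For example, the first pair below makes
   targetm.calls.return_in_memory point at ix86_return_in_memory.  */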
25896#undef TARGET_RETURN_IN_MEMORY
25897#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25898
25899#undef TARGET_LEGITIMIZE_ADDRESS
25900#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
25901
25902#undef TARGET_ATTRIBUTE_TABLE
25903#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25904#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
25905#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
25906#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25907# undef TARGET_MERGE_DECL_ATTRIBUTES
25908# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25909#endif
25910
25911#undef TARGET_INVALID_CONVERSION
25912#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
25913
25914#undef TARGET_INVALID_UNARY_OP
25915#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
25916
25917#undef TARGET_INVALID_BINARY_OP
25918#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
25919
25920#undef TARGET_COMP_TYPE_ATTRIBUTES
25921#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25922
25923#undef TARGET_INIT_BUILTINS
25924#define TARGET_INIT_BUILTINS ix86_init_builtins
25925#undef TARGET_BUILTIN_DECL
25926#define TARGET_BUILTIN_DECL ix86_builtin_decl
25927#undef TARGET_EXPAND_BUILTIN
25928#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25929
25930#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25931#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25932 ix86_builtin_vectorized_function
25933
25934#undef TARGET_VECTORIZE_BUILTIN_GATHER
25935#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
25936
25937#undef TARGET_VECTORIZE_BUILTIN_SCATTER
25938#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
25939
25940#undef TARGET_BUILTIN_RECIPROCAL
25941#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25942
25943#undef TARGET_ASM_FUNCTION_EPILOGUE
25944#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25945
25946#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
25947#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
25948 ix86_print_patchable_function_entry
25949
25950#undef TARGET_ENCODE_SECTION_INFO
25951#ifndef SUBTARGET_ENCODE_SECTION_INFO
25952#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25953#else
25954#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25955#endif
25956
25957#undef TARGET_ASM_OPEN_PAREN
25958#define TARGET_ASM_OPEN_PAREN ""
25959#undef TARGET_ASM_CLOSE_PAREN
25960#define TARGET_ASM_CLOSE_PAREN ""
25961
25962#undef TARGET_ASM_BYTE_OP
25963#define TARGET_ASM_BYTE_OP ASM_BYTE
25964
25965#undef TARGET_ASM_ALIGNED_HI_OP
25966#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25967#undef TARGET_ASM_ALIGNED_SI_OP
25968#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25969#ifdef ASM_QUAD
25970#undef TARGET_ASM_ALIGNED_DI_OP
25971#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25972#endif
25973
25974#undef TARGET_PROFILE_BEFORE_PROLOGUE
25975#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
25976
25977#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
25978#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
25979
25980#undef TARGET_ASM_UNALIGNED_HI_OP
25981#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25982#undef TARGET_ASM_UNALIGNED_SI_OP
25983#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25984#undef TARGET_ASM_UNALIGNED_DI_OP
25985#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25986
25987#undef TARGET_PRINT_OPERAND
25988#define TARGET_PRINT_OPERAND ix86_print_operand
25989#undef TARGET_PRINT_OPERAND_ADDRESS
25990#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
25991#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
25992#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
25993#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
25994#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
25995
25996#undef TARGET_SCHED_INIT_GLOBAL
25997#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
25998#undef TARGET_SCHED_ADJUST_COST
25999#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26000#undef TARGET_SCHED_ISSUE_RATE
26001#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26002#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26003#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26004 ia32_multipass_dfa_lookahead
26005#undef TARGET_SCHED_MACRO_FUSION_P
26006#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
26007#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
26008#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
26009
26010#undef TARGET_FUNCTION_OK_FOR_SIBCALL
26011#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26012
26013#undef TARGET_MEMMODEL_CHECK
26014#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
26015
26016#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
26017#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
26018
26019#ifdef HAVE_AS_TLS
26020#undef TARGET_HAVE_TLS
26021#define TARGET_HAVE_TLS true
26022#endif
26023#undef TARGET_CANNOT_FORCE_CONST_MEM
26024#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26025#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26026#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26027
26028#undef TARGET_DELEGITIMIZE_ADDRESS
26029#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26030
26031#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
26032#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
26033
26034#undef TARGET_MS_BITFIELD_LAYOUT_P
26035#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26036
26037#if TARGET_MACHO
26038#undef TARGET_BINDS_LOCAL_P
26039#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26040#else
26041#undef TARGET_BINDS_LOCAL_P
26042#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
26043#endif
26044#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26045#undef TARGET_BINDS_LOCAL_P
26046#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26047#endif
26048
26049#undef TARGET_ASM_OUTPUT_MI_THUNK
26050#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26051#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26052#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26053
26054#undef TARGET_ASM_FILE_START
26055#define TARGET_ASM_FILE_START x86_file_start
26056
26057#undef TARGET_OPTION_OVERRIDE
26058#define TARGET_OPTION_OVERRIDE ix86_option_override
26059
26060#undef TARGET_REGISTER_MOVE_COST
26061#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
26062#undef TARGET_MEMORY_MOVE_COST
26063#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
26064#undef TARGET_RTX_COSTS
26065#define TARGET_RTX_COSTS ix86_rtx_costs
26066#undef TARGET_ADDRESS_COST
26067#define TARGET_ADDRESS_COST ix86_address_cost
26068
26069#undef TARGET_OVERLAP_OP_BY_PIECES_P
26070#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
26071
26072#undef TARGET_FLAGS_REGNUM
26073#define TARGET_FLAGS_REGNUM FLAGS_REG
26074#undef TARGET_FIXED_CONDITION_CODE_REGS
26075#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26076#undef TARGET_CC_MODES_COMPATIBLE
26077#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26078
26079#undef TARGET_MACHINE_DEPENDENT_REORG
26080#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26081
26082#undef TARGET_BUILD_BUILTIN_VA_LIST
26083#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26084
26085#undef TARGET_FOLD_BUILTIN
26086#define TARGET_FOLD_BUILTIN ix86_fold_builtin
26087
26088#undef TARGET_GIMPLE_FOLD_BUILTIN
26089#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
26090
26091#undef TARGET_COMPARE_VERSION_PRIORITY
26092#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
26093
26094#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
26095#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
26096 ix86_generate_version_dispatcher_body
26097
26098#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
26099#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
26100 ix86_get_function_versions_dispatcher
26101
26102#undef TARGET_ENUM_VA_LIST_P
26103#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
26104
26105#undef TARGET_FN_ABI_VA_LIST
26106#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26107
26108#undef TARGET_CANONICAL_VA_LIST_TYPE
26109#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26110
26111#undef TARGET_EXPAND_BUILTIN_VA_START
26112#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26113
26114#undef TARGET_MD_ASM_ADJUST
26115#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
26116
26117#undef TARGET_C_EXCESS_PRECISION
26118#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
26119#undef TARGET_C_BITINT_TYPE_INFO
26120#define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
26121#undef TARGET_PROMOTE_PROTOTYPES
26122#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26123#undef TARGET_PUSH_ARGUMENT
26124#define TARGET_PUSH_ARGUMENT ix86_push_argument
26125#undef TARGET_SETUP_INCOMING_VARARGS
26126#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26127#undef TARGET_MUST_PASS_IN_STACK
26128#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26129#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
26130#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
26131#undef TARGET_FUNCTION_ARG_ADVANCE
26132#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
26133#undef TARGET_FUNCTION_ARG
26134#define TARGET_FUNCTION_ARG ix86_function_arg
26135#undef TARGET_INIT_PIC_REG
26136#define TARGET_INIT_PIC_REG ix86_init_pic_reg
26137#undef TARGET_USE_PSEUDO_PIC_REG
26138#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
26139#undef TARGET_FUNCTION_ARG_BOUNDARY
26140#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
26141#undef TARGET_PASS_BY_REFERENCE
26142#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26143#undef TARGET_INTERNAL_ARG_POINTER
26144#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26145#undef TARGET_UPDATE_STACK_BOUNDARY
26146#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
26147#undef TARGET_GET_DRAP_RTX
26148#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
26149#undef TARGET_STRICT_ARGUMENT_NAMING
26150#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26151#undef TARGET_STATIC_CHAIN
26152#define TARGET_STATIC_CHAIN ix86_static_chain
26153#undef TARGET_TRAMPOLINE_INIT
26154#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
26155#undef TARGET_RETURN_POPS_ARGS
26156#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
26157
26158#undef TARGET_WARN_FUNC_RETURN
26159#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
26160
26161#undef TARGET_LEGITIMATE_COMBINED_INSN
26162#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
26163
26164#undef TARGET_ASAN_SHADOW_OFFSET
26165#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
26166
26167#undef TARGET_GIMPLIFY_VA_ARG_EXPR
26168#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26169
26170#undef TARGET_SCALAR_MODE_SUPPORTED_P
26171#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26172
26173#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
26174#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p
26176
26177#undef TARGET_VECTOR_MODE_SUPPORTED_P
26178#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26179
26180#undef TARGET_C_MODE_FOR_SUFFIX
26181#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26182
26183#ifdef HAVE_AS_TLS
26184#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26185#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26186#endif
26187
26188#ifdef SUBTARGET_INSERT_ATTRIBUTES
26189#undef TARGET_INSERT_ATTRIBUTES
26190#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26191#endif
26192
26193#undef TARGET_MANGLE_TYPE
26194#define TARGET_MANGLE_TYPE ix86_mangle_type
26195
26196#undef TARGET_EMIT_SUPPORT_TINFOS
26197#define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
26198
26199#undef TARGET_STACK_PROTECT_GUARD
26200#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
26201
26202#if !TARGET_MACHO
26203#undef TARGET_STACK_PROTECT_FAIL
26204#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26205#endif
26206
26207#undef TARGET_FUNCTION_VALUE
26208#define TARGET_FUNCTION_VALUE ix86_function_value
26209
26210#undef TARGET_FUNCTION_VALUE_REGNO_P
26211#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
26212
26213#undef TARGET_ZERO_CALL_USED_REGS
26214#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
26215
26216#undef TARGET_PROMOTE_FUNCTION_MODE
26217#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
26218
26219#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
26220#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
26221
26222#undef TARGET_MEMBER_TYPE_FORCES_BLK
26223#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
26224
26225#undef TARGET_INSTANTIATE_DECLS
26226#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
26227
26228#undef TARGET_SECONDARY_RELOAD
26229#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26230#undef TARGET_SECONDARY_MEMORY_NEEDED
26231#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
26232#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
26233#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
26234
26235#undef TARGET_CLASS_MAX_NREGS
26236#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
26237
26238#undef TARGET_PREFERRED_RELOAD_CLASS
26239#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
26240#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
26241#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
26242#undef TARGET_CLASS_LIKELY_SPILLED_P
26243#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
26244
26245#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26246#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
26247 ix86_builtin_vectorization_cost
26248#undef TARGET_VECTORIZE_VEC_PERM_CONST
26249#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
26250#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
26251#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
26252 ix86_preferred_simd_mode
26253#undef TARGET_VECTORIZE_SPLIT_REDUCTION
26254#define TARGET_VECTORIZE_SPLIT_REDUCTION \
26255 ix86_split_reduction
26256#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
26257#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
26258 ix86_autovectorize_vector_modes
26259#undef TARGET_VECTORIZE_GET_MASK_MODE
26260#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
26261#undef TARGET_VECTORIZE_CREATE_COSTS
26262#define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
26263
26264#undef TARGET_SET_CURRENT_FUNCTION
26265#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
26266
26267#undef TARGET_OPTION_VALID_ATTRIBUTE_P
26268#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
26269
26270#undef TARGET_OPTION_SAVE
26271#define TARGET_OPTION_SAVE ix86_function_specific_save
26272
26273#undef TARGET_OPTION_RESTORE
26274#define TARGET_OPTION_RESTORE ix86_function_specific_restore
26275
26276#undef TARGET_OPTION_POST_STREAM_IN
26277#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
26278
26279#undef TARGET_OPTION_PRINT
26280#define TARGET_OPTION_PRINT ix86_function_specific_print
26281
26282#undef TARGET_OPTION_FUNCTION_VERSIONS
26283#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
26284
26285#undef TARGET_CAN_INLINE_P
26286#define TARGET_CAN_INLINE_P ix86_can_inline_p
26287
26288#undef TARGET_LEGITIMATE_ADDRESS_P
26289#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
26290
26291#undef TARGET_REGISTER_PRIORITY
26292#define TARGET_REGISTER_PRIORITY ix86_register_priority
26293
26294#undef TARGET_REGISTER_USAGE_LEVELING_P
26295#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
26296
26297#undef TARGET_LEGITIMATE_CONSTANT_P
26298#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
26299
26300#undef TARGET_COMPUTE_FRAME_LAYOUT
26301#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
26302
26303#undef TARGET_FRAME_POINTER_REQUIRED
26304#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
26305
26306#undef TARGET_CAN_ELIMINATE
26307#define TARGET_CAN_ELIMINATE ix86_can_eliminate
26308
26309#undef TARGET_EXTRA_LIVE_ON_ENTRY
26310#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26311
26312#undef TARGET_ASM_CODE_END
26313#define TARGET_ASM_CODE_END ix86_code_end
26314
26315#undef TARGET_CONDITIONAL_REGISTER_USAGE
26316#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
26317
26318#undef TARGET_CANONICALIZE_COMPARISON
26319#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
26320
26321#undef TARGET_LOOP_UNROLL_ADJUST
26322#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
26323
26324/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26325#undef TARGET_SPILL_CLASS
26326#define TARGET_SPILL_CLASS ix86_spill_class
26327
26328#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
26329#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
26330 ix86_simd_clone_compute_vecsize_and_simdlen
26331
26332#undef TARGET_SIMD_CLONE_ADJUST
26333#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
26334
26335#undef TARGET_SIMD_CLONE_USABLE
26336#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
26337
26338#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
26339#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
26340
26341#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
26342#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
26343 ix86_float_exceptions_rounding_supported_p
26344
26345#undef TARGET_MODE_EMIT
26346#define TARGET_MODE_EMIT ix86_emit_mode_set
26347
26348#undef TARGET_MODE_NEEDED
26349#define TARGET_MODE_NEEDED ix86_mode_needed
26350
26351#undef TARGET_MODE_AFTER
26352#define TARGET_MODE_AFTER ix86_mode_after
26353
26354#undef TARGET_MODE_ENTRY
26355#define TARGET_MODE_ENTRY ix86_mode_entry
26356
26357#undef TARGET_MODE_EXIT
26358#define TARGET_MODE_EXIT ix86_mode_exit
26359
26360#undef TARGET_MODE_PRIORITY
26361#define TARGET_MODE_PRIORITY ix86_mode_priority
26362
26363#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
26364#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
26365
26366#undef TARGET_OFFLOAD_OPTIONS
26367#define TARGET_OFFLOAD_OPTIONS \
26368 ix86_offload_options
26369
26370#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
26371#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
26372
26373#undef TARGET_OPTAB_SUPPORTED_P
26374#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
26375
26376#undef TARGET_HARD_REGNO_SCRATCH_OK
26377#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
26378
26379#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
26380#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
26381
26382#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
26383#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
26384
26385#undef TARGET_INIT_LIBFUNCS
26386#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
26387
26388#undef TARGET_EXPAND_DIVMOD_LIBFUNC
26389#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
26390
26391#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
26392#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
26393
26394#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
26395#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
26396
26397#undef TARGET_HARD_REGNO_NREGS
26398#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
26399#undef TARGET_HARD_REGNO_MODE_OK
26400#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
26401
26402#undef TARGET_MODES_TIEABLE_P
26403#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
26404
26405#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
26406#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
26407 ix86_hard_regno_call_part_clobbered
26408
26409#undef TARGET_INSN_CALLEE_ABI
26410#define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
26411
26412#undef TARGET_CAN_CHANGE_MODE_CLASS
26413#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
26414
26415#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
26416#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
26417
26418#undef TARGET_STATIC_RTX_ALIGNMENT
26419#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
26420#undef TARGET_CONSTANT_ALIGNMENT
26421#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
26422
26423#undef TARGET_EMPTY_RECORD_P
26424#define TARGET_EMPTY_RECORD_P ix86_is_empty_record
26425
26426#undef TARGET_WARN_PARAMETER_PASSING_ABI
26427#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
26428
26429#undef TARGET_GET_MULTILIB_ABI_NAME
26430#define TARGET_GET_MULTILIB_ABI_NAME \
26431 ix86_get_multilib_abi_name
26432
26433#undef TARGET_IFUNC_REF_LOCAL_OK
26434#define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
26435
26436#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
26437# undef TARGET_ASM_RELOC_RW_MASK
26438# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
26439#endif
26440
26441#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
26442#define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
26443
26444#undef TARGET_MEMTAG_ADD_TAG
26445#define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
26446
26447#undef TARGET_MEMTAG_SET_TAG
26448#define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
26449
26450#undef TARGET_MEMTAG_EXTRACT_TAG
26451#define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
26452
26453#undef TARGET_MEMTAG_UNTAGGED_POINTER
26454#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
26455
26456#undef TARGET_MEMTAG_TAG_SIZE
26457#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
26458
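/* Implement TARGET_LIBC_HAS_FAST_FUNCTION: report whether the C library
   is known to provide a fast implementation of the built-in FCODE.
   Only glibc's mempcpy is treated as fast here.  */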
26459static bool
26460ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
26461{
26462#ifdef OPTION_GLIBC
26463 if (OPTION_GLIBC)
26464 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
26465 else
26466 return false;
26467#else
26468 return false;
26469#endif
26470}
26471
26472#undef TARGET_LIBC_HAS_FAST_FUNCTION
26473#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
26474
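/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR: return the maximum known
   error, in ulps, of the libm function CFN for MODE, taking glibc's
   libmvec into account when it may be used.  */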
26475static unsigned
26476ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
26477 bool boundary_p)
26478{
26479#ifdef OPTION_GLIBC
26480 bool glibc_p = OPTION_GLIBC;
26481#else
26482 bool glibc_p = false;
26483#endif
26484 if (glibc_p)
26485 {
26486 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
26487 unsigned int libmvec_ret = 0;
26488 if (!flag_trapping_math
26489 && flag_unsafe_math_optimizations
26490 && flag_finite_math_only
26491 && !flag_signed_zeros
26492 && !flag_errno_math)
26493 switch (cfn)
26494 {
26495 CASE_CFN_COS:
26496 CASE_CFN_COS_FN:
26497 CASE_CFN_SIN:
26498 CASE_CFN_SIN_FN:
26499 if (!boundary_p)
26500 {
26501 /* With non-default rounding modes, libmvec provides
26502 complete garbage in results. E.g.
26503 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
26504 returns 0.00333309174f rather than 1.40129846e-45f. */
26505 if (flag_rounding_math)
26506 return ~0U;
26507 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
26508 claims libmvec maximum error is 4ulps.
26509 My own random testing indicates 2ulps for SFmode and
26510 0.5ulps for DFmode, but let's go with the 4ulps. */
26511 libmvec_ret = 4;
26512 }
26513 break;
26514 default:
26515 break;
26516 }
26517 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
26518 boundary_p);
26519 return MAX (ret, libmvec_ret);
26520 }
26521 return default_libm_function_max_error (cfn, mode, boundary_p);
26522}
26523
26524#undef TARGET_LIBM_FUNCTION_MAX_ERROR
26525#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
26526
26527#if CHECKING_P
26528#undef TARGET_RUN_TARGET_SELFTESTS
26529#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
26530#endif /* #if CHECKING_P */
26531
26532struct gcc_target targetm = TARGET_INITIALIZER;
26533
26534#include "gt-i386.h"
26535
